feat: otel metrics and expanded collecting (#2647)

(passed tests in last run)
This commit is contained in:
Andy Li
2025-06-05 17:20:14 -07:00
committed by GitHub
parent ef52a852a3
commit 80f6e97ca9
60 changed files with 499 additions and 161 deletions

View File

@@ -41,6 +41,7 @@ from letta.log import get_logger
from letta.memory import summarize_messages
from letta.orm import User
from letta.orm.enums import ToolType
from letta.otel.tracing import log_event, trace_method
from letta.schemas.agent import AgentState, AgentStepResponse, UpdateAgent, get_prompt_template_for_agent_type
from letta.schemas.block import BlockUpdate
from letta.schemas.embedding_config import EmbeddingConfig
@@ -72,7 +73,6 @@ from letta.services.tool_manager import ToolManager
from letta.settings import settings, summarizer_settings
from letta.streaming_interface import StreamingRefreshCLIInterface
from letta.system import get_heartbeat, get_token_limit_warning, package_function_response, package_summarize_message, package_user_message
from letta.tracing import log_event, trace_method
from letta.utils import count_tokens, get_friendly_error_msg, get_tool_call_id, log_telemetry, parse_json, validate_function_response
logger = get_logger(__name__)

View File

@@ -16,7 +16,7 @@ from letta.agents.helpers import (
)
from letta.errors import ContextWindowExceededError
from letta.helpers import ToolRulesSolver
from letta.helpers.datetime_helpers import get_utc_timestamp_ns
from letta.helpers.datetime_helpers import AsyncTimer, get_utc_timestamp_ns, ns_to_ms
from letta.helpers.tool_execution_helper import enable_strict_mode
from letta.interfaces.anthropic_streaming_interface import AnthropicStreamingInterface
from letta.interfaces.openai_streaming_interface import OpenAIStreamingInterface
@@ -25,6 +25,9 @@ from letta.llm_api.llm_client_base import LLMClientBase
from letta.local_llm.constants import INNER_THOUGHTS_KWARG
from letta.log import get_logger
from letta.orm.enums import ToolType
from letta.otel.context import get_ctx_attributes
from letta.otel.metric_registry import MetricRegistry
from letta.otel.tracing import log_event, trace_method, tracer
from letta.schemas.agent import AgentState
from letta.schemas.enums import MessageRole, MessageStreamStatus
from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
@@ -48,7 +51,7 @@ from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryMana
from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
from letta.settings import model_settings
from letta.system import package_function_response
from letta.tracing import log_event, trace_method, tracer
from letta.types import JsonDict
from letta.utils import log_telemetry, validate_function_response
logger = get_logger(__name__)
@@ -178,7 +181,7 @@ class LettaAgent(BaseAgent):
# log llm request time
now = get_utc_timestamp_ns()
llm_request_ns = now - step_start
agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns // 1_000_000})
agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})
response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
@@ -210,7 +213,7 @@ class LettaAgent(BaseAgent):
# log LLM request time
now = get_utc_timestamp_ns()
llm_request_ns = now - step_start
agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns // 1_000_000})
agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})
persisted_messages, should_continue = await self._handle_ai_response(
tool_call,
@@ -227,7 +230,7 @@ class LettaAgent(BaseAgent):
# log step time
now = get_utc_timestamp_ns()
step_ns = now - step_start
agent_step_span.add_event(name="step_ms", attributes={"duration_ms": step_ns // 1_000_000})
agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
agent_step_span.end()
# Log LLM Trace
@@ -267,7 +270,7 @@ class LettaAgent(BaseAgent):
if request_start_timestamp_ns:
now = get_utc_timestamp_ns()
request_ns = now - request_start_timestamp_ns
request_span.add_event(name="letta_request_ms", attributes={"duration_ms": request_ns // 1_000_000})
request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(request_ns)})
request_span.end()
# Return back usage
@@ -321,7 +324,7 @@ class LettaAgent(BaseAgent):
# log LLM request time
now = get_utc_timestamp_ns()
llm_request_ns = now - step_start
agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns // 1_000_000})
agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})
# TODO: add run_id
usage.step_count += 1
@@ -363,7 +366,7 @@ class LettaAgent(BaseAgent):
# log step time
now = get_utc_timestamp_ns()
step_ns = now - step_start
agent_step_span.add_event(name="step_ms", attributes={"duration_ms": step_ns // 1_000_000})
agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
agent_step_span.end()
# Log LLM Trace
@@ -384,7 +387,7 @@ class LettaAgent(BaseAgent):
if request_start_timestamp_ns:
now = get_utc_timestamp_ns()
request_ns = now - request_start_timestamp_ns
request_span.add_event(name="request_ms", attributes={"duration_ms": request_ns // 1_000_000})
request_span.add_event(name="request_ms", attributes={"duration_ms": ns_to_ms(request_ns)})
request_span.end()
# Extend the in context message ids
@@ -480,7 +483,7 @@ class LettaAgent(BaseAgent):
if first_chunk and request_span is not None:
now = get_utc_timestamp_ns()
ttft_ns = now - request_start_timestamp_ns
request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ttft_ns // 1_000_000})
request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
first_chunk = False
yield f"data: {chunk.model_dump_json()}\n\n"
@@ -490,6 +493,9 @@ class LettaAgent(BaseAgent):
usage.completion_tokens += interface.output_tokens
usage.prompt_tokens += interface.input_tokens
usage.total_tokens += interface.input_tokens + interface.output_tokens
MetricRegistry().message_output_tokens.record(
interface.output_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
)
# Persist input messages if not already
# Special strategy to lower TTFT
@@ -500,7 +506,7 @@ class LettaAgent(BaseAgent):
# log LLM request time
now = get_utc_timestamp_ns()
llm_request_ns = now - step_start
agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns // 1_000_000})
agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})
# Process resulting stream content
tool_call = interface.get_tool_call_object()
@@ -525,7 +531,7 @@ class LettaAgent(BaseAgent):
# log total step time
now = get_utc_timestamp_ns()
step_ns = now - step_start
agent_step_span.add_event(name="step_ms", attributes={"duration_ms": step_ns // 1_000_000})
agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
agent_step_span.end()
# TODO (cliandy): the stream POST request span has ended at this point, we should tie this to the stream
@@ -576,7 +582,7 @@ class LettaAgent(BaseAgent):
if request_start_timestamp_ns:
now = get_utc_timestamp_ns()
request_ns = now - request_start_timestamp_ns
request_span.add_event(name="letta_request_ms", attributes={"duration_ms": request_ns // 1_000_000})
request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(request_ns)})
request_span.end()
# TODO: Also yield out a letta usage stats SSE
@@ -603,10 +609,16 @@ class LettaAgent(BaseAgent):
)
log_event("agent.stream_no_tokens.llm_request.created")
async with AsyncTimer() as timer:
response = await llm_client.request_async(request_data, agent_state.llm_config)
MetricRegistry().llm_execution_time_ms_histogram.record(
timer.elapsed_ms,
dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
)
# Attempt LLM request
return (
request_data,
await llm_client.request_async(request_data, agent_state.llm_config),
response,
current_in_context_messages,
new_in_context_messages,
)
@@ -653,9 +665,7 @@ class LettaAgent(BaseAgent):
if first_chunk and ttft_span is not None:
provider_request_start_timestamp_ns = get_utc_timestamp_ns()
provider_req_start_ns = provider_request_start_timestamp_ns - request_start_timestamp_ns
ttft_span.add_event(
name="provider_req_start_ns", attributes={"provider_req_start_ms": provider_req_start_ns // 1_000_000}
)
ttft_span.add_event(name="provider_req_start_ns", attributes={"provider_req_start_ms": ns_to_ms(provider_req_start_ns)})
# Attempt LLM request
return (
@@ -861,6 +871,7 @@ class LettaAgent(BaseAgent):
tool_args=tool_args,
agent_state=agent_state,
agent_step_span=agent_step_span,
step_id=step_id,
)
log_telemetry(
self.logger, "_handle_ai_response execute tool finish", tool_execution_result=tool_execution_result, tool_call_id=tool_call_id
@@ -938,10 +949,15 @@ class LettaAgent(BaseAgent):
@trace_method
async def _execute_tool(
self, tool_name: str, tool_args: dict, agent_state: AgentState, agent_step_span: Optional["Span"] = None
self,
tool_name: str,
tool_args: JsonDict,
agent_state: AgentState,
agent_step_span: Optional["Span"] = None,
step_id: str | None = None,
) -> "ToolExecutionResult":
"""
Executes a tool and returns (result, success_flag).
Executes a tool and returns the ToolExecutionResult.
"""
from letta.schemas.tool_execution_result import ToolExecutionResult
@@ -973,7 +989,10 @@ class LettaAgent(BaseAgent):
# TODO: Integrate sandbox result
log_event(name=f"start_{tool_name}_execution", attributes=tool_args)
tool_execution_result = await tool_execution_manager.execute_tool_async(
function_name=tool_name, function_args=tool_args, tool=target_tool
function_name=tool_name,
function_args=tool_args,
tool=target_tool,
step_id=step_id,
)
if agent_step_span:
end_time = get_utc_timestamp_ns()
@@ -981,7 +1000,7 @@ class LettaAgent(BaseAgent):
name="tool_execution_completed",
attributes={
"tool_name": target_tool.name,
"duration_ms": (end_time - start_time) // 1_000_000,
"duration_ms": ns_to_ms((end_time - start_time)),
"success": tool_execution_result.success_flag,
"tool_type": target_tool.tool_type,
"tool_id": target_tool.id,

View File

@@ -16,6 +16,7 @@ from letta.llm_api.llm_client import LLMClient
from letta.local_llm.constants import INNER_THOUGHTS_KWARG
from letta.log import get_logger
from letta.orm.enums import ToolType
from letta.otel.tracing import log_event, trace_method
from letta.schemas.agent import AgentState, AgentStepState
from letta.schemas.enums import AgentStepStatus, JobStatus, MessageStreamStatus, ProviderType
from letta.schemas.job import JobUpdate
@@ -39,7 +40,6 @@ from letta.services.passage_manager import PassageManager
from letta.services.sandbox_config_manager import SandboxConfigManager
from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
from letta.settings import tool_settings
from letta.tracing import log_event, trace_method
logger = get_logger(__name__)

View File

@@ -3,6 +3,7 @@ from typing import AsyncGenerator, List, Optional, Tuple, Union
from letta.agents.helpers import _create_letta_response, serialize_message_history
from letta.agents.letta_agent import LettaAgent
from letta.orm.enums import ToolType
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentState
from letta.schemas.block import BlockUpdate
from letta.schemas.enums import MessageStreamStatus
@@ -17,7 +18,7 @@ from letta.services.message_manager import MessageManager
from letta.services.passage_manager import PassageManager
from letta.services.summarizer.enums import SummarizationMode
from letta.services.summarizer.summarizer import Summarizer
from letta.tracing import trace_method
from letta.types import JsonDict
class VoiceSleeptimeAgent(LettaAgent):
@@ -89,9 +90,16 @@ class VoiceSleeptimeAgent(LettaAgent):
)
@trace_method
async def _execute_tool(self, tool_name: str, tool_args: dict, agent_state: AgentState, agent_step_span: Optional["Span"] = None):
async def _execute_tool(
self,
tool_name: str,
tool_args: JsonDict,
agent_state: AgentState,
agent_step_span: Optional["Span"] = None,
step_id: str | None = None,
) -> "ToolExecutionResult":
"""
Executes a tool and returns (result, success_flag).
Executes a tool and returns the ToolExecutionResult
"""
from letta.schemas.tool_execution_result import ToolExecutionResult

View File

@@ -5,6 +5,7 @@ from typing import AsyncGenerator, List, Optional
from letta.agents.base_agent import BaseAgent
from letta.agents.letta_agent import LettaAgent
from letta.groups.helpers import stringify_message
from letta.otel.tracing import trace_method
from letta.schemas.enums import JobStatus
from letta.schemas.group import Group, ManagerType
from letta.schemas.job import JobUpdate
@@ -21,7 +22,6 @@ from letta.services.message_manager import MessageManager
from letta.services.passage_manager import PassageManager
from letta.services.step_manager import NoopStepManager, StepManager
from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryManager
from letta.tracing import trace_method
class SleeptimeMultiAgentV2(BaseAgent):

View File

@@ -1,7 +1,9 @@
import re
import time
from datetime import datetime, timedelta, timezone
from datetime import datetime, timedelta
from datetime import timezone as dt_timezone
from time import strftime
from typing import Callable
import pytz
@@ -66,7 +68,7 @@ def get_local_time(timezone=None):
def get_utc_time() -> datetime:
    """Return the current UTC time as a timezone-aware datetime.

    Uses the stdlib timezone (aliased as ``dt_timezone`` in this module) rather
    than pytz. The stale duplicate return referencing the removed ``timezone``
    name (diff residue) has been dropped.
    """
    return datetime.now(dt_timezone.utc)
def get_utc_time_int() -> int:
@@ -78,9 +80,13 @@ def get_utc_timestamp_ns() -> int:
return int(time.time_ns())
def ns_to_ms(ns: int) -> int:
    """Convert nanoseconds to whole milliseconds (floor division)."""
    whole_ms, _remainder = divmod(ns, 1_000_000)
    return whole_ms
def timestamp_to_datetime(timestamp_seconds: int) -> datetime:
    """Convert a Unix timestamp in seconds to a timezone-aware UTC datetime.

    The stale duplicate return referencing the removed ``timezone`` name
    (diff residue) has been dropped; only the ``dt_timezone`` form remains.
    """
    return datetime.fromtimestamp(timestamp_seconds, tz=dt_timezone.utc)
def format_datetime(dt):
@@ -105,3 +111,41 @@ def extract_date_from_timestamp(timestamp):
def is_utc_datetime(dt: datetime) -> bool:
    """Return True when *dt* is timezone-aware with a zero UTC offset."""
    if dt.tzinfo is None:
        # Naive datetimes are never considered UTC.
        return False
    return dt.tzinfo.utcoffset(dt) == timedelta(0)
class AsyncTimer:
"""An async context manager for timing async code execution.
Takes in an optional callback_func to call on exit with arguments
taking in the elapsed_ms and exc if present.
Do not use the start and end times outside of this function as they are relative.
"""
def __init__(self, callback_func: Callable | None = None):
self._start_time_ns = None
self._end_time_ns = None
self.elapsed_ns = None
self.callback_func = callback_func
async def __aenter__(self):
self._start_time_ns = time.perf_counter_ns()
return self
async def __aexit__(self, exc_type, exc, tb):
self._end_time_ns = time.perf_counter_ns()
self.elapsed_ns = self._end_time_ns - self._start_time_ns
if self.callback_func:
from asyncio import iscoroutinefunction
if iscoroutinefunction(self.callback_func):
await self.callback_func(self.elapsed_ms, exc)
else:
self.callback_func(self.elapsed_ms, exc)
return False
@property
def elapsed_ms(self):
if self.elapsed_ns is not None:
return ns_to_ms(self.elapsed_ns)
return None

View File

@@ -1,7 +1,12 @@
# TODO (cliandy): consolidate with decorators later
from functools import wraps
def singleton(cls):
"""Decorator to make a class a Singleton class."""
instances = {}
@wraps(cls)
def get_instance(*args, **kwargs):
if cls not in instances:
instances[cls] = cls(*args, **kwargs)

View File

@@ -23,7 +23,7 @@ from anthropic.types.beta import (
)
from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
from letta.helpers.datetime_helpers import get_utc_timestamp_ns
from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
from letta.local_llm.constants import INNER_THOUGHTS_KWARG
from letta.log import get_logger
from letta.schemas.letta_message import (
@@ -131,7 +131,7 @@ class AnthropicStreamingInterface:
now = get_utc_timestamp_ns()
ttft_ns = now - provider_request_start_timestamp_ns
ttft_span.add_event(
name="anthropic_time_to_first_token_ms", attributes={"anthropic_time_to_first_token_ms": ttft_ns // 1_000_000}
name="anthropic_time_to_first_token_ms", attributes={"anthropic_time_to_first_token_ms": ns_to_ms(ttft_ns)}
)
first_chunk = False

View File

@@ -5,7 +5,7 @@ from openai import AsyncStream
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
from letta.helpers.datetime_helpers import get_utc_timestamp_ns
from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
from letta.schemas.letta_message import AssistantMessage, LettaMessage, ReasoningMessage, ToolCallDelta, ToolCallMessage
from letta.schemas.letta_message_content import TextContent
from letta.schemas.message import Message
@@ -85,7 +85,7 @@ class OpenAIStreamingInterface:
now = get_utc_timestamp_ns()
ttft_ns = now - provider_request_start_timestamp_ns
ttft_span.add_event(
name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ttft_ns // 1_000_000}
name="openai_time_to_first_token_ms", attributes={"openai_time_to_first_token_ms": ns_to_ms(ttft_ns)}
)
first_chunk = False

View File

@@ -26,6 +26,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
from letta.log import get_logger
from letta.otel.tracing import log_event
from letta.schemas.enums import ProviderCategory
from letta.schemas.message import Message as _Message
from letta.schemas.message import MessageRole as _MessageRole
@@ -45,7 +46,6 @@ from letta.services.provider_manager import ProviderManager
from letta.services.user_manager import UserManager
from letta.settings import model_settings
from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
from letta.tracing import log_event
logger = get_logger(__name__)

View File

@@ -27,6 +27,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_in
from letta.llm_api.llm_client_base import LLMClientBase
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
from letta.log import get_logger
from letta.otel.tracing import trace_method
from letta.schemas.enums import ProviderCategory
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message as PydanticMessage
@@ -36,7 +37,6 @@ from letta.schemas.openai.chat_completion_response import Message as ChoiceMessa
from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
from letta.services.provider_manager import ProviderManager
from letta.settings import model_settings
from letta.tracing import trace_method
DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."

View File

@@ -12,12 +12,12 @@ from letta.llm_api.llm_client_base import LLMClientBase
from letta.local_llm.json_parser import clean_json_string_extra_backslash
from letta.local_llm.utils import count_tokens
from letta.log import get_logger
from letta.otel.tracing import trace_method
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message as PydanticMessage
from letta.schemas.openai.chat_completion_request import Tool
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
from letta.settings import model_settings, settings
from letta.tracing import trace_method
from letta.utils import get_tool_call_id
logger = get_logger(__name__)

View File

@@ -26,6 +26,7 @@ from letta.local_llm.chat_completion_proxy import get_chat_completion
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
from letta.orm.user import User
from letta.otel.tracing import log_event, trace_method
from letta.schemas.enums import ProviderCategory
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message
@@ -35,7 +36,6 @@ from letta.schemas.provider_trace import ProviderTraceCreate
from letta.services.telemetry_manager import TelemetryManager
from letta.settings import ModelSettings
from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
from letta.tracing import log_event, trace_method
LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq", "deepseek"]

View File

@@ -6,13 +6,13 @@ from openai import AsyncStream, Stream
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
from letta.errors import LLMError
from letta.otel.tracing import log_event, trace_method
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
from letta.schemas.provider_trace import ProviderTraceCreate
from letta.services.telemetry_manager import TelemetryManager
from letta.tracing import log_event, trace_method
if TYPE_CHECKING:
from letta.orm import User

View File

@@ -19,6 +19,7 @@ from letta.llm_api.openai_client import (
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
from letta.log import get_logger
from letta.otel.tracing import log_event
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message as _Message
from letta.schemas.message import MessageRole as _MessageRole
@@ -36,7 +37,6 @@ from letta.schemas.openai.chat_completion_response import (
)
from letta.schemas.openai.embedding_response import EmbeddingResponse
from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
from letta.tracing import log_event
from letta.utils import get_tool_call_id, smart_urljoin
logger = get_logger(__name__)

View File

@@ -23,6 +23,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_st
from letta.llm_api.llm_client_base import LLMClientBase
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
from letta.log import get_logger
from letta.otel.tracing import trace_method
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.enums import ProviderCategory, ProviderType
from letta.schemas.llm_config import LLMConfig
@@ -34,7 +35,6 @@ from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
from letta.schemas.openai.chat_completion_request import ToolFunctionChoice, cast_message_to_subtype
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
from letta.settings import model_settings
from letta.tracing import trace_method
logger = get_logger(__name__)

View File

@@ -20,9 +20,9 @@ from letta.local_llm.utils import count_tokens, get_available_wrappers
from letta.local_llm.vllm.api import get_vllm_completion
from letta.local_llm.webui.api import get_webui_completion
from letta.local_llm.webui.legacy_api import get_webui_completion as get_webui_completion_legacy
from letta.otel.tracing import log_event
from letta.prompts.gpt_summarize import SYSTEM as SUMMARIZE_SYSTEM_MESSAGE
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, Message, ToolCall, UsageStatistics
from letta.tracing import log_event
from letta.utils import get_tool_call_id
has_shown_warning = False

View File

@@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, Callable, Dict, List
from letta.constants import MESSAGE_SUMMARY_REQUEST_ACK
from letta.llm_api.llm_api_tools import create
from letta.llm_api.llm_client import LLMClient
from letta.otel.tracing import trace_method
from letta.prompts.gpt_summarize import SYSTEM as SUMMARY_PROMPT_SYSTEM
from letta.schemas.agent import AgentState
from letta.schemas.enums import MessageRole
@@ -10,7 +11,6 @@ from letta.schemas.letta_message_content import TextContent
from letta.schemas.memory import Memory
from letta.schemas.message import Message
from letta.settings import summarizer_settings
from letta.tracing import trace_method
from letta.utils import count_tokens, printd
if TYPE_CHECKING:

0
letta/otel/__init__.py Normal file
View File

26
letta/otel/context.py Normal file
View File

@@ -0,0 +1,26 @@
from contextvars import ContextVar
from typing import Any, Dict
# Create context var at module level (outside middleware)
# Context-local request attributes (org/project/template/agent ids, etc.) used
# to tag OTel metrics. NOTE: the ContextVar default is a single shared dict;
# the helpers below always replace it via .set() with a fresh dict and never
# mutate it in place, so the shared default stays empty.
request_attributes: ContextVar[Dict[str, Any]] = ContextVar("request_attributes", default={})


# Helper functions
def set_ctx_attributes(attrs: Dict[str, Any]) -> None:
    """Merge *attrs* into the current context's attributes."""
    current = request_attributes.get()
    request_attributes.set({**current, **attrs})


def add_ctx_attribute(key: str, value: Any) -> None:
    """Add a single attribute to the current context."""
    current = request_attributes.get()
    request_attributes.set({**current, key: value})


def get_ctx_attributes() -> Dict[str, Any]:
    """Return all attributes from the current context."""
    # Fix: removed a stray debug print() that dumped the attributes on every call.
    return request_attributes.get()

0
letta/otel/events.py Normal file
View File

View File

@@ -0,0 +1,122 @@
from dataclasses import dataclass, field
from functools import partial
from opentelemetry import metrics
from opentelemetry.metrics import Counter, Histogram
from letta.helpers.singleton import singleton
from letta.otel.metrics import get_letta_meter
@singleton
@dataclass(frozen=True)
class MetricRegistry:
    """Registry of all application metrics.

    Metrics are composed of the following:
    - name
    - description
    - unit: UCUM unit of the metric (e.g. 'By' for bytes, 'ms' for milliseconds, '1' for count)
    - bucket_bounds (list[float] | None): the explicit bucket bounds for histogram metrics
    and instruments are of types Counter, Histogram, and Gauge

    The relationship between the various models is as follows:
    project_id -N:1-> base_template_id -N:1-> template_id -N:1-> agent_id
    agent_id -1:1+-> model_name
    agent_id -1:N -> tool_name
    """

    # Union of instrument types managed by this registry.
    # NOTE(review): the docstring mentions Gauge and bucket_bounds, but only
    # Counter and Histogram instruments are defined below — confirm intent.
    Instrument = Counter | Histogram

    # Lazily-populated cache of instruments, keyed by metric name.
    _metrics: dict[str, Instrument] = field(default_factory=dict, init=False)
    # Meter used to create instruments; installed in __post_init__.
    _meter: metrics.Meter = field(init=False)

    def __post_init__(self):
        # The dataclass is frozen, so bypass normal attribute assignment.
        object.__setattr__(self, "_meter", get_letta_meter())

    def _get_or_create_metric(self, name: str, factory):
        """Lazy initialization of metrics: create via *factory* on first access."""
        if name not in self._metrics:
            self._metrics[name] = factory()
        return self._metrics[name]

    # (includes base attributes: project, template_base, template, agent)
    @property
    def user_message_counter(self) -> Counter:
        return self._get_or_create_metric(
            "count_user_message",
            partial(
                self._meter.create_counter,
                name="count_user_message",
                description="Counts the number of messages sent by the user",
                unit="1",
            ),
        )

    # (includes tool_name, tool_execution_success, & step_id on failure)
    @property
    def tool_execution_counter(self) -> Counter:
        return self._get_or_create_metric(
            "count_tool_execution",
            partial(self._meter.create_counter, name="count_tool_execution", description="Counts the number of tools executed.", unit="1"),
        )

    # project_id + model
    @property
    def ttft_ms_histogram(self) -> Histogram:
        return self._get_or_create_metric(
            "hist_ttft_ms",
            partial(self._meter.create_histogram, name="hist_ttft_ms", description="Histogram for the Time to First Token (ms)", unit="ms"),
        )

    # (includes model name)
    @property
    def llm_execution_time_ms_histogram(self) -> Histogram:
        return self._get_or_create_metric(
            "hist_llm_execution_time_ms",
            partial(
                self._meter.create_histogram,
                name="hist_llm_execution_time_ms",
                description="Histogram for LLM execution time (ms)",
                unit="ms",
            ),
        )

    # (includes tool name)
    @property
    def tool_execution_time_ms_histogram(self) -> Histogram:
        return self._get_or_create_metric(
            "hist_tool_execution_time_ms",
            partial(
                self._meter.create_histogram,
                name="hist_tool_execution_time_ms",
                description="Histogram for tool execution time (ms)",
                unit="ms",
            ),
        )

    # TODO (cliandy): instrument this
    @property
    def message_cost(self) -> Histogram:
        return self._get_or_create_metric(
            "hist_message_cost_usd",
            partial(
                self._meter.create_histogram,
                name="hist_message_cost_usd",
                description="Histogram for cost of messages (usd) per step",
                unit="usd",
            ),
        )

    # (includes model name)
    @property
    def message_output_tokens(self) -> Histogram:
        return self._get_or_create_metric(
            "hist_message_output_tokens",
            partial(
                self._meter.create_histogram,
                name="hist_message_output_tokens",
                description="Histogram for output tokens generated by LLM per step",
                unit="1",
            ),
        )

66
letta/otel/metrics.py Normal file
View File

@@ -0,0 +1,66 @@
from fastapi import FastAPI, Request
from opentelemetry import metrics
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
from opentelemetry.metrics import NoOpMeter
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from letta.log import get_logger
from letta.otel.context import add_ctx_attribute
from letta.otel.resource import get_resource, is_pytest_environment
logger = get_logger(__name__)
_meter: metrics.Meter = NoOpMeter("noop")
_is_metrics_initialized: bool = False
async def _otel_metric_middleware(request: Request, call_next):
    """HTTP middleware that copies well-known Letta headers into the OTel
    metric context so subsequent metric recordings are tagged with them.

    Passes the request straight through when metrics are not initialized.
    """
    if not _is_metrics_initialized:
        return await call_next(request)

    # Inbound header -> OTel attribute key.
    header_attributes = {
        "x-organization-id": "organization.id",
        "x-project-id": "project.id",
        "x-base-template-id": "base_template.id",
        "x-template-id": "template.id",
        "x-agent-id": "agent.id",
    }
    for header_key, otel_key in header_attributes.items():
        header_value = request.headers.get(header_key)
        if header_value:
            add_ctx_attribute(otel_key, header_value)
    # Fix: removed a redundant `try: ... except Exception: raise` wrapper,
    # which re-raised unconditionally and had no effect.
    return await call_next(request)
def setup_metrics(
    endpoint: str,
    app: FastAPI | None = None,
    service_name: str = "memgpt-server",
) -> None:
    """Configure the global OpenTelemetry metrics pipeline.

    Creates an OTLP/gRPC metric exporter pointed at *endpoint*, wires it into
    a new MeterProvider via a periodic reader, and registers that provider
    globally. If *app* is provided, attaches the metric-context middleware.
    No-op when running under pytest.
    """
    if is_pytest_environment():
        return
    # NOTE(review): `assert` is stripped under `python -O`; consider raising
    # ValueError for a missing endpoint instead.
    assert endpoint

    # Mutates module-level state so get_letta_meter() returns a real meter.
    global _is_metrics_initialized, _meter

    otlp_metric_exporter = OTLPMetricExporter(endpoint=endpoint)
    metric_reader = PeriodicExportingMetricReader(exporter=otlp_metric_exporter)
    meter_provider = MeterProvider(resource=get_resource(service_name), metric_readers=[metric_reader])
    metrics.set_meter_provider(meter_provider)
    _meter = metrics.get_meter(__name__)

    if app:
        app.middleware("http")(_otel_metric_middleware)

    _is_metrics_initialized = True
def get_letta_meter() -> metrics.Meter:
    """Return the global letta meter.

    Always returns a meter object: the real one after setup_metrics() has run,
    otherwise the module-level NoOpMeter (a warning is logged in that case).
    """
    if not _is_metrics_initialized or isinstance(_meter, NoOpMeter):
        logger.warning("Metrics are not initialized or meter is not available.")
    return _meter

26
letta/otel/resource.py Normal file
View File

@@ -0,0 +1,26 @@
import os
import sys
import uuid
from opentelemetry.sdk.resources import Resource
from letta import __version__ as letta_version
_resources = {}
def get_resource(service_name: str) -> Resource:
    """Return (and memoize) the OTel Resource for *service_name*.

    The cache key includes LETTA_ENVIRONMENT so resources created under
    different environments are kept distinct.
    """
    _env = os.getenv("LETTA_ENVIRONMENT")
    key = (service_name, _env)
    # Fix: the membership check previously used `service_name` while entries
    # were stored under the `(service_name, _env)` tuple, so the cache never
    # hit and a new Resource was created on every call.
    if key not in _resources:
        resource_dict = {
            "service.name": service_name,
            "letta.version": letta_version,
        }
        if _env != "PRODUCTION":
            resource_dict["device.id"] = uuid.getnode()  # MAC address as unique device identifier
        _resources[key] = Resource.create(resource_dict)
    return _resources[key]
def is_pytest_environment():
    """Return True when the current process is running under pytest."""
    loaded_modules = sys.modules
    return "pytest" in loaded_modules

View File

@@ -1,6 +1,5 @@
import inspect
import re
import sys
import time
from functools import wraps
from typing import Any, Dict, List, Optional
@@ -11,15 +10,18 @@ from fastapi.responses import JSONResponse
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.requests import RequestsInstrumentor
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.trace import Status, StatusCode
from letta import __version__ as letta_version
from letta.log import get_logger
from letta.otel.resource import get_resource, is_pytest_environment
from letta.settings import settings
logger = get_logger(__name__) # TODO: set up logger config for this
tracer = trace.get_tracer(__name__)
_is_tracing_initialized = False
_excluded_v1_endpoints_regex: List[str] = [
# "^GET /v1/agents/(?P<agent_id>[^/]+)/messages$",
# "^GET /v1/agents/(?P<agent_id>[^/]+)/context$",
@@ -30,11 +32,7 @@ _excluded_v1_endpoints_regex: List[str] = [
]
def is_pytest_environment():
return "pytest" in sys.modules
async def trace_request_middleware(request: Request, call_next):
async def _trace_request_middleware(request: Request, call_next):
if not _is_tracing_initialized:
return await call_next(request)
initial_span_name = f"{request.method} {request.url.path}"
@@ -56,7 +54,7 @@ async def trace_request_middleware(request: Request, call_next):
raise
async def update_trace_attributes(request: Request):
async def _update_trace_attributes(request: Request):
"""Dependency to update trace attributes after FastAPI has processed the request"""
if not _is_tracing_initialized:
return
@@ -78,35 +76,19 @@ async def update_trace_attributes(request: Request):
for key, value in request.path_params.items():
span.set_attribute(f"http.{key}", value)
# Add user ID if available
user_id = request.headers.get("user_id")
if user_id:
span.set_attribute("user.id", user_id)
# Add organization_id if available
organization_id = request.headers.get("x-organization-id")
if organization_id:
span.set_attribute("organization.id", organization_id)
# Add project_id if available
project_id = request.headers.get("x-project-id")
if project_id:
span.set_attribute("project.id", project_id)
# Add agent_id if available
agent_id = request.headers.get("x-agent-id")
if agent_id:
span.set_attribute("agent.id", agent_id)
# Add template_id if available
template_id = request.headers.get("x-template-id")
if template_id:
span.set_attribute("template.id", template_id)
# Add base_template_id if available
base_template_id = request.headers.get("x-base-template-id")
if base_template_id:
span.set_attribute("base_template.id", base_template_id)
# Add the following headers to span if available
header_attributes = {
"user_id": "user.id",
"x-organization-id": "organization.id",
"x-project-id": "project.id",
"x-agent-id": "agent.id",
"x-template-id": "template.id",
"x-base-template-id": "base_template.id",
}
for header_key, span_key in header_attributes.items():
header_value = request.headers.get(header_key)
if header_value:
span.set_attribute(span_key, header_value)
# Add request body if available
try:
@@ -117,7 +99,7 @@ async def update_trace_attributes(request: Request):
pass
async def trace_error_handler(_request: Request, exc: Exception) -> JSONResponse:
async def _trace_error_handler(_request: Request, exc: Exception) -> JSONResponse:
status_code = getattr(exc, "status_code", 500)
error_msg = str(exc)
@@ -142,49 +124,44 @@ def setup_tracing(
) -> None:
if is_pytest_environment():
return
assert endpoint
global _is_tracing_initialized
provider = TracerProvider(resource=Resource.create({"service.name": service_name}))
import uuid
tracer_provider = TracerProvider(resource=get_resource(service_name))
tracer_provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(endpoint=endpoint)))
_is_tracing_initialized = True
trace.set_tracer_provider(tracer_provider)
provider = TracerProvider(
resource=Resource.create(
{
"service.name": service_name,
"device.id": uuid.getnode(), # MAC address as unique device identifier,
"letta.version": letta_version,
}
)
)
if endpoint:
provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(endpoint=endpoint)))
_is_tracing_initialized = True
trace.set_tracer_provider(provider)
# Instrumentors (e.g., RequestsInstrumentor)
def requests_callback(span: trace.Span, _: Any, response: Any) -> None:
if hasattr(response, "status_code"):
span.set_status(Status(StatusCode.OK if response.status_code < 400 else StatusCode.ERROR))
def requests_callback(span: trace.Span, _: Any, response: Any) -> None:
if hasattr(response, "status_code"):
span.set_status(Status(StatusCode.OK if response.status_code < 400 else StatusCode.ERROR))
RequestsInstrumentor().instrument(response_hook=requests_callback)
RequestsInstrumentor().instrument(response_hook=requests_callback)
if settings.sqlalchemy_tracing:
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
if app:
# Add middleware first
app.middleware("http")(trace_request_middleware)
SQLAlchemyInstrumentor().instrument()
# Add dependency to v1 routes
from letta.server.rest_api.routers.v1 import ROUTERS as v1_routes
if app:
# Add middleware first
app.middleware("http")(_trace_request_middleware)
for router in v1_routes:
for route in router.routes:
full_path = ((next(iter(route.methods)) + " ") if route.methods else "") + "/v1" + route.path
if not any(re.match(regex, full_path) for regex in _excluded_v1_endpoints_regex):
route.dependencies.append(Depends(update_trace_attributes))
# Add dependency to v1 routes
from letta.server.rest_api.routers.v1 import ROUTERS as V1_ROUTES
# Register exception handlers
app.exception_handler(HTTPException)(trace_error_handler)
app.exception_handler(RequestValidationError)(trace_error_handler)
app.exception_handler(Exception)(trace_error_handler)
for router in V1_ROUTES:
for route in router.routes:
full_path = ((next(iter(route.methods)) + " ") if route.methods else "") + "/v1" + route.path
if not any(re.match(regex, full_path) for regex in _excluded_v1_endpoints_regex):
route.dependencies.append(Depends(_update_trace_attributes))
# Register exception handlers for tracing
app.exception_handler(HTTPException)(_trace_error_handler)
app.exception_handler(RequestValidationError)(_trace_error_handler)
app.exception_handler(Exception)(_trace_error_handler)
def trace_method(func):

View File

@@ -13,8 +13,8 @@ from sqlalchemy.orm import sessionmaker
from letta.config import LettaConfig
from letta.log import get_logger
from letta.otel.tracing import trace_method
from letta.settings import settings
from letta.tracing import trace_method
logger = get_logger(__name__)

View File

@@ -256,13 +256,15 @@ def create_application() -> "FastAPI":
print(f"▶ Using OTLP tracing with endpoint: {otlp_endpoint}")
env_name_suffix = os.getenv("ENV_NAME")
service_name = f"letta-server-{env_name_suffix.lower()}" if env_name_suffix else "letta-server"
from letta.tracing import setup_tracing
from letta.otel.metrics import setup_metrics
from letta.otel.tracing import setup_tracing
setup_tracing(
endpoint=otlp_endpoint,
app=app,
service_name=service_name,
)
setup_metrics(endpoint=otlp_endpoint, app=app, service_name=service_name)
for route in v1_routes:
app.include_router(route, prefix=API_PREFIX)
@@ -339,14 +341,14 @@ def start_server(
target="letta.server.rest_api.app:app",
# factory=True,
interface="asgi",
address=host or "localhost",
address=host or "127.0.0.1", # Note granian address must be an ip address
port=port or REST_DEFAULT_PORT,
workers=settings.uvicorn_workers,
# threads=
reload=reload or settings.uvicorn_reload,
reload_ignore_patterns=["openapi_letta.json"],
reload_ignore_worker_failure=True,
reload_tick=100,
reload_tick=4000, # set to 4s to prevent crashing on weird state
# log_level="info"
ssl_keyfile="certs/localhost-key.pem",
ssl_cert="certs/localhost.pem",
@@ -380,14 +382,14 @@ def start_server(
target="letta.server.rest_api.app:app",
# factory=True,
interface="asgi",
address=host or "localhost",
address=host or "127.0.0.1", # Note granian address must be an ip address
port=port or REST_DEFAULT_PORT,
workers=settings.uvicorn_workers,
# threads=
reload=reload or settings.uvicorn_reload,
reload_ignore_patterns=["openapi_letta.json"],
reload_ignore_worker_failure=True,
reload_tick=100,
reload_tick=4000, # set to 4s to prevent crashing on weird state
# log_level="info"
).serve()
else:

View File

@@ -17,6 +17,8 @@ from letta.groups.sleeptime_multi_agent_v2 import SleeptimeMultiAgentV2
from letta.helpers.datetime_helpers import get_utc_timestamp_ns
from letta.log import get_logger
from letta.orm.errors import NoResultFound
from letta.otel.context import get_ctx_attributes
from letta.otel.metric_registry import MetricRegistry
from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgent
from letta.schemas.block import Block, BlockUpdate
from letta.schemas.group import Group
@@ -663,6 +665,8 @@ async def send_message(
Process a user message and return the agent's response.
This endpoint accepts a message from a user and processes it through the agent.
"""
MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
request_start_timestamp_ns = get_utc_timestamp_ns()
# TODO: This is redundant, remove soon
@@ -741,7 +745,8 @@ async def send_message_streaming(
This endpoint accepts a message from a user and processes it through the agent.
It will stream the steps of the response always, and stream the tokens if 'stream_tokens' is set to True.
"""
request_start_timestamp_ns = get_utc_timestamp_ns()
MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
# TODO: This is redundant, remove soon
agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
@@ -877,6 +882,7 @@ async def send_message_async(
Asynchronously process a user message and return a run object.
The actual processing happens in the background, and the status can be checked using the run ID.
"""
MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
# Create a new job

View File

@@ -15,9 +15,12 @@ from pydantic import BaseModel
from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
from letta.errors import ContextWindowExceededError, RateLimitExceededError
from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns
from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns, ns_to_ms
from letta.helpers.message_helper import convert_message_creates_to_messages
from letta.log import get_logger
from letta.otel.context import get_ctx_attributes
from letta.otel.metric_registry import MetricRegistry
from letta.otel.tracing import tracer
from letta.schemas.enums import MessageRole
from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
from letta.schemas.llm_config import LLMConfig
@@ -27,7 +30,6 @@ from letta.schemas.usage import LettaUsageStatistics
from letta.schemas.user import User
from letta.server.rest_api.interface import StreamingServerInterface
from letta.system import get_heartbeat, package_function_response
from letta.tracing import tracer
if TYPE_CHECKING:
from letta.server.server import SyncServer
@@ -81,8 +83,12 @@ async def sse_async_generator(
if first_chunk and ttft_span is not None:
now = get_utc_timestamp_ns()
ttft_ns = now - request_start_timestamp_ns
ttft_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ttft_ns // 1_000_000})
ttft_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
ttft_span.end()
metric_attributes = get_ctx_attributes()
if llm_config:
metric_attributes["model.name"] = llm_config.model
MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
first_chunk = False
# yield f"data: {json.dumps(chunk)}\n\n"

View File

@@ -34,6 +34,7 @@ from letta.interface import AgentInterface # abstract
from letta.interface import CLIInterface # for printing to terminal
from letta.log import get_logger
from letta.orm.errors import NoResultFound
from letta.otel.tracing import log_event, trace_method
from letta.prompts.gpt_system import get_system_text
from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgent
from letta.schemas.block import Block, BlockUpdate, CreateBlock
@@ -101,7 +102,6 @@ from letta.services.tool_executor.tool_execution_manager import ToolExecutionMan
from letta.services.tool_manager import ToolManager
from letta.services.user_manager import UserManager
from letta.settings import model_settings, settings, tool_settings
from letta.tracing import log_event, trace_method
from letta.utils import get_friendly_error_msg, get_persona_text, make_key
config = LettaConfig.load()

View File

@@ -37,6 +37,7 @@ from letta.orm.errors import NoResultFound
from letta.orm.sandbox_config import AgentEnvironmentVariable
from letta.orm.sandbox_config import AgentEnvironmentVariable as AgentEnvironmentVariableModel
from letta.orm.sqlalchemy_base import AccessType
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentState as PydanticAgentState
from letta.schemas.agent import AgentType, CreateAgent, UpdateAgent, get_prompt_template_for_agent_type
from letta.schemas.block import DEFAULT_BLOCKS
@@ -86,7 +87,6 @@ from letta.services.message_manager import MessageManager
from letta.services.passage_manager import PassageManager
from letta.services.source_manager import SourceManager
from letta.services.tool_manager import ToolManager
from letta.tracing import trace_method
from letta.utils import enforce_types, united_diff
logger = get_logger(__name__)

View File

@@ -9,12 +9,12 @@ from letta.orm.block import Block as BlockModel
from letta.orm.block_history import BlockHistory
from letta.orm.enums import ActorType
from letta.orm.errors import NoResultFound
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentState as PydanticAgentState
from letta.schemas.block import Block as PydanticBlock
from letta.schemas.block import BlockUpdate
from letta.schemas.user import User as PydanticUser
from letta.server.db import db_registry
from letta.tracing import trace_method
from letta.utils import enforce_types
logger = get_logger(__name__)

View File

@@ -5,11 +5,11 @@ from sqlalchemy import and_, func, select, update
from letta.orm.errors import NoResultFound
from letta.orm.files_agents import FileAgent as FileAgentModel
from letta.otel.tracing import trace_method
from letta.schemas.block import Block as PydanticBlock
from letta.schemas.file import FileAgent as PydanticFileAgent
from letta.schemas.user import User as PydanticUser
from letta.server.db import db_registry
from letta.tracing import trace_method
from letta.utils import enforce_types

View File

@@ -7,13 +7,13 @@ from letta.orm.agent import Agent as AgentModel
from letta.orm.errors import NoResultFound
from letta.orm.group import Group as GroupModel
from letta.orm.message import Message as MessageModel
from letta.otel.tracing import trace_method
from letta.schemas.group import Group as PydanticGroup
from letta.schemas.group import GroupCreate, GroupUpdate, ManagerType
from letta.schemas.letta_message import LettaMessage
from letta.schemas.message import Message as PydanticMessage
from letta.schemas.user import User as PydanticUser
from letta.server.db import db_registry
from letta.tracing import trace_method
from letta.utils import enforce_types

View File

@@ -16,6 +16,7 @@ from letta.orm.agents_tags import AgentsTags
from letta.orm.errors import NoResultFound
from letta.orm.identity import Identity
from letta.orm.sqlite_functions import adapt_array
from letta.otel.tracing import trace_method
from letta.prompts import gpt_system
from letta.schemas.agent import AgentState, AgentType
from letta.schemas.embedding_config import EmbeddingConfig
@@ -27,7 +28,6 @@ from letta.schemas.tool_rule import ToolRule
from letta.schemas.user import User
from letta.settings import settings
from letta.system import get_initial_boot_messages, get_login_event, package_function_response
from letta.tracing import trace_method
# Static methods

View File

@@ -7,11 +7,11 @@ from sqlalchemy.exc import NoResultFound
from letta.orm.agent import Agent as AgentModel
from letta.orm.block import Block as BlockModel
from letta.orm.identity import Identity as IdentityModel
from letta.otel.tracing import trace_method
from letta.schemas.identity import Identity as PydanticIdentity
from letta.schemas.identity import IdentityCreate, IdentityProperty, IdentityType, IdentityUpdate, IdentityUpsert
from letta.schemas.user import User as PydanticUser
from letta.server.db import db_registry
from letta.tracing import trace_method
from letta.utils import enforce_types

View File

@@ -14,6 +14,7 @@ from letta.orm.message import Message as MessageModel
from letta.orm.sqlalchemy_base import AccessType
from letta.orm.step import Step
from letta.orm.step import Step as StepModel
from letta.otel.tracing import trace_method
from letta.schemas.enums import JobStatus, MessageRole
from letta.schemas.job import BatchJob as PydanticBatchJob
from letta.schemas.job import Job as PydanticJob
@@ -25,7 +26,6 @@ from letta.schemas.step import Step as PydanticStep
from letta.schemas.usage import LettaUsageStatistics
from letta.schemas.user import User as PydanticUser
from letta.server.db import db_registry
from letta.tracing import trace_method
from letta.utils import enforce_types

View File

@@ -9,6 +9,7 @@ from letta.log import get_logger
from letta.orm import Message as MessageModel
from letta.orm.llm_batch_items import LLMBatchItem
from letta.orm.llm_batch_job import LLMBatchJob
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentStepState
from letta.schemas.enums import AgentStepStatus, JobStatus, ProviderType
from letta.schemas.llm_batch_job import LLMBatchItem as PydanticLLMBatchItem
@@ -17,7 +18,6 @@ from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message as PydanticMessage
from letta.schemas.user import User as PydanticUser
from letta.server.db import db_registry
from letta.tracing import trace_method
from letta.utils import enforce_types
logger = get_logger(__name__)

View File

@@ -7,13 +7,13 @@ from letta.log import get_logger
from letta.orm.agent import Agent as AgentModel
from letta.orm.errors import NoResultFound
from letta.orm.message import Message as MessageModel
from letta.otel.tracing import trace_method
from letta.schemas.enums import MessageRole
from letta.schemas.letta_message import LettaMessageUpdateUnion
from letta.schemas.message import Message as PydanticMessage
from letta.schemas.message import MessageUpdate
from letta.schemas.user import User as PydanticUser
from letta.server.db import db_registry
from letta.tracing import trace_method
from letta.utils import enforce_types
logger = get_logger(__name__)

View File

@@ -2,10 +2,10 @@ from typing import List, Optional
from letta.orm.errors import NoResultFound
from letta.orm.organization import Organization as OrganizationModel
from letta.otel.tracing import trace_method
from letta.schemas.organization import Organization as PydanticOrganization
from letta.schemas.organization import OrganizationUpdate
from letta.server.db import db_registry
from letta.tracing import trace_method
from letta.utils import enforce_types

View File

@@ -11,11 +11,11 @@ from letta.constants import MAX_EMBEDDING_DIM
from letta.embeddings import embedding_model, parse_and_chunk_text
from letta.orm.errors import NoResultFound
from letta.orm.passage import AgentPassage, SourcePassage
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentState
from letta.schemas.passage import Passage as PydanticPassage
from letta.schemas.user import User as PydanticUser
from letta.server.db import db_registry
from letta.tracing import trace_method
from letta.utils import enforce_types

View File

@@ -1,7 +1,7 @@
import threading
from collections import defaultdict
from letta.tracing import trace_method
from letta.otel.tracing import trace_method
class PerAgentLockManager:

View File

@@ -1,12 +1,12 @@
from typing import List, Optional, Union
from letta.orm.provider import Provider as ProviderModel
from letta.otel.tracing import trace_method
from letta.schemas.enums import ProviderCategory, ProviderType
from letta.schemas.providers import Provider as PydanticProvider
from letta.schemas.providers import ProviderCheck, ProviderCreate, ProviderUpdate
from letta.schemas.user import User as PydanticUser
from letta.server.db import db_registry
from letta.tracing import trace_method
from letta.utils import enforce_types

View File

@@ -5,6 +5,7 @@ from letta.log import get_logger
from letta.orm.errors import NoResultFound
from letta.orm.sandbox_config import SandboxConfig as SandboxConfigModel
from letta.orm.sandbox_config import SandboxEnvironmentVariable as SandboxEnvVarModel
from letta.otel.tracing import trace_method
from letta.schemas.environment_variables import SandboxEnvironmentVariable as PydanticEnvVar
from letta.schemas.environment_variables import SandboxEnvironmentVariableCreate, SandboxEnvironmentVariableUpdate
from letta.schemas.sandbox_config import LocalSandboxConfig
@@ -12,7 +13,6 @@ from letta.schemas.sandbox_config import SandboxConfig as PydanticSandboxConfig
from letta.schemas.sandbox_config import SandboxConfigCreate, SandboxConfigUpdate, SandboxType
from letta.schemas.user import User as PydanticUser
from letta.server.db import db_registry
from letta.tracing import trace_method
from letta.utils import enforce_types, printd
logger = get_logger(__name__)

View File

@@ -10,13 +10,13 @@ from letta.orm.file import FileContent as FileContentModel
from letta.orm.file import FileMetadata as FileMetadataModel
from letta.orm.source import Source as SourceModel
from letta.orm.sqlalchemy_base import AccessType
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentState as PydanticAgentState
from letta.schemas.file import FileMetadata as PydanticFileMetadata
from letta.schemas.source import Source as PydanticSource
from letta.schemas.source import SourceUpdate
from letta.schemas.user import User as PydanticUser
from letta.server.db import db_registry
from letta.tracing import trace_method
from letta.utils import enforce_types, printd

View File

@@ -5,16 +5,16 @@ from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session
from letta.helpers.singleton import singleton
from letta.orm.errors import NoResultFound
from letta.orm.job import Job as JobModel
from letta.orm.sqlalchemy_base import AccessType
from letta.orm.step import Step as StepModel
from letta.otel.tracing import get_trace_id, trace_method
from letta.schemas.openai.chat_completion_response import UsageStatistics
from letta.schemas.step import Step as PydanticStep
from letta.schemas.user import User as PydanticUser
from letta.server.db import db_registry
from letta.services.helpers.noop_helper import singleton
from letta.tracing import get_trace_id, trace_method
from letta.utils import enforce_types

View File

@@ -6,11 +6,11 @@ from typing import List, Optional, Tuple, Union
from letta.agents.ephemeral_summary_agent import EphemeralSummaryAgent
from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
from letta.log import get_logger
from letta.otel.tracing import trace_method
from letta.schemas.enums import MessageRole
from letta.schemas.letta_message_content import TextContent
from letta.schemas.message import Message, MessageCreate
from letta.services.summarizer.enums import SummarizationMode
from letta.tracing import trace_method
logger = get_logger(__name__)

View File

@@ -1,11 +1,11 @@
from letta.helpers.json_helpers import json_dumps, json_loads
from letta.helpers.singleton import singleton
from letta.orm.provider_trace import ProviderTrace as ProviderTraceModel
from letta.schemas.provider_trace import ProviderTrace as PydanticProviderTrace
from letta.schemas.provider_trace import ProviderTraceCreate
from letta.schemas.step import Step as PydanticStep
from letta.schemas.user import User as PydanticUser
from letta.server.db import db_registry
from letta.services.helpers.noop_helper import singleton
from letta.utils import enforce_types

View File

@@ -3,6 +3,7 @@ from textwrap import shorten
from typing import Any, Dict, Literal, Optional
from letta.constants import WEB_SEARCH_CLIP_CONTENT, WEB_SEARCH_INCLUDE_SCORE, WEB_SEARCH_SEPARATOR
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentState
from letta.schemas.sandbox_config import SandboxConfig
from letta.schemas.tool import Tool
@@ -10,7 +11,6 @@ from letta.schemas.tool_execution_result import ToolExecutionResult
from letta.schemas.user import User
from letta.services.tool_executor.tool_executor_base import ToolExecutor
from letta.settings import tool_settings
from letta.tracing import trace_method
class LettaBuiltinToolExecutor(ToolExecutor):

View File

@@ -3,13 +3,13 @@ from typing import Any, Dict, Optional
from letta.constants import COMPOSIO_ENTITY_ENV_VAR_KEY
from letta.functions.composio_helpers import execute_composio_action_async, generate_composio_action_from_func_name
from letta.helpers.composio_helpers import get_composio_api_key_async
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentState
from letta.schemas.sandbox_config import SandboxConfig
from letta.schemas.tool import Tool
from letta.schemas.tool_execution_result import ToolExecutionResult
from letta.schemas.user import User
from letta.services.tool_executor.tool_executor_base import ToolExecutor
from letta.tracing import trace_method
class ExternalComposioToolExecutor(ToolExecutor):

View File

@@ -1,6 +1,7 @@
from typing import Any, Dict, Optional
from letta.constants import MCP_TOOL_TAG_NAME_PREFIX
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentState
from letta.schemas.sandbox_config import SandboxConfig
from letta.schemas.tool import Tool
@@ -8,7 +9,6 @@ from letta.schemas.tool_execution_result import ToolExecutionResult
from letta.schemas.user import User
from letta.services.mcp_manager import MCPManager
from letta.services.tool_executor.tool_executor_base import ToolExecutor
from letta.tracing import trace_method
class ExternalMCPToolExecutor(ToolExecutor):

View File

@@ -2,8 +2,12 @@ import traceback
from typing import Any, Dict, Optional, Type
from letta.constants import FUNCTION_RETURN_VALUE_TRUNCATED
from letta.helpers.datetime_helpers import AsyncTimer
from letta.log import get_logger
from letta.orm.enums import ToolType
from letta.otel.context import get_ctx_attributes
from letta.otel.metric_registry import MetricRegistry
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentState
from letta.schemas.sandbox_config import SandboxConfig
from letta.schemas.tool import Tool
@@ -21,7 +25,6 @@ from letta.services.tool_executor.mcp_tool_executor import ExternalMCPToolExecut
from letta.services.tool_executor.multi_agent_tool_executor import LettaMultiAgentToolExecutor
from letta.services.tool_executor.tool_executor import SandboxToolExecutor
from letta.services.tool_executor.tool_executor_base import ToolExecutor
from letta.tracing import trace_method
from letta.utils import get_friendly_error_msg
@@ -85,10 +88,13 @@ class ToolExecutionManager:
self.sandbox_env_vars = sandbox_env_vars
@trace_method
async def execute_tool_async(self, function_name: str, function_args: dict, tool: Tool) -> ToolExecutionResult:
async def execute_tool_async(
self, function_name: str, function_args: dict, tool: Tool, step_id: str | None = None
) -> ToolExecutionResult:
"""
Execute a tool asynchronously and persist any state changes.
"""
status = "error" # set as default for tracking purposes
try:
executor = ToolExecutorFactory.get_executor(
tool.tool_type,
@@ -98,9 +104,17 @@ class ToolExecutionManager:
passage_manager=self.passage_manager,
actor=self.actor,
)
result = await executor.execute(
function_name, function_args, tool, self.actor, self.agent_state, self.sandbox_config, self.sandbox_env_vars
)
def _metrics_callback(exec_time_ms: int, exc):
return MetricRegistry().tool_execution_time_ms_histogram.record(
exec_time_ms, dict(get_ctx_attributes(), **{"tool.name": tool.name})
)
async with AsyncTimer(callback_func=_metrics_callback):
result = await executor.execute(
function_name, function_args, tool, self.actor, self.agent_state, self.sandbox_config, self.sandbox_env_vars
)
status = result.status
# trim result
return_str = str(result.func_return)
@@ -110,6 +124,7 @@ class ToolExecutionManager:
return result
except Exception as e:
status = "error"
self.logger.error(f"Error executing tool {function_name}: {str(e)}")
error_message = get_friendly_error_msg(
function_name=function_name,
@@ -121,3 +136,8 @@ class ToolExecutionManager:
func_return=error_message,
stderr=[traceback.format_exc()],
)
finally:
metric_attrs = {"tool.name": tool.name, "tool.execution_success": status == "success"}
if status == "error" and step_id:
metric_attrs["step.id"] = step_id
MetricRegistry().tool_execution_counter.add(1, dict(get_ctx_attributes(), **metric_attrs))

View File

@@ -11,6 +11,7 @@ from typing import Any, Dict, Optional
from letta.functions.helpers import generate_model_from_args_json_schema
from letta.log import get_logger
from letta.otel.tracing import log_event, trace_method
from letta.schemas.agent import AgentState
from letta.schemas.sandbox_config import SandboxConfig, SandboxType
from letta.schemas.tool import Tool
@@ -27,7 +28,6 @@ from letta.services.organization_manager import OrganizationManager
from letta.services.sandbox_config_manager import SandboxConfigManager
from letta.services.tool_manager import ToolManager
from letta.settings import tool_settings
from letta.tracing import log_event, trace_method
from letta.utils import get_friendly_error_msg
logger = get_logger(__name__)

View File

@@ -3,6 +3,7 @@ from typing import Any, Dict, Optional
from letta.functions.ast_parsers import coerce_dict_args_by_annotations, get_function_annotations_from_source
from letta.log import get_logger
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentState
from letta.schemas.sandbox_config import SandboxConfig
from letta.schemas.tool import Tool
@@ -13,7 +14,6 @@ from letta.services.tool_executor.tool_executor_base import ToolExecutor
from letta.services.tool_sandbox.e2b_sandbox import AsyncToolSandboxE2B
from letta.services.tool_sandbox.local_sandbox import AsyncToolSandboxLocal
from letta.settings import tool_settings
from letta.tracing import trace_method
from letta.types import JsonDict
from letta.utils import get_friendly_error_msg

View File

@@ -24,12 +24,12 @@ from letta.orm.enums import ToolType
# TODO: Remove this once we translate all of these to the ORM
from letta.orm.errors import NoResultFound
from letta.orm.tool import Tool as ToolModel
from letta.otel.tracing import trace_method
from letta.schemas.tool import Tool as PydanticTool
from letta.schemas.tool import ToolCreate, ToolUpdate
from letta.schemas.user import User as PydanticUser
from letta.server.db import db_registry
from letta.services.mcp.types import SSEServerConfig, StdioServerConfig
from letta.tracing import trace_method
from letta.utils import enforce_types, printd
logger = get_logger(__name__)

View File

@@ -3,13 +3,13 @@ from typing import TYPE_CHECKING, Any, Dict, Optional
from e2b_code_interpreter import AsyncSandbox
from letta.log import get_logger
from letta.otel.tracing import log_event, trace_method
from letta.schemas.agent import AgentState
from letta.schemas.sandbox_config import SandboxConfig, SandboxType
from letta.schemas.tool import Tool
from letta.schemas.tool_execution_result import ToolExecutionResult
from letta.services.helpers.tool_parser_helper import parse_stdout_best_effort
from letta.services.tool_sandbox.base import AsyncToolSandboxBase
from letta.tracing import log_event, trace_method
from letta.types import JsonDict
from letta.utils import get_friendly_error_msg

View File

@@ -8,6 +8,7 @@ from typing import Any, Dict, Optional
from pydantic.config import JsonDict
from letta.otel.tracing import log_event, trace_method
from letta.schemas.agent import AgentState
from letta.schemas.sandbox_config import SandboxConfig, SandboxType
from letta.schemas.tool import Tool
@@ -20,7 +21,6 @@ from letta.services.helpers.tool_execution_helper import (
from letta.services.helpers.tool_parser_helper import parse_stdout_best_effort
from letta.services.tool_sandbox.base import AsyncToolSandboxBase
from letta.settings import tool_settings
from letta.tracing import log_event, trace_method
from letta.utils import get_friendly_error_msg, parse_stderr_error_msg

View File

@@ -5,11 +5,11 @@ from sqlalchemy import select, text
from letta.orm.errors import NoResultFound
from letta.orm.organization import Organization as OrganizationModel
from letta.orm.user import User as UserModel
from letta.otel.tracing import trace_method
from letta.schemas.user import User as PydanticUser
from letta.schemas.user import UserUpdate
from letta.server.db import db_registry
from letta.services.organization_manager import OrganizationManager
from letta.tracing import trace_method
from letta.utils import enforce_types

View File

@@ -210,6 +210,7 @@ class Settings(BaseSettings):
use_uvloop: bool = False
use_granian: bool = False
sqlalchemy_tracing: bool = False
# event loop parallelism
event_loop_threadpool_max_workers: int = 43

View File

@@ -12,12 +12,18 @@ processors:
send_batch_size: 1024
exporters:
file:
file/traces:
path: ${HOME}/.letta/logs/traces.json
rotation:
max_megabytes: 100
max_days: 7
max_backups: 5
file/metrics:
path: ${HOME}/.letta/logs/metrics.json
rotation:
max_megabytes: 100
max_days: 7
max_backups: 5
clickhouse:
endpoint: ${CLICKHOUSE_ENDPOINT}
database: ${CLICKHOUSE_DATABASE}
@@ -40,4 +46,8 @@ service:
traces:
receivers: [otlp]
processors: [batch]
exporters: [file, clickhouse]
exporters: [file/traces, clickhouse]
metrics:
receivers: [otlp]
processors: [batch]
exporters: [file/metrics, clickhouse]