diff --git a/letta/adapters/letta_llm_adapter.py b/letta/adapters/letta_llm_adapter.py
index 022eb9fd..8c4bc2e6 100644
--- a/letta/adapters/letta_llm_adapter.py
+++ b/letta/adapters/letta_llm_adapter.py
@@ -31,6 +31,7 @@ class LettaLLMAdapter(ABC):
         self.tool_call: ToolCall | None = None
         self.usage: LettaUsageStatistics = LettaUsageStatistics()
         self.telemetry_manager: TelemetryManager = TelemetryManager()
+        self.llm_request_finish_timestamp_ns: int | None = None
 
     @abstractmethod
     async def invoke_llm(
diff --git a/letta/adapters/letta_llm_request_adapter.py b/letta/adapters/letta_llm_request_adapter.py
index 3fd0803a..5a2471d6 100644
--- a/letta/adapters/letta_llm_request_adapter.py
+++ b/letta/adapters/letta_llm_request_adapter.py
@@ -2,6 +2,7 @@ import asyncio
 from typing import AsyncGenerator
 
 from letta.adapters.letta_llm_adapter import LettaLLMAdapter
+from letta.helpers.datetime_helpers import get_utc_timestamp_ns
 from letta.schemas.letta_message import LettaMessage
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, TextContent
 from letta.schemas.provider_trace import ProviderTraceCreate
@@ -42,6 +43,7 @@ class LettaLLMRequestAdapter(LettaLLMAdapter):
 
         # Make the blocking LLM request
         self.response_data = await self.llm_client.request_async(request_data, self.llm_config)
+        self.llm_request_finish_timestamp_ns = get_utc_timestamp_ns()
 
         # Convert response to chat completion format
         self.chat_completions_response = self.llm_client.convert_response_to_chat_completion(self.response_data, messages, self.llm_config)
diff --git a/letta/adapters/letta_llm_stream_adapter.py b/letta/adapters/letta_llm_stream_adapter.py
index d7323aa6..47a8f562 100644
--- a/letta/adapters/letta_llm_stream_adapter.py
+++ b/letta/adapters/letta_llm_stream_adapter.py
@@ -2,6 +2,7 @@ import asyncio
 from typing import AsyncGenerator
 
 from letta.adapters.letta_llm_adapter import LettaLLMAdapter
+from letta.helpers.datetime_helpers import get_utc_timestamp_ns
 from letta.interfaces.anthropic_streaming_interface import AnthropicStreamingInterface
 from letta.interfaces.openai_streaming_interface import OpenAIStreamingInterface
 from letta.llm_api.llm_client_base import LLMClientBase
@@ -78,6 +79,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
             yield chunk
 
         # After streaming completes, extract the accumulated data
+        self.llm_request_finish_timestamp_ns = get_utc_timestamp_ns()
 
         # Extract tool call from the interface
         try:
diff --git a/letta/agents/letta_agent_v2.py b/letta/agents/letta_agent_v2.py
index 3ffcfd22..1d619d96 100644
--- a/letta/agents/letta_agent_v2.py
+++ b/letta/agents/letta_agent_v2.py
@@ -360,7 +360,9 @@ class LettaAgentV2(BaseAgentV2):
             LettaMessage or dict: Chunks for streaming mode, or request data for dry_run
         """
         step_progression = StepProgression.START
         tool_call, reasoning_content, agent_step_span, first_chunk, logged_step = None, None, None, None, None
+        # Capture the step start time up front so step_ms can be computed when the step finishes.
+        step_start_ns = get_utc_timestamp_ns()
         valid_tools = await self._get_valid_tools(messages)  # remove messages input
         approval_request, approval_response = await self._maybe_get_approval_messages(messages)
         if approval_request and approval_response:
@@ -413,6 +415,11 @@ class LettaAgentV2(BaseAgentV2):
                     return
 
                 provider_request_start_timestamp_ns = get_utc_timestamp_ns()
+                if request_start_timestamp_ns is not None:
+                    agent_step_span.add_event(
+                        name="request_start_to_provider_request_start_ms",
+                        attributes={"duration_ms": ns_to_ms(provider_request_start_timestamp_ns - request_start_timestamp_ns)},
+                    )
                 try:
                     invocation = llm_adapter.invoke_llm(
                         request_data=request_data,
@@ -432,12 +439,9 @@ class LettaAgentV2(BaseAgentV2):
                     raise
 
                 step_progression = StepProgression.RESPONSE_RECEIVED
-                stream_end_time_ns = get_utc_timestamp_ns()
-                llm_request_ns = stream_end_time_ns - provider_request_start_timestamp_ns
+                llm_request_ns = (llm_adapter.llm_request_finish_timestamp_ns or get_utc_timestamp_ns()) - provider_request_start_timestamp_ns
                 step_metrics.llm_request_ns = llm_request_ns
-
-                llm_request_ms = ns_to_ms(llm_request_ns)
-                agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ms})
+                agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})
 
                 self._update_global_usage_stats(llm_adapter.usage)
 
@@ -503,6 +507,10 @@ class LettaAgentV2(BaseAgentV2):
                     yield message
 
         step_progression = StepProgression.FINISHED
+        if agent_step_span is not None:
+            step_ns = get_utc_timestamp_ns() - step_start_ns
+            agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
+            agent_step_span.end()
 
     def _initialize_state(self):
         self.should_continue = True