From eaf64fb510fafc23d533d41e5ccf56cc754191c0 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Tue, 3 Feb 2026 17:03:23 -0800 Subject: [PATCH] fix: add LLMCallType enum and ensure call_type is set on all provider traces (#9258) Co-authored-by: Letta --- letta/adapters/letta_llm_adapter.py | 3 +++ letta/adapters/letta_llm_request_adapter.py | 2 +- letta/adapters/letta_llm_stream_adapter.py | 7 ++++--- letta/adapters/simple_llm_request_adapter.py | 3 ++- letta/adapters/simple_llm_stream_adapter.py | 2 +- letta/agents/ephemeral_summary_agent.py | 4 ++-- letta/agents/letta_agent.py | 12 ++++++------ letta/agents/letta_agent_v2.py | 7 ++++++- letta/agents/letta_agent_v3.py | 5 ++++- letta/llm_api/llm_api_tools.py | 4 ++-- letta/llm_api/llm_client_base.py | 4 +++- letta/schemas/enums.py | 8 ++++++++ letta/server/rest_api/routers/v1/tools.py | 4 ++-- letta/services/summarizer/summarizer.py | 4 ++-- ...t_letta_llm_stream_adapter_error_handling.py | 9 +++++---- tests/test_provider_trace_agents.py | 14 +++++++++++--- tests/test_usage_parsing.py | 17 ++++++++++------- 17 files changed, 72 insertions(+), 37 deletions(-) diff --git a/letta/adapters/letta_llm_adapter.py b/letta/adapters/letta_llm_adapter.py index b00a8edb..2f21862d 100644 --- a/letta/adapters/letta_llm_adapter.py +++ b/letta/adapters/letta_llm_adapter.py @@ -2,6 +2,7 @@ from abc import ABC, abstractmethod from typing import AsyncGenerator from letta.llm_api.llm_client_base import LLMClientBase +from letta.schemas.enums import LLMCallType from letta.schemas.letta_message import LettaMessage from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent from letta.schemas.llm_config import LLMConfig @@ -24,6 +25,7 @@ class LettaLLMAdapter(ABC): self, llm_client: LLMClientBase, llm_config: LLMConfig, + call_type: LLMCallType, agent_id: str | None = None, agent_tags: list[str] | None = None, run_id: str | None = None, @@ -32,6 +34,7 @@ class LettaLLMAdapter(ABC): ) -> None: self.llm_client: LLMClientBase = llm_client self.llm_config: LLMConfig = llm_config + self.call_type: LLMCallType = call_type self.agent_id: str | None = agent_id self.agent_tags: list[str] | None = agent_tags self.run_id: str | None = run_id diff --git a/letta/adapters/letta_llm_request_adapter.py b/letta/adapters/letta_llm_request_adapter.py index 49a3899c..17c3a77f 100644 --- a/letta/adapters/letta_llm_request_adapter.py +++ b/letta/adapters/letta_llm_request_adapter.py @@ -127,7 +127,7 @@ class LettaLLMRequestAdapter(LettaLLMAdapter): agent_id=self.agent_id, agent_tags=self.agent_tags, run_id=self.run_id, - call_type="agent_step", + call_type=self.call_type, org_id=self.org_id, user_id=self.user_id, llm_config=self.llm_config.model_dump() if self.llm_config else None, diff --git a/letta/adapters/letta_llm_stream_adapter.py b/letta/adapters/letta_llm_stream_adapter.py index b2ea94be..4ef7373e 100644 --- a/letta/adapters/letta_llm_stream_adapter.py +++ b/letta/adapters/letta_llm_stream_adapter.py @@ -6,7 +6,7 @@ from letta.interfaces.anthropic_streaming_interface import AnthropicStreamingInt from letta.interfaces.openai_streaming_interface import OpenAIStreamingInterface from letta.llm_api.llm_client_base import LLMClientBase from letta.otel.tracing import log_attributes, safe_json_dumps, trace_method -from letta.schemas.enums import ProviderType +from letta.schemas.enums import LLMCallType, ProviderType from letta.schemas.letta_message import LettaMessage from letta.schemas.llm_config import LLMConfig from letta.schemas.provider_trace import ProviderTrace @@ -30,13 +30,14 @@ class LettaLLMStreamAdapter(LettaLLMAdapter): self, llm_client: LLMClientBase, llm_config: LLMConfig, + call_type: LLMCallType, agent_id: str | None = None, agent_tags: list[str] | None = None, run_id: str | None = None, org_id: str | None = None, user_id: str | None = None, ) -> None: - super().__init__(llm_client, llm_config, agent_id=agent_id, agent_tags=agent_tags, run_id=run_id, org_id=org_id, user_id=user_id) + super().__init__(llm_client, llm_config, call_type=call_type, agent_id=agent_id, agent_tags=agent_tags, run_id=run_id, org_id=org_id, user_id=user_id) self.interface: OpenAIStreamingInterface | AnthropicStreamingInterface | None = None async def invoke_llm( @@ -205,7 +206,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter): agent_id=self.agent_id, agent_tags=self.agent_tags, run_id=self.run_id, - call_type="agent_step", + call_type=self.call_type, org_id=self.org_id, user_id=self.user_id, llm_config=self.llm_config.model_dump() if self.llm_config else None, diff --git a/letta/adapters/simple_llm_request_adapter.py b/letta/adapters/simple_llm_request_adapter.py index cf2dc741..7cf5b260 100644 --- a/letta/adapters/simple_llm_request_adapter.py +++ b/letta/adapters/simple_llm_request_adapter.py @@ -2,6 +2,7 @@ from typing import AsyncGenerator from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter from letta.helpers.datetime_helpers import get_utc_timestamp_ns +from letta.schemas.enums import LLMCallType from letta.schemas.letta_message import LettaMessage from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, TextContent from letta.schemas.usage import normalize_cache_tokens, normalize_reasoning_tokens @@ -45,7 +46,7 @@ class SimpleLLMRequestAdapter(LettaLLMRequestAdapter): agent_id=self.agent_id, agent_tags=self.agent_tags, run_id=self.run_id, - call_type="agent_step", + call_type=LLMCallType.agent_step, org_id=self.org_id, user_id=self.user_id, llm_config=self.llm_config.model_dump() if self.llm_config else None, diff --git a/letta/adapters/simple_llm_stream_adapter.py b/letta/adapters/simple_llm_stream_adapter.py index e29b8ac9..216c437c 100644 --- a/letta/adapters/simple_llm_stream_adapter.py +++ b/letta/adapters/simple_llm_stream_adapter.py @@ -254,7 +254,7 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter): agent_id=self.agent_id, agent_tags=self.agent_tags, run_id=self.run_id, - call_type="agent_step", + call_type=self.call_type, org_id=self.org_id, user_id=self.user_id, llm_config=self.llm_config.model_dump() if self.llm_config else None, diff --git a/letta/agents/ephemeral_summary_agent.py b/letta/agents/ephemeral_summary_agent.py index 3e990c9e..ca73d800 100644 --- a/letta/agents/ephemeral_summary_agent.py +++ b/letta/agents/ephemeral_summary_agent.py @@ -8,7 +8,7 @@ from letta.log import get_logger from letta.orm.errors import NoResultFound from letta.prompts.gpt_system import get_system_text from letta.schemas.block import Block, BlockUpdate -from letta.schemas.enums import MessageRole +from letta.schemas.enums import LLMCallType, MessageRole from letta.schemas.letta_message_content import TextContent from letta.schemas.message import Message, MessageCreate from letta.schemas.user import User @@ -92,7 +92,7 @@ class EphemeralSummaryAgent(BaseAgent): telemetry_manager=TelemetryManager(), agent_id=self.agent_id, agent_tags=agent_state.tags, - call_type="summarization", + call_type=LLMCallType.summarization, ) response_data = await llm_client.request_async_with_telemetry(request_data, agent_state.llm_config) response = await llm_client.convert_response_to_chat_completion(response_data, messages, agent_state.llm_config) diff --git a/letta/agents/letta_agent.py b/letta/agents/letta_agent.py index fee4b902..06ca2822 100644 --- a/letta/agents/letta_agent.py +++ b/letta/agents/letta_agent.py @@ -35,7 +35,7 @@ from letta.otel.context import get_ctx_attributes from letta.otel.metric_registry import MetricRegistry from letta.otel.tracing import log_event, trace_method, tracer from letta.schemas.agent import AgentState, UpdateAgent -from letta.schemas.enums import JobStatus, ProviderType, StepStatus, ToolType +from letta.schemas.enums import JobStatus, LLMCallType, ProviderType, StepStatus, ToolType from letta.schemas.letta_message import MessageType from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent from letta.schemas.letta_response import LettaResponse @@ -420,7 +420,7 @@ class LettaAgent(BaseAgent): agent_id=self.agent_id, agent_tags=agent_state.tags, run_id=self.current_run_id, - call_type="agent_step", + call_type=LLMCallType.agent_step, org_id=self.actor.organization_id, user_id=self.actor.id, llm_config=self.agent_state.llm_config.model_dump() if self.agent_state.llm_config else None, @@ -774,7 +774,7 @@ class LettaAgent(BaseAgent): agent_id=self.agent_id, agent_tags=agent_state.tags, run_id=self.current_run_id, - call_type="agent_step", + call_type=LLMCallType.agent_step, org_id=self.actor.organization_id, user_id=self.actor.id, llm_config=self.agent_state.llm_config.model_dump() if self.agent_state.llm_config else None, @@ -1252,7 +1252,7 @@ class LettaAgent(BaseAgent): agent_id=self.agent_id, agent_tags=agent_state.tags, run_id=self.current_run_id, - call_type="agent_step", + call_type=LLMCallType.agent_step, org_id=self.actor.organization_id, user_id=self.actor.id, llm_config=self.agent_state.llm_config.model_dump() if self.agent_state.llm_config else None, @@ -1486,7 +1486,7 @@ class LettaAgent(BaseAgent): agent_tags=agent_state.tags, run_id=self.current_run_id, step_id=step_metrics.id, - call_type="agent_step", + call_type=LLMCallType.agent_step, ) response = await llm_client.request_async_with_telemetry(request_data, agent_state.llm_config) @@ -1559,7 +1559,7 @@ class LettaAgent(BaseAgent): agent_tags=agent_state.tags, run_id=self.current_run_id, step_id=step_id, - call_type="agent_step", + call_type=LLMCallType.agent_step, ) # Attempt LLM request with telemetry wrapper diff --git a/letta/agents/letta_agent_v2.py b/letta/agents/letta_agent_v2.py index efa42849..85e6b86c 100644 --- a/letta/agents/letta_agent_v2.py +++ b/letta/agents/letta_agent_v2.py @@ -31,7 +31,7 @@ from letta.log import get_logger from letta.otel.tracing import log_event, trace_method, tracer from letta.prompts.prompt_generator import PromptGenerator from letta.schemas.agent import AgentState, UpdateAgent -from letta.schemas.enums import AgentType, MessageStreamStatus, RunStatus, StepStatus +from letta.schemas.enums import AgentType, LLMCallType, MessageStreamStatus, RunStatus, StepStatus from letta.schemas.letta_message import LettaMessage, MessageType from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent from letta.schemas.letta_request import ClientToolSchema @@ -158,6 +158,8 @@ class LettaAgentV2(BaseAgentV2): llm_adapter=LettaLLMRequestAdapter( llm_client=self.llm_client, llm_config=self.agent_state.llm_config, + call_type=LLMCallType.agent_step, + agent_id=self.agent_state.id, agent_tags=self.agent_state.tags, org_id=self.actor.organization_id, user_id=self.actor.id, @@ -216,6 +218,7 @@ class LettaAgentV2(BaseAgentV2): llm_adapter=LettaLLMRequestAdapter( llm_client=self.llm_client, llm_config=self.agent_state.llm_config, + call_type=LLMCallType.agent_step, agent_id=self.agent_state.id, agent_tags=self.agent_state.tags, run_id=run_id, @@ -305,6 +308,7 @@ class LettaAgentV2(BaseAgentV2): llm_adapter = LettaLLMStreamAdapter( llm_client=self.llm_client, llm_config=self.agent_state.llm_config, + call_type=LLMCallType.agent_step, agent_id=self.agent_state.id, agent_tags=self.agent_state.tags, run_id=run_id, @@ -315,6 +319,7 @@ class LettaAgentV2(BaseAgentV2): llm_adapter = LettaLLMRequestAdapter( llm_client=self.llm_client, llm_config=self.agent_state.llm_config, + call_type=LLMCallType.agent_step, agent_id=self.agent_state.id, agent_tags=self.agent_state.tags, run_id=run_id, diff --git a/letta/agents/letta_agent_v3.py b/letta/agents/letta_agent_v3.py index 70d499cf..9e52e0df 100644 --- a/letta/agents/letta_agent_v3.py +++ b/letta/agents/letta_agent_v3.py @@ -28,7 +28,7 @@ from letta.helpers.tool_execution_helper import enable_strict_mode from letta.local_llm.constants import INNER_THOUGHTS_KWARG from letta.otel.tracing import trace_method from letta.schemas.agent import AgentState -from letta.schemas.enums import MessageRole +from letta.schemas.enums import LLMCallType, MessageRole from letta.schemas.letta_message import ( ApprovalReturn, CompactionStats, @@ -209,6 +209,7 @@ class LettaAgentV3(LettaAgentV2): llm_adapter=SimpleLLMRequestAdapter( llm_client=self.llm_client, llm_config=self.agent_state.llm_config, + call_type=LLMCallType.agent_step, agent_id=self.agent_state.id, agent_tags=self.agent_state.tags, run_id=run_id, @@ -356,6 +357,7 @@ class LettaAgentV3(LettaAgentV2): llm_adapter = SimpleLLMStreamAdapter( llm_client=self.llm_client, llm_config=self.agent_state.llm_config, + call_type=LLMCallType.agent_step, agent_id=self.agent_state.id, agent_tags=self.agent_state.tags, run_id=run_id, @@ -366,6 +368,7 @@ class LettaAgentV3(LettaAgentV2): llm_adapter = SimpleLLMRequestAdapter( llm_client=self.llm_client, llm_config=self.agent_state.llm_config, + call_type=LLMCallType.agent_step, agent_id=self.agent_state.id, agent_tags=self.agent_state.tags, run_id=run_id, diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py index 2fb9efba..c4f178e2 100644 --- a/letta/llm_api/llm_api_tools.py +++ b/letta/llm_api/llm_api_tools.py @@ -23,7 +23,7 @@ from letta.local_llm.constants import INNER_THOUGHTS_KWARG from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages from letta.orm.user import User from letta.otel.tracing import log_event, trace_method -from letta.schemas.enums import ProviderCategory +from letta.schemas.enums import LLMCallType, ProviderCategory from letta.schemas.llm_config import LLMConfig from letta.schemas.message import Message from letta.schemas.openai.chat_completion_response import ChatCompletionResponse @@ -245,7 +245,7 @@ def create( request_json=prepare_openai_payload(data), response_json=response.model_json_schema(), step_id=step_id, - call_type="agent_step", + call_type=LLMCallType.agent_step, ), ) diff --git a/letta/llm_api/llm_client_base.py b/letta/llm_api/llm_client_base.py index 8b506c05..c8bfbc7d 100644 --- a/letta/llm_api/llm_client_base.py +++ b/letta/llm_api/llm_client_base.py @@ -10,7 +10,7 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from letta.errors import ErrorCode, LLMConnectionError, LLMError from letta.otel.tracing import log_event, trace_method from letta.schemas.embedding_config import EmbeddingConfig -from letta.schemas.enums import AgentType, ProviderCategory +from letta.schemas.enums import AgentType, LLMCallType, ProviderCategory from letta.schemas.llm_config import LLMConfig from letta.schemas.message import Message from letta.schemas.openai.chat_completion_response import ChatCompletionResponse @@ -229,6 +229,7 @@ class LLMClientBase: request_json=request_data, response_json=response_data, step_id=step_id, + call_type=LLMCallType.agent_step, ), ) log_event(name="llm_response_received", attributes=response_data) @@ -262,6 +263,7 @@ class LLMClientBase: request_json=request_data, response_json=response_data, step_id=step_id, + call_type=LLMCallType.agent_step, ), ) diff --git a/letta/schemas/enums.py b/letta/schemas/enums.py index b7fc27f0..3996a160 100644 --- a/letta/schemas/enums.py +++ b/letta/schemas/enums.py @@ -96,6 +96,14 @@ class ProviderCategory(str, Enum): byok = "byok" +class LLMCallType(str, Enum): + """Type of LLM call for telemetry tracking.""" + + agent_step = "agent_step" + summarization = "summarization" + tool_generation = "tool_generation" + + class MessageRole(str, Enum): assistant = "assistant" user = "user" diff --git a/letta/server/rest_api/routers/v1/tools.py b/letta/server/rest_api/routers/v1/tools.py index ac6de569..6dc1dde3 100644 --- a/letta/server/rest_api/routers/v1/tools.py +++ b/letta/server/rest_api/routers/v1/tools.py @@ -26,7 +26,7 @@ from letta.log import get_logger from letta.orm.errors import UniqueConstraintViolationError from letta.orm.mcp_oauth import OAuthSessionStatus from letta.prompts.gpt_system import get_system_text -from letta.schemas.enums import AgentType, MessageRole, ToolType +from letta.schemas.enums import AgentType, LLMCallType, MessageRole, ToolType from letta.schemas.letta_message import ToolReturnMessage from letta.schemas.letta_message_content import TextContent from letta.schemas.mcp import UpdateSSEMCPServer, UpdateStdioMCPServer, UpdateStreamableHTTPMCPServer @@ -956,7 +956,7 @@ async def generate_tool_from_prompt( llm_client.set_telemetry_context( telemetry_manager=TelemetryManager(), - call_type="tool_generation", + call_type=LLMCallType.tool_generation, ) response_data = await llm_client.request_async_with_telemetry(request_data, llm_config) response = await llm_client.convert_response_to_chat_completion(response_data, input_messages, llm_config) diff --git a/letta/services/summarizer/summarizer.py b/letta/services/summarizer/summarizer.py index 9ff685bb..dc9680c4 100644 --- a/letta/services/summarizer/summarizer.py +++ b/letta/services/summarizer/summarizer.py @@ -16,7 +16,7 @@ from letta.llm_api.llm_client import LLMClient from letta.log import get_logger from letta.otel.tracing import trace_method from letta.prompts import gpt_summarize -from letta.schemas.enums import AgentType, MessageRole, ProviderType +from letta.schemas.enums import AgentType, LLMCallType, MessageRole, ProviderType from letta.schemas.letta_message_content import TextContent from letta.schemas.llm_config import LLMConfig from letta.schemas.message import Message, MessageCreate @@ -482,7 +482,7 @@ async def simple_summary( agent_tags=agent_tags, run_id=run_id, step_id=step_id, - call_type="summarization", + call_type=LLMCallType.summarization, org_id=actor.organization_id if actor else None, user_id=actor.id if actor else None, compaction_settings=compaction_settings, diff --git a/tests/adapters/test_letta_llm_stream_adapter_error_handling.py b/tests/adapters/test_letta_llm_stream_adapter_error_handling.py index 3241ce7e..58951c59 100644 --- a/tests/adapters/test_letta_llm_stream_adapter_error_handling.py +++ b/tests/adapters/test_letta_llm_stream_adapter_error_handling.py @@ -5,6 +5,7 @@ import pytest from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter from letta.errors import ContextWindowExceededError, LLMConnectionError, LLMServerError from letta.llm_api.anthropic_client import AnthropicClient +from letta.schemas.enums import LLMCallType from letta.schemas.llm_config import LLMConfig @@ -42,7 +43,7 @@ async def test_letta_llm_stream_adapter_converts_anthropic_streaming_api_status_ llm_client = AnthropicClient() llm_config = LLMConfig(model="claude-sonnet-4-5-20250929", model_endpoint_type="anthropic", context_window=200000) - adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config) + adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config, call_type=LLMCallType.agent_step) gen = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True) with pytest.raises(LLMServerError): @@ -83,7 +84,7 @@ async def test_letta_llm_stream_adapter_converts_anthropic_413_request_too_large llm_client = AnthropicClient() llm_config = LLMConfig(model="claude-sonnet-4-5-20250929", model_endpoint_type="anthropic", context_window=200000) - adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config) + adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config, call_type=LLMCallType.agent_step) gen = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True) with pytest.raises(ContextWindowExceededError): @@ -117,7 +118,7 @@ async def test_letta_llm_stream_adapter_converts_httpx_read_error(monkeypatch): llm_client = AnthropicClient() llm_config = LLMConfig(model="claude-sonnet-4-5-20250929", model_endpoint_type="anthropic", context_window=200000) - adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config) + adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config, call_type=LLMCallType.agent_step) gen = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True) with pytest.raises(LLMConnectionError): @@ -151,7 +152,7 @@ async def test_letta_llm_stream_adapter_converts_httpx_write_error(monkeypatch): llm_client = AnthropicClient() llm_config = LLMConfig(model="claude-sonnet-4-5-20250929", model_endpoint_type="anthropic", context_window=200000) - adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config) + adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config, call_type=LLMCallType.agent_step) gen = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True) with pytest.raises(LLMConnectionError): diff --git a/tests/test_provider_trace_agents.py b/tests/test_provider_trace_agents.py index 830d776c..9adf52b6 100644 --- a/tests/test_provider_trace_agents.py +++ b/tests/test_provider_trace_agents.py @@ -198,6 +198,7 @@ class TestAdapterTelemetryAttributes: """Verify base LettaLLMAdapter has telemetry attributes.""" from letta.adapters.letta_llm_adapter import LettaLLMAdapter from letta.llm_api.llm_client import LLMClient + from letta.schemas.enums import LLMCallType mock_client = LLMClient.create(provider_type="openai", put_inner_thoughts_first=True) @@ -212,6 +213,7 @@ class TestAdapterTelemetryAttributes: adapter = TestAdapter( llm_client=mock_client, llm_config=mock_llm_config, + call_type=LLMCallType.agent_step, agent_id=agent_id, agent_tags=agent_tags, run_id=run_id, @@ -220,11 +222,13 @@ class TestAdapterTelemetryAttributes: assert adapter.agent_id == agent_id assert adapter.agent_tags == agent_tags assert adapter.run_id == run_id + assert adapter.call_type == LLMCallType.agent_step def test_request_adapter_inherits_telemetry_attributes(self, mock_llm_config): """Verify LettaLLMRequestAdapter inherits telemetry attributes.""" from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter from letta.llm_api.llm_client import LLMClient + from letta.schemas.enums import LLMCallType mock_client = LLMClient.create(provider_type="openai", put_inner_thoughts_first=True) @@ -235,6 +239,7 @@ class TestAdapterTelemetryAttributes: adapter = LettaLLMRequestAdapter( llm_client=mock_client, llm_config=mock_llm_config, + call_type=LLMCallType.agent_step, agent_id=agent_id, agent_tags=agent_tags, run_id=run_id, @@ -248,6 +253,7 @@ class TestAdapterTelemetryAttributes: """Verify LettaLLMStreamAdapter inherits telemetry attributes.""" from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter from letta.llm_api.llm_client import LLMClient + from letta.schemas.enums import LLMCallType mock_client = LLMClient.create(provider_type="openai", put_inner_thoughts_first=True) @@ -258,6 +264,7 @@ class TestAdapterTelemetryAttributes: adapter = LettaLLMStreamAdapter( llm_client=mock_client, llm_config=mock_llm_config, + call_type=LLMCallType.agent_step, agent_id=agent_id, agent_tags=agent_tags, run_id=run_id, @@ -272,13 +279,14 @@ class TestAdapterTelemetryAttributes: from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter from letta.llm_api.llm_client import LLMClient + from letta.schemas.enums import LLMCallType mock_client = LLMClient.create(provider_type="openai", put_inner_thoughts_first=True) - request_adapter = LettaLLMRequestAdapter(llm_client=mock_client, llm_config=mock_llm_config) - stream_adapter = LettaLLMStreamAdapter(llm_client=mock_client, llm_config=mock_llm_config) + request_adapter = LettaLLMRequestAdapter(llm_client=mock_client, llm_config=mock_llm_config, call_type=LLMCallType.agent_step) + stream_adapter = LettaLLMStreamAdapter(llm_client=mock_client, llm_config=mock_llm_config, call_type=LLMCallType.agent_step) - for attr in ["agent_id", "agent_tags", "run_id"]: + for attr in ["agent_id", "agent_tags", "run_id", "call_type"]: assert hasattr(request_adapter, attr), f"LettaLLMRequestAdapter missing {attr}" assert hasattr(stream_adapter, attr), f"LettaLLMStreamAdapter missing {attr}" diff --git a/tests/test_usage_parsing.py b/tests/test_usage_parsing.py index 0b9dc1c2..7a40e597 100644 --- a/tests/test_usage_parsing.py +++ b/tests/test_usage_parsing.py @@ -24,7 +24,7 @@ from letta.errors import LLMAuthenticationError from letta.llm_api.anthropic_client import AnthropicClient from letta.llm_api.google_ai_client import GoogleAIClient from letta.llm_api.openai_client import OpenAIClient -from letta.schemas.enums import AgentType, MessageRole +from letta.schemas.enums import AgentType, LLMCallType, MessageRole from letta.schemas.letta_message_content import TextContent from letta.schemas.llm_config import LLMConfig from letta.schemas.message import Message @@ -156,6 +156,7 @@ async def test_openai_usage_via_adapter(): adapter = SimpleLLMRequestAdapter( llm_client=client, llm_config=llm_config, + call_type=LLMCallType.agent_step, ) messages = _build_simple_messages("Say hello in exactly 5 words.") @@ -209,6 +210,7 @@ async def test_anthropic_usage_via_adapter(): adapter = SimpleLLMRequestAdapter( llm_client=client, llm_config=llm_config, + call_type=LLMCallType.agent_step, ) # Anthropic requires a system message first @@ -262,6 +264,7 @@ async def test_gemini_usage_via_adapter(): adapter = SimpleLLMRequestAdapter( llm_client=client, llm_config=llm_config, + call_type=LLMCallType.agent_step, ) messages = _build_simple_messages("Say hello in exactly 5 words.") @@ -307,7 +310,7 @@ async def test_openai_prefix_caching_via_adapter(): llm_config = LLMConfig.default_config("gpt-4o-mini") # First request - should populate the cache - adapter1 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config) + adapter1 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config, call_type=LLMCallType.agent_step) messages1 = [ Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]), Message(role=MessageRole.user, content=[TextContent(text="What is 2+2?")]), @@ -323,7 +326,7 @@ async def test_openai_prefix_caching_via_adapter(): print(f"Request 1 - prompt={adapter1.usage.prompt_tokens}, cached={adapter1.usage.cached_input_tokens}") # Second request - same system prompt, should hit cache - adapter2 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config) + adapter2 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config, call_type=LLMCallType.agent_step) messages2 = [ Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]), Message(role=MessageRole.user, content=[TextContent(text="What is 3+3?")]), @@ -368,7 +371,7 @@ async def test_anthropic_prefix_caching_via_adapter(): ) # First request - adapter1 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config) + adapter1 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config, call_type=LLMCallType.agent_step) messages1 = [ Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]), Message(role=MessageRole.user, content=[TextContent(text="What is 2+2?")]), @@ -386,7 +389,7 @@ async def test_anthropic_prefix_caching_via_adapter(): ) # Second request - adapter2 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config) + adapter2 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config, call_type=LLMCallType.agent_step) messages2 = [ Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]), Message(role=MessageRole.user, content=[TextContent(text="What is 3+3?")]), @@ -435,7 +438,7 @@ async def test_gemini_prefix_caching_via_adapter(): ) # First request - adapter1 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config) + adapter1 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config, call_type=LLMCallType.agent_step) messages1 = [ Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]), Message(role=MessageRole.user, content=[TextContent(text="What is 2+2?")]), @@ -451,7 +454,7 @@ async def test_gemini_prefix_caching_via_adapter(): print(f"Request 1 - prompt={adapter1.usage.prompt_tokens}, cached={adapter1.usage.cached_input_tokens}") # Second request - adapter2 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config) + adapter2 = SimpleLLMRequestAdapter(llm_client=client, llm_config=llm_config, call_type=LLMCallType.agent_step) messages2 = [ Message(role=MessageRole.system, content=[TextContent(text=LARGE_SYSTEM_PROMPT)]), Message(role=MessageRole.user, content=[TextContent(text="What is 3+3?")]),