From c801866d89f1a74393cd223f00939cee1e182d3e Mon Sep 17 00:00:00 2001 From: jnjpng Date: Wed, 4 Feb 2026 18:14:32 -0800 Subject: [PATCH] feat: add context token estimates to llm usage (#9295) * base * generate * update --- fern/openapi.json | 12 ++++++++++++ letta/agents/letta_agent_v3.py | 5 +++++ letta/schemas/usage.py | 6 ++++++ 3 files changed, 23 insertions(+) diff --git a/fern/openapi.json b/fern/openapi.json index 3230ece0..2135a1f2 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -37646,6 +37646,18 @@ ], "title": "Reasoning Tokens", "description": "The number of reasoning/thinking tokens generated. None if not reported by provider." + }, + "context_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Context Tokens", + "description": "Estimate of tokens currently in the context window." } }, "type": "object", diff --git a/letta/agents/letta_agent_v3.py b/letta/agents/letta_agent_v3.py index 9e52e0df..bd111eee 100644 --- a/letta/agents/letta_agent_v3.py +++ b/letta/agents/letta_agent_v3.py @@ -290,6 +290,8 @@ class LettaAgentV3(LettaAgentV2): ) if include_return_message_types: response_letta_messages = [m for m in response_letta_messages if m.message_type in include_return_message_types] + # Set context_tokens to expose actual context window usage (vs accumulated prompt_tokens) + self.usage.context_tokens = self.context_token_estimate result = LettaResponse(messages=response_letta_messages, stop_reason=self.stop_reason, usage=self.usage) if run_id: if self.job_update_metadata is None: @@ -480,6 +482,9 @@ class LettaAgentV3(LettaAgentV2): # Cleanup and finalize (only runs if no exception occurred) try: + # Set context_tokens to expose actual context window usage (vs accumulated prompt_tokens) + self.usage.context_tokens = self.context_token_estimate + if run_id: # Filter out LettaStopReason from messages (only valid in LettaStreamingResponse, not LettaResponse) filtered_messages = [m for m in response_letta_messages if not isinstance(m, LettaStopReason)] diff --git a/letta/schemas/usage.py b/letta/schemas/usage.py index d2f5191d..c066423f 100644 --- a/letta/schemas/usage.py +++ b/letta/schemas/usage.py @@ -127,6 +127,12 @@ class LettaUsageStatistics(BaseModel): None, description="The number of reasoning/thinking tokens generated. None if not reported by provider." ) + # Context window tracking + context_tokens: Optional[int] = Field( + None, + description="Estimate of tokens currently in the context window.", + ) + def to_usage(self, provider_type: Optional["ProviderType"] = None) -> "UsageStatistics": """Convert to UsageStatistics (OpenAI-compatible format).