feat: add context token estimates to llm usage (#9295)

* base

* generate

* update
This commit is contained in:
jnjpng
2026-02-04 18:14:32 -08:00
committed by Caren Thomas
parent e8db3ac89a
commit c801866d89
3 changed files with 23 additions and 0 deletions

View File

@@ -37646,6 +37646,18 @@
],
"title": "Reasoning Tokens",
"description": "The number of reasoning/thinking tokens generated. None if not reported by provider."
},
"context_tokens": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"title": "Context Tokens",
"description": "Estimate of tokens currently in the context window."
}
},
"type": "object",

View File

@@ -290,6 +290,8 @@ class LettaAgentV3(LettaAgentV2):
)
if include_return_message_types:
response_letta_messages = [m for m in response_letta_messages if m.message_type in include_return_message_types]
# Set context_tokens to expose actual context window usage (vs accumulated prompt_tokens)
self.usage.context_tokens = self.context_token_estimate
result = LettaResponse(messages=response_letta_messages, stop_reason=self.stop_reason, usage=self.usage)
if run_id:
if self.job_update_metadata is None:
@@ -480,6 +482,9 @@ class LettaAgentV3(LettaAgentV2):
# Cleanup and finalize (only runs if no exception occurred)
try:
# Set context_tokens to expose actual context window usage (vs accumulated prompt_tokens)
self.usage.context_tokens = self.context_token_estimate
if run_id:
# Filter out LettaStopReason from messages (only valid in LettaStreamingResponse, not LettaResponse)
filtered_messages = [m for m in response_letta_messages if not isinstance(m, LettaStopReason)]

View File

@@ -127,6 +127,12 @@ class LettaUsageStatistics(BaseModel):
None, description="The number of reasoning/thinking tokens generated. None if not reported by provider."
)
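# Context window tracking: estimated size of the current context window,
# reported separately from the accumulated prompt_tokens count.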
context_tokens: Optional[int] = Field(
None,
description="Estimate of tokens currently in the context window.",
)
def to_usage(self, provider_type: Optional["ProviderType"] = None) -> "UsageStatistics":
"""Convert to UsageStatistics (OpenAI-compatible format).