feat: add context token estimates to llm usage (#9295)
* base
* generate
* update
This commit is contained in:
@@ -37646,6 +37646,18 @@
|
||||
],
|
||||
"title": "Reasoning Tokens",
|
||||
"description": "The number of reasoning/thinking tokens generated. None if not reported by provider."
|
||||
},
|
||||
"context_tokens": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"title": "Context Tokens",
|
||||
"description": "Estimate of tokens currently in the context window."
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
|
||||
@@ -290,6 +290,8 @@ class LettaAgentV3(LettaAgentV2):
|
||||
)
|
||||
if include_return_message_types:
|
||||
response_letta_messages = [m for m in response_letta_messages if m.message_type in include_return_message_types]
|
||||
# Set context_tokens to the current context-window token estimate (as opposed to the accumulated prompt_tokens total)
|
||||
self.usage.context_tokens = self.context_token_estimate
|
||||
result = LettaResponse(messages=response_letta_messages, stop_reason=self.stop_reason, usage=self.usage)
|
||||
if run_id:
|
||||
if self.job_update_metadata is None:
|
||||
@@ -480,6 +482,9 @@ class LettaAgentV3(LettaAgentV2):
|
||||
|
||||
# Cleanup and finalize (only runs if no exception occurred)
|
||||
try:
|
||||
# Set context_tokens to the current context-window token estimate (as opposed to the accumulated prompt_tokens total)
|
||||
self.usage.context_tokens = self.context_token_estimate
|
||||
|
||||
if run_id:
|
||||
# Filter out LettaStopReason from messages (only valid in LettaStreamingResponse, not LettaResponse)
|
||||
filtered_messages = [m for m in response_letta_messages if not isinstance(m, LettaStopReason)]
|
||||
|
||||
@@ -127,6 +127,12 @@ class LettaUsageStatistics(BaseModel):
|
||||
None, description="The number of reasoning/thinking tokens generated. None if not reported by provider."
|
||||
)
|
||||
|
||||
# Context window tracking
|
||||
context_tokens: Optional[int] = Field(
|
||||
None,
|
||||
description="Estimate of tokens currently in the context window.",
|
||||
)
|
||||
|
||||
def to_usage(self, provider_type: Optional["ProviderType"] = None) -> "UsageStatistics":
|
||||
"""Convert to UsageStatistics (OpenAI-compatible format).
|
||||
|
||||
|
||||
Reference in New Issue
Block a user