Files
letta-server/letta/schemas/usage.py
Kian Jones f5c4ab50f4 chore: add ty + pre-commit hook and repeal even more ruff rules (#9504)
* auto fixes

* auto fix pt2 and transitive deps and undefined var checking locals()

* manual fixes (ignored or letta-code fixed)

* fix circular import

* remove all ignores, add FastAPI rules and Ruff rules

* add ty and precommit

* ruff stuff

* ty check fixes

* ty check fixes pt 2

* error on invalid
2026-02-24 10:55:11 -08:00

185 lines
7.7 KiB
Python

from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union
from pydantic import BaseModel, Field
if TYPE_CHECKING:
from letta.schemas.enums import ProviderType
from letta.schemas.openai.chat_completion_response import (
UsageStatistics,
UsageStatisticsCompletionTokenDetails,
UsageStatisticsPromptTokenDetails,
)
def normalize_cache_tokens(
    prompt_details: Union["UsageStatisticsPromptTokenDetails", Dict[str, Any], None],
) -> Tuple[int, int]:
    """
    Extract normalized cache token counts from provider-specific prompt details.

    Handles both Pydantic model objects (from adapters) and dict objects (from database).

    Provider mappings:
    - OpenAI/Gemini: cached_tokens -> cached_input_tokens
    - Anthropic: cache_read_tokens -> cached_input_tokens, cache_creation_tokens -> cache_write_tokens

    Args:
        prompt_details: Provider-specific prompt token details (model or dict)

    Returns:
        Tuple of (cached_input_tokens, cache_write_tokens)
    """
    if prompt_details is None:
        return 0, 0

    # Uniform field accessor so the fallback logic below is written once:
    # dicts come from database storage, Pydantic models from adapters.
    if isinstance(prompt_details, dict):

        def _get(name: str) -> Any:
            return prompt_details.get(name)

    else:

        def _get(name: str) -> Any:
            return getattr(prompt_details, name, None)

    # OpenAI/Gemini report `cached_tokens`; Anthropic reports `cache_read_tokens`.
    # Falsy values (None, 0, missing) fall through to the next candidate, then to 0.
    cached_input = _get("cached_tokens") or _get("cache_read_tokens") or 0
    # Only Anthropic-style providers report cache writes; coerce None to 0.
    cache_write = _get("cache_creation_tokens") or 0
    return cached_input, cache_write
def normalize_reasoning_tokens(
    completion_details: Union["UsageStatisticsCompletionTokenDetails", Dict[str, Any], None],
) -> int:
    """
    Extract normalized reasoning token count from provider-specific completion details.

    Handles both Pydantic model objects (from adapters) and dict objects (from database).

    Provider mappings:
    - OpenAI: completion_tokens_details.reasoning_tokens
    - Gemini: thoughts_token_count (mapped to reasoning_tokens in UsageStatistics)
    - Anthropic: thinking tokens are included in completion_tokens, not separately tracked

    Args:
        completion_details: Provider-specific completion token details (model or dict)

    Returns:
        The reasoning token count
    """
    if completion_details is None:
        return 0

    # Dicts come from database storage; anything else is a Pydantic model from adapters.
    if isinstance(completion_details, dict):
        raw_count = completion_details.get("reasoning_tokens", 0)
    else:
        raw_count = getattr(completion_details, "reasoning_tokens", 0)

    # Coerce None (provider present but value unset) to 0.
    return raw_count or 0
class LettaUsageStatistics(BaseModel):
    """
    Usage statistics for the agent interaction.

    Attributes:
        completion_tokens (int): The number of tokens generated by the agent.
        prompt_tokens (int): The number of tokens in the prompt.
        total_tokens (int): The total number of tokens processed by the agent.
        step_count (int): The number of steps taken by the agent.
        cached_input_tokens (Optional[int]): The number of input tokens served from cache. None if not reported.
        cache_write_tokens (Optional[int]): The number of input tokens written to cache. None if not reported.
        reasoning_tokens (Optional[int]): The number of reasoning/thinking tokens generated. None if not reported.
    """

    message_type: Literal["usage_statistics"] = "usage_statistics"
    completion_tokens: int = Field(0, description="The number of tokens generated by the agent.")
    prompt_tokens: int = Field(0, description="The number of tokens in the prompt.")
    total_tokens: int = Field(0, description="The total number of tokens processed by the agent.")
    step_count: int = Field(0, description="The number of steps taken by the agent.")
    # TODO: Optional for now. This field makes everyone's lives easier
    run_ids: Optional[List[str]] = Field(None, description="The background task run IDs associated with the agent interaction")
    # Cache tracking (common across providers).
    # None means the provider didn't report this data at all; 0 means it reported zero.
    cached_input_tokens: Optional[int] = Field(
        None, description="The number of input tokens served from cache. None if not reported by provider."
    )
    cache_write_tokens: Optional[int] = Field(
        None, description="The number of input tokens written to cache (Anthropic only). None if not reported by provider."
    )
    # Reasoning token tracking — same None-vs-0 convention as the cache fields.
    reasoning_tokens: Optional[int] = Field(
        None, description="The number of reasoning/thinking tokens generated. None if not reported by provider."
    )
    # Context window tracking
    context_tokens: Optional[int] = Field(
        None,
        description="Estimate of tokens currently in the context window.",
    )

    def to_usage(self, provider_type: Optional["ProviderType"] = None) -> "UsageStatistics":
        """Convert to UsageStatistics (OpenAI-compatible format).

        Args:
            provider_type: ProviderType enum indicating which provider format to use.
                Used to determine which cache field to populate.

        Returns:
            UsageStatistics object with nested prompt/completion token details.
        """
        # Imported locally to avoid a circular import at module load time.
        from letta.schemas.enums import ProviderType
        from letta.schemas.openai.chat_completion_response import (
            UsageStatistics,
            UsageStatisticsCompletionTokenDetails,
            UsageStatisticsPromptTokenDetails,
        )

        # Populate prompt details only when at least one cache counter was reported.
        prompt_details = None
        if self.cached_input_tokens is not None or self.cache_write_tokens is not None:
            # Anthropic and Bedrock use Anthropic-style cache fields
            # (cache_read_tokens / cache_creation_tokens); everyone else uses cached_tokens.
            if provider_type in (ProviderType.anthropic, ProviderType.bedrock):
                prompt_details = UsageStatisticsPromptTokenDetails(
                    cache_read_tokens=self.cached_input_tokens,
                    cache_creation_tokens=self.cache_write_tokens,
                )
            else:
                prompt_details = UsageStatisticsPromptTokenDetails(
                    cached_tokens=self.cached_input_tokens,
                )

        # Populate completion details only when reasoning tokens were reported.
        completion_details = (
            UsageStatisticsCompletionTokenDetails(reasoning_tokens=self.reasoning_tokens)
            if self.reasoning_tokens is not None
            else None
        )

        return UsageStatistics(
            prompt_tokens=self.prompt_tokens,
            completion_tokens=self.completion_tokens,
            total_tokens=self.total_tokens,
            prompt_tokens_details=prompt_details,
            completion_tokens_details=completion_details,
        )