from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union

from pydantic import BaseModel, Field

from letta.schemas.message import Message

if TYPE_CHECKING:
    from letta.schemas.enums import ProviderType
    from letta.schemas.openai.chat_completion_response import (
        UsageStatistics,
        UsageStatisticsCompletionTokenDetails,
        UsageStatisticsPromptTokenDetails,
    )


def normalize_cache_tokens(
    prompt_details: Union["UsageStatisticsPromptTokenDetails", Dict[str, Any], None],
) -> Tuple[int, int]:
    """
    Extract normalized cache token counts from provider-specific prompt details.

    Handles both Pydantic model objects (from adapters) and dict objects (from the database).

    Provider mappings:
        - OpenAI/Gemini: cached_tokens -> cached_input_tokens
        - Anthropic: cache_read_tokens -> cached_input_tokens,
          cache_creation_tokens -> cache_write_tokens

    Args:
        prompt_details: Provider-specific prompt token details (model or dict)

    Returns:
        Tuple of (cached_input_tokens, cache_write_tokens)
    """
    if prompt_details is None:
        return 0, 0

    # Handle dict (from database storage); prefer the OpenAI/Gemini field, then Anthropic's
    if isinstance(prompt_details, dict):
        cached_input = prompt_details.get("cached_tokens") or prompt_details.get("cache_read_tokens") or 0
        cache_write = prompt_details.get("cache_creation_tokens") or 0
        return cached_input, cache_write

    # Handle Pydantic model (from adapters); same precedence as the dict branch
    cached_input = getattr(prompt_details, "cached_tokens", None) or getattr(prompt_details, "cache_read_tokens", None) or 0
    cache_write = getattr(prompt_details, "cache_creation_tokens", None) or 0
    return cached_input, cache_write


def normalize_reasoning_tokens(
    completion_details: Union["UsageStatisticsCompletionTokenDetails", Dict[str, Any], None],
) -> int:
    """
    Extract the normalized reasoning token count from provider-specific completion details.

    Handles both Pydantic model objects (from adapters) and dict objects (from the database).

    Provider mappings:
        - OpenAI: completion_tokens_details.reasoning_tokens
        - Gemini: thoughts_token_count (mapped to reasoning_tokens in UsageStatistics)
        - Anthropic: thinking tokens are included in completion_tokens, not tracked separately

    Args:
        completion_details: Provider-specific completion token details (model or dict)

    Returns:
        The reasoning token count
    """
    if completion_details is None:
        return 0

    # Handle dict (from database storage)
    if isinstance(completion_details, dict):
        return completion_details.get("reasoning_tokens") or 0

    # Handle Pydantic model (from adapters)
    return getattr(completion_details, "reasoning_tokens", None) or 0

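
# A quick sketch of how the normalizers flatten provider-specific shapes. The
# dict payloads below are assumed forms of database-stored token details, and
# the token counts are made up for illustration:
#
#     normalize_cache_tokens({"cached_tokens": 128})         -> (128, 0)   # OpenAI/Gemini style
#     normalize_cache_tokens({"cache_read_tokens": 64,
#                             "cache_creation_tokens": 32})  -> (64, 32)   # Anthropic style
#     normalize_cache_tokens(None)                           -> (0, 0)     # nothing reported
#     normalize_reasoning_tokens({"reasoning_tokens": 256})  -> 256
#     normalize_reasoning_tokens(None)                       -> 0
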

class LettaUsageStatistics(BaseModel):
    """
    Usage statistics for the agent interaction.

    Attributes:
        completion_tokens (int): The number of tokens generated by the agent.
        prompt_tokens (int): The number of tokens in the prompt.
        total_tokens (int): The total number of tokens processed by the agent.
        step_count (int): The number of steps taken by the agent.
        cached_input_tokens (Optional[int]): The number of input tokens served from cache. None if not reported.
        cache_write_tokens (Optional[int]): The number of input tokens written to cache. None if not reported.
        reasoning_tokens (Optional[int]): The number of reasoning/thinking tokens generated. None if not reported.
    """

    message_type: Literal["usage_statistics"] = "usage_statistics"
    completion_tokens: int = Field(0, description="The number of tokens generated by the agent.")
    prompt_tokens: int = Field(0, description="The number of tokens in the prompt.")
    total_tokens: int = Field(0, description="The total number of tokens processed by the agent.")
    step_count: int = Field(0, description="The number of steps taken by the agent.")
    # TODO: Optional for now. This field makes everyone's lives easier
    run_ids: Optional[List[str]] = Field(None, description="The background task run IDs associated with the agent interaction.")

    # Cache tracking (common across providers).
    # None means the provider didn't report this data; 0 means the provider reported 0.
    cached_input_tokens: Optional[int] = Field(
        None, description="The number of input tokens served from cache. None if not reported by the provider."
    )
    cache_write_tokens: Optional[int] = Field(
        None, description="The number of input tokens written to cache (Anthropic only). None if not reported by the provider."
    )

    # Reasoning token tracking.
    # None means the provider didn't report this data; 0 means the provider reported 0.
    reasoning_tokens: Optional[int] = Field(
        None, description="The number of reasoning/thinking tokens generated. None if not reported by the provider."
    )

    def to_usage(self, provider_type: Optional["ProviderType"] = None) -> "UsageStatistics":
        """Convert to UsageStatistics (OpenAI-compatible format).

        Args:
            provider_type: ProviderType enum indicating which provider format to use;
                determines which cache field to populate.

        Returns:
            UsageStatistics object with nested prompt/completion token details.
        """
        from letta.schemas.enums import ProviderType
        from letta.schemas.openai.chat_completion_response import (
            UsageStatistics,
            UsageStatisticsCompletionTokenDetails,
            UsageStatisticsPromptTokenDetails,
        )

        # Providers that use Anthropic-style cache fields (cache_read_tokens, cache_creation_tokens)
        anthropic_style_providers = {ProviderType.anthropic, ProviderType.bedrock}

        # Build prompt_tokens_details if we have cache data
        prompt_tokens_details = None
        if self.cached_input_tokens is not None or self.cache_write_tokens is not None:
            if provider_type in anthropic_style_providers:
                # Anthropic reports cache reads and cache writes separately
                prompt_tokens_details = UsageStatisticsPromptTokenDetails(
                    cache_read_tokens=self.cached_input_tokens,
                    cache_creation_tokens=self.cache_write_tokens,
                )
            else:
                # OpenAI/Gemini report a single cached_tokens count
                prompt_tokens_details = UsageStatisticsPromptTokenDetails(
                    cached_tokens=self.cached_input_tokens,
                )

        # Build completion_tokens_details if we have reasoning tokens
        completion_tokens_details = None
        if self.reasoning_tokens is not None:
            completion_tokens_details = UsageStatisticsCompletionTokenDetails(
                reasoning_tokens=self.reasoning_tokens,
            )

        return UsageStatistics(
            prompt_tokens=self.prompt_tokens,
            completion_tokens=self.completion_tokens,
            total_tokens=self.total_tokens,
            prompt_tokens_details=prompt_tokens_details,
            completion_tokens_details=completion_tokens_details,
        )
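

# A minimal usage sketch, assuming the full letta package (letta.schemas.enums
# and letta.schemas.openai.chat_completion_response) is importable at runtime.
# The token counts are made up for illustration.
if __name__ == "__main__":
    stats = LettaUsageStatistics(
        prompt_tokens=1000,
        completion_tokens=200,
        total_tokens=1200,
        cached_input_tokens=128,  # e.g. reported by OpenAI as cached_tokens
        reasoning_tokens=50,
    )

    # With no provider_type, to_usage() emits OpenAI/Gemini-style cache fields.
    usage = stats.to_usage()
    assert usage.prompt_tokens_details.cached_tokens == 128
    assert usage.completion_tokens_details.reasoning_tokens == 50

    # Round trip: feeding the nested details back through the normalizers
    # recovers the flat counts.
    assert normalize_cache_tokens(usage.prompt_tokens_details) == (128, 0)
    assert normalize_reasoning_tokens(usage.completion_tokens_details) == 50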