feat: add tracking of advanced usage data (e.g. caching) [LET-6372] (#6449)

* feat: init refactor

* feat: add helper code

* fix: missing file + test

* fix: just state/publish api
This commit is contained in:
Charles Packer
2025-11-28 21:21:20 -08:00
committed by Caren Thomas
parent 807c5c18d9
commit 131891e05f
19 changed files with 895 additions and 9 deletions

View File

@@ -4,6 +4,7 @@ from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter
from letta.helpers.datetime_helpers import get_utc_timestamp_ns
from letta.schemas.letta_message import LettaMessage
from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, TextContent
from letta.schemas.usage import normalize_cache_tokens, normalize_reasoning_tokens
class SimpleLLMRequestAdapter(LettaLLMRequestAdapter):
@@ -85,6 +86,11 @@ class SimpleLLMRequestAdapter(LettaLLMRequestAdapter):
self.usage.prompt_tokens = self.chat_completions_response.usage.prompt_tokens
self.usage.total_tokens = self.chat_completions_response.usage.total_tokens
# Extract cache and reasoning token details using normalized helpers
usage = self.chat_completions_response.usage
self.usage.cached_input_tokens, self.usage.cache_write_tokens = normalize_cache_tokens(usage.prompt_tokens_details)
self.usage.reasoning_tokens = normalize_reasoning_tokens(usage.completion_tokens_details)
self.log_provider_trace(step_id=step_id, actor=actor)
yield None

View File

@@ -158,11 +158,34 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
if not output_tokens and hasattr(self.interface, "fallback_output_tokens"):
output_tokens = self.interface.fallback_output_tokens
# Extract cache token data (OpenAI/Gemini use cached_tokens)
cached_input_tokens = 0
if hasattr(self.interface, "cached_tokens") and self.interface.cached_tokens:
cached_input_tokens = self.interface.cached_tokens
# Anthropic uses cache_read_tokens for cache hits
elif hasattr(self.interface, "cache_read_tokens") and self.interface.cache_read_tokens:
cached_input_tokens = self.interface.cache_read_tokens
# Extract cache write tokens (Anthropic only)
cache_write_tokens = 0
if hasattr(self.interface, "cache_creation_tokens") and self.interface.cache_creation_tokens:
cache_write_tokens = self.interface.cache_creation_tokens
# Extract reasoning tokens (OpenAI o1/o3 models use reasoning_tokens, Gemini uses thinking_tokens)
reasoning_tokens = 0
if hasattr(self.interface, "reasoning_tokens") and self.interface.reasoning_tokens:
reasoning_tokens = self.interface.reasoning_tokens
elif hasattr(self.interface, "thinking_tokens") and self.interface.thinking_tokens:
reasoning_tokens = self.interface.thinking_tokens
self.usage = LettaUsageStatistics(
step_count=1,
completion_tokens=output_tokens or 0,
prompt_tokens=input_tokens or 0,
total_tokens=(input_tokens or 0) + (output_tokens or 0),
cached_input_tokens=cached_input_tokens,
cache_write_tokens=cache_write_tokens,
reasoning_tokens=reasoning_tokens,
)
else:
# Default usage statistics if not available