feat: add tracking of advanced usage data (e.g. caching) [LET-6372] (#6449)
* feat: init refactor
* feat: add helper code
* fix: missing file + test
* fix: just state/publish api
This commit is contained in:
committed by
Caren Thomas
parent
807c5c18d9
commit
131891e05f
@@ -4,6 +4,7 @@ from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter
|
||||
from letta.helpers.datetime_helpers import get_utc_timestamp_ns
|
||||
from letta.schemas.letta_message import LettaMessage
|
||||
from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, TextContent
|
||||
from letta.schemas.usage import normalize_cache_tokens, normalize_reasoning_tokens
|
||||
|
||||
|
||||
class SimpleLLMRequestAdapter(LettaLLMRequestAdapter):
|
||||
@@ -85,6 +86,11 @@ class SimpleLLMRequestAdapter(LettaLLMRequestAdapter):
|
||||
self.usage.prompt_tokens = self.chat_completions_response.usage.prompt_tokens
|
||||
self.usage.total_tokens = self.chat_completions_response.usage.total_tokens
|
||||
|
||||
# Extract cache and reasoning token details using normalized helpers
|
||||
usage = self.chat_completions_response.usage
|
||||
self.usage.cached_input_tokens, self.usage.cache_write_tokens = normalize_cache_tokens(usage.prompt_tokens_details)
|
||||
self.usage.reasoning_tokens = normalize_reasoning_tokens(usage.completion_tokens_details)
|
||||
|
||||
self.log_provider_trace(step_id=step_id, actor=actor)
|
||||
|
||||
yield None
|
||||
|
||||
@@ -158,11 +158,34 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
|
||||
if not output_tokens and hasattr(self.interface, "fallback_output_tokens"):
|
||||
output_tokens = self.interface.fallback_output_tokens
|
||||
|
||||
# Extract cache token data (OpenAI/Gemini use cached_tokens)
|
||||
cached_input_tokens = 0
|
||||
if hasattr(self.interface, "cached_tokens") and self.interface.cached_tokens:
|
||||
cached_input_tokens = self.interface.cached_tokens
|
||||
# Anthropic uses cache_read_tokens for cache hits
|
||||
elif hasattr(self.interface, "cache_read_tokens") and self.interface.cache_read_tokens:
|
||||
cached_input_tokens = self.interface.cache_read_tokens
|
||||
|
||||
# Extract cache write tokens (Anthropic only)
|
||||
cache_write_tokens = 0
|
||||
if hasattr(self.interface, "cache_creation_tokens") and self.interface.cache_creation_tokens:
|
||||
cache_write_tokens = self.interface.cache_creation_tokens
|
||||
|
||||
# Extract reasoning tokens (OpenAI o1/o3 models use reasoning_tokens, Gemini uses thinking_tokens)
|
||||
reasoning_tokens = 0
|
||||
if hasattr(self.interface, "reasoning_tokens") and self.interface.reasoning_tokens:
|
||||
reasoning_tokens = self.interface.reasoning_tokens
|
||||
elif hasattr(self.interface, "thinking_tokens") and self.interface.thinking_tokens:
|
||||
reasoning_tokens = self.interface.thinking_tokens
|
||||
|
||||
self.usage = LettaUsageStatistics(
|
||||
step_count=1,
|
||||
completion_tokens=output_tokens or 0,
|
||||
prompt_tokens=input_tokens or 0,
|
||||
total_tokens=(input_tokens or 0) + (output_tokens or 0),
|
||||
cached_input_tokens=cached_input_tokens,
|
||||
cache_write_tokens=cache_write_tokens,
|
||||
reasoning_tokens=reasoning_tokens,
|
||||
)
|
||||
else:
|
||||
# Default usage statistics if not available
|
||||
|
||||
Reference in New Issue
Block a user