fix(core): patch usage data tracking for anthropic when context caching is on (#6997)

Author:    Charles Packer
Date:      2025-12-14 18:14:09 -08:00
Committer: Caren Thomas
Parent:    a731e01e88
Commit:    33d39f4643
4 changed files with 46 additions and 8 deletions
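
Background for the fix: Anthropic's usage payloads report input_tokens as the NON-cached portion of the prompt only, with cache reads and writes broken out into cache_read_input_tokens and cache_creation_input_tokens, whereas OpenAI and Gemini report the prompt total directly (cached tokens are a subset of it, not additive). A minimal sketch of the corrected arithmetic, with invented token counts purely for illustration:

    # Field names follow the providers' public usage payloads; the numbers are made up.
    anthropic_usage = {
        "input_tokens": 12,                   # non-cached portion only
        "cache_read_input_tokens": 4000,      # tokens read from the prompt cache
        "cache_creation_input_tokens": 1000,  # tokens written to the prompt cache
    }
    openai_usage = {
        "prompt_tokens": 5012,  # already the full total
        "cached_tokens": 4000,  # subset of prompt_tokens, NOT additive
    }

    # Anthropic: total input = non-cached + cache reads + cache writes
    anthropic_total = (
        anthropic_usage["input_tokens"]
        + anthropic_usage["cache_read_input_tokens"]
        + anthropic_usage["cache_creation_input_tokens"]
    )

    # OpenAI/Gemini: prompt_tokens is already the total
    openai_total = openai_usage["prompt_tokens"]

    assert anthropic_total == openai_total == 5012  # the old code reported 12 for Anthropic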


@@ -117,15 +117,47 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
             if not output_tokens and hasattr(self.interface, "fallback_output_tokens"):
                 output_tokens = self.interface.fallback_output_tokens
 
-            # NOTE: For Anthropic, input_tokens is NON-cached only, so total_tokens here
-            # undercounts the actual total (missing cache_read + cache_creation tokens).
-            # For OpenAI/Gemini, input_tokens is already the total, so this is correct.
-            # See simple_llm_stream_adapter.py for the proper provider-aware calculation.
+            # Extract cache token data (OpenAI/Gemini use cached_tokens, Anthropic uses cache_read_tokens)
+            # None means provider didn't report, 0 means provider reported 0
+            cached_input_tokens = None
+            if hasattr(self.interface, "cached_tokens") and self.interface.cached_tokens is not None:
+                cached_input_tokens = self.interface.cached_tokens
+            elif hasattr(self.interface, "cache_read_tokens") and self.interface.cache_read_tokens is not None:
+                cached_input_tokens = self.interface.cache_read_tokens
+
+            # Extract cache write tokens (Anthropic only)
+            cache_write_tokens = None
+            if hasattr(self.interface, "cache_creation_tokens") and self.interface.cache_creation_tokens is not None:
+                cache_write_tokens = self.interface.cache_creation_tokens
+
+            # Extract reasoning tokens (OpenAI o1/o3 models use reasoning_tokens, Gemini uses thinking_tokens)
+            reasoning_tokens = None
+            if hasattr(self.interface, "reasoning_tokens") and self.interface.reasoning_tokens is not None:
+                reasoning_tokens = self.interface.reasoning_tokens
+            elif hasattr(self.interface, "thinking_tokens") and self.interface.thinking_tokens is not None:
+                reasoning_tokens = self.interface.thinking_tokens
+
+            # Calculate actual total input tokens
+            #
+            # ANTHROPIC: input_tokens is NON-cached only, must add cache tokens
+            #   Total = input_tokens + cache_read_input_tokens + cache_creation_input_tokens
+            #
+            # OPENAI/GEMINI: input_tokens is already TOTAL
+            #   cached_tokens is a subset, NOT additive
+            is_anthropic = hasattr(self.interface, "cache_read_tokens") or hasattr(self.interface, "cache_creation_tokens")
+            if is_anthropic:
+                actual_input_tokens = (input_tokens or 0) + (cached_input_tokens or 0) + (cache_write_tokens or 0)
+            else:
+                actual_input_tokens = input_tokens or 0
+
             self.usage = LettaUsageStatistics(
                 step_count=1,
                 completion_tokens=output_tokens or 0,
-                prompt_tokens=input_tokens or 0,
-                total_tokens=(input_tokens or 0) + (output_tokens or 0),
+                prompt_tokens=actual_input_tokens,
+                total_tokens=actual_input_tokens + (output_tokens or 0),
+                cached_input_tokens=cached_input_tokens,
+                cache_write_tokens=cache_write_tokens,
+                reasoning_tokens=reasoning_tokens,
             )
         else:
             # Default usage statistics if not available
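
For a quick sanity check of the new Anthropic branch outside the adapter, a stub can stand in for self.interface (the SimpleNamespace below is an illustration, not part of the codebase, and the token counts are invented):

    from types import SimpleNamespace

    # Hypothetical post-stream state for an Anthropic run; attribute names match
    # the ones the adapter probes with hasattr() above.
    iface = SimpleNamespace(cache_read_tokens=4000, cache_creation_tokens=1000)
    input_tokens, output_tokens = 12, 250

    is_anthropic = hasattr(iface, "cache_read_tokens") or hasattr(iface, "cache_creation_tokens")
    if is_anthropic:
        actual_input_tokens = (input_tokens or 0) + (iface.cache_read_tokens or 0) + (iface.cache_creation_tokens or 0)
    else:
        actual_input_tokens = input_tokens or 0

    assert actual_input_tokens == 5012                  # the old code would have reported 12
    assert actual_input_tokens + output_tokens == 5262  # total_tokens now includes cache traffic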