fix(core): patch usage data tracking for anthropic when context caching is on (#6997)
This commit is contained in:
committed by
Caren Thomas
parent
a731e01e88
commit
33d39f4643
@@ -117,15 +117,47 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
|
||||
if not output_tokens and hasattr(self.interface, "fallback_output_tokens"):
|
||||
output_tokens = self.interface.fallback_output_tokens
|
||||
|
||||
# NOTE: For Anthropic, input_tokens is NON-cached only, so total_tokens here
|
||||
# undercounts the actual total (missing cache_read + cache_creation tokens).
|
||||
# For OpenAI/Gemini, input_tokens is already the total, so this is correct.
|
||||
# See simple_llm_stream_adapter.py for the proper provider-aware calculation.
|
||||
# Extract cache token data (OpenAI/Gemini use cached_tokens, Anthropic uses cache_read_tokens)
|
||||
# None means provider didn't report, 0 means provider reported 0
|
||||
cached_input_tokens = None
|
||||
if hasattr(self.interface, "cached_tokens") and self.interface.cached_tokens is not None:
|
||||
cached_input_tokens = self.interface.cached_tokens
|
||||
elif hasattr(self.interface, "cache_read_tokens") and self.interface.cache_read_tokens is not None:
|
||||
cached_input_tokens = self.interface.cache_read_tokens
|
||||
|
||||
# Extract cache write tokens (Anthropic only)
|
||||
cache_write_tokens = None
|
||||
if hasattr(self.interface, "cache_creation_tokens") and self.interface.cache_creation_tokens is not None:
|
||||
cache_write_tokens = self.interface.cache_creation_tokens
|
||||
|
||||
# Extract reasoning tokens (OpenAI o1/o3 models use reasoning_tokens, Gemini uses thinking_tokens)
|
||||
reasoning_tokens = None
|
||||
if hasattr(self.interface, "reasoning_tokens") and self.interface.reasoning_tokens is not None:
|
||||
reasoning_tokens = self.interface.reasoning_tokens
|
||||
elif hasattr(self.interface, "thinking_tokens") and self.interface.thinking_tokens is not None:
|
||||
reasoning_tokens = self.interface.thinking_tokens
|
||||
|
||||
# Calculate actual total input tokens
|
||||
#
|
||||
# ANTHROPIC: input_tokens is NON-cached only, must add cache tokens
|
||||
# Total = input_tokens + cache_read_input_tokens + cache_creation_input_tokens
|
||||
#
|
||||
# OPENAI/GEMINI: input_tokens is already TOTAL
|
||||
# cached_tokens is a subset, NOT additive
|
||||
is_anthropic = hasattr(self.interface, "cache_read_tokens") or hasattr(self.interface, "cache_creation_tokens")
|
||||
if is_anthropic:
|
||||
actual_input_tokens = (input_tokens or 0) + (cached_input_tokens or 0) + (cache_write_tokens or 0)
|
||||
else:
|
||||
actual_input_tokens = input_tokens or 0
|
||||
|
||||
self.usage = LettaUsageStatistics(
|
||||
step_count=1,
|
||||
completion_tokens=output_tokens or 0,
|
||||
prompt_tokens=input_tokens or 0,
|
||||
total_tokens=(input_tokens or 0) + (output_tokens or 0),
|
||||
prompt_tokens=actual_input_tokens,
|
||||
total_tokens=actual_input_tokens + (output_tokens or 0),
|
||||
cached_input_tokens=cached_input_tokens,
|
||||
cache_write_tokens=cache_write_tokens,
|
||||
reasoning_tokens=reasoning_tokens,
|
||||
)
|
||||
else:
|
||||
# Default usage statistics if not available
|
||||
|
||||
Reference in New Issue
Block a user