From e142d440d5aeea4da5bf629aab5637a57713f3e5 Mon Sep 17 00:00:00 2001 From: Charles Packer Date: Fri, 28 Nov 2025 18:52:21 -0800 Subject: [PATCH] fix: patch gemini token counting (#6445) fix: use usage_metadata.candidates_token_count for counting output tokens --- letta/interfaces/gemini_streaming_interface.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/letta/interfaces/gemini_streaming_interface.py b/letta/interfaces/gemini_streaming_interface.py index c7245a8e..020dbad1 100644 --- a/letta/interfaces/gemini_streaming_interface.py +++ b/letta/interfaces/gemini_streaming_interface.py @@ -166,8 +166,11 @@ class SimpleGeminiStreamingInterface: if usage_metadata: if usage_metadata.prompt_token_count: self.input_tokens = usage_metadata.prompt_token_count - if usage_metadata.total_token_count: - self.output_tokens = usage_metadata.total_token_count - usage_metadata.prompt_token_count + # Use candidates_token_count directly for output tokens. + # Do NOT use (total_token_count - prompt_token_count) as that incorrectly + # includes thinking/reasoning tokens which can be 10-100x the actual output. + if usage_metadata.candidates_token_count: + self.output_tokens = usage_metadata.candidates_token_count if not event.candidates or len(event.candidates) == 0: return