fix(core): distinguish between null and 0 for prompt caching (#6451)

* fix(core): distinguish between null and 0 for prompt caching

* fix: runtime errors

* fix: just publish stage
This commit is contained in:
Charles Packer
2025-11-29 00:09:43 -08:00
committed by Caren Thomas
parent 131891e05f
commit 88a3743cc8
10 changed files with 182 additions and 84 deletions

View File

@@ -631,8 +631,12 @@ class GoogleVertexClient(LLMClientBase):
# }
if response.usage_metadata:
# Extract cache token data if available (Gemini uses cached_content_token_count)
# Use `is not None` to capture 0 values (meaning "provider reported 0 cached tokens")
prompt_tokens_details = None
if hasattr(response.usage_metadata, "cached_content_token_count") and response.usage_metadata.cached_content_token_count:
if (
hasattr(response.usage_metadata, "cached_content_token_count")
and response.usage_metadata.cached_content_token_count is not None
):
from letta.schemas.openai.chat_completion_response import UsageStatisticsPromptTokenDetails
prompt_tokens_details = UsageStatisticsPromptTokenDetails(
@@ -640,8 +644,9 @@ class GoogleVertexClient(LLMClientBase):
)
# Extract thinking/reasoning token data if available (Gemini uses thoughts_token_count)
# Use `is not None` to capture 0 values (meaning "provider reported 0 reasoning tokens")
completion_tokens_details = None
if hasattr(response.usage_metadata, "thoughts_token_count") and response.usage_metadata.thoughts_token_count:
if hasattr(response.usage_metadata, "thoughts_token_count") and response.usage_metadata.thoughts_token_count is not None:
from letta.schemas.openai.chat_completion_response import UsageStatisticsCompletionTokenDetails
completion_tokens_details = UsageStatisticsCompletionTokenDetails(