From eaeac54798ce2a76ac107331b8f4a66e027d2c56 Mon Sep 17 00:00:00 2001 From: cthomas Date: Sat, 24 May 2025 09:42:34 -0700 Subject: [PATCH] fix: google clients thinking config (#2414) Co-authored-by: Sarah Wooders --- letta/llm_api/google_ai_client.py | 15 --------------- letta/llm_api/google_vertex_client.py | 11 ++++++----- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/letta/llm_api/google_ai_client.py b/letta/llm_api/google_ai_client.py index f1d8e091..47671398 100644 --- a/letta/llm_api/google_ai_client.py +++ b/letta/llm_api/google_ai_client.py @@ -7,10 +7,7 @@ from letta.errors import ErrorCode, LLMAuthenticationError, LLMError from letta.llm_api.google_constants import GOOGLE_MODEL_FOR_API_KEY_CHECK from letta.llm_api.google_vertex_client import GoogleVertexClient from letta.log import get_logger -from letta.schemas.llm_config import LLMConfig -from letta.schemas.message import Message as PydanticMessage from letta.settings import model_settings -from letta.tracing import trace_method logger = get_logger(__name__) @@ -20,18 +17,6 @@ class GoogleAIClient(GoogleVertexClient): def _get_client(self): return genai.Client(api_key=model_settings.gemini_api_key) - @trace_method - def build_request_data( - self, - messages: List[PydanticMessage], - llm_config: LLMConfig, - tools: List[dict], - force_tool_call: Optional[str] = None, - ) -> dict: - request = super().build_request_data(messages, llm_config, tools, force_tool_call) - del request["config"]["thinking_config"] - return request - def get_gemini_endpoint_and_headers( base_url: str, model: Optional[str], api_key: str, key_in_header: bool = True, generate_content: bool = False diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py index afc80ebd..48c9324d 100644 --- a/letta/llm_api/google_vertex_client.py +++ b/letta/llm_api/google_vertex_client.py @@ -241,13 +241,14 @@ class GoogleVertexClient(LLMClientBase): ) request_data["config"]["tool_config"] = tool_config.model_dump() - # Add thinking_config + # Add thinking_config for flash # If enable_reasoner is False, set thinking_budget to 0 # Otherwise, use the value from max_reasoning_tokens - thinking_config = ThinkingConfig( - thinking_budget=llm_config.max_reasoning_tokens if llm_config.enable_reasoner else 0, - ) - request_data["config"]["thinking_config"] = thinking_config.model_dump() + if "flash" in llm_config.model: + thinking_config = ThinkingConfig( + thinking_budget=llm_config.max_reasoning_tokens if llm_config.enable_reasoner else 0, + ) + request_data["config"]["thinking_config"] = thinking_config.model_dump() return request_data