From eaeac54798ce2a76ac107331b8f4a66e027d2c56 Mon Sep 17 00:00:00 2001
From: cthomas <caren@letta.com>
Date: Sat, 24 May 2025 09:42:34 -0700
Subject: [PATCH] fix: google clients thinking config (#2414)

Co-authored-by: Sarah Wooders <sarahwooders@gmail.com>
---
 letta/llm_api/google_ai_client.py     | 15 ---------------
 letta/llm_api/google_vertex_client.py | 11 ++++++-----
 2 files changed, 6 insertions(+), 20 deletions(-)

diff --git a/letta/llm_api/google_ai_client.py b/letta/llm_api/google_ai_client.py
index f1d8e091..47671398 100644
--- a/letta/llm_api/google_ai_client.py
+++ b/letta/llm_api/google_ai_client.py
@@ -7,10 +7,7 @@ from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
 from letta.llm_api.google_constants import GOOGLE_MODEL_FOR_API_KEY_CHECK
 from letta.llm_api.google_vertex_client import GoogleVertexClient
 from letta.log import get_logger
-from letta.schemas.llm_config import LLMConfig
-from letta.schemas.message import Message as PydanticMessage
 from letta.settings import model_settings
-from letta.tracing import trace_method
 
 logger = get_logger(__name__)
 
@@ -20,18 +17,6 @@ class GoogleAIClient(GoogleVertexClient):
     def _get_client(self):
         return genai.Client(api_key=model_settings.gemini_api_key)
 
-    @trace_method
-    def build_request_data(
-        self,
-        messages: List[PydanticMessage],
-        llm_config: LLMConfig,
-        tools: List[dict],
-        force_tool_call: Optional[str] = None,
-    ) -> dict:
-        request = super().build_request_data(messages, llm_config, tools, force_tool_call)
-        del request["config"]["thinking_config"]
-        return request
-
 
 def get_gemini_endpoint_and_headers(
     base_url: str, model: Optional[str], api_key: str, key_in_header: bool = True, generate_content: bool = False
diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py
index afc80ebd..48c9324d 100644
--- a/letta/llm_api/google_vertex_client.py
+++ b/letta/llm_api/google_vertex_client.py
@@ -241,13 +241,14 @@ class GoogleVertexClient(LLMClientBase):
             )
             request_data["config"]["tool_config"] = tool_config.model_dump()
 
-        # Add thinking_config
+        # Add thinking_config only for flash models (model name contains "flash")
         # If enable_reasoner is False, set thinking_budget to 0
         # Otherwise, use the value from max_reasoning_tokens
-        thinking_config = ThinkingConfig(
-            thinking_budget=llm_config.max_reasoning_tokens if llm_config.enable_reasoner else 0,
-        )
-        request_data["config"]["thinking_config"] = thinking_config.model_dump()
+        if "flash" in llm_config.model:
+            thinking_config = ThinkingConfig(
+                thinking_budget=llm_config.max_reasoning_tokens if llm_config.enable_reasoner else 0,
+            )
+            request_data["config"]["thinking_config"] = thinking_config.model_dump()
 
         return request_data