From 9adeb122cd1d173b56deba5b71426d5b0ffaa679 Mon Sep 17 00:00:00 2001
From: jnjpng <jin@letta.com>
Date: Fri, 21 Nov 2025 17:06:18 -0800
Subject: [PATCH] feat: set thinking level high for gemini 3 (#6330)

* base

* include thoughts

* update

* clean

---------

Co-authored-by: Letta Bot <noreply@letta.com>
---
 letta/llm_api/google_vertex_client.py | 32 ++++++++++++++++-----------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py
index 70a6704d..b9c19e83 100644
--- a/letta/llm_api/google_vertex_client.py
+++ b/letta/llm_api/google_vertex_client.py
@@ -374,22 +374,28 @@ class GoogleVertexClient(LLMClientBase):
         # If enable_reasoner is False, set thinking_budget to 0
         # Otherwise, use the value from max_reasoning_tokens
         if self.is_reasoning_model(llm_config) or "flash" in llm_config.model:
-            # Gemini reasoning models may fail to call tools even with FunctionCallingConfigMode.ANY if thinking is fully disabled, set to minimum to prevent tool call failure
-            thinking_budget = llm_config.max_reasoning_tokens if llm_config.enable_reasoner else self.get_thinking_budget(llm_config.model)
-            if thinking_budget <= 0:
-                logger.warning(
-                    f"Thinking budget of {thinking_budget} for Gemini reasoning model {llm_config.model}, this will likely cause tool call failures"
+            if llm_config.model.startswith("gemini-3"):
+                # Let thinking_level default to high by not specifying thinking_budget
+                thinking_config = ThinkingConfig(include_thoughts=True)
+            else:
+                # Gemini reasoning models may fail to call tools even with FunctionCallingConfigMode.ANY if thinking is fully disabled; set to minimum to prevent tool call failure
+                thinking_budget = (
+                    llm_config.max_reasoning_tokens if llm_config.enable_reasoner else self.get_thinking_budget(llm_config.model)
                 )
-                # For models that require thinking mode (2.5 Pro, 3.x), override with minimum valid budget
-                if llm_config.model.startswith("gemini-2.5-pro") or llm_config.model.startswith("gemini-3"):
-                    thinking_budget = 128
+                if thinking_budget <= 0:
                     logger.warning(
-                        f"Overriding thinking_budget to {thinking_budget} for model {llm_config.model} which requires thinking mode"
+                        f"Thinking budget of {thinking_budget} for Gemini reasoning model {llm_config.model}, this will likely cause tool call failures"
                     )
-            thinking_config = ThinkingConfig(
-                thinking_budget=(thinking_budget),
-                include_thoughts=(thinking_budget > 1),
-            )
+                    # For models that require thinking mode (2.5 Pro), override with minimum valid budget
+                    if llm_config.model.startswith("gemini-2.5-pro"):
+                        thinking_budget = 128
+                        logger.warning(
+                            f"Overriding thinking_budget to {thinking_budget} for model {llm_config.model} which requires thinking mode"
+                        )
+                thinking_config = ThinkingConfig(
+                    thinking_budget=(thinking_budget),
+                    include_thoughts=(thinking_budget > 1),
+                )
             request_data["config"]["thinking_config"] = thinking_config.model_dump()
 
         return request_data