From 9adeb122cd1d173b56deba5b71426d5b0ffaa679 Mon Sep 17 00:00:00 2001 From: jnjpng Date: Fri, 21 Nov 2025 17:06:18 -0800 Subject: [PATCH] feat: set thinking level high for gemini 3 (#6330) * base * include thoughts * update * clean --------- Co-authored-by: Letta Bot --- letta/llm_api/google_vertex_client.py | 32 ++++++++++++++++----------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py index 70a6704d..b9c19e83 100644 --- a/letta/llm_api/google_vertex_client.py +++ b/letta/llm_api/google_vertex_client.py @@ -374,22 +374,28 @@ class GoogleVertexClient(LLMClientBase): # If enable_reasoner is False, set thinking_budget to 0 # Otherwise, use the value from max_reasoning_tokens if self.is_reasoning_model(llm_config) or "flash" in llm_config.model: - # Gemini reasoning models may fail to call tools even with FunctionCallingConfigMode.ANY if thinking is fully disabled, set to minimum to prevent tool call failure - thinking_budget = llm_config.max_reasoning_tokens if llm_config.enable_reasoner else self.get_thinking_budget(llm_config.model) - if thinking_budget <= 0: - logger.warning( - f"Thinking budget of {thinking_budget} for Gemini reasoning model {llm_config.model}, this will likely cause tool call failures" + if llm_config.model.startswith("gemini-3"): + # Let thinking_level default to high by not specifying thinking_budget + thinking_config = ThinkingConfig(include_thoughts=True) + else: + # Gemini reasoning models may fail to call tools even with FunctionCallingConfigMode.ANY if thinking is fully disabled, set to minimum to prevent tool call failure + thinking_budget = ( + llm_config.max_reasoning_tokens if llm_config.enable_reasoner else self.get_thinking_budget(llm_config.model) ) - # For models that require thinking mode (2.5 Pro, 3.x), override with minimum valid budget - if llm_config.model.startswith("gemini-2.5-pro") or 
llm_config.model.startswith("gemini-3"): - thinking_budget = 128 + if thinking_budget <= 0: logger.warning( - f"Overriding thinking_budget to {thinking_budget} for model {llm_config.model} which requires thinking mode" + f"Thinking budget of {thinking_budget} for Gemini reasoning model {llm_config.model}, this will likely cause tool call failures" ) - thinking_config = ThinkingConfig( - thinking_budget=(thinking_budget), - include_thoughts=(thinking_budget > 1), - ) + # For models that require thinking mode (2.5 Pro), override with minimum valid budget; Gemini 3.x is handled in the branch above + if llm_config.model.startswith("gemini-2.5-pro"): + thinking_budget = 128 + logger.warning( + f"Overriding thinking_budget to {thinking_budget} for model {llm_config.model} which requires thinking mode" + ) + thinking_config = ThinkingConfig( + thinking_budget=(thinking_budget), + include_thoughts=(thinking_budget > 1), + ) request_data["config"]["thinking_config"] = thinking_config.model_dump() return request_data