From 2d8e3efd9ed23f244fe441e4b80f2b062ddac413 Mon Sep 17 00:00:00 2001
From: cthomas <caren@letta.com>
Date: Fri, 3 Oct 2025 17:27:20 -0700
Subject: [PATCH] feat: update reasoning toggle test (#5136)

---
 letta/schemas/llm_config.py | 17 ++++++++++++-----
 tests/test_providers.py     |  8 ++++----
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py
index 426f11ed..1c6b6a64 100644
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -286,17 +286,24 @@ class LLMConfig(BaseModel):
                 return config
 
             # Anthropic 3.7/4 and Gemini: toggle honored
-            if (
-                cls.is_anthropic_reasoning_model(config)
-                or cls.is_google_vertex_reasoning_model(config)
-                or cls.is_google_ai_reasoning_model(config)
-            ):
+            is_google_reasoner_with_configurable_thinking = (
+                cls.is_google_vertex_reasoning_model(config) or cls.is_google_ai_reasoning_model(config)
+            ) and not config.model.startswith("gemini-2.5-pro")
+            if cls.is_anthropic_reasoning_model(config) or is_google_reasoner_with_configurable_thinking:
                 config.enable_reasoner = bool(reasoning)
                 config.put_inner_thoughts_in_kwargs = False
                 if config.enable_reasoner and config.max_reasoning_tokens == 0:
                     config.max_reasoning_tokens = 1024
                 return config
 
+            # Google Gemini 2.5 Pro: reasoning cannot be disabled, so the toggle is ignored
+            if config.model.startswith("gemini-2.5-pro"):
+                config.put_inner_thoughts_in_kwargs = False
+                config.enable_reasoner = True
+                if config.max_reasoning_tokens == 0:
+                    config.max_reasoning_tokens = 1024
+                return config
+
             # Everything else: disabled (no inner_thoughts-in-kwargs simulation)
             config.put_inner_thoughts_in_kwargs = False
             config.enable_reasoner = False
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 0816be45..71bd3ae1 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -341,7 +341,7 @@ async def test_provider_llm_models_consistency():
         ("google_vertex/gemini-2.5-flash", AgentType.memgpt_v2_agent, False, False, False, 0, None),
         ("google_vertex/gemini-2.5-pro", AgentType.memgpt_v2_agent, True, True, True, 1024, None),
         ("google_vertex/gemini-2.5-pro", AgentType.memgpt_v2_agent, True, True, True, 1024, None),
-        # assistant message agent loop
+        # letta_v1_agent loop
         ("openai/gpt-4o-mini", AgentType.letta_v1_agent, True, False, False, 0, None),
         ("openai/gpt-4o-mini", AgentType.letta_v1_agent, False, False, False, 0, None),
         ("openai/o3-mini", AgentType.letta_v1_agent, True, True, False, 0, "medium"),
@@ -354,10 +354,10 @@ async def test_provider_llm_models_consistency():
         ("anthropic/claude-sonnet-4", AgentType.letta_v1_agent, False, False, False, 0, None),
         ("google_vertex/gemini-2.0-flash", AgentType.letta_v1_agent, True, False, False, 0, None),
         ("google_vertex/gemini-2.0-flash", AgentType.letta_v1_agent, False, False, False, 0, None),
-        ("google_vertex/gemini-2.5-flash", AgentType.letta_v1_agent, True, False, False, 0, None),
+        ("google_vertex/gemini-2.5-flash", AgentType.letta_v1_agent, True, True, False, 1024, None),
         ("google_vertex/gemini-2.5-flash", AgentType.letta_v1_agent, False, False, False, 0, None),
-        ("google_vertex/gemini-2.5-pro", AgentType.letta_v1_agent, True, False, False, 0, None),
-        ("google_vertex/gemini-2.5-pro", AgentType.letta_v1_agent, True, False, False, 0, None),
+        ("google_vertex/gemini-2.5-pro", AgentType.letta_v1_agent, True, True, False, 1024, None),
+        ("google_vertex/gemini-2.5-pro", AgentType.letta_v1_agent, False, True, False, 1024, None),
     ],
 )
 def test_reasoning_toggle_by_provider(