From 2d8e3efd9ed23f244fe441e4b80f2b062ddac413 Mon Sep 17 00:00:00 2001 From: cthomas Date: Fri, 3 Oct 2025 17:27:20 -0700 Subject: [PATCH] feat: update reasoning toggle test (#5136) --- letta/schemas/llm_config.py | 17 ++++++++++++----- tests/test_providers.py | 8 ++++---- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index 426f11ed..1c6b6a64 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -286,17 +286,24 @@ class LLMConfig(BaseModel): return config # Anthropic 3.7/4 and Gemini: toggle honored - if ( - cls.is_anthropic_reasoning_model(config) - or cls.is_google_vertex_reasoning_model(config) - or cls.is_google_ai_reasoning_model(config) - ): + is_google_reasoner_with_configurable_thinking = ( + cls.is_google_vertex_reasoning_model(config) or cls.is_google_ai_reasoning_model(config) + ) and not config.model.startswith("gemini-2.5-pro") + if cls.is_anthropic_reasoning_model(config) or is_google_reasoner_with_configurable_thinking: config.enable_reasoner = bool(reasoning) config.put_inner_thoughts_in_kwargs = False if config.enable_reasoner and config.max_reasoning_tokens == 0: config.max_reasoning_tokens = 1024 return config + # Google Gemini 2.5 Pro: not possible to disable + if config.model.startswith("gemini-2.5-pro"): + config.put_inner_thoughts_in_kwargs = False + config.enable_reasoner = True + if config.max_reasoning_tokens == 0: + config.max_reasoning_tokens = 1024 + return config + # Everything else: disabled (no inner_thoughts-in-kwargs simulation) config.put_inner_thoughts_in_kwargs = False config.enable_reasoner = False diff --git a/tests/test_providers.py b/tests/test_providers.py index 0816be45..71bd3ae1 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -341,7 +341,7 @@ async def test_provider_llm_models_consistency(): ("google_vertex/gemini-2.5-flash", AgentType.memgpt_v2_agent, False, False, False, 0, None), ("google_vertex/gemini-2.5-pro", AgentType.memgpt_v2_agent, True, True, True, 1024, None), ("google_vertex/gemini-2.5-pro", AgentType.memgpt_v2_agent, True, True, True, 1024, None), - # assistant message agent loop + # letta_v1_agent loop ("openai/gpt-4o-mini", AgentType.letta_v1_agent, True, False, False, 0, None), ("openai/gpt-4o-mini", AgentType.letta_v1_agent, False, False, False, 0, None), ("openai/o3-mini", AgentType.letta_v1_agent, True, True, False, 0, "medium"), @@ -354,10 +354,10 @@ async def test_provider_llm_models_consistency(): ("anthropic/claude-sonnet-4", AgentType.letta_v1_agent, False, False, False, 0, None), ("google_vertex/gemini-2.0-flash", AgentType.letta_v1_agent, True, False, False, 0, None), ("google_vertex/gemini-2.0-flash", AgentType.letta_v1_agent, False, False, False, 0, None), - ("google_vertex/gemini-2.5-flash", AgentType.letta_v1_agent, True, False, False, 0, None), + ("google_vertex/gemini-2.5-flash", AgentType.letta_v1_agent, True, True, False, 1024, None), ("google_vertex/gemini-2.5-flash", AgentType.letta_v1_agent, False, False, False, 0, None), - ("google_vertex/gemini-2.5-pro", AgentType.letta_v1_agent, True, False, False, 0, None), - ("google_vertex/gemini-2.5-pro", AgentType.letta_v1_agent, True, False, False, 0, None), + ("google_vertex/gemini-2.5-pro", AgentType.letta_v1_agent, True, True, False, 1024, None), + ("google_vertex/gemini-2.5-pro", AgentType.letta_v1_agent, False, True, False, 1024, None), ], ) def test_reasoning_toggle_by_provider(