diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index 7933d7c6..72d4c00e 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -210,9 +210,11 @@ class LLMConfig(BaseModel): # Handle as non-reasoner until we support summary config.put_inner_thoughts_in_kwargs = True config.enable_reasoner = True - - config.put_inner_thoughts_in_kwargs = False - config.enable_reasoner = False + if config.max_reasoning_tokens == 0: + config.max_reasoning_tokens = 1024 + else: + config.put_inner_thoughts_in_kwargs = False + config.enable_reasoner = False else: config.enable_reasoner = True diff --git a/tests/test_providers.py b/tests/test_providers.py index cb7a54bd..a1d55f27 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -320,33 +320,33 @@ async def test_provider_llm_models_consistency(): @pytest.mark.parametrize( - "handle, expected_enable_reasoner, expected_put_inner_thoughts_in_kwargs, expected_max_reasoning_tokens, expected_reasoning_effort, expected_exception", + "handle, reasoning, expected_enable_reasoner, expected_put_inner_thoughts_in_kwargs, expected_max_reasoning_tokens, expected_reasoning_effort", [ - ("openai/gpt-4o-mini", True, True, 0, None, None), - ("openai/gpt-4o-mini", False, False, 0, None, None), - ("openai/o3-mini", True, False, 0, "medium", None), - ("openai/o3-mini", False, False, 0, None, ValueError), - ("anthropic/claude-3.5-sonnet", True, True, 0, None, None), - ("anthropic/claude-3.5-sonnet", False, False, 0, None, None), - ("anthropic/claude-3-7-sonnet", True, False, 1024, None, None), - ("anthropic/claude-3-7-sonnet", False, False, 0, None, None), - ("anthropic/claude-sonnet-4", True, False, 1024, None, None), - ("anthropic/claude-sonnet-4", False, False, 0, None, None), - ("google_vertex/gemini-2.0-flash", True, True, 0, None, None), - ("google_vertex/gemini-2.0-flash", False, False, 0, None, None), - ("google_vertex/gemini-2.5-flash", True, True, 1024, None, None), - 
("google_vertex/gemini-2.5-flash", False, False, 0, None, None), - ("google_vertex/gemini-2.5-pro", True, True, 1024, None, None), - ("google_vertex/gemini-2.5-pro", False, False, 0, None, ValueError), + ("openai/gpt-4o-mini", True, True, True, 0, None), + ("openai/gpt-4o-mini", False, False, False, 0, None), + ("openai/o3-mini", True, True, False, 0, "medium"), + ("openai/o3-mini", False, True, False, 0, "medium"), + ("anthropic/claude-3.5-sonnet", True, True, True, 0, None), + ("anthropic/claude-3.5-sonnet", False, False, False, 0, None), + ("anthropic/claude-3-7-sonnet", True, True, False, 1024, None), + ("anthropic/claude-3-7-sonnet", False, False, False, 0, None), + ("anthropic/claude-sonnet-4", True, True, False, 1024, None), + ("anthropic/claude-sonnet-4", False, False, False, 0, None), + ("google_vertex/gemini-2.0-flash", True, True, True, 0, None), + ("google_vertex/gemini-2.0-flash", False, False, False, 0, None), + ("google_vertex/gemini-2.5-flash", True, True, True, 1024, None), + ("google_vertex/gemini-2.5-flash", False, False, False, 0, None), + ("google_vertex/gemini-2.5-pro", True, True, True, 1024, None), + ("google_vertex/gemini-2.5-pro", False, True, True, 1024, None), ], ) def test_reasoning_toggle_by_provider( handle: str, + reasoning: bool, expected_enable_reasoner: bool, expected_put_inner_thoughts_in_kwargs: bool, expected_max_reasoning_tokens: int, expected_reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]], - expected_exception: Optional[Exception], ): model_endpoint_type, model = handle.split("/") config = LLMConfig( @@ -355,13 +355,9 @@ def test_reasoning_toggle_by_provider( handle=handle, context_window=1024, ) - if expected_exception: - with pytest.raises(expected_exception): - LLMConfig.apply_reasoning_setting_to_config(config, reasoning=expected_enable_reasoner) - else: - new_config = LLMConfig.apply_reasoning_setting_to_config(config, reasoning=expected_enable_reasoner) + new_config = 
LLMConfig.apply_reasoning_setting_to_config(config, reasoning=reasoning) - assert new_config.enable_reasoner == expected_enable_reasoner - assert new_config.put_inner_thoughts_in_kwargs == expected_put_inner_thoughts_in_kwargs - assert new_config.reasoning_effort == expected_reasoning_effort - assert new_config.max_reasoning_tokens == expected_max_reasoning_tokens + assert new_config.enable_reasoner == expected_enable_reasoner + assert new_config.put_inner_thoughts_in_kwargs == expected_put_inner_thoughts_in_kwargs + assert new_config.reasoning_effort == expected_reasoning_effort + assert new_config.max_reasoning_tokens == expected_max_reasoning_tokens