From 5cf807574f30089f537275eb53e5e07fee66fb4d Mon Sep 17 00:00:00 2001 From: cthomas Date: Mon, 11 Aug 2025 16:55:45 -0700 Subject: [PATCH] feat: consolidate reasoning model checks (#3862) --- .github/scripts/model-sweep/model_sweep.py | 2 +- .../send-message-integration-tests.yaml | 2 +- letta/llm_api/anthropic_client.py | 11 ++++++++-- letta/llm_api/google_vertex_client.py | 3 +++ letta/llm_api/llm_client_base.py | 4 ++++ letta/llm_api/openai_client.py | 5 ++++- letta/schemas/llm_config.py | 21 ++++++------------- .../llm_model_configs/claude-3-7-sonnet.json | 8 ------- ...net.json => claude-4-sonnet-extended.json} | 4 +++- tests/integration_test_send_message.py | 3 +-- 10 files changed, 32 insertions(+), 31 deletions(-) delete mode 100644 tests/configs/llm_model_configs/claude-3-7-sonnet.json rename tests/configs/llm_model_configs/{claude-4-sonnet.json => claude-4-sonnet-extended.json} (65%) diff --git a/.github/scripts/model-sweep/model_sweep.py b/.github/scripts/model-sweep/model_sweep.py index c92b9364..322b427b 100644 --- a/.github/scripts/model-sweep/model_sweep.py +++ b/.github/scripts/model-sweep/model_sweep.py @@ -96,7 +96,7 @@ all_configs = [ "openai-gpt-4o-mini.json", # "azure-gpt-4o-mini.json", # TODO: Re-enable on new agent loop "claude-3-5-sonnet.json", - "claude-3-7-sonnet.json", + "claude-4-sonnet-extended.json", "claude-3-7-sonnet-extended.json", "gemini-1.5-pro.json", "gemini-2.5-flash-vertex.json", diff --git a/.github/workflows/send-message-integration-tests.yaml b/.github/workflows/send-message-integration-tests.yaml index eee96265..795614a9 100644 --- a/.github/workflows/send-message-integration-tests.yaml +++ b/.github/workflows/send-message-integration-tests.yaml @@ -19,7 +19,7 @@ jobs: - "openai-gpt-4o-mini.json" - "azure-gpt-4o-mini.json" - "claude-3-5-sonnet.json" - - "claude-3-7-sonnet.json" + - "claude-4-sonnet-extended.json" - "claude-3-7-sonnet-extended.json" - "gemini-pro.json" - "gemini-vertex.json" diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index 7b886dfc..7c6cc744 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -182,7 +182,7 @@ class AnthropicClient(LLMClientBase): } # Extended Thinking - if llm_config.enable_reasoner: + if self.is_reasoning_model(llm_config) and llm_config.enable_reasoner: data["thinking"] = { "type": "enabled", "budget_tokens": llm_config.max_reasoning_tokens, @@ -200,7 +200,7 @@ class AnthropicClient(LLMClientBase): # Special case for summarization path tools_for_request = None tool_choice = None - elif llm_config.enable_reasoner: + elif self.is_reasoning_model(llm_config) and llm_config.enable_reasoner: # NOTE: reasoning models currently do not allow for `any` tool_choice = {"type": "auto", "disable_parallel_tool_use": True} tools_for_request = [OpenAITool(function=f) for f in tools] @@ -296,6 +296,13 @@ class AnthropicClient(LLMClientBase): token_count -= 8 return token_count + def is_reasoning_model(self, llm_config: LLMConfig) -> bool: + return ( + llm_config.model.startswith("claude-3-7-sonnet") + or llm_config.model.startswith("claude-sonnet-4") + or llm_config.model.startswith("claude-opus-4") + ) + @trace_method def handle_llm_error(self, e: Exception) -> Exception: if isinstance(e, anthropic.APITimeoutError): diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py index 32ca5d69..fa6cf17d 100644 --- a/letta/llm_api/google_vertex_client.py +++ b/letta/llm_api/google_vertex_client.py @@ -504,6 +504,9 @@ class GoogleVertexClient(LLMClientBase): return 1 return 0 + def is_reasoning_model(self, llm_config: LLMConfig) -> bool: + return llm_config.model.startswith("gemini-2.5-flash") or llm_config.model.startswith("gemini-2.5-pro") + @trace_method def handle_llm_error(self, e: Exception) -> Exception: # Fallback to base implementation diff --git a/letta/llm_api/llm_client_base.py b/letta/llm_api/llm_client_base.py index 81ab852b..038caaa1 100644 --- a/letta/llm_api/llm_client_base.py +++ b/letta/llm_api/llm_client_base.py @@ -174,6 +174,10 @@ class LLMClientBase: """ raise NotImplementedError(f"Streaming is not supported for {llm_config.model_endpoint_type}") + @abstractmethod + def is_reasoning_model(self, llm_config: LLMConfig) -> bool: + raise NotImplementedError + @abstractmethod def handle_llm_error(self, e: Exception) -> Exception: """ diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py index 2b54a6e3..159372d2 100644 --- a/letta/llm_api/openai_client.py +++ b/letta/llm_api/openai_client.py @@ -276,6 +276,9 @@ class OpenAIClient(LLMClientBase): response: ChatCompletion = await client.chat.completions.create(**request_data) return response.model_dump() + def is_reasoning_model(self, llm_config: LLMConfig) -> bool: + return is_openai_reasoning_model(llm_config.model) + @trace_method def convert_response_to_chat_completion( self, @@ -298,7 +301,7 @@ class OpenAIClient(LLMClientBase): ) # If we used a reasoning model, create a content part for the ommitted reasoning - if is_openai_reasoning_model(llm_config.model): + if self.is_reasoning_model(llm_config): chat_completion_response.choices[0].message.omitted_reasoning_content = True return chat_completion_response diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index ecb2c663..24b7902d 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -94,6 +94,9 @@ class LLMConfig(BaseModel): """ model = values.get("model") + if model is None: + return values + # Define models where we want put_inner_thoughts_in_kwargs to be False avoid_put_inner_thoughts_in_kwargs = ["gpt-4"] @@ -107,25 +110,13 @@ class LLMConfig(BaseModel): if is_openai_reasoning_model(model): values["put_inner_thoughts_in_kwargs"] = False - if values.get("enable_reasoner") and values.get("model_endpoint_type") == "anthropic": + if values.get("model_endpoint_type") == "anthropic" and ( + model.startswith("claude-3-7-sonnet") or model.startswith("claude-sonnet-4") or model.startswith("claude-opus-4") + ): values["put_inner_thoughts_in_kwargs"] = False return values - @model_validator(mode="after") - def issue_warning_for_reasoning_constraints(self) -> "LLMConfig": - if self.enable_reasoner: - if self.max_reasoning_tokens is None: - logger.warning("max_reasoning_tokens must be set when enable_reasoner is True") - if self.max_tokens is not None and self.max_reasoning_tokens >= self.max_tokens: - logger.warning("max_tokens must be greater than max_reasoning_tokens (thinking budget)") - if self.put_inner_thoughts_in_kwargs: - logger.debug("Extended thinking is not compatible with put_inner_thoughts_in_kwargs") - elif self.max_reasoning_tokens and not self.enable_reasoner: - logger.warning("model will not use reasoning unless enable_reasoner is set to True") - - return self - @classmethod def default_config(cls, model_name: str): """ diff --git a/tests/configs/llm_model_configs/claude-3-7-sonnet.json b/tests/configs/llm_model_configs/claude-3-7-sonnet.json deleted file mode 100644 index beecaa75..00000000 --- a/tests/configs/llm_model_configs/claude-3-7-sonnet.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "model": "claude-3-7-sonnet-20250219", - "model_endpoint_type": "anthropic", - "model_endpoint": "https://api.anthropic.com/v1", - "model_wrapper": null, - "context_window": 200000, - "put_inner_thoughts_in_kwargs": true -} diff --git a/tests/configs/llm_model_configs/claude-4-sonnet.json b/tests/configs/llm_model_configs/claude-4-sonnet-extended.json similarity index 65% rename from tests/configs/llm_model_configs/claude-4-sonnet.json rename to tests/configs/llm_model_configs/claude-4-sonnet-extended.json index 12a582e0..e622c788 100644 --- a/tests/configs/llm_model_configs/claude-4-sonnet.json +++ b/tests/configs/llm_model_configs/claude-4-sonnet-extended.json @@ -4,5 +4,7 @@ "model_endpoint": "https://api.anthropic.com/v1", "model_wrapper": null, "context_window": 200000, - "put_inner_thoughts_in_kwargs": true + "put_inner_thoughts_in_kwargs": false, + "enable_reasoner": true, + "max_reasoning_tokens": 1024 } diff --git a/tests/integration_test_send_message.py b/tests/integration_test_send_message.py index ed44ed67..e0798841 100644 --- a/tests/integration_test_send_message.py +++ b/tests/integration_test_send_message.py @@ -118,9 +118,8 @@ all_configs = [ "openai-o3.json", "openai-o4-mini.json", "azure-gpt-4o-mini.json", - "claude-4-sonnet.json", + "claude-4-sonnet-extended.json", "claude-3-5-sonnet.json", - "claude-3-7-sonnet.json", "claude-3-7-sonnet-extended.json", "bedrock-claude-4-sonnet.json", "gemini-1.5-pro.json",