From 5cf807574f30089f537275eb53e5e07fee66fb4d Mon Sep 17 00:00:00 2001
From: cthomas <caren@letta.com>
Date: Mon, 11 Aug 2025 16:55:45 -0700
Subject: [PATCH] feat: consolidate reasoning model checks (#3862)

---
 .github/scripts/model-sweep/model_sweep.py    |  2 +-
 .../send-message-integration-tests.yaml       |  2 +-
 letta/llm_api/anthropic_client.py             | 11 ++++++++--
 letta/llm_api/google_vertex_client.py         |  3 +++
 letta/llm_api/llm_client_base.py              |  4 ++++
 letta/llm_api/openai_client.py                |  5 ++++-
 letta/schemas/llm_config.py                   | 21 ++++++-------------
 .../llm_model_configs/claude-3-7-sonnet.json  |  8 -------
 ...net.json => claude-4-sonnet-extended.json} |  4 +++-
 tests/integration_test_send_message.py        |  3 +--
 10 files changed, 32 insertions(+), 31 deletions(-)
 delete mode 100644 tests/configs/llm_model_configs/claude-3-7-sonnet.json
 rename tests/configs/llm_model_configs/{claude-4-sonnet.json => claude-4-sonnet-extended.json} (65%)

diff --git a/.github/scripts/model-sweep/model_sweep.py b/.github/scripts/model-sweep/model_sweep.py
index c92b9364..322b427b 100644
--- a/.github/scripts/model-sweep/model_sweep.py
+++ b/.github/scripts/model-sweep/model_sweep.py
@@ -96,7 +96,7 @@ all_configs = [
     "openai-gpt-4o-mini.json",
     # "azure-gpt-4o-mini.json", # TODO: Re-enable on new agent loop
     "claude-3-5-sonnet.json",
-    "claude-3-7-sonnet.json",
+    "claude-4-sonnet-extended.json",
     "claude-3-7-sonnet-extended.json",
     "gemini-1.5-pro.json",
     "gemini-2.5-flash-vertex.json",
diff --git a/.github/workflows/send-message-integration-tests.yaml b/.github/workflows/send-message-integration-tests.yaml
index eee96265..795614a9 100644
--- a/.github/workflows/send-message-integration-tests.yaml
+++ b/.github/workflows/send-message-integration-tests.yaml
@@ -19,7 +19,7 @@ jobs:
           - "openai-gpt-4o-mini.json"
           - "azure-gpt-4o-mini.json"
           - "claude-3-5-sonnet.json"
-          - "claude-3-7-sonnet.json"
+          - "claude-4-sonnet-extended.json"
           - "claude-3-7-sonnet-extended.json"
           - "gemini-pro.json"
           - "gemini-vertex.json"
diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py
index 7b886dfc..7c6cc744 100644
--- a/letta/llm_api/anthropic_client.py
+++ b/letta/llm_api/anthropic_client.py
@@ -182,7 +182,7 @@ class AnthropicClient(LLMClientBase):
         }
 
         # Extended Thinking
-        if llm_config.enable_reasoner:
+        if self.is_reasoning_model(llm_config) and llm_config.enable_reasoner:
             data["thinking"] = {
                 "type": "enabled",
                 "budget_tokens": llm_config.max_reasoning_tokens,
@@ -200,7 +200,7 @@ class AnthropicClient(LLMClientBase):
             # Special case for summarization path
             tools_for_request = None
             tool_choice = None
-        elif llm_config.enable_reasoner:
+        elif self.is_reasoning_model(llm_config) and llm_config.enable_reasoner:
             # NOTE: reasoning models currently do not allow for `any`
             tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
             tools_for_request = [OpenAITool(function=f) for f in tools]
@@ -296,6 +296,13 @@ class AnthropicClient(LLMClientBase):
             token_count -= 8
         return token_count
 
+    def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
+        """Return True when `llm_config.model` starts with a Claude prefix treated as a reasoning model."""
+        # Prefix match on the model name: any suffix after a listed family name still qualifies.
+        return llm_config.model.startswith(
+            ("claude-3-7-sonnet", "claude-sonnet-4", "claude-opus-4"),
+        )
+
     @trace_method
     def handle_llm_error(self, e: Exception) -> Exception:
         if isinstance(e, anthropic.APITimeoutError):
diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py
index 32ca5d69..fa6cf17d 100644
--- a/letta/llm_api/google_vertex_client.py
+++ b/letta/llm_api/google_vertex_client.py
@@ -504,6 +504,9 @@ class GoogleVertexClient(LLMClientBase):
                 return 1
         return 0
 
+    def is_reasoning_model(self, llm_config: LLMConfig) -> bool:  # prefix match on the model name
+        return llm_config.model.startswith(("gemini-2.5-flash", "gemini-2.5-pro"))
+
     @trace_method
     def handle_llm_error(self, e: Exception) -> Exception:
         # Fallback to base implementation
diff --git a/letta/llm_api/llm_client_base.py b/letta/llm_api/llm_client_base.py
index 81ab852b..038caaa1 100644
--- a/letta/llm_api/llm_client_base.py
+++ b/letta/llm_api/llm_client_base.py
@@ -174,6 +174,10 @@ class LLMClientBase:
         """
         raise NotImplementedError(f"Streaming is not supported for {llm_config.model_endpoint_type}")
 
+    @abstractmethod
+    def is_reasoning_model(self, llm_config: LLMConfig) -> bool:  # provider-specific check; subclasses override
+        raise NotImplementedError  # no default answer at the base level
+
     @abstractmethod
     def handle_llm_error(self, e: Exception) -> Exception:
         """
diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index 2b54a6e3..159372d2 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -276,6 +276,9 @@ class OpenAIClient(LLMClientBase):
         response: ChatCompletion = await client.chat.completions.create(**request_data)
         return response.model_dump()
 
+    def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
+        return is_openai_reasoning_model(llm_config.model)  # decided from the model name alone
+
     @trace_method
     def convert_response_to_chat_completion(
         self,
@@ -298,7 +301,7 @@ class OpenAIClient(LLMClientBase):
             )
 
         # If we used a reasoning model, create a content part for the ommitted reasoning
-        if is_openai_reasoning_model(llm_config.model):
+        if self.is_reasoning_model(llm_config):
             chat_completion_response.choices[0].message.omitted_reasoning_content = True
 
         return chat_completion_response
diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py
index ecb2c663..24b7902d 100644
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -94,6 +94,9 @@ class LLMConfig(BaseModel):
         """
         model = values.get("model")
 
+        if model is None:
+            return values
+
         # Define models where we want put_inner_thoughts_in_kwargs to be False
         avoid_put_inner_thoughts_in_kwargs = ["gpt-4"]
 
@@ -107,25 +110,13 @@ class LLMConfig(BaseModel):
         if is_openai_reasoning_model(model):
             values["put_inner_thoughts_in_kwargs"] = False
 
-        if values.get("enable_reasoner") and values.get("model_endpoint_type") == "anthropic":
+        if values.get("model_endpoint_type") == "anthropic" and (
+            model.startswith("claude-3-7-sonnet") or model.startswith("claude-sonnet-4") or model.startswith("claude-opus-4")
+        ):
             values["put_inner_thoughts_in_kwargs"] = False
 
         return values
 
-    @model_validator(mode="after")
-    def issue_warning_for_reasoning_constraints(self) -> "LLMConfig":
-        if self.enable_reasoner:
-            if self.max_reasoning_tokens is None:
-                logger.warning("max_reasoning_tokens must be set when enable_reasoner is True")
-            if self.max_tokens is not None and self.max_reasoning_tokens >= self.max_tokens:
-                logger.warning("max_tokens must be greater than max_reasoning_tokens (thinking budget)")
-            if self.put_inner_thoughts_in_kwargs:
-                logger.debug("Extended thinking is not compatible with put_inner_thoughts_in_kwargs")
-        elif self.max_reasoning_tokens and not self.enable_reasoner:
-            logger.warning("model will not use reasoning unless enable_reasoner is set to True")
-
-        return self
-
     @classmethod
     def default_config(cls, model_name: str):
         """
diff --git a/tests/configs/llm_model_configs/claude-3-7-sonnet.json b/tests/configs/llm_model_configs/claude-3-7-sonnet.json
deleted file mode 100644
index beecaa75..00000000
--- a/tests/configs/llm_model_configs/claude-3-7-sonnet.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-  "model": "claude-3-7-sonnet-20250219",
-  "model_endpoint_type": "anthropic",
-  "model_endpoint": "https://api.anthropic.com/v1",
-  "model_wrapper": null,
-  "context_window": 200000,
-  "put_inner_thoughts_in_kwargs": true
-}
diff --git a/tests/configs/llm_model_configs/claude-4-sonnet.json b/tests/configs/llm_model_configs/claude-4-sonnet-extended.json
similarity index 65%
rename from tests/configs/llm_model_configs/claude-4-sonnet.json
rename to tests/configs/llm_model_configs/claude-4-sonnet-extended.json
index 12a582e0..e622c788 100644
--- a/tests/configs/llm_model_configs/claude-4-sonnet.json
+++ b/tests/configs/llm_model_configs/claude-4-sonnet-extended.json
@@ -4,5 +4,7 @@
   "model_endpoint": "https://api.anthropic.com/v1",
   "model_wrapper": null,
   "context_window": 200000,
-  "put_inner_thoughts_in_kwargs": true
+  "put_inner_thoughts_in_kwargs": false,
+  "enable_reasoner": true,
+  "max_reasoning_tokens": 1024
 }
diff --git a/tests/integration_test_send_message.py b/tests/integration_test_send_message.py
index ed44ed67..e0798841 100644
--- a/tests/integration_test_send_message.py
+++ b/tests/integration_test_send_message.py
@@ -118,9 +118,8 @@ all_configs = [
     "openai-o3.json",
     "openai-o4-mini.json",
     "azure-gpt-4o-mini.json",
-    "claude-4-sonnet.json",
+    "claude-4-sonnet-extended.json",
     "claude-3-5-sonnet.json",
-    "claude-3-7-sonnet.json",
     "claude-3-7-sonnet-extended.json",
     "bedrock-claude-4-sonnet.json",
     "gemini-1.5-pro.json",