From 39ddda81cc1a8b2ef991384899f88cdf5963ce83 Mon Sep 17 00:00:00 2001 From: Devansh Jain <31609257+devanshrj@users.noreply.github.com> Date: Tue, 17 Feb 2026 10:13:05 -0800 Subject: [PATCH] feat: add Anthropic Sonnet 4.6 (#9408) --- letta/llm_api/anthropic_client.py | 60 ++++++++++++++++++---------- letta/schemas/llm_config.py | 12 +++++- letta/schemas/providers/anthropic.py | 9 ++++- letta/settings.py | 2 +- 4 files changed, 56 insertions(+), 27 deletions(-) diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index ece74740..36c4868f 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -65,9 +65,9 @@ class AnthropicClient(LLMClientBase): client = self._get_anthropic_client(llm_config, async_client=False) betas: list[str] = [] - # Opus 4.6 Auto Thinking + # Opus 4.6 / Sonnet 4.6 Auto Thinking if llm_config.enable_reasoner: - if llm_config.model.startswith("claude-opus-4-6"): + if llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6"): betas.append("adaptive-thinking-2026-01-28") # Interleaved thinking for other reasoners (sync path parity) else: @@ -86,13 +86,17 @@ class AnthropicClient(LLMClientBase): except Exception: pass - # Effort parameter for Opus 4.5 and Opus 4.6 - to extend to other models, modify the model check + # Effort parameter for Opus 4.5, Opus 4.6, and Sonnet 4.6 - to extend to other models, modify the model check if ( - llm_config.model.startswith("claude-opus-4-5") or llm_config.model.startswith("claude-opus-4-6") + llm_config.model.startswith("claude-opus-4-5") + or llm_config.model.startswith("claude-opus-4-6") + or llm_config.model.startswith("claude-sonnet-4-6") ) and llm_config.effort is not None: betas.append("effort-2025-11-24") - # Max effort beta for Opus 4.6 - if llm_config.model.startswith("claude-opus-4-6") and llm_config.effort == "max": + # Max effort beta for Opus 4.6 / Sonnet 4.6 + if ( + 
llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6") + ) and llm_config.effort == "max": betas.append("max-effort-2026-01-24") # Context management for Opus 4.5 to preserve thinking blocks (improves cache hits) @@ -134,9 +138,9 @@ class AnthropicClient(LLMClientBase): client = await self._get_anthropic_client_async(llm_config, async_client=True) betas: list[str] = [] - # Opus 4.6 Auto Thinking + # Opus 4.6 / Sonnet 4.6 Auto Thinking if llm_config.enable_reasoner: - if llm_config.model.startswith("claude-opus-4-6"): + if llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6"): betas.append("adaptive-thinking-2026-01-28") # Interleaved thinking for other reasoners (sync path parity) else: @@ -155,13 +159,17 @@ class AnthropicClient(LLMClientBase): except Exception: pass - # Effort parameter for Opus 4.5 and Opus 4.6 - to extend to other models, modify the model check + # Effort parameter for Opus 4.5, Opus 4.6, and Sonnet 4.6 - to extend to other models, modify the model check if ( - llm_config.model.startswith("claude-opus-4-5") or llm_config.model.startswith("claude-opus-4-6") + llm_config.model.startswith("claude-opus-4-5") + or llm_config.model.startswith("claude-opus-4-6") + or llm_config.model.startswith("claude-sonnet-4-6") ) and llm_config.effort is not None: betas.append("effort-2025-11-24") - # Max effort beta for Opus 4.6 - if llm_config.model.startswith("claude-opus-4-6") and llm_config.effort == "max": + # Max effort beta for Opus 4.6 / Sonnet 4.6 + if ( + llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6") + ) and llm_config.effort == "max": betas.append("max-effort-2026-01-24") # Context management for Opus 4.5 to preserve thinking blocks (improves cache hits) @@ -311,9 +319,9 @@ class AnthropicClient(LLMClientBase): # See: https://docs.anthropic.com/en/docs/build-with-claude/tool-use/fine-grained-streaming betas = 
["fine-grained-tool-streaming-2025-05-14"] - # Opus 4.6 Auto Thinking + # Opus 4.6 / Sonnet 4.6 Auto Thinking if llm_config.enable_reasoner: - if llm_config.model.startswith("claude-opus-4-6"): + if llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6"): betas.append("adaptive-thinking-2026-01-28") # Interleaved thinking for other reasoners (sync path parity) else: @@ -332,13 +340,17 @@ class AnthropicClient(LLMClientBase): except Exception: pass - # Effort parameter for Opus 4.5 and Opus 4.6 - to extend to other models, modify the model check + # Effort parameter for Opus 4.5, Opus 4.6, and Sonnet 4.6 - to extend to other models, modify the model check if ( - llm_config.model.startswith("claude-opus-4-5") or llm_config.model.startswith("claude-opus-4-6") + llm_config.model.startswith("claude-opus-4-5") + or llm_config.model.startswith("claude-opus-4-6") + or llm_config.model.startswith("claude-sonnet-4-6") ) and llm_config.effort is not None: betas.append("effort-2025-11-24") - # Max effort beta for Opus 4.6 - if llm_config.model.startswith("claude-opus-4-6") and llm_config.effort == "max": + # Max effort beta for Opus 4.6 / Sonnet 4.6 + if ( + llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6") + ) and llm_config.effort == "max": betas.append("max-effort-2026-01-24") # Context management for Opus 4.5 to preserve thinking blocks (improves cache hits) @@ -528,8 +540,8 @@ class AnthropicClient(LLMClientBase): ) if should_enable_thinking: - # Opus 4.6 uses Auto Thinking (no budget tokens) - if llm_config.model.startswith("claude-opus-4-6"): + # Opus 4.6 / Sonnet 4.6 uses Auto Thinking (no budget tokens) + if llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6"): data["thinking"] = { "type": "adaptive", } @@ -550,10 +562,12 @@ class AnthropicClient(LLMClientBase): # Silently disable prefix_fill for now prefix_fill = False - # Effort 
configuration for Opus 4.5 and Opus 4.6 (controls token spending) + # Effort configuration for Opus 4.5, Opus 4.6, and Sonnet 4.6 (controls token spending) # To extend to other models, modify the model check if ( - llm_config.model.startswith("claude-opus-4-5") or llm_config.model.startswith("claude-opus-4-6") + llm_config.model.startswith("claude-opus-4-5") + or llm_config.model.startswith("claude-opus-4-6") + or llm_config.model.startswith("claude-sonnet-4-6") ) and llm_config.effort is not None: data["output_config"] = {"effort": llm_config.effort} @@ -935,6 +949,8 @@ class AnthropicClient(LLMClientBase): or llm_config.model.startswith("claude-opus-4-5") # Opus 4.6 support - uses Auto Thinking or llm_config.model.startswith("claude-opus-4-6") + # Sonnet 4.6 support - same API as Opus 4.6 + or llm_config.model.startswith("claude-sonnet-4-6") ) @trace_method diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index 6955b9a9..78a43a5c 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -562,7 +562,11 @@ class LLMConfig(BaseModel): if config.enable_reasoner and config.max_reasoning_tokens == 0: config.max_reasoning_tokens = 1024 - # Set default effort level for Claude Opus 4.5 and Opus 4.6 + # Set default effort level for Claude Opus 4.5, Opus 4.6, and Sonnet 4.6 - if (config.model.startswith("claude-opus-4-5") or config.model.startswith("claude-opus-4-6")) and config.effort is None: + if ( + config.model.startswith("claude-opus-4-5") + or config.model.startswith("claude-opus-4-6") + or config.model.startswith("claude-sonnet-4-6") + ) and config.effort is None: config.effort = "medium" return config @@ -631,7 +635,11 @@ class LLMConfig(BaseModel): if config.max_reasoning_tokens == 0: config.max_reasoning_tokens = 1024 - # Set default effort level for Claude Opus 4.5 and Opus 4.6 + # Set default effort level for Claude Opus 4.5, Opus 4.6, and Sonnet 4.6 - if (config.model.startswith("claude-opus-4-5") or config.model.startswith("claude-opus-4-6")) and config.effort is None: + if ( + config.model.startswith("claude-opus-4-5") + or config.model.startswith("claude-opus-4-6") + 
or config.model.startswith("claude-sonnet-4-6") + ) and config.effort is None: config.effort = "medium" elif cls.is_google_vertex_reasoning_model(config) or cls.is_google_ai_reasoning_model(config): # Handle as non-reasoner until we support summary diff --git a/letta/schemas/providers/anthropic.py b/letta/schemas/providers/anthropic.py index 803426e0..398c3cfe 100644 --- a/letta/schemas/providers/anthropic.py +++ b/letta/schemas/providers/anthropic.py @@ -113,6 +113,11 @@ MODEL_LIST = [ "name": "claude-opus-4-6", "context_window": 200000, }, + ## Sonnet 4.6 + { + "name": "claude-sonnet-4-6", + "context_window": 200000, + }, ] @@ -139,8 +144,8 @@ class AnthropicProvider(Provider): def get_default_max_output_tokens(self, model_name: str) -> int: """Get the default max output tokens for Anthropic models.""" - if "claude-opus-4-6" in model_name: - return 21000 # Opus 4.6 supports up to 128k with streaming, use 21k as default + if "claude-opus-4-6" in model_name or "claude-sonnet-4-6" in model_name: + return 21000 # Opus 4.6 / Sonnet 4.6 supports up to 128k with streaming, use 21k as default elif "opus" in model_name: return 16384 elif "sonnet" in model_name: diff --git a/letta/settings.py b/letta/settings.py index b2302b57..42459e0f 100644 --- a/letta/settings.py +++ b/letta/settings.py @@ -170,7 +170,7 @@ class ModelSettings(BaseSettings): anthropic_sonnet_1m: bool = Field( default=False, description=( - "Enable 1M-token context window for Claude Sonnet 4/4.5. When true, adds the" + "Enable 1M-token context window for Claude Sonnet 4/4.5/4.6. When true, adds the" " 'context-1m-2025-08-07' beta to Anthropic requests and sets model context_window" " to 1,000,000 instead of 200,000. Note: This feature is in beta and not available" " to all orgs; once GA, this flag can be removed and behavior can default to on."