From 39ddda81cc1a8b2ef991384899f88cdf5963ce83 Mon Sep 17 00:00:00 2001 From: Devansh Jain <31609257+devanshrj@users.noreply.github.com> Date: Tue, 17 Feb 2026 10:13:05 -0800 Subject: [PATCH] feat: add Anthropic Sonnet 4.6 (#9408) --- letta/llm_api/anthropic_client.py | 60 ++++++++++++++++++---------- letta/schemas/llm_config.py | 12 +++++- letta/schemas/providers/anthropic.py | 9 ++++- letta/settings.py | 2 +- 4 files changed, 56 insertions(+), 27 deletions(-) diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index ece74740..36c4868f 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -65,9 +65,9 @@ class AnthropicClient(LLMClientBase): client = self._get_anthropic_client(llm_config, async_client=False) betas: list[str] = [] - # Opus 4.6 Auto Thinking + # Opus 4.6 / Sonnet 4.6 Auto Thinking if llm_config.enable_reasoner: - if llm_config.model.startswith("claude-opus-4-6"): + if llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6"): betas.append("adaptive-thinking-2026-01-28") # Interleaved thinking for other reasoners (sync path parity) else: @@ -86,13 +86,17 @@ class AnthropicClient(LLMClientBase): except Exception: pass - # Effort parameter for Opus 4.5 and Opus 4.6 - to extend to other models, modify the model check + # Effort parameter for Opus 4.5, Opus 4.6, and Sonnet 4.6 - to extend to other models, modify the model check if ( - llm_config.model.startswith("claude-opus-4-5") or llm_config.model.startswith("claude-opus-4-6") + llm_config.model.startswith("claude-opus-4-5") + or llm_config.model.startswith("claude-opus-4-6") + or llm_config.model.startswith("claude-sonnet-4-6") ) and llm_config.effort is not None: betas.append("effort-2025-11-24") - # Max effort beta for Opus 4.6 - if llm_config.model.startswith("claude-opus-4-6") and llm_config.effort == "max": + # Max effort beta for Opus 4.6 / Sonnet 4.6 + if ( + 
llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6") + ) and llm_config.effort == "max": betas.append("max-effort-2026-01-24") # Context management for Opus 4.5 to preserve thinking blocks (improves cache hits) @@ -134,9 +138,9 @@ class AnthropicClient(LLMClientBase): client = await self._get_anthropic_client_async(llm_config, async_client=True) betas: list[str] = [] - # Opus 4.6 Auto Thinking + # Opus 4.6 / Sonnet 4.6 Auto Thinking if llm_config.enable_reasoner: - if llm_config.model.startswith("claude-opus-4-6"): + if llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6"): betas.append("adaptive-thinking-2026-01-28") # Interleaved thinking for other reasoners (sync path parity) else: @@ -155,13 +159,17 @@ class AnthropicClient(LLMClientBase): except Exception: pass - # Effort parameter for Opus 4.5 and Opus 4.6 - to extend to other models, modify the model check + # Effort parameter for Opus 4.5, Opus 4.6, and Sonnet 4.6 - to extend to other models, modify the model check if ( - llm_config.model.startswith("claude-opus-4-5") or llm_config.model.startswith("claude-opus-4-6") + llm_config.model.startswith("claude-opus-4-5") + or llm_config.model.startswith("claude-opus-4-6") + or llm_config.model.startswith("claude-sonnet-4-6") ) and llm_config.effort is not None: betas.append("effort-2025-11-24") - # Max effort beta for Opus 4.6 - if llm_config.model.startswith("claude-opus-4-6") and llm_config.effort == "max": + # Max effort beta for Opus 4.6 / Sonnet 4.6 + if ( + llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6") + ) and llm_config.effort == "max": betas.append("max-effort-2026-01-24") # Context management for Opus 4.5 to preserve thinking blocks (improves cache hits) @@ -311,9 +319,9 @@ class AnthropicClient(LLMClientBase): # See: https://docs.anthropic.com/en/docs/build-with-claude/tool-use/fine-grained-streaming betas = 
["fine-grained-tool-streaming-2025-05-14"] - # Opus 4.6 Auto Thinking + # Opus 4.6 / Sonnet 4.6 Auto Thinking if llm_config.enable_reasoner: - if llm_config.model.startswith("claude-opus-4-6"): + if llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6"): betas.append("adaptive-thinking-2026-01-28") # Interleaved thinking for other reasoners (sync path parity) else: @@ -332,13 +340,17 @@ class AnthropicClient(LLMClientBase): except Exception: pass - # Effort parameter for Opus 4.5 and Opus 4.6 - to extend to other models, modify the model check + # Effort parameter for Opus 4.5, Opus 4.6, and Sonnet 4.6 - to extend to other models, modify the model check if ( - llm_config.model.startswith("claude-opus-4-5") or llm_config.model.startswith("claude-opus-4-6") + llm_config.model.startswith("claude-opus-4-5") + or llm_config.model.startswith("claude-opus-4-6") + or llm_config.model.startswith("claude-sonnet-4-6") ) and llm_config.effort is not None: betas.append("effort-2025-11-24") - # Max effort beta for Opus 4.6 - if llm_config.model.startswith("claude-opus-4-6") and llm_config.effort == "max": + # Max effort beta for Opus 4.6 / Sonnet 4.6 + if ( + llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6") + ) and llm_config.effort == "max": betas.append("max-effort-2026-01-24") # Context management for Opus 4.5 to preserve thinking blocks (improves cache hits) @@ -528,8 +540,8 @@ class AnthropicClient(LLMClientBase): ) if should_enable_thinking: - # Opus 4.6 uses Auto Thinking (no budget tokens) - if llm_config.model.startswith("claude-opus-4-6"): + # Opus 4.6 / Sonnet 4.6 uses Auto Thinking (no budget tokens) + if llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6"): data["thinking"] = { "type": "adaptive", } @@ -550,10 +562,12 @@ class AnthropicClient(LLMClientBase): # Silently disable prefix_fill for now prefix_fill = False - # Effort 
configuration for Opus 4.5 and Opus 4.6 (controls token spending) + # Effort configuration for Opus 4.5, Opus 4.6, and Sonnet 4.6 (controls token spending) # To extend to other models, modify the model check if ( - llm_config.model.startswith("claude-opus-4-5") or llm_config.model.startswith("claude-opus-4-6") + llm_config.model.startswith("claude-opus-4-5") + or llm_config.model.startswith("claude-opus-4-6") + or llm_config.model.startswith("claude-sonnet-4-6") ) and llm_config.effort is not None: data["output_config"] = {"effort": llm_config.effort} @@ -935,6 +949,8 @@ class AnthropicClient(LLMClientBase): or llm_config.model.startswith("claude-opus-4-5") # Opus 4.6 support - uses Auto Thinking or llm_config.model.startswith("claude-opus-4-6") + # Sonnet 4.6 support - same API as Opus 4.6 + or llm_config.model.startswith("claude-sonnet-4-6") ) @trace_method diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index 6955b9a9..78a43a5c 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -562,7 +562,11 @@ class LLMConfig(BaseModel): if config.enable_reasoner and config.max_reasoning_tokens == 0: config.max_reasoning_tokens = 1024 - # Set default effort level for Claude Opus 4.5 and Opus 4.6 + # Set default effort level for Claude Opus 4.5, Opus 4.6, and Sonnet 4.6 - if (config.model.startswith("claude-opus-4-5") or config.model.startswith("claude-opus-4-6")) and config.effort is None: + if ( + config.model.startswith("claude-opus-4-5") + or config.model.startswith("claude-opus-4-6") + or config.model.startswith("claude-sonnet-4-6") + ) and config.effort is None: config.effort = "medium" return config @@ -631,7 +635,11 @@ class LLMConfig(BaseModel): if config.max_reasoning_tokens == 0: config.max_reasoning_tokens = 1024 - # Set default effort level for Claude Opus 4.5 and Opus 4.6 + # Set default effort level for Claude Opus 4.5, Opus 4.6, and Sonnet 4.6 - if (config.model.startswith("claude-opus-4-5") or config.model.startswith("claude-opus-4-6")) and config.effort is None: + if ( + config.model.startswith("claude-opus-4-5") + or config.model.startswith("claude-opus-4-6") + 
or config.model.startswith("claude-sonnet-4-6") + ) and config.effort is None: config.effort = "medium" elif cls.is_google_vertex_reasoning_model(config) or cls.is_google_ai_reasoning_model(config): # Handle as non-reasoner until we support summary diff --git a/letta/schemas/providers/anthropic.py b/letta/schemas/providers/anthropic.py index 803426e0..398c3cfe 100644 --- a/letta/schemas/providers/anthropic.py +++ b/letta/schemas/providers/anthropic.py @@ -113,6 +113,11 @@ MODEL_LIST = [ "name": "claude-opus-4-6", "context_window": 200000, }, + ## Sonnet 4.6 + { + "name": "claude-sonnet-4-6", + "context_window": 200000, + }, ] @@ -139,8 +144,8 @@ class AnthropicProvider(Provider): def get_default_max_output_tokens(self, model_name: str) -> int: """Get the default max output tokens for Anthropic models.""" - if "claude-opus-4-6" in model_name: - return 21000 # Opus 4.6 supports up to 128k with streaming, use 21k as default + if "claude-opus-4-6" in model_name or "claude-sonnet-4-6" in model_name: + return 21000 # Opus 4.6 / Sonnet 4.6 supports up to 128k with streaming, use 21k as default elif "opus" in model_name: return 16384 elif "sonnet" in model_name: diff --git a/letta/settings.py b/letta/settings.py index b2302b57..42459e0f 100644 --- a/letta/settings.py +++ b/letta/settings.py @@ -170,7 +170,7 @@ class ModelSettings(BaseSettings): anthropic_sonnet_1m: bool = Field( default=False, description=( - "Enable 1M-token context window for Claude Sonnet 4/4.5. When true, adds the" + "Enable 1M-token context window for Claude Sonnet 4/4.5/4.6. When true, adds the" " 'context-1m-2025-08-07' beta to Anthropic requests and sets model context_window" " to 1,000,000 instead of 200,000. Note: This feature is in beta and not available" " to all orgs; once GA, this flag can be removed and behavior can default to on."