diff --git a/fern/openapi.json b/fern/openapi.json
index 3284662a..4a487294 100644
--- a/fern/openapi.json
+++ b/fern/openapi.json
@@ -36592,14 +36592,14 @@
           "anyOf": [
             {
               "type": "string",
-              "enum": ["low", "medium", "high"]
+              "enum": ["low", "medium", "high", "max"]
             },
             {
               "type": "null"
             }
           ],
           "title": "Effort",
-          "description": "The effort level for Anthropic Opus 4.5 model (controls token spending). Not setting this gives similar performance to 'high'."
+          "description": "The effort level for Anthropic models that support it (Opus 4.5, Opus 4.6). Controls token spending and thinking behavior. Not setting this gives similar performance to 'high'."
         },
         "frequency_penalty": {
           "anyOf": [
@@ -39118,14 +39118,14 @@
           "anyOf": [
             {
               "type": "string",
-              "enum": ["low", "medium", "high"]
+              "enum": ["low", "medium", "high", "max"]
             },
             {
               "type": "null"
             }
           ],
           "title": "Effort",
-          "description": "The effort level for Anthropic Opus 4.5 model (controls token spending). Not setting this gives similar performance to 'high'."
+          "description": "The effort level for Anthropic models that support it (Opus 4.5, Opus 4.6). Controls token spending and thinking behavior. Not setting this gives similar performance to 'high'."
         },
         "frequency_penalty": {
           "anyOf": [
diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py
index 8acd65dd..54a743ea 100644
--- a/letta/llm_api/anthropic_client.py
+++ b/letta/llm_api/anthropic_client.py
@@ -62,9 +62,15 @@ class AnthropicClient(LLMClientBase):
     def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
         client = self._get_anthropic_client(llm_config, async_client=False)
         betas: list[str] = []
-        # Interleaved thinking for reasoner (sync path parity)
+
+        # Opus 4.6 Auto Thinking
         if llm_config.enable_reasoner:
-            betas.append("interleaved-thinking-2025-05-14")
+            if llm_config.model.startswith("claude-opus-4-6"):
+                betas.append("adaptive-thinking-2026-01-28")
+            # Interleaved thinking for other reasoners (sync path parity)
+            else:
+                betas.append("interleaved-thinking-2025-05-14")
+
         # 1M context beta for Sonnet 4/4.5 when enabled
         try:
             from letta.settings import model_settings
@@ -76,9 +82,14 @@ class AnthropicClient(LLMClientBase):
         except Exception:
             pass
 
-        # Opus 4.5 effort parameter - to extend to other models, modify the model check
-        if llm_config.model.startswith("claude-opus-4-5") and llm_config.effort is not None:
+        # Effort parameter for Opus 4.5 and Opus 4.6 - to extend to other models, modify the model check
+        if (
+            llm_config.model.startswith("claude-opus-4-5") or llm_config.model.startswith("claude-opus-4-6")
+        ) and llm_config.effort is not None:
             betas.append("effort-2025-11-24")
+        # Max effort beta for Opus 4.6
+        if llm_config.model.startswith("claude-opus-4-6") and llm_config.effort == "max":
+            betas.append("max-effort-2026-01-24")
 
         # Context management for Opus 4.5 to preserve thinking blocks (improves cache hits)
         if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner:
@@ -98,9 +109,14 @@ class AnthropicClient(LLMClientBase):
     async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
         client = await self._get_anthropic_client_async(llm_config, async_client=True)
         betas: list[str] = []
-        # interleaved thinking for reasoner
+
+        # Opus 4.6 Auto Thinking
         if llm_config.enable_reasoner:
-            betas.append("interleaved-thinking-2025-05-14")
+            if llm_config.model.startswith("claude-opus-4-6"):
+                betas.append("adaptive-thinking-2026-01-28")
+            # Interleaved thinking for other reasoners (sync path parity)
+            else:
+                betas.append("interleaved-thinking-2025-05-14")
 
         # 1M context beta for Sonnet 4/4.5 when enabled
         try:
@@ -113,9 +129,14 @@ class AnthropicClient(LLMClientBase):
         except Exception:
             pass
 
-        # Opus 4.5 effort parameter - to extend to other models, modify the model check
-        if llm_config.model.startswith("claude-opus-4-5") and llm_config.effort is not None:
+        # Effort parameter for Opus 4.5 and Opus 4.6 - to extend to other models, modify the model check
+        if (
+            llm_config.model.startswith("claude-opus-4-5") or llm_config.model.startswith("claude-opus-4-6")
+        ) and llm_config.effort is not None:
             betas.append("effort-2025-11-24")
+        # Max effort beta for Opus 4.6
+        if llm_config.model.startswith("claude-opus-4-6") and llm_config.effort == "max":
+            betas.append("max-effort-2026-01-24")
 
         # Context management for Opus 4.5 to preserve thinking blocks (improves cache hits)
         if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner:
@@ -262,10 +283,13 @@ class AnthropicClient(LLMClientBase):
         # See: https://docs.anthropic.com/en/docs/build-with-claude/tool-use/fine-grained-streaming
        betas = ["fine-grained-tool-streaming-2025-05-14"]
 
-        # If extended thinking, turn on interleaved header
-        # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#interleaved-thinking
+        # Opus 4.6 Auto Thinking
         if llm_config.enable_reasoner:
-            betas.append("interleaved-thinking-2025-05-14")
+            if llm_config.model.startswith("claude-opus-4-6"):
+                betas.append("adaptive-thinking-2026-01-28")
+            # Interleaved thinking for other reasoners (sync path parity)
+            else:
+                betas.append("interleaved-thinking-2025-05-14")
 
         # 1M context beta for Sonnet 4/4.5 when enabled
         try:
@@ -278,9 +302,14 @@ class AnthropicClient(LLMClientBase):
         except Exception:
             pass
 
-        # Opus 4.5 effort parameter - to extend to other models, modify the model check
-        if llm_config.model.startswith("claude-opus-4-5") and llm_config.effort is not None:
+        # Effort parameter for Opus 4.5 and Opus 4.6 - to extend to other models, modify the model check
+        if (
+            llm_config.model.startswith("claude-opus-4-5") or llm_config.model.startswith("claude-opus-4-6")
+        ) and llm_config.effort is not None:
             betas.append("effort-2025-11-24")
+        # Max effort beta for Opus 4.6
+        if llm_config.model.startswith("claude-opus-4-6") and llm_config.effort == "max":
+            betas.append("max-effort-2026-01-24")
 
         # Context management for Opus 4.5 to preserve thinking blocks (improves cache hits)
         if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner:
@@ -462,24 +491,33 @@ class AnthropicClient(LLMClientBase):
 
         # Extended Thinking
         if self.is_reasoning_model(llm_config) and llm_config.enable_reasoner:
-            thinking_budget = max(llm_config.max_reasoning_tokens, 1024)
-            if thinking_budget != llm_config.max_reasoning_tokens:
-                logger.warning(
-                    f"Max reasoning tokens must be at least 1024 for Claude. Setting max_reasoning_tokens to 1024 for model {llm_config.model}."
-                )
-            data["thinking"] = {
-                "type": "enabled",
-                "budget_tokens": thinking_budget,
-            }
+            # Opus 4.6 uses Auto Thinking (no budget tokens)
+            if llm_config.model.startswith("claude-opus-4-6"):
+                data["thinking"] = {
+                    "type": "adaptive",
+                }
+            else:
+                # Traditional extended thinking with budget tokens
+                thinking_budget = max(llm_config.max_reasoning_tokens, 1024)
+                if thinking_budget != llm_config.max_reasoning_tokens:
+                    logger.warning(
+                        f"Max reasoning tokens must be at least 1024 for Claude. Setting max_reasoning_tokens to 1024 for model {llm_config.model}."
+                    )
+                data["thinking"] = {
+                    "type": "enabled",
+                    "budget_tokens": thinking_budget,
+                }
             # `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking'
             data["temperature"] = 1.0
 
             # Silently disable prefix_fill for now
             prefix_fill = False
 
-        # Effort configuration for Opus 4.5 (controls token spending)
+        # Effort configuration for Opus 4.5 and Opus 4.6 (controls token spending)
         # To extend to other models, modify the model check
-        if llm_config.model.startswith("claude-opus-4-5") and llm_config.effort is not None:
+        if (
+            llm_config.model.startswith("claude-opus-4-5") or llm_config.model.startswith("claude-opus-4-6")
+        ) and llm_config.effort is not None:
             data["output_config"] = {"effort": llm_config.effort}
 
         # Context management for Opus 4.5 to preserve thinking blocks and improve cache hits
@@ -851,6 +889,8 @@ class AnthropicClient(LLMClientBase):
             or llm_config.model.startswith("claude-haiku-4-5")
             # Opus 4.5 support - to extend effort parameter to other models, modify this check
             or llm_config.model.startswith("claude-opus-4-5")
+            # Opus 4.6 support - uses Auto Thinking
+            or llm_config.model.startswith("claude-opus-4-6")
         )
 
     @trace_method
diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py
index 4c9ad3fd..e0953d40 100644
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -82,9 +82,9 @@ class LLMConfig(BaseModel):
         0,
         description="Configurable thinking budget for extended thinking. Used for enable_reasoner and also for Google Vertex models like Gemini 2.5 Flash. Minimum value is 1024 when used with enable_reasoner.",
     )
-    effort: Optional[Literal["low", "medium", "high"]] = Field(
+    effort: Optional[Literal["low", "medium", "high", "max"]] = Field(
         None,
-        description="The effort level for Anthropic Opus 4.5 model (controls token spending). Not setting this gives similar performance to 'high'.",
+        description="The effort level for Anthropic models that support it (Opus 4.5, Opus 4.6). Controls token spending and thinking behavior. Not setting this gives similar performance to 'high'.",
     )
     frequency_penalty: Optional[float] = Field(
         None,  # Can also deafult to 0.0?
@@ -190,6 +190,7 @@ class LLMConfig(BaseModel):
             or model.startswith("claude-opus-4")
             or model.startswith("claude-haiku-4-5")
             or model.startswith("claude-opus-4-5")
+            or model.startswith("claude-opus-4-6")
         ):
             values["put_inner_thoughts_in_kwargs"] = False
 
@@ -441,6 +442,7 @@ class LLMConfig(BaseModel):
             or config.model.startswith("claude-3-7-sonnet")
             or config.model.startswith("claude-haiku-4-5")
             or config.model.startswith("claude-opus-4-5")
+            or config.model.startswith("claude-opus-4-6")
         )
 
     @classmethod
@@ -543,8 +545,8 @@ class LLMConfig(BaseModel):
             config.put_inner_thoughts_in_kwargs = False
             if config.enable_reasoner and config.max_reasoning_tokens == 0:
                 config.max_reasoning_tokens = 1024
-            # Set default effort level for Claude Opus 4.5
-            if config.model.startswith("claude-opus-4-5") and config.effort is None:
+            # Set default effort level for Claude Opus 4.5 and Opus 4.6
+            if (config.model.startswith("claude-opus-4-5") or config.model.startswith("claude-opus-4-6")) and config.effort is None:
                 config.effort = "medium"
 
         return config
@@ -612,8 +614,8 @@ class LLMConfig(BaseModel):
             config.put_inner_thoughts_in_kwargs = False
             if config.max_reasoning_tokens == 0:
                 config.max_reasoning_tokens = 1024
-            # Set default effort level for Claude Opus 4.5
-            if config.model.startswith("claude-opus-4-5") and config.effort is None:
+            # Set default effort level for Claude Opus 4.5 and Opus 4.6
+            if (config.model.startswith("claude-opus-4-5") or config.model.startswith("claude-opus-4-6")) and config.effort is None:
                 config.effort = "medium"
         elif cls.is_google_vertex_reasoning_model(config) or cls.is_google_ai_reasoning_model(config):
             # Handle as non-reasoner until we support summary
diff --git a/letta/schemas/providers/anthropic.py b/letta/schemas/providers/anthropic.py
index 2e2faf6c..fbb7a2c3 100644
--- a/letta/schemas/providers/anthropic.py
+++ b/letta/schemas/providers/anthropic.py
@@ -108,6 +108,11 @@ MODEL_LIST = [
         "name": "claude-opus-4-5-20251101",
         "context_window": 200000,
     },
+    ## Opus 4.6
+    {
+        "name": "claude-opus-4-6",
+        "context_window": 200000,
+    },
 ]
 
 
@@ -134,7 +139,9 @@ class AnthropicProvider(Provider):
 
     def get_default_max_output_tokens(self, model_name: str) -> int:
         """Get the default max output tokens for Anthropic models."""
-        if "opus" in model_name:
+        if "claude-opus-4-6" in model_name:
+            return 21000  # Opus 4.6 supports up to 128k with streaming, use 21k as default
+        elif "opus" in model_name:
             return 16384
         elif "sonnet" in model_name:
             return 16384
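
Reviewer note: the sketch below condenses the beta-header and `thinking`-payload selection that this diff repeats across the sync, async, and streaming paths. It is an illustrative summary only — the `_Config` dataclass and `_betas_and_thinking` helper are hypothetical stand-ins, not code from the letta repo; the beta strings and payload shapes are exactly the ones used in the hunks above.

from dataclasses import dataclass
from typing import Optional


@dataclass
class _Config:
    """Hypothetical stand-in for letta.schemas.llm_config.LLMConfig (subset of fields)."""

    model: str
    enable_reasoner: bool = True
    effort: Optional[str] = None  # "low" | "medium" | "high" | "max"
    max_reasoning_tokens: int = 0


def _betas_and_thinking(cfg: _Config) -> tuple[list[str], dict]:
    """Mirror the per-model choices made in the hunks above (sketch, not library code)."""
    betas: list[str] = []
    thinking: dict = {}

    if cfg.enable_reasoner:
        if cfg.model.startswith("claude-opus-4-6"):
            # Opus 4.6: adaptive ("auto") thinking, no budget_tokens
            betas.append("adaptive-thinking-2026-01-28")
            thinking = {"type": "adaptive"}
        else:
            # Other reasoners: interleaved thinking with a budgeted allowance (min 1024)
            betas.append("interleaved-thinking-2025-05-14")
            thinking = {"type": "enabled", "budget_tokens": max(cfg.max_reasoning_tokens, 1024)}

    # Effort applies to Opus 4.5 and 4.6; "max" additionally requires its own beta on Opus 4.6
    if cfg.model.startswith(("claude-opus-4-5", "claude-opus-4-6")) and cfg.effort is not None:
        betas.append("effort-2025-11-24")
    if cfg.model.startswith("claude-opus-4-6") and cfg.effort == "max":
        betas.append("max-effort-2026-01-24")

    return betas, thinking


if __name__ == "__main__":
    print(_betas_and_thinking(_Config(model="claude-opus-4-6", effort="max")))
    # -> (['adaptive-thinking-2026-01-28', 'effort-2025-11-24', 'max-effort-2026-01-24'], {'type': 'adaptive'})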