feat: add Anthropic Sonnet 4.6 (#9408)
This commit is contained in:
committed by
Caren Thomas
parent
25d54dd896
commit
39ddda81cc
@@ -65,9 +65,9 @@ class AnthropicClient(LLMClientBase):
|
|||||||
client = self._get_anthropic_client(llm_config, async_client=False)
|
client = self._get_anthropic_client(llm_config, async_client=False)
|
||||||
betas: list[str] = []
|
betas: list[str] = []
|
||||||
|
|
||||||
# Opus 4.6 Auto Thinking
|
# Opus 4.6 / Sonnet 4.6 Auto Thinking
|
||||||
if llm_config.enable_reasoner:
|
if llm_config.enable_reasoner:
|
||||||
if llm_config.model.startswith("claude-opus-4-6"):
|
if llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6"):
|
||||||
betas.append("adaptive-thinking-2026-01-28")
|
betas.append("adaptive-thinking-2026-01-28")
|
||||||
# Interleaved thinking for other reasoners (sync path parity)
|
# Interleaved thinking for other reasoners (sync path parity)
|
||||||
else:
|
else:
|
||||||
@@ -86,13 +86,17 @@ class AnthropicClient(LLMClientBase):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Effort parameter for Opus 4.5 and Opus 4.6 - to extend to other models, modify the model check
|
# Effort parameter for Opus 4.5, Opus 4.6, and Sonnet 4.6 - to extend to other models, modify the model check
|
||||||
if (
|
if (
|
||||||
llm_config.model.startswith("claude-opus-4-5") or llm_config.model.startswith("claude-opus-4-6")
|
llm_config.model.startswith("claude-opus-4-5")
|
||||||
|
or llm_config.model.startswith("claude-opus-4-6")
|
||||||
|
or llm_config.model.startswith("claude-sonnet-4-6")
|
||||||
) and llm_config.effort is not None:
|
) and llm_config.effort is not None:
|
||||||
betas.append("effort-2025-11-24")
|
betas.append("effort-2025-11-24")
|
||||||
# Max effort beta for Opus 4.6
|
# Max effort beta for Opus 4.6 / Sonnet 4.6
|
||||||
if llm_config.model.startswith("claude-opus-4-6") and llm_config.effort == "max":
|
if (
|
||||||
|
llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6")
|
||||||
|
) and llm_config.effort == "max":
|
||||||
betas.append("max-effort-2026-01-24")
|
betas.append("max-effort-2026-01-24")
|
||||||
|
|
||||||
# Context management for Opus 4.5 to preserve thinking blocks (improves cache hits)
|
# Context management for Opus 4.5 to preserve thinking blocks (improves cache hits)
|
||||||
@@ -134,9 +138,9 @@ class AnthropicClient(LLMClientBase):
|
|||||||
client = await self._get_anthropic_client_async(llm_config, async_client=True)
|
client = await self._get_anthropic_client_async(llm_config, async_client=True)
|
||||||
betas: list[str] = []
|
betas: list[str] = []
|
||||||
|
|
||||||
# Opus 4.6 Auto Thinking
|
# Opus 4.6 / Sonnet 4.6 Auto Thinking
|
||||||
if llm_config.enable_reasoner:
|
if llm_config.enable_reasoner:
|
||||||
if llm_config.model.startswith("claude-opus-4-6"):
|
if llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6"):
|
||||||
betas.append("adaptive-thinking-2026-01-28")
|
betas.append("adaptive-thinking-2026-01-28")
|
||||||
# Interleaved thinking for other reasoners (sync path parity)
|
# Interleaved thinking for other reasoners (sync path parity)
|
||||||
else:
|
else:
|
||||||
@@ -155,13 +159,17 @@ class AnthropicClient(LLMClientBase):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Effort parameter for Opus 4.5 and Opus 4.6 - to extend to other models, modify the model check
|
# Effort parameter for Opus 4.5, Opus 4.6, and Sonnet 4.6 - to extend to other models, modify the model check
|
||||||
if (
|
if (
|
||||||
llm_config.model.startswith("claude-opus-4-5") or llm_config.model.startswith("claude-opus-4-6")
|
llm_config.model.startswith("claude-opus-4-5")
|
||||||
|
or llm_config.model.startswith("claude-opus-4-6")
|
||||||
|
or llm_config.model.startswith("claude-sonnet-4-6")
|
||||||
) and llm_config.effort is not None:
|
) and llm_config.effort is not None:
|
||||||
betas.append("effort-2025-11-24")
|
betas.append("effort-2025-11-24")
|
||||||
# Max effort beta for Opus 4.6
|
# Max effort beta for Opus 4.6 / Sonnet 4.6
|
||||||
if llm_config.model.startswith("claude-opus-4-6") and llm_config.effort == "max":
|
if (
|
||||||
|
llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6")
|
||||||
|
) and llm_config.effort == "max":
|
||||||
betas.append("max-effort-2026-01-24")
|
betas.append("max-effort-2026-01-24")
|
||||||
|
|
||||||
# Context management for Opus 4.5 to preserve thinking blocks (improves cache hits)
|
# Context management for Opus 4.5 to preserve thinking blocks (improves cache hits)
|
||||||
@@ -311,9 +319,9 @@ class AnthropicClient(LLMClientBase):
|
|||||||
# See: https://docs.anthropic.com/en/docs/build-with-claude/tool-use/fine-grained-streaming
|
# See: https://docs.anthropic.com/en/docs/build-with-claude/tool-use/fine-grained-streaming
|
||||||
betas = ["fine-grained-tool-streaming-2025-05-14"]
|
betas = ["fine-grained-tool-streaming-2025-05-14"]
|
||||||
|
|
||||||
# Opus 4.6 Auto Thinking
|
# Opus 4.6 / Sonnet 4.6 Auto Thinking
|
||||||
if llm_config.enable_reasoner:
|
if llm_config.enable_reasoner:
|
||||||
if llm_config.model.startswith("claude-opus-4-6"):
|
if llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6"):
|
||||||
betas.append("adaptive-thinking-2026-01-28")
|
betas.append("adaptive-thinking-2026-01-28")
|
||||||
# Interleaved thinking for other reasoners (sync path parity)
|
# Interleaved thinking for other reasoners (sync path parity)
|
||||||
else:
|
else:
|
||||||
@@ -332,13 +340,17 @@ class AnthropicClient(LLMClientBase):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Effort parameter for Opus 4.5 and Opus 4.6 - to extend to other models, modify the model check
|
# Effort parameter for Opus 4.5, Opus 4.6, and Sonnet 4.6 - to extend to other models, modify the model check
|
||||||
if (
|
if (
|
||||||
llm_config.model.startswith("claude-opus-4-5") or llm_config.model.startswith("claude-opus-4-6")
|
llm_config.model.startswith("claude-opus-4-5")
|
||||||
|
or llm_config.model.startswith("claude-opus-4-6")
|
||||||
|
or llm_config.model.startswith("claude-sonnet-4-6")
|
||||||
) and llm_config.effort is not None:
|
) and llm_config.effort is not None:
|
||||||
betas.append("effort-2025-11-24")
|
betas.append("effort-2025-11-24")
|
||||||
# Max effort beta for Opus 4.6
|
# Max effort beta for Opus 4.6 / Sonnet 4.6
|
||||||
if llm_config.model.startswith("claude-opus-4-6") and llm_config.effort == "max":
|
if (
|
||||||
|
llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6")
|
||||||
|
) and llm_config.effort == "max":
|
||||||
betas.append("max-effort-2026-01-24")
|
betas.append("max-effort-2026-01-24")
|
||||||
|
|
||||||
# Context management for Opus 4.5 to preserve thinking blocks (improves cache hits)
|
# Context management for Opus 4.5 to preserve thinking blocks (improves cache hits)
|
||||||
@@ -528,8 +540,8 @@ class AnthropicClient(LLMClientBase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if should_enable_thinking:
|
if should_enable_thinking:
|
||||||
# Opus 4.6 uses Auto Thinking (no budget tokens)
|
# Opus 4.6 / Sonnet 4.6 use Auto Thinking (no budget tokens)
|
||||||
if llm_config.model.startswith("claude-opus-4-6"):
|
if llm_config.model.startswith("claude-opus-4-6") or llm_config.model.startswith("claude-sonnet-4-6"):
|
||||||
data["thinking"] = {
|
data["thinking"] = {
|
||||||
"type": "adaptive",
|
"type": "adaptive",
|
||||||
}
|
}
|
||||||
@@ -550,10 +562,12 @@ class AnthropicClient(LLMClientBase):
|
|||||||
# Silently disable prefix_fill for now
|
# Silently disable prefix_fill for now
|
||||||
prefix_fill = False
|
prefix_fill = False
|
||||||
|
|
||||||
# Effort configuration for Opus 4.5 and Opus 4.6 (controls token spending)
|
# Effort configuration for Opus 4.5, Opus 4.6, and Sonnet 4.6 (controls token spending)
|
||||||
# To extend to other models, modify the model check
|
# To extend to other models, modify the model check
|
||||||
if (
|
if (
|
||||||
llm_config.model.startswith("claude-opus-4-5") or llm_config.model.startswith("claude-opus-4-6")
|
llm_config.model.startswith("claude-opus-4-5")
|
||||||
|
or llm_config.model.startswith("claude-opus-4-6")
|
||||||
|
or llm_config.model.startswith("claude-sonnet-4-6")
|
||||||
) and llm_config.effort is not None:
|
) and llm_config.effort is not None:
|
||||||
data["output_config"] = {"effort": llm_config.effort}
|
data["output_config"] = {"effort": llm_config.effort}
|
||||||
|
|
||||||
@@ -935,6 +949,8 @@ class AnthropicClient(LLMClientBase):
|
|||||||
or llm_config.model.startswith("claude-opus-4-5")
|
or llm_config.model.startswith("claude-opus-4-5")
|
||||||
# Opus 4.6 support - uses Auto Thinking
|
# Opus 4.6 support - uses Auto Thinking
|
||||||
or llm_config.model.startswith("claude-opus-4-6")
|
or llm_config.model.startswith("claude-opus-4-6")
|
||||||
|
# Sonnet 4.6 support - same API as Opus 4.6
|
||||||
|
or llm_config.model.startswith("claude-sonnet-4-6")
|
||||||
)
|
)
|
||||||
|
|
||||||
@trace_method
|
@trace_method
|
||||||
|
|||||||
@@ -562,7 +562,11 @@ class LLMConfig(BaseModel):
|
|||||||
if config.enable_reasoner and config.max_reasoning_tokens == 0:
|
if config.enable_reasoner and config.max_reasoning_tokens == 0:
|
||||||
config.max_reasoning_tokens = 1024
|
config.max_reasoning_tokens = 1024
|
||||||
# Set default effort level for Claude Opus 4.5 and Opus 4.6
|
# Set default effort level for Claude Opus 4.5, Opus 4.6, and Sonnet 4.6
|
||||||
if (config.model.startswith("claude-opus-4-5") or config.model.startswith("claude-opus-4-6")) and config.effort is None:
|
if (
|
||||||
|
config.model.startswith("claude-opus-4-5")
|
||||||
|
or config.model.startswith("claude-opus-4-6")
|
||||||
|
or config.model.startswith("claude-sonnet-4-6")
|
||||||
|
) and config.effort is None:
|
||||||
config.effort = "medium"
|
config.effort = "medium"
|
||||||
return config
|
return config
|
||||||
|
|
||||||
@@ -631,7 +635,11 @@ class LLMConfig(BaseModel):
|
|||||||
if config.max_reasoning_tokens == 0:
|
if config.max_reasoning_tokens == 0:
|
||||||
config.max_reasoning_tokens = 1024
|
config.max_reasoning_tokens = 1024
|
||||||
# Set default effort level for Claude Opus 4.5 and Opus 4.6
|
# Set default effort level for Claude Opus 4.5, Opus 4.6, and Sonnet 4.6
|
||||||
if (config.model.startswith("claude-opus-4-5") or config.model.startswith("claude-opus-4-6")) and config.effort is None:
|
if (
|
||||||
|
config.model.startswith("claude-opus-4-5")
|
||||||
|
or config.model.startswith("claude-opus-4-6")
|
||||||
|
or config.model.startswith("claude-sonnet-4-6")
|
||||||
|
) and config.effort is None:
|
||||||
config.effort = "medium"
|
config.effort = "medium"
|
||||||
elif cls.is_google_vertex_reasoning_model(config) or cls.is_google_ai_reasoning_model(config):
|
elif cls.is_google_vertex_reasoning_model(config) or cls.is_google_ai_reasoning_model(config):
|
||||||
# Handle as non-reasoner until we support summary
|
# Handle as non-reasoner until we support summary
|
||||||
|
|||||||
@@ -113,6 +113,11 @@ MODEL_LIST = [
|
|||||||
"name": "claude-opus-4-6",
|
"name": "claude-opus-4-6",
|
||||||
"context_window": 200000,
|
"context_window": 200000,
|
||||||
},
|
},
|
||||||
|
## Sonnet 4.6
|
||||||
|
{
|
||||||
|
"name": "claude-sonnet-4-6",
|
||||||
|
"context_window": 200000,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@@ -139,8 +144,8 @@ class AnthropicProvider(Provider):
|
|||||||
|
|
||||||
def get_default_max_output_tokens(self, model_name: str) -> int:
|
def get_default_max_output_tokens(self, model_name: str) -> int:
|
||||||
"""Get the default max output tokens for Anthropic models."""
|
"""Get the default max output tokens for Anthropic models."""
|
||||||
if "claude-opus-4-6" in model_name:
|
if "claude-opus-4-6" in model_name or "claude-sonnet-4-6" in model_name:
|
||||||
return 21000 # Opus 4.6 supports up to 128k with streaming, use 21k as default
|
return 21000 # Opus 4.6 / Sonnet 4.6 supports up to 128k with streaming, use 21k as default
|
||||||
elif "opus" in model_name:
|
elif "opus" in model_name:
|
||||||
return 16384
|
return 16384
|
||||||
elif "sonnet" in model_name:
|
elif "sonnet" in model_name:
|
||||||
|
|||||||
@@ -170,7 +170,7 @@ class ModelSettings(BaseSettings):
|
|||||||
anthropic_sonnet_1m: bool = Field(
|
anthropic_sonnet_1m: bool = Field(
|
||||||
default=False,
|
default=False,
|
||||||
description=(
|
description=(
|
||||||
"Enable 1M-token context window for Claude Sonnet 4/4.5. When true, adds the"
|
"Enable 1M-token context window for Claude Sonnet 4/4.5/4.6. When true, adds the"
|
||||||
" 'context-1m-2025-08-07' beta to Anthropic requests and sets model context_window"
|
" 'context-1m-2025-08-07' beta to Anthropic requests and sets model context_window"
|
||||||
" to 1,000,000 instead of 200,000. Note: This feature is in beta and not available"
|
" to 1,000,000 instead of 200,000. Note: This feature is in beta and not available"
|
||||||
" to all orgs; once GA, this flag can be removed and behavior can default to on."
|
" to all orgs; once GA, this flag can be removed and behavior can default to on."
|
||||||
|
|||||||
Reference in New Issue
Block a user