feat(core): add sonnet 1m support [LET-4620] (#5152)

feat(core): add sonnet 1m support
This commit is contained in:
Charles Packer
2025-10-04 20:21:04 -07:00
committed by Caren Thomas
parent 134e0203c2
commit 07a687880f
3 changed files with 64 additions and 2 deletions

View File

@@ -55,15 +55,46 @@ class AnthropicClient(LLMClientBase):
@deprecated("Synchronous version of this is no longer valid. Will result in model_dump of coroutine")
def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
    """Send one synchronous Anthropic beta Messages request.

    Args:
        request_data: Keyword arguments forwarded unchanged to
            ``client.beta.messages.create``.
        llm_config: Model configuration. ``llm_config.model`` is checked to
            decide whether the 1M-context beta flag is attached.

    Returns:
        The value of ``response.model_dump()``.
    """
    import logging

    client = self._get_anthropic_client(llm_config, async_client=False)

    betas: list[str] = []
    # 1M context beta for Sonnet 4/4.5 when enabled
    try:
        from letta.settings import model_settings

        # A single prefix check is enough: every "claude-sonnet-4-5..." id
        # also starts with "claude-sonnet-4".
        if model_settings.anthropic_sonnet_1m and llm_config.model.startswith("claude-sonnet-4"):
            betas.append("context-1m-2025-08-07")
    except Exception:
        # The beta opt-in stays best-effort (the request is still sent without
        # it), but the failure is logged instead of being silently dropped.
        logging.getLogger(__name__).warning(
            "Could not evaluate the Anthropic 1M-context beta flag; sending request without it",
            exc_info=True,
        )

    # The request is issued exactly once; `betas` is only passed when non-empty
    # so the call is unchanged when no beta applies.
    if betas:
        response = client.beta.messages.create(**request_data, betas=betas)
    else:
        response = client.beta.messages.create(**request_data)
    return response.model_dump()
@trace_method
async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
client = await self._get_anthropic_client_async(llm_config, async_client=True)
betas: list[str] = []
# interleaved thinking for reasoner
if llm_config.enable_reasoner:
response = await client.beta.messages.create(**request_data, betas=["interleaved-thinking-2025-05-14"])
betas.append("interleaved-thinking-2025-05-14")
# 1M context beta for Sonnet 4/4.5 when enabled
try:
from letta.settings import model_settings
if model_settings.anthropic_sonnet_1m and (
llm_config.model.startswith("claude-sonnet-4") or llm_config.model.startswith("claude-sonnet-4-5")
):
betas.append("context-1m-2025-08-07")
except Exception:
pass
if betas:
response = await client.beta.messages.create(**request_data, betas=betas)
else:
response = await client.beta.messages.create(**request_data)
@@ -84,6 +115,17 @@ class AnthropicClient(LLMClientBase):
if llm_config.enable_reasoner:
betas.append("interleaved-thinking-2025-05-14")
# 1M context beta for Sonnet 4/4.5 when enabled
try:
from letta.settings import model_settings
if model_settings.anthropic_sonnet_1m and (
llm_config.model.startswith("claude-sonnet-4") or llm_config.model.startswith("claude-sonnet-4-5")
):
betas.append("context-1m-2025-08-07")
except Exception:
pass
return await client.beta.messages.create(**request_data, betas=betas)
@trace_method

View File

@@ -148,6 +148,17 @@ class AnthropicProvider(Provider):
warnings.warn(f"Couldn't find context window size for model {model['id']}, defaulting to 200,000")
model["context_window"] = 200000
# Optional override: enable 1M context for Sonnet 4/4.5 when flag is set
try:
from letta.settings import model_settings
if model_settings.anthropic_sonnet_1m and (
model["id"].startswith("claude-sonnet-4") or model["id"].startswith("claude-sonnet-4-5")
):
model["context_window"] = 1_000_000
except Exception:
pass
max_tokens = 8192
if "claude-3-opus" in model["id"]:
max_tokens = 4096

View File

@@ -137,6 +137,15 @@ class ModelSettings(BaseSettings):
# anthropic
anthropic_api_key: Optional[str] = None
anthropic_max_retries: int = 3
# Opt-in flag (off by default) for the 1M-token context window on Claude
# Sonnet 4/4.5 models. The description below is the runtime help text for
# this setting and states what enabling the flag does.
anthropic_sonnet_1m: bool = Field(
default=False,
description=(
"Enable 1M-token context window for Claude Sonnet 4/4.5. When true, adds the"
" 'context-1m-2025-08-07' beta to Anthropic requests and sets model context_window"
" to 1,000,000 instead of 200,000. Note: This feature is in beta and not available"
" to all orgs; once GA, this flag can be removed and behavior can default to on."
),
)
# ollama
ollama_base_url: Optional[str] = None