From 07a687880fa908c6f6ff48ee5f49c24f662f4005 Mon Sep 17 00:00:00 2001 From: Charles Packer Date: Sat, 4 Oct 2025 20:21:04 -0700 Subject: [PATCH] feat(core): add sonnet 1m support [LET-4620] (#5152) feat(core): add sonnet 1m support --- letta/llm_api/anthropic_client.py | 46 ++++++++++++++++++++++++++-- letta/schemas/providers/anthropic.py | 11 +++++++ letta/settings.py | 9 ++++++ 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index 7c8ebbb0..9a500fe2 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -55,15 +55,46 @@ class AnthropicClient(LLMClientBase): @deprecated("Synchronous version of this is no longer valid. Will result in model_dump of coroutine") def request(self, request_data: dict, llm_config: LLMConfig) -> dict: client = self._get_anthropic_client(llm_config, async_client=False) - response = client.beta.messages.create(**request_data) + betas: list[str] = [] + # 1M context beta for Sonnet 4/4.5 when enabled + try: + from letta.settings import model_settings + + if model_settings.anthropic_sonnet_1m and ( + llm_config.model.startswith("claude-sonnet-4") or llm_config.model.startswith("claude-sonnet-4-5") + ): + betas.append("context-1m-2025-08-07") + except Exception: + pass + + if betas: + response = client.beta.messages.create(**request_data, betas=betas) + else: + response = client.beta.messages.create(**request_data) return response.model_dump() @trace_method async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict: client = await self._get_anthropic_client_async(llm_config, async_client=True) + betas: list[str] = [] + # interleaved thinking for reasoner if llm_config.enable_reasoner: - response = await client.beta.messages.create(**request_data, betas=["interleaved-thinking-2025-05-14"]) + betas.append("interleaved-thinking-2025-05-14") + + # 1M context beta for Sonnet 4/4.5 when enabled + try: + from letta.settings import model_settings + + if model_settings.anthropic_sonnet_1m and ( + llm_config.model.startswith("claude-sonnet-4") or llm_config.model.startswith("claude-sonnet-4-5") + ): + betas.append("context-1m-2025-08-07") + except Exception: + pass + + if betas: + response = await client.beta.messages.create(**request_data, betas=betas) else: response = await client.beta.messages.create(**request_data) @@ -84,6 +115,17 @@ class AnthropicClient(LLMClientBase): if llm_config.enable_reasoner: betas.append("interleaved-thinking-2025-05-14") + # 1M context beta for Sonnet 4/4.5 when enabled + try: + from letta.settings import model_settings + + if model_settings.anthropic_sonnet_1m and ( + llm_config.model.startswith("claude-sonnet-4") or llm_config.model.startswith("claude-sonnet-4-5") + ): + betas.append("context-1m-2025-08-07") + except Exception: + pass + return await client.beta.messages.create(**request_data, betas=betas) @trace_method diff --git a/letta/schemas/providers/anthropic.py b/letta/schemas/providers/anthropic.py index 4b08324d..c72f315e 100644 --- a/letta/schemas/providers/anthropic.py +++ b/letta/schemas/providers/anthropic.py @@ -148,6 +148,17 @@ class AnthropicProvider(Provider): warnings.warn(f"Couldn't find context window size for model {model['id']}, defaulting to 200,000") model["context_window"] = 200000 + # Optional override: enable 1M context for Sonnet 4/4.5 when flag is set + try: + from letta.settings import model_settings + + if model_settings.anthropic_sonnet_1m and ( + model["id"].startswith("claude-sonnet-4") or model["id"].startswith("claude-sonnet-4-5") + ): + model["context_window"] = 1_000_000 + except Exception: + pass + max_tokens = 8192 if "claude-3-opus" in model["id"]: max_tokens = 4096 diff --git a/letta/settings.py b/letta/settings.py index 28882ac1..4aad0195 100644 --- a/letta/settings.py +++ b/letta/settings.py @@ -137,6 +137,15 @@ class ModelSettings(BaseSettings): # anthropic anthropic_api_key: Optional[str] = None anthropic_max_retries: int = 3 + anthropic_sonnet_1m: bool = Field( + default=False, + description=( + "Enable 1M-token context window for Claude Sonnet 4/4.5. When true, adds the" + " 'context-1m-2025-08-07' beta to Anthropic requests and sets model context_window" + " to 1,000,000 instead of 200,000. Note: This feature is in beta and not available" + " to all orgs; once GA, this flag can be removed and behavior can default to on." + ), + ) # ollama ollama_base_url: Optional[str] = None