feat(core): add sonnet 1m support [LET-4620] (#5152)
feat(core): add sonnet 1m support
This commit is contained in:
committed by
Caren Thomas
parent
134e0203c2
commit
07a687880f
@@ -55,15 +55,46 @@ class AnthropicClient(LLMClientBase):
|
||||
@deprecated("Synchronous version of this is no longer valid. Will result in model_dump of coroutine")
def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
    """Send a synchronous Anthropic beta messages request and return the dumped response.

    Args:
        request_data: Keyword arguments forwarded to ``client.beta.messages.create``.
        llm_config: Model configuration; ``llm_config.model`` decides whether the
            1M-context beta is requested.

    Returns:
        The result of ``response.model_dump()``.
    """
    client = self._get_anthropic_client(llm_config, async_client=False)

    betas: list[str] = []

    # 1M context beta for Sonnet 4/4.5 when enabled.
    try:
        from letta.settings import model_settings

        # The "claude-sonnet-4" prefix also matches "claude-sonnet-4-5" ids,
        # so a single startswith check covers both model families.
        if model_settings.anthropic_sonnet_1m and llm_config.model.startswith("claude-sonnet-4"):
            betas.append("context-1m-2025-08-07")
    except Exception:
        # Deliberate best-effort: if the settings lookup or model check fails,
        # fall back to a plain request without the beta flag.
        pass

    # Issue exactly one request; only pass ``betas`` when at least one beta is active.
    beta_kwargs = {"betas": betas} if betas else {}
    response = client.beta.messages.create(**request_data, **beta_kwargs)
    return response.model_dump()
|
||||
|
||||
@trace_method
|
||||
async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
|
||||
client = await self._get_anthropic_client_async(llm_config, async_client=True)
|
||||
|
||||
betas: list[str] = []
|
||||
# interleaved thinking for reasoner
|
||||
if llm_config.enable_reasoner:
|
||||
response = await client.beta.messages.create(**request_data, betas=["interleaved-thinking-2025-05-14"])
|
||||
betas.append("interleaved-thinking-2025-05-14")
|
||||
|
||||
# 1M context beta for Sonnet 4/4.5 when enabled
|
||||
try:
|
||||
from letta.settings import model_settings
|
||||
|
||||
if model_settings.anthropic_sonnet_1m and (
|
||||
llm_config.model.startswith("claude-sonnet-4") or llm_config.model.startswith("claude-sonnet-4-5")
|
||||
):
|
||||
betas.append("context-1m-2025-08-07")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if betas:
|
||||
response = await client.beta.messages.create(**request_data, betas=betas)
|
||||
else:
|
||||
response = await client.beta.messages.create(**request_data)
|
||||
|
||||
@@ -84,6 +115,17 @@ class AnthropicClient(LLMClientBase):
|
||||
if llm_config.enable_reasoner:
|
||||
betas.append("interleaved-thinking-2025-05-14")
|
||||
|
||||
# 1M context beta for Sonnet 4/4.5 when enabled
|
||||
try:
|
||||
from letta.settings import model_settings
|
||||
|
||||
if model_settings.anthropic_sonnet_1m and (
|
||||
llm_config.model.startswith("claude-sonnet-4") or llm_config.model.startswith("claude-sonnet-4-5")
|
||||
):
|
||||
betas.append("context-1m-2025-08-07")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return await client.beta.messages.create(**request_data, betas=betas)
|
||||
|
||||
@trace_method
|
||||
|
||||
@@ -148,6 +148,17 @@ class AnthropicProvider(Provider):
|
||||
warnings.warn(f"Couldn't find context window size for model {model['id']}, defaulting to 200,000")
|
||||
model["context_window"] = 200000
|
||||
|
||||
# Optional override: enable 1M context for Sonnet 4/4.5 when flag is set
|
||||
try:
|
||||
from letta.settings import model_settings
|
||||
|
||||
if model_settings.anthropic_sonnet_1m and (
|
||||
model["id"].startswith("claude-sonnet-4") or model["id"].startswith("claude-sonnet-4-5")
|
||||
):
|
||||
model["context_window"] = 1_000_000
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
max_tokens = 8192
|
||||
if "claude-3-opus" in model["id"]:
|
||||
max_tokens = 4096
|
||||
|
||||
@@ -137,6 +137,15 @@ class ModelSettings(BaseSettings):
|
||||
# anthropic
|
||||
anthropic_api_key: Optional[str] = None
|
||||
anthropic_max_retries: int = 3
|
||||
anthropic_sonnet_1m: bool = Field(
|
||||
default=False,
|
||||
description=(
|
||||
"Enable 1M-token context window for Claude Sonnet 4/4.5. When true, adds the"
|
||||
" 'context-1m-2025-08-07' beta to Anthropic requests and sets model context_window"
|
||||
" to 1,000,000 instead of 200,000. Note: This feature is in beta and not available"
|
||||
" to all orgs; once GA, this flag can be removed and behavior can default to on."
|
||||
),
|
||||
)
|
||||
|
||||
# ollama
|
||||
ollama_base_url: Optional[str] = None
|
||||
|
||||
Reference in New Issue
Block a user