Files
letta-server/letta/schemas/providers/anthropic.py
2026-02-24 10:52:06 -08:00

242 lines
8.3 KiB
Python

from typing import Literal
from letta.log import get_logger
logger = get_logger(__name__)
import anthropic
from pydantic import Field
from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
from letta.schemas.enums import ProviderCategory, ProviderType
from letta.schemas.llm_config import LLMConfig
from letta.schemas.providers.base import Provider
from letta.settings import model_settings
# https://docs.anthropic.com/claude/docs/models-overview
# Sadly hardcoded
MODEL_LIST = [
## Opus 4.1
{
"name": "claude-opus-4-1-20250805",
"context_window": 200000,
},
## Opus 3
{
"name": "claude-3-opus-20240229",
"context_window": 200000,
},
# 3 latest
{
"name": "claude-3-opus-latest",
"context_window": 200000,
},
# 4
{
"name": "claude-opus-4-20250514",
"context_window": 200000,
},
## Sonnet
# 3.0
{
"name": "claude-3-sonnet-20240229",
"context_window": 200000,
},
# 3.5
{
"name": "claude-3-5-sonnet-20240620",
"context_window": 200000,
},
# 3.5 new
{
"name": "claude-3-5-sonnet-20241022",
"context_window": 200000,
},
# 3.5 latest
{
"name": "claude-3-5-sonnet-latest",
"context_window": 200000,
},
# 3.7
{
"name": "claude-3-7-sonnet-20250219",
"context_window": 200000,
},
# 3.7 latest
{
"name": "claude-3-7-sonnet-latest",
"context_window": 200000,
},
# 4
{
"name": "claude-sonnet-4-20250514",
"context_window": 200000,
},
# 4.5
{
"name": "claude-sonnet-4-5-20250929",
"context_window": 200000,
},
## Haiku
# 3.0
{
"name": "claude-3-haiku-20240307",
"context_window": 200000,
},
# 3.5
{
"name": "claude-3-5-haiku-20241022",
"context_window": 200000,
},
# 3.5 latest
{
"name": "claude-3-5-haiku-latest",
"context_window": 200000,
},
# 4.5
{
"name": "claude-haiku-4-5-20251001",
"context_window": 200000,
},
# 4.5 latest
{
"name": "claude-haiku-4-5-latest",
"context_window": 200000,
},
## Opus 4.5
{
"name": "claude-opus-4-5-20251101",
"context_window": 200000,
},
## Opus 4.6
{
"name": "claude-opus-4-6",
"context_window": 200000,
},
]
class AnthropicProvider(Provider):
provider_type: Literal[ProviderType.anthropic] = Field(ProviderType.anthropic, description="The type of the provider.")
provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
api_key: str | None = Field(None, description="API key for the Anthropic API.", deprecated=True)
base_url: str = "https://api.anthropic.com/v1"
async def check_api_key(self):
api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
if not api_key:
raise ValueError("No API key provided")
try:
# Use async Anthropic client
anthropic_client = anthropic.AsyncAnthropic(api_key=api_key)
# just use a cheap model to count some tokens - as of 5/7/2025 this is faster than fetching the list of models
await anthropic_client.messages.count_tokens(model=MODEL_LIST[-1]["name"], messages=[{"role": "user", "content": "a"}])
except anthropic.AuthenticationError as e:
raise LLMAuthenticationError(message=f"Failed to authenticate with Anthropic: {e}", code=ErrorCode.UNAUTHENTICATED)
except Exception as e:
raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
def get_default_max_output_tokens(self, model_name: str) -> int:
"""Get the default max output tokens for Anthropic models."""
if "claude-opus-4-6" in model_name:
return 21000 # Opus 4.6 supports up to 128k with streaming, use 21k as default
elif "opus" in model_name:
return 16384
elif "sonnet" in model_name:
return 16384
elif "haiku" in model_name:
return 8192
return 8192 # default for anthropic
async def list_llm_models_async(self) -> list[LLMConfig]:
"""
https://docs.anthropic.com/claude/docs/models-overview
NOTE: currently there is no GET /models, so we need to hardcode
"""
api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
# For claude-pro-max provider, use OAuth Bearer token instead of api_key
is_oauth_provider = self.name == "claude-pro-max"
if api_key:
if is_oauth_provider:
anthropic_client = anthropic.AsyncAnthropic(
default_headers={
"Authorization": f"Bearer {api_key}",
"anthropic-version": "2023-06-01",
"anthropic-beta": "oauth-2025-04-20",
},
)
else:
anthropic_client = anthropic.AsyncAnthropic(api_key=api_key)
elif model_settings.anthropic_api_key:
anthropic_client = anthropic.AsyncAnthropic()
else:
raise ValueError("No API key provided")
models = await anthropic_client.models.list()
models_json = models.model_dump()
assert "data" in models_json, f"Anthropic model query response missing 'data' field: {models_json}"
models_data = models_json["data"]
return self._list_llm_models(models_data)
def _list_llm_models(self, models) -> list[LLMConfig]:
configs = []
for model in models:
if any((model.get("type") != "model", "id" not in model, model.get("id").startswith("claude-2"))):
continue
# Anthropic doesn't return the context window in their API
if "context_window" not in model:
# Remap list to name: context_window
model_library = {m["name"]: m["context_window"] for m in MODEL_LIST}
# Attempt to look it up in a hardcoded list
if model["id"] in model_library:
model["context_window"] = model_library[model["id"]]
else:
# On fallback, we can set 200k (generally safe), but we should warn the user
logger.warning(f"Couldn't find context window size for model {model['id']}, defaulting to 200,000")
model["context_window"] = 200000
# Optional override: enable 1M context for Sonnet 4/4.5 when flag is set
try:
from letta.settings import model_settings
if model_settings.anthropic_sonnet_1m and (
model["id"].startswith("claude-sonnet-4") or model["id"].startswith("claude-sonnet-4-5")
):
model["context_window"] = 1_000_000
except Exception:
pass
max_tokens = self.get_default_max_output_tokens(model["id"])
# TODO: set for 3-7 extended thinking mode
# NOTE: from 2025-02
# We set this to false by default, because Anthropic can
# natively support <thinking> tags inside of content fields
# However, putting COT inside of tool calls can make it more
# reliable for tool calling (no chance of a non-tool call step)
# Since tool_choice_type 'any' doesn't work with in-content COT
# NOTE For Haiku, it can be flaky if we don't enable this by default
# inner_thoughts_in_kwargs = True if "haiku" in model["id"] else False
inner_thoughts_in_kwargs = True # we no longer support thinking tags
configs.append(
LLMConfig(
model=model["id"],
model_endpoint_type="anthropic",
model_endpoint=self.base_url,
context_window=model["context_window"],
handle=self.get_handle(model["id"]),
put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
max_tokens=max_tokens,
provider_name=self.name,
provider_category=self.provider_category,
)
)
return configs