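"""Anthropic provider for Letta.

Defines AnthropicProvider, which lists available Claude models and fills in
context window sizes from a hardcoded fallback table (MODEL_LIST), since the
Anthropic models API does not report them.
"""
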
from typing import Literal

import anthropic
from pydantic import Field

from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
from letta.log import get_logger
from letta.schemas.enums import ProviderCategory, ProviderType
from letta.schemas.llm_config import LLMConfig
from letta.schemas.providers.base import Provider
from letta.settings import model_settings

logger = get_logger(__name__)

# https://docs.anthropic.com/claude/docs/models-overview
# Sadly hardcoded - the models API does not report context window sizes,
# so this table is used as a fallback lookup in _list_llm_models below.
MODEL_LIST = [
    ## Opus 4.1
    {
        "name": "claude-opus-4-1-20250805",
        "context_window": 200000,
    },
    ## Opus 3
    {
        "name": "claude-3-opus-20240229",
        "context_window": 200000,
    },
    # 3 latest
    {
        "name": "claude-3-opus-latest",
        "context_window": 200000,
    },
    # 4
    {
        "name": "claude-opus-4-20250514",
        "context_window": 200000,
    },
    ## Sonnet
    # 3.0
    {
        "name": "claude-3-sonnet-20240229",
        "context_window": 200000,
    },
    # 3.5
    {
        "name": "claude-3-5-sonnet-20240620",
        "context_window": 200000,
    },
    # 3.5 new
    {
        "name": "claude-3-5-sonnet-20241022",
        "context_window": 200000,
    },
    # 3.5 latest
    {
        "name": "claude-3-5-sonnet-latest",
        "context_window": 200000,
    },
    # 3.7
    {
        "name": "claude-3-7-sonnet-20250219",
        "context_window": 200000,
    },
    # 3.7 latest
    {
        "name": "claude-3-7-sonnet-latest",
        "context_window": 200000,
    },
    # 4
    {
        "name": "claude-sonnet-4-20250514",
        "context_window": 200000,
    },
    # 4.5
    {
        "name": "claude-sonnet-4-5-20250929",
        "context_window": 200000,
    },
    ## Haiku
    # 3.0
    {
        "name": "claude-3-haiku-20240307",
        "context_window": 200000,
    },
    # 3.5
    {
        "name": "claude-3-5-haiku-20241022",
        "context_window": 200000,
    },
    # 3.5 latest
    {
        "name": "claude-3-5-haiku-latest",
        "context_window": 200000,
    },
    # 4.5
    {
        "name": "claude-haiku-4-5-20251001",
        "context_window": 200000,
    },
    # 4.5 latest
    {
        "name": "claude-haiku-4-5-latest",
        "context_window": 200000,
    },
    ## Opus 4.5
    {
        "name": "claude-opus-4-5-20251101",
        "context_window": 200000,
    },
    ## Opus 4.6
    {
        "name": "claude-opus-4-6",
        "context_window": 200000,
    },
]


class AnthropicProvider(Provider):
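    """Anthropic model provider: lists available Claude models and fills in context window sizes from MODEL_LIST."""
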
    provider_type: Literal[ProviderType.anthropic] = Field(ProviderType.anthropic, description="The type of the provider.")
    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
    api_key: str | None = Field(None, description="API key for the Anthropic API.", deprecated=True)
    base_url: str = "https://api.anthropic.com/v1"

    async def check_api_key(self):
        api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
        if not api_key:
            raise ValueError("No API key provided")

        try:
            # Use async Anthropic client
            anthropic_client = anthropic.AsyncAnthropic(api_key=api_key)
            # Issue a tiny token-counting request against the last hardcoded model as a lightweight auth check
            # (as of 5/7/2025 this is faster than fetching the list of models)
            await anthropic_client.messages.count_tokens(model=MODEL_LIST[-1]["name"], messages=[{"role": "user", "content": "a"}])
        except anthropic.AuthenticationError as e:
            raise LLMAuthenticationError(message=f"Failed to authenticate with Anthropic: {e}", code=ErrorCode.UNAUTHENTICATED)
        except Exception as e:
            raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)

    def get_default_max_output_tokens(self, model_name: str) -> int:
        """Get the default max output tokens for Anthropic models."""
        if "claude-opus-4-6" in model_name:
            return 21000  # Opus 4.6 supports up to 128k with streaming, use 21k as default
        elif "opus" in model_name:
            return 16384
        elif "sonnet" in model_name:
            return 16384
        elif "haiku" in model_name:
            return 8192
        return 8192  # default for anthropic

    async def list_llm_models_async(self) -> list[LLMConfig]:
        """
        https://docs.anthropic.com/claude/docs/models-overview

        NOTE: GET /models does not return context window sizes, so those are filled in from the hardcoded MODEL_LIST
        """
        api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None

        # For claude-pro-max provider, use OAuth Bearer token instead of api_key
        is_oauth_provider = self.name == "claude-pro-max"

        if api_key:
            if is_oauth_provider:
                anthropic_client = anthropic.AsyncAnthropic(
                    default_headers={
                        "Authorization": f"Bearer {api_key}",
                        "anthropic-version": "2023-06-01",
                        "anthropic-beta": "oauth-2025-04-20",
                    },
                )
            else:
                anthropic_client = anthropic.AsyncAnthropic(api_key=api_key)
        elif model_settings.anthropic_api_key:
            # No per-provider key; rely on the SDK default (ANTHROPIC_API_KEY from the environment)
            anthropic_client = anthropic.AsyncAnthropic()
        else:
            raise ValueError("No API key provided")

        models = await anthropic_client.models.list()
        models_json = models.model_dump()
        assert "data" in models_json, f"Anthropic model query response missing 'data' field: {models_json}"
        models_data = models_json["data"]

        return self._list_llm_models(models_data)

    def _list_llm_models(self, models) -> list[LLMConfig]:
        configs = []
        for model in models:
            # Skip non-model entries, entries without an id, and legacy Claude 2 models
            if model.get("type") != "model" or "id" not in model or model["id"].startswith("claude-2"):
                continue

            # Anthropic doesn't return the context window in their API
            if "context_window" not in model:
                # Remap list to name: context_window
                model_library = {m["name"]: m["context_window"] for m in MODEL_LIST}
                # Attempt to look it up in a hardcoded list
                if model["id"] in model_library:
                    model["context_window"] = model_library[model["id"]]
                else:
                    # On fallback, we can set 200k (generally safe), but we should warn the user
                    logger.warning(f"Couldn't find context window size for model {model['id']}, defaulting to 200,000")
                    model["context_window"] = 200000

            # Optional override: enable 1M context for Sonnet 4/4.5 when flag is set
            try:
                if model_settings.anthropic_sonnet_1m and (
                    model["id"].startswith("claude-sonnet-4") or model["id"].startswith("claude-sonnet-4-5")
                ):
                    model["context_window"] = 1_000_000
            except Exception:
                pass

            max_tokens = self.get_default_max_output_tokens(model["id"])
            # TODO: set for 3-7 extended thinking mode

            # NOTE: from 2025-02
            # We set this to false by default, because Anthropic can
            # natively support <thinking> tags inside of content fields
            # However, putting COT inside of tool calls can make it more
            # reliable for tool calling (no chance of a non-tool call step)
            # Since tool_choice_type 'any' doesn't work with in-content COT
            # NOTE For Haiku, it can be flaky if we don't enable this by default
            # inner_thoughts_in_kwargs = True if "haiku" in model["id"] else False
            inner_thoughts_in_kwargs = True  # we no longer support thinking tags

            configs.append(
                LLMConfig(
                    model=model["id"],
                    model_endpoint_type="anthropic",
                    model_endpoint=self.base_url,
                    context_window=model["context_window"],
                    handle=self.get_handle(model["id"]),
                    put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
                    max_tokens=max_tokens,
                    provider_name=self.name,
                    provider_category=self.provider_category,
                )
            )
        return configs
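

# Usage sketch (illustrative only, not part of this module): how the provider flow
# is typically driven. Direct construction with name="anthropic" is an assumption
# for illustration - in Letta the provider and its key material are wired up by the
# provider manager rather than by hand.
#
#   import asyncio
#
#   async def _demo():
#       provider = AnthropicProvider(name="anthropic")
#       await provider.check_api_key()  # raises LLMAuthenticationError on a bad key
#       for config in await provider.list_llm_models_async():
#           print(config.model, config.context_window, config.max_tokens)
#
#   asyncio.run(_demo())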