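"""Anthropic provider for Letta.

Defines AnthropicProvider, which lists available Claude models and fills in
context window sizes from a hardcoded fallback table (MODEL_LIST), since the
Anthropic models API does not report them.
"""
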
from typing import Literal

import anthropic
from pydantic import Field

from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
from letta.log import get_logger
from letta.schemas.enums import ProviderCategory, ProviderType
from letta.schemas.llm_config import LLMConfig
from letta.schemas.providers.base import Provider
from letta.settings import model_settings

logger = get_logger(__name__)

# https://docs.anthropic.com/claude/docs/models-overview
# Sadly hardcoded - the models API does not report context window sizes,
# so this table is used as a fallback lookup in _list_llm_models below.
MODEL_LIST = [
    ## Opus 4.1
    {
        "name": "claude-opus-4-1-20250805",
        "context_window": 200000,
    },
    ## Opus 3
    {
        "name": "claude-3-opus-20240229",
        "context_window": 200000,
    },
    # 3 latest
    {
        "name": "claude-3-opus-latest",
        "context_window": 200000,
    },
    # 4
    {
        "name": "claude-opus-4-20250514",
        "context_window": 200000,
    },
    ## Sonnet
    # 3.0
    {
        "name": "claude-3-sonnet-20240229",
        "context_window": 200000,
    },
    # 3.5
    {
        "name": "claude-3-5-sonnet-20240620",
        "context_window": 200000,
    },
    # 3.5 new
    {
        "name": "claude-3-5-sonnet-20241022",
        "context_window": 200000,
    },
    # 3.5 latest
    {
        "name": "claude-3-5-sonnet-latest",
        "context_window": 200000,
    },
    # 3.7
    {
        "name": "claude-3-7-sonnet-20250219",
        "context_window": 200000,
    },
    # 3.7 latest
    {
        "name": "claude-3-7-sonnet-latest",
        "context_window": 200000,
    },
    # 4
    {
        "name": "claude-sonnet-4-20250514",
        "context_window": 200000,
    },
    # 4.5
    {
        "name": "claude-sonnet-4-5-20250929",
        "context_window": 200000,
    },
    ## Haiku
    # 3.0
    {
        "name": "claude-3-haiku-20240307",
        "context_window": 200000,
    },
    # 3.5
    {
        "name": "claude-3-5-haiku-20241022",
        "context_window": 200000,
    },
    # 3.5 latest
    {
        "name": "claude-3-5-haiku-latest",
        "context_window": 200000,
    },
    # 4.5
    {
        "name": "claude-haiku-4-5-20251001",
        "context_window": 200000,
    },
    # 4.5 latest
    {
        "name": "claude-haiku-4-5-latest",
        "context_window": 200000,
    },
    ## Opus 4.5
    {
        "name": "claude-opus-4-5-20251101",
        "context_window": 200000,
    },
    ## Opus 4.6
    {
        "name": "claude-opus-4-6",
        "context_window": 200000,
    },
]


class AnthropicProvider(Provider):
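    """Anthropic model provider: lists available Claude models and fills in context window sizes from MODEL_LIST."""
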
    provider_type: Literal[ProviderType.anthropic] = Field(ProviderType.anthropic, description="The type of the provider.")
    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
    api_key: str | None = Field(None, description="API key for the Anthropic API.", deprecated=True)
    base_url: str = "https://api.anthropic.com/v1"

    async def check_api_key(self):
        api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
        if not api_key:
            raise ValueError("No API key provided")

        try:
            # Use async Anthropic client
            anthropic_client = anthropic.AsyncAnthropic(api_key=api_key)
            # Issue a tiny token-counting request against the last hardcoded model as a lightweight auth check
            # (as of 5/7/2025 this is faster than fetching the list of models)
            await anthropic_client.messages.count_tokens(model=MODEL_LIST[-1]["name"], messages=[{"role": "user", "content": "a"}])
        except anthropic.AuthenticationError as e:
            raise LLMAuthenticationError(message=f"Failed to authenticate with Anthropic: {e}", code=ErrorCode.UNAUTHENTICATED)
        except Exception as e:
            raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)

    def get_default_max_output_tokens(self, model_name: str) -> int:
        """Get the default max output tokens for Anthropic models."""
        if "claude-opus-4-6" in model_name:
            return 21000  # Opus 4.6 supports up to 128k with streaming, use 21k as default
        elif "opus" in model_name:
            return 16384
        elif "sonnet" in model_name:
            return 16384
        elif "haiku" in model_name:
            return 8192
        return 8192  # default for anthropic

    async def list_llm_models_async(self) -> list[LLMConfig]:
        """
        https://docs.anthropic.com/claude/docs/models-overview

        NOTE: GET /models does not return context window sizes, so those are filled in from the hardcoded MODEL_LIST
        """
        api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None

        # For claude-pro-max provider, use OAuth Bearer token instead of api_key
        is_oauth_provider = self.name == "claude-pro-max"

        if api_key:
            if is_oauth_provider:
                anthropic_client = anthropic.AsyncAnthropic(
                    default_headers={
                        "Authorization": f"Bearer {api_key}",
                        "anthropic-version": "2023-06-01",
                        "anthropic-beta": "oauth-2025-04-20",
                    },
                )
            else:
                anthropic_client = anthropic.AsyncAnthropic(api_key=api_key)
        elif model_settings.anthropic_api_key:
            # No per-provider key; rely on the SDK default (ANTHROPIC_API_KEY from the environment)
            anthropic_client = anthropic.AsyncAnthropic()
        else:
            raise ValueError("No API key provided")

        models = await anthropic_client.models.list()
        models_json = models.model_dump()
        assert "data" in models_json, f"Anthropic model query response missing 'data' field: {models_json}"
        models_data = models_json["data"]

        return self._list_llm_models(models_data)

    def _list_llm_models(self, models) -> list[LLMConfig]:
        configs = []
        for model in models:
            # Skip non-model entries, entries without an id, and legacy Claude 2 models
            if model.get("type") != "model" or "id" not in model or model["id"].startswith("claude-2"):
                continue

            # Anthropic doesn't return the context window in their API
            if "context_window" not in model:
                # Remap list to name: context_window
                model_library = {m["name"]: m["context_window"] for m in MODEL_LIST}
                # Attempt to look it up in a hardcoded list
                if model["id"] in model_library:
                    model["context_window"] = model_library[model["id"]]
                else:
                    # On fallback, we can set 200k (generally safe), but we should warn the user
                    logger.warning(f"Couldn't find context window size for model {model['id']}, defaulting to 200,000")
                    model["context_window"] = 200000

            # Optional override: enable 1M context for Sonnet 4/4.5 when flag is set
            try:
                if model_settings.anthropic_sonnet_1m and (
                    model["id"].startswith("claude-sonnet-4") or model["id"].startswith("claude-sonnet-4-5")
                ):
                    model["context_window"] = 1_000_000
            except Exception:
                pass

            max_tokens = self.get_default_max_output_tokens(model["id"])
            # TODO: set for 3-7 extended thinking mode

            # NOTE: from 2025-02
            # We set this to false by default, because Anthropic can
            # natively support <thinking> tags inside of content fields
            # However, putting COT inside of tool calls can make it more
            # reliable for tool calling (no chance of a non-tool call step)
            # Since tool_choice_type 'any' doesn't work with in-content COT
            # NOTE For Haiku, it can be flaky if we don't enable this by default
            # inner_thoughts_in_kwargs = True if "haiku" in model["id"] else False
            inner_thoughts_in_kwargs = True  # we no longer support thinking tags

            configs.append(
                LLMConfig(
                    model=model["id"],
                    model_endpoint_type="anthropic",
                    model_endpoint=self.base_url,
                    context_window=model["context_window"],
                    handle=self.get_handle(model["id"]),
                    put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
                    max_tokens=max_tokens,
                    provider_name=self.name,
                    provider_category=self.provider_category,
                )
            )
        return configs
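

# Usage sketch (illustrative only, not part of this module): how the provider flow
# is typically driven. Direct construction with name="anthropic" is an assumption
# for illustration - in Letta the provider and its key material are wired up by the
# provider manager rather than by hand.
#
#   import asyncio
#
#   async def _demo():
#       provider = AnthropicProvider(name="anthropic")
#       await provider.check_api_key()  # raises LLMAuthenticationError on a bad key
#       for config in await provider.list_llm_models_async():
#           print(config.model, config.context_window, config.max_tokens)
#
#   asyncio.run(_demo())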