chore: delete legacy anthropic client (#3908)

2025-08-13 15:53:27 -07:00
parent c66550a300
commit dac2d8bb16
2 changed files with 106 additions and 783 deletions
--- a/letta/llm_api/anthropic.py
+++ b/letta/llm_api/anthropic.py
@@ -1,775 +0,0 @@
-import json
-import re
-import warnings
-from typing import List, Optional, Union
-
-import anthropic
-from anthropic.types.beta import (
-    BetaRawContentBlockDeltaEvent,
-    BetaRawContentBlockStartEvent,
-    BetaRawContentBlockStopEvent,
-    BetaRawMessageDeltaEvent,
-    BetaRawMessageStartEvent,
-    BetaRawMessageStopEvent,
-    BetaRedactedThinkingBlock,
-    BetaTextBlock,
-    BetaThinkingBlock,
-    BetaToolUseBlock,
-)
-
-from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
-from letta.helpers.datetime_helpers import get_utc_time_int
-from letta.llm_api.helpers import add_inner_thoughts_to_functions
-from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
-from letta.log import get_logger
-from letta.schemas.message import Message as _Message
-from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
-from letta.schemas.openai.chat_completion_response import (
-    ChatCompletionChunkResponse,
-    ChatCompletionResponse,
-    Choice,
-    ChunkChoice,
-    FunctionCall,
-    FunctionCallDelta,
-)
-from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
-from letta.schemas.openai.chat_completion_response import MessageDelta, ToolCall, ToolCallDelta, UsageStatistics
-from letta.settings import model_settings
-
-logger = get_logger(__name__)
-
-BASE_URL = "https://api.anthropic.com/v1"
-
-
-# https://docs.anthropic.com/claude/docs/models-overview
-# Sadly hardcoded
-MODEL_LIST = [
-    ## Opus 4.1
-    {
-        "name": "claude-opus-4-1-20250805",
-        "context_window": 200000,
-    },
-    ## Opus 3
-    {
-        "name": "claude-3-opus-20240229",
-        "context_window": 200000,
-    },
-    # 3 latest
-    {
-        "name": "claude-3-opus-latest",
-        "context_window": 200000,
-    },
-    # 4
-    {
-        "name": "claude-opus-4-20250514",
-        "context_window": 200000,
-    },
-    ## Sonnet
-    # 3.0
-    {
-        "name": "claude-3-sonnet-20240229",
-        "context_window": 200000,
-    },
-    # 3.5
-    {
-        "name": "claude-3-5-sonnet-20240620",
-        "context_window": 200000,
-    },
-    # 3.5 new
-    {
-        "name": "claude-3-5-sonnet-20241022",
-        "context_window": 200000,
-    },
-    # 3.5 latest
-    {
-        "name": "claude-3-5-sonnet-latest",
-        "context_window": 200000,
-    },
-    # 3.7
-    {
-        "name": "claude-3-7-sonnet-20250219",
-        "context_window": 200000,
-    },
-    # 3.7 latest
-    {
-        "name": "claude-3-7-sonnet-latest",
-        "context_window": 200000,
-    },
-    # 4
-    {
-        "name": "claude-sonnet-4-20250514",
-        "context_window": 200000,
-    },
-    ## Haiku
-    # 3.0
-    {
-        "name": "claude-3-haiku-20240307",
-        "context_window": 200000,
-    },
-    # 3.5
-    {
-        "name": "claude-3-5-haiku-20241022",
-        "context_window": 200000,
-    },
-    # 3.5 latest
-    {
-        "name": "claude-3-5-haiku-latest",
-        "context_window": 200000,
-    },
-]
-
-DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
-
-VALID_EVENT_TYPES = {"content_block_stop", "message_stop"}
-
-
-def anthropic_check_valid_api_key(api_key: Union[str, None]) -> None:
-    if api_key:
-        anthropic_client = anthropic.Anthropic(api_key=api_key)
-        try:
-            # just use a cheap model to count some tokens - as of 5/7/2025 this is faster than fetching the list of models
-            anthropic_client.messages.count_tokens(model=MODEL_LIST[-1]["name"], messages=[{"role": "user", "content": "a"}])
-        except anthropic.AuthenticationError as e:
-            raise LLMAuthenticationError(message=f"Failed to authenticate with Anthropic: {e}", code=ErrorCode.UNAUTHENTICATED)
-        except Exception as e:
-            raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
-    else:
-        raise ValueError("No API key provided")
-
-
-def antropic_get_model_context_window(url: str, api_key: Union[str, None], model: str) -> int:
-    for model_dict in anthropic_get_model_list(api_key=api_key):
-        if model_dict["name"] == model:
-            return model_dict["context_window"]
-    raise ValueError(f"Can't find model '{model}' in Anthropic model list")
-
-
-def anthropic_get_model_list(api_key: Optional[str]) -> dict:
-    """https://docs.anthropic.com/claude/docs/models-overview"""
-
-    # NOTE: currently there is no GET /models, so we need to hardcode
-    # return MODEL_LIST
-
-    if api_key:
-        anthropic_client = anthropic.Anthropic(api_key=api_key)
-    elif model_settings.anthropic_api_key:
-        anthropic_client = anthropic.Anthropic()
-    else:
-        raise ValueError("No API key provided")
-
-    models = anthropic_client.models.list()
-    models_json = models.model_dump()
-    assert "data" in models_json, f"Anthropic model query response missing 'data' field: {models_json}"
-    return models_json["data"]
-
-
-async def anthropic_get_model_list_async(api_key: Optional[str]) -> dict:
-    """https://docs.anthropic.com/claude/docs/models-overview"""
-
-    # NOTE: currently there is no GET /models, so we need to hardcode
-    # return MODEL_LIST
-
-    if api_key:
-        anthropic_client = anthropic.AsyncAnthropic(api_key=api_key)
-    elif model_settings.anthropic_api_key:
-        anthropic_client = anthropic.AsyncAnthropic()
-    else:
-        raise ValueError("No API key provided")
-
-    models = await anthropic_client.models.list()
-    models_json = models.model_dump()
-    assert "data" in models_json, f"Anthropic model query response missing 'data' field: {models_json}"
-    return models_json["data"]
-
-
-def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
-    """See: https://docs.anthropic.com/claude/docs/tool-use
-
-    OpenAI style:
-      "tools": [{
-        "type": "function",
-        "function": {
-            "name": "find_movies",
-            "description": "find ....",
-            "parameters": {
-              "type": "object",
-              "properties": {
-                 PARAM: {
-                   "type": PARAM_TYPE,  # eg "string"
-                   "description": PARAM_DESCRIPTION,
-                 },
-                 ...
-              },
-              "required": List[str],
-            }
-        }
-      }
-      ]
-
-    Anthropic style:
-      "tools": [{
-        "name": "find_movies",
-        "description": "find ....",
-        "input_schema": {
-          "type": "object",
-          "properties": {
-             PARAM: {
-               "type": PARAM_TYPE,  # eg "string"
-               "description": PARAM_DESCRIPTION,
-             },
-             ...
-          },
-          "required": List[str],
-        }
-      }
-      ]
-
-      Two small differences:
-        - 1 level less of nesting
-        - "parameters" -> "input_schema"
-    """
-    formatted_tools = []
-    for tool in tools:
-        formatted_tool = {
-            "name": tool.function.name,
-            "description": tool.function.description,
-            "input_schema": tool.function.parameters or {"type": "object", "properties": {}, "required": []},
-        }
-        formatted_tools.append(formatted_tool)
-
-    return formatted_tools
-
-
-def merge_tool_results_into_user_messages(messages: List[dict]):
-    """Anthropic API doesn't allow role 'tool'->'user' sequences
-
-    Example HTTP error:
-    messages: roles must alternate between "user" and "assistant", but found multiple "user" roles in a row
-
-    From: https://docs.anthropic.com/claude/docs/tool-use
-    You may be familiar with other APIs that return tool use as separate from the model's primary output,
-    or which use a special-purpose tool or function message role.
-    In contrast, Anthropic's models and API are built around alternating user and assistant messages,
-    where each message is an array of rich content blocks: text, image, tool_use, and tool_result.
-    """
-
-    # TODO walk through the messages list
-    # When a dict (dict_A) with 'role' == 'user' is followed by a dict with 'role' == 'user' (dict B), do the following
-    # dict_A["content"] = dict_A["content"] + dict_B["content"]
-
-    # The result should be a new merged_messages list that doesn't have any back-to-back dicts with 'role' == 'user'
-    merged_messages = []
-    if not messages:
-        return merged_messages
-
-    # Start with the first message in the list
-    current_message = messages[0]
-
-    for next_message in messages[1:]:
-        if current_message["role"] == "user" and next_message["role"] == "user":
-            # Merge contents of the next user message into current one
-            current_content = (
-                current_message["content"]
-                if isinstance(current_message["content"], list)
-                else [{"type": "text", "text": current_message["content"]}]
-            )
-            next_content = (
-                next_message["content"]
-                if isinstance(next_message["content"], list)
-                else [{"type": "text", "text": next_message["content"]}]
-            )
-            merged_content = current_content + next_content
-            current_message["content"] = merged_content
-        else:
-            # Append the current message to result as it's complete
-            merged_messages.append(current_message)
-            # Move on to the next message
-            current_message = next_message
-
-    # Append the last processed message to the result
-    merged_messages.append(current_message)
-
-    return merged_messages
-
-
-def remap_finish_reason(stop_reason: str) -> str:
-    """Remap Anthropic's 'stop_reason' to OpenAI 'finish_reason'
-
-    OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
-    see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
-
-    From: https://docs.anthropic.com/claude/reference/migrating-from-text-completions-to-messages#stop-reason
-
-    Messages have a stop_reason of one of the following values:
-        "end_turn": The conversational turn ended naturally.
-        "stop_sequence": One of your specified custom stop sequences was generated.
-        "max_tokens": (unchanged)
-
-    """
-    if stop_reason == "end_turn":
-        return "stop"
-    elif stop_reason == "stop_sequence":
-        return "stop"
-    elif stop_reason == "max_tokens":
-        return "length"
-    elif stop_reason == "tool_use":
-        return "function_call"
-    else:
-        raise ValueError(f"Unexpected stop_reason: {stop_reason}")
-
-
-def strip_xml_tags(string: str, tag: Optional[str]) -> str:
-    if tag is None:
-        return string
-    # Construct the regular expression pattern to find the start and end tags
-    tag_pattern = f"<{tag}.*?>|</{tag}>"
-    # Use the regular expression to replace the tags with an empty string
-    return re.sub(tag_pattern, "", string)
-
-
-def strip_xml_tags_streaming(string: str, tag: Optional[str]) -> str:
-    if tag is None:
-        return string
-
-    # Handle common partial tag cases
-    parts_to_remove = [
-        "<",  # Leftover start bracket
-        f"<{tag}",  # Opening tag start
-        f"</{tag}",  # Closing tag start
-        f"/{tag}>",  # Closing tag end
-        f"{tag}>",  # Opening tag end
-        f"/{tag}",  # Partial closing tag without >
-        ">",  # Leftover end bracket
-    ]
-
-    result = string
-    for part in parts_to_remove:
-        result = result.replace(part, "")
-
-    return result
-
-
-def convert_anthropic_response_to_chatcompletion(
-    response: anthropic.types.Message,
-    inner_thoughts_xml_tag: Optional[str] = None,
-) -> ChatCompletionResponse:
-    """
-    Example response from Claude 3:
-    response.json = {
-        'id': 'msg_01W1xg9hdRzbeN2CfZM7zD2w',
-        'type': 'message',
-        'role': 'assistant',
-        'content': [
-            {
-                'type': 'text',
-                'text': "<thinking>Analyzing user login event. This is Chad's first
-    interaction with me. I will adjust my personality and rapport accordingly.</thinking>"
-            },
-            {
-                'type':
-                'tool_use',
-                'id': 'toolu_01Ka4AuCmfvxiidnBZuNfP1u',
-                'name': 'core_memory_append',
-                'input': {
-                    'name': 'human',
-                    'content': 'Chad is logging in for the first time. I will aim to build a warm
-    and welcoming rapport.',
-                    'request_heartbeat': True
-                }
-            }
-        ],
-        'model': 'claude-3-haiku-20240307',
-        'stop_reason': 'tool_use',
-        'stop_sequence': None,
-        'usage': {
-            'input_tokens': 3305,
-            'output_tokens': 141
-        }
-    }
-    """
-    prompt_tokens = response.usage.input_tokens
-    completion_tokens = response.usage.output_tokens
-    finish_reason = remap_finish_reason(response.stop_reason)
-
-    content = None
-    reasoning_content = None
-    reasoning_content_signature = None
-    redacted_reasoning_content = None
-    tool_calls = None
-
-    if len(response.content) > 0:
-        for content_part in response.content:
-            if content_part.type == "text":
-                content = strip_xml_tags(string=content_part.text, tag=inner_thoughts_xml_tag)
-            if content_part.type == "tool_use":
-                tool_calls = [
-                    ToolCall(
-                        id=content_part.id,
-                        type="function",
-                        function=FunctionCall(
-                            name=content_part.name,
-                            arguments=json.dumps(content_part.input, indent=2),
-                        ),
-                    )
-                ]
-            if content_part.type == "thinking":
-                reasoning_content = content_part.thinking
-                reasoning_content_signature = content_part.signature
-            if content_part.type == "redacted_thinking":
-                redacted_reasoning_content = content_part.data
-
-    else:
-        raise RuntimeError("Unexpected empty content in response")
-
-    assert response.role == "assistant"
-    choice = Choice(
-        index=0,
-        finish_reason=finish_reason,
-        message=ChoiceMessage(
-            role=response.role,
-            content=content,
-            reasoning_content=reasoning_content,
-            reasoning_content_signature=reasoning_content_signature,
-            redacted_reasoning_content=redacted_reasoning_content,
-            tool_calls=tool_calls,
-        ),
-    )
-
-    return ChatCompletionResponse(
-        id=response.id,
-        choices=[choice],
-        created=get_utc_time_int(),
-        model=response.model,
-        usage=UsageStatistics(
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
-            total_tokens=prompt_tokens + completion_tokens,
-        ),
-    )
-
-
-def convert_anthropic_stream_event_to_chatcompletion(
-    event: Union[
-        BetaRawMessageStartEvent,
-        BetaRawContentBlockStartEvent,
-        BetaRawContentBlockDeltaEvent,
-        BetaRawContentBlockStopEvent,
-        BetaRawMessageDeltaEvent,
-        BetaRawMessageStopEvent,
-    ],
-    message_id: str,
-    model: str,
-    inner_thoughts_xml_tag: Optional[str] = "thinking",
-) -> ChatCompletionChunkResponse:
-    """Convert Anthropic stream events to OpenAI ChatCompletionResponse format.
-
-        Args:
-            event: The event to convert
-            message_id: The ID of the message. Anthropic does not return this on every event, so we need to keep track of it
-            model: The model used. Anthropic does not return this on every event, so we need to keep track of it
-
-        Example response from OpenAI:
-
-        'id': 'MESSAGE_ID',
-        'choices': [
-            {
-                'finish_reason': None,
-                'index': 0,
-                'delta': {
-                    'content': None,
-                    'tool_calls': [
-                        {
-                            'index': 0,
-                            'id': None,
-                            'type': 'function',
-                            'function': {
-                                'name': None,
-                                'arguments': '_th'
-                            }
-                        }
-                    ],
-                    'function_call': None
-                },
-                'logprobs': None
-            }
-        ],
-        'created': 1713216662,
-        'model': 'gpt-4o-mini-2024-07-18',
-        'system_fingerprint': 'fp_bd83329f63',
-        'object': 'chat.completion.chunk'
-    }
-    """
-    # Get finish reason
-    finish_reason = None
-    completion_chunk_tokens = 0
-
-    # Get content and tool calls
-    content = None
-    reasoning_content = None
-    reasoning_content_signature = None
-    redacted_reasoning_content = None  # NOTE called "data" in the stream
-    tool_calls = None
-    if isinstance(event, BetaRawMessageStartEvent):
-        """
-        BetaRawMessageStartEvent(
-            message=BetaMessage(
-                content=[],
-                usage=BetaUsage(
-                    input_tokens=3086,
-                    output_tokens=1,
-                ),
-                ...,
-            ),
-            type='message_start'
-        )
-        """
-        completion_chunk_tokens += event.message.usage.output_tokens
-
-    elif isinstance(event, BetaRawMessageDeltaEvent):
-        """
-        BetaRawMessageDeltaEvent(
-            delta=Delta(
-                stop_reason='tool_use',
-                stop_sequence=None
-            ),
-            type='message_delta',
-            usage=BetaMessageDeltaUsage(output_tokens=45)
-        )
-        """
-        finish_reason = remap_finish_reason(event.delta.stop_reason)
-        completion_chunk_tokens += event.usage.output_tokens
-
-    elif isinstance(event, BetaRawContentBlockDeltaEvent):
-        """
-        BetaRawContentBlockDeltaEvent(
-            delta=BetaInputJSONDelta(
-                partial_json='lo',
-                type='input_json_delta'
-            ),
-            index=0,
-            type='content_block_delta'
-        )
-
-        OR
-
-        BetaRawContentBlockDeltaEvent(
-            delta=BetaTextDelta(
-                text='👋 ',
-                type='text_delta'
-            ),
-            index=0,
-            type='content_block_delta'
-        )
-
-        """
-        # ReACT COT
-        if event.delta.type == "text_delta":
-            content = strip_xml_tags_streaming(string=event.delta.text, tag=inner_thoughts_xml_tag)
-
-        # Extended thought COT
-        elif event.delta.type == "thinking_delta":
-            # Redacted doesn't come in the delta chunks, comes all at once
-            # "redacted_thinking blocks will not have any deltas associated and will be sent as a single event."
-            # Thinking might start with ""
-            if len(event.delta.thinking) > 0:
-                reasoning_content = event.delta.thinking
-
-        # Extended thought COT signature
-        elif event.delta.type == "signature_delta":
-            if len(event.delta.signature) > 0:
-                reasoning_content_signature = event.delta.signature
-
-        # Tool calling
-        elif event.delta.type == "input_json_delta":
-            tool_calls = [
-                ToolCallDelta(
-                    index=0,
-                    function=FunctionCallDelta(
-                        name=None,
-                        arguments=event.delta.partial_json,
-                    ),
-                )
-            ]
-        else:
-            warnings.warn("Unexpected delta type: " + event.delta.type)
-
-    elif isinstance(event, BetaRawContentBlockStartEvent):
-        """
-        BetaRawContentBlockStartEvent(
-             content_block=BetaToolUseBlock(
-                 id='toolu_01LmpZhRhR3WdrRdUrfkKfFw',
-                 input={},
-                 name='get_weather',
-                 type='tool_use'
-             ),
-             index=0,
-             type='content_block_start'
-         )
-
-         OR
-
-         BetaRawContentBlockStartEvent(
-             content_block=BetaTextBlock(
-                 text='',
-                 type='text'
-             ),
-             index=0,
-             type='content_block_start'
-         )
-        """
-        if isinstance(event.content_block, BetaToolUseBlock):
-            tool_calls = [
-                ToolCallDelta(
-                    index=0,
-                    id=event.content_block.id,
-                    function=FunctionCallDelta(
-                        name=event.content_block.name,
-                        arguments="",
-                    ),
-                )
-            ]
-        elif isinstance(event.content_block, BetaTextBlock):
-            content = event.content_block.text
-        elif isinstance(event.content_block, BetaThinkingBlock):
-            reasoning_content = event.content_block.thinking
-        elif isinstance(event.content_block, BetaRedactedThinkingBlock):
-            redacted_reasoning_content = event.content_block.data
-        else:
-            warnings.warn("Unexpected content start type: " + str(type(event.content_block)))
-    elif event.type in VALID_EVENT_TYPES:
-        pass
-    else:
-        warnings.warn("Unexpected event type: " + event.type)
-
-    # Initialize base response
-    choice = ChunkChoice(
-        index=0,
-        finish_reason=finish_reason,
-        delta=MessageDelta(
-            content=content,
-            reasoning_content=reasoning_content,
-            reasoning_content_signature=reasoning_content_signature,
-            redacted_reasoning_content=redacted_reasoning_content,
-            tool_calls=tool_calls,
-        ),
-    )
-    return ChatCompletionChunkResponse(
-        id=message_id,
-        choices=[choice],
-        created=get_utc_time_int(),
-        model=model,
-        output_tokens=completion_chunk_tokens,
-    )
-
-
-def _prepare_anthropic_request(
-    data: ChatCompletionRequest,
-    inner_thoughts_xml_tag: Optional[str] = "thinking",
-    # if true, prefix fill the generation with the thinking tag
-    prefix_fill: bool = False,
-    # if true, put COT inside the tool calls instead of inside the content
-    put_inner_thoughts_in_kwargs: bool = True,
-    bedrock: bool = False,
-    # extended thinking related fields
-    # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
-    extended_thinking: bool = False,
-    max_reasoning_tokens: Optional[int] = None,
-) -> dict:
-    """Prepare the request data for Anthropic API format."""
-    if extended_thinking:
-        assert (
-            max_reasoning_tokens is not None and max_reasoning_tokens < data.max_tokens
-        ), "max tokens must be greater than thinking budget"
-        if put_inner_thoughts_in_kwargs:
-            logger.warning("Extended thinking not compatible with put_inner_thoughts_in_kwargs")
-            put_inner_thoughts_in_kwargs = False
-        # assert not prefix_fill, "extended thinking not compatible with prefix_fill"
-        # Silently disable prefix_fill for now
-        prefix_fill = False
-
-    # if needed, put inner thoughts as a kwarg for all tools
-    if data.tools and put_inner_thoughts_in_kwargs:
-        functions = add_inner_thoughts_to_functions(
-            functions=[t.function.model_dump() for t in data.tools],
-            inner_thoughts_key=INNER_THOUGHTS_KWARG,
-            inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
-        )
-        data.tools = [Tool(function=f) for f in functions]
-
-    # convert the tools to Anthropic's payload format
-    anthropic_tools = None if data.tools is None else convert_tools_to_anthropic_format(data.tools)
-
-    # pydantic -> dict
-    data = data.model_dump(exclude_none=True)
-
-    if extended_thinking:
-        data["thinking"] = {
-            "type": "enabled",
-            "budget_tokens": max_reasoning_tokens,
-        }
-        # `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking'
-        data["temperature"] = 1.0
-
-    if "functions" in data:
-        raise ValueError("'functions' unexpected in Anthropic API payload")
-
-    # Handle tools
-    if "tools" in data and data["tools"] is None:
-        data.pop("tools")
-        data.pop("tool_choice", None)
-    elif anthropic_tools is not None:
-        # TODO eventually enable parallel tool use
-        data["tools"] = anthropic_tools
-
-    # Move 'system' to the top level
-    assert data["messages"][0]["role"] == "system", f"Expected 'system' role in messages[0]:\n{data['messages'][0]}"
-    data["system"] = data["messages"][0]["content"]
-    data["messages"] = data["messages"][1:]
-
-    # Process messages
-    for message in data["messages"]:
-        if "content" not in message:
-            message["content"] = None
-
-    # Convert to Anthropic format
-    msg_objs = [
-        _Message.dict_to_message(
-            agent_id=None,
-            openai_message_dict=m,
-        )
-        for m in data["messages"]
-    ]
-    data["messages"] = [
-        m.to_anthropic_dict(
-            inner_thoughts_xml_tag=inner_thoughts_xml_tag,
-            put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
-        )
-        for m in msg_objs
-    ]
-
-    # Ensure first message is user
-    if data["messages"][0]["role"] != "user":
-        data["messages"] = [{"role": "user", "content": DUMMY_FIRST_USER_MESSAGE}] + data["messages"]
-
-    # Handle alternating messages
-    data["messages"] = merge_tool_results_into_user_messages(data["messages"])
-
-    # Handle prefix fill (not compatible with inner-thouguhts-in-kwargs)
-    # https://docs.anthropic.com/en/api/messages#body-messages
-    # NOTE: cannot prefill with tools for opus:
-    # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
-    if prefix_fill and not put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
-        if not bedrock:  # not support for bedrock
-            data["messages"].append(
-                # Start the thinking process for the assistant
-                {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
-            )
-
-    # Validate max_tokens
-    assert "max_tokens" in data, data
-
-    # Remove OpenAI-specific fields
-    for field in ["frequency_penalty", "logprobs", "n", "top_p", "presence_penalty", "user", "stream"]:
-        data.pop(field, None)
-
-    return data
--- a/letta/schemas/providers/anthropic.py
+++ b/letta/schemas/providers/anthropic.py
@@ -1,12 +1,90 @@
 import warnings
 from typing import Literal

+import anthropic
 from pydantic import Field

 from letta.schemas.enums import ProviderCategory, ProviderType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.providers.base import Provider

+# https://docs.anthropic.com/claude/docs/models-overview
+# Sadly hardcoded
+MODEL_LIST = [
+    ## Opus 4.1
+    {
+        "name": "claude-opus-4-1-20250805",
+        "context_window": 200000,
+    },
+    ## Opus 3
+    {
+        "name": "claude-3-opus-20240229",
+        "context_window": 200000,
+    },
+    # 3 latest
+    {
+        "name": "claude-3-opus-latest",
+        "context_window": 200000,
+    },
+    # 4
+    {
+        "name": "claude-opus-4-20250514",
+        "context_window": 200000,
+    },
+    ## Sonnet
+    # 3.0
+    {
+        "name": "claude-3-sonnet-20240229",
+        "context_window": 200000,
+    },
+    # 3.5
+    {
+        "name": "claude-3-5-sonnet-20240620",
+        "context_window": 200000,
+    },
+    # 3.5 new
+    {
+        "name": "claude-3-5-sonnet-20241022",
+        "context_window": 200000,
+    },
+    # 3.5 latest
+    {
+        "name": "claude-3-5-sonnet-latest",
+        "context_window": 200000,
+    },
+    # 3.7
+    {
+        "name": "claude-3-7-sonnet-20250219",
+        "context_window": 200000,
+    },
+    # 3.7 latest
+    {
+        "name": "claude-3-7-sonnet-latest",
+        "context_window": 200000,
+    },
+    # 4
+    {
+        "name": "claude-sonnet-4-20250514",
+        "context_window": 200000,
+    },
+    ## Haiku
+    # 3.0
+    {
+        "name": "claude-3-haiku-20240307",
+        "context_window": 200000,
+    },
+    # 3.5
+    {
+        "name": "claude-3-5-haiku-20241022",
+        "context_window": 200000,
+    },
+    # 3.5 latest
+    {
+        "name": "claude-3-5-haiku-latest",
+        "context_window": 200000,
+    },
+]
+

 class AnthropicProvider(Provider):
    provider_type: Literal[ProviderType.anthropic] = Field(ProviderType.anthropic, description="The type of the provider.")
@@ -15,19 +93,39 @@ class AnthropicProvider(Provider):
    base_url: str = "https://api.anthropic.com/v1"

    async def check_api_key(self):
-        from letta.llm_api.anthropic import anthropic_check_valid_api_key
-
-        anthropic_check_valid_api_key(self.api_key)
+        if self.api_key:
+            anthropic_client = anthropic.Anthropic(api_key=self.api_key)
+            try:
+                # just use a cheap model to count some tokens - as of 5/7/2025 this is faster than fetching the list of models
+                anthropic_client.messages.count_tokens(model=MODEL_LIST[-1]["name"], messages=[{"role": "user", "content": "a"}])
+            except anthropic.AuthenticationError as e:
+                raise LLMAuthenticationError(message=f"Failed to authenticate with Anthropic: {e}", code=ErrorCode.UNAUTHENTICATED)
+            except Exception as e:
+                raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
+        else:
+            raise ValueError("No API key provided")

    async def list_llm_models_async(self) -> list[LLMConfig]:
-        from letta.llm_api.anthropic import anthropic_get_model_list_async
+        """
+        https://docs.anthropic.com/claude/docs/models-overview

-        models = await anthropic_get_model_list_async(api_key=self.api_key)
-        return self._list_llm_models(models)
+        NOTE: currently there is no GET /models, so we need to hardcode
+        """
+        if self.api_key:
+            anthropic_client = anthropic.AsyncAnthropic(api_key=self.api_key)
+        elif model_settings.anthropic_api_key:
+            anthropic_client = anthropic.AsyncAnthropic()
+        else:
+            raise ValueError("No API key provided")
+
+        models = await anthropic_client.models.list()
+        models_json = models.model_dump()
+        assert "data" in models_json, f"Anthropic model query response missing 'data' field: {models_json}"
+        models_data = models_json["data"]
+
+        return self._list_llm_models(models_data)

    def _list_llm_models(self, models) -> list[LLMConfig]:
-        from letta.llm_api.anthropic import MODEL_LIST
-
        configs = []
        for model in models:
            if any((model.get("type") != "model", "id" not in model, model.get("id").startswith("claude-2"))):