chore: clean up legacy bedrock path (#3906)
This commit is contained in:
@@ -1,11 +1,9 @@
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import warnings
|
||||
from typing import Generator, List, Optional, Union
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import anthropic
|
||||
from anthropic import PermissionDeniedError
|
||||
from anthropic.types.beta import (
|
||||
BetaRawContentBlockDeltaEvent,
|
||||
BetaRawContentBlockStartEvent,
|
||||
@@ -19,14 +17,11 @@ from anthropic.types.beta import (
|
||||
BetaToolUseBlock,
|
||||
)
|
||||
|
||||
from letta.errors import BedrockError, BedrockPermissionError, ErrorCode, LLMAuthenticationError, LLMError
|
||||
from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
|
||||
from letta.helpers.datetime_helpers import get_utc_time_int
|
||||
from letta.llm_api.aws_bedrock import get_bedrock_client
|
||||
from letta.llm_api.helpers import add_inner_thoughts_to_functions
|
||||
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
|
||||
from letta.log import get_logger
|
||||
from letta.otel.tracing import log_event
|
||||
from letta.schemas.enums import ProviderCategory
|
||||
from letta.schemas.message import Message as _Message
|
||||
from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
|
||||
from letta.schemas.openai.chat_completion_response import (
|
||||
@@ -39,8 +34,6 @@ from letta.schemas.openai.chat_completion_response import (
|
||||
)
|
||||
from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
|
||||
from letta.schemas.openai.chat_completion_response import MessageDelta, ToolCall, ToolCallDelta, UsageStatistics
|
||||
from letta.services.provider_manager import ProviderManager
|
||||
from letta.services.user_manager import UserManager
|
||||
from letta.settings import model_settings
|
||||
|
||||
logger = get_logger(__name__)
|
||||
@@ -780,102 +773,3 @@ def _prepare_anthropic_request(
|
||||
data.pop(field, None)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def anthropic_bedrock_chat_completions_request(
    data: ChatCompletionRequest,
    inner_thoughts_xml_tag: Optional[str] = "thinking",
    provider_name: Optional[str] = None,
    provider_category: Optional[ProviderCategory] = None,
    user_id: Optional[str] = None,
) -> ChatCompletionResponse:
    """Make a chat completion request to Anthropic via AWS Bedrock.

    Args:
        data: The OpenAI-style chat completion request to translate for Anthropic.
        inner_thoughts_xml_tag: XML tag used to wrap inner thoughts in the prompt.
        provider_name: Name of the BYOK provider whose credentials should be used.
        provider_category: When ``ProviderCategory.byok``, per-user Bedrock
            credentials are looked up; otherwise the default client is used.
        user_id: ID of the user whose credentials are resolved for BYOK.

    Returns:
        The Anthropic response converted back into a ChatCompletionResponse.

    Raises:
        BedrockPermissionError: If the caller lacks access to the Bedrock model.
        BedrockError: For any other failure while calling Bedrock.
    """
    data = _prepare_anthropic_request(data, inner_thoughts_xml_tag, bedrock=True)

    # Resolve the Bedrock client: BYOK providers carry their own AWS credentials.
    if provider_category == ProviderCategory.byok:
        actor = UserManager().get_user_or_default(user_id=user_id)
        # NOTE(review): this calls an `*_async` method without awaiting it; if it
        # is actually a coroutine function, the tuple unpacking below would fail
        # at runtime — confirm the method is synchronous despite its name.
        access_key, secret_key, region = ProviderManager().get_bedrock_credentials_async(provider_name, actor=actor)
        client = get_bedrock_client(access_key, secret_key, region)
    else:
        client = get_bedrock_client()

    # Make the request
    try:
        # Bedrock does not support certain args (e.g. tool rules), so force a
        # generic "any" tool choice. Use the module logger instead of print()
        # so the warning goes through the configured logging pipeline.
        logger.warning("Tool rules not supported with Anthropic Bedrock")
        data["tool_choice"] = {"type": "any"}
        log_event(name="llm_request_sent", attributes=data)
        response = client.messages.create(**data)
        log_event(name="llm_response_received", attributes={"response": response.json()})
        return convert_anthropic_response_to_chatcompletion(response=response, inner_thoughts_xml_tag=inner_thoughts_xml_tag)
    except PermissionDeniedError:
        raise BedrockPermissionError(f"User does not have access to the Bedrock model with the specified ID. {data['model']}")
    except Exception as e:
        # Chain the original exception so the root cause is preserved in tracebacks.
        raise BedrockError(f"Bedrock error: {e}") from e
|
||||
|
||||
|
||||
def anthropic_chat_completions_request_stream(
    data: ChatCompletionRequest,
    inner_thoughts_xml_tag: Optional[str] = "thinking",
    put_inner_thoughts_in_kwargs: bool = False,
    extended_thinking: bool = False,
    max_reasoning_tokens: Optional[int] = None,
    provider_name: Optional[str] = None,
    provider_category: Optional[ProviderCategory] = None,
    betas: Optional[List[str]] = None,
    user_id: Optional[str] = None,
) -> Generator[ChatCompletionChunkResponse, None, None]:
    """Stream chat completions from the Anthropic API.

    Similar to OpenAI's streaming, but using Anthropic's native streaming support.
    See: https://docs.anthropic.com/claude/reference/messages-streaming

    Args:
        data: The OpenAI-style chat completion request to translate for Anthropic.
        inner_thoughts_xml_tag: XML tag used to wrap inner thoughts in the prompt.
        put_inner_thoughts_in_kwargs: Whether inner thoughts go into tool-call kwargs.
        extended_thinking: Enable Anthropic extended-thinking mode.
        max_reasoning_tokens: Token budget for reasoning when extended thinking is on.
        provider_name: Name of the BYOK provider whose API key should be used.
        provider_category: When ``ProviderCategory.byok``, a per-user override key
            is looked up; otherwise the key from ``model_settings`` is used.
        betas: Anthropic beta feature flags; defaults to ``["tools-2024-04-04"]``.
        user_id: ID of the user whose override key is resolved for BYOK.

    Yields:
        One ChatCompletionChunkResponse per raw Anthropic stream event.

    Raises:
        ValueError: If no Anthropic API key is available.
    """
    # Avoid the mutable-default-argument pitfall: build the default per call.
    if betas is None:
        betas = ["tools-2024-04-04"]

    data = _prepare_anthropic_request(
        data=data,
        inner_thoughts_xml_tag=inner_thoughts_xml_tag,
        put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
        extended_thinking=extended_thinking,
        max_reasoning_tokens=max_reasoning_tokens,
    )

    if provider_category == ProviderCategory.byok:
        actor = UserManager().get_user_or_default(user_id=user_id)
        api_key = ProviderManager().get_override_key(provider_name, actor=actor)
        anthropic_client = anthropic.Anthropic(api_key=api_key)
    elif model_settings.anthropic_api_key:
        # NOTE(review): the configured key is not passed explicitly here; the SDK
        # presumably picks it up from the environment — confirm.
        anthropic_client = anthropic.Anthropic()
    else:
        # Previously the client was left unbound on this path, which surfaced
        # later as an opaque UnboundLocalError; fail fast with a clear message.
        raise ValueError("No Anthropic API key available for streaming request")

    with anthropic_client.beta.messages.stream(
        **data,
        betas=betas,
    ) as stream:
        # NOTE: iterates the SDK-private `_raw_stream` to receive unprocessed events.
        # Stream: https://github.com/anthropics/anthropic-sdk-python/blob/d212ec9f6d5e956f13bc0ddc3d86b5888a954383/src/anthropic/lib/streaming/_beta_messages.py#L22
        message_id = None
        model = None

        for chunk in stream._raw_stream:
            time.sleep(0.01)  # Anthropic is really fast, faster than frontend can upload.
            if isinstance(chunk, BetaRawMessageStartEvent):
                # The message_start event carries the message id and model that
                # later chunks omit, so capture them for the converted chunks.
                # Example payload:
                #   BetaRawMessageStartEvent(
                #       message=BetaMessage(
                #           id='MESSAGE ID HERE',
                #           content=[],
                #           model='claude-3-5-sonnet-20241022',
                #           role='assistant',
                #           stop_reason=None,
                #           stop_sequence=None,
                #           type='message',
                #           usage=BetaUsage(
                #               cache_creation_input_tokens=0,
                #               cache_read_input_tokens=0,
                #               input_tokens=30,
                #               output_tokens=4,
                #           ),
                #       ),
                #       type='message_start',
                #   )
                message_id = chunk.message.id
                model = chunk.message.model
            yield convert_anthropic_stream_event_to_chatcompletion(chunk, message_id, model, inner_thoughts_xml_tag)
|
||||
|
||||
@@ -3,7 +3,6 @@ Note that this formally only supports Anthropic Bedrock.
|
||||
TODO (cliandy): determine what other providers are supported and what is needed to add support.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Any, Optional
|
||||
|
||||
from anthropic import AnthropicBedrock
|
||||
@@ -14,19 +13,6 @@ from letta.settings import model_settings
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def has_valid_aws_credentials() -> bool:
    """Return True when every AWS credential environment variable is set and non-empty."""
    required_vars = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION")
    return all(os.getenv(var) for var in required_vars)
|
||||
|
||||
|
||||
def get_bedrock_client(
|
||||
access_key_id: Optional[str] = None,
|
||||
secret_key: Optional[str] = None,
|
||||
|
||||
@@ -7,8 +7,6 @@ import requests
|
||||
|
||||
from letta.constants import CLI_WARNING_PREFIX
|
||||
from letta.errors import LettaConfigurationError, RateLimitExceededError
|
||||
from letta.llm_api.anthropic import anthropic_bedrock_chat_completions_request
|
||||
from letta.llm_api.aws_bedrock import has_valid_aws_credentials
|
||||
from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion
|
||||
from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
|
||||
from letta.llm_api.openai import (
|
||||
@@ -25,7 +23,7 @@ from letta.otel.tracing import log_event, trace_method
|
||||
from letta.schemas.enums import ProviderCategory
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
from letta.schemas.message import Message
|
||||
from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, cast_message_to_subtype
|
||||
from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
|
||||
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
|
||||
from letta.schemas.provider_trace import ProviderTraceCreate
|
||||
from letta.services.telemetry_manager import TelemetryManager
|
||||
@@ -384,37 +382,6 @@ def create(
|
||||
|
||||
return response
|
||||
|
||||
elif llm_config.model_endpoint_type == "bedrock":
|
||||
"""Anthropic endpoint that goes via /embeddings instead of /chat/completions"""
|
||||
|
||||
if stream:
|
||||
raise NotImplementedError("Streaming not yet implemented for Anthropic (via the /embeddings endpoint).")
|
||||
if not use_tool_naming:
|
||||
raise NotImplementedError("Only tool calling supported on Anthropic API requests")
|
||||
|
||||
if not has_valid_aws_credentials():
|
||||
raise LettaConfigurationError(message="Invalid or missing AWS credentials. Please configure valid AWS credentials.")
|
||||
|
||||
tool_call = None
|
||||
if force_tool_call is not None:
|
||||
tool_call = {"type": "function", "function": {"name": force_tool_call}}
|
||||
assert functions is not None
|
||||
|
||||
return anthropic_bedrock_chat_completions_request(
|
||||
data=ChatCompletionRequest(
|
||||
model=llm_config.model,
|
||||
messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
|
||||
tools=[{"type": "function", "function": f} for f in functions] if functions else None,
|
||||
tool_choice=tool_call,
|
||||
# user=str(user_id),
|
||||
# NOTE: max_tokens is required for Anthropic API
|
||||
max_tokens=llm_config.max_tokens,
|
||||
),
|
||||
provider_name=llm_config.provider_name,
|
||||
provider_category=llm_config.provider_category,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
elif llm_config.model_endpoint_type == "deepseek":
|
||||
if model_settings.deepseek_api_key is None and llm_config.model_endpoint == "":
|
||||
# only is a problem if we are *not* using an openai proxy
|
||||
|
||||
Reference in New Issue
Block a user