From 48f78991a8cc5c4ebb04141185d87849e1912cfd Mon Sep 17 00:00:00 2001 From: cthomas Date: Wed, 13 Aug 2025 15:26:27 -0700 Subject: [PATCH] chore: clean up legacy bedrock path (#3906) --- letta/llm_api/anthropic.py | 110 +-------------------------------- letta/llm_api/aws_bedrock.py | 14 ----- letta/llm_api/llm_api_tools.py | 35 +---------- 3 files changed, 3 insertions(+), 156 deletions(-) diff --git a/letta/llm_api/anthropic.py b/letta/llm_api/anthropic.py index 4c0999c0..8b7cd2a3 100644 --- a/letta/llm_api/anthropic.py +++ b/letta/llm_api/anthropic.py @@ -1,11 +1,9 @@ import json import re -import time import warnings -from typing import Generator, List, Optional, Union +from typing import List, Optional, Union import anthropic -from anthropic import PermissionDeniedError from anthropic.types.beta import ( BetaRawContentBlockDeltaEvent, BetaRawContentBlockStartEvent, @@ -19,14 +17,11 @@ from anthropic.types.beta import ( BetaToolUseBlock, ) -from letta.errors import BedrockError, BedrockPermissionError, ErrorCode, LLMAuthenticationError, LLMError +from letta.errors import ErrorCode, LLMAuthenticationError, LLMError from letta.helpers.datetime_helpers import get_utc_time_int -from letta.llm_api.aws_bedrock import get_bedrock_client from letta.llm_api.helpers import add_inner_thoughts_to_functions from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION from letta.log import get_logger -from letta.otel.tracing import log_event -from letta.schemas.enums import ProviderCategory from letta.schemas.message import Message as _Message from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool from letta.schemas.openai.chat_completion_response import ( @@ -39,8 +34,6 @@ from letta.schemas.openai.chat_completion_response import ( ) from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage from letta.schemas.openai.chat_completion_response import MessageDelta, ToolCall, ToolCallDelta, UsageStatistics -from letta.services.provider_manager import ProviderManager -from letta.services.user_manager import UserManager from letta.settings import model_settings logger = get_logger(__name__) @@ -780,102 +773,3 @@ def _prepare_anthropic_request( data.pop(field, None) return data - - -def anthropic_bedrock_chat_completions_request( - data: ChatCompletionRequest, - inner_thoughts_xml_tag: Optional[str] = "thinking", - provider_name: Optional[str] = None, - provider_category: Optional[ProviderCategory] = None, - user_id: Optional[str] = None, -) -> ChatCompletionResponse: - """Make a chat completion request to Anthropic via AWS Bedrock.""" - data = _prepare_anthropic_request(data, inner_thoughts_xml_tag, bedrock=True) - - # Get the client - if provider_category == ProviderCategory.byok: - actor = UserManager().get_user_or_default(user_id=user_id) - access_key, secret_key, region = ProviderManager().get_bedrock_credentials_async(provider_name, actor=actor) - client = get_bedrock_client(access_key, secret_key, region) - else: - client = get_bedrock_client() - - # Make the request - try: - # bedrock does not support certain args - print("Warning: Tool rules not supported with Anthropic Bedrock") - data["tool_choice"] = {"type": "any"} - log_event(name="llm_request_sent", attributes=data) - response = client.messages.create(**data) - log_event(name="llm_response_received", attributes={"response": response.json()}) - return convert_anthropic_response_to_chatcompletion(response=response, inner_thoughts_xml_tag=inner_thoughts_xml_tag) - except PermissionDeniedError: - raise BedrockPermissionError(f"User does not have access to the Bedrock model with the specified ID. {data['model']}") - except Exception as e: - raise BedrockError(f"Bedrock error: {e}") - - -def anthropic_chat_completions_request_stream( - data: ChatCompletionRequest, - inner_thoughts_xml_tag: Optional[str] = "thinking", - put_inner_thoughts_in_kwargs: bool = False, - extended_thinking: bool = False, - max_reasoning_tokens: Optional[int] = None, - provider_name: Optional[str] = None, - provider_category: Optional[ProviderCategory] = None, - betas: List[str] = ["tools-2024-04-04"], - user_id: Optional[str] = None, -) -> Generator[ChatCompletionChunkResponse, None, None]: - """Stream chat completions from Anthropic API. - - Similar to OpenAI's streaming, but using Anthropic's native streaming support. - See: https://docs.anthropic.com/claude/reference/messages-streaming - """ - data = _prepare_anthropic_request( - data=data, - inner_thoughts_xml_tag=inner_thoughts_xml_tag, - put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs, - extended_thinking=extended_thinking, - max_reasoning_tokens=max_reasoning_tokens, - ) - if provider_category == ProviderCategory.byok: - actor = UserManager().get_user_or_default(user_id=user_id) - api_key = ProviderManager().get_override_key(provider_name, actor=actor) - anthropic_client = anthropic.Anthropic(api_key=api_key) - elif model_settings.anthropic_api_key: - anthropic_client = anthropic.Anthropic() - - with anthropic_client.beta.messages.stream( - **data, - betas=betas, - ) as stream: - # Stream: https://github.com/anthropics/anthropic-sdk-python/blob/d212ec9f6d5e956f13bc0ddc3d86b5888a954383/src/anthropic/lib/streaming/_beta_messages.py#L22 - message_id = None - model = None - - for chunk in stream._raw_stream: - time.sleep(0.01) # Anthropic is really fast, faster than frontend can upload. - if isinstance(chunk, BetaRawMessageStartEvent): - """ - BetaRawMessageStartEvent( - message=BetaMessage( - id='MESSAGE ID HERE', - content=[], - model='claude-3-5-sonnet-20241022', - role='assistant', - stop_reason=None, - stop_sequence=None, - type='message', - usage=BetaUsage( - cache_creation_input_tokens=0, - cache_read_input_tokens=0, - input_tokens=30, - output_tokens=4 - ) - ), - type='message_start' - ), - """ - message_id = chunk.message.id - model = chunk.message.model - yield convert_anthropic_stream_event_to_chatcompletion(chunk, message_id, model, inner_thoughts_xml_tag) diff --git a/letta/llm_api/aws_bedrock.py b/letta/llm_api/aws_bedrock.py index fd994795..67497b4e 100644 --- a/letta/llm_api/aws_bedrock.py +++ b/letta/llm_api/aws_bedrock.py @@ -3,7 +3,6 @@ Note that this formally only supports Anthropic Bedrock. TODO (cliandy): determine what other providers are supported and what is needed to add support. """ -import os from typing import Any, Optional from anthropic import AnthropicBedrock @@ -14,19 +13,6 @@ from letta.settings import model_settings logger = get_logger(__name__) -def has_valid_aws_credentials() -> bool: - """ - Check if AWS credentials are properly configured. - """ - return all( - ( - os.getenv("AWS_ACCESS_KEY_ID"), - os.getenv("AWS_SECRET_ACCESS_KEY"), - os.getenv("AWS_DEFAULT_REGION"), - ) - ) - - def get_bedrock_client( access_key_id: Optional[str] = None, secret_key: Optional[str] = None, diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py index 99974551..d63c5d28 100644 --- a/letta/llm_api/llm_api_tools.py +++ b/letta/llm_api/llm_api_tools.py @@ -7,8 +7,6 @@ import requests from letta.constants import CLI_WARNING_PREFIX from letta.errors import LettaConfigurationError, RateLimitExceededError -from letta.llm_api.anthropic import anthropic_bedrock_chat_completions_request -from letta.llm_api.aws_bedrock import has_valid_aws_credentials from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs from letta.llm_api.openai import ( @@ -25,7 +23,7 @@ from letta.otel.tracing import log_event, trace_method from letta.schemas.enums import ProviderCategory from letta.schemas.llm_config import LLMConfig from letta.schemas.message import Message -from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, cast_message_to_subtype +from letta.schemas.openai.chat_completion_request import ChatCompletionRequest from letta.schemas.openai.chat_completion_response import ChatCompletionResponse from letta.schemas.provider_trace import ProviderTraceCreate from letta.services.telemetry_manager import TelemetryManager @@ -384,37 +382,6 @@ def create( return response - elif llm_config.model_endpoint_type == "bedrock": - """Anthropic endpoint that goes via /embeddings instead of /chat/completions""" - - if stream: - raise NotImplementedError("Streaming not yet implemented for Anthropic (via the /embeddings endpoint).") - if not use_tool_naming: - raise NotImplementedError("Only tool calling supported on Anthropic API requests") - - if not has_valid_aws_credentials(): - raise LettaConfigurationError(message="Invalid or missing AWS credentials. Please configure valid AWS credentials.") - - tool_call = None - if force_tool_call is not None: - tool_call = {"type": "function", "function": {"name": force_tool_call}} - assert functions is not None - - return anthropic_bedrock_chat_completions_request( - data=ChatCompletionRequest( - model=llm_config.model, - messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages], - tools=[{"type": "function", "function": f} for f in functions] if functions else None, - tool_choice=tool_call, - # user=str(user_id), - # NOTE: max_tokens is required for Anthropic API - max_tokens=llm_config.max_tokens, - ), - provider_name=llm_config.provider_name, - provider_category=llm_config.provider_category, - user_id=user_id, - ) - elif llm_config.model_endpoint_type == "deepseek": if model_settings.deepseek_api_key is None and llm_config.model_endpoint == "": # only is a problem if we are *not* using an openai proxy