chore: clean up legacy bedrock path (#3906)
This commit is contained in:
@@ -1,11 +1,9 @@
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import warnings
|
||||
from typing import Generator, List, Optional, Union
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import anthropic
|
||||
from anthropic import PermissionDeniedError
|
||||
from anthropic.types.beta import (
|
||||
BetaRawContentBlockDeltaEvent,
|
||||
BetaRawContentBlockStartEvent,
|
||||
@@ -19,14 +17,11 @@ from anthropic.types.beta import (
|
||||
BetaToolUseBlock,
|
||||
)
|
||||
|
||||
from letta.errors import BedrockError, BedrockPermissionError, ErrorCode, LLMAuthenticationError, LLMError
|
||||
from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
|
||||
from letta.helpers.datetime_helpers import get_utc_time_int
|
||||
from letta.llm_api.aws_bedrock import get_bedrock_client
|
||||
from letta.llm_api.helpers import add_inner_thoughts_to_functions
|
||||
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
|
||||
from letta.log import get_logger
|
||||
from letta.otel.tracing import log_event
|
||||
from letta.schemas.enums import ProviderCategory
|
||||
from letta.schemas.message import Message as _Message
|
||||
from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
|
||||
from letta.schemas.openai.chat_completion_response import (
|
||||
@@ -39,8 +34,6 @@ from letta.schemas.openai.chat_completion_response import (
|
||||
)
|
||||
from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
|
||||
from letta.schemas.openai.chat_completion_response import MessageDelta, ToolCall, ToolCallDelta, UsageStatistics
|
||||
from letta.services.provider_manager import ProviderManager
|
||||
from letta.services.user_manager import UserManager
|
||||
from letta.settings import model_settings
|
||||
|
||||
logger = get_logger(__name__)
|
||||
@@ -780,102 +773,3 @@ def _prepare_anthropic_request(
|
||||
data.pop(field, None)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def anthropic_bedrock_chat_completions_request(
    data: ChatCompletionRequest,
    inner_thoughts_xml_tag: Optional[str] = "thinking",
    provider_name: Optional[str] = None,
    provider_category: Optional[ProviderCategory] = None,
    user_id: Optional[str] = None,
) -> ChatCompletionResponse:
    """Make a chat completion request to Anthropic via AWS Bedrock.

    Args:
        data: The OpenAI-style chat completion request to translate for Anthropic.
        inner_thoughts_xml_tag: XML tag used to wrap inner thoughts in the prompt.
        provider_name: Name of the BYOK provider whose credentials should be used.
        provider_category: When ``ProviderCategory.byok``, per-user Bedrock
            credentials are looked up; otherwise the default client is used.
        user_id: ID of the user whose credentials are resolved for BYOK.

    Returns:
        The Anthropic response converted back into a ChatCompletionResponse.

    Raises:
        BedrockPermissionError: If the caller lacks access to the Bedrock model.
        BedrockError: For any other failure while calling Bedrock.
    """
    data = _prepare_anthropic_request(data, inner_thoughts_xml_tag, bedrock=True)

    # Resolve the Bedrock client: BYOK providers carry their own AWS credentials.
    if provider_category == ProviderCategory.byok:
        actor = UserManager().get_user_or_default(user_id=user_id)
        # NOTE(review): this calls an `*_async` method without awaiting it; if it
        # is actually a coroutine function, the tuple unpacking below would fail
        # at runtime — confirm the method is synchronous despite its name.
        access_key, secret_key, region = ProviderManager().get_bedrock_credentials_async(provider_name, actor=actor)
        client = get_bedrock_client(access_key, secret_key, region)
    else:
        client = get_bedrock_client()

    # Make the request
    try:
        # Bedrock does not support certain args (e.g. tool rules), so force a
        # generic "any" tool choice. Use the module logger instead of print()
        # so the warning goes through the configured logging pipeline.
        logger.warning("Tool rules not supported with Anthropic Bedrock")
        data["tool_choice"] = {"type": "any"}
        log_event(name="llm_request_sent", attributes=data)
        response = client.messages.create(**data)
        log_event(name="llm_response_received", attributes={"response": response.json()})
        return convert_anthropic_response_to_chatcompletion(response=response, inner_thoughts_xml_tag=inner_thoughts_xml_tag)
    except PermissionDeniedError:
        raise BedrockPermissionError(f"User does not have access to the Bedrock model with the specified ID. {data['model']}")
    except Exception as e:
        # Chain the original exception so the root cause is preserved in tracebacks.
        raise BedrockError(f"Bedrock error: {e}") from e
|
||||
|
||||
|
||||
def anthropic_chat_completions_request_stream(
    data: ChatCompletionRequest,
    inner_thoughts_xml_tag: Optional[str] = "thinking",
    put_inner_thoughts_in_kwargs: bool = False,
    extended_thinking: bool = False,
    max_reasoning_tokens: Optional[int] = None,
    provider_name: Optional[str] = None,
    provider_category: Optional[ProviderCategory] = None,
    betas: Optional[List[str]] = None,
    user_id: Optional[str] = None,
) -> Generator[ChatCompletionChunkResponse, None, None]:
    """Stream chat completions from the Anthropic API.

    Similar to OpenAI's streaming, but using Anthropic's native streaming support.
    See: https://docs.anthropic.com/claude/reference/messages-streaming

    Args:
        data: The OpenAI-style chat completion request to translate for Anthropic.
        inner_thoughts_xml_tag: XML tag used to wrap inner thoughts in the prompt.
        put_inner_thoughts_in_kwargs: Whether inner thoughts go into tool-call kwargs.
        extended_thinking: Enable Anthropic extended-thinking mode.
        max_reasoning_tokens: Token budget for reasoning when extended thinking is on.
        provider_name: Name of the BYOK provider whose API key should be used.
        provider_category: When ``ProviderCategory.byok``, a per-user override key
            is looked up; otherwise the key from ``model_settings`` is used.
        betas: Anthropic beta feature flags; defaults to ``["tools-2024-04-04"]``.
        user_id: ID of the user whose override key is resolved for BYOK.

    Yields:
        One ChatCompletionChunkResponse per raw Anthropic stream event.

    Raises:
        ValueError: If no Anthropic API key is available.
    """
    # Avoid the mutable-default-argument pitfall: build the default per call.
    if betas is None:
        betas = ["tools-2024-04-04"]

    data = _prepare_anthropic_request(
        data=data,
        inner_thoughts_xml_tag=inner_thoughts_xml_tag,
        put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
        extended_thinking=extended_thinking,
        max_reasoning_tokens=max_reasoning_tokens,
    )

    if provider_category == ProviderCategory.byok:
        actor = UserManager().get_user_or_default(user_id=user_id)
        api_key = ProviderManager().get_override_key(provider_name, actor=actor)
        anthropic_client = anthropic.Anthropic(api_key=api_key)
    elif model_settings.anthropic_api_key:
        # NOTE(review): the configured key is not passed explicitly here; the SDK
        # presumably picks it up from the environment — confirm.
        anthropic_client = anthropic.Anthropic()
    else:
        # Previously the client was left unbound on this path, which surfaced
        # later as an opaque UnboundLocalError; fail fast with a clear message.
        raise ValueError("No Anthropic API key available for streaming request")

    with anthropic_client.beta.messages.stream(
        **data,
        betas=betas,
    ) as stream:
        # NOTE: iterates the SDK-private `_raw_stream` to receive unprocessed events.
        # Stream: https://github.com/anthropics/anthropic-sdk-python/blob/d212ec9f6d5e956f13bc0ddc3d86b5888a954383/src/anthropic/lib/streaming/_beta_messages.py#L22
        message_id = None
        model = None

        for chunk in stream._raw_stream:
            time.sleep(0.01)  # Anthropic is really fast, faster than frontend can upload.
            if isinstance(chunk, BetaRawMessageStartEvent):
                # The message_start event carries the message id and model that
                # later chunks omit, so capture them for the converted chunks.
                # Example payload:
                #   BetaRawMessageStartEvent(
                #       message=BetaMessage(
                #           id='MESSAGE ID HERE',
                #           content=[],
                #           model='claude-3-5-sonnet-20241022',
                #           role='assistant',
                #           stop_reason=None,
                #           stop_sequence=None,
                #           type='message',
                #           usage=BetaUsage(
                #               cache_creation_input_tokens=0,
                #               cache_read_input_tokens=0,
                #               input_tokens=30,
                #               output_tokens=4,
                #           ),
                #       ),
                #       type='message_start',
                #   )
                message_id = chunk.message.id
                model = chunk.message.model
            yield convert_anthropic_stream_event_to_chatcompletion(chunk, message_id, model, inner_thoughts_xml_tag)
|
||||
|
||||
@@ -3,7 +3,6 @@ Note that this formally only supports Anthropic Bedrock.
|
||||
TODO (cliandy): determine what other providers are supported and what is needed to add support.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Any, Optional
|
||||
|
||||
from anthropic import AnthropicBedrock
|
||||
@@ -14,19 +13,6 @@ from letta.settings import model_settings
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def has_valid_aws_credentials() -> bool:
    """Return True when every AWS credential environment variable is set and non-empty."""
    required_vars = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION")
    return all(os.getenv(var) for var in required_vars)
|
||||
|
||||
|
||||
def get_bedrock_client(
|
||||
access_key_id: Optional[str] = None,
|
||||
secret_key: Optional[str] = None,
|
||||
|
||||
@@ -7,8 +7,6 @@ import requests
|
||||
|
||||
from letta.constants import CLI_WARNING_PREFIX
|
||||
from letta.errors import LettaConfigurationError, RateLimitExceededError
|
||||
from letta.llm_api.anthropic import anthropic_bedrock_chat_completions_request
|
||||
from letta.llm_api.aws_bedrock import has_valid_aws_credentials
|
||||
from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion
|
||||
from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
|
||||
from letta.llm_api.openai import (
|
||||
@@ -25,7 +23,7 @@ from letta.otel.tracing import log_event, trace_method
|
||||
from letta.schemas.enums import ProviderCategory
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
from letta.schemas.message import Message
|
||||
from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, cast_message_to_subtype
|
||||
from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
|
||||
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
|
||||
from letta.schemas.provider_trace import ProviderTraceCreate
|
||||
from letta.services.telemetry_manager import TelemetryManager
|
||||
@@ -384,37 +382,6 @@ def create(
|
||||
|
||||
return response
|
||||
|
||||
elif llm_config.model_endpoint_type == "bedrock":
|
||||
"""Anthropic endpoint that goes via /embeddings instead of /chat/completions"""
|
||||
|
||||
if stream:
|
||||
raise NotImplementedError("Streaming not yet implemented for Anthropic (via the /embeddings endpoint).")
|
||||
if not use_tool_naming:
|
||||
raise NotImplementedError("Only tool calling supported on Anthropic API requests")
|
||||
|
||||
if not has_valid_aws_credentials():
|
||||
raise LettaConfigurationError(message="Invalid or missing AWS credentials. Please configure valid AWS credentials.")
|
||||
|
||||
tool_call = None
|
||||
if force_tool_call is not None:
|
||||
tool_call = {"type": "function", "function": {"name": force_tool_call}}
|
||||
assert functions is not None
|
||||
|
||||
return anthropic_bedrock_chat_completions_request(
|
||||
data=ChatCompletionRequest(
|
||||
model=llm_config.model,
|
||||
messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
|
||||
tools=[{"type": "function", "function": f} for f in functions] if functions else None,
|
||||
tool_choice=tool_call,
|
||||
# user=str(user_id),
|
||||
# NOTE: max_tokens is required for Anthropic API
|
||||
max_tokens=llm_config.max_tokens,
|
||||
),
|
||||
provider_name=llm_config.provider_name,
|
||||
provider_category=llm_config.provider_category,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
elif llm_config.model_endpoint_type == "deepseek":
|
||||
if model_settings.deepseek_api_key is None and llm_config.model_endpoint == "":
|
||||
# only is a problem if we are *not* using an openai proxy
|
||||
|
||||
Reference in New Issue
Block a user