From 75fcf297d244ca5a22815eee2e2c0d67ce5891bd Mon Sep 17 00:00:00 2001 From: mlong93 <35275280+mlong93@users.noreply.github.com> Date: Fri, 17 Jan 2025 15:22:01 -0800 Subject: [PATCH] chore: Migrate to anthropic sdk (#693) Co-authored-by: Mindy Long --- letta/llm_api/anthropic.py | 106 +++++++++++++------------- letta/llm_api/llm_api_tools.py | 6 -- tests/test_model_letta_performance.py | 15 ++-- 3 files changed, 60 insertions(+), 67 deletions(-) diff --git a/letta/llm_api/anthropic.py b/letta/llm_api/anthropic.py index 888a8b49..3fd19700 100644 --- a/letta/llm_api/anthropic.py +++ b/letta/llm_api/anthropic.py @@ -2,8 +2,9 @@ import json import re from typing import List, Optional, Tuple, Union +import anthropic + from letta.llm_api.aws_bedrock import get_bedrock_client -from letta.llm_api.helpers import make_post_request from letta.schemas.message import Message from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall @@ -11,6 +12,7 @@ from letta.schemas.openai.chat_completion_response import ( Message as ChoiceMessage, # NOTE: avoid conflict with our own Letta Message datatype ) from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics +from letta.settings import model_settings from letta.utils import get_utc_time, smart_urljoin BASE_URL = "https://api.anthropic.com/v1" @@ -35,6 +37,9 @@ MODEL_LIST = [ DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence." +if model_settings.anthropic_api_key: + anthropic_client = anthropic.Anthropic() + def antropic_get_model_context_window(url: str, api_key: Union[str, None], model: str) -> int: for model_dict in anthropic_get_model_list(url=url, api_key=api_key): @@ -196,7 +201,7 @@ def strip_xml_tags(string: str, tag: Optional[str]) -> str: def convert_anthropic_response_to_chatcompletion( - response_json: dict, # REST response from Google AI API + response: anthropic.types.Message, inner_thoughts_xml_tag: Optional[str] = None, ) -> ChatCompletionResponse: """ @@ -233,65 +238,67 @@ def convert_anthropic_response_to_chatcompletion( } } """ - prompt_tokens = response_json["usage"]["input_tokens"] - completion_tokens = response_json["usage"]["output_tokens"] + prompt_tokens = response.usage.input_tokens + completion_tokens = response.usage.output_tokens + finish_reason = remap_finish_reason(response.stop_reason) - finish_reason = remap_finish_reason(response_json["stop_reason"]) + content = None + tool_calls = None - if isinstance(response_json["content"], list): - if len(response_json["content"]) > 1: - # inner mono + function call - assert len(response_json["content"]) == 2, response_json - assert response_json["content"][0]["type"] == "text", response_json - assert response_json["content"][1]["type"] == "tool_use", response_json - content = strip_xml_tags(string=response_json["content"][0]["text"], tag=inner_thoughts_xml_tag) + if len(response.content) > 1: + # inner mono + function call + assert len(response.content) == 2 + text_block = response.content[0] + tool_block = response.content[1] + assert text_block.type == "text" + assert tool_block.type == "tool_use" + content = strip_xml_tags(string=text_block.text, tag=inner_thoughts_xml_tag) + tool_calls = [ + ToolCall( + id=tool_block.id, + type="function", + function=FunctionCall( + name=tool_block.name, + arguments=json.dumps(tool_block.input, indent=2), + ), + ) + ] + elif len(response.content) == 1: + block = response.content[0] + if block.type == "tool_use": + # function call only tool_calls = [ ToolCall( - id=response_json["content"][1]["id"], + id=block.id, type="function", function=FunctionCall( - name=response_json["content"][1]["name"], - arguments=json.dumps(response_json["content"][1]["input"], indent=2), + name=block.name, + arguments=json.dumps(block.input, indent=2), ), ) ] - elif len(response_json["content"]) == 1: - if response_json["content"][0]["type"] == "tool_use": - # function call only - content = None - tool_calls = [ - ToolCall( - id=response_json["content"][0]["id"], - type="function", - function=FunctionCall( - name=response_json["content"][0]["name"], - arguments=json.dumps(response_json["content"][0]["input"], indent=2), - ), - ) - ] - else: - # inner mono only - content = strip_xml_tags(string=response_json["content"][0]["text"], tag=inner_thoughts_xml_tag) - tool_calls = None + else: + # inner mono only + content = strip_xml_tags(string=block.text, tag=inner_thoughts_xml_tag) else: - raise RuntimeError("Unexpected type for content in response_json.") + raise RuntimeError("Unexpected empty content in response") - assert response_json["role"] == "assistant", response_json + assert response.role == "assistant" choice = Choice( index=0, finish_reason=finish_reason, message=ChoiceMessage( - role=response_json["role"], + role=response.role, content=content, tool_calls=tool_calls, ), ) return ChatCompletionResponse( - id=response_json["id"], + id=response.id, choices=[choice], created=get_utc_time(), - model=response_json["model"], + model=response.model, usage=UsageStatistics( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, @@ -383,16 +390,17 @@ def get_anthropic_endpoint_and_headers( def anthropic_chat_completions_request( - url: str, - api_key: str, data: ChatCompletionRequest, inner_thoughts_xml_tag: Optional[str] = "thinking", + betas: List[str] = ["tools-2024-04-04"], ) -> ChatCompletionResponse: """https://docs.anthropic.com/claude/docs/tool-use""" - url, headers = get_anthropic_endpoint_and_headers(url, api_key) data = _prepare_anthropic_request(data, inner_thoughts_xml_tag) - response_json = make_post_request(url, headers, data) - return convert_anthropic_response_to_chatcompletion(response_json=response_json, inner_thoughts_xml_tag=inner_thoughts_xml_tag) + response = anthropic_client.beta.messages.create( + **data, + betas=betas, + ) + return convert_anthropic_response_to_chatcompletion(response=response, inner_thoughts_xml_tag=inner_thoughts_xml_tag) def anthropic_bedrock_chat_completions_request( @@ -406,13 +414,5 @@ def anthropic_bedrock_chat_completions_request( client = get_bedrock_client() # Make the request - response = client.messages.create( - model=data["model"], - max_tokens=data["max_tokens"], - messages=data["messages"], - tools=data.get("tools", None), - ) - - return convert_anthropic_response_to_chatcompletion( - response_json=json.loads(response.json()), inner_thoughts_xml_tag=inner_thoughts_xml_tag - ) + response = client.messages.create(**data) + return convert_anthropic_response_to_chatcompletion(response=response, inner_thoughts_xml_tag=inner_thoughts_xml_tag) diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py index 8d161eca..431e0d97 100644 --- a/letta/llm_api/llm_api_tools.py +++ b/letta/llm_api/llm_api_tools.py @@ -23,7 +23,6 @@ from letta.schemas.llm_config import LLMConfig from letta.schemas.message import Message from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool, cast_message_to_subtype from letta.schemas.openai.chat_completion_response import ChatCompletionResponse -from letta.services.provider_manager import ProviderManager from letta.settings import ModelSettings from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface @@ -253,12 +252,7 @@ def create( tool_call = {"type": "function", "function": {"name": force_tool_call}} assert functions is not None - # load anthropic key from db in case a custom key has been stored - anthropic_key_override = ProviderManager().get_anthropic_key_override() - return anthropic_chat_completions_request( - url=llm_config.model_endpoint, - api_key=anthropic_key_override if anthropic_key_override else model_settings.anthropic_api_key, data=ChatCompletionRequest( model=llm_config.model, messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages], diff --git a/tests/test_model_letta_performance.py b/tests/test_model_letta_performance.py index 304d2030..bcc5c5f6 100644 --- a/tests/test_model_letta_performance.py +++ b/tests/test_model_letta_performance.py @@ -376,14 +376,13 @@ def test_bedrock_claude_sonnet_3_5_uses_external_tool(mock_e2b_api_key_none): print(f"Got successful response from client: \n\n{response}") -# TODO: Debug later -# @pytest.mark.anthropic_bedrock_basic -# @retry_until_success(max_attempts=5, sleep_time_seconds=2) -# def test_bedrock_claude_sonnet_3_5_recall_chat_memory(): -# filename = os.path.join(llm_config_dir, "bedrock-claude-3-5-sonnet.json") -# response = check_agent_recall_chat_memory(filename) -# # Log out successful response -# print(f"Got successful response from client: \n\n{response}") +@pytest.mark.anthropic_bedrock_basic +@retry_until_success(max_attempts=5, sleep_time_seconds=2) +def test_bedrock_claude_sonnet_3_5_recall_chat_memory(): + filename = os.path.join(llm_config_dir, "bedrock-claude-3-5-sonnet.json") + response = check_agent_recall_chat_memory(filename) + # Log out successful response + print(f"Got successful response from client: \n\n{response}") @pytest.mark.anthropic_bedrock_basic