From 75fcf297d244ca5a22815eee2e2c0d67ce5891bd Mon Sep 17 00:00:00 2001
From: mlong93 <35275280+mlong93@users.noreply.github.com>
Date: Fri, 17 Jan 2025 15:22:01 -0800
Subject: [PATCH] chore: Migrate to anthropic sdk (#693)

Co-authored-by: Mindy Long <mindy@letta.com>
---
 letta/llm_api/anthropic.py            | 106 +++++++++++++-------------
 letta/llm_api/llm_api_tools.py        |   6 --
 tests/test_model_letta_performance.py |  15 ++--
 3 files changed, 60 insertions(+), 67 deletions(-)

diff --git a/letta/llm_api/anthropic.py b/letta/llm_api/anthropic.py
index 888a8b49..3fd19700 100644
--- a/letta/llm_api/anthropic.py
+++ b/letta/llm_api/anthropic.py
@@ -2,8 +2,9 @@ import json
 import re
 from typing import List, Optional, Tuple, Union
 
+import anthropic
+
 from letta.llm_api.aws_bedrock import get_bedrock_client
-from letta.llm_api.helpers import make_post_request
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall
@@ -11,6 +12,7 @@ from letta.schemas.openai.chat_completion_response import (
     Message as ChoiceMessage,  # NOTE: avoid conflict with our own Letta Message datatype
 )
 from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
+from letta.settings import model_settings
 from letta.utils import get_utc_time, smart_urljoin
 
 BASE_URL = "https://api.anthropic.com/v1"
@@ -35,6 +37,9 @@ MODEL_LIST = [
 
 DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
 
+if model_settings.anthropic_api_key:
+    anthropic_client = anthropic.Anthropic()
+
 
 def antropic_get_model_context_window(url: str, api_key: Union[str, None], model: str) -> int:
     for model_dict in anthropic_get_model_list(url=url, api_key=api_key):
@@ -196,7 +201,7 @@ def strip_xml_tags(string: str, tag: Optional[str]) -> str:
 
 
 def convert_anthropic_response_to_chatcompletion(
-    response_json: dict,  # REST response from Google AI API
+    response: anthropic.types.Message,
     inner_thoughts_xml_tag: Optional[str] = None,
 ) -> ChatCompletionResponse:
     """
@@ -233,65 +238,67 @@ def convert_anthropic_response_to_chatcompletion(
         }
     }
     """
-    prompt_tokens = response_json["usage"]["input_tokens"]
-    completion_tokens = response_json["usage"]["output_tokens"]
+    prompt_tokens = response.usage.input_tokens
+    completion_tokens = response.usage.output_tokens
+    finish_reason = remap_finish_reason(response.stop_reason)
 
-    finish_reason = remap_finish_reason(response_json["stop_reason"])
+    content = None
+    tool_calls = None
 
-    if isinstance(response_json["content"], list):
-        if len(response_json["content"]) > 1:
-            # inner mono + function call
-            assert len(response_json["content"]) == 2, response_json
-            assert response_json["content"][0]["type"] == "text", response_json
-            assert response_json["content"][1]["type"] == "tool_use", response_json
-            content = strip_xml_tags(string=response_json["content"][0]["text"], tag=inner_thoughts_xml_tag)
+    if len(response.content) > 1:
+        # inner mono + function call
+        assert len(response.content) == 2
+        text_block = response.content[0]
+        tool_block = response.content[1]
+        assert text_block.type == "text"
+        assert tool_block.type == "tool_use"
+        content = strip_xml_tags(string=text_block.text, tag=inner_thoughts_xml_tag)
+        tool_calls = [
+            ToolCall(
+                id=tool_block.id,
+                type="function",
+                function=FunctionCall(
+                    name=tool_block.name,
+                    arguments=json.dumps(tool_block.input, indent=2),
+                ),
+            )
+        ]
+    elif len(response.content) == 1:
+        block = response.content[0]
+        if block.type == "tool_use":
+            # function call only
             tool_calls = [
                 ToolCall(
-                    id=response_json["content"][1]["id"],
+                    id=block.id,
                     type="function",
                     function=FunctionCall(
-                        name=response_json["content"][1]["name"],
-                        arguments=json.dumps(response_json["content"][1]["input"], indent=2),
+                        name=block.name,
+                        arguments=json.dumps(block.input, indent=2),
                     ),
                 )
             ]
-        elif len(response_json["content"]) == 1:
-            if response_json["content"][0]["type"] == "tool_use":
-                # function call only
-                content = None
-                tool_calls = [
-                    ToolCall(
-                        id=response_json["content"][0]["id"],
-                        type="function",
-                        function=FunctionCall(
-                            name=response_json["content"][0]["name"],
-                            arguments=json.dumps(response_json["content"][0]["input"], indent=2),
-                        ),
-                    )
-                ]
-            else:
-                # inner mono only
-                content = strip_xml_tags(string=response_json["content"][0]["text"], tag=inner_thoughts_xml_tag)
-                tool_calls = None
+        else:
+            # inner mono only
+            content = strip_xml_tags(string=block.text, tag=inner_thoughts_xml_tag)
     else:
-        raise RuntimeError("Unexpected type for content in response_json.")
+        raise RuntimeError("Unexpected empty content in response")
 
-    assert response_json["role"] == "assistant", response_json
+    assert response.role == "assistant"
     choice = Choice(
         index=0,
         finish_reason=finish_reason,
         message=ChoiceMessage(
-            role=response_json["role"],
+            role=response.role,
             content=content,
             tool_calls=tool_calls,
         ),
     )
 
     return ChatCompletionResponse(
-        id=response_json["id"],
+        id=response.id,
         choices=[choice],
         created=get_utc_time(),
-        model=response_json["model"],
+        model=response.model,
         usage=UsageStatistics(
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
@@ -383,16 +390,17 @@ def get_anthropic_endpoint_and_headers(
 
 
 def anthropic_chat_completions_request(
-    url: str,
-    api_key: str,
     data: ChatCompletionRequest,
     inner_thoughts_xml_tag: Optional[str] = "thinking",
+    betas: List[str] = ["tools-2024-04-04"],
 ) -> ChatCompletionResponse:
     """https://docs.anthropic.com/claude/docs/tool-use"""
-    url, headers = get_anthropic_endpoint_and_headers(url, api_key)
     data = _prepare_anthropic_request(data, inner_thoughts_xml_tag)
-    response_json = make_post_request(url, headers, data)
-    return convert_anthropic_response_to_chatcompletion(response_json=response_json, inner_thoughts_xml_tag=inner_thoughts_xml_tag)
+    response = anthropic_client.beta.messages.create(
+        **data,
+        betas=betas,
+    )
+    return convert_anthropic_response_to_chatcompletion(response=response, inner_thoughts_xml_tag=inner_thoughts_xml_tag)
 
 
 def anthropic_bedrock_chat_completions_request(
@@ -406,13 +414,5 @@ def anthropic_bedrock_chat_completions_request(
     client = get_bedrock_client()
 
     # Make the request
-    response = client.messages.create(
-        model=data["model"],
-        max_tokens=data["max_tokens"],
-        messages=data["messages"],
-        tools=data.get("tools", None),
-    )
-
-    return convert_anthropic_response_to_chatcompletion(
-        response_json=json.loads(response.json()), inner_thoughts_xml_tag=inner_thoughts_xml_tag
-    )
+    response = client.messages.create(**data)
+    return convert_anthropic_response_to_chatcompletion(response=response, inner_thoughts_xml_tag=inner_thoughts_xml_tag)
diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py
index 8d161eca..431e0d97 100644
--- a/letta/llm_api/llm_api_tools.py
+++ b/letta/llm_api/llm_api_tools.py
@@ -23,7 +23,6 @@ from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool, cast_message_to_subtype
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
-from letta.services.provider_manager import ProviderManager
 from letta.settings import ModelSettings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
 
@@ -253,12 +252,7 @@ def create(
             tool_call = {"type": "function", "function": {"name": force_tool_call}}
             assert functions is not None
 
-        # load anthropic key from db in case a custom key has been stored
-        anthropic_key_override = ProviderManager().get_anthropic_key_override()
-
         return anthropic_chat_completions_request(
-            url=llm_config.model_endpoint,
-            api_key=anthropic_key_override if anthropic_key_override else model_settings.anthropic_api_key,
             data=ChatCompletionRequest(
                 model=llm_config.model,
                 messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
diff --git a/tests/test_model_letta_performance.py b/tests/test_model_letta_performance.py
index 304d2030..bcc5c5f6 100644
--- a/tests/test_model_letta_performance.py
+++ b/tests/test_model_letta_performance.py
@@ -376,14 +376,13 @@ def test_bedrock_claude_sonnet_3_5_uses_external_tool(mock_e2b_api_key_none):
     print(f"Got successful response from client: \n\n{response}")
 
 
-# TODO: Debug later
-# @pytest.mark.anthropic_bedrock_basic
-# @retry_until_success(max_attempts=5, sleep_time_seconds=2)
-# def test_bedrock_claude_sonnet_3_5_recall_chat_memory():
-#     filename = os.path.join(llm_config_dir, "bedrock-claude-3-5-sonnet.json")
-#     response = check_agent_recall_chat_memory(filename)
-#     # Log out successful response
-#     print(f"Got successful response from client: \n\n{response}")
+@pytest.mark.anthropic_bedrock_basic
+@retry_until_success(max_attempts=5, sleep_time_seconds=2)
+def test_bedrock_claude_sonnet_3_5_recall_chat_memory():
+    filename = os.path.join(llm_config_dir, "bedrock-claude-3-5-sonnet.json")
+    response = check_agent_recall_chat_memory(filename)
+    # Log out successful response
+    print(f"Got successful response from client: \n\n{response}")
 
 
 @pytest.mark.anthropic_bedrock_basic