diff --git a/letta/llm_api/google_ai.py b/letta/llm_api/google_ai.py index 1eec3eaa..27b2b88d 100644 --- a/letta/llm_api/google_ai.py +++ b/letta/llm_api/google_ai.py @@ -264,6 +264,7 @@ def convert_google_ai_response_to_chatcompletion( """ try: choices = [] + index = 0 for candidate in response_json["candidates"]: content = candidate["content"] @@ -272,86 +273,87 @@ def convert_google_ai_response_to_chatcompletion( parts = content["parts"] # TODO support parts / multimodal - assert len(parts) == 1, f"Multi-part not yet supported:\n{parts}" - response_message = parts[0] + # TODO support parallel tool calling natively + # TODO Alternative here is to throw away everything else except for the first part + for response_message in parts: + # Convert the actual message style to OpenAI style + if "functionCall" in response_message and response_message["functionCall"] is not None: + function_call = response_message["functionCall"] + assert isinstance(function_call, dict), function_call + function_name = function_call["name"] + assert isinstance(function_name, str), function_name + function_args = function_call["args"] + assert isinstance(function_args, dict), function_args - # Convert the actual message style to OpenAI style - if "functionCall" in response_message and response_message["functionCall"] is not None: - function_call = response_message["functionCall"] - assert isinstance(function_call, dict), function_call - function_name = function_call["name"] - assert isinstance(function_name, str), function_name - function_args = function_call["args"] - assert isinstance(function_args, dict), function_args + # NOTE: this also involves stripping the inner monologue out of the function + if pull_inner_thoughts_from_args: + from letta.local_llm.constants import INNER_THOUGHTS_KWARG - # NOTE: this also involves stripping the inner monologue out of the function - if pull_inner_thoughts_from_args: - from letta.local_llm.constants import INNER_THOUGHTS_KWARG + assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}" + inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG) + assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}" + else: + inner_thoughts = None + + # Google AI API doesn't generate tool call IDs + openai_response_message = Message( + role="assistant", # NOTE: "model" -> "assistant" + content=inner_thoughts, + tool_calls=[ + ToolCall( + id=get_tool_call_id(), + type="function", + function=FunctionCall( + name=function_name, + arguments=clean_json_string_extra_backslash(json_dumps(function_args)), + ), + ) + ], + ) - assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}" - inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG) - assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}" else: - inner_thoughts = None - # Google AI API doesn't generate tool call IDs - openai_response_message = Message( - role="assistant", # NOTE: "model" -> "assistant" - content=inner_thoughts, - tool_calls=[ - ToolCall( - id=get_tool_call_id(), - type="function", - function=FunctionCall( - name=function_name, - arguments=clean_json_string_extra_backslash(json_dumps(function_args)), - ), - ) - ], + # Inner thoughts are the content by default + inner_thoughts = response_message["text"] + + # Google AI API doesn't generate tool call IDs + openai_response_message = Message( + role="assistant", # NOTE: "model" -> "assistant" + content=inner_thoughts, + ) + + # Google AI API uses different finish reason strings than OpenAI + # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null + # see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api + # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER + # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason + finish_reason = candidate["finishReason"] + if finish_reason == "STOP": + openai_finish_reason = ( + "function_call" + if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0 + else "stop" + ) + elif finish_reason == "MAX_TOKENS": + openai_finish_reason = "length" + elif finish_reason == "SAFETY": + openai_finish_reason = "content_filter" + elif finish_reason == "RECITATION": + openai_finish_reason = "content_filter" + else: + raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}") + + choices.append( + Choice( + finish_reason=openai_finish_reason, + index=index, + message=openai_response_message, + ) ) + index += 1 - else: - - # Inner thoughts are the content by default - inner_thoughts = response_message["text"] - - # Google AI API doesn't generate tool call IDs - openai_response_message = Message( - role="assistant", # NOTE: "model" -> "assistant" - content=inner_thoughts, - ) - - # Google AI API uses different finish reason strings than OpenAI - # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null - # see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api - # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER - # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason - finish_reason = candidate["finishReason"] - if finish_reason == "STOP": - openai_finish_reason = ( - "function_call" - if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0 - else "stop" - ) - elif finish_reason == "MAX_TOKENS": - openai_finish_reason = "length" - elif finish_reason == "SAFETY": - openai_finish_reason = "content_filter" - elif finish_reason == "RECITATION": - openai_finish_reason = "content_filter" - else: - raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}") - - choices.append( - Choice( - finish_reason=openai_finish_reason, - index=candidate["index"], - message=openai_response_message, - ) - ) - - if len(choices) > 1: - raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})") + # if len(choices) > 1: + # raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})") # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist? # "usageMetadata": { diff --git a/tests/helpers/endpoints_helper.py b/tests/helpers/endpoints_helper.py index 80014903..88c054ec 100644 --- a/tests/helpers/endpoints_helper.py +++ b/tests/helpers/endpoints_helper.py @@ -173,14 +173,14 @@ def check_agent_uses_external_tool(filename: str) -> LettaResponse: My name is Letta. - I am a personal assistant who answers a user's questions about a website `example.com`. When a user asks me a question about `example.com`, I will use a tool called {tool.name} which will search `example.com` and answer the relevant question. + I am a personal assistant who uses a tool called {tool.name} to star a desired github repo. Don’t forget - inner monologue / inner thoughts should always be different than the contents of send_message! send_message is how you communicate with the user, whereas inner thoughts are your own personal inner thoughts. """ agent_state = setup_agent(client, filename, memory_persona_str=persona, tool_ids=[tool.id]) - response = client.user_message(agent_id=agent_state.id, message="What's on the example.com website?") + response = client.user_message(agent_id=agent_state.id, message="Please star the repo with owner=letta-ai and repo=letta") # Basic checks assert_sanity_checks(response) diff --git a/tests/pytest.ini b/tests/pytest.ini index 7ffe833c..299f27d4 100644 --- a/tests/pytest.ini +++ b/tests/pytest.ini @@ -7,3 +7,7 @@ filterwarnings = markers = local_sandbox: mark test as part of local sandbox tests e2b_sandbox: mark test as part of E2B sandbox tests + openai_basic: Tests for OpenAI endpoints + anthropic_basic: Tests for Anthropic endpoints + azure_basic: Tests for Azure endpoints + gemini_basic: Tests for Gemini endpoints diff --git a/tests/test_model_letta_perfomance.py b/tests/test_model_letta_perfomance.py index d45654ea..d20d64ca 100644 --- a/tests/test_model_letta_perfomance.py +++ b/tests/test_model_letta_perfomance.py @@ -2,15 +2,15 @@ import functools import os import time +import pytest + from tests.helpers.endpoints_helper import ( check_agent_archival_memory_insert, check_agent_archival_memory_retrieval, check_agent_edit_core_memory, check_agent_recall_chat_memory, - check_agent_summarize_memory_simple, check_agent_uses_external_tool, check_first_response_is_valid_for_llm_endpoint, - check_response_contains_keyword, run_embedding_endpoint, ) @@ -84,6 +84,7 @@ def retry_until_success(max_attempts=10, sleep_time_seconds=4): # ====================================================================================================================== # OPENAI TESTS # ====================================================================================================================== +@pytest.mark.openai_basic @retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_openai_gpt_4o_returns_valid_first_message(): filename = os.path.join(llm_config_dir, "openai-gpt-4o.json") @@ -92,23 +93,16 @@ def test_openai_gpt_4o_returns_valid_first_message(): print(f"Got successful response from client: \n\n{response}") +@pytest.mark.openai_basic @retry_until_success(max_attempts=5, sleep_time_seconds=2) -def test_openai_gpt_4o_returns_keyword(): - keyword = "banana" - filename = os.path.join(llm_config_dir, "openai-gpt-4o.json") - response = check_response_contains_keyword(filename, keyword=keyword) - # Log out successful response - print(f"Got successful response from client: \n\n{response}") - - -@retry_until_success(max_attempts=5, sleep_time_seconds=2) -def test_openai_gpt_4o_uses_external_tool(): +def test_openai_gpt_4o_uses_external_tool(mock_e2b_api_key_none): filename = os.path.join(llm_config_dir, "openai-gpt-4o.json") response = check_agent_uses_external_tool(filename) # Log out successful response print(f"Got successful response from client: \n\n{response}") +@pytest.mark.openai_basic @retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_openai_gpt_4o_recall_chat_memory(): filename = os.path.join(llm_config_dir, "openai-gpt-4o.json") @@ -117,6 +111,7 @@ def test_openai_gpt_4o_recall_chat_memory(): print(f"Got successful response from client: \n\n{response}") +@pytest.mark.openai_basic @retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_openai_gpt_4o_archival_memory_retrieval(): filename = os.path.join(llm_config_dir, "openai-gpt-4o.json") @@ -125,6 +120,7 @@ def test_openai_gpt_4o_archival_memory_retrieval(): print(f"Got successful response from client: \n\n{response}") +@pytest.mark.openai_basic @retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_openai_gpt_4o_archival_memory_insert(): filename = os.path.join(llm_config_dir, "openai-gpt-4o.json") @@ -133,6 +129,7 @@ def test_openai_gpt_4o_archival_memory_insert(): print(f"Got successful response from client: \n\n{response}") +@pytest.mark.openai_basic @retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_openai_gpt_4o_edit_core_memory(): filename = os.path.join(llm_config_dir, "openai-gpt-4o.json") @@ -141,13 +138,7 @@ def test_openai_gpt_4o_edit_core_memory(): print(f"Got successful response from client: \n\n{response}") -@retry_until_success(max_attempts=5, sleep_time_seconds=2) -def test_openai_gpt_4o_summarize_memory(): - filename = os.path.join(llm_config_dir, "openai-gpt-4o.json") - response = check_agent_summarize_memory_simple(filename) - print(f"Got successful response from client: \n\n{response}") - - +@pytest.mark.openai_basic @retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_embedding_endpoint_openai(): filename = os.path.join(embedding_config_dir, "openai_embed.json") @@ -157,6 +148,8 @@ def test_embedding_endpoint_openai(): # ====================================================================================================================== # AZURE TESTS # ====================================================================================================================== +@pytest.mark.azure_basic +@retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_azure_gpt_4o_mini_returns_valid_first_message(): filename = os.path.join(llm_config_dir, "azure-gpt-4o-mini.json") response = check_first_response_is_valid_for_llm_endpoint(filename) @@ -164,21 +157,17 @@ def test_azure_gpt_4o_mini_returns_valid_first_message(): print(f"Got successful response from client: \n\n{response}") -def test_azure_gpt_4o_mini_returns_keyword(): - keyword = "banana" - filename = os.path.join(llm_config_dir, "azure-gpt-4o-mini.json") - response = check_response_contains_keyword(filename, keyword=keyword) - # Log out successful response - print(f"Got successful response from client: \n\n{response}") - - -def test_azure_gpt_4o_mini_uses_external_tool(): +@pytest.mark.azure_basic +@retry_until_success(max_attempts=5, sleep_time_seconds=2) +def test_azure_gpt_4o_mini_uses_external_tool(mock_e2b_api_key_none): filename = os.path.join(llm_config_dir, "azure-gpt-4o-mini.json") response = check_agent_uses_external_tool(filename) # Log out successful response print(f"Got successful response from client: \n\n{response}") +@pytest.mark.azure_basic +@retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_azure_gpt_4o_mini_recall_chat_memory(): filename = os.path.join(llm_config_dir, "azure-gpt-4o-mini.json") response = check_agent_recall_chat_memory(filename) @@ -186,6 +175,8 @@ def test_azure_gpt_4o_mini_recall_chat_memory(): print(f"Got successful response from client: \n\n{response}") +@pytest.mark.azure_basic +@retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_azure_gpt_4o_mini_archival_memory_retrieval(): filename = os.path.join(llm_config_dir, "azure-gpt-4o-mini.json") response = check_agent_archival_memory_retrieval(filename) @@ -193,6 +184,8 @@ def test_azure_gpt_4o_mini_archival_memory_retrieval(): print(f"Got successful response from client: \n\n{response}") +@pytest.mark.azure_basic +@retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_azure_gpt_4o_mini_edit_core_memory(): filename = os.path.join(llm_config_dir, "azure-gpt-4o-mini.json") response = check_agent_edit_core_memory(filename) @@ -200,6 +193,8 @@ def test_azure_gpt_4o_mini_edit_core_memory(): print(f"Got successful response from client: \n\n{response}") +@pytest.mark.azure_basic +@retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_azure_embedding_endpoint(): filename = os.path.join(embedding_config_dir, "azure_embed.json") run_embedding_endpoint(filename) @@ -239,6 +234,8 @@ def test_embedding_endpoint_ollama(): # ====================================================================================================================== # ANTHROPIC TESTS # ====================================================================================================================== +@pytest.mark.anthropic_basic +@retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_claude_haiku_3_5_returns_valid_first_message(): filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json") response = check_first_response_is_valid_for_llm_endpoint(filename) @@ -246,21 +243,17 @@ def test_claude_haiku_3_5_returns_valid_first_message(): print(f"Got successful response from client: \n\n{response}") -def test_claude_haiku_3_5_returns_keyword(): - keyword = "banana" - filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json") - response = check_response_contains_keyword(filename, keyword=keyword) - # Log out successful response - print(f"Got successful response from client: \n\n{response}") - - -def test_claude_haiku_3_5_uses_external_tool(): +@pytest.mark.anthropic_basic +@retry_until_success(max_attempts=5, sleep_time_seconds=2) +def test_claude_haiku_3_5_uses_external_tool(mock_e2b_api_key_none): filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json") response = check_agent_uses_external_tool(filename) # Log out successful response print(f"Got successful response from client: \n\n{response}") +@pytest.mark.anthropic_basic +@retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_claude_haiku_3_5_recall_chat_memory(): filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json") response = check_agent_recall_chat_memory(filename) @@ -268,6 +261,8 @@ def test_claude_haiku_3_5_recall_chat_memory(): print(f"Got successful response from client: \n\n{response}") +@pytest.mark.anthropic_basic +@retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_claude_haiku_3_5_archival_memory_retrieval(): filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json") response = check_agent_archival_memory_retrieval(filename) @@ -275,6 +270,8 @@ def test_claude_haiku_3_5_archival_memory_retrieval(): print(f"Got successful response from client: \n\n{response}") +@pytest.mark.anthropic_basic +@retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_claude_haiku_3_5_edit_core_memory(): filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json") response = check_agent_edit_core_memory(filename) @@ -292,15 +289,7 @@ def test_groq_llama31_70b_returns_valid_first_message(): print(f"Got successful response from client: \n\n{response}") -def test_groq_llama31_70b_returns_keyword(): - keyword = "banana" - filename = os.path.join(llm_config_dir, "groq.json") - response = check_response_contains_keyword(filename, keyword=keyword) - # Log out successful response - print(f"Got successful response from client: \n\n{response}") - - -def test_groq_llama31_70b_uses_external_tool(): +def test_groq_llama31_70b_uses_external_tool(mock_e2b_api_key_none): filename = os.path.join(llm_config_dir, "groq.json") response = check_agent_uses_external_tool(filename) # Log out successful response @@ -332,6 +321,8 @@ def test_groq_llama31_70b_edit_core_memory(): # ====================================================================================================================== # GEMINI TESTS # ====================================================================================================================== +@pytest.mark.gemini_basic +@retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_gemini_pro_15_returns_valid_first_message(): filename = os.path.join(llm_config_dir, "gemini-pro.json") response = check_first_response_is_valid_for_llm_endpoint(filename) @@ -339,21 +330,17 @@ def test_gemini_pro_15_returns_valid_first_message(): print(f"Got successful response from client: \n\n{response}") -def test_gemini_pro_15_returns_keyword(): - keyword = "banana" - filename = os.path.join(llm_config_dir, "gemini-pro.json") - response = check_response_contains_keyword(filename, keyword=keyword) - # Log out successful response - print(f"Got successful response from client: \n\n{response}") - - -def test_gemini_pro_15_uses_external_tool(): +@pytest.mark.gemini_basic +@retry_until_success(max_attempts=5, sleep_time_seconds=2) +def test_gemini_pro_15_uses_external_tool(mock_e2b_api_key_none): filename = os.path.join(llm_config_dir, "gemini-pro.json") response = check_agent_uses_external_tool(filename) # Log out successful response print(f"Got successful response from client: \n\n{response}") +@pytest.mark.gemini_basic +@retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_gemini_pro_15_recall_chat_memory(): filename = os.path.join(llm_config_dir, "gemini-pro.json") response = check_agent_recall_chat_memory(filename) @@ -361,6 +348,8 @@ def test_gemini_pro_15_recall_chat_memory(): print(f"Got successful response from client: \n\n{response}") +@pytest.mark.gemini_basic +@retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_gemini_pro_15_archival_memory_retrieval(): filename = os.path.join(llm_config_dir, "gemini-pro.json") response = check_agent_archival_memory_retrieval(filename) @@ -368,6 +357,8 @@ def test_gemini_pro_15_archival_memory_retrieval(): print(f"Got successful response from client: \n\n{response}") +@pytest.mark.gemini_basic +@retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_gemini_pro_15_edit_core_memory(): filename = os.path.join(llm_config_dir, "gemini-pro.json") response = check_agent_edit_core_memory(filename) @@ -385,15 +376,7 @@ def test_together_llama_3_70b_returns_valid_first_message(): print(f"Got successful response from client: \n\n{response}") -def test_together_llama_3_70b_returns_keyword(): - keyword = "banana" - filename = os.path.join(llm_config_dir, "together-llama-3-70b.json") - response = check_response_contains_keyword(filename, keyword=keyword) - # Log out successful response - print(f"Got successful response from client: \n\n{response}") - - -def test_together_llama_3_70b_uses_external_tool(): +def test_together_llama_3_70b_uses_external_tool(mock_e2b_api_key_none): filename = os.path.join(llm_config_dir, "together-llama-3-70b.json") response = check_agent_uses_external_tool(filename) # Log out successful response