From 2d26365e42f67e6cd0d443cdc111fada5d7f5b6f Mon Sep 17 00:00:00 2001 From: Matthew Zhou Date: Fri, 15 Nov 2024 16:46:12 -0800 Subject: [PATCH] fix: Fix summarizer for Anthropic and add integration tests (#2046) --- .github/workflows/integration_tests.yml | 75 +++++++++++++++++++ .github/workflows/test_anthropic.yml | 12 +-- .github/workflows/test_azure.yml | 12 +-- .github/workflows/test_groq.yml | 12 +-- .github/workflows/test_memgpt_hosted.yml | 4 +- .github/workflows/test_ollama.yml | 4 +- .github/workflows/test_openai.yml | 16 ++-- .github/workflows/tests.yml | 2 +- .gitignore | 5 ++ examples/tool_rule_usage.py | 2 +- letta/agent.py | 3 +- letta/llm_api/anthropic.py | 40 +++++----- ...aude-3-opus.json => claude-3-5-haiku.json} | 2 +- tests/integration_test_summarizer.py | 68 +++++++++++++++++ tests/test_agent_tool_graph.py | 4 +- ...ints.py => test_model_letta_perfomance.py} | 25 ++++--- 16 files changed, 218 insertions(+), 68 deletions(-) create mode 100644 .github/workflows/integration_tests.yml rename tests/configs/llm_model_configs/{claude-3-opus.json => claude-3-5-haiku.json} (82%) create mode 100644 tests/integration_test_summarizer.py rename tests/{test_endpoints.py => test_model_letta_perfomance.py} (94%) diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml new file mode 100644 index 00000000..e3c673a3 --- /dev/null +++ b/.github/workflows/integration_tests.yml @@ -0,0 +1,75 @@ +name: Integration Tests + +env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + COMPOSIO_API_KEY: ${{ secrets.COMPOSIO_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} + AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }} + AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }} + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + run-integration-tests: + runs-on: ubuntu-latest + timeout-minutes: 15 + strategy: + fail-fast: false + matrix: + integration_test_suite: + - "integration_test_summarizer.py" + services: + qdrant: + image: qdrant/qdrant + ports: + - 6333:6333 + postgres: + image: pgvector/pgvector:pg17 + ports: + - 5432:5432 + env: + POSTGRES_HOST_AUTH_METHOD: trust + POSTGRES_DB: postgres + POSTGRES_USER: postgres + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python, Poetry, and Dependencies + uses: packetcoders/action-setup-cache-python-poetry@main + with: + python-version: "3.12" + poetry-version: "1.8.2" + install-args: "-E dev -E postgres -E milvus -E external-tools -E tests" + - name: Migrate database + env: + LETTA_PG_PORT: 5432 + LETTA_PG_USER: postgres + LETTA_PG_PASSWORD: postgres + LETTA_PG_DB: postgres + LETTA_PG_HOST: localhost + run: | + psql -h localhost -U postgres -d postgres -c 'CREATE EXTENSION vector' + poetry run alembic upgrade head + - name: Run core unit tests + env: + LETTA_PG_PORT: 5432 + LETTA_PG_USER: postgres + LETTA_PG_PASSWORD: postgres + LETTA_PG_DB: postgres + LETTA_PG_HOST: localhost + LETTA_SERVER_PASS: test_server_token + run: | + poetry run pytest -s -vv tests/${{ matrix.integration_test_suite }} diff --git a/.github/workflows/test_anthropic.yml b/.github/workflows/test_anthropic.yml index 2c4391ad..b8daa9ab 100644 --- a/.github/workflows/test_anthropic.yml +++ b/.github/workflows/test_anthropic.yml @@ -29,7 +29,7 @@ jobs: env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_returns_valid_first_message + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_returns_valid_first_message echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true @@ -38,7 +38,7 @@ jobs: env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_returns_keyword + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_returns_keyword echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true @@ -47,7 +47,7 @@ jobs: env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_uses_external_tool + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_uses_external_tool echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true @@ -56,7 +56,7 @@ jobs: env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_recall_chat_memory + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_recall_chat_memory echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true @@ -65,7 +65,7 @@ jobs: env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_archival_memory_retrieval + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_archival_memory_retrieval echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true @@ -74,7 +74,7 @@ jobs: env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_edit_core_memory + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_edit_core_memory echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true diff --git a/.github/workflows/test_azure.yml b/.github/workflows/test_azure.yml index e18f512d..7ea6982c 100644 --- a/.github/workflows/test_azure.yml +++ b/.github/workflows/test_azure.yml @@ -31,7 +31,7 @@ jobs: AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }} AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_returns_valid_first_message + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_returns_valid_first_message echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true @@ -41,7 +41,7 @@ jobs: AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }} AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_returns_keyword + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_returns_keyword echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true @@ -51,7 +51,7 @@ jobs: AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }} AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_uses_external_tool + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_uses_external_tool echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true @@ -61,7 +61,7 @@ jobs: AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }} AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_recall_chat_memory + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_recall_chat_memory echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true @@ -71,7 +71,7 @@ jobs: AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }} AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_archival_memory_retrieval + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_archival_memory_retrieval echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true @@ -81,7 +81,7 @@ jobs: AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }} AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_edit_core_memory + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_edit_core_memory echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true diff --git a/.github/workflows/test_groq.yml b/.github/workflows/test_groq.yml index f14da94a..327e5d05 100644 --- a/.github/workflows/test_groq.yml +++ b/.github/workflows/test_groq.yml @@ -29,7 +29,7 @@ jobs: env: GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_returns_valid_first_message + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_returns_valid_first_message echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true @@ -38,7 +38,7 @@ jobs: env: GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_returns_keyword + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_returns_keyword echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true @@ -47,7 +47,7 @@ jobs: env: GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_uses_external_tool + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_uses_external_tool echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true @@ -56,7 +56,7 @@ jobs: env: GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_recall_chat_memory + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_recall_chat_memory echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true @@ -65,7 +65,7 @@ jobs: env: GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_archival_memory_retrieval + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_archival_memory_retrieval echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true @@ -74,7 +74,7 @@ jobs: env: GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_edit_core_memory + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_edit_core_memory echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true diff --git a/.github/workflows/test_memgpt_hosted.yml b/.github/workflows/test_memgpt_hosted.yml index 71e9a7f5..191ace57 100644 --- a/.github/workflows/test_memgpt_hosted.yml +++ b/.github/workflows/test_memgpt_hosted.yml @@ -23,9 +23,9 @@ jobs: - name: Test LLM endpoint run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_llm_endpoint_letta_hosted + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_llm_endpoint_letta_hosted continue-on-error: true - name: Test embedding endpoint run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_letta_hosted + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_embedding_endpoint_letta_hosted diff --git a/.github/workflows/test_ollama.yml b/.github/workflows/test_ollama.yml index 010ff804..7ed6e913 100644 --- a/.github/workflows/test_ollama.yml +++ b/.github/workflows/test_ollama.yml @@ -34,11 +34,11 @@ jobs: - name: Test LLM endpoint run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_llm_endpoint_ollama + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_llm_endpoint_ollama - name: Test embedding endpoint run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_ollama + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_embedding_endpoint_ollama - name: Test provider run: | diff --git a/.github/workflows/test_openai.yml b/.github/workflows/test_openai.yml index 1fce9c13..879177e9 100644 --- a/.github/workflows/test_openai.yml +++ b/.github/workflows/test_openai.yml @@ -29,53 +29,53 @@ jobs: env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_returns_valid_first_message + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_returns_valid_first_message - name: Test model sends message with keyword id: test_keyword_message env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_returns_keyword + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_returns_keyword - name: Test model uses external tool correctly id: test_external_tool env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_uses_external_tool + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_uses_external_tool - name: Test model recalls chat memory id: test_chat_memory env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_recall_chat_memory + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_recall_chat_memory - name: Test model uses 'archival_memory_search' to find secret id: test_archival_memory_search env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_archival_memory_retrieval + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_archival_memory_retrieval - name: Test model uses 'archival_memory_insert' to insert archival memories id: test_archival_memory_insert env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_archival_memory_insert + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_archival_memory_insert - name: Test model can edit core memories id: test_core_memory env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_edit_core_memory + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_edit_core_memory - name: Test embedding endpoint id: test_embedding_endpoint env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_openai + poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_embedding_endpoint_openai diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a80bc4eb..adda0fd4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -131,4 +131,4 @@ jobs: LETTA_SERVER_PASS: test_server_token PYTHONPATH: ${{ github.workspace }}:${{ env.PYTHONPATH }} run: | - poetry run pytest -s -vv -k "not test_agent_tool_graph.py and not test_tool_rule_solver.py and not test_local_client.py and not test_o1_agent.py and not test_cli.py and not test_tools.py and not test_concurrent_connections.py and not test_quickstart and not test_endpoints and not test_storage and not test_server and not test_openai_client and not test_providers and not test_client.py" tests + poetry run pytest -s -vv -k "not integration_test_summarizer.py and not test_agent_tool_graph.py and not test_tool_rule_solver.py and not test_local_client.py and not test_o1_agent.py and not test_cli.py and not test_tools.py and not test_concurrent_connections.py and not test_quickstart and not test_model_letta_perfomance and not test_storage and not test_server and not test_openai_client and not test_providers and not test_client.py" tests diff --git a/.gitignore b/.gitignore index 1fcffd8a..f9330dd9 100644 --- a/.gitignore +++ b/.gitignore @@ -1018,3 +1018,8 @@ pgdata/ letta/.pytest_cache/ memgpy/pytest.ini **/**/pytest_cache + + +# local sandbox venvs +letta/services/tool_sandbox_env/* +tests/test_tool_sandbox/* diff --git a/examples/tool_rule_usage.py b/examples/tool_rule_usage.py index b408b1d0..c1079bc1 100644 --- a/examples/tool_rule_usage.py +++ b/examples/tool_rule_usage.py @@ -9,7 +9,7 @@ from tests.helpers.endpoints_helper import ( setup_agent, ) from tests.helpers.utils import cleanup -from tests.test_endpoints import llm_config_dir +from tests.test_model_letta_perfomance import llm_config_dir """ This example shows how you can constrain tool calls in your agent. diff --git a/letta/agent.py b/letta/agent.py index a3ddfb0f..f7bd6f0f 100644 --- a/letta/agent.py +++ b/letta/agent.py @@ -48,6 +48,7 @@ from letta.schemas.tool_rule import TerminalToolRule from letta.schemas.usage import LettaUsageStatistics from letta.services.source_manager import SourceManager from letta.services.user_manager import UserManager +from letta.streaming_interface import StreamingRefreshCLIInterface from letta.system import ( get_heartbeat, get_initial_boot_messages, @@ -229,7 +230,7 @@ class BaseAgent(ABC): class Agent(BaseAgent): def __init__( self, - interface: Optional[AgentInterface], + interface: Optional[Union[AgentInterface, StreamingRefreshCLIInterface]], # agents can be created from providing agent_state agent_state: AgentState, tools: List[Tool], diff --git a/letta/llm_api/anthropic.py b/letta/llm_api/anthropic.py index 15e4538f..9df4cec2 100644 --- a/letta/llm_api/anthropic.py +++ b/letta/llm_api/anthropic.py @@ -242,26 +242,28 @@ def convert_anthropic_response_to_chatcompletion( finish_reason = remap_finish_reason(response_json["stop_reason"]) if isinstance(response_json["content"], list): - # inner mono + function call - # TODO relax asserts - assert len(response_json["content"]) == 2, response_json - assert response_json["content"][0]["type"] == "text", response_json - assert response_json["content"][1]["type"] == "tool_use", response_json - content = strip_xml_tags(string=response_json["content"][0]["text"], tag=inner_thoughts_xml_tag) - tool_calls = [ - ToolCall( - id=response_json["content"][1]["id"], - type="function", - function=FunctionCall( - name=response_json["content"][1]["name"], - arguments=json.dumps(response_json["content"][1]["input"], indent=2), - ), - ) - ] + if len(response_json["content"]) > 1: + # inner mono + function call + assert len(response_json["content"]) == 2, response_json + assert response_json["content"][0]["type"] == "text", response_json + assert response_json["content"][1]["type"] == "tool_use", response_json + content = strip_xml_tags(string=response_json["content"][0]["text"], tag=inner_thoughts_xml_tag) + tool_calls = [ + ToolCall( + id=response_json["content"][1]["id"], + type="function", + function=FunctionCall( + name=response_json["content"][1]["name"], + arguments=json.dumps(response_json["content"][1]["input"], indent=2), + ), + ) + ] + else: + # Just inner mono + content = strip_xml_tags(string=response_json["content"][0]["text"], tag=inner_thoughts_xml_tag) + tool_calls = None else: - # just inner mono - content = strip_xml_tags(string=response_json["content"], tag=inner_thoughts_xml_tag) - tool_calls = None + raise RuntimeError("Unexpected type for content in response_json.") assert response_json["role"] == "assistant", response_json choice = Choice( diff --git a/tests/configs/llm_model_configs/claude-3-opus.json b/tests/configs/llm_model_configs/claude-3-5-haiku.json similarity index 82% rename from tests/configs/llm_model_configs/claude-3-opus.json rename to tests/configs/llm_model_configs/claude-3-5-haiku.json index 9516b870..89f4e0c5 100644 --- a/tests/configs/llm_model_configs/claude-3-opus.json +++ b/tests/configs/llm_model_configs/claude-3-5-haiku.json @@ -1,6 +1,6 @@ { "context_window": 200000, - "model": "claude-3-opus-20240229", + "model": "claude-3-5-haiku-20241022", "model_endpoint_type": "anthropic", "model_endpoint": "https://api.anthropic.com/v1", "model_wrapper": null, diff --git a/tests/integration_test_summarizer.py b/tests/integration_test_summarizer.py new file mode 100644 index 00000000..6fc73b47 --- /dev/null +++ b/tests/integration_test_summarizer.py @@ -0,0 +1,68 @@ +import json +import os +import uuid + +import pytest + +from letta import create_client +from letta.agent import Agent +from letta.schemas.embedding_config import EmbeddingConfig +from letta.schemas.llm_config import LLMConfig +from letta.streaming_interface import StreamingRefreshCLIInterface +from tests.helpers.endpoints_helper import EMBEDDING_CONFIG_PATH +from tests.helpers.utils import cleanup + +# constants +LLM_CONFIG_DIR = "tests/configs/llm_model_configs" +SUMMARY_KEY_PHRASE = "The following is a summary" + + +@pytest.mark.parametrize( + "config_filename", + [ + "openai-gpt-4o.json", + "azure-gpt-4o-mini.json", + "claude-3-5-haiku.json", + # "groq.json", TODO: Support groq, rate limiting currently makes it impossible to test + # "gemini-pro.json", TODO: Gemini is broken + ], +) +def test_summarizer(config_filename): + namespace = uuid.NAMESPACE_DNS + agent_name = str(uuid.uuid5(namespace, f"integration-test-summarizer-{config_filename}")) + + # Get the LLM config + filename = os.path.join(LLM_CONFIG_DIR, config_filename) + config_data = json.load(open(filename, "r")) + + # Create client and clean up agents + llm_config = LLMConfig(**config_data) + embedding_config = EmbeddingConfig(**json.load(open(EMBEDDING_CONFIG_PATH))) + client = create_client() + client.set_default_llm_config(llm_config) + client.set_default_embedding_config(embedding_config) + cleanup(client=client, agent_uuid=agent_name) + + # Create agent + agent_state = client.create_agent(name=agent_name, llm_config=llm_config, embedding_config=embedding_config) + tools = [client.get_tool(client.get_tool_id(name=tool_name)) for tool_name in agent_state.tools] + letta_agent = Agent(interface=StreamingRefreshCLIInterface(), agent_state=agent_state, tools=tools, first_message_verify_mono=False) + + # Make conversation + messages = [ + "Did you know that honey never spoils? Archaeologists have found pots of honey in ancient Egyptian tombs that are over 3,000 years old and still perfectly edible.", + "Octopuses have three hearts, and two of them stop beating when they swim.", + ] + + for m in messages: + letta_agent.step_user_message( + user_message_str=m, + first_message=False, + skip_verify=False, + stream=False, + ms=client.server.ms, + ) + + # Invoke a summarize + letta_agent.summarize_messages_inplace(preserve_last_N_messages=False) + assert SUMMARY_KEY_PHRASE in letta_agent.messages[1]["content"], f"Test failed for config: {config_filename}" diff --git a/tests/test_agent_tool_graph.py b/tests/test_agent_tool_graph.py index 2988949d..049e9978 100644 --- a/tests/test_agent_tool_graph.py +++ b/tests/test_agent_tool_graph.py @@ -1,4 +1,3 @@ -import os import uuid import pytest @@ -13,12 +12,11 @@ from tests.helpers.endpoints_helper import ( setup_agent, ) from tests.helpers.utils import cleanup -from tests.test_endpoints import llm_config_dir # Generate uuid for agent name for this example namespace = uuid.NAMESPACE_DNS agent_uuid = str(uuid.uuid5(namespace, "test_agent_tool_graph")) -config_file = os.path.join(llm_config_dir, "openai-gpt-4o.json") +config_file = "tests/configs/llm_model_configs/openai-gpt-4o.json" """Contrived tools for this test case""" diff --git a/tests/test_endpoints.py b/tests/test_model_letta_perfomance.py similarity index 94% rename from tests/test_endpoints.py rename to tests/test_model_letta_perfomance.py index 575ae13b..c5778772 100644 --- a/tests/test_endpoints.py +++ b/tests/test_model_letta_perfomance.py @@ -59,6 +59,7 @@ def retry_until_threshold(threshold=0.5, max_attempts=10, sleep_time_seconds=4): # ====================================================================================================================== # OPENAI TESTS # ====================================================================================================================== +@retry_until_threshold(threshold=0.75, max_attempts=4) def test_openai_gpt_4o_returns_valid_first_message(): filename = os.path.join(llm_config_dir, "openai-gpt-4o.json") response = check_first_response_is_valid_for_llm_endpoint(filename) @@ -205,44 +206,44 @@ def test_embedding_endpoint_ollama(): # ====================================================================================================================== # ANTHROPIC TESTS # ====================================================================================================================== -def test_claude_opus_3_returns_valid_first_message(): - filename = os.path.join(llm_config_dir, "claude-3-opus.json") +def test_claude_haiku_3_5_returns_valid_first_message(): + filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json") response = check_first_response_is_valid_for_llm_endpoint(filename) # Log out successful response print(f"Got successful response from client: \n\n{response}") -def test_claude_opus_3_returns_keyword(): +def test_claude_haiku_3_5_returns_keyword(): keyword = "banana" - filename = os.path.join(llm_config_dir, "claude-3-opus.json") + filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json") response = check_response_contains_keyword(filename, keyword=keyword) # Log out successful response print(f"Got successful response from client: \n\n{response}") -def test_claude_opus_3_uses_external_tool(): - filename = os.path.join(llm_config_dir, "claude-3-opus.json") +def test_claude_haiku_3_5_uses_external_tool(): + filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json") response = check_agent_uses_external_tool(filename) # Log out successful response print(f"Got successful response from client: \n\n{response}") -def test_claude_opus_3_recall_chat_memory(): - filename = os.path.join(llm_config_dir, "claude-3-opus.json") +def test_claude_haiku_3_5_recall_chat_memory(): + filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json") response = check_agent_recall_chat_memory(filename) # Log out successful response print(f"Got successful response from client: \n\n{response}") -def test_claude_opus_3_archival_memory_retrieval(): - filename = os.path.join(llm_config_dir, "claude-3-opus.json") +def test_claude_haiku_3_5_archival_memory_retrieval(): + filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json") response = check_agent_archival_memory_retrieval(filename) # Log out successful response print(f"Got successful response from client: \n\n{response}") -def test_claude_opus_3_edit_core_memory(): - filename = os.path.join(llm_config_dir, "claude-3-opus.json") +def test_claude_haiku_3_5_edit_core_memory(): + filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json") response = check_agent_edit_core_memory(filename) # Log out successful response print(f"Got successful response from client: \n\n{response}")