From 9acdaacc7c325836bfe0e160a805ed80d7a48682 Mon Sep 17 00:00:00 2001 From: Matthew Zhou Date: Thu, 24 Oct 2024 15:54:29 -0700 Subject: [PATCH] test: Add archival insert test to GPT-4 and make tests failure sensitive (#1930) --- .github/workflows/test_openai.yml | 47 ++++++------------------------- tests/helpers/endpoints_helper.py | 29 +++++++++++++++++++ tests/test_endpoints.py | 8 ++++++ 3 files changed, 45 insertions(+), 39 deletions(-) diff --git a/.github/workflows/test_openai.yml b/.github/workflows/test_openai.yml index f5957998..975d17b3 100644 --- a/.github/workflows/test_openai.yml +++ b/.github/workflows/test_openai.yml @@ -30,8 +30,6 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_returns_valid_first_message - echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV - continue-on-error: true - name: Test model sends message with keyword id: test_keyword_message @@ -39,8 +37,6 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_returns_keyword - echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV - continue-on-error: true - name: Test model uses external tool correctly id: test_external_tool @@ -48,8 +44,6 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_uses_external_tool - echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV - continue-on-error: true - name: Test model recalls chat memory id: test_chat_memory @@ -57,17 +51,20 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_recall_chat_memory - echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" 
>> $GITHUB_ENV - continue-on-error: true - name: Test model uses 'archival_memory_search' to find secret - id: test_archival_memory + id: test_archival_memory_search env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_archival_memory_retrieval - echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV - continue-on-error: true + + - name: Test model uses 'archival_memory_insert' to insert archival memories + id: test_archival_memory_insert + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_archival_memory_insert - name: Test model can edit core memories id: test_core_memory @@ -75,8 +72,6 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_edit_core_memory - echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV - continue-on-error: true - name: Test embedding endpoint id: test_embedding_endpoint @@ -84,29 +79,3 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_openai - echo "TEST_EMBEDDING_ENDPOINT_EXIT_CODE=$?" 
>> $GITHUB_ENV - continue-on-error: true - - - name: Summarize test results - if: always() - run: | - echo "Test Results Summary:" - echo "Test first message: $([[ $TEST_FIRST_MESSAGE_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" - echo "Test model sends message with keyword: $([[ $TEST_KEYWORD_MESSAGE_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" - echo "Test model uses external tool: $([[ $TEST_EXTERNAL_TOOL_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" - echo "Test model recalls chat memory: $([[ $TEST_CHAT_MEMORY_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" - echo "Test model uses 'archival_memory_search' to find secret: $([[ $TEST_ARCHIVAL_MEMORY_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" - echo "Test model can edit core memories: $([[ $TEST_CORE_MEMORY_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" - echo "Test embedding endpoint: $([[ $TEST_EMBEDDING_ENDPOINT_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" - - # Check if any test failed - if [[ $TEST_FIRST_MESSAGE_EXIT_CODE -ne 0 || \ - $TEST_KEYWORD_MESSAGE_EXIT_CODE -ne 0 || \ - $TEST_EXTERNAL_TOOL_EXIT_CODE -ne 0 || \ - $TEST_CHAT_MEMORY_EXIT_CODE -ne 0 || \ - $TEST_ARCHIVAL_MEMORY_EXIT_CODE -ne 0 || \ - $TEST_CORE_MEMORY_EXIT_CODE -ne 0 || \ - $TEST_EMBEDDING_ENDPOINT_EXIT_CODE -ne 0 ]]; then - echo "Some tests failed." - exit 78 - fi diff --git a/tests/helpers/endpoints_helper.py b/tests/helpers/endpoints_helper.py index 1935ea4b..225b323b 100644 --- a/tests/helpers/endpoints_helper.py +++ b/tests/helpers/endpoints_helper.py @@ -229,6 +229,35 @@ def check_agent_recall_chat_memory(filename: str) -> LettaResponse: return response +def check_agent_archival_memory_insert(filename: str) -> LettaResponse: + """ + Checks that the LLM will execute an archival memory insert. 
+ + Note: This is acting on the Letta response, note the usage of `user_message` + """ + # Set up client + client = create_client() + cleanup(client=client, agent_uuid=agent_uuid) + agent_state = setup_agent(client, filename) + secret_word = "banana" + + response = client.user_message( + agent_id=agent_state.id, + message=f"Please insert the secret word '{secret_word}' into archival memory.", + ) + + # Basic checks + assert_sanity_checks(response) + + # Make sure archival_memory_insert was called + assert_invoked_function_call(response.messages, "archival_memory_insert") + + # Make sure some inner monologue is present + assert_inner_monologue_is_present_and_valid(response.messages) + + return response + + def check_agent_archival_memory_retrieval(filename: str) -> LettaResponse: """ Checks that the LLM will execute an archival memory retrieval. diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py index 855db930..08812311 100644 --- a/tests/test_endpoints.py +++ b/tests/test_endpoints.py @@ -3,6 +3,7 @@ import os import time from tests.helpers.endpoints_helper import ( + check_agent_archival_memory_insert, check_agent_archival_memory_retrieval, check_agent_edit_core_memory, check_agent_recall_chat_memory, @@ -93,6 +94,13 @@ def test_openai_gpt_4_archival_memory_retrieval(): print(f"Got successful response from client: \n\n{response}") +def test_openai_gpt_4_archival_memory_insert(): + filename = os.path.join(llm_config_dir, "gpt-4.json") + response = check_agent_archival_memory_insert(filename) + # Log out successful response + print(f"Got successful response from client: \n\n{response}") + + def test_openai_gpt_4_edit_core_memory(): filename = os.path.join(llm_config_dir, "gpt-4.json") response = check_agent_edit_core_memory(filename)