From 9acdaacc7c325836bfe0e160a805ed80d7a48682 Mon Sep 17 00:00:00 2001
From: Matthew Zhou <mattzh1314@gmail.com>
Date: Thu, 24 Oct 2024 15:54:29 -0700
Subject: [PATCH] test: Add archival insert test to GPT-4 and make tests
 failure sensitive (#1930)

---
 .github/workflows/test_openai.yml | 47 ++++++-------------------------
 tests/helpers/endpoints_helper.py | 29 +++++++++++++++++++
 tests/test_endpoints.py           |  8 ++++++
 3 files changed, 45 insertions(+), 39 deletions(-)

diff --git a/.github/workflows/test_openai.yml b/.github/workflows/test_openai.yml
index f5957998..975d17b3 100644
--- a/.github/workflows/test_openai.yml
+++ b/.github/workflows/test_openai.yml
@@ -30,8 +30,6 @@ jobs:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
         poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_returns_valid_first_message
-        echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
-      continue-on-error: true
 
     - name: Test model sends message with keyword
       id: test_keyword_message
@@ -39,8 +37,6 @@ jobs:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
         poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_returns_keyword
-        echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
-      continue-on-error: true
 
     - name: Test model uses external tool correctly
       id: test_external_tool
@@ -48,8 +44,6 @@ jobs:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
         poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_uses_external_tool
-        echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV
-      continue-on-error: true
 
     - name: Test model recalls chat memory
       id: test_chat_memory
@@ -57,17 +51,20 @@ jobs:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
         poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_recall_chat_memory
-        echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
-      continue-on-error: true
 
     - name: Test model uses 'archival_memory_search' to find secret
-      id: test_archival_memory
+      id: test_archival_memory_search
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
         poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_archival_memory_retrieval
-        echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
-      continue-on-error: true
+
+    - name: Test model uses 'archival_memory_insert' to insert archival memories
+      id: test_archival_memory_insert
+      env:
+        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      run: |
+        poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_archival_memory_insert
 
     - name: Test model can edit core memories
       id: test_core_memory
@@ -75,8 +72,6 @@ jobs:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
         poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_edit_core_memory
-        echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
-      continue-on-error: true
 
     - name: Test embedding endpoint
       id: test_embedding_endpoint
@@ -84,29 +79,3 @@ jobs:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
         poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_openai
-        echo "TEST_EMBEDDING_ENDPOINT_EXIT_CODE=$?" >> $GITHUB_ENV
-      continue-on-error: true
-
-    - name: Summarize test results
-      if: always()
-      run: |
-        echo "Test Results Summary:"
-        echo "Test first message: $([[ $TEST_FIRST_MESSAGE_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)"
-        echo "Test model sends message with keyword: $([[ $TEST_KEYWORD_MESSAGE_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)"
-        echo "Test model uses external tool: $([[ $TEST_EXTERNAL_TOOL_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)"
-        echo "Test model recalls chat memory: $([[ $TEST_CHAT_MEMORY_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)"
-        echo "Test model uses 'archival_memory_search' to find secret: $([[ $TEST_ARCHIVAL_MEMORY_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)"
-        echo "Test model can edit core memories: $([[ $TEST_CORE_MEMORY_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)"
-        echo "Test embedding endpoint: $([[ $TEST_EMBEDDING_ENDPOINT_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)"
-
-        # Check if any test failed
-        if [[ $TEST_FIRST_MESSAGE_EXIT_CODE -ne 0 || \
-              $TEST_KEYWORD_MESSAGE_EXIT_CODE -ne 0 || \
-              $TEST_EXTERNAL_TOOL_EXIT_CODE -ne 0 || \
-              $TEST_CHAT_MEMORY_EXIT_CODE -ne 0 || \
-              $TEST_ARCHIVAL_MEMORY_EXIT_CODE -ne 0 || \
-              $TEST_CORE_MEMORY_EXIT_CODE -ne 0 || \
-              $TEST_EMBEDDING_ENDPOINT_EXIT_CODE -ne 0 ]]; then
-          echo "Some tests failed."
-          exit 78
-        fi
diff --git a/tests/helpers/endpoints_helper.py b/tests/helpers/endpoints_helper.py
index 1935ea4b..225b323b 100644
--- a/tests/helpers/endpoints_helper.py
+++ b/tests/helpers/endpoints_helper.py
@@ -229,6 +229,35 @@ def check_agent_recall_chat_memory(filename: str) -> LettaResponse:
     return response
 
 
+def check_agent_archival_memory_insert(filename: str) -> LettaResponse:
+    """
+    Checks that the LLM will execute an archival memory insert.
+
+    Note: This acts on the Letta response; see the usage of `user_message`.
+    """
+    # Set up client
+    client = create_client()
+    cleanup(client=client, agent_uuid=agent_uuid)
+    agent_state = setup_agent(client, filename)
+    secret_word = "banana"
+
+    response = client.user_message(
+        agent_id=agent_state.id,
+        message=f"Please insert the secret word '{secret_word}' into archival memory.",
+    )
+
+    # Basic checks
+    assert_sanity_checks(response)
+
+    # Make sure archival_memory_insert was called
+    assert_invoked_function_call(response.messages, "archival_memory_insert")
+
+    # Make sure some inner monologue is present
+    assert_inner_monologue_is_present_and_valid(response.messages)
+
+    return response
+
+
 def check_agent_archival_memory_retrieval(filename: str) -> LettaResponse:
     """
     Checks that the LLM will execute an archival memory retrieval.
diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py
index 855db930..08812311 100644
--- a/tests/test_endpoints.py
+++ b/tests/test_endpoints.py
@@ -3,6 +3,7 @@ import os
 import time
 
 from tests.helpers.endpoints_helper import (
+    check_agent_archival_memory_insert,
     check_agent_archival_memory_retrieval,
     check_agent_edit_core_memory,
     check_agent_recall_chat_memory,
@@ -93,6 +94,13 @@ def test_openai_gpt_4_archival_memory_retrieval():
     print(f"Got successful response from client: \n\n{response}")
 
 
+def test_openai_gpt_4_archival_memory_insert():
+    filename = os.path.join(llm_config_dir, "gpt-4.json")
+    response = check_agent_archival_memory_insert(filename)
+    # Log out successful response
+    print(f"Got successful response from client: \n\n{response}")
+
+
 def test_openai_gpt_4_edit_core_memory():
     filename = os.path.join(llm_config_dir, "gpt-4.json")
     response = check_agent_edit_core_memory(filename)