From 6bcec854d6746cf770ddb8d2a42eb86e443f8a6d Mon Sep 17 00:00:00 2001 From: Matt Zhou Date: Fri, 4 Oct 2024 15:19:40 -0700 Subject: [PATCH] Add printed out responses for easier debugging from tests --- letta/schemas/block.py | 2 +- letta/schemas/letta_response.py | 11 ++++++++ letta/server/server.py | 2 +- tests/conftest.py | 5 ++++ tests/helpers/endpoints_helper.py | 44 +++++++++++++++++++++++++------ tests/test_endpoints.py | 26 ++++++++++++++---- 6 files changed, 75 insertions(+), 15 deletions(-) create mode 100644 tests/conftest.py diff --git a/letta/schemas/block.py b/letta/schemas/block.py index b07172f4..8af2f47c 100644 --- a/letta/schemas/block.py +++ b/letta/schemas/block.py @@ -53,7 +53,7 @@ class BaseBlock(LettaBase, validate_assignment=True): super().__setattr__(name, value) if name == "value": # run validation - self.__class__.validate(self.dict(exclude_unset=True)) + self.__class__.model_validate(self.model_dump(exclude_unset=True)) class Block(BaseBlock): diff --git a/letta/schemas/letta_response.py b/letta/schemas/letta_response.py index d8b758a7..98dbec8f 100644 --- a/letta/schemas/letta_response.py +++ b/letta/schemas/letta_response.py @@ -1,3 +1,4 @@ +import json from typing import List, Union from pydantic import BaseModel, Field @@ -23,6 +24,16 @@ class LettaResponse(BaseModel): messages: Union[List[Message], List[LettaMessage]] = Field(..., description="The messages returned by the agent.") usage: LettaUsageStatistics = Field(..., description="The usage statistics of the agent.") + def __str__(self): + return json.dumps( + { + "messages": [message.model_dump() for message in self.messages], + # `Message` and `LettaMessage` are pydantic models, so `model_dump()` is available + "usage": self.usage.model_dump(), # `LettaUsageStatistics` is a pydantic model, so `model_dump()` is available + }, + indent=4, + ) + # The streaming response is either [DONE], [DONE_STEP], [DONE], an error, or a LettaMessage LettaStreamingResponse = Union[LettaMessage, MessageStreamStatus] diff --git 
a/letta/server/server.py b/letta/server/server.py index 80b4c4f1..2f5c9f03 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -456,7 +456,7 @@ class SyncServer(Server): logger.debug("Calling step_yield()") letta_agent.interface.step_yield() - return LettaUsageStatistics(**total_usage.dict(), step_count=step_count) + return LettaUsageStatistics(**total_usage.model_dump(), step_count=step_count) def _command(self, user_id: str, agent_id: str, command: str) -> LettaUsageStatistics: """Process a CLI command""" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..90916a6e --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,5 @@ +import logging + + +def pytest_configure(config): + logging.basicConfig(level=logging.DEBUG) diff --git a/tests/helpers/endpoints_helper.py b/tests/helpers/endpoints_helper.py index dee55e84..5149eec5 100644 --- a/tests/helpers/endpoints_helper.py +++ b/tests/helpers/endpoints_helper.py @@ -1,7 +1,11 @@ import json +import logging import uuid from typing import Callable, List, Optional, Union +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) + from letta import LocalClient, RESTClient, create_client from letta.agent import Agent from letta.config import LettaConfig @@ -26,7 +30,12 @@ from letta.schemas.letta_message import ( from letta.schemas.letta_response import LettaResponse from letta.schemas.llm_config import LLMConfig from letta.schemas.memory import ChatMemory -from letta.schemas.openai.chat_completion_response import Choice, FunctionCall, Message +from letta.schemas.openai.chat_completion_response import ( + ChatCompletionResponse, + Choice, + FunctionCall, + Message, +) from letta.utils import get_human_text, get_persona_text from tests.helpers.utils import cleanup @@ -68,7 +77,13 @@ def setup_agent( return agent_state -def check_first_response_is_valid_for_llm_endpoint(filename: str, inner_thoughts_in_kwargs: bool = False): +# 
====================================================================================================================== +# Section: Complex E2E Tests +# These functions describe individual testing scenarios. +# ====================================================================================================================== + + +def check_first_response_is_valid_for_llm_endpoint(filename: str, inner_thoughts_in_kwargs: bool = False) -> ChatCompletionResponse: """ Checks that the first response is valid: @@ -110,8 +125,10 @@ def check_first_response_is_valid_for_llm_endpoint(filename: str, inner_thoughts # Assert that the message has an inner monologue assert_contains_correct_inner_monologue(choice, inner_thoughts_in_kwargs) + return response -def check_response_contains_keyword(filename: str): + +def check_response_contains_keyword(filename: str, keyword="banana") -> LettaResponse: """ Checks that the prompted response from the LLM contains a chosen keyword @@ -121,7 +138,6 @@ def check_response_contains_keyword(filename: str): cleanup(client=client, agent_uuid=agent_uuid) agent_state = setup_agent(client, filename) - keyword = "banana" keyword_message = f'This is a test to see if you can see my message. 
If you can see my message, please respond by calling send_message using a message that includes the word "{keyword}"' response = client.user_message(agent_id=agent_state.id, message=keyword_message) @@ -134,8 +150,10 @@ def check_response_contains_keyword(filename: str): # Make sure some inner monologue is present assert_inner_monologue_is_present_and_valid(response.messages) + return response -def check_agent_uses_external_tool(filename: str): + +def check_agent_uses_external_tool(filename: str) -> LettaResponse: """ Checks that the LLM will use external tools if instructed @@ -177,8 +195,10 @@ def check_agent_uses_external_tool(filename: str): # Make sure some inner monologue is present assert_inner_monologue_is_present_and_valid(response.messages) + return response -def check_agent_recall_chat_memory(filename: str): + +def check_agent_recall_chat_memory(filename: str) -> LettaResponse: """ Checks that the LLM will recall the chat memory, specifically the human persona. @@ -202,8 +222,10 @@ def check_agent_recall_chat_memory(filename: str): # Make sure some inner monologue is present assert_inner_monologue_is_present_and_valid(response.messages) + return response -def check_agent_archival_memory_retrieval(filename: str): + +def check_agent_archival_memory_retrieval(filename: str) -> LettaResponse: """ Checks that the LLM will execute an archival memory retrieval. 
@@ -230,6 +252,8 @@ def check_agent_archival_memory_retrieval(filename: str): # Make sure some inner monologue is present assert_inner_monologue_is_present_and_valid(response.messages) + return response + def run_embedding_endpoint(filename): # load JSON file @@ -255,7 +279,7 @@ def assert_sanity_checks(response: LettaResponse): assert len(response.messages) > 0 -def assert_invoked_send_message_with_keyword(messages: List[LettaMessage], keyword: str) -> None: +def assert_invoked_send_message_with_keyword(messages: List[LettaMessage], keyword: str, case_sensitive: bool = False) -> None: # Find first instance of send_message target_message = None for message in messages: @@ -280,6 +304,10 @@ def assert_invoked_send_message_with_keyword(messages: List[LettaMessage], keywo ) # Check that the keyword is in the message arguments + if not case_sensitive: + keyword = keyword.lower() + arguments["message"] = arguments["message"].lower() + if not keyword in arguments["message"]: raise InvalidFunctionCallError(messages=[target_message], explanation=f"Message argument did not contain keyword={keyword}") diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py index 3e5e3d0f..33be28df 100644 --- a/tests/test_endpoints.py +++ b/tests/test_endpoints.py @@ -19,27 +19,43 @@ llm_config_dir = "configs/llm_model_configs" # ====================================================================================================================== def test_openai_gpt_4_returns_valid_first_message(): filename = os.path.join(llm_config_dir, "gpt-4.json") - check_first_response_is_valid_for_llm_endpoint(filename) + response = check_first_response_is_valid_for_llm_endpoint(filename) + + # Log out successful response + print(f"Got successful response from client: \n\n{response}") def test_openai_gpt_4_returns_keyword(): + keyword = "banana" filename = os.path.join(llm_config_dir, "gpt-4.json") - check_response_contains_keyword(filename) + response = check_response_contains_keyword(filename, 
keyword=keyword) + + # Log out successful response + print(f"Got successful response from client: \n\n{response}") def test_openai_gpt_4_uses_external_tool(): filename = os.path.join(llm_config_dir, "gpt-4.json") - check_agent_uses_external_tool(filename) + response = check_agent_uses_external_tool(filename) + + # Log out successful response + print(f"Got successful response from client: \n\n{response}") def test_openai_gpt_4_recall_chat_memory(): filename = os.path.join(llm_config_dir, "gpt-4.json") - check_agent_recall_chat_memory(filename) + response = check_agent_recall_chat_memory(filename) + + # Log out successful response + print(f"Got successful response from client: \n\n{response}") def test_openai_gpt_4_archival_memory_retrieval(): filename = os.path.join(llm_config_dir, "gpt-4.json") - check_agent_archival_memory_retrieval(filename) + response = check_agent_archival_memory_retrieval(filename) + + # Log out successful response + print(f"Got successful response from client: \n\n{response}") def test_embedding_endpoint_openai():