From e6fdf80971e110693269b6f3e23517832ac683fe Mon Sep 17 00:00:00 2001
From: Matthew Zhou <mattzh1314@gmail.com>
Date: Thu, 16 Jan 2025 09:17:06 -1000
Subject: [PATCH] fix: Add retry decorator to stochastic tests (#678)

---
 tests/helpers/utils.py                | 64 ++++++++++++++++++++++++++
 tests/test_base_functions.py          | 12 +++--
 tests/test_model_letta_performance.py | 65 +--------------------------
 3 files changed, 73 insertions(+), 68 deletions(-)

diff --git a/tests/helpers/utils.py b/tests/helpers/utils.py
index a1f13820..765c4612 100644
--- a/tests/helpers/utils.py
+++ b/tests/helpers/utils.py
@@ -1,3 +1,5 @@
+import functools
+import time
 from typing import Union
 
 from letta import LocalClient, RESTClient
@@ -8,6 +10,68 @@ from letta.schemas.tool import Tool
 from letta.schemas.user import User as PydanticUser
 
 
+def retry_until_threshold(threshold=0.5, max_attempts=10, sleep_time_seconds=4):
+    """
+    Decorator to run a test max_attempts times and fail it if the passing rate falls below the threshold.
+
+    :param threshold: Expected passing rate (e.g., 0.5 means 50% success rate expected).
+    :param max_attempts: Number of times the test is run; every attempt runs, each followed by a sleep_time_seconds pause.
+    """
+
+    def decorator_retry(func):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            success_count = 0
+            failure_count = 0
+
+            for attempt in range(max_attempts):
+                try:
+                    func(*args, **kwargs)
+                    success_count += 1
+                except Exception as e:
+                    failure_count += 1
+                    print(f"\033[93mAn attempt failed with error:\n{e}\033[0m")
+
+                time.sleep(sleep_time_seconds)
+
+            rate = success_count / max_attempts
+            if rate >= threshold:
+                print(f"Test met expected passing rate of {threshold:.2f}. Actual rate: {success_count}/{max_attempts}")
+            else:
+                raise AssertionError(
+                    f"Test did not meet expected passing rate of {threshold:.2f}. Actual rate: {success_count}/{max_attempts}"
+                )
+
+        return wrapper
+
+    return decorator_retry
+
+
+def retry_until_success(max_attempts=10, sleep_time_seconds=4):
+    """
+    Decorator to retry a function until it succeeds or the maximum number of attempts is reached.
+
+    :param max_attempts: Maximum number of attempts to retry the function.
+    :param sleep_time_seconds: Time to wait between attempts, in seconds.
+    """
+
+    def decorator_retry(func):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            for attempt in range(1, max_attempts + 1):
+                try:
+                    return func(*args, **kwargs)
+                except Exception as e:
+                    print(f"\033[93mAttempt {attempt} failed with error:\n{e}\033[0m")
+                    if attempt == max_attempts:
+                        raise
+                    time.sleep(sleep_time_seconds)
+
+        return wrapper
+
+    return decorator_retry
+
+
 def cleanup(client: Union[LocalClient, RESTClient], agent_uuid: str):
     # Clear all agents
     for agent_state in client.list_agents():
diff --git a/tests/test_base_functions.py b/tests/test_base_functions.py
index 258083d8..8736825b 100644
--- a/tests/test_base_functions.py
+++ b/tests/test_base_functions.py
@@ -9,6 +9,7 @@ from letta import LocalClient, create_client
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.letta_message import ToolReturnMessage
 from letta.schemas.llm_config import LLMConfig
+from tests.helpers.utils import retry_until_success
 
 
 @pytest.fixture(scope="module")
@@ -115,6 +116,8 @@ def test_recall(client, agent_obj):
     assert keyword in result
 
 
+# The LLM's responses are nondeterministic, so retry this test until one attempt passes or the attempts run out
+@retry_until_success(max_attempts=5, sleep_time_seconds=2)
 def test_send_message_to_agent(client, agent_obj, other_agent_obj):
     long_random_string = "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(10))
 
@@ -148,6 +151,8 @@ def test_send_message_to_agent(client, agent_obj, other_agent_obj):
     print(response.messages)
 
 
+# The LLM's responses are nondeterministic, so retry this test until one attempt passes or the attempts run out
+@retry_until_success(max_attempts=5, sleep_time_seconds=2)
 def test_send_message_to_agents_with_tags(client):
     worker_tags = ["worker", "user-456"]
 
@@ -195,10 +200,9 @@ def test_send_message_to_agents_with_tags(client):
             break
 
     # Conversation search the worker agents
-    # TODO: This search if flaky for some reason
-    # for agent in worker_agents:
-    #     result = base_functions.conversation_search(agent, long_random_string)
-    #     assert long_random_string in result
+    for agent in worker_agents:
+        result = base_functions.conversation_search(agent, long_random_string)
+        assert long_random_string in result
 
     # Test that the agent can still receive messages fine
     response = client.send_message(agent_id=manager_agent.agent_state.id, role="user", message="So what did the other agents say?")
diff --git a/tests/test_model_letta_performance.py b/tests/test_model_letta_performance.py
index d20d64ca..4d72126f 100644
--- a/tests/test_model_letta_performance.py
+++ b/tests/test_model_letta_performance.py
@@ -1,6 +1,4 @@
-import functools
 import os
-import time
 
 import pytest
 
@@ -13,74 +11,13 @@ from tests.helpers.endpoints_helper import (
     check_first_response_is_valid_for_llm_endpoint,
     run_embedding_endpoint,
 )
+from tests.helpers.utils import retry_until_success, retry_until_threshold
 
 # directories
 embedding_config_dir = "tests/configs/embedding_model_configs"
 llm_config_dir = "tests/configs/llm_model_configs"
 
 
-def retry_until_threshold(threshold=0.5, max_attempts=10, sleep_time_seconds=4):
-    """
-    Decorator to retry a test until a failure threshold is crossed.
-
-    :param threshold: Expected passing rate (e.g., 0.5 means 50% success rate expected).
-    :param max_attempts: Maximum number of attempts to retry the test.
-    """
-
-    def decorator_retry(func):
-        @functools.wraps(func)
-        def wrapper(*args, **kwargs):
-            success_count = 0
-            failure_count = 0
-
-            for attempt in range(max_attempts):
-                try:
-                    func(*args, **kwargs)
-                    success_count += 1
-                except Exception as e:
-                    failure_count += 1
-                    print(f"\033[93mAn attempt failed with error:\n{e}\033[0m")
-
-                time.sleep(sleep_time_seconds)
-
-            rate = success_count / max_attempts
-            if rate >= threshold:
-                print(f"Test met expected passing rate of {threshold:.2f}. Actual rate: {success_count}/{max_attempts}")
-            else:
-                raise AssertionError(
-                    f"Test did not meet expected passing rate of {threshold:.2f}. Actual rate: {success_count}/{max_attempts}"
-                )
-
-        return wrapper
-
-    return decorator_retry
-
-
-def retry_until_success(max_attempts=10, sleep_time_seconds=4):
-    """
-    Decorator to retry a function until it succeeds or the maximum number of attempts is reached.
-
-    :param max_attempts: Maximum number of attempts to retry the function.
-    :param sleep_time_seconds: Time to wait between attempts, in seconds.
-    """
-
-    def decorator_retry(func):
-        @functools.wraps(func)
-        def wrapper(*args, **kwargs):
-            for attempt in range(1, max_attempts + 1):
-                try:
-                    return func(*args, **kwargs)
-                except Exception as e:
-                    print(f"\033[93mAttempt {attempt} failed with error:\n{e}\033[0m")
-                    if attempt == max_attempts:
-                        raise
-                    time.sleep(sleep_time_seconds)
-
-        return wrapper
-
-    return decorator_retry
-
-
 # ======================================================================================================================
 # OPENAI TESTS
 # ======================================================================================================================