From f3112f75a3bbdbc924caab436e6af5dfefdd3cce Mon Sep 17 00:00:00 2001
From: cthomas <caren@letta.com>
Date: Mon, 1 Sep 2025 11:10:31 -0700
Subject: [PATCH] feat: add human in the loop tests to CI (#4335)

feat: add human in the loop tests to CI
---
 tests/integration_test_human_in_the_loop.py | 120 +++++++++++---------
 1 file changed, 64 insertions(+), 56 deletions(-)

diff --git a/tests/integration_test_human_in_the_loop.py b/tests/integration_test_human_in_the_loop.py
index 8f3987fb..dc022ece 100644
--- a/tests/integration_test_human_in_the_loop.py
+++ b/tests/integration_test_human_in_the_loop.py
@@ -19,26 +19,29 @@ logger = get_logger(__name__)
 # Helper Functions and Constants
 # ------------------------------
 
-
-def requires_approval_tool(input_text: str) -> str:
-    """
-    A tool that requires approval before execution.
-    Args:
-        input_text (str): The input text to process.
-    Returns:
-        str: The processed text with 'APPROVED:' prefix.
-    """
-    return f"APPROVED: {input_text}"
-
-
 USER_MESSAGE_OTID = str(uuid.uuid4())
+USER_MESSAGE_CONTENT = "This is an automated test message. Call the get_secret_code_tool to get the code for text 'hello world'."
 USER_MESSAGE_TEST_APPROVAL: List[MessageCreate] = [
     MessageCreate(
         role="user",
-        content="This is an automated test message. Call the requires_approval_tool with the text 'test approval'.",
+        content=USER_MESSAGE_CONTENT,
         otid=USER_MESSAGE_OTID,
     )
 ]
+FAKE_REQUEST_ID = str(uuid.uuid4())
+SECRET_CODE = str(740845635798344975)
+
+
+def get_secret_code_tool(input_text: str) -> str:
+    """
+    A tool that returns the secret code based on the input. This tool requires approval before execution.
+    Args:
+        input_text (str): The input text to process.
+    Returns:
+        str: The secret code based on the input text.
+    """
+    return str(abs(hash(input_text)))  # NOTE(review): str hashes are salted per process unless PYTHONHASHSEED is fixed, so this value varies between runs
+
 
 # ------------------------------
 # Fixtures
@@ -98,7 +101,7 @@ def approval_tool_fixture(client: Letta):
     """
     client.tools.upsert_base_tools()
     approval_tool = client.tools.upsert_from_function(
-        func=requires_approval_tool,
+        func=get_secret_code_tool,
         # default_requires_approval=True, switch to this once it is supported in sdk
     )
     yield approval_tool
@@ -132,51 +135,66 @@ def agent(client: Letta, approval_tool_fixture) -> AgentState:
 # ------------------------------
 
 
-def test_send_message_with_approval_tool(
-    disable_e2b_api_key: Any,
-    client: Letta,
-    agent: AgentState,
-) -> None:
-    """
-    Tests sending a message to an agent with a tool that requires approval.
-    This test just verifies that the agent can send a message successfully.
-    The actual approval logic testing will be filled out by the user.
-    """
-    # Attempt to send approval without pending request
+def test_send_approval_without_pending_request(client: Letta, agent: AgentState) -> None:
     with pytest.raises(ApiError, match="No tool call is currently awaiting approval"):
         client.agents.messages.create(
             agent_id=agent.id,
-            messages=[ApprovalCreate(approve=True, approval_request_id="fake_id")],
+            messages=[ApprovalCreate(approve=True, approval_request_id=FAKE_REQUEST_ID)],
         )
 
-    # Send a simple greeting message to test basic functionality
-    response = client.agents.messages.create(
+
+def test_send_user_message_with_pending_request(client: Letta, agent: AgentState) -> None:
+    client.agents.messages.create(  # first turn is expected to leave an approval request pending
         agent_id=agent.id,
         messages=USER_MESSAGE_TEST_APPROVAL,
     )
 
-    # Basic assertion that we got a response with an approval request
-    assert response.messages is not None
-    assert len(response.messages) == 2
-    assert response.messages[0].message_type == "reasoning_message"
-    assert response.messages[1].message_type == "approval_request_message"
-    approval_request_id = response.messages[0].id
-    tool_call_id = response.messages[1].tool_call.tool_call_id
-
-    # Attempt to send user message - should fail
     with pytest.raises(ApiError, match="Please approve or deny the pending request before continuing"):
         client.agents.messages.create(
             agent_id=agent.id,
             messages=[MessageCreate(role="user", content="hi")],
         )
 
-    # Attempt to send approval with incorrect id
+
+def test_send_approval_message_with_incorrect_request_id(client: Letta, agent: AgentState) -> None:
+    client.agents.messages.create(  # first turn is expected to leave an approval request pending
+        agent_id=agent.id,
+        messages=USER_MESSAGE_TEST_APPROVAL,
+    )
+
     with pytest.raises(ApiError, match="Invalid approval request ID"):
         client.agents.messages.create(
             agent_id=agent.id,
-            messages=[ApprovalCreate(approve=True, approval_request_id="fake_id")],
+            messages=[ApprovalCreate(approve=True, approval_request_id=FAKE_REQUEST_ID)],
         )
 
+
+def test_send_message_with_requires_approval_tool(
+    client: Letta,
+    agent: AgentState,
+) -> None:
+    response = client.agents.messages.create(
+        agent_id=agent.id,
+        messages=USER_MESSAGE_TEST_APPROVAL,
+    )
+
+    assert response.messages is not None
+    assert len(response.messages) == 2  # expected shape: reasoning, then the approval request (no tool return yet)
+    assert response.messages[0].message_type == "reasoning_message"
+    assert response.messages[1].message_type == "approval_request_message"
+
+
+def test_approve_tool_call_request(
+    client: Letta,
+    agent: AgentState,
+) -> None:
+    response = client.agents.messages.create(
+        agent_id=agent.id,
+        messages=USER_MESSAGE_TEST_APPROVAL,
+    )
+    approval_request_id = response.messages[0].id
+    tool_call_id = response.messages[1].tool_call.tool_call_id
+
     response = client.agents.messages.create(
         agent_id=agent.id,
         messages=[
@@ -187,7 +205,6 @@ def test_send_message_with_approval_tool(
         ],
     )
 
-    # Basic assertion that we got a response with tool call return
     assert response.messages is not None
     assert len(response.messages) == 3
     assert response.messages[0].message_type == "tool_return_message"
@@ -197,38 +214,28 @@ def test_send_message_with_approval_tool(
     assert response.messages[2].message_type == "assistant_message"
 
 
-def test_deny(
-    disable_e2b_api_key: Any,
+def test_deny_tool_call_request(
     client: Letta,
     agent: AgentState,
 ) -> None:
-    """
-    Tests sending a message to an agent with a tool that requires approval.
-    This test just verifies that the agent can send a message successfully.
-    The actual approval logic testing will be filled out by the user.
-    """
-    # Send a simple greeting message to test basic functionality
     response = client.agents.messages.create(
         agent_id=agent.id,
         messages=USER_MESSAGE_TEST_APPROVAL,
     )
-
-    # Basic assertion that we got a response with an approval request
-    assert response.messages is not None
-    assert len(response.messages) == 2
-    assert response.messages[0].message_type == "reasoning_message"
-    assert response.messages[1].message_type == "approval_request_message"
     approval_request_id = response.messages[0].id
     tool_call_id = response.messages[1].tool_call.tool_call_id
 
     response = client.agents.messages.create(
         agent_id=agent.id,
         messages=[
-            ApprovalCreate(approve=False, approval_request_id=approval_request_id, reason="No don't do that, the answer is 2"),
+            ApprovalCreate(
+                approve=False,
+                approval_request_id=approval_request_id,
+                reason=f"You don't need to call the tool, the secret code is {SECRET_CODE}",
+            ),
         ],
     )
 
-    # Basic assertion that we got a response with tool call return
     assert response.messages is not None
     assert len(response.messages) == 3
     assert response.messages[0].message_type == "tool_return_message"
@@ -236,3 +243,4 @@ def test_deny(
     assert response.messages[0].status == "error"
     assert response.messages[1].message_type == "reasoning_message"
     assert response.messages[2].message_type == "assistant_message"
+    assert SECRET_CODE in response.messages[2].content