feat: add human in the loop tests to CI (#4335)

feat: add human-in-the-loop tests to CI
2025-09-01 11:10:31 -07:00
parent 56452ae426
commit f3112f75a3
1 changed files with 64 additions and 56 deletions
--- a/tests/integration_test_human_in_the_loop.py
+++ b/tests/integration_test_human_in_the_loop.py
@@ -19,26 +19,29 @@ logger = get_logger(__name__)
 # Helper Functions and Constants
 # ------------------------------

-
-def requires_approval_tool(input_text: str) -> str:
-    """
-    A tool that requires approval before execution.
-    Args:
-        input_text (str): The input text to process.
-    Returns:
-        str: The processed text with 'APPROVED:' prefix.
-    """
-    return f"APPROVED: {input_text}"
-
-
 USER_MESSAGE_OTID = str(uuid.uuid4())
+USER_MESSAGE_CONTENT = "This is an automated test message. Call the get_secret_code_tool to get the code for text 'hello world'."
 USER_MESSAGE_TEST_APPROVAL: List[MessageCreate] = [
    MessageCreate(
        role="user",
-        content="This is an automated test message. Call the requires_approval_tool with the text 'test approval'.",
+        content=USER_MESSAGE_CONTENT,
        otid=USER_MESSAGE_OTID,
    )
 ]
+FAKE_REQUEST_ID = str(uuid.uuid4())
+SECRET_CODE = str(740845635798344975)
+
+
+def get_secret_code_tool(input_text: str) -> str:
+    """
+    A tool that returns the secret code based on the input. This tool requires approval before execution.
+    Args:
+        input_text (str): The input text to process.
+    Returns:
+        str: The decimal string of abs(hash(input_text)); Python salts str hashes per process, so the code may differ between runs.
+    """
+    return str(abs(hash(input_text)))
+

 # ------------------------------
 # Fixtures
@@ -98,7 +101,7 @@ def approval_tool_fixture(client: Letta):
    """
    client.tools.upsert_base_tools()
    approval_tool = client.tools.upsert_from_function(
-        func=requires_approval_tool,
+        func=get_secret_code_tool,
        # default_requires_approval=True, switch to this once it is supported in sdk
    )
    yield approval_tool
@@ -132,51 +135,66 @@ def agent(client: Letta, approval_tool_fixture) -> AgentState:
 # ------------------------------


-def test_send_message_with_approval_tool(
-    disable_e2b_api_key: Any,
-    client: Letta,
-    agent: AgentState,
-) -> None:
-    """
-    Tests sending a message to an agent with a tool that requires approval.
-    This test just verifies that the agent can send a message successfully.
-    The actual approval logic testing will be filled out by the user.
-    """
-    # Attempt to send approval without pending request
+def test_send_approval_without_pending_request(client, agent):
    with pytest.raises(ApiError, match="No tool call is currently awaiting approval"):
        client.agents.messages.create(
            agent_id=agent.id,
-            messages=[ApprovalCreate(approve=True, approval_request_id="fake_id")],
+            messages=[ApprovalCreate(approve=True, approval_request_id=FAKE_REQUEST_ID)],
        )

-    # Send a simple greeting message to test basic functionality
-    response = client.agents.messages.create(
+
+def test_send_user_message_with_pending_request(client, agent):
+    client.agents.messages.create(
        agent_id=agent.id,
        messages=USER_MESSAGE_TEST_APPROVAL,
    )

-    # Basic assertion that we got a response with an approval request
-    assert response.messages is not None
-    assert len(response.messages) == 2
-    assert response.messages[0].message_type == "reasoning_message"
-    assert response.messages[1].message_type == "approval_request_message"
-    approval_request_id = response.messages[0].id
-    tool_call_id = response.messages[1].tool_call.tool_call_id
-
-    # Attempt to send user message - should fail
    with pytest.raises(ApiError, match="Please approve or deny the pending request before continuing"):
        client.agents.messages.create(
            agent_id=agent.id,
            messages=[MessageCreate(role="user", content="hi")],
        )

-    # Attempt to send approval with incorrect id
+
+def test_send_approval_message_with_incorrect_request_id(client, agent):
+    client.agents.messages.create(
+        agent_id=agent.id,
+        messages=USER_MESSAGE_TEST_APPROVAL,
+    )
+
    with pytest.raises(ApiError, match="Invalid approval request ID"):
        client.agents.messages.create(
            agent_id=agent.id,
-            messages=[ApprovalCreate(approve=True, approval_request_id="fake_id")],
+            messages=[ApprovalCreate(approve=True, approval_request_id=FAKE_REQUEST_ID)],
        )

+
+def test_send_message_with_requires_approval_tool(
+    client: Letta,
+    agent: AgentState,
+) -> None:
+    response = client.agents.messages.create(
+        agent_id=agent.id,
+        messages=USER_MESSAGE_TEST_APPROVAL,
+    )
+
+    assert response.messages is not None
+    assert len(response.messages) == 2
+    assert response.messages[0].message_type == "reasoning_message"
+    assert response.messages[1].message_type == "approval_request_message"
+
+
+def test_approve_tool_call_request(
+    client: Letta,
+    agent: AgentState,
+) -> None:
+    response = client.agents.messages.create(
+        agent_id=agent.id,
+        messages=USER_MESSAGE_TEST_APPROVAL,
+    )
+    approval_request_id = response.messages[0].id
+    tool_call_id = response.messages[1].tool_call.tool_call_id
+
    response = client.agents.messages.create(
        agent_id=agent.id,
        messages=[
@@ -187,7 +205,6 @@ def test_send_message_with_approval_tool(
        ],
    )

-    # Basic assertion that we got a response with tool call return
    assert response.messages is not None
    assert len(response.messages) == 3
    assert response.messages[0].message_type == "tool_return_message"
@@ -197,38 +214,28 @@ def test_send_message_with_approval_tool(
    assert response.messages[2].message_type == "assistant_message"


-def test_deny(
-    disable_e2b_api_key: Any,
+def test_deny_tool_call_request(
    client: Letta,
    agent: AgentState,
 ) -> None:
-    """
-    Tests sending a message to an agent with a tool that requires approval.
-    This test just verifies that the agent can send a message successfully.
-    The actual approval logic testing will be filled out by the user.
-    """
-    # Send a simple greeting message to test basic functionality
    response = client.agents.messages.create(
        agent_id=agent.id,
        messages=USER_MESSAGE_TEST_APPROVAL,
    )
-
-    # Basic assertion that we got a response with an approval request
-    assert response.messages is not None
-    assert len(response.messages) == 2
-    assert response.messages[0].message_type == "reasoning_message"
-    assert response.messages[1].message_type == "approval_request_message"
    approval_request_id = response.messages[0].id
    tool_call_id = response.messages[1].tool_call.tool_call_id

    response = client.agents.messages.create(
        agent_id=agent.id,
        messages=[
-            ApprovalCreate(approve=False, approval_request_id=approval_request_id, reason="No don't do that, the answer is 2"),
+            ApprovalCreate(
+                approve=False,
+                approval_request_id=approval_request_id,
+                reason=f"You don't need to call the tool, the secret code is {SECRET_CODE}",
+            ),
        ],
    )

-    # Basic assertion that we got a response with tool call return
    assert response.messages is not None
    assert len(response.messages) == 3
    assert response.messages[0].message_type == "tool_return_message"
@@ -236,3 +243,4 @@ def test_deny(
    assert response.messages[0].status == "error"
    assert response.messages[1].message_type == "reasoning_message"
    assert response.messages[2].message_type == "assistant_message"
+    assert SECRET_CODE in response.messages[2].content