From f3112f75a3bbdbc924caab436e6af5dfefdd3cce Mon Sep 17 00:00:00 2001 From: cthomas Date: Mon, 1 Sep 2025 11:10:31 -0700 Subject: [PATCH] feat: add human in the loop tests to CI (#4335) feat: add human in the loop tests-to-ci --- tests/integration_test_human_in_the_loop.py | 120 +++++++++++--------- 1 file changed, 64 insertions(+), 56 deletions(-) diff --git a/tests/integration_test_human_in_the_loop.py b/tests/integration_test_human_in_the_loop.py index 8f3987fb..dc022ece 100644 --- a/tests/integration_test_human_in_the_loop.py +++ b/tests/integration_test_human_in_the_loop.py @@ -19,26 +19,29 @@ logger = get_logger(__name__) # Helper Functions and Constants # ------------------------------ - -def requires_approval_tool(input_text: str) -> str: - """ - A tool that requires approval before execution. - Args: - input_text (str): The input text to process. - Returns: - str: The processed text with 'APPROVED:' prefix. - """ - return f"APPROVED: {input_text}" - - USER_MESSAGE_OTID = str(uuid.uuid4()) +USER_MESSAGE_CONTENT = "This is an automated test message. Call the get_secret_code_tool to get the code for text 'hello world'." USER_MESSAGE_TEST_APPROVAL: List[MessageCreate] = [ MessageCreate( role="user", - content="This is an automated test message. Call the requires_approval_tool with the text 'test approval'.", + content=USER_MESSAGE_CONTENT, otid=USER_MESSAGE_OTID, ) ] +FAKE_REQUEST_ID = str(uuid.uuid4()) +SECRET_CODE = str(740845635798344975) + + +def get_secret_code_tool(input_text: str) -> str: + """ + A tool that returns the secret code based on the input. This tool requires approval before execution. + Args: + input_text (str): The input text to process. + Returns: + str: The secret code based on the input text. + """ + return str(abs(hash(input_text))) + # ------------------------------ # Fixtures @@ -98,7 +101,7 @@ def approval_tool_fixture(client: Letta): """ client.tools.upsert_base_tools() approval_tool = client.tools.upsert_from_function( - func=requires_approval_tool, + func=get_secret_code_tool, # default_requires_approval=True, switch to this once it is supported in sdk ) yield approval_tool @@ -132,51 +135,66 @@ def agent(client: Letta, approval_tool_fixture) -> AgentState: # ------------------------------ -def test_send_message_with_approval_tool( - disable_e2b_api_key: Any, - client: Letta, - agent: AgentState, -) -> None: - """ - Tests sending a message to an agent with a tool that requires approval. - This test just verifies that the agent can send a message successfully. - The actual approval logic testing will be filled out by the user. - """ - # Attempt to send approval without pending request +def test_send_approval_without_pending_request(client, agent): with pytest.raises(ApiError, match="No tool call is currently awaiting approval"): client.agents.messages.create( agent_id=agent.id, - messages=[ApprovalCreate(approve=True, approval_request_id="fake_id")], + messages=[ApprovalCreate(approve=True, approval_request_id=FAKE_REQUEST_ID)], ) - # Send a simple greeting message to test basic functionality - response = client.agents.messages.create( + +def test_send_user_message_with_pending_request(client, agent): + client.agents.messages.create( agent_id=agent.id, messages=USER_MESSAGE_TEST_APPROVAL, ) - # Basic assertion that we got a response with an approval request - assert response.messages is not None - assert len(response.messages) == 2 - assert response.messages[0].message_type == "reasoning_message" - assert response.messages[1].message_type == "approval_request_message" - approval_request_id = response.messages[0].id - tool_call_id = response.messages[1].tool_call.tool_call_id - - # Attempt to send user message - should fail with pytest.raises(ApiError, match="Please approve or deny the pending request before continuing"): client.agents.messages.create( agent_id=agent.id, messages=[MessageCreate(role="user", content="hi")], ) - # Attempt to send approval with incorrect id + +def test_send_approval_message_with_incorrect_request_id(client, agent): + client.agents.messages.create( + agent_id=agent.id, + messages=USER_MESSAGE_TEST_APPROVAL, + ) + with pytest.raises(ApiError, match="Invalid approval request ID"): client.agents.messages.create( agent_id=agent.id, - messages=[ApprovalCreate(approve=True, approval_request_id="fake_id")], + messages=[ApprovalCreate(approve=True, approval_request_id=FAKE_REQUEST_ID)], ) + +def test_send_message_with_requires_approval_tool( + client: Letta, + agent: AgentState, +) -> None: + response = client.agents.messages.create( + agent_id=agent.id, + messages=USER_MESSAGE_TEST_APPROVAL, + ) + + assert response.messages is not None + assert len(response.messages) == 2 + assert response.messages[0].message_type == "reasoning_message" + assert response.messages[1].message_type == "approval_request_message" + + +def test_approve_tool_call_request( + client: Letta, + agent: AgentState, +) -> None: + response = client.agents.messages.create( + agent_id=agent.id, + messages=USER_MESSAGE_TEST_APPROVAL, + ) + approval_request_id = response.messages[0].id + tool_call_id = response.messages[1].tool_call.tool_call_id + response = client.agents.messages.create( agent_id=agent.id, messages=[ @@ -187,7 +205,6 @@ def test_send_message_with_approval_tool( ], ) - # Basic assertion that we got a response with tool call return assert response.messages is not None assert len(response.messages) == 3 assert response.messages[0].message_type == "tool_return_message" @@ -197,38 +214,28 @@ def test_send_message_with_approval_tool( assert response.messages[2].message_type == "assistant_message" -def test_deny( - disable_e2b_api_key: Any, +def test_deny_tool_call_request( client: Letta, agent: AgentState, ) -> None: - """ - Tests sending a message to an agent with a tool that requires approval. - This test just verifies that the agent can send a message successfully. - The actual approval logic testing will be filled out by the user. - """ - # Send a simple greeting message to test basic functionality response = client.agents.messages.create( agent_id=agent.id, messages=USER_MESSAGE_TEST_APPROVAL, ) - - # Basic assertion that we got a response with an approval request - assert response.messages is not None - assert len(response.messages) == 2 - assert response.messages[0].message_type == "reasoning_message" - assert response.messages[1].message_type == "approval_request_message" approval_request_id = response.messages[0].id tool_call_id = response.messages[1].tool_call.tool_call_id response = client.agents.messages.create( agent_id=agent.id, messages=[ - ApprovalCreate(approve=False, approval_request_id=approval_request_id, reason="No don't do that, the answer is 2"), + ApprovalCreate( + approve=False, + approval_request_id=approval_request_id, + reason=f"You don't need to call the tool, the secret code is {SECRET_CODE}", + ), ], ) - # Basic assertion that we got a response with tool call return assert response.messages is not None assert len(response.messages) == 3 assert response.messages[0].message_type == "tool_return_message" @@ -236,3 +243,4 @@ def test_deny( assert response.messages[0].status == "error" assert response.messages[1].message_type == "reasoning_message" assert response.messages[2].message_type == "assistant_message" + assert SECRET_CODE in response.messages[2].content