feat: add human in the loop tests to CI (#4335)
feat: add human in the loop tests-to-ci
This commit is contained in:
@@ -19,26 +19,29 @@ logger = get_logger(__name__)
|
||||
# Helper Functions and Constants
|
||||
# ------------------------------
|
||||
|
||||
|
||||
def requires_approval_tool(input_text: str) -> str:
    """
    A tool that requires approval before execution.

    Args:
        input_text (str): The input text to process.

    Returns:
        str: The processed text with 'APPROVED:' prefix.
    """
    # The function itself performs no approval check; it only tags the text
    # it is given so a caller can recognise that the tool actually ran.
    approved_template = "APPROVED: {}"
    return approved_template.format(input_text)
|
||||
|
||||
|
||||
# Random UUID string passed as the `otid` of the shared user message below.
USER_MESSAGE_OTID = str(uuid.uuid4())

# Prompt text that tells the agent to call get_secret_code_tool.
USER_MESSAGE_CONTENT = "This is an automated test message. Call the get_secret_code_tool to get the code for text 'hello world'."

# Message list the approval tests send to the agent.
# `content` is given exactly once: repeating a keyword argument in a call is
# a SyntaxError, so only the USER_MESSAGE_CONTENT form is kept.
USER_MESSAGE_TEST_APPROVAL: List[MessageCreate] = [
    MessageCreate(
        role="user",
        content=USER_MESSAGE_CONTENT,
        otid=USER_MESSAGE_OTID,
    )
]

# Random UUID string the tests submit as an `approval_request_id`; it is
# generated here and never taken from a server response.
FAKE_REQUEST_ID = str(uuid.uuid4())

# Code that the deny test quotes in its denial reason and then expects to
# find in the assistant's reply.
SECRET_CODE = str(740845635798344975)
|
||||
|
||||
|
||||
def get_secret_code_tool(input_text: str) -> str:
    """
    A tool that returns the secret code based on the input. This tool requires approval before execution.

    Args:
        input_text (str): The input text to process.

    Returns:
        str: The secret code based on the input text.
    """
    # NOTE: built-in hash() of a str is salted per interpreter process unless
    # PYTHONHASHSEED is pinned, so the code is repeatable within one run only.
    raw_hash = hash(input_text)
    # Drop the sign so the result is rendered as a plain run of digits.
    unsigned_code = abs(raw_hash)
    return str(unsigned_code)
|
||||
|
||||
|
||||
# ------------------------------
|
||||
# Fixtures
|
||||
@@ -98,7 +101,7 @@ def approval_tool_fixture(client: Letta):
|
||||
"""
|
||||
client.tools.upsert_base_tools()
|
||||
approval_tool = client.tools.upsert_from_function(
|
||||
func=requires_approval_tool,
|
||||
func=get_secret_code_tool,
|
||||
# TODO: pass default_requires_approval=True here once the SDK supports it
|
||||
)
|
||||
yield approval_tool
|
||||
@@ -132,51 +135,66 @@ def agent(client: Letta, approval_tool_fixture) -> AgentState:
|
||||
# ------------------------------
|
||||
|
||||
|
||||
def test_send_message_with_approval_tool(
|
||||
disable_e2b_api_key: Any,
|
||||
client: Letta,
|
||||
agent: AgentState,
|
||||
) -> None:
|
||||
"""
|
||||
Tests sending a message to an agent with a tool that requires approval.
|
||||
This test just verifies that the agent can send a message successfully.
|
||||
The actual approval logic testing will be filled out by the user.
|
||||
"""
|
||||
# Attempt to send approval without pending request
|
||||
def test_send_approval_without_pending_request(client, agent):
    """
    Check that an approval is rejected when no user message was sent first.

    This test sends nothing to the agent before the approval, and the id it
    submits is the module-level FAKE_REQUEST_ID rather than one returned by
    the server.

    Args:
        client: Client object exposing ``agents.messages.create``.
        agent: Agent whose ``id`` is targeted by the request.

    Raises:
        AssertionError: (via pytest) if the call does not raise ``ApiError``
            matching "No tool call is currently awaiting approval".
    """
    with pytest.raises(ApiError, match="No tool call is currently awaiting approval"):
        client.agents.messages.create(
            agent_id=agent.id,
            # `messages` is passed exactly once; a repeated keyword argument
            # is a SyntaxError.
            messages=[ApprovalCreate(approve=True, approval_request_id=FAKE_REQUEST_ID)],
        )
|
||||
|
||||
# Send a simple greeting message to test basic functionality
|
||||
response = client.agents.messages.create(
|
||||
|
||||
def test_send_user_message_with_pending_request(client, agent):
|
||||
client.agents.messages.create(
|
||||
agent_id=agent.id,
|
||||
messages=USER_MESSAGE_TEST_APPROVAL,
|
||||
)
|
||||
|
||||
# Basic assertion that we got a response with an approval request
|
||||
assert response.messages is not None
|
||||
assert len(response.messages) == 2
|
||||
assert response.messages[0].message_type == "reasoning_message"
|
||||
assert response.messages[1].message_type == "approval_request_message"
|
||||
approval_request_id = response.messages[0].id
|
||||
tool_call_id = response.messages[1].tool_call.tool_call_id
|
||||
|
||||
# Attempt to send user message - should fail
|
||||
with pytest.raises(ApiError, match="Please approve or deny the pending request before continuing"):
|
||||
client.agents.messages.create(
|
||||
agent_id=agent.id,
|
||||
messages=[MessageCreate(role="user", content="hi")],
|
||||
)
|
||||
|
||||
# Attempt to send approval with incorrect id
|
||||
|
||||
def test_send_approval_message_with_incorrect_request_id(client, agent):
    """
    Check that an approval carrying a non-matching request id is rejected.

    The shared USER_MESSAGE_TEST_APPROVAL prompt is sent first (presumably
    leaving an approval request pending; confirm against the server
    behaviour). The approval that follows uses the module-level
    FAKE_REQUEST_ID instead of an id taken from that response.

    Args:
        client: Client object exposing ``agents.messages.create``.
        agent: Agent whose ``id`` is targeted by both requests.

    Raises:
        AssertionError: (via pytest) if the second call does not raise
            ``ApiError`` matching "Invalid approval request ID".
    """
    # Step 1: send the prompt; its response is intentionally not inspected.
    client.agents.messages.create(
        agent_id=agent.id,
        messages=USER_MESSAGE_TEST_APPROVAL,
    )

    # Step 2: answer with an id that was not taken from step 1.
    with pytest.raises(ApiError, match="Invalid approval request ID"):
        client.agents.messages.create(
            agent_id=agent.id,
            # `messages` is passed exactly once; a repeated keyword argument
            # is a SyntaxError.
            messages=[ApprovalCreate(approve=True, approval_request_id=FAKE_REQUEST_ID)],
        )
|
||||
|
||||
|
||||
def test_send_message_with_requires_approval_tool(
    client: Letta,
    agent: AgentState,
) -> None:
    """
    Send the shared approval prompt and check the shape of the reply.

    The reply must contain exactly two messages, in this order: a
    ``reasoning_message`` followed by an ``approval_request_message``.

    Args:
        client (Letta): Client used to post the message.
        agent (AgentState): Agent whose ``id`` receives the message.
    """
    reply = client.agents.messages.create(
        agent_id=agent.id,
        messages=USER_MESSAGE_TEST_APPROVAL,
    )
    returned = reply.messages

    assert returned is not None
    assert len(returned) == 2

    # Compare message types position by position against the expected order.
    expected_types = ("reasoning_message", "approval_request_message")
    for position, expected_type in enumerate(expected_types):
        assert returned[position].message_type == expected_type
|
||||
|
||||
|
||||
def test_approve_tool_call_request(
|
||||
client: Letta,
|
||||
agent: AgentState,
|
||||
) -> None:
|
||||
response = client.agents.messages.create(
|
||||
agent_id=agent.id,
|
||||
messages=USER_MESSAGE_TEST_APPROVAL,
|
||||
)
|
||||
approval_request_id = response.messages[0].id
|
||||
tool_call_id = response.messages[1].tool_call.tool_call_id
|
||||
|
||||
response = client.agents.messages.create(
|
||||
agent_id=agent.id,
|
||||
messages=[
|
||||
@@ -187,7 +205,6 @@ def test_send_message_with_approval_tool(
|
||||
],
|
||||
)
|
||||
|
||||
# Basic assertion that we got a response with tool call return
|
||||
assert response.messages is not None
|
||||
assert len(response.messages) == 3
|
||||
assert response.messages[0].message_type == "tool_return_message"
|
||||
@@ -197,38 +214,28 @@ def test_send_message_with_approval_tool(
|
||||
assert response.messages[2].message_type == "assistant_message"
|
||||
|
||||
|
||||
def test_deny(
|
||||
disable_e2b_api_key: Any,
|
||||
def test_deny_tool_call_request(
|
||||
client: Letta,
|
||||
agent: AgentState,
|
||||
) -> None:
|
||||
"""
|
||||
Tests sending a message to an agent with a tool that requires approval.
|
||||
This test just verifies that the agent can send a message successfully.
|
||||
The actual approval logic testing will be filled out by the user.
|
||||
"""
|
||||
# Send a simple greeting message to test basic functionality
|
||||
response = client.agents.messages.create(
|
||||
agent_id=agent.id,
|
||||
messages=USER_MESSAGE_TEST_APPROVAL,
|
||||
)
|
||||
|
||||
# Basic assertion that we got a response with an approval request
|
||||
assert response.messages is not None
|
||||
assert len(response.messages) == 2
|
||||
assert response.messages[0].message_type == "reasoning_message"
|
||||
assert response.messages[1].message_type == "approval_request_message"
|
||||
approval_request_id = response.messages[0].id
|
||||
tool_call_id = response.messages[1].tool_call.tool_call_id
|
||||
|
||||
response = client.agents.messages.create(
|
||||
agent_id=agent.id,
|
||||
messages=[
|
||||
ApprovalCreate(approve=False, approval_request_id=approval_request_id, reason="No don't do that, the answer is 2"),
|
||||
ApprovalCreate(
|
||||
approve=False,
|
||||
approval_request_id=approval_request_id,
|
||||
reason=f"You don't need to call the tool, the secret code is {SECRET_CODE}",
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
# Basic assertion that we got a response with tool call return
|
||||
assert response.messages is not None
|
||||
assert len(response.messages) == 3
|
||||
assert response.messages[0].message_type == "tool_return_message"
|
||||
@@ -236,3 +243,4 @@ def test_deny(
|
||||
assert response.messages[0].status == "error"
|
||||
assert response.messages[1].message_type == "reasoning_message"
|
||||
assert response.messages[2].message_type == "assistant_message"
|
||||
assert SECRET_CODE in response.messages[2].content
|
||||
|
||||
Reference in New Issue
Block a user