From 227b76fe0e81e9c67d1bfcaee7923e7ef6f409f7 Mon Sep 17 00:00:00 2001 From: Matthew Zhou Date: Tue, 1 Apr 2025 16:54:09 -0700 Subject: [PATCH] feat: Add testing for SDK `send_message` variants (#1520) --- poetry.lock | 8 +- pyproject.toml | 2 +- tests/conftest.py | 14 +- tests/integration_test_agent_tool_graph.py | 18 +- tests/integration_test_async_tool_sandbox.py | 20 +- tests/integration_test_chat_completions.py | 6 +- tests/integration_test_experimental.py | 6 +- .../integration_test_offline_memory_agent.py | 6 +- tests/integration_test_send_message.py | 333 ++++++++++++++++++ tests/integration_test_summarizer.py | 8 +- ...integration_test_tool_execution_sandbox.py | 28 +- tests/test_agent_serialization.py | 4 +- tests/test_client.py | 2 +- tests/test_client_legacy.py | 12 +- tests/test_model_letta_performance.py | 14 +- tests/test_server.py | 16 +- tests/test_streaming.py | 2 +- 17 files changed, 414 insertions(+), 85 deletions(-) create mode 100644 tests/integration_test_send_message.py diff --git a/poetry.lock b/poetry.lock index b788c521..aa195e18 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2695,13 +2695,13 @@ pytest = ["pytest (>=7.0.0)", "rich (>=13.9.4,<14.0.0)"] [[package]] name = "letta-client" -version = "0.1.84" +version = "0.1.91" description = "" optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "letta_client-0.1.84-py3-none-any.whl", hash = "sha256:ac82b1d043dd6182b71f1abb339bc6b855f6aa851023ae67ae92c8b7c39ce0b5"}, - {file = "letta_client-0.1.84.tar.gz", hash = "sha256:5705db7e89b0f598bd3645c668a14c55bc7cbe55db35bfd291646ab3d6eec434"}, + {file = "letta_client-0.1.91-py3-none-any.whl", hash = "sha256:eb4508177dcbed5c4abc5cb1929cf67a7189851d9c310cab4e9bc8e4ce4d4d3f"}, + {file = "letta_client-0.1.91.tar.gz", hash = "sha256:26b9936c4fca9fc9238afeaa8ce25fa6d4ef30153c425f0cfdd54c19ca78e028"}, ] [package.dependencies] @@ -6707,4 +6707,4 @@ tests = ["wikipedia"] [metadata] lock-version = "2.0" python-versions = "<3.14,>=3.10" -content-hash = "6863aa7a366a80c9b7ba0904e1034974969184ecef5bf48abd5e02c33167ec71" +content-hash = "d5db02048c6ad56bd289a76b8fdf522284f330ff6993e0825bece04d0bdda2c8" diff --git a/pyproject.toml b/pyproject.toml index 4f2b127d..7048b5fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,7 @@ llama-index = "^0.12.2" llama-index-embeddings-openai = "^0.3.1" e2b-code-interpreter = {version = "^1.0.3", optional = true} anthropic = "^0.49.0" -letta_client = "^0.1.65" +letta_client = "^0.1.91" openai = "^1.60.0" opentelemetry-api = "1.30.0" opentelemetry-sdk = "1.30.0" diff --git a/tests/conftest.py b/tests/conftest.py index 78e60df1..220438e2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ import logging +from typing import Generator import pytest @@ -12,19 +13,16 @@ def pytest_configure(config): @pytest.fixture -def mock_e2b_api_key_none(): +def disable_e2b_api_key() -> Generator[None, None, None]: + """ + Temporarily disables the E2B API key by setting `tool_settings.e2b_api_key` to None + for the duration of the test. Restores the original value afterward. + """ from letta.settings import tool_settings - # Store the original value of e2b_api_key original_api_key = tool_settings.e2b_api_key - - # Set e2b_api_key to None tool_settings.e2b_api_key = None - - # Yield control to the test yield - - # Restore the original value of e2b_api_key tool_settings.e2b_api_key = original_api_key diff --git a/tests/integration_test_agent_tool_graph.py b/tests/integration_test_agent_tool_graph.py index 6e17bd92..bc3aee7a 100644 --- a/tests/integration_test_agent_tool_graph.py +++ b/tests/integration_test_agent_tool_graph.py @@ -109,7 +109,7 @@ def auto_error(): @pytest.mark.timeout(60) # Sets a 60-second timeout for the test since this could loop infinitely -def test_single_path_agent_tool_call_graph(mock_e2b_api_key_none): +def test_single_path_agent_tool_call_graph(disable_e2b_api_key): client = create_client() cleanup(client=client, agent_uuid=agent_uuid) @@ -162,7 +162,7 @@ def test_single_path_agent_tool_call_graph(mock_e2b_api_key_none): cleanup(client=client, agent_uuid=agent_uuid) -def test_check_tool_rules_with_different_models(mock_e2b_api_key_none): +def test_check_tool_rules_with_different_models(disable_e2b_api_key): """Test that tool rules are properly checked for different model configurations.""" client = create_client() @@ -211,7 +211,7 @@ def test_check_tool_rules_with_different_models(mock_e2b_api_key_none): cleanup(client=client, agent_uuid=agent_uuid) -def test_claude_initial_tool_rule_enforced(mock_e2b_api_key_none): +def test_claude_initial_tool_rule_enforced(disable_e2b_api_key): """Test that the initial tool rule is enforced for the first message.""" client = create_client() @@ -262,7 +262,7 @@ def test_claude_initial_tool_rule_enforced(mock_e2b_api_key_none): @pytest.mark.timeout(60) # Sets a 60-second timeout for the test since this could loop infinitely -def test_agent_no_structured_output_with_one_child_tool(mock_e2b_api_key_none): +def test_agent_no_structured_output_with_one_child_tool(disable_e2b_api_key): client = create_client() cleanup(client=client, agent_uuid=agent_uuid) @@ -327,7 +327,7 @@ def test_agent_no_structured_output_with_one_child_tool(mock_e2b_api_key_none): # @pytest.mark.timeout(60) # Sets a 60-second timeout for the test since this could loop infinitely -# def test_agent_conditional_tool_easy(mock_e2b_api_key_none): +# def test_agent_conditional_tool_easy(disable_e2b_api_key): # """ # Test the agent with a conditional tool that has a child tool. # @@ -395,7 +395,7 @@ def test_agent_no_structured_output_with_one_child_tool(mock_e2b_api_key_none): # @pytest.mark.timeout(60) -# def test_agent_conditional_tool_without_default_child(mock_e2b_api_key_none): +# def test_agent_conditional_tool_without_default_child(disable_e2b_api_key): # """ # Test the agent with a conditional tool that allows any child tool to be called if a function returns None. # @@ -456,7 +456,7 @@ def test_agent_no_structured_output_with_one_child_tool(mock_e2b_api_key_none): # @pytest.mark.timeout(60) -# def test_agent_reload_remembers_function_response(mock_e2b_api_key_none): +# def test_agent_reload_remembers_function_response(disable_e2b_api_key): # """ # Test that when an agent is reloaded, it remembers the last function response for conditional tool chaining. # @@ -512,7 +512,7 @@ def test_agent_no_structured_output_with_one_child_tool(mock_e2b_api_key_none): # @pytest.mark.timeout(60) # Sets a 60-second timeout for the test since this could loop infinitely -# def test_simple_tool_rule(mock_e2b_api_key_none): +# def test_simple_tool_rule(disable_e2b_api_key): # """ # Test a simple tool rule where fourth_secret_word must be called after flip_coin. # @@ -676,7 +676,7 @@ def test_continue_tool_rule(): @pytest.mark.timeout(60) @retry_until_success(max_attempts=3, sleep_time_seconds=2) -def test_max_count_per_step_tool_rule_integration(mock_e2b_api_key_none): +def test_max_count_per_step_tool_rule_integration(disable_e2b_api_key): """ Test an agent with MaxCountPerStepToolRule to ensure a tool can only be called a limited number of times. diff --git a/tests/integration_test_async_tool_sandbox.py b/tests/integration_test_async_tool_sandbox.py index 11c64526..b85728db 100644 --- a/tests/integration_test_async_tool_sandbox.py +++ b/tests/integration_test_async_tool_sandbox.py @@ -253,7 +253,7 @@ def core_memory_tools(test_user): @pytest.mark.asyncio @pytest.mark.local_sandbox -async def test_local_sandbox_default(mock_e2b_api_key_none, add_integers_tool, test_user): +async def test_local_sandbox_default(disable_e2b_api_key, add_integers_tool, test_user): args = {"x": 10, "y": 5} # Mock and assert correct pathway was invoked @@ -270,7 +270,7 @@ async def test_local_sandbox_default(mock_e2b_api_key_none, add_integers_tool, t @pytest.mark.asyncio @pytest.mark.local_sandbox -async def test_local_sandbox_stateful_tool(mock_e2b_api_key_none, clear_core_memory_tool, test_user, agent_state): +async def test_local_sandbox_stateful_tool(disable_e2b_api_key, clear_core_memory_tool, test_user, agent_state): args = {} sandbox = AsyncToolSandboxLocal(clear_core_memory_tool.name, args, user=test_user) result = await sandbox.run(agent_state=agent_state) @@ -282,7 +282,7 @@ async def test_local_sandbox_stateful_tool(mock_e2b_api_key_none, clear_core_mem @pytest.mark.asyncio @pytest.mark.local_sandbox -async def test_local_sandbox_with_list_rv(mock_e2b_api_key_none, list_tool, test_user): +async def test_local_sandbox_with_list_rv(disable_e2b_api_key, list_tool, test_user): sandbox = AsyncToolSandboxLocal(list_tool.name, {}, user=test_user) result = await sandbox.run() assert len(result.func_return) == 5 @@ -290,7 +290,7 @@ async def test_local_sandbox_with_list_rv(mock_e2b_api_key_none, list_tool, test @pytest.mark.asyncio @pytest.mark.local_sandbox -async def test_local_sandbox_env(mock_e2b_api_key_none, get_env_tool, test_user): +async def test_local_sandbox_env(disable_e2b_api_key, get_env_tool, test_user): manager = SandboxConfigManager() sandbox_dir = str(Path(__file__).parent / "test_tool_sandbox") config_create = SandboxConfigCreate(config=LocalSandboxConfig(sandbox_dir=sandbox_dir).model_dump()) @@ -309,7 +309,7 @@ async def test_local_sandbox_env(mock_e2b_api_key_none, get_env_tool, test_user) @pytest.mark.asyncio @pytest.mark.local_sandbox -async def test_local_sandbox_per_agent_env(mock_e2b_api_key_none, get_env_tool, agent_state, test_user): +async def test_local_sandbox_per_agent_env(disable_e2b_api_key, get_env_tool, agent_state, test_user): manager = SandboxConfigManager() key = "secret_word" sandbox_dir = str(Path(__file__).parent / "test_tool_sandbox") @@ -331,7 +331,7 @@ async def test_local_sandbox_per_agent_env(mock_e2b_api_key_none, get_env_tool, @pytest.mark.asyncio @pytest.mark.local_sandbox async def test_local_sandbox_external_codebase_with_venv( - mock_e2b_api_key_none, custom_test_sandbox_config, external_codebase_tool, test_user + disable_e2b_api_key, custom_test_sandbox_config, external_codebase_tool, test_user ): args = {"percentage": 10} sandbox = AsyncToolSandboxLocal(external_codebase_tool.name, args, user=test_user) @@ -343,7 +343,7 @@ async def test_local_sandbox_external_codebase_with_venv( @pytest.mark.asyncio @pytest.mark.local_sandbox async def test_local_sandbox_with_venv_and_warnings_does_not_error( - mock_e2b_api_key_none, custom_test_sandbox_config, get_warning_tool, test_user + disable_e2b_api_key, custom_test_sandbox_config, get_warning_tool, test_user ): sandbox = AsyncToolSandboxLocal(get_warning_tool.name, {}, user=test_user) result = await sandbox.run() @@ -352,7 +352,7 @@ async def test_local_sandbox_with_venv_and_warnings_does_not_error( @pytest.mark.asyncio @pytest.mark.e2b_sandbox -async def test_local_sandbox_with_venv_errors(mock_e2b_api_key_none, custom_test_sandbox_config, always_err_tool, test_user): +async def test_local_sandbox_with_venv_errors(disable_e2b_api_key, custom_test_sandbox_config, always_err_tool, test_user): sandbox = AsyncToolSandboxLocal(always_err_tool.name, {}, user=test_user) result = await sandbox.run() assert len(result.stdout) != 0 @@ -363,7 +363,7 @@ async def test_local_sandbox_with_venv_errors(mock_e2b_api_key_none, custom_test @pytest.mark.asyncio @pytest.mark.e2b_sandbox -async def test_local_sandbox_with_venv_pip_installs_basic(mock_e2b_api_key_none, cowsay_tool, test_user): +async def test_local_sandbox_with_venv_pip_installs_basic(disable_e2b_api_key, cowsay_tool, test_user): manager = SandboxConfigManager() config_create = SandboxConfigCreate( config=LocalSandboxConfig(use_venv=True, pip_requirements=[PipRequirement(name="cowsay")]).model_dump() @@ -383,7 +383,7 @@ async def test_local_sandbox_with_venv_pip_installs_basic(mock_e2b_api_key_none, @pytest.mark.asyncio @pytest.mark.e2b_sandbox -async def test_local_sandbox_with_venv_pip_installs_with_update(mock_e2b_api_key_none, cowsay_tool, test_user): +async def test_local_sandbox_with_venv_pip_installs_with_update(disable_e2b_api_key, cowsay_tool, test_user): manager = SandboxConfigManager() config_create = SandboxConfigCreate(config=LocalSandboxConfig(use_venv=True).model_dump()) config = manager.create_or_update_sandbox_config(config_create, test_user) diff --git a/tests/integration_test_chat_completions.py b/tests/integration_test_chat_completions.py index d14ace0e..3eb8d2bd 100644 --- a/tests/integration_test_chat_completions.py +++ b/tests/integration_test_chat_completions.py @@ -158,7 +158,7 @@ def _assert_valid_chunk(chunk, idx, chunks): @pytest.mark.asyncio @pytest.mark.parametrize("message", ["Hi how are you today?"]) @pytest.mark.parametrize("endpoint", ["v1/voice-beta"]) -async def test_latency(mock_e2b_api_key_none, client, agent, message, endpoint): +async def test_latency(disable_e2b_api_key, client, agent, message, endpoint): """Tests chat completion streaming using the Async OpenAI client.""" request = _get_chat_request(message) @@ -172,7 +172,7 @@ async def test_latency(mock_e2b_api_key_none, client, agent, message, endpoint): @pytest.mark.asyncio @pytest.mark.parametrize("message", ["Use recall memory tool to recall what my name is."]) @pytest.mark.parametrize("endpoint", ["v1/voice-beta"]) -async def test_voice_recall_memory(mock_e2b_api_key_none, client, agent, message, endpoint): +async def test_voice_recall_memory(disable_e2b_api_key, client, agent, message, endpoint): """Tests chat completion streaming using the Async OpenAI client.""" request = _get_chat_request(message) @@ -193,7 +193,7 @@ async def test_voice_recall_memory(mock_e2b_api_key_none, client, agent, message @pytest.mark.asyncio @pytest.mark.parametrize("message", ["Tell me something interesting about bananas.", "What's the weather in SF?"]) @pytest.mark.parametrize("endpoint", ["openai/v1", "v1/voice-beta"]) -async def test_chat_completions_streaming_openai_client(mock_e2b_api_key_none, client, agent, message, endpoint): +async def test_chat_completions_streaming_openai_client(disable_e2b_api_key, client, agent, message, endpoint): """Tests chat completion streaming using the Async OpenAI client.""" request = _get_chat_request(message) diff --git a/tests/integration_test_experimental.py b/tests/integration_test_experimental.py index 111d5cc5..d1d4e486 100644 --- a/tests/integration_test_experimental.py +++ b/tests/integration_test_experimental.py @@ -238,7 +238,7 @@ def _assert_valid_chunk(chunk, idx, chunks): @pytest.mark.asyncio @pytest.mark.parametrize("message", ["What is the weather today in SF?"]) -async def test_new_agent_loop(mock_e2b_api_key_none, openai_client, agent_state, message): +async def test_new_agent_loop(disable_e2b_api_key, openai_client, agent_state, message): actor = UserManager().get_user_or_default(user_id="asf") agent = LettaAgent( agent_id=agent_state.id, @@ -254,7 +254,7 @@ async def test_new_agent_loop(mock_e2b_api_key_none, openai_client, agent_state, @pytest.mark.asyncio @pytest.mark.parametrize("message", ["Use your rethink tool to rethink the human memory considering Matt likes chicken."]) -async def test_rethink_tool(mock_e2b_api_key_none, openai_client, agent_state, message): +async def test_rethink_tool(disable_e2b_api_key, openai_client, agent_state, message): actor = UserManager().get_user_or_default(user_id="asf") agent = LettaAgent( agent_id=agent_state.id, @@ -271,7 +271,7 @@ async def test_rethink_tool(mock_e2b_api_key_none, openai_client, agent_state, m @pytest.mark.asyncio -async def test_multi_agent_broadcast(mock_e2b_api_key_none, client, openai_client, weather_tool): +async def test_multi_agent_broadcast(disable_e2b_api_key, client, openai_client, weather_tool): actor = UserManager().get_user_or_default(user_id="asf") stale_agents = AgentManager().list_agents(actor=actor, limit=300) diff --git a/tests/integration_test_offline_memory_agent.py b/tests/integration_test_offline_memory_agent.py index 5082251c..130fc1ef 100644 --- a/tests/integration_test_offline_memory_agent.py +++ b/tests/integration_test_offline_memory_agent.py @@ -28,7 +28,7 @@ def clear_agents(client): client.delete_agent(agent.id) -def test_ripple_edit(client, mock_e2b_api_key_none): +def test_ripple_edit(client, disable_e2b_api_key): trigger_rethink_memory_tool = client.create_or_update_tool(trigger_rethink_memory) send_message = client.server.tool_manager.get_tool_by_name(tool_name="send_message", actor=client.user) @@ -120,7 +120,7 @@ def test_ripple_edit(client, mock_e2b_api_key_none): client.delete_agent(offline_memory_agent.id) -def test_chat_only_agent(client, mock_e2b_api_key_none): +def test_chat_only_agent(client, disable_e2b_api_key): from letta.offline_memory_agent import finish_rethinking_memory, rethink_memory send_message = client.server.tool_manager.get_tool_by_name(tool_name="send_message", actor=client.user) @@ -202,7 +202,7 @@ def test_chat_only_agent(client, mock_e2b_api_key_none): client.delete_agent(offline_memory_agent.id) -def test_initial_message_sequence(client, mock_e2b_api_key_none): +def test_initial_message_sequence(client, disable_e2b_api_key): """ Test that when we set the initial sequence to an empty list, we do not get the default initial message sequence. diff --git a/tests/integration_test_send_message.py b/tests/integration_test_send_message.py new file mode 100644 index 00000000..d5d69969 --- /dev/null +++ b/tests/integration_test_send_message.py @@ -0,0 +1,333 @@ +import os +import threading +import time +from typing import Any, Dict, List + +import pytest +from dotenv import load_dotenv +from letta_client import AsyncLetta, Letta, Run, Tool +from letta_client.types import AssistantMessage, LettaUsageStatistics, ReasoningMessage, ToolCallMessage, ToolReturnMessage + +from letta.schemas.agent import AgentState + +# ------------------------------ +# Fixtures +# ------------------------------ + + +@pytest.fixture(scope="module") +def server_url() -> str: + """ + Provides the URL for the Letta server. + If the environment variable 'LETTA_SERVER_URL' is not set, this fixture + will start the Letta server in a background thread and return the default URL. + """ + + def _run_server() -> None: + """Starts the Letta server in a background thread.""" + load_dotenv() # Load environment variables from .env file + from letta.server.rest_api.app import start_server + + start_server(debug=True) + + # Retrieve server URL from environment, or default to localhost + url: str = os.getenv("LETTA_SERVER_URL", "http://localhost:8283") + + # If no environment variable is set, start the server in a background thread + if not os.getenv("LETTA_SERVER_URL"): + thread = threading.Thread(target=_run_server, daemon=True) + thread.start() + time.sleep(5) # Allow time for the server to start + + return url + + +@pytest.fixture(scope="module") +def client(server_url: str) -> Letta: + """ + Creates and returns a synchronous Letta REST client for testing. + """ + client_instance = Letta(base_url=server_url) + yield client_instance + + +@pytest.fixture(scope="module") +def async_client(server_url: str) -> AsyncLetta: + """ + Creates and returns an asynchronous Letta REST client for testing. + """ + async_client_instance = AsyncLetta(base_url=server_url) + yield async_client_instance + + +@pytest.fixture(scope="module") +def roll_dice_tool(client: Letta) -> Tool: + """ + Registers a simple roll dice tool with the provided client. + + The tool simulates rolling a six-sided die but returns a fixed result. + """ + + def roll_dice() -> str: + """ + Simulates rolling a die. + + Returns: + str: The roll result. + """ + # Note: The result here is intentionally incorrect for demonstration purposes. + return "Rolled a 10!" + + tool = client.tools.upsert_from_function(func=roll_dice) + yield tool + + +@pytest.fixture(scope="module") +def agent_state(client: Letta, roll_dice_tool: Tool) -> AgentState: + """ + Creates and returns an agent state for testing with a pre-configured agent. + The agent is named 'supervisor' and is configured with base tools and the roll_dice tool. + """ + agent_state_instance = client.agents.create( + name="supervisor", + include_base_tools=True, + tool_ids=[roll_dice_tool.id], + model="openai/gpt-4o", + embedding="letta/letta-free", + tags=["supervisor"], + ) + yield agent_state_instance + + +# ------------------------------ +# Helper Functions and Constants +# ------------------------------ + +USER_MESSAGE: List[Dict[str, str]] = [{"role": "user", "content": "Roll the dice."}] +TESTED_MODELS: List[str] = ["openai/gpt-4o"] + + +def assert_tool_response_messages(messages: List[Any]) -> None: + """ + Asserts that the messages list follows the expected sequence: + ReasoningMessage -> ToolCallMessage -> ToolReturnMessage -> + ReasoningMessage -> AssistantMessage. + """ + assert isinstance(messages[0], ReasoningMessage) + assert isinstance(messages[1], ToolCallMessage) + assert isinstance(messages[2], ToolReturnMessage) + assert isinstance(messages[3], ReasoningMessage) + assert isinstance(messages[4], AssistantMessage) + + +def assert_streaming_tool_response_messages(chunks: List[Any]) -> None: + """ + Validates that streaming responses contain at least one reasoning message, + one tool call, one tool return, one assistant message, and one usage statistics message. + """ + + def msg_groups(msg_type: Any) -> List[Any]: + return [c for c in chunks if isinstance(c, msg_type)] + + reasoning_msgs = msg_groups(ReasoningMessage) + tool_calls = msg_groups(ToolCallMessage) + tool_returns = msg_groups(ToolReturnMessage) + assistant_msgs = msg_groups(AssistantMessage) + usage_stats = msg_groups(LettaUsageStatistics) + + assert len(reasoning_msgs) >= 1 + assert len(tool_calls) == 1 + assert len(tool_returns) == 1 + assert len(assistant_msgs) == 1 + assert len(usage_stats) == 1 + + +def wait_for_run_completion(client: Letta, run_id: str, timeout: float = 30.0, interval: float = 0.5) -> Run: + """ + Polls the run status until it completes or fails. + + Args: + client (Letta): The synchronous Letta client. + run_id (str): The identifier of the run to wait for. + timeout (float): Maximum time to wait (in seconds). + interval (float): Interval between status checks (in seconds). + + Returns: + Run: The completed run object. + + Raises: + RuntimeError: If the run fails. + TimeoutError: If the run does not complete within the specified timeout. + """ + start = time.time() + while True: + run = client.runs.retrieve_run(run_id) + if run.status == "completed": + return run + if run.status == "failed": + raise RuntimeError(f"Run {run_id} did not complete: status = {run.status}") + if time.time() - start > timeout: + raise TimeoutError(f"Run {run_id} did not complete within {timeout} seconds (last status: {run.status})") + time.sleep(interval) + + +def assert_tool_response_dict_messages(messages: List[Dict[str, Any]]) -> None: + """ + Asserts that a list of message dictionaries contains the expected types and statuses. + + Expected order: + 1. reasoning_message + 2. tool_call_message + 3. tool_return_message (with status 'success') + 4. reasoning_message + 5. assistant_message + """ + assert isinstance(messages, list) + assert messages[0]["message_type"] == "reasoning_message" + assert messages[1]["message_type"] == "tool_call_message" + assert messages[2]["message_type"] == "tool_return_message" + assert messages[3]["message_type"] == "reasoning_message" + assert messages[4]["message_type"] == "assistant_message" + + tool_return = messages[2] + assert tool_return["status"] == "success" + + +# ------------------------------ +# Test Cases +# ------------------------------ + + +@pytest.mark.parametrize("model", TESTED_MODELS) +def test_send_message_sync_client( + disable_e2b_api_key: Any, + client: Letta, + agent_state: AgentState, + model: str, +) -> None: + """ + Tests sending a message with a synchronous client. + Verifies that the response messages follow the expected order. + """ + client.agents.modify(agent_id=agent_state.id, model=model) + response = client.agents.messages.create( + agent_id=agent_state.id, + messages=USER_MESSAGE, + ) + assert_tool_response_messages(response.messages) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("model", TESTED_MODELS) +async def test_send_message_async_client( + disable_e2b_api_key: Any, + async_client: AsyncLetta, + agent_state: AgentState, + model: str, +) -> None: + """ + Tests sending a message with an asynchronous client. + Validates that the response messages match the expected sequence. + """ + await async_client.agents.modify(agent_id=agent_state.id, model=model) + response = await async_client.agents.messages.create( + agent_id=agent_state.id, + messages=USER_MESSAGE, + ) + assert_tool_response_messages(response.messages) + + +@pytest.mark.parametrize("model", TESTED_MODELS) +def test_send_message_streaming_sync_client( + disable_e2b_api_key: Any, + client: Letta, + agent_state: AgentState, + model: str, +) -> None: + """ + Tests sending a streaming message with a synchronous client. + Checks that each chunk in the stream has the correct message types. + """ + client.agents.modify(agent_id=agent_state.id, model=model) + response = client.agents.messages.create_stream( + agent_id=agent_state.id, + messages=USER_MESSAGE, + ) + chunks = list(response) + assert_streaming_tool_response_messages(chunks) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("model", TESTED_MODELS) +async def test_send_message_streaming_async_client( + disable_e2b_api_key: Any, + async_client: AsyncLetta, + agent_state: AgentState, + model: str, +) -> None: + """ + Tests sending a streaming message with an asynchronous client. + Validates that the streaming response chunks include the correct message types. + """ + await async_client.agents.modify(agent_id=agent_state.id, model=model) + response = async_client.agents.messages.create_stream( + agent_id=agent_state.id, + messages=USER_MESSAGE, + ) + chunks = [chunk async for chunk in response] + assert_streaming_tool_response_messages(chunks) + + +@pytest.mark.parametrize("model", TESTED_MODELS) +def test_send_message_job_sync_client( + disable_e2b_api_key: Any, + client: Letta, + agent_state: AgentState, + model: str, +) -> None: + """ + Tests sending a message as an asynchronous job using the synchronous client. + Waits for job completion and asserts that the result messages are as expected. + """ + client.agents.modify(agent_id=agent_state.id, model=model) + + run = client.agents.messages.create_async( + agent_id=agent_state.id, + messages=USER_MESSAGE, + ) + run = wait_for_run_completion(client, run.id) + + result = run.metadata.get("result") + assert result is not None, "Run metadata missing 'result' key" + + messages = result["messages"] + assert_tool_response_dict_messages(messages) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("model", TESTED_MODELS) +async def test_send_message_job_async_client( + disable_e2b_api_key: Any, + client: Letta, + async_client: AsyncLetta, + agent_state: AgentState, + model: str, +) -> None: + """ + Tests sending a message as an asynchronous job using the asynchronous client. + Waits for job completion and verifies that the resulting messages meet the expected format. + """ + await async_client.agents.modify(agent_id=agent_state.id, model=model) + + run = await async_client.agents.messages.create_async( + agent_id=agent_state.id, + messages=USER_MESSAGE, + ) + # Use the synchronous client to check job completion + run = wait_for_run_completion(client, run.id) + + result = run.metadata.get("result") + assert result is not None, "Run metadata missing 'result' key" + + messages = result["messages"] + assert_tool_response_dict_messages(messages) diff --git a/tests/integration_test_summarizer.py b/tests/integration_test_summarizer.py index 87c63245..b47ce3fa 100644 --- a/tests/integration_test_summarizer.py +++ b/tests/integration_test_summarizer.py @@ -115,7 +115,7 @@ def test_cutoff_calculation(mocker): assert messages[cutoff - 1].role == MessageRole.user -def test_summarize_many_messages_basic(client, mock_e2b_api_key_none): +def test_summarize_many_messages_basic(client, disable_e2b_api_key): small_context_llm_config = LLMConfig.default_config("gpt-4o-mini") small_context_llm_config.context_window = 3000 small_agent_state = client.create_agent( @@ -130,7 +130,7 @@ def test_summarize_many_messages_basic(client, mock_e2b_api_key_none): client.delete_agent(small_agent_state.id) -def test_summarize_large_message_does_not_loop_infinitely(client, mock_e2b_api_key_none): +def test_summarize_large_message_does_not_loop_infinitely(client, disable_e2b_api_key): small_context_llm_config = LLMConfig.default_config("gpt-4o-mini") small_context_llm_config.context_window = 2000 small_agent_state = client.create_agent( @@ -145,7 +145,7 @@ def test_summarize_large_message_does_not_loop_infinitely(client, mock_e2b_api_k client.delete_agent(small_agent_state.id) -def test_summarize_messages_inplace(client, agent_state, mock_e2b_api_key_none): +def test_summarize_messages_inplace(client, agent_state, disable_e2b_api_key): """Test summarization via sending the summarize CLI command or via a direct call to the agent object""" # First send a few messages (5) response = client.user_message( @@ -179,7 +179,7 @@ def test_summarize_messages_inplace(client, agent_state, mock_e2b_api_key_none): agent_obj.summarize_messages_inplace() -def test_auto_summarize(client, mock_e2b_api_key_none): +def test_auto_summarize(client, disable_e2b_api_key): """Test that the summarizer triggers by itself""" small_context_llm_config = LLMConfig.default_config("gpt-4o-mini") small_context_llm_config.context_window = 4000 diff --git a/tests/integration_test_tool_execution_sandbox.py b/tests/integration_test_tool_execution_sandbox.py index 0554e3d4..85aaab97 100644 --- a/tests/integration_test_tool_execution_sandbox.py +++ b/tests/integration_test_tool_execution_sandbox.py @@ -251,7 +251,7 @@ def core_memory_tools(test_user): @pytest.mark.local_sandbox -def test_local_sandbox_default(mock_e2b_api_key_none, add_integers_tool, test_user): +def test_local_sandbox_default(disable_e2b_api_key, add_integers_tool, test_user): args = {"x": 10, "y": 5} # Mock and assert correct pathway was invoked @@ -267,7 +267,7 @@ def test_local_sandbox_default(mock_e2b_api_key_none, add_integers_tool, test_us @pytest.mark.local_sandbox -def test_local_sandbox_stateful_tool(mock_e2b_api_key_none, clear_core_memory_tool, test_user, agent_state): +def test_local_sandbox_stateful_tool(disable_e2b_api_key, clear_core_memory_tool, test_user, agent_state): args = {} # Run again to get actual response sandbox = ToolExecutionSandbox(clear_core_memory_tool.name, args, user=test_user) @@ -278,14 +278,14 @@ def test_local_sandbox_stateful_tool(mock_e2b_api_key_none, clear_core_memory_to @pytest.mark.local_sandbox -def test_local_sandbox_with_list_rv(mock_e2b_api_key_none, list_tool, test_user): +def test_local_sandbox_with_list_rv(disable_e2b_api_key, list_tool, test_user): sandbox = ToolExecutionSandbox(list_tool.name, {}, user=test_user) result = sandbox.run() assert len(result.func_return) == 5 @pytest.mark.local_sandbox -def test_local_sandbox_env(mock_e2b_api_key_none, get_env_tool, test_user): +def test_local_sandbox_env(disable_e2b_api_key, get_env_tool, test_user): manager = SandboxConfigManager() # Make a custom local sandbox config @@ -311,7 +311,7 @@ def test_local_sandbox_env(mock_e2b_api_key_none, get_env_tool, test_user): @pytest.mark.local_sandbox -def test_local_sandbox_per_agent_env(mock_e2b_api_key_none, get_env_tool, agent_state, test_user): +def test_local_sandbox_per_agent_env(disable_e2b_api_key, get_env_tool, agent_state, test_user): manager = SandboxConfigManager() key = "secret_word" @@ -346,7 +346,7 @@ def test_local_sandbox_per_agent_env(mock_e2b_api_key_none, get_env_tool, agent_ @pytest.mark.local_sandbox -def test_local_sandbox_external_codebase_with_venv(mock_e2b_api_key_none, custom_test_sandbox_config, external_codebase_tool, test_user): +def test_local_sandbox_external_codebase_with_venv(disable_e2b_api_key, custom_test_sandbox_config, external_codebase_tool, test_user): # Set the args args = {"percentage": 10} @@ -360,16 +360,14 @@ def test_local_sandbox_external_codebase_with_venv(mock_e2b_api_key_none, custom @pytest.mark.local_sandbox -def test_local_sandbox_with_venv_and_warnings_does_not_error( - mock_e2b_api_key_none, custom_test_sandbox_config, get_warning_tool, test_user -): +def test_local_sandbox_with_venv_and_warnings_does_not_error(disable_e2b_api_key, custom_test_sandbox_config, get_warning_tool, test_user): sandbox = ToolExecutionSandbox(get_warning_tool.name, {}, user=test_user) result = sandbox.run() assert result.func_return == "Hello World" @pytest.mark.e2b_sandbox -def test_local_sandbox_with_venv_errors(mock_e2b_api_key_none, custom_test_sandbox_config, always_err_tool, test_user): +def test_local_sandbox_with_venv_errors(disable_e2b_api_key, custom_test_sandbox_config, always_err_tool, test_user): sandbox = ToolExecutionSandbox(always_err_tool.name, {}, user=test_user) # run the sandbox @@ -381,7 +379,7 @@ def test_local_sandbox_with_venv_errors(mock_e2b_api_key_none, custom_test_sandb @pytest.mark.e2b_sandbox -def test_local_sandbox_with_venv_pip_installs_basic(mock_e2b_api_key_none, cowsay_tool, test_user): +def test_local_sandbox_with_venv_pip_installs_basic(disable_e2b_api_key, cowsay_tool, test_user): manager = SandboxConfigManager() config_create = SandboxConfigCreate( config=LocalSandboxConfig(use_venv=True, pip_requirements=[PipRequirement(name="cowsay")]).model_dump() @@ -401,7 +399,7 @@ def test_local_sandbox_with_venv_pip_installs_basic(mock_e2b_api_key_none, cowsa @pytest.mark.e2b_sandbox -def test_local_sandbox_with_venv_pip_installs_with_update(mock_e2b_api_key_none, cowsay_tool, test_user): +def test_local_sandbox_with_venv_pip_installs_with_update(disable_e2b_api_key, cowsay_tool, test_user): manager = SandboxConfigManager() config_create = SandboxConfigCreate(config=LocalSandboxConfig(use_venv=True).model_dump()) config = manager.create_or_update_sandbox_config(config_create, test_user) @@ -602,7 +600,7 @@ class TestCoreMemoryTools: # Local sandbox tests @pytest.mark.local_sandbox - def test_core_memory_replace_local(self, mock_e2b_api_key_none, core_memory_tools, test_user, agent_state): + def test_core_memory_replace_local(self, disable_e2b_api_key, core_memory_tools, test_user, agent_state): """Test successful replacement of content in core memory - local sandbox.""" new_name = "Charles" args = {"label": "human", "old_content": "Chad", "new_content": new_name} @@ -613,7 +611,7 @@ class TestCoreMemoryTools: assert result.func_return is None @pytest.mark.local_sandbox - def test_core_memory_append_local(self, mock_e2b_api_key_none, core_memory_tools, test_user, agent_state): + def test_core_memory_append_local(self, disable_e2b_api_key, core_memory_tools, test_user, agent_state): """Test successful appending of content to core memory - local sandbox.""" append_text = "\nLikes coffee" args = {"label": "human", "content": append_text} @@ -624,7 +622,7 @@ class TestCoreMemoryTools: assert result.func_return is None @pytest.mark.local_sandbox - def test_core_memory_replace_error_local(self, mock_e2b_api_key_none, core_memory_tools, test_user, agent_state): + def test_core_memory_replace_error_local(self, disable_e2b_api_key, core_memory_tools, test_user, agent_state): """Test error handling when trying to replace non-existent content - local sandbox.""" nonexistent_name = "Alexander Wang" args = {"label": "human", "old_content": nonexistent_name, "new_content": "Charles"} diff --git a/tests/test_agent_serialization.py b/tests/test_agent_serialization.py index d73bffb1..2c8b1ddc 100644 --- a/tests/test_agent_serialization.py +++ b/tests/test_agent_serialization.py @@ -476,7 +476,7 @@ def test_agent_serialize_with_user_messages(local_client, server, serialize_test ) -def test_agent_serialize_tool_calls(mock_e2b_api_key_none, local_client, server, serialize_test_agent, default_user, other_user): +def test_agent_serialize_tool_calls(disable_e2b_api_key, local_client, server, serialize_test_agent, default_user, other_user): """Test deserializing JSON into an Agent instance.""" append_copy_suffix = False server.send_messages( @@ -512,7 +512,7 @@ def test_agent_serialize_tool_calls(mock_e2b_api_key_none, local_client, server, assert copy_agent_response.completion_tokens > 0 and copy_agent_response.step_count > 0 -def test_agent_serialize_update_blocks(mock_e2b_api_key_none, local_client, server, serialize_test_agent, default_user, other_user): +def test_agent_serialize_update_blocks(disable_e2b_api_key, local_client, server, serialize_test_agent, default_user, other_user): """Test deserializing JSON into an Agent instance.""" append_copy_suffix = False server.send_messages( diff --git a/tests/test_client.py b/tests/test_client.py index f0da5930..10121100 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -267,7 +267,7 @@ def test_agent_tags(client: Letta): # -------------------------------------------------------------------------------------------------------------------- # Agent memory blocks # -------------------------------------------------------------------------------------------------------------------- -def test_shared_blocks(mock_e2b_api_key_none, client: Letta): +def test_shared_blocks(disable_e2b_api_key, client: Letta): # create a block block = client.blocks.create(label="human", value="username: sarah") diff --git a/tests/test_client_legacy.py b/tests/test_client_legacy.py index 3a3a1b3e..ba164542 100644 --- a/tests/test_client_legacy.py +++ b/tests/test_client_legacy.py @@ -124,7 +124,7 @@ def default_user(default_organization): yield user -def test_agent(mock_e2b_api_key_none, client: Union[LocalClient, RESTClient], agent: AgentState): +def test_agent(disable_e2b_api_key, client: Union[LocalClient, RESTClient], agent: AgentState): # test client.rename_agent new_name = "RenamedTestAgent" @@ -143,7 +143,7 @@ def test_agent(mock_e2b_api_key_none, client: Union[LocalClient, RESTClient], ag assert client.agent_exists(agent_id=delete_agent.id) == False, "Agent deletion failed" -def test_memory(mock_e2b_api_key_none, client: Union[LocalClient, RESTClient], agent: AgentState): +def test_memory(disable_e2b_api_key, client: Union[LocalClient, RESTClient], agent: AgentState): # _reset_config() memory_response = client.get_in_context_memory(agent_id=agent.id) @@ -159,7 +159,7 @@ def test_memory(mock_e2b_api_key_none, client: Union[LocalClient, RESTClient], a ), "Memory update failed" -def test_agent_interactions(mock_e2b_api_key_none, client: Union[LocalClient, RESTClient], agent: AgentState): +def test_agent_interactions(disable_e2b_api_key, client: Union[LocalClient, RESTClient], agent: AgentState): # test that it is a LettaMessage message = "Hello again, agent!" print("Sending message", message) @@ -182,7 +182,7 @@ def test_agent_interactions(mock_e2b_api_key_none, client: Union[LocalClient, RE # TODO: add streaming tests -def test_archival_memory(mock_e2b_api_key_none, client: Union[LocalClient, RESTClient], agent: AgentState): +def test_archival_memory(disable_e2b_api_key, client: Union[LocalClient, RESTClient], agent: AgentState): # _reset_config() memory_content = "Archival memory content" @@ -216,7 +216,7 @@ def test_archival_memory(mock_e2b_api_key_none, client: Union[LocalClient, RESTC client.get_archival_memory(agent.id) -def test_core_memory(mock_e2b_api_key_none, client: Union[LocalClient, RESTClient], agent: AgentState): +def test_core_memory(disable_e2b_api_key, client: Union[LocalClient, RESTClient], agent: AgentState): response = client.send_message(agent_id=agent.id, message="Update your core memory to remember that my name is Timber!", role="user") print("Response", response) @@ -234,7 +234,7 @@ def test_core_memory(mock_e2b_api_key_none, client: Union[LocalClient, RESTClien ], ) def test_streaming_send_message( - mock_e2b_api_key_none, + disable_e2b_api_key, client: RESTClient, agent: AgentState, stream_tokens: bool, diff --git a/tests/test_model_letta_performance.py b/tests/test_model_letta_performance.py index ea9c30ea..41f2da64 100644 --- a/tests/test_model_letta_performance.py +++ b/tests/test_model_letta_performance.py @@ -32,7 +32,7 @@ def test_openai_gpt_4o_returns_valid_first_message(): @pytest.mark.openai_basic @retry_until_success(max_attempts=5, sleep_time_seconds=2) -def test_openai_gpt_4o_uses_external_tool(mock_e2b_api_key_none): +def test_openai_gpt_4o_uses_external_tool(disable_e2b_api_key): filename = os.path.join(llm_config_dir, "openai-gpt-4o.json") response = check_agent_uses_external_tool(filename) # Log out successful response @@ -96,7 +96,7 @@ def test_azure_gpt_4o_mini_returns_valid_first_message(): @pytest.mark.azure_basic @retry_until_success(max_attempts=5, sleep_time_seconds=2) -def test_azure_gpt_4o_mini_uses_external_tool(mock_e2b_api_key_none): +def test_azure_gpt_4o_mini_uses_external_tool(disable_e2b_api_key): filename = os.path.join(llm_config_dir, "azure-gpt-4o-mini.json") response = check_agent_uses_external_tool(filename) # Log out successful response @@ -182,7 +182,7 @@ def test_claude_haiku_3_5_returns_valid_first_message(): @pytest.mark.anthropic_basic @retry_until_success(max_attempts=5, sleep_time_seconds=2) -def test_claude_haiku_3_5_uses_external_tool(mock_e2b_api_key_none): +def test_claude_haiku_3_5_uses_external_tool(disable_e2b_api_key): filename = os.path.join(llm_config_dir, "claude-3-5-haiku.json") response = check_agent_uses_external_tool(filename) # Log out successful response @@ -226,7 +226,7 @@ def test_groq_llama31_70b_returns_valid_first_message(): print(f"Got successful response from client: \n\n{response}") -def test_groq_llama31_70b_uses_external_tool(mock_e2b_api_key_none): +def test_groq_llama31_70b_uses_external_tool(disable_e2b_api_key): filename = os.path.join(llm_config_dir, "groq.json") response = check_agent_uses_external_tool(filename) # Log out successful response @@ -269,7 +269,7 @@ def test_gemini_pro_15_returns_valid_first_message(): @pytest.mark.gemini_basic @retry_until_success(max_attempts=5, sleep_time_seconds=2) -def test_gemini_pro_15_uses_external_tool(mock_e2b_api_key_none): +def test_gemini_pro_15_uses_external_tool(disable_e2b_api_key): filename = os.path.join(llm_config_dir, "gemini-pro.json") response = check_agent_uses_external_tool(filename) # Log out successful response @@ -349,7 +349,7 @@ def test_together_llama_3_70b_returns_valid_first_message(): print(f"Got successful response from client: \n\n{response}") -def test_together_llama_3_70b_uses_external_tool(mock_e2b_api_key_none): +def test_together_llama_3_70b_uses_external_tool(disable_e2b_api_key): filename = os.path.join(llm_config_dir, "together-llama-3-70b.json") response = check_agent_uses_external_tool(filename) # Log out successful response @@ -405,7 +405,7 @@ def test_bedrock_claude_sonnet_3_5_returns_valid_first_message(): @pytest.mark.anthropic_bedrock_basic @retry_until_success(max_attempts=5, sleep_time_seconds=2) -def test_bedrock_claude_sonnet_3_5_uses_external_tool(mock_e2b_api_key_none): +def test_bedrock_claude_sonnet_3_5_uses_external_tool(disable_e2b_api_key): filename = os.path.join(llm_config_dir, "bedrock-claude-3-5-sonnet.json") response = check_agent_uses_external_tool(filename) # Log out successful response diff --git a/tests/test_server.py b/tests/test_server.py index c6db4da6..ec79fed5 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -801,7 +801,7 @@ def ingest(message: str): import pytest -def test_tool_run_basic(server, mock_e2b_api_key_none, user): +def test_tool_run_basic(server, disable_e2b_api_key, user): """Test running a simple tool from source""" result = server.run_tool_from_source( actor=user, @@ -815,7 +815,7 @@ def test_tool_run_basic(server, mock_e2b_api_key_none, user): assert not result.stderr -def test_tool_run_with_env_var(server, mock_e2b_api_key_none, user): +def test_tool_run_with_env_var(server, disable_e2b_api_key, user): """Test running a tool that uses an environment variable""" result = server.run_tool_from_source( actor=user, @@ -830,7 +830,7 @@ def test_tool_run_with_env_var(server, mock_e2b_api_key_none, user): assert not result.stderr -def test_tool_run_invalid_args(server, mock_e2b_api_key_none, user): +def test_tool_run_invalid_args(server, disable_e2b_api_key, user): """Test running a tool with incorrect arguments""" result = server.run_tool_from_source( actor=user, @@ -846,7 +846,7 @@ def test_tool_run_invalid_args(server, mock_e2b_api_key_none, user): assert "missing 1 required positional argument" in result.stderr[0] -def test_tool_run_with_distractor(server, mock_e2b_api_key_none, user): +def test_tool_run_with_distractor(server, disable_e2b_api_key, user): """Test running a tool with a distractor function in the source""" result = server.run_tool_from_source( actor=user, @@ -861,7 +861,7 @@ def test_tool_run_with_distractor(server, mock_e2b_api_key_none, user): assert not result.stderr -def test_tool_run_explicit_tool_name(server, mock_e2b_api_key_none, user): +def test_tool_run_explicit_tool_name(server, disable_e2b_api_key, user): """Test selecting a tool by name when multiple tools exist in the source""" result = server.run_tool_from_source( actor=user, @@ -877,7 +877,7 @@ def test_tool_run_explicit_tool_name(server, mock_e2b_api_key_none, user): assert not result.stderr -def test_tool_run_util_function(server, mock_e2b_api_key_none, user): +def test_tool_run_util_function(server, disable_e2b_api_key, user): """Test selecting a utility function that does not return anything meaningful""" result = server.run_tool_from_source( actor=user, @@ -893,7 +893,7 @@ def test_tool_run_util_function(server, mock_e2b_api_key_none, user): assert not result.stderr -def test_tool_run_with_explicit_json_schema(server, mock_e2b_api_key_none, user): +def test_tool_run_with_explicit_json_schema(server, disable_e2b_api_key, user): """Test overriding the autogenerated JSON schema with an explicit one""" explicit_json_schema = { "name": "ingest", @@ -936,7 +936,7 @@ def test_composio_client_simple(server): assert len(actions) > 0 -def test_memory_rebuild_count(server, user, mock_e2b_api_key_none, base_tools, base_memory_tools): +def test_memory_rebuild_count(server, user, disable_e2b_api_key, base_tools, base_memory_tools): """Test that the memory rebuild is generating the correct number of role=system messages""" actor = user # create agent diff --git a/tests/test_streaming.py b/tests/test_streaming.py index 55300ab5..18432250 100644 --- a/tests/test_streaming.py +++ b/tests/test_streaming.py @@ -59,7 +59,7 @@ def agent(client: Letta): ], ) def test_streaming_send_message( - mock_e2b_api_key_none, + disable_e2b_api_key, client: Letta, agent: AgentState, stream_tokens: bool,