From 7669896184d04e58ebc38e1e8eca696ecb3d58fd Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Fri, 2 Jan 2026 15:41:58 -0800 Subject: [PATCH] feat: allow client-side tools to be specified in request (#8220) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: allow client-side tools to be specified in request Add `client_tools` field to LettaRequest to allow passing tool schemas at message creation time without requiring server-side registration. When the agent calls a client-side tool, execution pauses with stop_reason=requires_approval for the client to provide tool returns. - Add ClientToolSchema class for request-level tool schemas - Merge client tools with agent tools in _get_valid_tools() - Treat client-side tool calls as requiring approval - Add integration tests for client-side tools flow 🤖 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta * test: add comprehensive end-to-end test for client-side tools Update integration test to verify the complete flow: - Agent calls client-side tool and pauses - Client provides tool return with secret code - Agent processes and responds - User asks about the code, agent recalls it - Validate full conversation history makes sense 🤖 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta * update apis * fix: client-side tools schema format and test assertions - Use flat schema format for client tools (matching t.json_schema) - Support both object and dict access for client tools - Fix stop_reason assertions to access .stop_reason attribute 🤖 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta * refactor: simplify client_tools access pattern ClientToolSchema objects always have .name attribute 🤖 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta * fix: add client_tools parameter to LettaAgentV2 for API compatibility V2 agent doesn't use client_tools but needs the parameter to match the base class signature. 
🤖 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta * revert: remove client_tools from LettaRequestConfig Client-side tools don't work with background jobs since there's no client present to provide tool returns. 🤖 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta * fix: add client_tools parameter to SleeptimeMultiAgent classes Add client_tools to step() and stream() methods in: - SleeptimeMultiAgentV3 - SleeptimeMultiAgentV4 🤖 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta * chore: regenerate API specs for client_tools support 🤖 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta --------- Co-authored-by: Letta --- fern/openapi.json | 98 +++++++ letta/agents/base_agent_v2.py | 15 +- letta/agents/letta_agent_v2.py | 5 + letta/agents/letta_agent_v3.py | 40 ++- letta/groups/sleeptime_multi_agent_v3.py | 5 + letta/groups/sleeptime_multi_agent_v4.py | 5 + letta/schemas/letta_request.py | 22 +- letta/server/rest_api/routers/v1/agents.py | 1 + letta/services/streaming_service.py | 5 +- tests/integration_test_client_side_tools.py | 268 ++++++++++++++++++++ 10 files changed, 458 insertions(+), 6 deletions(-) create mode 100644 tests/integration_test_client_side_tools.py diff --git a/fern/openapi.json b/fern/openapi.json index 3f8b1ca9..633914df 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -25760,6 +25760,44 @@ "title": "ChoiceLogprobs", "description": "Log probability information for the choice." 
}, + "ClientToolSchema": { + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the tool function" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "Description of what the tool does" + }, + "parameters": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Parameters", + "description": "JSON Schema for the function parameters" + } + }, + "type": "object", + "required": ["name"], + "title": "ClientToolSchema", + "description": "Schema for a client-side tool passed in the request.\n\nClient-side tools are executed by the client, not the server. When the agent\ncalls a client-side tool, execution pauses and returns control to the client\nto execute the tool and provide the result." + }, "CodeInput": { "properties": { "code": { @@ -32338,6 +32376,21 @@ "default": true, "deprecated": true }, + "client_tools": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ClientToolSchema" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Client Tools", + "description": "Client-side tools that the agent can call. When the agent calls a client-side tool, execution pauses and returns control to the client to execute the tool and provide the result via a ToolReturn." + }, "callback_url": { "anyOf": [ { @@ -32497,6 +32550,21 @@ "default": true, "deprecated": true }, + "client_tools": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ClientToolSchema" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Client Tools", + "description": "Client-side tools that the agent can call. When the agent calls a client-side tool, execution pauses and returns control to the client to execute the tool and provide the result via a ToolReturn." 
+ }, "agent_id": { "type": "string", "title": "Agent Id", @@ -32740,6 +32808,21 @@ "description": "If set to True, enables reasoning before responses or tool calls from the agent.", "default": true, "deprecated": true + }, + "client_tools": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ClientToolSchema" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Client Tools", + "description": "Client-side tools that the agent can call. When the agent calls a client-side tool, execution pauses and returns control to the client to execute the tool and provide the result via a ToolReturn." } }, "type": "object", @@ -32955,6 +33038,21 @@ "default": true, "deprecated": true }, + "client_tools": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ClientToolSchema" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Client Tools", + "description": "Client-side tools that the agent can call. When the agent calls a client-side tool, execution pauses and returns control to the client to execute the tool and provide the result via a ToolReturn." 
+ }, "streaming": { "type": "boolean", "title": "Streaming", diff --git a/letta/agents/base_agent_v2.py b/letta/agents/base_agent_v2.py index c6a3a00d..847db90f 100644 --- a/letta/agents/base_agent_v2.py +++ b/letta/agents/base_agent_v2.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import AsyncGenerator +from typing import TYPE_CHECKING, AsyncGenerator from letta.constants import DEFAULT_MAX_STEPS from letta.log import get_logger @@ -10,6 +10,9 @@ from letta.schemas.letta_response import LettaResponse from letta.schemas.message import MessageCreate from letta.schemas.user import User +if TYPE_CHECKING: + from letta.schemas.letta_request import ClientToolSchema + class BaseAgentV2(ABC): """ @@ -42,9 +45,14 @@ class BaseAgentV2(ABC): use_assistant_message: bool = True, include_return_message_types: list[MessageType] | None = None, request_start_timestamp_ns: int | None = None, + client_tools: list["ClientToolSchema"] | None = None, ) -> LettaResponse: """ Execute the agent loop in blocking mode, returning all messages at once. + + Args: + client_tools: Optional list of client-side tools. When called, execution pauses + for client to provide tool returns. """ raise NotImplementedError @@ -58,11 +66,16 @@ class BaseAgentV2(ABC): use_assistant_message: bool = True, include_return_message_types: list[MessageType] | None = None, request_start_timestamp_ns: int | None = None, + client_tools: list["ClientToolSchema"] | None = None, ) -> AsyncGenerator[LettaMessage | LegacyLettaMessage | MessageStreamStatus, None]: """ Execute the agent loop in streaming mode, yielding chunks as they become available. If stream_tokens is True, individual tokens are streamed as they arrive from the LLM, providing the lowest latency experience, otherwise each complete step (reasoning + tool call + tool return) is yielded as it completes. + + Args: + client_tools: Optional list of client-side tools. When called, execution pauses + for client to provide tool returns. 
""" raise NotImplementedError diff --git a/letta/agents/letta_agent_v2.py b/letta/agents/letta_agent_v2.py index c8a71172..46974cec 100644 --- a/letta/agents/letta_agent_v2.py +++ b/letta/agents/letta_agent_v2.py @@ -34,6 +34,7 @@ from letta.schemas.agent import AgentState, UpdateAgent from letta.schemas.enums import AgentType, MessageStreamStatus, RunStatus, StepStatus from letta.schemas.letta_message import LettaMessage, MessageType from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent +from letta.schemas.letta_request import ClientToolSchema from letta.schemas.letta_response import LettaResponse from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType from letta.schemas.message import Message, MessageCreate, MessageUpdate @@ -173,6 +174,7 @@ class LettaAgentV2(BaseAgentV2): use_assistant_message: bool = True, include_return_message_types: list[MessageType] | None = None, request_start_timestamp_ns: int | None = None, + client_tools: list[ClientToolSchema] | None = None, ) -> LettaResponse: """ Execute the agent loop in blocking mode, returning all messages at once. @@ -184,6 +186,7 @@ class LettaAgentV2(BaseAgentV2): use_assistant_message: Whether to use assistant message format include_return_message_types: Filter for which message types to return request_start_timestamp_ns: Start time for tracking request duration + client_tools: Optional list of client-side tools (not used in V2, for API compatibility) Returns: LettaResponse: Complete response with all messages and metadata @@ -251,6 +254,7 @@ class LettaAgentV2(BaseAgentV2): use_assistant_message: bool = True, include_return_message_types: list[MessageType] | None = None, request_start_timestamp_ns: int | None = None, + client_tools: list[ClientToolSchema] | None = None, ) -> AsyncGenerator[str, None]: """ Execute the agent loop in streaming mode, yielding chunks as they become available. 
@@ -268,6 +272,7 @@ class LettaAgentV2(BaseAgentV2): use_assistant_message: Whether to use assistant message format include_return_message_types: Filter for which message types to return request_start_timestamp_ns: Start time for tracking request duration + client_tools: Optional list of client-side tools (not used in V2, for API compatibility) Yields: str: JSON-formatted SSE data chunks for each completed step diff --git a/letta/agents/letta_agent_v3.py b/letta/agents/letta_agent_v3.py index abd583fb..06e6d508 100644 --- a/letta/agents/letta_agent_v3.py +++ b/letta/agents/letta_agent_v3.py @@ -31,6 +31,7 @@ from letta.schemas.agent import AgentState from letta.schemas.enums import MessageRole from letta.schemas.letta_message import ApprovalReturn, LettaErrorMessage, LettaMessage, MessageType from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent +from letta.schemas.letta_request import ClientToolSchema from letta.schemas.letta_response import LettaResponse from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType from letta.schemas.llm_config import LLMConfig @@ -79,6 +80,8 @@ class LettaAgentV3(LettaAgentV2): # affecting step-level telemetry. self.context_token_estimate: int | None = None self.in_context_messages: list[Message] = [] # in-memory tracker + # Client-side tools passed in the request (executed by client, not server) + self.client_tools: list[ClientToolSchema] = [] def _compute_tool_return_truncation_chars(self) -> int: """Compute a dynamic cap for tool returns in requests. @@ -101,6 +104,7 @@ class LettaAgentV3(LettaAgentV2): use_assistant_message: bool = True, # NOTE: not used include_return_message_types: list[MessageType] | None = None, request_start_timestamp_ns: int | None = None, + client_tools: list[ClientToolSchema] | None = None, ) -> LettaResponse: """ Execute the agent loop in blocking mode, returning all messages at once. 
@@ -112,11 +116,14 @@ class LettaAgentV3(LettaAgentV2): use_assistant_message: Whether to use assistant message format include_return_message_types: Filter for which message types to return request_start_timestamp_ns: Start time for tracking request duration + client_tools: Optional list of client-side tools. When called, execution pauses + for client to provide tool returns. Returns: LettaResponse: Complete response with all messages and metadata """ self._initialize_state() + self.client_tools = client_tools or [] request_span = self._request_checkpoint_start(request_start_timestamp_ns=request_start_timestamp_ns) response_letta_messages = [] @@ -234,6 +241,7 @@ class LettaAgentV3(LettaAgentV2): use_assistant_message: bool = True, # NOTE: not used include_return_message_types: list[MessageType] | None = None, request_start_timestamp_ns: int | None = None, + client_tools: list[ClientToolSchema] | None = None, ) -> AsyncGenerator[str, None]: """ Execute the agent loop in streaming mode, yielding chunks as they become available. @@ -251,11 +259,14 @@ class LettaAgentV3(LettaAgentV2): use_assistant_message: Whether to use assistant message format include_return_message_types: Filter for which message types to return request_start_timestamp_ns: Start time for tracking request duration + client_tools: Optional list of client-side tools. When called, execution pauses + for client to provide tool returns. Yields: str: JSON-formatted SSE data chunks for each completed step """ self._initialize_state() + self.client_tools = client_tools or [] request_span = self._request_checkpoint_start(request_start_timestamp_ns=request_start_timestamp_ns) response_letta_messages = [] first_chunk = True @@ -973,10 +984,22 @@ class LettaAgentV3(LettaAgentV2): messages_to_persist = (initial_messages or []) + assistant_message return messages_to_persist, continue_stepping, stop_reason - # 2. Check whether tool call requires approval + # 2. 
Check whether tool call requires approval (includes client-side tools) if not is_approval_response: - requested_tool_calls = [t for t in tool_calls if tool_rules_solver.is_requires_approval_tool(t.function.name)] - allowed_tool_calls = [t for t in tool_calls if not tool_rules_solver.is_requires_approval_tool(t.function.name)] + # Get names of client-side tools (these are executed by client, not server) + client_tool_names = {ct.name for ct in self.client_tools} if self.client_tools else set() + + # Tools requiring approval: requires_approval tools OR client-side tools + requested_tool_calls = [ + t + for t in tool_calls + if tool_rules_solver.is_requires_approval_tool(t.function.name) or t.function.name in client_tool_names + ] + allowed_tool_calls = [ + t + for t in tool_calls + if not tool_rules_solver.is_requires_approval_tool(t.function.name) and t.function.name not in client_tool_names + ] if requested_tool_calls: approval_messages = create_approval_request_message_from_llm_response( agent_id=self.agent_state.id, @@ -1327,6 +1350,17 @@ class LettaAgentV3(LettaAgentV2): error_on_empty=False, # Return empty list instead of raising error ) or list(set(t.name for t in tools)) allowed_tools = [enable_strict_mode(t.json_schema) for t in tools if t.name in set(valid_tool_names)] + + # Merge client-side tools (use flat format matching enable_strict_mode output) + if self.client_tools: + for ct in self.client_tools: + client_tool_schema = { + "name": ct.name, + "description": ct.description, + "parameters": ct.parameters or {"type": "object", "properties": {}}, + } + allowed_tools.append(client_tool_schema) + terminal_tool_names = {rule.tool_name for rule in self.tool_rules_solver.terminal_tool_rules} allowed_tools = runtime_override_tool_json_schema( tool_list=allowed_tools, diff --git a/letta/groups/sleeptime_multi_agent_v3.py b/letta/groups/sleeptime_multi_agent_v3.py index 9a1d338a..6c4ea830 100644 --- a/letta/groups/sleeptime_multi_agent_v3.py +++ 
b/letta/groups/sleeptime_multi_agent_v3.py @@ -12,6 +12,7 @@ from letta.schemas.group import Group, ManagerType from letta.schemas.job import JobUpdate from letta.schemas.letta_message import MessageType from letta.schemas.letta_message_content import TextContent +from letta.schemas.letta_request import ClientToolSchema from letta.schemas.letta_response import LettaResponse from letta.schemas.letta_stop_reason import StopReasonType from letta.schemas.message import Message, MessageCreate @@ -45,6 +46,7 @@ class SleeptimeMultiAgentV3(LettaAgentV2): use_assistant_message: bool = False, include_return_message_types: list[MessageType] | None = None, request_start_timestamp_ns: int | None = None, + client_tools: list[ClientToolSchema] | None = None, ) -> LettaResponse: self.run_ids = [] @@ -58,6 +60,7 @@ class SleeptimeMultiAgentV3(LettaAgentV2): use_assistant_message=use_assistant_message, include_return_message_types=include_return_message_types, request_start_timestamp_ns=request_start_timestamp_ns, + client_tools=client_tools, ) await self.run_sleeptime_agents() @@ -75,6 +78,7 @@ class SleeptimeMultiAgentV3(LettaAgentV2): use_assistant_message: bool = True, request_start_timestamp_ns: int | None = None, include_return_message_types: list[MessageType] | None = None, + client_tools: list[ClientToolSchema] | None = None, ) -> AsyncGenerator[str, None]: self.run_ids = [] @@ -91,6 +95,7 @@ class SleeptimeMultiAgentV3(LettaAgentV2): use_assistant_message=use_assistant_message, include_return_message_types=include_return_message_types, request_start_timestamp_ns=request_start_timestamp_ns, + client_tools=client_tools, ): yield chunk finally: diff --git a/letta/groups/sleeptime_multi_agent_v4.py b/letta/groups/sleeptime_multi_agent_v4.py index bd4c9cf1..c1a8e68b 100644 --- a/letta/groups/sleeptime_multi_agent_v4.py +++ b/letta/groups/sleeptime_multi_agent_v4.py @@ -12,6 +12,7 @@ from letta.schemas.group import Group, ManagerType from letta.schemas.job import JobUpdate from 
letta.schemas.letta_message import MessageType from letta.schemas.letta_message_content import TextContent +from letta.schemas.letta_request import ClientToolSchema from letta.schemas.letta_response import LettaResponse from letta.schemas.letta_stop_reason import StopReasonType from letta.schemas.message import Message, MessageCreate @@ -45,6 +46,7 @@ class SleeptimeMultiAgentV4(LettaAgentV3): use_assistant_message: bool = True, include_return_message_types: list[MessageType] | None = None, request_start_timestamp_ns: int | None = None, + client_tools: list[ClientToolSchema] | None = None, ) -> LettaResponse: self.run_ids = [] @@ -58,6 +60,7 @@ class SleeptimeMultiAgentV4(LettaAgentV3): use_assistant_message=use_assistant_message, include_return_message_types=include_return_message_types, request_start_timestamp_ns=request_start_timestamp_ns, + client_tools=client_tools, ) run_ids = await self.run_sleeptime_agents() @@ -74,6 +77,7 @@ class SleeptimeMultiAgentV4(LettaAgentV3): use_assistant_message: bool = True, request_start_timestamp_ns: int | None = None, include_return_message_types: list[MessageType] | None = None, + client_tools: list[ClientToolSchema] | None = None, ) -> AsyncGenerator[str, None]: self.run_ids = [] @@ -90,6 +94,7 @@ class SleeptimeMultiAgentV4(LettaAgentV3): use_assistant_message=use_assistant_message, include_return_message_types=include_return_message_types, request_start_timestamp_ns=request_start_timestamp_ns, + client_tools=client_tools, ): yield chunk finally: diff --git a/letta/schemas/letta_request.py b/letta/schemas/letta_request.py index 7d4d2c18..207bee9e 100644 --- a/letta/schemas/letta_request.py +++ b/letta/schemas/letta_request.py @@ -1,5 +1,5 @@ import uuid -from typing import List, Optional, Union +from typing import Any, Dict, List, Optional, Union from pydantic import BaseModel, Field, HttpUrl, field_validator, model_validator @@ -9,6 +9,19 @@ from letta.schemas.letta_message_content import LettaMessageContentUnion from 
class ClientToolSchema(BaseModel):
    """Schema for a client-side tool passed in the request.

    Client-side tools are executed by the client, not the server. When the agent
    calls a client-side tool, execution pauses and returns control to the client
    to execute the tool and provide the result.
    """

    # NOTE: the class docstring above and the field descriptions below are
    # reproduced verbatim in fern/openapi.json (ClientToolSchema component), so
    # any wording change here requires regenerating the API spec.

    # Required: the function name the model uses when it calls the tool.
    name: str = Field(description="The name of the tool function")
    # Optional free-text description of the tool.
    description: Optional[str] = Field(default=None, description="Description of what the tool does")
    # Optional JSON Schema object describing the tool's arguments.
    parameters: Optional[Dict[str, Any]] = Field(default=None, description="JSON Schema for the function parameters")
When the agent calls a client-side tool, " + "execution pauses and returns control to the client to execute the tool and provide the result via a ToolReturn.", + ) + @field_validator("messages", mode="before") @classmethod def add_default_type_to_messages(cls, v): diff --git a/letta/server/rest_api/routers/v1/agents.py b/letta/server/rest_api/routers/v1/agents.py index 75236944..8c7d68e0 100644 --- a/letta/server/rest_api/routers/v1/agents.py +++ b/letta/server/rest_api/routers/v1/agents.py @@ -1560,6 +1560,7 @@ async def send_message( use_assistant_message=request.use_assistant_message, request_start_timestamp_ns=request_start_timestamp_ns, include_return_message_types=request.include_return_message_types, + client_tools=request.client_tools, ) else: result = await server.send_message_to_agent( diff --git a/letta/services/streaming_service.py b/letta/services/streaming_service.py index 081051a2..f39f30a1 100644 --- a/letta/services/streaming_service.py +++ b/letta/services/streaming_service.py @@ -29,7 +29,7 @@ from letta.schemas.enums import AgentType, MessageStreamStatus, RunStatus from letta.schemas.job import LettaRequestConfig from letta.schemas.letta_message import AssistantMessage, LettaErrorMessage, MessageType from letta.schemas.letta_message_content import TextContent -from letta.schemas.letta_request import LettaStreamingRequest +from letta.schemas.letta_request import ClientToolSchema, LettaStreamingRequest from letta.schemas.letta_response import LettaResponse from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType from letta.schemas.message import MessageCreate @@ -123,6 +123,7 @@ class StreamingService: request_start_timestamp_ns=request_start_timestamp_ns, include_return_message_types=request.include_return_message_types, actor=actor, + client_tools=request.client_tools, ) # handle background streaming if requested @@ -287,6 +288,7 @@ class StreamingService: request_start_timestamp_ns: int, include_return_message_types: 
"""
Integration tests for client-side tools passed in the request.

These tests verify that:
1. Client-side tools can be specified in the request without pre-registration on the server
2. When the agent calls a client-side tool, execution pauses (stop_reason=requires_approval)
3. Client can provide tool returns via the approval response mechanism
4. Agent continues execution after receiving tool returns
"""

import contextlib
import uuid
from collections.abc import Iterator
from typing import Any

import pytest
from letta_client import Letta

# ------------------------------
# Constants
# ------------------------------

SECRET_CODE = "CLIENT_SIDE_SECRET_12345"

# Models to test - both Anthropic and OpenAI
TEST_MODELS = [
    "anthropic/claude-sonnet-4-5-20250929",
    "openai/gpt-4o-mini",
]

# Stop reasons accepted as "the agent kept running after the tool return",
# i.e. anything other than pausing again with requires_approval.
CONTINUED_STOP_REASONS = ("end_turn", "tool_rule", "max_steps")


def get_client_tool_schema():
    """Returns a client-side tool schema for testing."""
    return {
        "name": "get_secret_code",
        "description": "Returns a secret code for the given input text. This tool is executed client-side. You MUST call this tool when the user asks for a secret code.",
        "parameters": {
            "type": "object",
            "properties": {
                "input_text": {
                    "type": "string",
                    "description": "The input text to process",
                }
            },
            "required": ["input_text"],
        },
    }


# ------------------------------
# Helpers
# ------------------------------


@contextlib.contextmanager
def _temporary_agent(client: Letta, model: str, name_prefix: str) -> Iterator[Any]:
    """Create a throwaway agent with no server-side tools or tool rules.

    The agent is deleted when the ``with`` block exits, including when an
    assertion inside the block fails.

    Args:
        client: SDK client used to create and delete the agent.
        model: Model handle the agent should run on.
        name_prefix: Prefix for the agent name; a random suffix is appended.

    Yields:
        The object returned by ``client.agents.create``.
    """
    agent = client.agents.create(
        name=f"{name_prefix}_{uuid.uuid4().hex[:8]}",
        model=model,
        embedding="openai/text-embedding-3-small",
        include_base_tools=False,
        tool_ids=[],
        include_base_tool_rules=False,
        tool_rules=[],
    )
    try:
        yield agent
    finally:
        # Cleanup
        client.agents.delete(agent_id=agent.id)


def _tool_return_message(tool_call_id: str, tool_return: str, status: str) -> dict:
    """Build the approval message that carries a client-executed tool result."""
    return {
        "type": "approval",
        "approvals": [
            {
                "type": "tool",
                "tool_call_id": tool_call_id,
                "tool_return": tool_return,
                "status": status,
            }
        ],
    }


def _request_tool_call(client: Letta, agent_id: str, prompt: str, tool_schema: dict) -> str:
    """Send ``prompt`` and check that the agent paused on the client-side tool.

    Asserts that the run stopped with ``requires_approval`` and that the last
    message is an approval request for ``get_secret_code`` before reading the
    call id, so a regression fails with a clear assertion rather than an
    attribute error.

    Returns:
        The ``tool_call_id`` of the pending client-side tool call.
    """
    response = client.agents.messages.create(
        agent_id=agent_id,
        messages=[{"role": "user", "content": prompt}],
        client_tools=[tool_schema],
    )

    assert response.stop_reason.stop_reason == "requires_approval", f"Expected requires_approval, got {response.stop_reason}"
    approval_request = response.messages[-1]
    assert approval_request.message_type == "approval_request_message"
    assert approval_request.tool_call is not None
    assert approval_request.tool_call.name == "get_secret_code"
    return approval_request.tool_call.tool_call_id


def _assistant_messages(messages: list) -> list:
    """Return only the messages whose ``message_type`` is ``assistant_message``."""
    return [msg for msg in messages if msg.message_type == "assistant_message"]


# ------------------------------
# Fixtures
# ------------------------------


@pytest.fixture
def client(server_url: str) -> Letta:
    """Create a Letta client."""
    return Letta(base_url=server_url)


# ------------------------------
# Test Cases
# ------------------------------


class TestClientSideTools:
    """Test client-side tools using the SDK client."""

    @pytest.mark.parametrize("model", TEST_MODELS)
    def test_client_side_tool_full_flow(self, client: Letta, model: str) -> None:
        """
        Test the complete end-to-end flow:
        1. User asks agent to get a secret code
        2. Agent calls client-side tool, execution pauses
        3. Client provides the tool return with the secret code
        4. Agent processes the result and continues execution
        5. User asks what the code was
        6. Agent recalls and reports the secret code
        """
        with _temporary_agent(client, model, "client_tools_test") as agent:
            tool_schema = get_client_tool_schema()
            print(f"\n=== Testing with model: {model} ===")

            # Step 1: User asks for the secret code - agent should call the tool
            print("\nStep 1: Asking agent to call get_secret_code tool...")
            tool_call_id = _request_tool_call(
                client,
                agent.id,
                "Please call the get_secret_code tool with input 'hello world'.",
                tool_schema,
            )
            print(f" ✓ Agent called get_secret_code tool (call_id: {tool_call_id})")

            # Step 2: Provide the tool return (simulating client-side execution)
            print(f"\nStep 2: Providing tool return with secret code: {SECRET_CODE}")
            response2 = client.agents.messages.create(
                agent_id=agent.id,
                messages=[_tool_return_message(tool_call_id, SECRET_CODE, "success")],
                client_tools=[tool_schema],
            )

            # Validate Step 2: Agent should receive tool return and CONTINUE execution
            assert response2.messages is not None
            assert len(response2.messages) >= 1

            # First message should be the tool return
            tool_return_msg = response2.messages[0]
            assert tool_return_msg.message_type == "tool_return_message"
            assert tool_return_msg.status == "success"
            assert tool_return_msg.tool_return == SECRET_CODE
            print(" ✓ Tool return message received with secret code")

            # Agent should continue and eventually end turn (not require more approval)
            assert response2.stop_reason.stop_reason in CONTINUED_STOP_REASONS, (
                f"Expected end_turn/tool_rule/max_steps, got {response2.stop_reason}"
            )
            print(f" ✓ Agent continued execution (stop_reason: {response2.stop_reason})")

            # Check that agent produced a response after the tool return
            assistant_messages_step2 = _assistant_messages(response2.messages)
            assert len(assistant_messages_step2) > 0, "Agent should produce an assistant message after receiving tool return"
            print(f" ✓ Agent produced {len(assistant_messages_step2)} assistant message(s)")

            # Step 3: Ask the agent what the secret code was (testing memory/context)
            print("\nStep 3: Asking agent to recall the secret code...")
            response3 = client.agents.messages.create(
                agent_id=agent.id,
                messages=[{"role": "user", "content": "What was the exact secret code that the tool returned? Please repeat it."}],
                client_tools=[tool_schema],
            )

            # Validate Step 3: Agent should recall and report the secret code
            assert response3.stop_reason.stop_reason in CONTINUED_STOP_REASONS

            # Find the assistant message in the response
            assistant_messages = _assistant_messages(response3.messages)
            assert len(assistant_messages) > 0, "Agent should have responded with an assistant message"

            # The agent should mention the secret code in its response
            assistant_content = " ".join(msg.content for msg in assistant_messages if msg.content)
            print(f" ✓ Agent response: {assistant_content[:200]}...")
            assert SECRET_CODE in assistant_content, f"Agent should mention '{SECRET_CODE}' in response. Got: {assistant_content}"
            print(" ✓ Agent correctly recalled the secret code!")

            # Step 4: Validate the full conversation history makes sense
            print("\nStep 4: Validating conversation history...")
            all_messages = client.agents.messages.list(agent_id=agent.id, limit=100).items
            message_types = [msg.message_type for msg in all_messages]

            assert "user_message" in message_types, "Should have user messages"
            assert "tool_return_message" in message_types, "Should have tool return message"
            assert "assistant_message" in message_types, "Should have assistant messages"

            # Verify the tool return message contains our secret code
            tool_return_msgs = [msg for msg in all_messages if msg.message_type == "tool_return_message"]
            assert any(msg.tool_return == SECRET_CODE for msg in tool_return_msgs), "Tool return should contain secret code"

            print(f"\n✓ Full flow validated successfully for {model}!")

    @pytest.mark.parametrize("model", TEST_MODELS)
    def test_client_side_tool_error_return(self, client: Letta, model: str) -> None:
        """
        Test providing an error status for a client-side tool return.
        The agent should handle the error gracefully and continue execution.
        """
        with _temporary_agent(client, model, "client_tools_error_test") as agent:
            tool_schema = get_client_tool_schema()
            print(f"\n=== Testing error return with model: {model} ===")

            # Step 1: Trigger the client-side tool call
            print("\nStep 1: Triggering tool call...")
            tool_call_id = _request_tool_call(
                client,
                agent.id,
                "Please call the get_secret_code tool with input 'hello'.",
                tool_schema,
            )
            print(f" ✓ Agent called tool (call_id: {tool_call_id})")

            # Step 2: Provide an error response
            error_message = "Error: Unable to compute secret code - service unavailable"
            print(f"\nStep 2: Providing error response: {error_message}")
            response2 = client.agents.messages.create(
                agent_id=agent.id,
                messages=[_tool_return_message(tool_call_id, error_message, "error")],
                client_tools=[tool_schema],
            )

            messages = response2.messages

            assert messages is not None
            assert messages[0].message_type == "tool_return_message"
            assert messages[0].status == "error"
            print(" ✓ Tool return shows error status")

            # Agent should continue execution even after error
            assert response2.stop_reason.stop_reason in CONTINUED_STOP_REASONS, (
                f"Expected agent to continue, got {response2.stop_reason}"
            )
            print(f" ✓ Agent continued execution after error (stop_reason: {response2.stop_reason})")

            # Agent should have produced a response acknowledging the error
            assistant_messages = _assistant_messages(messages)
            assert len(assistant_messages) > 0, "Agent should respond after receiving error"
            print(" ✓ Agent produced response after error")

            print(f"\n✓ Error handling validated successfully for {model}!")