feat: Add parallel tool call streaming for Anthropic [LET-4601] (#5225)

* wip

* Fix parallel tool calling interface

* wip

* wip adapt using id field

* Integrate new multi tool return schemas into parallel tool calling

* Remove example script

* Reset changes to llm stream adapter since old agent loop should not enable parallel tool calling

* Clean up fallback logic for extracting tool calls

* Remove redundant check

* Simplify logic

* Clean up logic in handle ai response

* Fix tests

* Write Anthropic dict conversion to be backward compatible

* wip

* Double write tool call id for legacy reasons

* Fix override args failures

* Patch for approvals

* Revert comments

* Remove extraneous prints
Matthew Zhou
2025-10-10 15:16:08 -07:00
committed by Caren Thomas
parent 1c285f5170
commit bb8a7889e0
6 changed files with 563 additions and 310 deletions

View File

@@ -30,6 +30,7 @@ class LettaLLMAdapter(ABC):
self.reasoning_content: list[TextContent | ReasoningContent | RedactedReasoningContent] | None = None
self.content: list[TextContent | ReasoningContent | RedactedReasoningContent] | None = None
self.tool_call: ToolCall | None = None
self.tool_calls: list[ToolCall] = []
self.usage: LettaUsageStatistics = LettaUsageStatistics()
self.telemetry_manager: TelemetryManager = TelemetryManager()
self.llm_request_finish_timestamp_ns: int | None = None
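The adapter now carries both the legacy single-call field and the new list. As a minimal consumer-side sketch (a hypothetical helper, not part of this diff), downstream code can prefer the new list and fall back to the legacy field:

# Hypothetical consumer-side helper (illustrative only): prefer the new
# multi-call list, fall back to the legacy single-call field.
def iter_tool_calls(adapter):
    calls = getattr(adapter, "tool_calls", None)
    if calls:
        return list(calls)
    single = getattr(adapter, "tool_call", None)
    return [single] if single is not None else []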

View File

@@ -25,6 +25,24 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
specific streaming formats.
"""
    def _extract_tool_calls(self) -> list:
        """Extract tool calls from the interface, trying the parallel API first, then the single-call API."""
        # Try the multi-call API if available
        if hasattr(self.interface, "get_tool_call_objects"):
            try:
                calls = self.interface.get_tool_call_objects()
                if calls:
                    return calls
            except Exception:
                pass
        # Fall back to the single-call API
        try:
            single = self.interface.get_tool_call_object()
            return [single] if single else []
        except Exception:
            return []
async def invoke_llm(
self,
request_data: dict,
@@ -102,12 +120,10 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
# After streaming completes, extract the accumulated data
self.llm_request_finish_timestamp_ns = get_utc_timestamp_ns()
# Extract tool call from the interface
try:
self.tool_call = self.interface.get_tool_call_object()
except ValueError as e:
# No tool call, handle upstream
self.tool_call = None
# extract tool calls from interface (supports both single and parallel calls)
self.tool_calls = self._extract_tool_calls()
# preserve legacy single-call field for existing consumers
self.tool_call = self.tool_calls[-1] if self.tool_calls else None
# Extract reasoning content from the interface
# TODO this should probably just be called "content"?
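To see the extraction precedence in isolation, here is a small self-contained sketch (the stub interfaces and return values are illustrative, not from the codebase):

# Minimal sketch of the fallback order used by _extract_tool_calls.
class MultiCallInterface:
    def get_tool_call_objects(self):
        return ["call_a", "call_b"]  # parallel API wins when it returns calls

    def get_tool_call_object(self):
        return "call_a"

class LegacyInterface:
    def get_tool_call_object(self):
        return "call_a"  # only the single-call API exists

def extract(interface):
    if hasattr(interface, "get_tool_call_objects"):
        try:
            calls = interface.get_tool_call_objects()
            if calls:
                return calls
        except Exception:
            pass
    try:
        single = interface.get_tool_call_object()
        return [single] if single else []
    except Exception:
        return []

assert extract(MultiCallInterface()) == ["call_a", "call_b"]
assert extract(LegacyInterface()) == ["call_a"]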

View File

@@ -1,3 +1,4 @@
import asyncio
import uuid
from typing import AsyncGenerator, Optional
@@ -34,7 +35,11 @@ from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatist
from letta.schemas.step import StepProgression
from letta.schemas.step_metrics import StepMetrics
from letta.schemas.tool_execution_result import ToolExecutionResult
from letta.server.rest_api.utils import create_approval_request_message_from_llm_response, create_letta_messages_from_llm_response
from letta.server.rest_api.utils import (
create_approval_request_message_from_llm_response,
create_letta_messages_from_llm_response,
create_parallel_tool_messages_from_llm_response,
)
from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
from letta.settings import settings, summarizer_settings
from letta.system import package_function_response
@@ -324,6 +329,28 @@ class LettaAgentV3(LettaAgentV2):
force_tool_call=force_tool_call,
requires_subsequent_tool_call=self._require_tool_call,
)
# TODO: Extend to more providers, and also approval tool rules
# Enable Anthropic parallel tool use when no tool rules are attached
try:
if self.agent_state.llm_config.model_endpoint_type in ["anthropic", "bedrock"]:
no_tool_rules = not self.agent_state.tool_rules or len(self.agent_state.tool_rules) == 0
requires_approval = self.tool_rules_solver.get_requires_approval_tools(
set([t["name"] for t in valid_tools])
)
has_approval_tools = len(requires_approval) > 0
if (
isinstance(request_data.get("tool_choice"), dict)
and "disable_parallel_tool_use" in request_data["tool_choice"]
):
# Gate parallel tool use on both: no tool rules and no approval-required tools
if no_tool_rules and not has_approval_tools:
request_data["tool_choice"]["disable_parallel_tool_use"] = False
else:
# Explicitly disable when approvals exist (TODO support later) or tool rules present
request_data["tool_choice"]["disable_parallel_tool_use"] = True
except Exception:
# if this fails, we simply don't enable parallel tool use
pass
if dry_run:
yield request_data
return
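For context, Anthropic's Messages API gates parallelism with a `disable_parallel_tool_use` flag nested inside `tool_choice`. A standalone sketch of the gating above, assuming an illustrative request payload:

# Standalone sketch of the gating logic; the payload values are made up.
request_data = {
    "tool_choice": {"type": "auto", "disable_parallel_tool_use": True},
}
no_tool_rules = True          # agent has no tool rules attached
has_approval_tools = False    # no requested tool requires approval

tool_choice = request_data.get("tool_choice")
if isinstance(tool_choice, dict) and "disable_parallel_tool_use" in tool_choice:
    # Parallel tool use is enabled only when both gates pass
    tool_choice["disable_parallel_tool_use"] = not (no_tool_rules and not has_approval_tools)

assert request_data["tool_choice"]["disable_parallel_tool_use"] is False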
@@ -372,12 +399,31 @@ class LettaAgentV3(LettaAgentV2):
self._update_global_usage_stats(llm_adapter.usage)
# Handle the AI response with the extracted data
# NOTE: in v3 loop, no tool call is OK
# if tool_call is None and llm_adapter.tool_call is None:
# Handle the AI response with the extracted data (supports multiple tool calls)
# Gather tool calls. Approval paths specify a single tool call.
tool_calls_list: list[ToolCall] = []
if tool_call is not None:
tool_calls_list = [tool_call]
else:
# Prefer the new multi-call field from streaming adapters
if hasattr(llm_adapter, "tool_calls") and llm_adapter.tool_calls:
tool_calls_list = llm_adapter.tool_calls
elif llm_adapter.tool_call is not None:
tool_calls_list = [llm_adapter.tool_call]
else:
tool_calls_list = []
aggregated_persisted: list[Message] = []
tool_return_payload = (
approval_response.approvals[0]
if approval_response and approval_response.approvals and isinstance(approval_response.approvals[0], ToolReturn)
else None
)
primary_tool_call = tool_calls_list[0] if len(tool_calls_list) == 1 else None
persisted_messages, self.should_continue, self.stop_reason = await self._handle_ai_response(
tool_call=tool_call or llm_adapter.tool_call,
tool_call=primary_tool_call,
tool_calls=tool_calls_list,
valid_tool_names=[tool["name"] for tool in valid_tools],
agent_state=self.agent_state,
tool_rules_solver=self.tool_rules_solver,
@@ -386,7 +432,6 @@ class LettaAgentV3(LettaAgentV2):
prompt_tokens=self.usage.prompt_tokens,
total_tokens=self.usage.total_tokens,
),
# reasoning_content=reasoning_content or llm_adapter.reasoning_content,
content=content or llm_adapter.content,
pre_computed_assistant_message_id=llm_adapter.message_id,
step_id=step_id,
@@ -398,26 +443,26 @@ class LettaAgentV3(LettaAgentV2):
is_approval=approval_response.approve if approval_response is not None else False,
is_denial=(approval_response.approve == False) if approval_response is not None else False,
denial_reason=approval_response.denial_reason if approval_response is not None else None,
tool_return=approval_response.approvals[0]
if approval_response and approval_response.approvals and isinstance(approval_response.approvals[0], ToolReturn)
else None,
tool_return=tool_return_payload,
)
aggregated_persisted.extend(persisted_messages)
# NOTE: there is an edge case where persisted_messages is empty (the LLM did a "no-op")
new_message_idx = len(input_messages_to_persist) if input_messages_to_persist else 0
self.response_messages.extend(persisted_messages[new_message_idx:])
self.response_messages.extend(aggregated_persisted[new_message_idx:])
if llm_adapter.supports_token_streaming():
# Stream the tool return if a tool was actually executed.
# In the normal streaming path, the tool call is surfaced via the streaming interface
# (llm_adapter.tool_call), so don't rely solely on the local `tool_call` variable.
has_tool_return = any(m.role == "tool" for m in persisted_messages)
if len(persisted_messages) > 0 and persisted_messages[-1].role != "approval" and has_tool_return:
tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
if include_return_message_types is None or tool_return.message_type in include_return_message_types:
yield tool_return
# Stream each tool return if tools were executed
tool_returns = [msg for msg in aggregated_persisted if msg.role == "tool"]
for tr in tool_returns:
# Skip streaming for aggregated parallel tool returns (no per-call tool_call_id)
if tr.tool_call_id is None and tr.tool_returns:
continue
tool_return_letta = tr.to_letta_messages()[0]
if include_return_message_types is None or tool_return_letta.message_type in include_return_message_types:
yield tool_return_letta
else:
filter_user_messages = [m for m in persisted_messages[new_message_idx:] if m.role != "user"]
filter_user_messages = [m for m in aggregated_persisted[new_message_idx:] if m.role != "user"]
letta_messages = Message.to_letta_messages_from_list(
filter_user_messages,
use_assistant_message=False, # NOTE: set to false
@@ -527,12 +572,10 @@ class LettaAgentV3(LettaAgentV2):
@trace_method
async def _handle_ai_response(
self,
tool_call: Optional[ToolCall], # NOTE: should only be None for react agents
valid_tool_names: list[str],
agent_state: AgentState,
tool_rules_solver: ToolRulesSolver,
usage: UsageStatistics,
# reasoning_content: list[TextContent | ReasoningContent | RedactedReasoningContent | OmittedReasoningContent] | None = None,
content: list[TextContent | ReasoningContent | RedactedReasoningContent | OmittedReasoningContent] | None = None,
pre_computed_assistant_message_id: str | None = None,
step_id: str | None = None,
@@ -544,57 +587,76 @@ class LettaAgentV3(LettaAgentV2):
is_approval: bool | None = None,
is_denial: bool | None = None,
denial_reason: str | None = None,
tool_call: ToolCall | None = None,
tool_calls: Optional[list[ToolCall]] = None,
tool_return: ToolReturn | None = None,
) -> tuple[list[Message], bool, LettaStopReason | None]:
"""
Handle the final AI response once streaming completes, execute / validate the
tool call, decide whether we should keep stepping, and persist state.
"""
if tool_call is None:
# NOTE: in v3 loop, no tool call is OK
tool_call_id = None
else:
tool_call_id: str = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
Handle the final AI response once streaming completes, execute / validate tool calls,
decide whether we should keep stepping, and persist state.
if is_denial or tool_return is not None:
Unified approach: treats single and multi-tool calls uniformly to reduce code duplication.
"""
tool_calls = list(tool_calls) if tool_calls else []
if tool_call is not None and not tool_calls:
tool_calls = [tool_call]
first_tool_call = tool_calls[0] if tool_calls else tool_call
if tool_return is not None:
continue_stepping = True
stop_reason = None
if tool_return is not None:
tool_call_messages = [
Message(
role=MessageRole.tool,
content=[TextContent(text=tool_return.func_response)],
agent_id=agent_state.id,
model=agent_state.llm_config.model,
tool_calls=[],
tool_call_id=tool_return.tool_call_id,
created_at=get_utc_time(),
tool_returns=[tool_return],
run_id=run_id,
step_id=step_id,
)
]
else:
tool_call_messages = create_letta_messages_from_llm_response(
tool_call_messages = [
Message(
role=MessageRole.tool,
content=[TextContent(text=tool_return.func_response)],
agent_id=agent_state.id,
model=agent_state.llm_config.model,
function_name=tool_call.function.name,
function_arguments={},
tool_execution_result=ToolExecutionResult(status="error"),
tool_call_id=tool_call_id,
function_response=f"Error: request to call tool denied. User reason: {denial_reason}",
timezone=agent_state.timezone,
continue_stepping=continue_stepping,
# NOTE: we may need to change this to not have a "heartbeat" prefix for v3?
heartbeat_reason=f"{NON_USER_MSG_PREFIX}Continuing: user denied request to call tool.",
reasoning_content=None,
pre_computed_assistant_message_id=None,
step_id=step_id,
tool_calls=[],
tool_call_id=tool_return.tool_call_id,
created_at=get_utc_time(),
tool_returns=[tool_return],
run_id=run_id,
is_approval_response=True,
force_set_request_heartbeat=False,
add_heartbeat_on_continue=False,
step_id=step_id,
)
]
messages_to_persist = (initial_messages or []) + tool_call_messages
for message in messages_to_persist:
if message.run_id is None:
message.run_id = run_id
persisted_messages = await self.message_manager.create_many_messages_async(
messages_to_persist,
actor=self.actor,
run_id=run_id,
project_id=agent_state.project_id,
template_id=agent_state.template_id,
)
return persisted_messages, continue_stepping, stop_reason
# Handle denial case first (special case that bypasses normal flow)
if is_denial and first_tool_call is not None:
tool_call_id = first_tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
continue_stepping = True
stop_reason = None
tool_call_messages = create_letta_messages_from_llm_response(
agent_id=agent_state.id,
model=agent_state.llm_config.model,
function_name=first_tool_call.function.name,
function_arguments={},
tool_execution_result=ToolExecutionResult(status="error"),
tool_call_id=tool_call_id,
function_response=f"Error: request to call tool denied. User reason: {denial_reason}",
timezone=agent_state.timezone,
continue_stepping=continue_stepping,
heartbeat_reason=f"{NON_USER_MSG_PREFIX}Continuing: user denied request to call tool.",
reasoning_content=None,
pre_computed_assistant_message_id=None,
step_id=step_id,
run_id=run_id,
is_approval_response=True,
force_set_request_heartbeat=False,
add_heartbeat_on_continue=False,
)
messages_to_persist = (initial_messages or []) + tool_call_messages
# Set run_id on all messages before persisting
@@ -611,97 +673,94 @@ class LettaAgentV3(LettaAgentV2):
)
return persisted_messages, continue_stepping, stop_reason
        # -1. no tool call, no content
        if tool_call is None and (content is None or len(content) == 0):
            # Edge case is when there's also no content - basically, the LLM "no-op'd"
            # If RequiredBeforeExitToolRule exists and not all required tools have been called,
            # inject a rule-violation heartbeat to keep looping and inform the model.
            uncalled = tool_rules_solver.get_uncalled_required_tools(available_tools=set([t.name for t in agent_state.tools]))
            if uncalled:
                # TODO: we may need to change this to not have a "heartbeat" prefix for v3?
                heartbeat_reason = (
                    f"{NON_USER_MSG_PREFIX}ToolRuleViolated: You must call {', '.join(uncalled)} at least once to exit the loop."
                )
                from letta.server.rest_api.utils import create_heartbeat_system_message

                heartbeat_msg = create_heartbeat_system_message(
                    agent_id=agent_state.id,
                    model=agent_state.llm_config.model,
                    function_call_success=True,
                    timezone=agent_state.timezone,
                    heartbeat_reason=heartbeat_reason,
                    run_id=run_id,
                )
                messages_to_persist = (initial_messages or []) + [heartbeat_msg]
                continue_stepping, stop_reason = True, None
            else:
                # In this case, we actually do not want to persist the no-op message
                continue_stepping, heartbeat_reason, stop_reason = False, None, LettaStopReason(stop_reason=StopReasonType.end_turn.value)
                messages_to_persist = initial_messages or []
        # 0. If there's no tool call, we can early exit
        elif tool_call is None:
            # TODO could just hardcode the line here instead of calling the function...
            continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
                agent_state=agent_state,
                tool_call_name=None,
                tool_rule_violated=False,
                tool_rules_solver=tool_rules_solver,
                is_final_step=is_final_step,
            )
            assistant_message = create_letta_messages_from_llm_response(
                agent_id=agent_state.id,
                model=agent_state.llm_config.model,
                function_name=None,
                function_arguments=None,
                tool_execution_result=None,
                tool_call_id=None,
                function_response=None,
                timezone=agent_state.timezone,
                continue_stepping=continue_stepping,
                heartbeat_reason=heartbeat_reason,
                # NOTE: should probably rename this to `content`?
                reasoning_content=content,
                pre_computed_assistant_message_id=pre_computed_assistant_message_id,
                step_id=step_id,
                run_id=run_id,
                is_approval_response=is_approval or is_denial,
                force_set_request_heartbeat=False,
                # If we're continuing due to a required-before-exit rule, include a heartbeat to guide the model
                add_heartbeat_on_continue=bool(heartbeat_reason),
            )
            messages_to_persist = (initial_messages or []) + assistant_message
        # 3. Handle no-tool cases (content-only or no-op)
        if not tool_calls:
            # Case 3a: No tool call, no content (LLM no-op)
            if content is None or len(content) == 0:
                # Check if there are required-before-exit tools that haven't been called
                uncalled = tool_rules_solver.get_uncalled_required_tools(available_tools=set([t.name for t in agent_state.tools]))
                if uncalled:
                    heartbeat_reason = (
                        f"{NON_USER_MSG_PREFIX}ToolRuleViolated: You must call {', '.join(uncalled)} at least once to exit the loop."
                    )
                    from letta.server.rest_api.utils import create_heartbeat_system_message

                    heartbeat_msg = create_heartbeat_system_message(
                        agent_id=agent_state.id,
                        model=agent_state.llm_config.model,
                        function_call_success=True,
                        timezone=agent_state.timezone,
                        heartbeat_reason=heartbeat_reason,
                        run_id=run_id,
                    )
                    messages_to_persist = (initial_messages or []) + [heartbeat_msg]
                    continue_stepping, stop_reason = True, None
                else:
                    # No required tools remaining, end turn without persisting the no-op
                    continue_stepping = False
                    stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
                    messages_to_persist = initial_messages or []
            # Case 3b: No tool call but has content
            else:
                continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
                    agent_state=agent_state,
                    tool_call_name=None,
                    tool_rule_violated=False,
                    tool_rules_solver=tool_rules_solver,
                    is_final_step=is_final_step,
                )
                assistant_message = create_letta_messages_from_llm_response(
                    agent_id=agent_state.id,
                    model=agent_state.llm_config.model,
                    function_call_success=True,
                    function_name=None,
                    function_arguments=None,
                    tool_execution_result=None,
                    tool_call_id=None,
                    function_response=None,
                    timezone=agent_state.timezone,
                    continue_stepping=continue_stepping,
                    heartbeat_reason=heartbeat_reason,
                    reasoning_content=content,
                    pre_computed_assistant_message_id=pre_computed_assistant_message_id,
                    step_id=step_id,
                    run_id=run_id,
                    is_approval_response=is_approval or is_denial,
                    force_set_request_heartbeat=False,
                    add_heartbeat_on_continue=bool(heartbeat_reason),
                )
                messages_to_persist = (initial_messages or []) + assistant_message
            # Persist messages for no-tool cases
            for message in messages_to_persist:
                if message.run_id is None:
                    message.run_id = run_id
            persisted_messages = await self.message_manager.create_many_messages_async(
                messages_to_persist, actor=self.actor, run_id=run_id, project_id=agent_state.project_id, template_id=agent_state.template_id
            )
            return persisted_messages, continue_stepping, stop_reason
        else:
            # 1. Parse and validate the tool-call envelope
            tool_call_name: str = tool_call.function.name
            tool_args = _safe_load_tool_call_str(tool_call.function.arguments)
            # NOTE: these are failsafes - for v3, we should eventually be able to remove these
            # request_heartbeat: bool = _pop_heartbeat(tool_args)
            tool_args.pop(REQUEST_HEARTBEAT_PARAM, None)
            tool_args.pop(INNER_THOUGHTS_KWARG, None)
            log_telemetry(
                self.logger,
                "_handle_ai_response execute tool start",
                tool_name=tool_call_name,
                tool_args=tool_args,
                tool_call_id=tool_call_id,
                # request_heartbeat=request_heartbeat,
            )
            if not is_approval and tool_rules_solver.is_requires_approval_tool(tool_call_name):
                approval_message = create_approval_request_message_from_llm_response(
                    agent_id=agent_state.id,
                    model=agent_state.llm_config.model,
                    function_name=tool_call_name,
                    function_arguments=tool_args,
                    tool_call_id=tool_call_id,
                    actor=self.actor,
                    # continue_stepping=request_heartbeat,
                    continue_stepping=True,
                    # reasoning_content=reasoning_content,
                    reasoning_content=content,
                    pre_computed_assistant_message_id=pre_computed_assistant_message_id,
                    step_id=step_id,
        # 4. Unified tool execution path (works for both single and multiple tools)
        # 4a. Check for single tool approval case (special handling required)
        if len(tool_calls) == 1 and not is_approval:
            single_tool = tool_calls[0]
            tool_name = single_tool.function.name
            tool_args = _safe_load_tool_call_str(single_tool.function.arguments)
            tool_args.pop(REQUEST_HEARTBEAT_PARAM, None)
            tool_args.pop(INNER_THOUGHTS_KWARG, None)
            if tool_rules_solver.is_requires_approval_tool(tool_name):
                tool_call_id = single_tool.id or f"call_{uuid.uuid4().hex[:8]}"
                approval_message = create_approval_request_message_from_llm_response(
                    agent_id=agent_state.id,
                    model=agent_state.llm_config.model,
                    function_name=tool_name,
                    function_arguments=tool_args,
                    tool_call_id=tool_call_id,
                    actor=self.actor,
                    continue_stepping=True,
                    reasoning_content=content,
                    pre_computed_assistant_message_id=pre_computed_assistant_message_id,
                    step_id=step_id,
@@ -709,155 +768,220 @@ class LettaAgentV3(LettaAgentV2):
append_request_heartbeat=False,
)
messages_to_persist = (initial_messages or []) + [approval_message]
continue_stepping = False
stop_reason = LettaStopReason(stop_reason=StopReasonType.requires_approval.value)
else:
# 2. Execute the tool (or synthesize an error result if disallowed)
tool_rule_violated = tool_call_name not in valid_tool_names and not is_approval
if tool_rule_violated:
tool_execution_result = _build_rule_violation_result(tool_call_name, valid_tool_names, tool_rules_solver)
else:
# Prefill + validate args if a rule provided them
prefill_args = self.tool_rules_solver.last_prefilled_args_by_tool.get(tool_call_name)
if prefill_args:
# Find tool object for schema validation
target_tool = next((t for t in agent_state.tools if t.name == tool_call_name), None)
provenance = self.tool_rules_solver.last_prefilled_args_provenance.get(tool_call_name)
try:
tool_args = merge_and_validate_prefilled_args(
tool=target_tool,
llm_args=tool_args,
prefilled_args=prefill_args,
)
except ValueError as ve:
# Treat invalid prefilled args as user error and end the step
error_prefix = "Invalid prefilled tool arguments from tool rules"
prov_suffix = f" (source={provenance})" if provenance else ""
err_msg = f"{error_prefix}{prov_suffix}: {str(ve)}"
tool_execution_result = ToolExecutionResult(status="error", func_return=err_msg)
# Create messages and early return persistence path below
continue_stepping, heartbeat_reason, stop_reason = (
False,
None,
LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value),
)
tool_call_messages = create_letta_messages_from_llm_response(
agent_id=agent_state.id,
model=agent_state.llm_config.model,
function_name=tool_call_name,
function_arguments=tool_args,
tool_execution_result=tool_execution_result,
tool_call_id=tool_call_id,
function_response=tool_execution_result.func_return,
timezone=agent_state.timezone,
continue_stepping=continue_stepping,
heartbeat_reason=None,
reasoning_content=content,
pre_computed_assistant_message_id=pre_computed_assistant_message_id,
step_id=step_id,
run_id=run_id,
is_approval_response=is_approval or is_denial,
force_set_request_heartbeat=False,
add_heartbeat_on_continue=False,
)
messages_to_persist = (initial_messages or []) + tool_call_messages
                    # Set run_id on all messages before persisting
                    for message in messages_to_persist:
                        if message.run_id is None:
                            message.run_id = run_id
persisted_messages = await self.message_manager.create_many_messages_async(
messages_to_persist,
actor=self.actor,
run_id=run_id,
project_id=agent_state.project_id,
template_id=agent_state.template_id,
)
return persisted_messages, continue_stepping, stop_reason
# Track tool execution time
tool_start_time = get_utc_timestamp_ns()
tool_execution_result = await self._execute_tool(
tool_name=tool_call_name,
tool_args=tool_args,
agent_state=agent_state,
agent_step_span=agent_step_span,
step_id=step_id,
)
tool_end_time = get_utc_timestamp_ns()
# Store tool execution time in metrics
step_metrics.tool_execution_ns = tool_end_time - tool_start_time
log_telemetry(
self.logger,
"_handle_ai_response execute tool finish",
tool_execution_result=tool_execution_result,
tool_call_id=tool_call_id,
)
# 3. Prepare the function-response payload
truncate = tool_call_name not in {"conversation_search", "conversation_search_date", "archival_memory_search"}
return_char_limit = next(
(t.return_char_limit for t in agent_state.tools if t.name == tool_call_name),
None,
)
function_response_string = validate_function_response(
tool_execution_result.func_return,
return_char_limit=return_char_limit,
truncate=truncate,
)
self.last_function_response = package_function_response(
was_success=tool_execution_result.success_flag,
response_string=function_response_string,
timezone=agent_state.timezone,
)
# 4. Decide whether to keep stepping (focal section simplified)
continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
agent_state=agent_state,
tool_call_name=tool_call_name,
tool_rule_violated=tool_rule_violated,
tool_rules_solver=tool_rules_solver,
is_final_step=is_final_step,
)
                # 5. Create messages (step was already created at the beginning)
                tool_call_messages = create_letta_messages_from_llm_response(
                    agent_id=agent_state.id,
                    model=agent_state.llm_config.model,
                    function_name=tool_call_name,
                    function_arguments=tool_args,
                    tool_execution_result=tool_execution_result,
                    tool_call_id=tool_call_id,
                    function_response=function_response_string,
                    timezone=agent_state.timezone,
                    continue_stepping=continue_stepping,
                    # heartbeat_reason=heartbeat_reason,
                    heartbeat_reason=None,
                    # reasoning_content=reasoning_content,
                    reasoning_content=content,
                    pre_computed_assistant_message_id=pre_computed_assistant_message_id,
                    step_id=step_id,
                    run_id=run_id,
                    is_approval_response=is_approval or is_denial,
                    force_set_request_heartbeat=False,
                    add_heartbeat_on_continue=False,
                )
                messages_to_persist = (initial_messages or []) + tool_call_messages
                persisted_messages = await self.message_manager.create_many_messages_async(
                    messages_to_persist,
                    actor=self.actor,
                    run_id=run_id,
                    project_id=agent_state.project_id,
                    template_id=agent_state.template_id,
                )
                return persisted_messages, False, LettaStopReason(stop_reason=StopReasonType.requires_approval.value)
# 4b. Validate parallel tool calling constraints
if len(tool_calls) > 1:
# No parallel tool calls with tool rules
if agent_state.tool_rules and len(agent_state.tool_rules) > 0:
raise ValueError(
"Parallel tool calling is not allowed when tool rules are present. Disable tool rules to use parallel tool calls."
)
# No parallel tool calls with approval-required tools
if any(tool_rules_solver.is_requires_approval_tool(tc.function.name) for tc in tool_calls):
raise ValueError("Parallel tool calling is not allowed when any tool requires approval.")
# 4c. Prepare execution specs for all tools
exec_specs = []
for tc in tool_calls:
call_id = tc.id or f"call_{uuid.uuid4().hex[:8]}"
name = tc.function.name
args = _safe_load_tool_call_str(tc.function.arguments)
args.pop(REQUEST_HEARTBEAT_PARAM, None)
args.pop(INNER_THOUGHTS_KWARG, None)
# Validate against allowed tools
tool_rule_violated = name not in valid_tool_names and not is_approval
# Handle prefilled args if present
if not tool_rule_violated:
prefill_args = tool_rules_solver.last_prefilled_args_by_tool.get(name)
if prefill_args:
target_tool = next((t for t in agent_state.tools if t.name == name), None)
provenance = tool_rules_solver.last_prefilled_args_provenance.get(name)
try:
args = merge_and_validate_prefilled_args(
tool=target_tool,
llm_args=args,
prefilled_args=prefill_args,
)
except ValueError as ve:
# Invalid prefilled args - create error result
error_prefix = "Invalid prefilled tool arguments from tool rules"
prov_suffix = f" (source={provenance})" if provenance else ""
err_msg = f"{error_prefix}{prov_suffix}: {str(ve)}"
exec_specs.append(
{
"id": call_id,
"name": name,
"args": args,
"violated": False,
"error": err_msg,
}
)
continue
exec_specs.append(
{
"id": call_id,
"name": name,
"args": args,
"violated": tool_rule_violated,
"error": None,
}
)
# 4d. Execute tools (sequentially for single, parallel for multiple)
if len(exec_specs) == 1:
# Single tool - execute directly without asyncio.gather overhead
spec = exec_specs[0]
if spec.get("error"):
# Prefill arg validation error
result = ToolExecutionResult(status="error", func_return=spec["error"])
exec_time = 0
elif spec["violated"]:
result = _build_rule_violation_result(spec["name"], valid_tool_names, tool_rules_solver)
exec_time = 0
else:
t0 = get_utc_timestamp_ns()
result = await self._execute_tool(
tool_name=spec["name"],
tool_args=spec["args"],
agent_state=agent_state,
agent_step_span=agent_step_span,
step_id=step_id,
)
exec_time = get_utc_timestamp_ns() - t0
results = [(result, exec_time)]
else:
# Multiple tools - execute in parallel
async def _run_one(spec):
if spec.get("error"):
return ToolExecutionResult(status="error", func_return=spec["error"]), 0
if spec["violated"]:
result = _build_rule_violation_result(spec["name"], valid_tool_names, tool_rules_solver)
return result, 0
t0 = get_utc_timestamp_ns()
res = await self._execute_tool(
tool_name=spec["name"],
tool_args=spec["args"],
agent_state=agent_state,
agent_step_span=agent_step_span,
step_id=step_id,
)
dt = get_utc_timestamp_ns() - t0
return res, dt
results = await asyncio.gather(*[_run_one(s) for s in exec_specs])
        # Update metrics with the longest single-tool execution time (calls run concurrently)
if step_metrics is not None and results:
step_metrics.tool_execution_ns = max(dt for _, dt in results)
# 4e. Process results and compute function responses
function_responses: list[Optional[str]] = []
persisted_continue_flags: list[bool] = []
persisted_stop_reasons: list[LettaStopReason | None] = []
for idx, spec in enumerate(exec_specs):
tool_execution_result, _ = results[idx]
has_prefill_error = bool(spec.get("error"))
# Validate and format function response
truncate = spec["name"] not in {"conversation_search", "conversation_search_date", "archival_memory_search"}
return_char_limit = next((t.return_char_limit for t in agent_state.tools if t.name == spec["name"]), None)
function_response_string = validate_function_response(
tool_execution_result.func_return,
return_char_limit=return_char_limit,
truncate=truncate,
)
function_responses.append(function_response_string)
# Update last function response (for tool rules)
self.last_function_response = package_function_response(
was_success=tool_execution_result.success_flag,
response_string=function_response_string,
timezone=agent_state.timezone,
)
# Register successful tool call with solver
if not spec["violated"] and not has_prefill_error:
tool_rules_solver.register_tool_call(spec["name"])
# Decide continuation for this tool
if has_prefill_error:
cont = False
hb_reason = None
sr = LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value)
else:
cont, hb_reason, sr = self._decide_continuation(
agent_state=agent_state,
tool_call_name=spec["name"],
tool_rule_violated=spec["violated"],
tool_rules_solver=tool_rules_solver,
is_final_step=(is_final_step and idx == len(exec_specs) - 1),
)
persisted_continue_flags.append(cont)
persisted_stop_reasons.append(sr)
# 4f. Create messages using parallel message creation (works for both single and multi)
tool_call_specs = [{"name": s["name"], "arguments": s["args"], "id": s["id"]} for s in exec_specs]
tool_execution_results = [res for (res, _) in results]
# Use the parallel message creation function for both single and multiple tools
parallel_messages = create_parallel_tool_messages_from_llm_response(
agent_id=agent_state.id,
model=agent_state.llm_config.model,
tool_call_specs=tool_call_specs,
tool_execution_results=tool_execution_results,
function_responses=function_responses,
timezone=agent_state.timezone,
run_id=run_id,
step_id=step_id,
reasoning_content=content,
pre_computed_assistant_message_id=pre_computed_assistant_message_id,
is_approval_response=(is_approval or is_denial),
)
messages_to_persist: list[Message] = (initial_messages or []) + parallel_messages
# Set run_id on all messages before persisting
for message in messages_to_persist:
if message.run_id is None:
message.run_id = run_id
# Persist all messages
persisted_messages = await self.message_manager.create_many_messages_async(
messages_to_persist, actor=self.actor, run_id=run_id, project_id=agent_state.project_id, template_id=agent_state.template_id
messages_to_persist,
actor=self.actor,
run_id=run_id,
project_id=agent_state.project_id,
template_id=agent_state.template_id,
)
return persisted_messages, continue_stepping, stop_reason
# 4g. Aggregate continuation decisions
# For multiple tools: continue if ANY says continue, use last non-None stop_reason
# For single tool: use its decision directly
aggregate_continue = any(persisted_continue_flags) if persisted_continue_flags else False
aggregate_stop_reason = None
for sr in persisted_stop_reasons:
if sr is not None:
aggregate_stop_reason = sr
return persisted_messages, aggregate_continue, aggregate_stop_reason
@trace_method
def _decide_continuation(
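The fan-out/fan-in at the heart of steps 4d-4g can be summarized in isolation. A minimal sketch with simplified types (the helper names and demo values here are illustrative, not the module's API):

import asyncio
import time

async def run_parallel(specs, execute):
    # Fan out: run every tool call concurrently, timing each one
    async def run_one(spec):
        t0 = time.monotonic()
        result = await execute(spec)
        return result, time.monotonic() - t0
    return await asyncio.gather(*(run_one(s) for s in specs))

def aggregate(continue_flags, stop_reasons):
    # Fan in: continue if ANY call wants to continue; keep the last non-None stop reason
    agg_stop = None
    for sr in stop_reasons:
        if sr is not None:
            agg_stop = sr
    return (any(continue_flags) if continue_flags else False), agg_stop

async def _demo():
    results = await run_parallel(
        [{"name": "get_weather"}, {"name": "get_time"}],
        lambda spec: asyncio.sleep(0.01, result=f"ran {spec['name']}"),
    )
    print(results)

asyncio.run(_demo())
assert aggregate([True, False], [None, "end_turn"]) == (True, "end_turn")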

View File

@@ -1514,6 +1514,7 @@ class Message(BaseMessage):
}
],
}
else:
raise ValueError(self.role)

View File

@@ -3,7 +3,7 @@ import json
import os
import uuid
from enum import Enum
from typing import AsyncGenerator, Dict, Iterable, List, Optional, Union, cast
from typing import Any, AsyncGenerator, Dict, Iterable, List, Optional, Union, cast
from fastapi import Header, HTTPException
from openai.types.chat import ChatCompletionMessageParam
@@ -377,6 +377,117 @@ def create_letta_messages_from_llm_response(
return messages
def create_parallel_tool_messages_from_llm_response(
agent_id: str,
model: str,
tool_call_specs: List[Dict[str, Any]], # List of tool call specs: {"name": str, "arguments": Dict, "id": Optional[str]}
tool_execution_results: List[ToolExecutionResult],
function_responses: List[Optional[str]],
timezone: str,
run_id: Optional[str] = None,
step_id: Optional[str] = None,
reasoning_content: Optional[
        List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent, SummarizedReasoningContent]]
] = None,
pre_computed_assistant_message_id: Optional[str] = None,
llm_batch_item_id: Optional[str] = None,
is_approval_response: bool = False,
) -> List[Message]:
"""
Build two messages representing a parallel tool-call step:
- One assistant message with ALL tool_calls populated (tool_call_id left empty)
- One tool message with ALL tool_returns populated (tool_call_id left empty)
Notes:
- Consumers should read tool_calls/tool_returns arrays for per-call details.
- The tool message's content includes only the first call's packaged response for
backward-compatibility with legacy renderers. UIs should prefer tool_returns.
- When invoked for an approval response, the assistant message is omitted (the approval
tool call was previously surfaced).
"""
# Construct OpenAI-style tool_calls for the assistant message
openai_tool_calls: List[OpenAIToolCall] = []
for spec in tool_call_specs:
name = spec.get("name")
args = spec.get("arguments", {})
call_id = spec.get("id") or str(uuid.uuid4())
# Ensure the spec carries the resolved id so returns/content can reference it
if not spec.get("id"):
spec["id"] = call_id
openai_tool_calls.append(
OpenAIToolCall(
id=call_id,
function=OpenAIFunction(name=name, arguments=json.dumps(args)),
type="function",
)
)
messages: List[Message] = []
if not is_approval_response:
# Assistant message with all tool_calls (no single tool_call_id)
# Safeguard against empty text messages
content: List[
Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent, SummarizedReasoningContent]
] = []
if reasoning_content:
for content_part in reasoning_content:
if isinstance(content_part, TextContent) and content_part.text == "":
continue
content.append(content_part)
assistant_message = Message(
role=MessageRole.assistant,
content=content,
agent_id=agent_id,
model=model,
tool_calls=openai_tool_calls,
tool_call_id=None,
created_at=get_utc_time(),
batch_item_id=llm_batch_item_id,
run_id=run_id,
)
if step_id:
assistant_message.step_id = step_id
if pre_computed_assistant_message_id:
assistant_message.id = pre_computed_assistant_message_id
messages.append(assistant_message)
content: List[TextContent] = []
tool_returns: List[ToolReturn] = []
for spec, exec_result, response in zip(tool_call_specs, tool_execution_results, function_responses):
packaged = package_function_response(exec_result.success_flag, response, timezone)
content.append(TextContent(text=packaged))
tool_returns.append(
ToolReturn(
tool_call_id=spec.get("id"),
status=exec_result.status,
stdout=exec_result.stdout,
stderr=exec_result.stderr,
func_response=packaged,
)
)
tool_message = Message(
role=MessageRole.tool,
content=content,
agent_id=agent_id,
model=model,
tool_calls=[],
tool_call_id=tool_returns[0].tool_call_id, # For legacy reasons, set to first one
created_at=get_utc_time(),
batch_item_id=llm_batch_item_id,
tool_returns=tool_returns,
run_id=run_id,
)
if step_id:
tool_message.step_id = step_id
messages.append(tool_message)
return messages
def create_heartbeat_system_message(
agent_id: str,
model: str,
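A hedged usage sketch of the new helper under the signature above (all argument values are invented for the example; ToolExecutionResult construction follows the `status=` usage seen elsewhere in this diff):

# Illustrative call shape only, not a test from this commit.
specs = [
    {"name": "get_weather", "arguments": {"city": "SF"}, "id": "call_1"},
    {"name": "get_time", "arguments": {"tz": "PST"}, "id": "call_2"},
]
results = [ToolExecutionResult(status="success"), ToolExecutionResult(status="success")]
responses = ['{"temp_f": 61}', '{"time": "07:14"}']

messages = create_parallel_tool_messages_from_llm_response(
    agent_id="agent-0",
    model="claude-sonnet-4",
    tool_call_specs=specs,
    tool_execution_results=results,
    function_responses=responses,
    timezone="America/Los_Angeles",
)
# messages[0]: one assistant message carrying both tool_calls
# messages[1]: one tool message carrying both tool_returns,
#              with tool_call_id set to "call_1" for legacy readers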

View File

@@ -1,7 +1,7 @@
{
"agents": [
{
"name": "test_export_import_57426498-f708-4228-a331-5efc0087b895",
"name": "test_export_import_42fbdcec-58ce-4e27-b6e5-938b3bcd57aa",
"memory_blocks": [],
"tools": [],
"tool_ids": [
@@ -20,25 +20,25 @@
"block-2"
],
"tool_rules": [
{
"tool_name": "send_message",
"type": "exit_loop",
"prompt_template": null
},
{
"tool_name": "memory_insert",
"type": "continue_loop",
"prompt_template": null
},
{
"tool_name": "memory_replace",
"type": "continue_loop",
"prompt_template": null
},
{
"tool_name": "send_message",
"type": "exit_loop",
"prompt_template": null
},
{
"tool_name": "conversation_search",
"type": "continue_loop",
"prompt_template": null
},
{
"tool_name": "memory_replace",
"type": "continue_loop",
"prompt_template": null
}
],
"tags": [
@@ -129,7 +129,7 @@
"content": [
{
"type": "text",
"text": "You are a helpful assistant specializing in data analysis and mathematical computations.\n\n<memory_blocks>\nThe following memory blocks are currently engaged in your core memory unit:\n\n<project_context>\n<description>\n\n</description>\n<metadata>\n- chars_current=210\n- chars_limit=6000\n</metadata>\n<value>\n# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\nLine 1: Current project: Building predictive models for financial markets. Sarah is working on sequence analysis and pattern recognition. Recently interested in mathematical sequences like Fibonacci for trend analysis.\n</value>\n</project_context>\n\n<human>\n<description>\nThe human block: Stores key details about the person you are conversing with, allowing for more personalized and friend-like conversation.\n</description>\n<metadata>\n- chars_current=175\n- chars_limit=4000\n</metadata>\n<value>\n# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\nLine 1: username: sarah_researcher\nLine 2: occupation: data scientist\nLine 3: interests: machine learning, statistics, fibonacci sequences\nLine 4: preferred_communication: detailed explanations with examples\n</value>\n</human>\n\n<persona>\n<description>\nThe persona block: Stores details about your current persona, guiding how you behave and respond. This helps you to maintain consistency and personality in your interactions.\n</description>\n<metadata>\n- chars_current=195\n- chars_limit=8000\n</metadata>\n<value>\n# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\nLine 1: You are Alex, a data analyst and mathematician who helps users with calculations and insights. You have extensive experience in statistical analysis and prefer to provide clear, accurate results.\n</value>\n</persona>\n\n</memory_blocks>\n\n<tool_usage_rules>\nThe following constraints define rules for tool usage and guide desired behavior. These rules must be followed to ensure proper tool execution and workflow. A single response may contain multiple tool calls.\n\n<tool_rule>\nmemory_insert requires continuing your response when called\n</tool_rule>\n<tool_rule>\nmemory_replace requires continuing your response when called\n</tool_rule>\n<tool_rule>\nconversation_search requires continuing your response when called\n</tool_rule>\n<tool_rule>\nsend_message ends your response (yields control) when called\n</tool_rule>\n</tool_usage_rules>\n\n<memory_metadata>\n- The current system date is: October 10, 2025\n- Memory blocks were last modified: 2025-10-10 12:21:30 AM UTC+0000\n- -1 previous messages between you and the user are stored in recall memory (use tools to access them)\n- 2 total memories you created are stored in archival memory (use tools to access them)\n</memory_metadata>",
"text": "You are a helpful assistant specializing in data analysis and mathematical computations.\n\n<memory_blocks>\nThe following memory blocks are currently engaged in your core memory unit:\n\n<project_context>\n<description>\n\n</description>\n<metadata>\n- chars_current=210\n- chars_limit=6000\n</metadata>\n<value>\n# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\nLine 1: Current project: Building predictive models for financial markets. Sarah is working on sequence analysis and pattern recognition. Recently interested in mathematical sequences like Fibonacci for trend analysis.\n</value>\n</project_context>\n\n<human>\n<description>\nThe human block: Stores key details about the person you are conversing with, allowing for more personalized and friend-like conversation.\n</description>\n<metadata>\n- chars_current=175\n- chars_limit=4000\n</metadata>\n<value>\n# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\nLine 1: username: sarah_researcher\nLine 2: occupation: data scientist\nLine 3: interests: machine learning, statistics, fibonacci sequences\nLine 4: preferred_communication: detailed explanations with examples\n</value>\n</human>\n\n<persona>\n<description>\nThe persona block: Stores details about your current persona, guiding how you behave and respond. This helps you to maintain consistency and personality in your interactions.\n</description>\n<metadata>\n- chars_current=195\n- chars_limit=8000\n</metadata>\n<value>\n# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\nLine 1: You are Alex, a data analyst and mathematician who helps users with calculations and insights. You have extensive experience in statistical analysis and prefer to provide clear, accurate results.\n</value>\n</persona>\n\n</memory_blocks>\n\n<tool_usage_rules>\nThe following constraints define rules for tool usage and guide desired behavior. These rules must be followed to ensure proper tool execution and workflow. A single response may contain multiple tool calls.\n\n<tool_rule>\nmemory_insert requires continuing your response when called\n</tool_rule>\n<tool_rule>\nconversation_search requires continuing your response when called\n</tool_rule>\n<tool_rule>\nmemory_replace requires continuing your response when called\n</tool_rule>\n<tool_rule>\nsend_message ends your response (yields control) when called\n</tool_rule>\n</tool_usage_rules>\n\n<memory_metadata>\n- The current system date is: October 10, 2025\n- Memory blocks were last modified: 2025-10-10 07:14:31 AM UTC+0000\n- -1 previous messages between you and the user are stored in recall memory (use tools to access them)\n- 2 total memories you created are stored in archival memory (use tools to access them)\n</memory_metadata>",
"signature": null
}
],
@@ -144,7 +144,7 @@
"tool_calls": null,
"tool_call_id": null,
"tool_returns": [],
"created_at": "2025-10-10T00:21:29.139951+00:00",
"created_at": "2025-10-10T07:14:29.912336+00:00",
"approve": null,
"approval_request_id": null,
"denial_reason": null
@@ -169,7 +169,7 @@
"agent_id": "agent-0",
"tool_calls": [
{
"id": "9b3e8290-a336-4e3b-851e-2472ccd6fe91",
"id": "f9a9765f-9285-4056-9916-d5bafb729bef",
"function": {
"arguments": "{\n \"message\": \"More human than human is our motto.\"\n}",
"name": "send_message"
@@ -179,7 +179,7 @@
],
"tool_call_id": null,
"tool_returns": [],
"created_at": "2025-10-10T00:21:29.139990+00:00",
"created_at": "2025-10-10T07:14:29.912379+00:00",
"approve": null,
"approval_request_id": null,
"denial_reason": null
@@ -190,7 +190,7 @@
"content": [
{
"type": "text",
"text": "{\n \"status\": \"OK\",\n \"message\": null,\n \"time\": \"2025-10-10 12:21:29 AM UTC+0000\"\n}",
"text": "{\n \"status\": \"OK\",\n \"message\": null,\n \"time\": \"2025-10-10 07:14:29 AM UTC+0000\"\n}",
"signature": null
}
],
@@ -203,9 +203,9 @@
"model": "gpt-4.1-mini",
"agent_id": "agent-0",
"tool_calls": null,
"tool_call_id": "9b3e8290-a336-4e3b-851e-2472ccd6fe91",
"tool_call_id": "f9a9765f-9285-4056-9916-d5bafb729bef",
"tool_returns": [],
"created_at": "2025-10-10T00:21:29.140013+00:00",
"created_at": "2025-10-10T07:14:29.912403+00:00",
"approve": null,
"approval_request_id": null,
"denial_reason": null
@@ -216,7 +216,7 @@
"content": [
{
"type": "text",
"text": "{\n \"type\": \"login\",\n \"last_login\": \"Never (first login)\",\n \"time\": \"2025-10-10 12:21:29 AM UTC+0000\"\n}",
"text": "{\n \"type\": \"login\",\n \"last_login\": \"Never (first login)\",\n \"time\": \"2025-10-10 07:14:29 AM UTC+0000\"\n}",
"signature": null
}
],
@@ -231,7 +231,7 @@
"tool_calls": null,
"tool_call_id": null,
"tool_returns": [],
"created_at": "2025-10-10T00:21:29.140024+00:00",
"created_at": "2025-10-10T07:14:29.912415+00:00",
"approve": null,
"approval_request_id": null,
"denial_reason": null
@@ -257,7 +257,7 @@
"tool_calls": null,
"tool_call_id": null,
"tool_returns": [],
"created_at": "2025-10-10T00:21:30.360611+00:00",
"created_at": "2025-10-10T07:14:31.639749+00:00",
"approve": null,
"approval_request_id": null,
"denial_reason": null
@@ -268,7 +268,7 @@
"content": [
{
"type": "text",
"text": "Responding to test message for export.",
"text": "Responding to test message for export with confirmation.",
"signature": null
}
],
@@ -282,17 +282,17 @@
"agent_id": "agent-0",
"tool_calls": [
{
"id": "call_mNAOnZ212k0eehF005T5s6Wn",
"id": "call_tB2kJPFgSD5Ldk8VaUYwkMCF",
"function": {
"arguments": "{\"message\": \"Received your test message for export. Everything is working fine on my end. How can I assist you further today?\", \"request_heartbeat\": false}",
"arguments": "{\"message\": \"Test message received successfully. Ready for further instructions or queries.\", \"request_heartbeat\": false}",
"name": "send_message"
},
"type": "function"
}
],
"tool_call_id": "call_mNAOnZ212k0eehF005T5s6Wn",
"tool_call_id": "call_tB2kJPFgSD5Ldk8VaUYwkMCF",
"tool_returns": [],
"created_at": "2025-10-10T00:21:31.900813+00:00",
"created_at": "2025-10-10T07:14:33.049314+00:00",
"approve": null,
"approval_request_id": null,
"denial_reason": null
@@ -303,7 +303,7 @@
"content": [
{
"type": "text",
"text": "{\n \"status\": \"OK\",\n \"message\": \"Sent message successfully.\",\n \"time\": \"2025-10-10 12:21:31 AM UTC+0000\"\n}",
"text": "{\n \"status\": \"OK\",\n \"message\": \"Sent message successfully.\",\n \"time\": \"2025-10-10 07:14:33 AM UTC+0000\"\n}",
"signature": null
}
],
@@ -316,17 +316,17 @@
"model": "gpt-4.1-mini",
"agent_id": "agent-0",
"tool_calls": null,
"tool_call_id": "call_mNAOnZ212k0eehF005T5s6Wn",
"tool_call_id": "call_tB2kJPFgSD5Ldk8VaUYwkMCF",
"tool_returns": [
{
"tool_call_id": "call_mNAOnZ212k0eehF005T5s6Wn",
"tool_call_id": "call_tB2kJPFgSD5Ldk8VaUYwkMCF",
"status": "success",
"stdout": null,
"stderr": null,
"func_response": "{\n \"status\": \"OK\",\n \"message\": \"Sent message successfully.\",\n \"time\": \"2025-10-10 12:21:31 AM UTC+0000\"\n}"
"func_response": "{\n \"status\": \"OK\",\n \"message\": \"Sent message successfully.\",\n \"time\": \"2025-10-10 07:14:33 AM UTC+0000\"\n}"
}
],
"created_at": "2025-10-10T00:21:31.900994+00:00",
"created_at": "2025-10-10T07:14:33.049503+00:00",
"approve": null,
"approval_request_id": null,
"denial_reason": null
@@ -477,7 +477,7 @@
"metadata_": {}
},
{
"id": "tool-4",
"id": "tool-5",
"tool_type": "letta_core",
"description": "Search prior conversation history using hybrid search (text + semantic similarity).\n\nExamples:\n # Search all messages\n conversation_search(query=\"project updates\")\n\n # Search only assistant messages\n conversation_search(query=\"error handling\", roles=[\"assistant\"])\n\n # Search with date range (inclusive of both dates)\n conversation_search(query=\"meetings\", start_date=\"2024-01-15\", end_date=\"2024-01-20\")\n # This includes all messages from Jan 15 00:00:00 through Jan 20 23:59:59\n\n # Search messages from a specific day (inclusive)\n conversation_search(query=\"bug reports\", start_date=\"2024-09-04\", end_date=\"2024-09-04\")\n # This includes ALL messages from September 4, 2024\n\n # Search with specific time boundaries\n conversation_search(query=\"deployment\", start_date=\"2024-01-15T09:00\", end_date=\"2024-01-15T17:30\")\n # This includes messages from 9 AM to 5:30 PM on Jan 15\n\n # Search with limit\n conversation_search(query=\"debugging\", limit=10)\n\n Returns:\n str: Query result string containing matching messages with timestamps and content.",
"source_type": "python",
@@ -536,7 +536,7 @@
"metadata_": {}
},
{
"id": "tool-1",
"id": "tool-0",
"tool_type": "custom",
"description": "Get user preferences for a specific category.",
"source_type": "json",
@@ -572,7 +572,7 @@
"metadata_": {}
},
{
"id": "tool-0",
"id": "tool-6",
"tool_type": "letta_sleeptime_core",
"description": "The memory_insert command allows you to insert text at a specific location in a memory block.\n\nExamples:\n # Update a block containing information about the user (append to the end of the block)\n memory_insert(label=\"customer\", new_str=\"The customer's ticket number is 12345\")\n\n # Update a block containing information about the user (insert at the beginning of the block)\n memory_insert(label=\"customer\", new_str=\"The customer's ticket number is 12345\", insert_line=0)\n\n Returns:\n Optional[str]: None is always returned as this function does not produce a response.",
"source_type": "python",
@@ -616,7 +616,7 @@
"metadata_": {}
},
{
"id": "tool-5",
"id": "tool-1",
"tool_type": "letta_sleeptime_core",
"description": "The memory_replace command allows you to replace a specific string in a memory block with a new string. This is used for making precise edits.\n\nExamples:\n # Update a block containing information about the user\n memory_replace(label=\"human\", old_str=\"Their name is Alice\", new_str=\"Their name is Bob\")\n\n # Update a block containing a todo list\n memory_replace(label=\"todos\", old_str=\"- [ ] Step 5: Search the web\", new_str=\"- [x] Step 5: Search the web\")\n\n # Pass an empty string to\n memory_replace(label=\"human\", old_str=\"Their name is Alice\", new_str=\"\")\n\n # Bad example - do NOT add (view-only) line numbers to the args\n memory_replace(label=\"human\", old_str=\"Line 1: Their name is Alice\", new_str=\"Line 1: Their name is Bob\")\n\n # Bad example - do NOT include the number number warning either\n memory_replace(label=\"human\", old_str=\"# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\\nLine 1: Their name is Alice\", new_str=\"Line 1: Their name is Bob\")\n\n # Good example - no line numbers or line number warning (they are view-only), just the text\n memory_replace(label=\"human\", old_str=\"Their name is Alice\", new_str=\"Their name is Bob\")\n\n Returns:\n str: The success message",
"source_type": "python",
@@ -661,7 +661,7 @@
"metadata_": {}
},
{
"id": "tool-6",
"id": "tool-4",
"tool_type": "letta_core",
"description": "Sends a message to the human user.",
"source_type": "python",
@@ -698,7 +698,7 @@
],
"mcp_servers": [],
"metadata": {
"revision_id": "c734cfc0d595"
"revision_id": "066857381578"
},
"created_at": "2025-10-10T00:21:32.284067+00:00"
"created_at": "2025-10-10T07:14:33.457962+00:00"
}