feat: Add parallel tool call streaming for anthropic [LET-4601] (#5225)

* wip * Fix parallel tool calling interface * wip * wip adapt using id field * Integrate new multi tool return schemas into parallel tool calling * Remove example script * Reset changes to llm stream adapter since old agent loop should not enable parallel tool calling * Clean up fallback logic for extracting tool calls * Remove redundant check * Simplify logic * Clean up logic in handle ai response * Fix tests * Write anthropic dict conversion to be back compatible * wip * Double write tool call id for legacy reasons * Fix override args failures * Patch for approvals * Revert comments * Remove extraneous prints
2025-10-10 15:16:08 -07:00
parent 1c285f5170
commit bb8a7889e0
6 changed files with 563 additions and 310 deletions
--- a/letta/server/rest_api/utils.py
+++ b/letta/server/rest_api/utils.py
@@ -3,7 +3,7 @@ import json
 import os
 import uuid
 from enum import Enum
-from typing import AsyncGenerator, Dict, Iterable, List, Optional, Union, cast
+from typing import Any, AsyncGenerator, Dict, Iterable, List, Optional, Union, cast

 from fastapi import Header, HTTPException
 from openai.types.chat import ChatCompletionMessageParam
@@ -377,6 +377,117 @@ def create_letta_messages_from_llm_response(
    return messages


+def create_parallel_tool_messages_from_llm_response(
+    agent_id: str,
+    model: str,
+    tool_call_specs: List[Dict[str, Any]],  # List of tool call specs: {"name": str, "arguments": Dict, "id": Optional[str]}
+    tool_execution_results: List[ToolExecutionResult],
+    function_responses: List[Optional[str]],
+    timezone: str,
+    run_id: Optional[str] = None,
+    step_id: Optional[str] = None,
+    reasoning_content: Optional[
+        List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent | SummarizedReasoningContent]]
+    ] = None,
+    pre_computed_assistant_message_id: Optional[str] = None,
+    llm_batch_item_id: Optional[str] = None,
+    is_approval_response: bool = False,
+) -> List[Message]:
+    """
+    Build two messages representing a parallel tool-call step:
+    - One assistant message with ALL tool_calls populated (tool_call_id left empty)
+    - One tool message with ALL tool_returns populated (tool_call_id left empty)
+
+    Notes:
+    - Consumers should read tool_calls/tool_returns arrays for per-call details.
+    - The tool message's content includes only the first call's packaged response for
+      backward-compatibility with legacy renderers. UIs should prefer tool_returns.
+    - When invoked for an approval response, the assistant message is omitted (the approval
+      tool call was previously surfaced).
+    """
+
+    # Construct OpenAI-style tool_calls for the assistant message
+    openai_tool_calls: List[OpenAIToolCall] = []
+    for spec in tool_call_specs:
+        name = spec.get("name")
+        args = spec.get("arguments", {})
+        call_id = spec.get("id") or str(uuid.uuid4())
+        # Ensure the spec carries the resolved id so returns/content can reference it
+        if not spec.get("id"):
+            spec["id"] = call_id
+        openai_tool_calls.append(
+            OpenAIToolCall(
+                id=call_id,
+                function=OpenAIFunction(name=name, arguments=json.dumps(args)),
+                type="function",
+            )
+        )
+
+    messages: List[Message] = []
+
+    if not is_approval_response:
+        # Assistant message with all tool_calls (no single tool_call_id)
+        # Safeguard against empty text messages
+        content: List[
+            Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent, SummarizedReasoningContent]
+        ] = []
+        if reasoning_content:
+            for content_part in reasoning_content:
+                if isinstance(content_part, TextContent) and content_part.text == "":
+                    continue
+                content.append(content_part)
+
+        assistant_message = Message(
+            role=MessageRole.assistant,
+            content=content,
+            agent_id=agent_id,
+            model=model,
+            tool_calls=openai_tool_calls,
+            tool_call_id=None,
+            created_at=get_utc_time(),
+            batch_item_id=llm_batch_item_id,
+            run_id=run_id,
+        )
+        if step_id:
+            assistant_message.step_id = step_id
+        if pre_computed_assistant_message_id:
+            assistant_message.id = pre_computed_assistant_message_id
+        messages.append(assistant_message)
+
+    content: List[TextContent] = []
+    tool_returns: List[ToolReturn] = []
+    for spec, exec_result, response in zip(tool_call_specs, tool_execution_results, function_responses):
+        packaged = package_function_response(exec_result.success_flag, response, timezone)
+        content.append(TextContent(text=packaged))
+        tool_returns.append(
+            ToolReturn(
+                tool_call_id=spec.get("id"),
+                status=exec_result.status,
+                stdout=exec_result.stdout,
+                stderr=exec_result.stderr,
+                func_response=packaged,
+            )
+        )
+
+    tool_message = Message(
+        role=MessageRole.tool,
+        content=content,
+        agent_id=agent_id,
+        model=model,
+        tool_calls=[],
+        tool_call_id=tool_returns[0].tool_call_id,  # For legacy reasons, set to first one
+        created_at=get_utc_time(),
+        batch_item_id=llm_batch_item_id,
+        tool_returns=tool_returns,
+        run_id=run_id,
+    )
+    if step_id:
+        tool_message.step_id = step_id
+
+    messages.append(tool_message)
+    return messages
+
+
 def create_heartbeat_system_message(
    agent_id: str,
    model: str,