feat: Add parallel tool call streaming for anthropic [LET-4601] (#5225)

* wip

* Fix parallel tool calling interface

* wip

* wip adapt using id field

* Integrate new multi tool return schemas into parallel tool calling

* Remove example script

* Reset changes to llm stream adapter since old agent loop should not enable parallel tool calling

* Clean up fallback logic for extracting tool calls

* Remove redundant check

* Simplify logic

* Clean up logic in handle ai response

* Fix tests

* Write anthropic dict conversion to be back compatible

* wip

* Double write tool call id for legacy reasons

* Fix override args failures

* Patch for approvals

* Revert comments

* Remove extraneous prints
This commit is contained in:
Matthew Zhou
2025-10-10 15:16:08 -07:00
committed by Caren Thomas
parent 1c285f5170
commit bb8a7889e0
6 changed files with 563 additions and 310 deletions

View File

@@ -3,7 +3,7 @@ import json
import os
import uuid
from enum import Enum
from typing import AsyncGenerator, Dict, Iterable, List, Optional, Union, cast
from typing import Any, AsyncGenerator, Dict, Iterable, List, Optional, Union, cast
from fastapi import Header, HTTPException
from openai.types.chat import ChatCompletionMessageParam
@@ -377,6 +377,117 @@ def create_letta_messages_from_llm_response(
return messages
def create_parallel_tool_messages_from_llm_response(
agent_id: str,
model: str,
tool_call_specs: List[Dict[str, Any]], # List of tool call specs: {"name": str, "arguments": Dict, "id": Optional[str]}
tool_execution_results: List[ToolExecutionResult],
function_responses: List[Optional[str]],
timezone: str,
run_id: Optional[str] = None,
step_id: Optional[str] = None,
reasoning_content: Optional[
List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent | SummarizedReasoningContent]]
] = None,
pre_computed_assistant_message_id: Optional[str] = None,
llm_batch_item_id: Optional[str] = None,
is_approval_response: bool = False,
) -> List[Message]:
"""
Build two messages representing a parallel tool-call step:
- One assistant message with ALL tool_calls populated (tool_call_id left empty)
- One tool message with ALL tool_returns populated (tool_call_id left empty)
Notes:
- Consumers should read tool_calls/tool_returns arrays for per-call details.
- The tool message's content includes only the first call's packaged response for
backward-compatibility with legacy renderers. UIs should prefer tool_returns.
- When invoked for an approval response, the assistant message is omitted (the approval
tool call was previously surfaced).
"""
# Construct OpenAI-style tool_calls for the assistant message
openai_tool_calls: List[OpenAIToolCall] = []
for spec in tool_call_specs:
name = spec.get("name")
args = spec.get("arguments", {})
call_id = spec.get("id") or str(uuid.uuid4())
# Ensure the spec carries the resolved id so returns/content can reference it
if not spec.get("id"):
spec["id"] = call_id
openai_tool_calls.append(
OpenAIToolCall(
id=call_id,
function=OpenAIFunction(name=name, arguments=json.dumps(args)),
type="function",
)
)
messages: List[Message] = []
if not is_approval_response:
# Assistant message with all tool_calls (no single tool_call_id)
# Safeguard against empty text messages
content: List[
Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent, SummarizedReasoningContent]
] = []
if reasoning_content:
for content_part in reasoning_content:
if isinstance(content_part, TextContent) and content_part.text == "":
continue
content.append(content_part)
assistant_message = Message(
role=MessageRole.assistant,
content=content,
agent_id=agent_id,
model=model,
tool_calls=openai_tool_calls,
tool_call_id=None,
created_at=get_utc_time(),
batch_item_id=llm_batch_item_id,
run_id=run_id,
)
if step_id:
assistant_message.step_id = step_id
if pre_computed_assistant_message_id:
assistant_message.id = pre_computed_assistant_message_id
messages.append(assistant_message)
content: List[TextContent] = []
tool_returns: List[ToolReturn] = []
for spec, exec_result, response in zip(tool_call_specs, tool_execution_results, function_responses):
packaged = package_function_response(exec_result.success_flag, response, timezone)
content.append(TextContent(text=packaged))
tool_returns.append(
ToolReturn(
tool_call_id=spec.get("id"),
status=exec_result.status,
stdout=exec_result.stdout,
stderr=exec_result.stderr,
func_response=packaged,
)
)
tool_message = Message(
role=MessageRole.tool,
content=content,
agent_id=agent_id,
model=model,
tool_calls=[],
tool_call_id=tool_returns[0].tool_call_id, # For legacy reasons, set to first one
created_at=get_utc_time(),
batch_item_id=llm_batch_item_id,
tool_returns=tool_returns,
run_id=run_id,
)
if step_id:
tool_message.step_id = step_id
messages.append(tool_message)
return messages
def create_heartbeat_system_message(
agent_id: str,
model: str,