feat: Add parallel tool call streaming for Anthropic [LET-4601] (#5225)

* wip

* Fix parallel tool calling interface

* wip

* wip adapt using id field

* Integrate new multi tool return schemas into parallel tool calling

* Remove example script

* Reset changes to llm stream adapter since old agent loop should not enable parallel tool calling

* Clean up fallback logic for extracting tool calls

* Remove redundant check

* Simplify logic

* Clean up logic in handle ai response

* Fix tests

* Write Anthropic dict conversion to be backward compatible

* wip

* Double write tool call id for legacy reasons

* Fix override args failures

* Patch for approvals

* Revert comments

* Remove extraneous prints
Matthew Zhou
2025-10-10 15:16:08 -07:00
committed by Caren Thomas
parent 1c285f5170
commit bb8a7889e0
6 changed files with 563 additions and 310 deletions

View File

@@ -30,6 +30,7 @@ class LettaLLMAdapter(ABC):
self.reasoning_content: list[TextContent | ReasoningContent | RedactedReasoningContent] | None = None
self.content: list[TextContent | ReasoningContent | RedactedReasoningContent] | None = None
self.tool_call: ToolCall | None = None
self.tool_calls: list[ToolCall] = []
self.usage: LettaUsageStatistics = LettaUsageStatistics()
self.telemetry_manager: TelemetryManager = TelemetryManager()
self.llm_request_finish_timestamp_ns: int | None = None
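The adapter now carries both the legacy single-call field and the new list. As a minimal consumer-side sketch (a hypothetical helper, not part of this diff), downstream code can prefer the new list and fall back to the legacy field:

# Hypothetical consumer-side helper (illustrative only): prefer the new
# multi-call list, fall back to the legacy single-call field.
def iter_tool_calls(adapter):
    calls = getattr(adapter, "tool_calls", None)
    if calls:
        return list(calls)
    single = getattr(adapter, "tool_call", None)
    return [single] if single is not None else []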

View File

@@ -25,6 +25,24 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
specific streaming formats.
"""
    def _extract_tool_calls(self) -> list:
        """Extract tool calls from the interface, trying the parallel API first, then the single-call API."""
        # Try the multi-call API if available
        if hasattr(self.interface, "get_tool_call_objects"):
            try:
                calls = self.interface.get_tool_call_objects()
                if calls:
                    return calls
            except Exception:
                pass
        # Fall back to the single-call API
        try:
            single = self.interface.get_tool_call_object()
            return [single] if single else []
        except Exception:
            return []
async def invoke_llm(
self,
request_data: dict,
@@ -102,12 +120,10 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
# After streaming completes, extract the accumulated data
self.llm_request_finish_timestamp_ns = get_utc_timestamp_ns()
# Extract tool call from the interface
try:
self.tool_call = self.interface.get_tool_call_object()
except ValueError as e:
# No tool call, handle upstream
self.tool_call = None
# extract tool calls from interface (supports both single and parallel calls)
self.tool_calls = self._extract_tool_calls()
# preserve legacy single-call field for existing consumers
self.tool_call = self.tool_calls[-1] if self.tool_calls else None
# Extract reasoning content from the interface
# TODO this should probably just be called "content"?
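To see the extraction precedence in isolation, here is a small self-contained sketch (the stub interfaces and return values are illustrative, not from the codebase):

# Minimal sketch of the fallback order used by _extract_tool_calls.
class MultiCallInterface:
    def get_tool_call_objects(self):
        return ["call_a", "call_b"]  # parallel API wins when it returns calls

    def get_tool_call_object(self):
        return "call_a"

class LegacyInterface:
    def get_tool_call_object(self):
        return "call_a"  # only the single-call API exists

def extract(interface):
    if hasattr(interface, "get_tool_call_objects"):
        try:
            calls = interface.get_tool_call_objects()
            if calls:
                return calls
        except Exception:
            pass
    try:
        single = interface.get_tool_call_object()
        return [single] if single else []
    except Exception:
        return []

assert extract(MultiCallInterface()) == ["call_a", "call_b"]
assert extract(LegacyInterface()) == ["call_a"]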

View File

@@ -1,3 +1,4 @@
import asyncio
import uuid
from typing import AsyncGenerator, Optional
@@ -34,7 +35,11 @@ from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatist
from letta.schemas.step import StepProgression
from letta.schemas.step_metrics import StepMetrics
from letta.schemas.tool_execution_result import ToolExecutionResult
from letta.server.rest_api.utils import create_approval_request_message_from_llm_response, create_letta_messages_from_llm_response
from letta.server.rest_api.utils import (
create_approval_request_message_from_llm_response,
create_letta_messages_from_llm_response,
create_parallel_tool_messages_from_llm_response,
)
from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
from letta.settings import settings, summarizer_settings
from letta.system import package_function_response
@@ -324,6 +329,28 @@ class LettaAgentV3(LettaAgentV2):
force_tool_call=force_tool_call,
requires_subsequent_tool_call=self._require_tool_call,
)
# TODO: Extend to more providers, and also approval tool rules
# Enable Anthropic parallel tool use when no tool rules are attached
try:
if self.agent_state.llm_config.model_endpoint_type in ["anthropic", "bedrock"]:
no_tool_rules = not self.agent_state.tool_rules or len(self.agent_state.tool_rules) == 0
requires_approval = self.tool_rules_solver.get_requires_approval_tools(
set([t["name"] for t in valid_tools])
)
has_approval_tools = len(requires_approval) > 0
if (
isinstance(request_data.get("tool_choice"), dict)
and "disable_parallel_tool_use" in request_data["tool_choice"]
):
# Gate parallel tool use on both: no tool rules and no approval-required tools
if no_tool_rules and not has_approval_tools:
request_data["tool_choice"]["disable_parallel_tool_use"] = False
else:
# Explicitly disable when approvals exist (TODO support later) or tool rules present
request_data["tool_choice"]["disable_parallel_tool_use"] = True
except Exception:
# if this fails, we simply don't enable parallel tool use
pass
if dry_run:
yield request_data
return
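For context, Anthropic's Messages API gates parallelism with a `disable_parallel_tool_use` flag nested inside `tool_choice`. A standalone sketch of the gating above, assuming an illustrative request payload:

# Standalone sketch of the gating logic; the payload values are made up.
request_data = {
    "tool_choice": {"type": "auto", "disable_parallel_tool_use": True},
}
no_tool_rules = True          # agent has no tool rules attached
has_approval_tools = False    # no requested tool requires approval

tool_choice = request_data.get("tool_choice")
if isinstance(tool_choice, dict) and "disable_parallel_tool_use" in tool_choice:
    # Parallel tool use is enabled only when both gates pass
    tool_choice["disable_parallel_tool_use"] = not (no_tool_rules and not has_approval_tools)

assert request_data["tool_choice"]["disable_parallel_tool_use"] is False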
@@ -372,12 +399,31 @@ class LettaAgentV3(LettaAgentV2):
self._update_global_usage_stats(llm_adapter.usage)
# Handle the AI response with the extracted data
# NOTE: in v3 loop, no tool call is OK
# if tool_call is None and llm_adapter.tool_call is None:
# Handle the AI response with the extracted data (supports multiple tool calls)
# Gather tool calls. Approval paths specify a single tool call.
tool_calls_list: list[ToolCall] = []
if tool_call is not None:
tool_calls_list = [tool_call]
else:
# Prefer the new multi-call field from streaming adapters
if hasattr(llm_adapter, "tool_calls") and llm_adapter.tool_calls:
tool_calls_list = llm_adapter.tool_calls
elif llm_adapter.tool_call is not None:
tool_calls_list = [llm_adapter.tool_call]
else:
tool_calls_list = []
aggregated_persisted: list[Message] = []
tool_return_payload = (
approval_response.approvals[0]
if approval_response and approval_response.approvals and isinstance(approval_response.approvals[0], ToolReturn)
else None
)
primary_tool_call = tool_calls_list[0] if len(tool_calls_list) == 1 else None
persisted_messages, self.should_continue, self.stop_reason = await self._handle_ai_response(
tool_call=tool_call or llm_adapter.tool_call,
tool_call=primary_tool_call,
tool_calls=tool_calls_list,
valid_tool_names=[tool["name"] for tool in valid_tools],
agent_state=self.agent_state,
tool_rules_solver=self.tool_rules_solver,
@@ -386,7 +432,6 @@ class LettaAgentV3(LettaAgentV2):
prompt_tokens=self.usage.prompt_tokens,
total_tokens=self.usage.total_tokens,
),
# reasoning_content=reasoning_content or llm_adapter.reasoning_content,
content=content or llm_adapter.content,
pre_computed_assistant_message_id=llm_adapter.message_id,
step_id=step_id,
@@ -398,26 +443,26 @@ class LettaAgentV3(LettaAgentV2):
is_approval=approval_response.approve if approval_response is not None else False,
is_denial=(approval_response.approve == False) if approval_response is not None else False,
denial_reason=approval_response.denial_reason if approval_response is not None else None,
tool_return=approval_response.approvals[0]
if approval_response and approval_response.approvals and isinstance(approval_response.approvals[0], ToolReturn)
else None,
tool_return=tool_return_payload,
)
aggregated_persisted.extend(persisted_messages)
# NOTE: there is an edge case where persisted_messages is empty (the LLM did a "no-op")
new_message_idx = len(input_messages_to_persist) if input_messages_to_persist else 0
self.response_messages.extend(persisted_messages[new_message_idx:])
self.response_messages.extend(aggregated_persisted[new_message_idx:])
if llm_adapter.supports_token_streaming():
# Stream the tool return if a tool was actually executed.
# In the normal streaming path, the tool call is surfaced via the streaming interface
# (llm_adapter.tool_call), so don't rely solely on the local `tool_call` variable.
has_tool_return = any(m.role == "tool" for m in persisted_messages)
if len(persisted_messages) > 0 and persisted_messages[-1].role != "approval" and has_tool_return:
tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
if include_return_message_types is None or tool_return.message_type in include_return_message_types:
yield tool_return
# Stream each tool return if tools were executed
tool_returns = [msg for msg in aggregated_persisted if msg.role == "tool"]
for tr in tool_returns:
# Skip streaming for aggregated parallel tool returns (no per-call tool_call_id)
if tr.tool_call_id is None and tr.tool_returns:
continue
tool_return_letta = tr.to_letta_messages()[0]
if include_return_message_types is None or tool_return_letta.message_type in include_return_message_types:
yield tool_return_letta
else:
filter_user_messages = [m for m in persisted_messages[new_message_idx:] if m.role != "user"]
filter_user_messages = [m for m in aggregated_persisted[new_message_idx:] if m.role != "user"]
letta_messages = Message.to_letta_messages_from_list(
filter_user_messages,
use_assistant_message=False, # NOTE: set to false
@@ -527,12 +572,10 @@ class LettaAgentV3(LettaAgentV2):
@trace_method
async def _handle_ai_response(
self,
tool_call: Optional[ToolCall], # NOTE: should only be None for react agents
valid_tool_names: list[str],
agent_state: AgentState,
tool_rules_solver: ToolRulesSolver,
usage: UsageStatistics,
# reasoning_content: list[TextContent | ReasoningContent | RedactedReasoningContent | OmittedReasoningContent] | None = None,
content: list[TextContent | ReasoningContent | RedactedReasoningContent | OmittedReasoningContent] | None = None,
pre_computed_assistant_message_id: str | None = None,
step_id: str | None = None,
@@ -544,57 +587,76 @@ class LettaAgentV3(LettaAgentV2):
is_approval: bool | None = None,
is_denial: bool | None = None,
denial_reason: str | None = None,
tool_call: ToolCall | None = None,
tool_calls: Optional[list[ToolCall]] = None,
tool_return: ToolReturn | None = None,
) -> tuple[list[Message], bool, LettaStopReason | None]:
"""
Handle the final AI response once streaming completes, execute / validate the
tool call, decide whether we should keep stepping, and persist state.
"""
if tool_call is None:
# NOTE: in v3 loop, no tool call is OK
tool_call_id = None
else:
tool_call_id: str = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
Handle the final AI response once streaming completes, execute / validate tool calls,
decide whether we should keep stepping, and persist state.
if is_denial or tool_return is not None:
Unified approach: treats single and multi-tool calls uniformly to reduce code duplication.
"""
tool_calls = list(tool_calls) if tool_calls else []
if tool_call is not None and not tool_calls:
tool_calls = [tool_call]
first_tool_call = tool_calls[0] if tool_calls else tool_call
if tool_return is not None:
continue_stepping = True
stop_reason = None
if tool_return is not None:
tool_call_messages = [
Message(
role=MessageRole.tool,
content=[TextContent(text=tool_return.func_response)],
agent_id=agent_state.id,
model=agent_state.llm_config.model,
tool_calls=[],
tool_call_id=tool_return.tool_call_id,
created_at=get_utc_time(),
tool_returns=[tool_return],
run_id=run_id,
step_id=step_id,
)
]
else:
tool_call_messages = create_letta_messages_from_llm_response(
tool_call_messages = [
Message(
role=MessageRole.tool,
content=[TextContent(text=tool_return.func_response)],
agent_id=agent_state.id,
model=agent_state.llm_config.model,
function_name=tool_call.function.name,
function_arguments={},
tool_execution_result=ToolExecutionResult(status="error"),
tool_call_id=tool_call_id,
function_response=f"Error: request to call tool denied. User reason: {denial_reason}",
timezone=agent_state.timezone,
continue_stepping=continue_stepping,
# NOTE: we may need to change this to not have a "heartbeat" prefix for v3?
heartbeat_reason=f"{NON_USER_MSG_PREFIX}Continuing: user denied request to call tool.",
reasoning_content=None,
pre_computed_assistant_message_id=None,
step_id=step_id,
tool_calls=[],
tool_call_id=tool_return.tool_call_id,
created_at=get_utc_time(),
tool_returns=[tool_return],
run_id=run_id,
is_approval_response=True,
force_set_request_heartbeat=False,
add_heartbeat_on_continue=False,
step_id=step_id,
)
]
messages_to_persist = (initial_messages or []) + tool_call_messages
for message in messages_to_persist:
if message.run_id is None:
message.run_id = run_id
persisted_messages = await self.message_manager.create_many_messages_async(
messages_to_persist,
actor=self.actor,
run_id=run_id,
project_id=agent_state.project_id,
template_id=agent_state.template_id,
)
return persisted_messages, continue_stepping, stop_reason
# Handle denial case first (special case that bypasses normal flow)
if is_denial and first_tool_call is not None:
tool_call_id = first_tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
continue_stepping = True
stop_reason = None
tool_call_messages = create_letta_messages_from_llm_response(
agent_id=agent_state.id,
model=agent_state.llm_config.model,
function_name=first_tool_call.function.name,
function_arguments={},
tool_execution_result=ToolExecutionResult(status="error"),
tool_call_id=tool_call_id,
function_response=f"Error: request to call tool denied. User reason: {denial_reason}",
timezone=agent_state.timezone,
continue_stepping=continue_stepping,
heartbeat_reason=f"{NON_USER_MSG_PREFIX}Continuing: user denied request to call tool.",
reasoning_content=None,
pre_computed_assistant_message_id=None,
step_id=step_id,
run_id=run_id,
is_approval_response=True,
force_set_request_heartbeat=False,
add_heartbeat_on_continue=False,
)
messages_to_persist = (initial_messages or []) + tool_call_messages
# Set run_id on all messages before persisting
@@ -611,97 +673,94 @@ class LettaAgentV3(LettaAgentV2):
)
return persisted_messages, continue_stepping, stop_reason
        # -1. no tool call, no content
        if tool_call is None and (content is None or len(content) == 0):
            # Edge case is when there's also no content - basically, the LLM "no-op'd"
            # If RequiredBeforeExitToolRule exists and not all required tools have been called,
            # inject a rule-violation heartbeat to keep looping and inform the model.
            uncalled = tool_rules_solver.get_uncalled_required_tools(available_tools=set([t.name for t in agent_state.tools]))
            if uncalled:
                # TODO: we may need to change this to not have a "heartbeat" prefix for v3?
                heartbeat_reason = (
                    f"{NON_USER_MSG_PREFIX}ToolRuleViolated: You must call {', '.join(uncalled)} at least once to exit the loop."
                )
                from letta.server.rest_api.utils import create_heartbeat_system_message

                heartbeat_msg = create_heartbeat_system_message(
                    agent_id=agent_state.id,
                    model=agent_state.llm_config.model,
                    function_call_success=True,
                    timezone=agent_state.timezone,
                    heartbeat_reason=heartbeat_reason,
                    run_id=run_id,
                )
                messages_to_persist = (initial_messages or []) + [heartbeat_msg]
                continue_stepping, stop_reason = True, None
            else:
                # In this case, we actually do not want to persist the no-op message
                continue_stepping, heartbeat_reason, stop_reason = False, None, LettaStopReason(stop_reason=StopReasonType.end_turn.value)
                messages_to_persist = initial_messages or []
        # 0. If there's no tool call, we can early exit
        elif tool_call is None:
            # TODO could just hardcode the line here instead of calling the function...
            continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
                agent_state=agent_state,
                tool_call_name=None,
                tool_rule_violated=False,
                tool_rules_solver=tool_rules_solver,
                is_final_step=is_final_step,
            )
            assistant_message = create_letta_messages_from_llm_response(
                agent_id=agent_state.id,
                model=agent_state.llm_config.model,
                function_name=None,
                function_arguments=None,
                tool_execution_result=None,
                tool_call_id=None,
                function_response=None,
                timezone=agent_state.timezone,
                continue_stepping=continue_stepping,
                heartbeat_reason=heartbeat_reason,
                # NOTE: should probably rename this to `content`?
                reasoning_content=content,
                pre_computed_assistant_message_id=pre_computed_assistant_message_id,
                step_id=step_id,
                run_id=run_id,
                is_approval_response=is_approval or is_denial,
                force_set_request_heartbeat=False,
                # If we're continuing due to a required-before-exit rule, include a heartbeat to guide the model
                add_heartbeat_on_continue=bool(heartbeat_reason),
            )
            messages_to_persist = (initial_messages or []) + assistant_message
        # 3. Handle no-tool cases (content-only or no-op)
        if not tool_calls:
            # Case 3a: No tool call, no content (LLM no-op)
            if content is None or len(content) == 0:
                # Check if there are required-before-exit tools that haven't been called
                uncalled = tool_rules_solver.get_uncalled_required_tools(available_tools=set([t.name for t in agent_state.tools]))
                if uncalled:
                    heartbeat_reason = (
                        f"{NON_USER_MSG_PREFIX}ToolRuleViolated: You must call {', '.join(uncalled)} at least once to exit the loop."
                    )
                    from letta.server.rest_api.utils import create_heartbeat_system_message

                    heartbeat_msg = create_heartbeat_system_message(
                        agent_id=agent_state.id,
                        model=agent_state.llm_config.model,
                        function_call_success=True,
                        timezone=agent_state.timezone,
                        heartbeat_reason=heartbeat_reason,
                        run_id=run_id,
                    )
                    messages_to_persist = (initial_messages or []) + [heartbeat_msg]
                    continue_stepping, stop_reason = True, None
                else:
                    # No required tools remaining, end turn without persisting the no-op
                    continue_stepping = False
                    stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
                    messages_to_persist = initial_messages or []
            # Case 3b: No tool call but has content
            else:
                continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
                    agent_state=agent_state,
                    tool_call_name=None,
                    tool_rule_violated=False,
                    tool_rules_solver=tool_rules_solver,
                    is_final_step=is_final_step,
                )
                assistant_message = create_letta_messages_from_llm_response(
                    agent_id=agent_state.id,
                    model=agent_state.llm_config.model,
                    function_call_success=True,
                    function_name=None,
                    function_arguments=None,
                    tool_execution_result=None,
                    tool_call_id=None,
                    function_response=None,
                    timezone=agent_state.timezone,
                    continue_stepping=continue_stepping,
                    heartbeat_reason=heartbeat_reason,
                    reasoning_content=content,
                    pre_computed_assistant_message_id=pre_computed_assistant_message_id,
                    step_id=step_id,
                    run_id=run_id,
                    is_approval_response=is_approval or is_denial,
                    force_set_request_heartbeat=False,
                    add_heartbeat_on_continue=bool(heartbeat_reason),
                )
                messages_to_persist = (initial_messages or []) + assistant_message
            # Persist messages for no-tool cases
            for message in messages_to_persist:
                if message.run_id is None:
                    message.run_id = run_id
            persisted_messages = await self.message_manager.create_many_messages_async(
                messages_to_persist, actor=self.actor, run_id=run_id, project_id=agent_state.project_id, template_id=agent_state.template_id
            )
            return persisted_messages, continue_stepping, stop_reason
        else:
            # 1. Parse and validate the tool-call envelope
            tool_call_name: str = tool_call.function.name
            tool_args = _safe_load_tool_call_str(tool_call.function.arguments)
            # NOTE: these are failsafes - for v3, we should eventually be able to remove these
            # request_heartbeat: bool = _pop_heartbeat(tool_args)
            tool_args.pop(REQUEST_HEARTBEAT_PARAM, None)
            tool_args.pop(INNER_THOUGHTS_KWARG, None)
            log_telemetry(
                self.logger,
                "_handle_ai_response execute tool start",
                tool_name=tool_call_name,
                tool_args=tool_args,
                tool_call_id=tool_call_id,
                # request_heartbeat=request_heartbeat,
            )
            if not is_approval and tool_rules_solver.is_requires_approval_tool(tool_call_name):
                approval_message = create_approval_request_message_from_llm_response(
                    agent_id=agent_state.id,
                    model=agent_state.llm_config.model,
                    function_name=tool_call_name,
                    function_arguments=tool_args,
                    tool_call_id=tool_call_id,
                    actor=self.actor,
                    # continue_stepping=request_heartbeat,
                    continue_stepping=True,
                    # reasoning_content=reasoning_content,
                    reasoning_content=content,
                    pre_computed_assistant_message_id=pre_computed_assistant_message_id,
                    step_id=step_id,
        # 4. Unified tool execution path (works for both single and multiple tools)
        # 4a. Check for single tool approval case (special handling required)
        if len(tool_calls) == 1 and not is_approval:
            single_tool = tool_calls[0]
            tool_name = single_tool.function.name
            tool_args = _safe_load_tool_call_str(single_tool.function.arguments)
            tool_args.pop(REQUEST_HEARTBEAT_PARAM, None)
            tool_args.pop(INNER_THOUGHTS_KWARG, None)
            if tool_rules_solver.is_requires_approval_tool(tool_name):
                tool_call_id = single_tool.id or f"call_{uuid.uuid4().hex[:8]}"
                approval_message = create_approval_request_message_from_llm_response(
                    agent_id=agent_state.id,
                    model=agent_state.llm_config.model,
                    function_name=tool_name,
                    function_arguments=tool_args,
                    tool_call_id=tool_call_id,
                    actor=self.actor,
                    continue_stepping=True,
                    reasoning_content=content,
                    pre_computed_assistant_message_id=pre_computed_assistant_message_id,
                    step_id=step_id,
@@ -709,155 +768,220 @@ class LettaAgentV3(LettaAgentV2):
append_request_heartbeat=False,
)
messages_to_persist = (initial_messages or []) + [approval_message]
continue_stepping = False
stop_reason = LettaStopReason(stop_reason=StopReasonType.requires_approval.value)
else:
# 2. Execute the tool (or synthesize an error result if disallowed)
tool_rule_violated = tool_call_name not in valid_tool_names and not is_approval
if tool_rule_violated:
tool_execution_result = _build_rule_violation_result(tool_call_name, valid_tool_names, tool_rules_solver)
else:
# Prefill + validate args if a rule provided them
prefill_args = self.tool_rules_solver.last_prefilled_args_by_tool.get(tool_call_name)
if prefill_args:
# Find tool object for schema validation
target_tool = next((t for t in agent_state.tools if t.name == tool_call_name), None)
provenance = self.tool_rules_solver.last_prefilled_args_provenance.get(tool_call_name)
try:
tool_args = merge_and_validate_prefilled_args(
tool=target_tool,
llm_args=tool_args,
prefilled_args=prefill_args,
)
except ValueError as ve:
# Treat invalid prefilled args as user error and end the step
error_prefix = "Invalid prefilled tool arguments from tool rules"
prov_suffix = f" (source={provenance})" if provenance else ""
err_msg = f"{error_prefix}{prov_suffix}: {str(ve)}"
tool_execution_result = ToolExecutionResult(status="error", func_return=err_msg)
# Create messages and early return persistence path below
continue_stepping, heartbeat_reason, stop_reason = (
False,
None,
LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value),
)
tool_call_messages = create_letta_messages_from_llm_response(
agent_id=agent_state.id,
model=agent_state.llm_config.model,
function_name=tool_call_name,
function_arguments=tool_args,
tool_execution_result=tool_execution_result,
tool_call_id=tool_call_id,
function_response=tool_execution_result.func_return,
timezone=agent_state.timezone,
continue_stepping=continue_stepping,
heartbeat_reason=None,
reasoning_content=content,
pre_computed_assistant_message_id=pre_computed_assistant_message_id,
step_id=step_id,
run_id=run_id,
is_approval_response=is_approval or is_denial,
force_set_request_heartbeat=False,
add_heartbeat_on_continue=False,
)
messages_to_persist = (initial_messages or []) + tool_call_messages
                    # Set run_id on all messages before persisting
                    for message in messages_to_persist:
                        if message.run_id is None:
                            message.run_id = run_id
persisted_messages = await self.message_manager.create_many_messages_async(
messages_to_persist,
actor=self.actor,
run_id=run_id,
project_id=agent_state.project_id,
template_id=agent_state.template_id,
)
return persisted_messages, continue_stepping, stop_reason
# Track tool execution time
tool_start_time = get_utc_timestamp_ns()
tool_execution_result = await self._execute_tool(
tool_name=tool_call_name,
tool_args=tool_args,
agent_state=agent_state,
agent_step_span=agent_step_span,
step_id=step_id,
)
tool_end_time = get_utc_timestamp_ns()
# Store tool execution time in metrics
step_metrics.tool_execution_ns = tool_end_time - tool_start_time
log_telemetry(
self.logger,
"_handle_ai_response execute tool finish",
tool_execution_result=tool_execution_result,
tool_call_id=tool_call_id,
)
# 3. Prepare the function-response payload
truncate = tool_call_name not in {"conversation_search", "conversation_search_date", "archival_memory_search"}
return_char_limit = next(
(t.return_char_limit for t in agent_state.tools if t.name == tool_call_name),
None,
)
function_response_string = validate_function_response(
tool_execution_result.func_return,
return_char_limit=return_char_limit,
truncate=truncate,
)
self.last_function_response = package_function_response(
was_success=tool_execution_result.success_flag,
response_string=function_response_string,
timezone=agent_state.timezone,
)
# 4. Decide whether to keep stepping (focal section simplified)
continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
agent_state=agent_state,
tool_call_name=tool_call_name,
tool_rule_violated=tool_rule_violated,
tool_rules_solver=tool_rules_solver,
is_final_step=is_final_step,
)
                # 5. Create messages (step was already created at the beginning)
                tool_call_messages = create_letta_messages_from_llm_response(
                    agent_id=agent_state.id,
                    model=agent_state.llm_config.model,
                    function_name=tool_call_name,
                    function_arguments=tool_args,
                    tool_execution_result=tool_execution_result,
                    tool_call_id=tool_call_id,
                    function_response=function_response_string,
                    timezone=agent_state.timezone,
                    continue_stepping=continue_stepping,
                    # heartbeat_reason=heartbeat_reason,
                    heartbeat_reason=None,
                    # reasoning_content=reasoning_content,
                    reasoning_content=content,
                    pre_computed_assistant_message_id=pre_computed_assistant_message_id,
                    step_id=step_id,
                    run_id=run_id,
                    is_approval_response=is_approval or is_denial,
                    force_set_request_heartbeat=False,
                    add_heartbeat_on_continue=False,
                )
                messages_to_persist = (initial_messages or []) + tool_call_messages
                persisted_messages = await self.message_manager.create_many_messages_async(
                    messages_to_persist,
                    actor=self.actor,
                    run_id=run_id,
                    project_id=agent_state.project_id,
                    template_id=agent_state.template_id,
                )
                return persisted_messages, False, LettaStopReason(stop_reason=StopReasonType.requires_approval.value)
# 4b. Validate parallel tool calling constraints
if len(tool_calls) > 1:
# No parallel tool calls with tool rules
if agent_state.tool_rules and len(agent_state.tool_rules) > 0:
raise ValueError(
"Parallel tool calling is not allowed when tool rules are present. Disable tool rules to use parallel tool calls."
)
# No parallel tool calls with approval-required tools
if any(tool_rules_solver.is_requires_approval_tool(tc.function.name) for tc in tool_calls):
raise ValueError("Parallel tool calling is not allowed when any tool requires approval.")
# 4c. Prepare execution specs for all tools
exec_specs = []
for tc in tool_calls:
call_id = tc.id or f"call_{uuid.uuid4().hex[:8]}"
name = tc.function.name
args = _safe_load_tool_call_str(tc.function.arguments)
args.pop(REQUEST_HEARTBEAT_PARAM, None)
args.pop(INNER_THOUGHTS_KWARG, None)
# Validate against allowed tools
tool_rule_violated = name not in valid_tool_names and not is_approval
# Handle prefilled args if present
if not tool_rule_violated:
prefill_args = tool_rules_solver.last_prefilled_args_by_tool.get(name)
if prefill_args:
target_tool = next((t for t in agent_state.tools if t.name == name), None)
provenance = tool_rules_solver.last_prefilled_args_provenance.get(name)
try:
args = merge_and_validate_prefilled_args(
tool=target_tool,
llm_args=args,
prefilled_args=prefill_args,
)
except ValueError as ve:
# Invalid prefilled args - create error result
error_prefix = "Invalid prefilled tool arguments from tool rules"
prov_suffix = f" (source={provenance})" if provenance else ""
err_msg = f"{error_prefix}{prov_suffix}: {str(ve)}"
exec_specs.append(
{
"id": call_id,
"name": name,
"args": args,
"violated": False,
"error": err_msg,
}
)
continue
exec_specs.append(
{
"id": call_id,
"name": name,
"args": args,
"violated": tool_rule_violated,
"error": None,
}
)
# 4d. Execute tools (sequentially for single, parallel for multiple)
if len(exec_specs) == 1:
# Single tool - execute directly without asyncio.gather overhead
spec = exec_specs[0]
if spec.get("error"):
# Prefill arg validation error
result = ToolExecutionResult(status="error", func_return=spec["error"])
exec_time = 0
elif spec["violated"]:
result = _build_rule_violation_result(spec["name"], valid_tool_names, tool_rules_solver)
exec_time = 0
else:
t0 = get_utc_timestamp_ns()
result = await self._execute_tool(
tool_name=spec["name"],
tool_args=spec["args"],
agent_state=agent_state,
agent_step_span=agent_step_span,
step_id=step_id,
)
exec_time = get_utc_timestamp_ns() - t0
results = [(result, exec_time)]
else:
# Multiple tools - execute in parallel
async def _run_one(spec):
if spec.get("error"):
return ToolExecutionResult(status="error", func_return=spec["error"]), 0
if spec["violated"]:
result = _build_rule_violation_result(spec["name"], valid_tool_names, tool_rules_solver)
return result, 0
t0 = get_utc_timestamp_ns()
res = await self._execute_tool(
tool_name=spec["name"],
tool_args=spec["args"],
agent_state=agent_state,
agent_step_span=agent_step_span,
step_id=step_id,
)
dt = get_utc_timestamp_ns() - t0
return res, dt
results = await asyncio.gather(*[_run_one(s) for s in exec_specs])
        # Update metrics with the longest single-tool execution time (calls run concurrently)
if step_metrics is not None and results:
step_metrics.tool_execution_ns = max(dt for _, dt in results)
# 4e. Process results and compute function responses
function_responses: list[Optional[str]] = []
persisted_continue_flags: list[bool] = []
persisted_stop_reasons: list[LettaStopReason | None] = []
for idx, spec in enumerate(exec_specs):
tool_execution_result, _ = results[idx]
has_prefill_error = bool(spec.get("error"))
# Validate and format function response
truncate = spec["name"] not in {"conversation_search", "conversation_search_date", "archival_memory_search"}
return_char_limit = next((t.return_char_limit for t in agent_state.tools if t.name == spec["name"]), None)
function_response_string = validate_function_response(
tool_execution_result.func_return,
return_char_limit=return_char_limit,
truncate=truncate,
)
function_responses.append(function_response_string)
# Update last function response (for tool rules)
self.last_function_response = package_function_response(
was_success=tool_execution_result.success_flag,
response_string=function_response_string,
timezone=agent_state.timezone,
)
# Register successful tool call with solver
if not spec["violated"] and not has_prefill_error:
tool_rules_solver.register_tool_call(spec["name"])
# Decide continuation for this tool
if has_prefill_error:
cont = False
hb_reason = None
sr = LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value)
else:
cont, hb_reason, sr = self._decide_continuation(
agent_state=agent_state,
tool_call_name=spec["name"],
tool_rule_violated=spec["violated"],
tool_rules_solver=tool_rules_solver,
is_final_step=(is_final_step and idx == len(exec_specs) - 1),
)
persisted_continue_flags.append(cont)
persisted_stop_reasons.append(sr)
# 4f. Create messages using parallel message creation (works for both single and multi)
tool_call_specs = [{"name": s["name"], "arguments": s["args"], "id": s["id"]} for s in exec_specs]
tool_execution_results = [res for (res, _) in results]
# Use the parallel message creation function for both single and multiple tools
parallel_messages = create_parallel_tool_messages_from_llm_response(
agent_id=agent_state.id,
model=agent_state.llm_config.model,
tool_call_specs=tool_call_specs,
tool_execution_results=tool_execution_results,
function_responses=function_responses,
timezone=agent_state.timezone,
run_id=run_id,
step_id=step_id,
reasoning_content=content,
pre_computed_assistant_message_id=pre_computed_assistant_message_id,
is_approval_response=(is_approval or is_denial),
)
messages_to_persist: list[Message] = (initial_messages or []) + parallel_messages
# Set run_id on all messages before persisting
for message in messages_to_persist:
if message.run_id is None:
message.run_id = run_id
# Persist all messages
persisted_messages = await self.message_manager.create_many_messages_async(
messages_to_persist, actor=self.actor, run_id=run_id, project_id=agent_state.project_id, template_id=agent_state.template_id
messages_to_persist,
actor=self.actor,
run_id=run_id,
project_id=agent_state.project_id,
template_id=agent_state.template_id,
)
return persisted_messages, continue_stepping, stop_reason
# 4g. Aggregate continuation decisions
# For multiple tools: continue if ANY says continue, use last non-None stop_reason
# For single tool: use its decision directly
aggregate_continue = any(persisted_continue_flags) if persisted_continue_flags else False
aggregate_stop_reason = None
for sr in persisted_stop_reasons:
if sr is not None:
aggregate_stop_reason = sr
return persisted_messages, aggregate_continue, aggregate_stop_reason
@trace_method
def _decide_continuation(
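The fan-out/fan-in at the heart of steps 4d-4g can be summarized in isolation. A minimal sketch with simplified types (the helper names and demo values here are illustrative, not the module's API):

import asyncio
import time

async def run_parallel(specs, execute):
    # Fan out: run every tool call concurrently, timing each one
    async def run_one(spec):
        t0 = time.monotonic()
        result = await execute(spec)
        return result, time.monotonic() - t0
    return await asyncio.gather(*(run_one(s) for s in specs))

def aggregate(continue_flags, stop_reasons):
    # Fan in: continue if ANY call wants to continue; keep the last non-None stop reason
    agg_stop = None
    for sr in stop_reasons:
        if sr is not None:
            agg_stop = sr
    return (any(continue_flags) if continue_flags else False), agg_stop

async def _demo():
    results = await run_parallel(
        [{"name": "get_weather"}, {"name": "get_time"}],
        lambda spec: asyncio.sleep(0.01, result=f"ran {spec['name']}"),
    )
    print(results)

asyncio.run(_demo())
assert aggregate([True, False], [None, "end_turn"]) == (True, "end_turn")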

View File

@@ -1514,6 +1514,7 @@ class Message(BaseMessage):
}
],
}
else:
raise ValueError(self.role)

View File

@@ -3,7 +3,7 @@ import json
import os
import uuid
from enum import Enum
from typing import AsyncGenerator, Dict, Iterable, List, Optional, Union, cast
from typing import Any, AsyncGenerator, Dict, Iterable, List, Optional, Union, cast
from fastapi import Header, HTTPException
from openai.types.chat import ChatCompletionMessageParam
@@ -377,6 +377,117 @@ def create_letta_messages_from_llm_response(
return messages
def create_parallel_tool_messages_from_llm_response(
agent_id: str,
model: str,
tool_call_specs: List[Dict[str, Any]], # List of tool call specs: {"name": str, "arguments": Dict, "id": Optional[str]}
tool_execution_results: List[ToolExecutionResult],
function_responses: List[Optional[str]],
timezone: str,
run_id: Optional[str] = None,
step_id: Optional[str] = None,
reasoning_content: Optional[
        List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent, SummarizedReasoningContent]]
] = None,
pre_computed_assistant_message_id: Optional[str] = None,
llm_batch_item_id: Optional[str] = None,
is_approval_response: bool = False,
) -> List[Message]:
"""
Build two messages representing a parallel tool-call step:
- One assistant message with ALL tool_calls populated (tool_call_id left empty)
- One tool message with ALL tool_returns populated (tool_call_id left empty)
Notes:
- Consumers should read tool_calls/tool_returns arrays for per-call details.
- The tool message's content includes only the first call's packaged response for
backward-compatibility with legacy renderers. UIs should prefer tool_returns.
- When invoked for an approval response, the assistant message is omitted (the approval
tool call was previously surfaced).
"""
# Construct OpenAI-style tool_calls for the assistant message
openai_tool_calls: List[OpenAIToolCall] = []
for spec in tool_call_specs:
name = spec.get("name")
args = spec.get("arguments", {})
call_id = spec.get("id") or str(uuid.uuid4())
# Ensure the spec carries the resolved id so returns/content can reference it
if not spec.get("id"):
spec["id"] = call_id
openai_tool_calls.append(
OpenAIToolCall(
id=call_id,
function=OpenAIFunction(name=name, arguments=json.dumps(args)),
type="function",
)
)
messages: List[Message] = []
if not is_approval_response:
# Assistant message with all tool_calls (no single tool_call_id)
# Safeguard against empty text messages
content: List[
Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent, SummarizedReasoningContent]
] = []
if reasoning_content:
for content_part in reasoning_content:
if isinstance(content_part, TextContent) and content_part.text == "":
continue
content.append(content_part)
assistant_message = Message(
role=MessageRole.assistant,
content=content,
agent_id=agent_id,
model=model,
tool_calls=openai_tool_calls,
tool_call_id=None,
created_at=get_utc_time(),
batch_item_id=llm_batch_item_id,
run_id=run_id,
)
if step_id:
assistant_message.step_id = step_id
if pre_computed_assistant_message_id:
assistant_message.id = pre_computed_assistant_message_id
messages.append(assistant_message)
content: List[TextContent] = []
tool_returns: List[ToolReturn] = []
for spec, exec_result, response in zip(tool_call_specs, tool_execution_results, function_responses):
packaged = package_function_response(exec_result.success_flag, response, timezone)
content.append(TextContent(text=packaged))
tool_returns.append(
ToolReturn(
tool_call_id=spec.get("id"),
status=exec_result.status,
stdout=exec_result.stdout,
stderr=exec_result.stderr,
func_response=packaged,
)
)
tool_message = Message(
role=MessageRole.tool,
content=content,
agent_id=agent_id,
model=model,
tool_calls=[],
tool_call_id=tool_returns[0].tool_call_id, # For legacy reasons, set to first one
created_at=get_utc_time(),
batch_item_id=llm_batch_item_id,
tool_returns=tool_returns,
run_id=run_id,
)
if step_id:
tool_message.step_id = step_id
messages.append(tool_message)
return messages
def create_heartbeat_system_message(
agent_id: str,
model: str,
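A hedged usage sketch of the new helper under the signature above (all argument values are invented for the example; ToolExecutionResult construction follows the `status=` usage seen elsewhere in this diff):

# Illustrative call shape only, not a test from this commit.
specs = [
    {"name": "get_weather", "arguments": {"city": "SF"}, "id": "call_1"},
    {"name": "get_time", "arguments": {"tz": "PST"}, "id": "call_2"},
]
results = [ToolExecutionResult(status="success"), ToolExecutionResult(status="success")]
responses = ['{"temp_f": 61}', '{"time": "07:14"}']

messages = create_parallel_tool_messages_from_llm_response(
    agent_id="agent-0",
    model="claude-sonnet-4",
    tool_call_specs=specs,
    tool_execution_results=results,
    function_responses=responses,
    timezone="America/Los_Angeles",
)
# messages[0]: one assistant message carrying both tool_calls
# messages[1]: one tool message carrying both tool_returns,
#              with tool_call_id set to "call_1" for legacy readers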

View File

@@ -1,7 +1,7 @@
{
"agents": [
{
"name": "test_export_import_57426498-f708-4228-a331-5efc0087b895",
"name": "test_export_import_42fbdcec-58ce-4e27-b6e5-938b3bcd57aa",
"memory_blocks": [],
"tools": [],
"tool_ids": [
@@ -20,25 +20,25 @@
"block-2"
],
"tool_rules": [
{
"tool_name": "send_message",
"type": "exit_loop",
"prompt_template": null
},
{
"tool_name": "memory_insert",
"type": "continue_loop",
"prompt_template": null
},
{
"tool_name": "memory_replace",
"type": "continue_loop",
"prompt_template": null
},
{
"tool_name": "send_message",
"type": "exit_loop",
"prompt_template": null
},
{
"tool_name": "conversation_search",
"type": "continue_loop",
"prompt_template": null
},
{
"tool_name": "memory_replace",
"type": "continue_loop",
"prompt_template": null
}
],
"tags": [
@@ -129,7 +129,7 @@
"content": [
{
"type": "text",
"text": "You are a helpful assistant specializing in data analysis and mathematical computations.\n\n<memory_blocks>\nThe following memory blocks are currently engaged in your core memory unit:\n\n<project_context>\n<description>\n\n</description>\n<metadata>\n- chars_current=210\n- chars_limit=6000\n</metadata>\n<value>\n# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\nLine 1: Current project: Building predictive models for financial markets. Sarah is working on sequence analysis and pattern recognition. Recently interested in mathematical sequences like Fibonacci for trend analysis.\n</value>\n</project_context>\n\n<human>\n<description>\nThe human block: Stores key details about the person you are conversing with, allowing for more personalized and friend-like conversation.\n</description>\n<metadata>\n- chars_current=175\n- chars_limit=4000\n</metadata>\n<value>\n# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\nLine 1: username: sarah_researcher\nLine 2: occupation: data scientist\nLine 3: interests: machine learning, statistics, fibonacci sequences\nLine 4: preferred_communication: detailed explanations with examples\n</value>\n</human>\n\n<persona>\n<description>\nThe persona block: Stores details about your current persona, guiding how you behave and respond. This helps you to maintain consistency and personality in your interactions.\n</description>\n<metadata>\n- chars_current=195\n- chars_limit=8000\n</metadata>\n<value>\n# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\nLine 1: You are Alex, a data analyst and mathematician who helps users with calculations and insights. You have extensive experience in statistical analysis and prefer to provide clear, accurate results.\n</value>\n</persona>\n\n</memory_blocks>\n\n<tool_usage_rules>\nThe following constraints define rules for tool usage and guide desired behavior. These rules must be followed to ensure proper tool execution and workflow. A single response may contain multiple tool calls.\n\n<tool_rule>\nmemory_insert requires continuing your response when called\n</tool_rule>\n<tool_rule>\nmemory_replace requires continuing your response when called\n</tool_rule>\n<tool_rule>\nconversation_search requires continuing your response when called\n</tool_rule>\n<tool_rule>\nsend_message ends your response (yields control) when called\n</tool_rule>\n</tool_usage_rules>\n\n<memory_metadata>\n- The current system date is: October 10, 2025\n- Memory blocks were last modified: 2025-10-10 12:21:30 AM UTC+0000\n- -1 previous messages between you and the user are stored in recall memory (use tools to access them)\n- 2 total memories you created are stored in archival memory (use tools to access them)\n</memory_metadata>",
"text": "You are a helpful assistant specializing in data analysis and mathematical computations.\n\n<memory_blocks>\nThe following memory blocks are currently engaged in your core memory unit:\n\n<project_context>\n<description>\n\n</description>\n<metadata>\n- chars_current=210\n- chars_limit=6000\n</metadata>\n<value>\n# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\nLine 1: Current project: Building predictive models for financial markets. Sarah is working on sequence analysis and pattern recognition. Recently interested in mathematical sequences like Fibonacci for trend analysis.\n</value>\n</project_context>\n\n<human>\n<description>\nThe human block: Stores key details about the person you are conversing with, allowing for more personalized and friend-like conversation.\n</description>\n<metadata>\n- chars_current=175\n- chars_limit=4000\n</metadata>\n<value>\n# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\nLine 1: username: sarah_researcher\nLine 2: occupation: data scientist\nLine 3: interests: machine learning, statistics, fibonacci sequences\nLine 4: preferred_communication: detailed explanations with examples\n</value>\n</human>\n\n<persona>\n<description>\nThe persona block: Stores details about your current persona, guiding how you behave and respond. This helps you to maintain consistency and personality in your interactions.\n</description>\n<metadata>\n- chars_current=195\n- chars_limit=8000\n</metadata>\n<value>\n# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\nLine 1: You are Alex, a data analyst and mathematician who helps users with calculations and insights. You have extensive experience in statistical analysis and prefer to provide clear, accurate results.\n</value>\n</persona>\n\n</memory_blocks>\n\n<tool_usage_rules>\nThe following constraints define rules for tool usage and guide desired behavior. These rules must be followed to ensure proper tool execution and workflow. A single response may contain multiple tool calls.\n\n<tool_rule>\nmemory_insert requires continuing your response when called\n</tool_rule>\n<tool_rule>\nconversation_search requires continuing your response when called\n</tool_rule>\n<tool_rule>\nmemory_replace requires continuing your response when called\n</tool_rule>\n<tool_rule>\nsend_message ends your response (yields control) when called\n</tool_rule>\n</tool_usage_rules>\n\n<memory_metadata>\n- The current system date is: October 10, 2025\n- Memory blocks were last modified: 2025-10-10 07:14:31 AM UTC+0000\n- -1 previous messages between you and the user are stored in recall memory (use tools to access them)\n- 2 total memories you created are stored in archival memory (use tools to access them)\n</memory_metadata>",
"signature": null
}
],
@@ -144,7 +144,7 @@
"tool_calls": null,
"tool_call_id": null,
"tool_returns": [],
"created_at": "2025-10-10T00:21:29.139951+00:00",
"created_at": "2025-10-10T07:14:29.912336+00:00",
"approve": null,
"approval_request_id": null,
"denial_reason": null
@@ -169,7 +169,7 @@
"agent_id": "agent-0",
"tool_calls": [
{
"id": "9b3e8290-a336-4e3b-851e-2472ccd6fe91",
"id": "f9a9765f-9285-4056-9916-d5bafb729bef",
"function": {
"arguments": "{\n \"message\": \"More human than human is our motto.\"\n}",
"name": "send_message"
@@ -179,7 +179,7 @@
],
"tool_call_id": null,
"tool_returns": [],
"created_at": "2025-10-10T00:21:29.139990+00:00",
"created_at": "2025-10-10T07:14:29.912379+00:00",
"approve": null,
"approval_request_id": null,
"denial_reason": null
@@ -190,7 +190,7 @@
"content": [
{
"type": "text",
"text": "{\n \"status\": \"OK\",\n \"message\": null,\n \"time\": \"2025-10-10 12:21:29 AM UTC+0000\"\n}",
"text": "{\n \"status\": \"OK\",\n \"message\": null,\n \"time\": \"2025-10-10 07:14:29 AM UTC+0000\"\n}",
"signature": null
}
],
@@ -203,9 +203,9 @@
"model": "gpt-4.1-mini",
"agent_id": "agent-0",
"tool_calls": null,
"tool_call_id": "9b3e8290-a336-4e3b-851e-2472ccd6fe91",
"tool_call_id": "f9a9765f-9285-4056-9916-d5bafb729bef",
"tool_returns": [],
"created_at": "2025-10-10T00:21:29.140013+00:00",
"created_at": "2025-10-10T07:14:29.912403+00:00",
"approve": null,
"approval_request_id": null,
"denial_reason": null
@@ -216,7 +216,7 @@
"content": [
{
"type": "text",
"text": "{\n \"type\": \"login\",\n \"last_login\": \"Never (first login)\",\n \"time\": \"2025-10-10 12:21:29 AM UTC+0000\"\n}",
"text": "{\n \"type\": \"login\",\n \"last_login\": \"Never (first login)\",\n \"time\": \"2025-10-10 07:14:29 AM UTC+0000\"\n}",
"signature": null
}
],
@@ -231,7 +231,7 @@
"tool_calls": null,
"tool_call_id": null,
"tool_returns": [],
"created_at": "2025-10-10T00:21:29.140024+00:00",
"created_at": "2025-10-10T07:14:29.912415+00:00",
"approve": null,
"approval_request_id": null,
"denial_reason": null
@@ -257,7 +257,7 @@
"tool_calls": null,
"tool_call_id": null,
"tool_returns": [],
"created_at": "2025-10-10T00:21:30.360611+00:00",
"created_at": "2025-10-10T07:14:31.639749+00:00",
"approve": null,
"approval_request_id": null,
"denial_reason": null
@@ -268,7 +268,7 @@
"content": [
{
"type": "text",
"text": "Responding to test message for export.",
"text": "Responding to test message for export with confirmation.",
"signature": null
}
],
@@ -282,17 +282,17 @@
"agent_id": "agent-0",
"tool_calls": [
{
"id": "call_mNAOnZ212k0eehF005T5s6Wn",
"id": "call_tB2kJPFgSD5Ldk8VaUYwkMCF",
"function": {
"arguments": "{\"message\": \"Received your test message for export. Everything is working fine on my end. How can I assist you further today?\", \"request_heartbeat\": false}",
"arguments": "{\"message\": \"Test message received successfully. Ready for further instructions or queries.\", \"request_heartbeat\": false}",
"name": "send_message"
},
"type": "function"
}
],
"tool_call_id": "call_mNAOnZ212k0eehF005T5s6Wn",
"tool_call_id": "call_tB2kJPFgSD5Ldk8VaUYwkMCF",
"tool_returns": [],
"created_at": "2025-10-10T00:21:31.900813+00:00",
"created_at": "2025-10-10T07:14:33.049314+00:00",
"approve": null,
"approval_request_id": null,
"denial_reason": null
@@ -303,7 +303,7 @@
"content": [
{
"type": "text",
"text": "{\n \"status\": \"OK\",\n \"message\": \"Sent message successfully.\",\n \"time\": \"2025-10-10 12:21:31 AM UTC+0000\"\n}",
"text": "{\n \"status\": \"OK\",\n \"message\": \"Sent message successfully.\",\n \"time\": \"2025-10-10 07:14:33 AM UTC+0000\"\n}",
"signature": null
}
],
@@ -316,17 +316,17 @@
"model": "gpt-4.1-mini",
"agent_id": "agent-0",
"tool_calls": null,
"tool_call_id": "call_mNAOnZ212k0eehF005T5s6Wn",
"tool_call_id": "call_tB2kJPFgSD5Ldk8VaUYwkMCF",
"tool_returns": [
{
"tool_call_id": "call_mNAOnZ212k0eehF005T5s6Wn",
"tool_call_id": "call_tB2kJPFgSD5Ldk8VaUYwkMCF",
"status": "success",
"stdout": null,
"stderr": null,
"func_response": "{\n \"status\": \"OK\",\n \"message\": \"Sent message successfully.\",\n \"time\": \"2025-10-10 12:21:31 AM UTC+0000\"\n}"
"func_response": "{\n \"status\": \"OK\",\n \"message\": \"Sent message successfully.\",\n \"time\": \"2025-10-10 07:14:33 AM UTC+0000\"\n}"
}
],
"created_at": "2025-10-10T00:21:31.900994+00:00",
"created_at": "2025-10-10T07:14:33.049503+00:00",
"approve": null,
"approval_request_id": null,
"denial_reason": null
@@ -477,7 +477,7 @@
"metadata_": {}
},
{
"id": "tool-4",
"id": "tool-5",
"tool_type": "letta_core",
"description": "Search prior conversation history using hybrid search (text + semantic similarity).\n\nExamples:\n # Search all messages\n conversation_search(query=\"project updates\")\n\n # Search only assistant messages\n conversation_search(query=\"error handling\", roles=[\"assistant\"])\n\n # Search with date range (inclusive of both dates)\n conversation_search(query=\"meetings\", start_date=\"2024-01-15\", end_date=\"2024-01-20\")\n # This includes all messages from Jan 15 00:00:00 through Jan 20 23:59:59\n\n # Search messages from a specific day (inclusive)\n conversation_search(query=\"bug reports\", start_date=\"2024-09-04\", end_date=\"2024-09-04\")\n # This includes ALL messages from September 4, 2024\n\n # Search with specific time boundaries\n conversation_search(query=\"deployment\", start_date=\"2024-01-15T09:00\", end_date=\"2024-01-15T17:30\")\n # This includes messages from 9 AM to 5:30 PM on Jan 15\n\n # Search with limit\n conversation_search(query=\"debugging\", limit=10)\n\n Returns:\n str: Query result string containing matching messages with timestamps and content.",
"source_type": "python",
@@ -536,7 +536,7 @@
"metadata_": {}
},
{
"id": "tool-1",
"id": "tool-0",
"tool_type": "custom",
"description": "Get user preferences for a specific category.",
"source_type": "json",
@@ -572,7 +572,7 @@
"metadata_": {}
},
{
"id": "tool-0",
"id": "tool-6",
"tool_type": "letta_sleeptime_core",
"description": "The memory_insert command allows you to insert text at a specific location in a memory block.\n\nExamples:\n # Update a block containing information about the user (append to the end of the block)\n memory_insert(label=\"customer\", new_str=\"The customer's ticket number is 12345\")\n\n # Update a block containing information about the user (insert at the beginning of the block)\n memory_insert(label=\"customer\", new_str=\"The customer's ticket number is 12345\", insert_line=0)\n\n Returns:\n Optional[str]: None is always returned as this function does not produce a response.",
"source_type": "python",
@@ -616,7 +616,7 @@
"metadata_": {}
},
{
"id": "tool-5",
"id": "tool-1",
"tool_type": "letta_sleeptime_core",
"description": "The memory_replace command allows you to replace a specific string in a memory block with a new string. This is used for making precise edits.\n\nExamples:\n # Update a block containing information about the user\n memory_replace(label=\"human\", old_str=\"Their name is Alice\", new_str=\"Their name is Bob\")\n\n # Update a block containing a todo list\n memory_replace(label=\"todos\", old_str=\"- [ ] Step 5: Search the web\", new_str=\"- [x] Step 5: Search the web\")\n\n # Pass an empty string to\n memory_replace(label=\"human\", old_str=\"Their name is Alice\", new_str=\"\")\n\n # Bad example - do NOT add (view-only) line numbers to the args\n memory_replace(label=\"human\", old_str=\"Line 1: Their name is Alice\", new_str=\"Line 1: Their name is Bob\")\n\n # Bad example - do NOT include the number number warning either\n memory_replace(label=\"human\", old_str=\"# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\\nLine 1: Their name is Alice\", new_str=\"Line 1: Their name is Bob\")\n\n # Good example - no line numbers or line number warning (they are view-only), just the text\n memory_replace(label=\"human\", old_str=\"Their name is Alice\", new_str=\"Their name is Bob\")\n\n Returns:\n str: The success message",
"source_type": "python",
@@ -661,7 +661,7 @@
"metadata_": {}
},
{
"id": "tool-6",
"id": "tool-4",
"tool_type": "letta_core",
"description": "Sends a message to the human user.",
"source_type": "python",
@@ -698,7 +698,7 @@
],
"mcp_servers": [],
"metadata": {
"revision_id": "c734cfc0d595"
"revision_id": "066857381578"
},
"created_at": "2025-10-10T00:21:32.284067+00:00"
"created_at": "2025-10-10T07:14:33.457962+00:00"
}