feat: add strict tool calling setting [LET-6902] (#8577)
This commit is contained in:
@@ -604,7 +604,7 @@ class LettaAgentBatch(BaseAgent):
|
||||
def _prepare_tools_per_agent(agent_state: AgentState, tool_rules_solver: ToolRulesSolver) -> List[dict]:
|
||||
tools = [t for t in agent_state.tools if t.tool_type in {ToolType.CUSTOM, ToolType.LETTA_CORE, ToolType.LETTA_MEMORY_CORE}]
|
||||
valid_tool_names = tool_rules_solver.get_allowed_tool_names(available_tools=set([t.name for t in tools]))
|
||||
return [enable_strict_mode(t.json_schema) for t in tools if t.name in set(valid_tool_names)]
|
||||
return [enable_strict_mode(t.json_schema, strict=agent_state.llm_config.strict) for t in tools if t.name in set(valid_tool_names)]
|
||||
|
||||
async def _prepare_in_context_messages_per_agent_async(
|
||||
self, agent_state: AgentState, input_messages: List[MessageCreate]
|
||||
|
||||
@@ -779,7 +779,9 @@ class LettaAgentV2(BaseAgentV2):
|
||||
last_function_response=self.last_function_response,
|
||||
error_on_empty=False, # Return empty list instead of raising error
|
||||
) or list(set(t.name for t in tools))
|
||||
allowed_tools = [enable_strict_mode(t.json_schema) for t in tools if t.name in set(valid_tool_names)]
|
||||
allowed_tools = [
|
||||
enable_strict_mode(t.json_schema, strict=self.agent_state.llm_config.strict) for t in tools if t.name in set(valid_tool_names)
|
||||
]
|
||||
terminal_tool_names = {rule.tool_name for rule in self.tool_rules_solver.terminal_tool_rules}
|
||||
allowed_tools = runtime_override_tool_json_schema(
|
||||
tool_list=allowed_tools,
|
||||
|
||||
@@ -1396,7 +1396,9 @@ class LettaAgentV3(LettaAgentV2):
|
||||
|
||||
# Build allowed tools from server tools, excluding those overridden by client tools
|
||||
allowed_tools = [
|
||||
enable_strict_mode(t.json_schema) for t in tools if t.name in set(valid_tool_names) and t.name not in client_tool_names
|
||||
enable_strict_mode(t.json_schema, strict=self.agent_state.llm_config.strict)
|
||||
for t in tools
|
||||
if t.name in set(valid_tool_names) and t.name not in client_tool_names
|
||||
]
|
||||
|
||||
# Merge client-side tools (use flat format matching enable_strict_mode output)
|
||||
|
||||
@@ -353,9 +353,10 @@ class VoiceAgent(BaseAgent):
|
||||
"For example: 'Let me double-check my notes—one moment, please.'"
|
||||
)
|
||||
|
||||
strict = agent_state.llm_config.strict
|
||||
search_memory_json = Tool(
|
||||
type="function",
|
||||
function=enable_strict_mode( # strict=True ✓
|
||||
function=enable_strict_mode( # strict mode based on config
|
||||
add_pre_execution_message( # injects pre_exec_msg ✓
|
||||
{
|
||||
"name": "search_memory",
|
||||
@@ -399,13 +400,17 @@ class VoiceAgent(BaseAgent):
|
||||
},
|
||||
},
|
||||
description=search_memory_utterance_description,
|
||||
)
|
||||
),
|
||||
strict=strict,
|
||||
),
|
||||
)
|
||||
|
||||
# TODO: Customize whether or not to have heartbeats, pre_exec_message, etc.
|
||||
return [search_memory_json] + [
|
||||
Tool(type="function", function=enable_strict_mode(add_pre_execution_message(remove_request_heartbeat(t.json_schema))))
|
||||
Tool(
|
||||
type="function",
|
||||
function=enable_strict_mode(add_pre_execution_message(remove_request_heartbeat(t.json_schema)), strict=strict),
|
||||
)
|
||||
for t in tools
|
||||
]
|
||||
|
||||
|
||||
@@ -8,14 +8,16 @@ from letta.utils import get_logger
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def enable_strict_mode(tool_schema: Dict[str, Any]) -> Dict[str, Any]:
|
||||
def enable_strict_mode(tool_schema: Dict[str, Any], strict: bool = True) -> Dict[str, Any]:
|
||||
"""Enables strict mode for a tool schema by setting 'strict' to True and
|
||||
disallowing additional properties in the parameters.
|
||||
|
||||
If the tool schema is NON_STRICT_ONLY, strict mode will not be applied.
|
||||
If strict=False, the function will only clean metadata without applying strict mode.
|
||||
|
||||
Args:
|
||||
tool_schema (Dict[str, Any]): The original tool schema.
|
||||
strict (bool): Whether to enable strict mode. Defaults to True.
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: A new tool schema with strict mode conditionally enabled.
|
||||
@@ -34,6 +36,12 @@ def enable_strict_mode(tool_schema: Dict[str, Any]) -> Dict[str, Any]:
|
||||
# We should never hit this case; invalid schemas should not be allowed to be used
|
||||
logger.error(f"Tool schema {schema} is invalid: {schema.get(MCP_TOOL_METADATA_SCHEMA_WARNINGS)}")
|
||||
|
||||
# If strict mode is disabled, just clean metadata and return
|
||||
if not strict:
|
||||
schema.pop(MCP_TOOL_METADATA_SCHEMA_STATUS, None)
|
||||
schema.pop(MCP_TOOL_METADATA_SCHEMA_WARNINGS, None)
|
||||
return schema
|
||||
|
||||
# Enable strict mode for STRICT_COMPLIANT or unspecified health status
|
||||
schema["strict"] = True
|
||||
|
||||
|
||||
@@ -83,17 +83,9 @@ class AnthropicClient(LLMClientBase):
|
||||
if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner:
|
||||
betas.append("context-management-2025-06-27")
|
||||
|
||||
# Structured outputs beta - only for supported models
|
||||
# Supported: Claude Sonnet 4.5, Opus 4.1, Opus 4.5, Haiku 4.5
|
||||
# DISABLED: Commenting out structured outputs to investigate TTFT latency impact
|
||||
# See PR #7495 for original implementation
|
||||
# supports_structured_outputs = _supports_structured_outputs(llm_config.model)
|
||||
#
|
||||
# if supports_structured_outputs:
|
||||
# # Always enable structured outputs beta on supported models.
|
||||
# # NOTE: We do NOT send `strict` on tool schemas because the current Anthropic SDK
|
||||
# # typed tool params reject unknown fields (e.g., `tools.0.custom.strict`).
|
||||
# betas.append("structured-outputs-2025-11-13")
|
||||
# Structured outputs beta - only when strict is enabled and model supports it
|
||||
if llm_config.strict and _supports_structured_outputs(llm_config.model):
|
||||
betas.append("structured-outputs-2025-11-13")
|
||||
|
||||
if betas:
|
||||
response = client.beta.messages.create(**request_data, betas=betas)
|
||||
@@ -128,13 +120,9 @@ class AnthropicClient(LLMClientBase):
|
||||
if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner:
|
||||
betas.append("context-management-2025-06-27")
|
||||
|
||||
# Structured outputs beta - only for supported models
|
||||
# DISABLED: Commenting out structured outputs to investigate TTFT latency impact
|
||||
# See PR #7495 for original implementation
|
||||
# supports_structured_outputs = _supports_structured_outputs(llm_config.model)
|
||||
#
|
||||
# if supports_structured_outputs:
|
||||
# betas.append("structured-outputs-2025-11-13")
|
||||
# Structured outputs beta - only when strict is enabled and model supports it
|
||||
if llm_config.strict and _supports_structured_outputs(llm_config.model):
|
||||
betas.append("structured-outputs-2025-11-13")
|
||||
|
||||
if betas:
|
||||
response = await client.beta.messages.create(**request_data, betas=betas)
|
||||
@@ -177,13 +165,9 @@ class AnthropicClient(LLMClientBase):
|
||||
if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner:
|
||||
betas.append("context-management-2025-06-27")
|
||||
|
||||
# Structured outputs beta - only for supported models
|
||||
# DISABLED: Commenting out structured outputs to investigate TTFT latency impact
|
||||
# See PR #7495 for original implementation
|
||||
# supports_structured_outputs = _supports_structured_outputs(llm_config.model)
|
||||
#
|
||||
# if supports_structured_outputs:
|
||||
# betas.append("structured-outputs-2025-11-13")
|
||||
# Structured outputs beta - only when strict is enabled and model supports it
|
||||
if llm_config.strict and _supports_structured_outputs(llm_config.model):
|
||||
betas.append("structured-outputs-2025-11-13")
|
||||
|
||||
# log failed requests
|
||||
try:
|
||||
@@ -440,11 +424,11 @@ class AnthropicClient(LLMClientBase):
|
||||
|
||||
if tools_for_request and len(tools_for_request) > 0:
|
||||
# TODO eventually enable parallel tool use
|
||||
# DISABLED: use_strict=False to disable structured outputs (TTFT latency impact)
|
||||
# See PR #7495 for original implementation
|
||||
# Enable strict mode when strict is enabled and model supports it
|
||||
use_strict = llm_config.strict and _supports_structured_outputs(llm_config.model)
|
||||
data["tools"] = convert_tools_to_anthropic_format(
|
||||
tools_for_request,
|
||||
use_strict=False, # Was: _supports_structured_outputs(llm_config.model)
|
||||
use_strict=use_strict,
|
||||
)
|
||||
# Add cache control to the last tool for caching tool definitions
|
||||
if len(data["tools"]) > 0:
|
||||
@@ -1165,14 +1149,14 @@ def convert_tools_to_anthropic_format(
|
||||
# when we are using structured outputs models. Limit the number of strict tools
|
||||
# to avoid exceeding Anthropic constraints.
|
||||
# NOTE: The token counting endpoint does NOT support `strict` - only the messages endpoint does.
|
||||
if (
|
||||
use_strict
|
||||
and add_strict_field
|
||||
and tool.function.name in ANTHROPIC_STRICT_MODE_ALLOWLIST
|
||||
and strict_count < ANTHROPIC_MAX_STRICT_TOOLS
|
||||
):
|
||||
formatted_tool["strict"] = True
|
||||
strict_count += 1
|
||||
if use_strict and add_strict_field and tool.function.name in ANTHROPIC_STRICT_MODE_ALLOWLIST:
|
||||
if strict_count < ANTHROPIC_MAX_STRICT_TOOLS:
|
||||
formatted_tool["strict"] = True
|
||||
strict_count += 1
|
||||
else:
|
||||
logger.warning(
|
||||
f"Exceeded max strict tools limit ({ANTHROPIC_MAX_STRICT_TOOLS}), tool '{tool.function.name}' will not use strict mode"
|
||||
)
|
||||
|
||||
formatted_tools.append(formatted_tool)
|
||||
|
||||
|
||||
@@ -297,8 +297,8 @@ class OpenAIClient(LLMClientBase):
|
||||
new_tools.append(tool.model_copy(deep=True))
|
||||
typed_tools = new_tools
|
||||
|
||||
# Convert to strict mode
|
||||
if supports_structured_output(llm_config):
|
||||
# Convert to strict mode when strict is enabled
|
||||
if llm_config.strict and supports_structured_output(llm_config):
|
||||
for tool in typed_tools:
|
||||
try:
|
||||
structured_output_version = convert_to_structured_output(tool.function.model_dump())
|
||||
@@ -320,13 +320,14 @@ class OpenAIClient(LLMClientBase):
|
||||
|
||||
else:
|
||||
# Finally convert to a Responses-friendly dict
|
||||
# Note: strict field is required by OpenAI SDK's FunctionToolParam type
|
||||
responses_tools = [
|
||||
{
|
||||
"type": "function",
|
||||
"name": t.function.name,
|
||||
"description": t.function.description,
|
||||
"parameters": t.function.parameters,
|
||||
# "strict": True,
|
||||
"strict": False,
|
||||
}
|
||||
for t in typed_tools
|
||||
]
|
||||
@@ -560,9 +561,9 @@ class OpenAIClient(LLMClientBase):
|
||||
data.tools = new_tools
|
||||
|
||||
if data.tools is not None and len(data.tools) > 0:
|
||||
# Convert to structured output style (which has 'strict' and no optionals)
|
||||
# Convert to structured output style when strict is enabled
|
||||
for tool in data.tools:
|
||||
if supports_structured_output(llm_config):
|
||||
if llm_config.strict and supports_structured_output(llm_config):
|
||||
try:
|
||||
structured_output_version = convert_to_structured_output(tool.function.model_dump())
|
||||
tool.function = FunctionSchema(**structured_output_version)
|
||||
|
||||
@@ -105,6 +105,10 @@ class LLMConfig(BaseModel):
|
||||
None,
|
||||
description="The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings.",
|
||||
)
|
||||
strict: bool = Field(
|
||||
False,
|
||||
description="Enable strict mode for tool calling. When true, tool schemas include strict: true and additionalProperties: false, guaranteeing tool outputs match JSON schemas.",
|
||||
)
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
@@ -326,6 +330,7 @@ class LLMConfig(BaseModel):
|
||||
max_output_tokens=self.max_tokens or 4096,
|
||||
temperature=self.temperature,
|
||||
reasoning=OpenAIReasoning(reasoning_effort=self.reasoning_effort or "minimal"),
|
||||
strict=self.strict,
|
||||
)
|
||||
elif self.model_endpoint_type == "anthropic":
|
||||
thinking_type = "enabled" if self.enable_reasoner else "disabled"
|
||||
@@ -334,6 +339,7 @@ class LLMConfig(BaseModel):
|
||||
temperature=self.temperature,
|
||||
thinking=AnthropicThinking(type=thinking_type, budget_tokens=self.max_reasoning_tokens or 1024),
|
||||
verbosity=self.verbosity,
|
||||
strict=self.strict,
|
||||
)
|
||||
elif self.model_endpoint_type == "google_ai":
|
||||
return GoogleAIModelSettings(
|
||||
|
||||
@@ -227,6 +227,11 @@ class OpenAIModelSettings(ModelSettings):
|
||||
temperature: float = Field(0.7, description="The temperature of the model.")
|
||||
reasoning: OpenAIReasoning = Field(OpenAIReasoning(reasoning_effort="high"), description="The reasoning configuration for the model.")
|
||||
response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")
|
||||
# OpenAI supports strict mode for tool calling - defaults to True
|
||||
strict: bool = Field(
|
||||
True,
|
||||
description="Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.",
|
||||
)
|
||||
|
||||
# TODO: implement support for these
|
||||
# reasoning_summary: Optional[Literal["none", "short", "detailed"]] = Field(
|
||||
@@ -244,6 +249,7 @@ class OpenAIModelSettings(ModelSettings):
|
||||
"reasoning_effort": self.reasoning.reasoning_effort,
|
||||
"response_format": self.response_format,
|
||||
"parallel_tool_calls": self.parallel_tool_calls,
|
||||
"strict": self.strict,
|
||||
}
|
||||
|
||||
|
||||
@@ -278,6 +284,12 @@ class AnthropicModelSettings(ModelSettings):
|
||||
description="Effort level for Opus 4.5 model (controls token conservation). Not setting this gives similar performance to 'high'.",
|
||||
)
|
||||
|
||||
# Anthropic supports strict mode for tool calling - defaults to False
|
||||
strict: bool = Field(
|
||||
False,
|
||||
description="Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.",
|
||||
)
|
||||
|
||||
# TODO: implement support for these
|
||||
# top_k: Optional[int] = Field(None, description="The number of top tokens to return.")
|
||||
# top_p: Optional[float] = Field(None, description="The top-p value to use when generating text.")
|
||||
@@ -292,6 +304,7 @@ class AnthropicModelSettings(ModelSettings):
|
||||
"parallel_tool_calls": self.parallel_tool_calls,
|
||||
"effort": self.effort,
|
||||
"response_format": self.response_format,
|
||||
"strict": self.strict,
|
||||
}
|
||||
|
||||
|
||||
@@ -315,6 +328,7 @@ class GoogleAIModelSettings(ModelSettings):
|
||||
"max_tokens": self.max_output_tokens,
|
||||
"max_reasoning_tokens": self.thinking_config.thinking_budget if self.thinking_config.include_thoughts else 0,
|
||||
"parallel_tool_calls": self.parallel_tool_calls,
|
||||
"strict": False, # Google AI does not support strict mode
|
||||
}
|
||||
|
||||
|
||||
@@ -335,6 +349,7 @@ class AzureModelSettings(ModelSettings):
|
||||
"max_tokens": self.max_output_tokens,
|
||||
"response_format": self.response_format,
|
||||
"parallel_tool_calls": self.parallel_tool_calls,
|
||||
"strict": False, # Azure does not support strict mode
|
||||
}
|
||||
|
||||
|
||||
@@ -351,6 +366,7 @@ class XAIModelSettings(ModelSettings):
|
||||
"max_tokens": self.max_output_tokens,
|
||||
"response_format": self.response_format,
|
||||
"parallel_tool_calls": self.parallel_tool_calls,
|
||||
"strict": False, # xAI does not support strict mode
|
||||
}
|
||||
|
||||
|
||||
@@ -367,6 +383,7 @@ class ZAIModelSettings(ModelSettings):
|
||||
"max_tokens": self.max_output_tokens,
|
||||
"response_format": self.response_format,
|
||||
"parallel_tool_calls": self.parallel_tool_calls,
|
||||
"strict": False, # ZAI does not support strict mode
|
||||
}
|
||||
|
||||
|
||||
@@ -383,6 +400,7 @@ class GroqModelSettings(ModelSettings):
|
||||
"max_tokens": self.max_output_tokens,
|
||||
"response_format": self.response_format,
|
||||
"parallel_tool_calls": self.parallel_tool_calls,
|
||||
"strict": False, # Groq does not support strict mode
|
||||
}
|
||||
|
||||
|
||||
@@ -399,6 +417,7 @@ class DeepseekModelSettings(ModelSettings):
|
||||
"max_tokens": self.max_output_tokens,
|
||||
"response_format": self.response_format,
|
||||
"parallel_tool_calls": self.parallel_tool_calls,
|
||||
"strict": False, # Deepseek does not support strict mode
|
||||
}
|
||||
|
||||
|
||||
@@ -415,6 +434,7 @@ class TogetherModelSettings(ModelSettings):
|
||||
"max_tokens": self.max_output_tokens,
|
||||
"response_format": self.response_format,
|
||||
"parallel_tool_calls": self.parallel_tool_calls,
|
||||
"strict": False, # Together does not support strict mode
|
||||
}
|
||||
|
||||
|
||||
@@ -431,6 +451,7 @@ class BedrockModelSettings(ModelSettings):
|
||||
"max_tokens": self.max_output_tokens,
|
||||
"response_format": self.response_format,
|
||||
"parallel_tool_calls": self.parallel_tool_calls,
|
||||
"strict": False, # Bedrock does not support strict mode
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1851,6 +1851,7 @@ async def test_agent_state_schema_unchanged(server: SyncServer):
|
||||
"verbosity",
|
||||
"tier",
|
||||
"parallel_tool_calls",
|
||||
"strict",
|
||||
}
|
||||
actual_llm_config_fields = set(llm_config_fields.keys())
|
||||
if actual_llm_config_fields != expected_llm_config_fields:
|
||||
|
||||
@@ -7,9 +7,12 @@ AGENTS_CREATE_PARAMS = [
|
||||
{
|
||||
# Verify model_settings is populated with config values
|
||||
# Note: The 'model' field itself is separate from model_settings
|
||||
# strict falls back to the LLMConfig default (False) when model_settings is not explicitly provided
|
||||
# (OpenAIModelSettings defaults to True only when explicitly instantiated)
|
||||
"model_settings": {
|
||||
"max_output_tokens": 16384,
|
||||
"parallel_tool_calls": False,
|
||||
"strict": False,
|
||||
"provider_type": "openai",
|
||||
"temperature": 0.7,
|
||||
"reasoning": {"reasoning_effort": "minimal"},
|
||||
@@ -29,6 +32,7 @@ AGENTS_UPDATE_PARAMS = [
|
||||
"model_settings": {
|
||||
"max_output_tokens": 16384,
|
||||
"parallel_tool_calls": False,
|
||||
"strict": False,
|
||||
"provider_type": "openai",
|
||||
"temperature": 0.7,
|
||||
"reasoning": {"reasoning_effort": "minimal"},
|
||||
|
||||
Reference in New Issue
Block a user