From 97cdfb4225e3af85d4981b4c6585731f89ccade2 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Wed, 14 Jan 2026 15:41:43 -0800 Subject: [PATCH] Revert "feat: add strict tool calling setting [LET-6902]" (#8720) Revert "feat: add strict tool calling setting [LET-6902] (#8577)" This reverts commit 697c9d0dee6af73ec4d5d98780e2ca7632a69173. --- letta/agents/letta_agent_batch.py | 2 +- letta/agents/letta_agent_v2.py | 4 +- letta/agents/letta_agent_v3.py | 4 +- letta/agents/voice_agent.py | 11 ++--- letta/helpers/tool_execution_helper.py | 10 +---- letta/llm_api/anthropic_client.py | 56 +++++++++++++++++--------- letta/llm_api/openai_client.py | 11 +++-- letta/schemas/llm_config.py | 6 --- letta/schemas/model.py | 21 ---------- tests/managers/test_agent_manager.py | 1 - tests/sdk/agents_test.py | 4 -- 11 files changed, 48 insertions(+), 82 deletions(-) diff --git a/letta/agents/letta_agent_batch.py b/letta/agents/letta_agent_batch.py index 7bcc74f0..ca8fe4ed 100644 --- a/letta/agents/letta_agent_batch.py +++ b/letta/agents/letta_agent_batch.py @@ -604,7 +604,7 @@ class LettaAgentBatch(BaseAgent): def _prepare_tools_per_agent(agent_state: AgentState, tool_rules_solver: ToolRulesSolver) -> List[dict]: tools = [t for t in agent_state.tools if t.tool_type in {ToolType.CUSTOM, ToolType.LETTA_CORE, ToolType.LETTA_MEMORY_CORE}] valid_tool_names = tool_rules_solver.get_allowed_tool_names(available_tools=set([t.name for t in tools])) - return [enable_strict_mode(t.json_schema, strict=agent_state.llm_config.strict) for t in tools if t.name in set(valid_tool_names)] + return [enable_strict_mode(t.json_schema) for t in tools if t.name in set(valid_tool_names)] async def _prepare_in_context_messages_per_agent_async( self, agent_state: AgentState, input_messages: List[MessageCreate] diff --git a/letta/agents/letta_agent_v2.py b/letta/agents/letta_agent_v2.py index 4cc4a368..bd804d6f 100644 --- a/letta/agents/letta_agent_v2.py +++ b/letta/agents/letta_agent_v2.py @@ -779,9 +779,7 @@ class LettaAgentV2(BaseAgentV2): last_function_response=self.last_function_response, error_on_empty=False, # Return empty list instead of raising error ) or list(set(t.name for t in tools)) - allowed_tools = [ - enable_strict_mode(t.json_schema, strict=self.agent_state.llm_config.strict) for t in tools if t.name in set(valid_tool_names) - ] + allowed_tools = [enable_strict_mode(t.json_schema) for t in tools if t.name in set(valid_tool_names)] terminal_tool_names = {rule.tool_name for rule in self.tool_rules_solver.terminal_tool_rules} allowed_tools = runtime_override_tool_json_schema( tool_list=allowed_tools, diff --git a/letta/agents/letta_agent_v3.py b/letta/agents/letta_agent_v3.py index 7b18c89e..f1445618 100644 --- a/letta/agents/letta_agent_v3.py +++ b/letta/agents/letta_agent_v3.py @@ -1413,9 +1413,7 @@ class LettaAgentV3(LettaAgentV2): # Build allowed tools from server tools, excluding those overridden by client tools allowed_tools = [ - enable_strict_mode(t.json_schema, strict=self.agent_state.llm_config.strict) - for t in tools - if t.name in set(valid_tool_names) and t.name not in client_tool_names + enable_strict_mode(t.json_schema) for t in tools if t.name in set(valid_tool_names) and t.name not in client_tool_names ] # Merge client-side tools (use flat format matching enable_strict_mode output) diff --git a/letta/agents/voice_agent.py b/letta/agents/voice_agent.py index 068d6f3e..87e21ea0 100644 --- a/letta/agents/voice_agent.py +++ b/letta/agents/voice_agent.py @@ -353,10 +353,9 @@ class VoiceAgent(BaseAgent): "For example: 'Let me double-check my notes—one moment, please.'" ) - strict = agent_state.llm_config.strict search_memory_json = Tool( type="function", - function=enable_strict_mode( # strict mode based on config + function=enable_strict_mode( # strict=True ✓ add_pre_execution_message( # injects pre_exec_msg ✓ { "name": "search_memory", @@ -400,17 +399,13 @@ class VoiceAgent(BaseAgent): }, }, description=search_memory_utterance_description, - ), - strict=strict, + ) ), ) # TODO: Customize whether or not to have heartbeats, pre_exec_message, etc. return [search_memory_json] + [ - Tool( - type="function", - function=enable_strict_mode(add_pre_execution_message(remove_request_heartbeat(t.json_schema)), strict=strict), - ) + Tool(type="function", function=enable_strict_mode(add_pre_execution_message(remove_request_heartbeat(t.json_schema)))) for t in tools ] diff --git a/letta/helpers/tool_execution_helper.py b/letta/helpers/tool_execution_helper.py index 3844ef56..886e5239 100644 --- a/letta/helpers/tool_execution_helper.py +++ b/letta/helpers/tool_execution_helper.py @@ -8,16 +8,14 @@ from letta.utils import get_logger logger = get_logger(__name__) -def enable_strict_mode(tool_schema: Dict[str, Any], strict: bool = True) -> Dict[str, Any]: +def enable_strict_mode(tool_schema: Dict[str, Any]) -> Dict[str, Any]: """Enables strict mode for a tool schema by setting 'strict' to True and disallowing additional properties in the parameters. If the tool schema is NON_STRICT_ONLY, strict mode will not be applied. - If strict=False, the function will only clean metadata without applying strict mode. Args: tool_schema (Dict[str, Any]): The original tool schema. - strict (bool): Whether to enable strict mode. Defaults to True. Returns: Dict[str, Any]: A new tool schema with strict mode conditionally enabled. @@ -36,12 +34,6 @@ def enable_strict_mode(tool_schema: Dict[str, Any], strict: bool = True) -> Dict # We should not be hitting this and allowing invalid schemas to be used logger.error(f"Tool schema {schema} is invalid: {schema.get(MCP_TOOL_METADATA_SCHEMA_WARNINGS)}") - # If strict mode is disabled, just clean metadata and return - if not strict: - schema.pop(MCP_TOOL_METADATA_SCHEMA_STATUS, None) - schema.pop(MCP_TOOL_METADATA_SCHEMA_WARNINGS, None) - return schema - # Enable strict mode for STRICT_COMPLIANT or unspecified health status schema["strict"] = True diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index 5f6251e1..c4991a4d 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -83,9 +83,17 @@ class AnthropicClient(LLMClientBase): if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner: betas.append("context-management-2025-06-27") - # Structured outputs beta - only when strict is enabled and model supports it - if llm_config.strict and _supports_structured_outputs(llm_config.model): - betas.append("structured-outputs-2025-11-13") + # Structured outputs beta - only for supported models + # Supported: Claude Sonnet 4.5, Opus 4.1, Opus 4.5, Haiku 4.5 + # DISABLED: Commenting out structured outputs to investigate TTFT latency impact + # See PR #7495 for original implementation + # supports_structured_outputs = _supports_structured_outputs(llm_config.model) + # + # if supports_structured_outputs: + # # Always enable structured outputs beta on supported models. + # # NOTE: We do NOT send `strict` on tool schemas because the current Anthropic SDK + # # typed tool params reject unknown fields (e.g., `tools.0.custom.strict`). + # betas.append("structured-outputs-2025-11-13") if betas: response = client.beta.messages.create(**request_data, betas=betas) @@ -120,9 +128,13 @@ class AnthropicClient(LLMClientBase): if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner: betas.append("context-management-2025-06-27") - # Structured outputs beta - only when strict is enabled and model supports it - if llm_config.strict and _supports_structured_outputs(llm_config.model): - betas.append("structured-outputs-2025-11-13") + # Structured outputs beta - only for supported models + # DISABLED: Commenting out structured outputs to investigate TTFT latency impact + # See PR #7495 for original implementation + # supports_structured_outputs = _supports_structured_outputs(llm_config.model) + # + # if supports_structured_outputs: + # betas.append("structured-outputs-2025-11-13") try: if betas: @@ -279,9 +291,13 @@ class AnthropicClient(LLMClientBase): if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner: betas.append("context-management-2025-06-27") - # Structured outputs beta - only when strict is enabled and model supports it - if llm_config.strict and _supports_structured_outputs(llm_config.model): - betas.append("structured-outputs-2025-11-13") + # Structured outputs beta - only for supported models + # DISABLED: Commenting out structured outputs to investigate TTFT latency impact + # See PR #7495 for original implementation + # supports_structured_outputs = _supports_structured_outputs(llm_config.model) + # + # if supports_structured_outputs: + # betas.append("structured-outputs-2025-11-13") # log failed requests try: @@ -538,11 +554,11 @@ class AnthropicClient(LLMClientBase): if tools_for_request and len(tools_for_request) > 0: # TODO eventually enable parallel tool use - # Enable strict mode when strict is enabled and model supports it - use_strict = llm_config.strict and _supports_structured_outputs(llm_config.model) + # DISABLED: use_strict=False to disable structured outputs (TTFT latency impact) + # See PR #7495 for original implementation data["tools"] = convert_tools_to_anthropic_format( tools_for_request, - use_strict=use_strict, + use_strict=False, # Was: _supports_structured_outputs(llm_config.model) ) # Add cache control to the last tool for caching tool definitions if len(data["tools"]) > 0: @@ -1263,14 +1279,14 @@ def convert_tools_to_anthropic_format( # when we are using structured outputs models. Limit the number of strict tools # to avoid exceeding Anthropic constraints. # NOTE: The token counting endpoint does NOT support `strict` - only the messages endpoint does. - if use_strict and add_strict_field and tool.function.name in ANTHROPIC_STRICT_MODE_ALLOWLIST: - if strict_count < ANTHROPIC_MAX_STRICT_TOOLS: - formatted_tool["strict"] = True - strict_count += 1 - else: - logger.warning( - f"Exceeded max strict tools limit ({ANTHROPIC_MAX_STRICT_TOOLS}), tool '{tool.function.name}' will not use strict mode" - ) + if ( + use_strict + and add_strict_field + and tool.function.name in ANTHROPIC_STRICT_MODE_ALLOWLIST + and strict_count < ANTHROPIC_MAX_STRICT_TOOLS + ): + formatted_tool["strict"] = True + strict_count += 1 formatted_tools.append(formatted_tool) diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py index 311a7379..03de4ca4 100644 --- a/letta/llm_api/openai_client.py +++ b/letta/llm_api/openai_client.py @@ -297,8 +297,8 @@ class OpenAIClient(LLMClientBase): new_tools.append(tool.model_copy(deep=True)) typed_tools = new_tools - # Convert to strict mode when strict is enabled - if llm_config.strict and supports_structured_output(llm_config): + # Convert to strict mode + if supports_structured_output(llm_config): for tool in typed_tools: try: structured_output_version = convert_to_structured_output(tool.function.model_dump()) @@ -320,14 +320,13 @@ class OpenAIClient(LLMClientBase): else: # Finally convert to a Responses-friendly dict - # Note: strict field is required by OpenAI SDK's FunctionToolParam type responses_tools = [ { "type": "function", "name": t.function.name, "description": t.function.description, "parameters": t.function.parameters, - "strict": False, + # "strict": True, } for t in typed_tools ] @@ -561,9 +560,9 @@ class OpenAIClient(LLMClientBase): data.tools = new_tools if data.tools is not None and len(data.tools) > 0: - # Convert to structured output style when strict is enabled + # Convert to structured output style (which has 'strict' and no optionals) for tool in data.tools: - if llm_config.strict and supports_structured_output(llm_config): + if supports_structured_output(llm_config): try: structured_output_version = convert_to_structured_output(tool.function.model_dump()) tool.function = FunctionSchema(**structured_output_version) diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index e46a141a..3f1c484d 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -105,10 +105,6 @@ class LLMConfig(BaseModel): None, description="The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings.", ) - strict: bool = Field( - False, - description="Enable strict mode for tool calling. When true, tool schemas include strict: true and additionalProperties: false, guaranteeing tool outputs match JSON schemas.", - ) @model_validator(mode="before") @classmethod @@ -330,7 +326,6 @@ class LLMConfig(BaseModel): max_output_tokens=self.max_tokens or 4096, temperature=self.temperature, reasoning=OpenAIReasoning(reasoning_effort=self.reasoning_effort or "minimal"), - strict=self.strict, ) elif self.model_endpoint_type == "anthropic": thinking_type = "enabled" if self.enable_reasoner else "disabled" @@ -339,7 +334,6 @@ class LLMConfig(BaseModel): temperature=self.temperature, thinking=AnthropicThinking(type=thinking_type, budget_tokens=self.max_reasoning_tokens or 1024), verbosity=self.verbosity, - strict=self.strict, ) elif self.model_endpoint_type == "google_ai": return GoogleAIModelSettings( diff --git a/letta/schemas/model.py b/letta/schemas/model.py index 3a6dcdf7..daf3291e 100644 --- a/letta/schemas/model.py +++ b/letta/schemas/model.py @@ -227,11 +227,6 @@ class OpenAIModelSettings(ModelSettings): temperature: float = Field(0.7, description="The temperature of the model.") reasoning: OpenAIReasoning = Field(OpenAIReasoning(reasoning_effort="high"), description="The reasoning configuration for the model.") response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.") - # OpenAI supports strict mode for tool calling - defaults to True - strict: bool = Field( - True, - description="Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.", - ) # TODO: implement support for these # reasoning_summary: Optional[Literal["none", "short", "detailed"]] = Field( @@ -249,7 +244,6 @@ class OpenAIModelSettings(ModelSettings): "reasoning_effort": self.reasoning.reasoning_effort, "response_format": self.response_format, "parallel_tool_calls": self.parallel_tool_calls, - "strict": self.strict, } @@ -284,12 +278,6 @@ class AnthropicModelSettings(ModelSettings): description="Effort level for Opus 4.5 model (controls token conservation). Not setting this gives similar performance to 'high'.", ) - # Anthropic supports strict mode for tool calling - defaults to False - strict: bool = Field( - False, - description="Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.", - ) - # TODO: implement support for these # top_k: Optional[int] = Field(None, description="The number of top tokens to return.") # top_p: Optional[float] = Field(None, description="The top-p value to use when generating text.") @@ -304,7 +292,6 @@ class AnthropicModelSettings(ModelSettings): "parallel_tool_calls": self.parallel_tool_calls, "effort": self.effort, "response_format": self.response_format, - "strict": self.strict, } @@ -328,7 +315,6 @@ class GoogleAIModelSettings(ModelSettings): "max_tokens": self.max_output_tokens, "max_reasoning_tokens": self.thinking_config.thinking_budget if self.thinking_config.include_thoughts else 0, "parallel_tool_calls": self.parallel_tool_calls, - "strict": False, # Google AI does not support strict mode } @@ -349,7 +335,6 @@ class AzureModelSettings(ModelSettings): "max_tokens": self.max_output_tokens, "response_format": self.response_format, "parallel_tool_calls": self.parallel_tool_calls, - "strict": False, # Azure does not support strict mode } @@ -366,7 +351,6 @@ class XAIModelSettings(ModelSettings): "max_tokens": self.max_output_tokens, "response_format": self.response_format, "parallel_tool_calls": self.parallel_tool_calls, - "strict": False, # xAI does not support strict mode } @@ -383,7 +367,6 @@ class ZAIModelSettings(ModelSettings): "max_tokens": self.max_output_tokens, "response_format": self.response_format, "parallel_tool_calls": self.parallel_tool_calls, - "strict": False, # ZAI does not support strict mode } @@ -400,7 +383,6 @@ class GroqModelSettings(ModelSettings): "max_tokens": self.max_output_tokens, "response_format": self.response_format, "parallel_tool_calls": self.parallel_tool_calls, - "strict": False, # Groq does not support strict mode } @@ -417,7 +399,6 @@ class DeepseekModelSettings(ModelSettings): "max_tokens": self.max_output_tokens, "response_format": self.response_format, "parallel_tool_calls": self.parallel_tool_calls, - "strict": False, # Deepseek does not support strict mode } @@ -434,7 +415,6 @@ class TogetherModelSettings(ModelSettings): "max_tokens": self.max_output_tokens, "response_format": self.response_format, "parallel_tool_calls": self.parallel_tool_calls, - "strict": False, # Together does not support strict mode } @@ -451,7 +431,6 @@ class BedrockModelSettings(ModelSettings): "max_tokens": self.max_output_tokens, "response_format": self.response_format, "parallel_tool_calls": self.parallel_tool_calls, - "strict": False, # Bedrock does not support strict mode } diff --git a/tests/managers/test_agent_manager.py b/tests/managers/test_agent_manager.py index fb9c134f..15767114 100644 --- a/tests/managers/test_agent_manager.py +++ b/tests/managers/test_agent_manager.py @@ -1851,7 +1851,6 @@ async def test_agent_state_schema_unchanged(server: SyncServer): "verbosity", "tier", "parallel_tool_calls", - "strict", } actual_llm_config_fields = set(llm_config_fields.keys()) if actual_llm_config_fields != expected_llm_config_fields: diff --git a/tests/sdk/agents_test.py b/tests/sdk/agents_test.py index 78324ecd..f02a7ef1 100644 --- a/tests/sdk/agents_test.py +++ b/tests/sdk/agents_test.py @@ -7,12 +7,9 @@ AGENTS_CREATE_PARAMS = [ { # Verify model_settings is populated with config values # Note: The 'model' field itself is separate from model_settings - # strict defaults to False when no model_settings is explicitly provided - # (OpenAIModelSettings defaults to True only when explicitly instantiated) "model_settings": { "max_output_tokens": 16384, "parallel_tool_calls": False, - "strict": False, "provider_type": "openai", "temperature": 0.7, "reasoning": {"reasoning_effort": "minimal"}, @@ -32,7 +29,6 @@ AGENTS_UPDATE_PARAMS = [ "model_settings": { "max_output_tokens": 16384, "parallel_tool_calls": False, - "strict": False, "provider_type": "openai", "temperature": 0.7, "reasoning": {"reasoning_effort": "minimal"},