From 97cdfb4225e3af85d4981b4c6585731f89ccade2 Mon Sep 17 00:00:00 2001
From: Sarah Wooders <sarahwooders@gmail.com>
Date: Wed, 14 Jan 2026 15:41:43 -0800
Subject: [PATCH] Revert "feat: add strict tool calling setting [LET-6902]"
 (#8720)

Revert "feat: add strict tool calling setting [LET-6902] (#8577)"

This reverts commit 697c9d0dee6af73ec4d5d98780e2ca7632a69173.
---
 letta/agents/letta_agent_batch.py      |  2 +-
 letta/agents/letta_agent_v2.py         |  4 +-
 letta/agents/letta_agent_v3.py         |  4 +-
 letta/agents/voice_agent.py            | 11 ++---
 letta/helpers/tool_execution_helper.py | 10 +----
 letta/llm_api/anthropic_client.py      | 56 +++++++++++++++++---------
 letta/llm_api/openai_client.py         | 11 +++--
 letta/schemas/llm_config.py            |  6 ---
 letta/schemas/model.py                 | 21 ----------
 tests/managers/test_agent_manager.py   |  1 -
 tests/sdk/agents_test.py               |  4 --
 11 files changed, 48 insertions(+), 82 deletions(-)

diff --git a/letta/agents/letta_agent_batch.py b/letta/agents/letta_agent_batch.py
index 7bcc74f0..ca8fe4ed 100644
--- a/letta/agents/letta_agent_batch.py
+++ b/letta/agents/letta_agent_batch.py
@@ -604,7 +604,7 @@ class LettaAgentBatch(BaseAgent):
     def _prepare_tools_per_agent(agent_state: AgentState, tool_rules_solver: ToolRulesSolver) -> List[dict]:
         tools = [t for t in agent_state.tools if t.tool_type in {ToolType.CUSTOM, ToolType.LETTA_CORE, ToolType.LETTA_MEMORY_CORE}]
         valid_tool_names = tool_rules_solver.get_allowed_tool_names(available_tools=set([t.name for t in tools]))
-        return [enable_strict_mode(t.json_schema, strict=agent_state.llm_config.strict) for t in tools if t.name in set(valid_tool_names)]
+        return [enable_strict_mode(t.json_schema) for t in tools if t.name in set(valid_tool_names)]
 
     async def _prepare_in_context_messages_per_agent_async(
         self, agent_state: AgentState, input_messages: List[MessageCreate]
diff --git a/letta/agents/letta_agent_v2.py b/letta/agents/letta_agent_v2.py
index 4cc4a368..bd804d6f 100644
--- a/letta/agents/letta_agent_v2.py
+++ b/letta/agents/letta_agent_v2.py
@@ -779,9 +779,7 @@ class LettaAgentV2(BaseAgentV2):
             last_function_response=self.last_function_response,
             error_on_empty=False,  # Return empty list instead of raising error
         ) or list(set(t.name for t in tools))
-        allowed_tools = [
-            enable_strict_mode(t.json_schema, strict=self.agent_state.llm_config.strict) for t in tools if t.name in set(valid_tool_names)
-        ]
+        allowed_tools = [enable_strict_mode(t.json_schema) for t in tools if t.name in set(valid_tool_names)]
         terminal_tool_names = {rule.tool_name for rule in self.tool_rules_solver.terminal_tool_rules}
         allowed_tools = runtime_override_tool_json_schema(
             tool_list=allowed_tools,
diff --git a/letta/agents/letta_agent_v3.py b/letta/agents/letta_agent_v3.py
index 7b18c89e..f1445618 100644
--- a/letta/agents/letta_agent_v3.py
+++ b/letta/agents/letta_agent_v3.py
@@ -1413,9 +1413,7 @@ class LettaAgentV3(LettaAgentV2):
 
         # Build allowed tools from server tools, excluding those overridden by client tools
         allowed_tools = [
-            enable_strict_mode(t.json_schema, strict=self.agent_state.llm_config.strict)
-            for t in tools
-            if t.name in set(valid_tool_names) and t.name not in client_tool_names
+            enable_strict_mode(t.json_schema) for t in tools if t.name in set(valid_tool_names) and t.name not in client_tool_names
         ]
 
         # Merge client-side tools (use flat format matching enable_strict_mode output)
diff --git a/letta/agents/voice_agent.py b/letta/agents/voice_agent.py
index 068d6f3e..87e21ea0 100644
--- a/letta/agents/voice_agent.py
+++ b/letta/agents/voice_agent.py
@@ -353,10 +353,9 @@ class VoiceAgent(BaseAgent):
             "For example: 'Let me double-check my notes—one moment, please.'"
         )
 
-        strict = agent_state.llm_config.strict
         search_memory_json = Tool(
             type="function",
-            function=enable_strict_mode(  # strict mode based on config
+            function=enable_strict_mode(  # strict=True   ✓
                 add_pre_execution_message(  # injects pre_exec_msg   ✓
                     {
                         "name": "search_memory",
@@ -400,17 +399,13 @@ class VoiceAgent(BaseAgent):
                         },
                     },
                     description=search_memory_utterance_description,
-                ),
-                strict=strict,
+                )
             ),
         )
 
         # TODO: Customize whether or not to have heartbeats, pre_exec_message, etc.
         return [search_memory_json] + [
-            Tool(
-                type="function",
-                function=enable_strict_mode(add_pre_execution_message(remove_request_heartbeat(t.json_schema)), strict=strict),
-            )
+            Tool(type="function", function=enable_strict_mode(add_pre_execution_message(remove_request_heartbeat(t.json_schema))))
             for t in tools
         ]
 
diff --git a/letta/helpers/tool_execution_helper.py b/letta/helpers/tool_execution_helper.py
index 3844ef56..886e5239 100644
--- a/letta/helpers/tool_execution_helper.py
+++ b/letta/helpers/tool_execution_helper.py
@@ -8,16 +8,14 @@ from letta.utils import get_logger
 logger = get_logger(__name__)
 
 
-def enable_strict_mode(tool_schema: Dict[str, Any], strict: bool = True) -> Dict[str, Any]:
+def enable_strict_mode(tool_schema: Dict[str, Any]) -> Dict[str, Any]:
     """Enables strict mode for a tool schema by setting 'strict' to True and
     disallowing additional properties in the parameters.
 
     If the tool schema is NON_STRICT_ONLY, strict mode will not be applied.
-    If strict=False, the function will only clean metadata without applying strict mode.
 
     Args:
         tool_schema (Dict[str, Any]): The original tool schema.
-        strict (bool): Whether to enable strict mode. Defaults to True.
 
     Returns:
         Dict[str, Any]: A new tool schema with strict mode conditionally enabled.
@@ -36,12 +34,6 @@ def enable_strict_mode(tool_schema: Dict[str, Any], strict: bool = True) -> Dict
         # We should not be hitting this and allowing invalid schemas to be used
         logger.error(f"Tool schema {schema} is invalid: {schema.get(MCP_TOOL_METADATA_SCHEMA_WARNINGS)}")
 
-    # If strict mode is disabled, just clean metadata and return
-    if not strict:
-        schema.pop(MCP_TOOL_METADATA_SCHEMA_STATUS, None)
-        schema.pop(MCP_TOOL_METADATA_SCHEMA_WARNINGS, None)
-        return schema
-
     # Enable strict mode for STRICT_COMPLIANT or unspecified health status
     schema["strict"] = True
 
diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py
index 5f6251e1..c4991a4d 100644
--- a/letta/llm_api/anthropic_client.py
+++ b/letta/llm_api/anthropic_client.py
@@ -83,9 +83,17 @@ class AnthropicClient(LLMClientBase):
         if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner:
             betas.append("context-management-2025-06-27")
 
-        # Structured outputs beta - only when strict is enabled and model supports it
-        if llm_config.strict and _supports_structured_outputs(llm_config.model):
-            betas.append("structured-outputs-2025-11-13")
+        # Structured outputs beta - only for supported models
+        # Supported: Claude Sonnet 4.5, Opus 4.1, Opus 4.5, Haiku 4.5
+        # DISABLED: Commenting out structured outputs to investigate TTFT latency impact
+        # See PR #7495 for original implementation
+        # supports_structured_outputs = _supports_structured_outputs(llm_config.model)
+        #
+        # if supports_structured_outputs:
+        #     # Always enable structured outputs beta on supported models.
+        #     # NOTE: We do NOT send `strict` on tool schemas because the current Anthropic SDK
+        #     # typed tool params reject unknown fields (e.g., `tools.0.custom.strict`).
+        #     betas.append("structured-outputs-2025-11-13")
 
         if betas:
             response = client.beta.messages.create(**request_data, betas=betas)
@@ -120,9 +128,13 @@ class AnthropicClient(LLMClientBase):
         if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner:
             betas.append("context-management-2025-06-27")
 
-        # Structured outputs beta - only when strict is enabled and model supports it
-        if llm_config.strict and _supports_structured_outputs(llm_config.model):
-            betas.append("structured-outputs-2025-11-13")
+        # Structured outputs beta - only for supported models
+        # DISABLED: Commenting out structured outputs to investigate TTFT latency impact
+        # See PR #7495 for original implementation
+        # supports_structured_outputs = _supports_structured_outputs(llm_config.model)
+        #
+        # if supports_structured_outputs:
+        #     betas.append("structured-outputs-2025-11-13")
 
         try:
             if betas:
@@ -279,9 +291,13 @@ class AnthropicClient(LLMClientBase):
         if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner:
             betas.append("context-management-2025-06-27")
 
-        # Structured outputs beta - only when strict is enabled and model supports it
-        if llm_config.strict and _supports_structured_outputs(llm_config.model):
-            betas.append("structured-outputs-2025-11-13")
+        # Structured outputs beta - only for supported models
+        # DISABLED: Commenting out structured outputs to investigate TTFT latency impact
+        # See PR #7495 for original implementation
+        # supports_structured_outputs = _supports_structured_outputs(llm_config.model)
+        #
+        # if supports_structured_outputs:
+        #     betas.append("structured-outputs-2025-11-13")
 
         # log failed requests
         try:
@@ -538,11 +554,11 @@ class AnthropicClient(LLMClientBase):
 
         if tools_for_request and len(tools_for_request) > 0:
             # TODO eventually enable parallel tool use
-            # Enable strict mode when strict is enabled and model supports it
-            use_strict = llm_config.strict and _supports_structured_outputs(llm_config.model)
+            # DISABLED: use_strict=False to disable structured outputs (TTFT latency impact)
+            # See PR #7495 for original implementation
             data["tools"] = convert_tools_to_anthropic_format(
                 tools_for_request,
-                use_strict=use_strict,
+                use_strict=False,  # Was: _supports_structured_outputs(llm_config.model)
             )
             # Add cache control to the last tool for caching tool definitions
             if len(data["tools"]) > 0:
@@ -1263,14 +1279,14 @@ def convert_tools_to_anthropic_format(
         # when we are using structured outputs models. Limit the number of strict tools
         # to avoid exceeding Anthropic constraints.
         # NOTE: The token counting endpoint does NOT support `strict` - only the messages endpoint does.
-        if use_strict and add_strict_field and tool.function.name in ANTHROPIC_STRICT_MODE_ALLOWLIST:
-            if strict_count < ANTHROPIC_MAX_STRICT_TOOLS:
-                formatted_tool["strict"] = True
-                strict_count += 1
-            else:
-                logger.warning(
-                    f"Exceeded max strict tools limit ({ANTHROPIC_MAX_STRICT_TOOLS}), tool '{tool.function.name}' will not use strict mode"
-                )
+        if (
+            use_strict
+            and add_strict_field
+            and tool.function.name in ANTHROPIC_STRICT_MODE_ALLOWLIST
+            and strict_count < ANTHROPIC_MAX_STRICT_TOOLS
+        ):
+            formatted_tool["strict"] = True
+            strict_count += 1
 
         formatted_tools.append(formatted_tool)
 
diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index 311a7379..03de4ca4 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -297,8 +297,8 @@ class OpenAIClient(LLMClientBase):
                     new_tools.append(tool.model_copy(deep=True))
                 typed_tools = new_tools
 
-            # Convert to strict mode when strict is enabled
-            if llm_config.strict and supports_structured_output(llm_config):
+            # Convert to strict mode
+            if supports_structured_output(llm_config):
                 for tool in typed_tools:
                     try:
                         structured_output_version = convert_to_structured_output(tool.function.model_dump())
@@ -320,14 +320,13 @@ class OpenAIClient(LLMClientBase):
 
             else:
                 # Finally convert to a Responses-friendly dict
-                # Note: strict field is required by OpenAI SDK's FunctionToolParam type
                 responses_tools = [
                     {
                         "type": "function",
                         "name": t.function.name,
                         "description": t.function.description,
                         "parameters": t.function.parameters,
-                        "strict": False,
+                        # "strict": True,
                     }
                     for t in typed_tools
                 ]
@@ -561,9 +560,9 @@ class OpenAIClient(LLMClientBase):
                 data.tools = new_tools
 
         if data.tools is not None and len(data.tools) > 0:
-            # Convert to structured output style when strict is enabled
+            # Convert to structured output style (which has 'strict' and no optionals)
             for tool in data.tools:
-                if llm_config.strict and supports_structured_output(llm_config):
+                if supports_structured_output(llm_config):
                     try:
                         structured_output_version = convert_to_structured_output(tool.function.model_dump())
                         tool.function = FunctionSchema(**structured_output_version)
diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py
index e46a141a..3f1c484d 100644
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -105,10 +105,6 @@ class LLMConfig(BaseModel):
         None,
         description="The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings.",
     )
-    strict: bool = Field(
-        False,
-        description="Enable strict mode for tool calling. When true, tool schemas include strict: true and additionalProperties: false, guaranteeing tool outputs match JSON schemas.",
-    )
 
     @model_validator(mode="before")
     @classmethod
@@ -330,7 +326,6 @@ class LLMConfig(BaseModel):
                 max_output_tokens=self.max_tokens or 4096,
                 temperature=self.temperature,
                 reasoning=OpenAIReasoning(reasoning_effort=self.reasoning_effort or "minimal"),
-                strict=self.strict,
             )
         elif self.model_endpoint_type == "anthropic":
             thinking_type = "enabled" if self.enable_reasoner else "disabled"
@@ -339,7 +334,6 @@ class LLMConfig(BaseModel):
                 temperature=self.temperature,
                 thinking=AnthropicThinking(type=thinking_type, budget_tokens=self.max_reasoning_tokens or 1024),
                 verbosity=self.verbosity,
-                strict=self.strict,
             )
         elif self.model_endpoint_type == "google_ai":
             return GoogleAIModelSettings(
diff --git a/letta/schemas/model.py b/letta/schemas/model.py
index 3a6dcdf7..daf3291e 100644
--- a/letta/schemas/model.py
+++ b/letta/schemas/model.py
@@ -227,11 +227,6 @@ class OpenAIModelSettings(ModelSettings):
     temperature: float = Field(0.7, description="The temperature of the model.")
     reasoning: OpenAIReasoning = Field(OpenAIReasoning(reasoning_effort="high"), description="The reasoning configuration for the model.")
     response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")
-    # OpenAI supports strict mode for tool calling - defaults to True
-    strict: bool = Field(
-        True,
-        description="Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.",
-    )
 
     # TODO: implement support for these
     # reasoning_summary: Optional[Literal["none", "short", "detailed"]] = Field(
@@ -249,7 +244,6 @@ class OpenAIModelSettings(ModelSettings):
             "reasoning_effort": self.reasoning.reasoning_effort,
             "response_format": self.response_format,
             "parallel_tool_calls": self.parallel_tool_calls,
-            "strict": self.strict,
         }
 
 
@@ -284,12 +278,6 @@ class AnthropicModelSettings(ModelSettings):
         description="Effort level for Opus 4.5 model (controls token conservation). Not setting this gives similar performance to 'high'.",
     )
 
-    # Anthropic supports strict mode for tool calling - defaults to False
-    strict: bool = Field(
-        False,
-        description="Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.",
-    )
-
     # TODO: implement support for these
     # top_k: Optional[int] = Field(None, description="The number of top tokens to return.")
     # top_p: Optional[float] = Field(None, description="The top-p value to use when generating text.")
@@ -304,7 +292,6 @@ class AnthropicModelSettings(ModelSettings):
             "parallel_tool_calls": self.parallel_tool_calls,
             "effort": self.effort,
             "response_format": self.response_format,
-            "strict": self.strict,
         }
 
 
@@ -328,7 +315,6 @@ class GoogleAIModelSettings(ModelSettings):
             "max_tokens": self.max_output_tokens,
             "max_reasoning_tokens": self.thinking_config.thinking_budget if self.thinking_config.include_thoughts else 0,
             "parallel_tool_calls": self.parallel_tool_calls,
-            "strict": False,  # Google AI does not support strict mode
         }
 
 
@@ -349,7 +335,6 @@ class AzureModelSettings(ModelSettings):
             "max_tokens": self.max_output_tokens,
             "response_format": self.response_format,
             "parallel_tool_calls": self.parallel_tool_calls,
-            "strict": False,  # Azure does not support strict mode
         }
 
 
@@ -366,7 +351,6 @@ class XAIModelSettings(ModelSettings):
             "max_tokens": self.max_output_tokens,
             "response_format": self.response_format,
             "parallel_tool_calls": self.parallel_tool_calls,
-            "strict": False,  # xAI does not support strict mode
         }
 
 
@@ -383,7 +367,6 @@ class ZAIModelSettings(ModelSettings):
             "max_tokens": self.max_output_tokens,
             "response_format": self.response_format,
             "parallel_tool_calls": self.parallel_tool_calls,
-            "strict": False,  # ZAI does not support strict mode
         }
 
 
@@ -400,7 +383,6 @@ class GroqModelSettings(ModelSettings):
             "max_tokens": self.max_output_tokens,
             "response_format": self.response_format,
             "parallel_tool_calls": self.parallel_tool_calls,
-            "strict": False,  # Groq does not support strict mode
         }
 
 
@@ -417,7 +399,6 @@ class DeepseekModelSettings(ModelSettings):
             "max_tokens": self.max_output_tokens,
             "response_format": self.response_format,
             "parallel_tool_calls": self.parallel_tool_calls,
-            "strict": False,  # Deepseek does not support strict mode
         }
 
 
@@ -434,7 +415,6 @@ class TogetherModelSettings(ModelSettings):
             "max_tokens": self.max_output_tokens,
             "response_format": self.response_format,
             "parallel_tool_calls": self.parallel_tool_calls,
-            "strict": False,  # Together does not support strict mode
         }
 
 
@@ -451,7 +431,6 @@ class BedrockModelSettings(ModelSettings):
             "max_tokens": self.max_output_tokens,
             "response_format": self.response_format,
             "parallel_tool_calls": self.parallel_tool_calls,
-            "strict": False,  # Bedrock does not support strict mode
         }
 
 
diff --git a/tests/managers/test_agent_manager.py b/tests/managers/test_agent_manager.py
index fb9c134f..15767114 100644
--- a/tests/managers/test_agent_manager.py
+++ b/tests/managers/test_agent_manager.py
@@ -1851,7 +1851,6 @@ async def test_agent_state_schema_unchanged(server: SyncServer):
         "verbosity",
         "tier",
         "parallel_tool_calls",
-        "strict",
     }
     actual_llm_config_fields = set(llm_config_fields.keys())
     if actual_llm_config_fields != expected_llm_config_fields:
diff --git a/tests/sdk/agents_test.py b/tests/sdk/agents_test.py
index 78324ecd..f02a7ef1 100644
--- a/tests/sdk/agents_test.py
+++ b/tests/sdk/agents_test.py
@@ -7,12 +7,9 @@ AGENTS_CREATE_PARAMS = [
         {
             # Verify model_settings is populated with config values
             # Note: The 'model' field itself is separate from model_settings
-            # strict defaults to False when no model_settings is explicitly provided
-            # (OpenAIModelSettings defaults to True only when explicitly instantiated)
             "model_settings": {
                 "max_output_tokens": 16384,
                 "parallel_tool_calls": False,
-                "strict": False,
                 "provider_type": "openai",
                 "temperature": 0.7,
                 "reasoning": {"reasoning_effort": "minimal"},
@@ -32,7 +29,6 @@ AGENTS_UPDATE_PARAMS = [
             "model_settings": {
                 "max_output_tokens": 16384,
                 "parallel_tool_calls": False,
-                "strict": False,
                 "provider_type": "openai",
                 "temperature": 0.7,
                 "reasoning": {"reasoning_effort": "minimal"},