feat: Add structured outputs for Anthropic (#7495)

2025-12-19 17:23:14 -08:00
parent acd8dd7bcf
commit a1dfedfb0b
2 changed files with 139 additions and 81 deletions
--- a/letta/llm_api/anthropic_client.py
+++ b/letta/llm_api/anthropic_client.py
@@ -28,6 +28,7 @@ from letta.errors import (
 )
 from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.decorators import deprecated
 from letta.llm_api.anthropic_constants import ANTHROPIC_MAX_STRICT_TOOLS, ANTHROPIC_STRICT_MODE_ALLOWLIST
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
@@ -81,8 +82,14 @@ class AnthropicClient(LLMClientBase):
        if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner:
            betas.append("context-management-2025-06-27")
-        # Structured outputs beta
+        # Structured outputs beta - only for supported models
-        if hasattr(llm_config, "response_format") and isinstance(llm_config.response_format, JsonSchemaResponseFormat):
+        # Supported: Claude Sonnet 4.5, Opus 4.1, Opus 4.5, Haiku 4.5
        supports_structured_outputs = _supports_structured_outputs(llm_config.model)
        if supports_structured_outputs:
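            # Always enable structured outputs beta on supported models.
            # NOTE: The beta header alone does not make any tool strict. `strict` is attached
            # per tool in convert_tools_to_anthropic_format (allowlisted tools only, capped at
            # ANTHROPIC_MAX_STRICT_TOOLS). Be aware that Anthropic SDK typed tool params may
            # reject unknown fields (e.g., `tools.0.custom.strict`) - TODO confirm with the SDK in use.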
            betas.append("structured-outputs-2025-11-13")
        if betas:
@@ -118,8 +125,10 @@ class AnthropicClient(LLMClientBase):
        if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner:
            betas.append("context-management-2025-06-27")
-        # Structured outputs beta
+        # Structured outputs beta - only for supported models
-        if hasattr(llm_config, "response_format") and isinstance(llm_config.response_format, JsonSchemaResponseFormat):
+        supports_structured_outputs = _supports_structured_outputs(llm_config.model)
        if supports_structured_outputs:
            betas.append("structured-outputs-2025-11-13")
        if betas:
@@ -163,8 +172,10 @@ class AnthropicClient(LLMClientBase):
        if llm_config.model.startswith("claude-opus-4-5") and llm_config.enable_reasoner:
            betas.append("context-management-2025-06-27")
-        # Structured outputs beta
+        # Structured outputs beta - only for supported models
-        if hasattr(llm_config, "response_format") and isinstance(llm_config.response_format, JsonSchemaResponseFormat):
+        supports_structured_outputs = _supports_structured_outputs(llm_config.model)
        if supports_structured_outputs:
            betas.append("structured-outputs-2025-11-13")
        # log failed requests
@@ -420,7 +431,10 @@ class AnthropicClient(LLMClientBase):
        if tools_for_request and len(tools_for_request) > 0:
            # TODO eventually enable parallel tool use
-            data["tools"] = convert_tools_to_anthropic_format(tools_for_request)
+            data["tools"] = convert_tools_to_anthropic_format(
                tools_for_request,
                use_strict=_supports_structured_outputs(llm_config.model),
            )
            # Add cache control to the last tool for caching tool definitions
            if len(data["tools"]) > 0:
                data["tools"][-1]["cache_control"] = {"type": "ephemeral"}
@@ -562,7 +576,10 @@ class AnthropicClient(LLMClientBase):
        if messages and len(messages) == 0:
            messages = None
        if tools and len(tools) > 0:
-            anthropic_tools = convert_tools_to_anthropic_format(tools)
+            anthropic_tools = convert_tools_to_anthropic_format(
                tools,
                use_strict=_supports_structured_outputs(model) if model else False,
            )
        else:
            anthropic_tools = None
@@ -998,7 +1015,30 @@ class AnthropicClient(LLMClientBase):
        return messages
-def convert_tools_to_anthropic_format(tools: List[OpenAITool]) -> List[dict]:
+def _supports_structured_outputs(model: str) -> bool:
    """Check if the model supports structured outputs (strict mode).
    Only these 4 models are supported:
    - Claude Sonnet 4.5
    - Claude Opus 4.1
    - Claude Opus 4.5
    - Claude Haiku 4.5
    """
    model_lower = model.lower()
    if "sonnet-4-5" in model_lower:
        return True
    elif "opus-4-1" in model_lower:
        return True
    elif "opus-4-5" in model_lower:
        return True
    elif "haiku-4-5" in model_lower:
        return True
    return False
 def convert_tools_to_anthropic_format(tools: List[OpenAITool], use_strict: bool = False) -> List[dict]:
    """See: https://docs.anthropic.com/claude/docs/tool-use
    OpenAI style:
@@ -1009,18 +1049,11 @@ def convert_tools_to_anthropic_format(tools: List[OpenAITool]) -> List[dict]:
            "description": "find ....",
            "parameters": {
              "type": "object",
-              "properties": {
+              "properties": {...},
                 PARAM: {
                   "type": PARAM_TYPE,  # eg "string"
                   "description": PARAM_DESCRIPTION,
                 },
                 ...
              },
              "required": List[str],
            }
        }
-      }
+      }]
      ]
    Anthropic style:
      "tools": [{
@@ -1028,89 +1061,87 @@ def convert_tools_to_anthropic_format(tools: List[OpenAITool]) -> List[dict]:
        "description": "find ....",
        "input_schema": {
          "type": "object",
-          "properties": {
+          "properties": {...},
             PARAM: {
               "type": PARAM_TYPE,  # eg "string"
               "description": PARAM_DESCRIPTION,
             },
             ...
          },
          "required": List[str],
-        }
+        },
-      }
+      }]
      ]
      Two small differences:
        - 1 level less of nesting
        - "parameters" -> "input_schema"
    """
    formatted_tools = []
    strict_count = 0
    for tool in tools:
        # Get the input schema
        input_schema = tool.function.parameters or {"type": "object", "properties": {}, "required": []}
-        # Clean up the properties in the schema
+        # Use the older lightweight cleanup: remove defaults and simplify union-with-null.
-        # The presence of union types / default fields seems Anthropic to produce invalid JSON for tool calls
+        cleaned_schema = _clean_property_schema(input_schema) if isinstance(input_schema, dict) else input_schema
-        if isinstance(input_schema, dict) and "properties" in input_schema:
+        # Normalize to a safe "object" schema shape to avoid downstream assumptions failing.
-            cleaned_properties = {}
+        if isinstance(cleaned_schema, dict):
-            for prop_name, prop_schema in input_schema.get("properties", {}).items():
+            if cleaned_schema.get("type") != "object":
-                if isinstance(prop_schema, dict):
+                cleaned_schema["type"] = "object"
-                    cleaned_properties[prop_name] = _clean_property_schema(prop_schema)
+            if not isinstance(cleaned_schema.get("properties"), dict):
-                else:
+                cleaned_schema["properties"] = {}
-                    cleaned_properties[prop_name] = prop_schema
+        formatted_tool: dict = {
            # Create cleaned input schema
            cleaned_input_schema = {
                "type": input_schema.get("type", "object"),
                "properties": cleaned_properties,
            }
            # Only add required field if it exists and is non-empty
            if "required" in input_schema and input_schema["required"]:
                cleaned_input_schema["required"] = input_schema["required"]
        else:
            cleaned_input_schema = input_schema
        formatted_tool = {
            "name": tool.function.name,
            "description": tool.function.description if tool.function.description else "",
-            "input_schema": cleaned_input_schema,
+            "input_schema": cleaned_schema,
        }
        # Structured outputs "strict" mode: always attach `strict` for allowlisted tools
        # when we are using structured outputs models. Limit the number of strict tools
        # to avoid exceeding Anthropic constraints.
        if use_strict and tool.function.name in ANTHROPIC_STRICT_MODE_ALLOWLIST and strict_count < ANTHROPIC_MAX_STRICT_TOOLS:
            formatted_tool["strict"] = True
            strict_count += 1
        formatted_tools.append(formatted_tool)
    return formatted_tools
-def _clean_property_schema(prop_schema: dict) -> dict:
+def _clean_property_schema(schema: dict) -> dict:
-    """Clean up a property schema by removing defaults and simplifying union types."""
+    """Older schema cleanup used for Anthropic tools.
    cleaned = {}
-    # Handle type field - simplify union types like ["null", "string"] to just "string"
+    Removes / simplifies fields that commonly cause Anthropic tool schema issues:
-    if "type" in prop_schema:
+    - Remove `default` values
-        prop_type = prop_schema["type"]
+    - Simplify nullable unions like {"type": ["null", "string"]} -> {"type": "string"}
-        if isinstance(prop_type, list):
+    - Recurse through nested schemas (properties/items/anyOf/oneOf/allOf/etc.)
-            # Remove "null" from union types to simplify
+    """
-            # e.g., ["null", "string"] becomes "string"
+    if not isinstance(schema, dict):
-            non_null_types = [t for t in prop_type if t != "null"]
+        return schema
-            if len(non_null_types) == 1:
+
-                cleaned["type"] = non_null_types[0]
+    cleaned: dict = {}
-            elif len(non_null_types) > 1:
+
-                # Keep as array if multiple non-null types
+    # Simplify union types like ["null", "string"] to "string"
-                cleaned["type"] = non_null_types
+    if "type" in schema:
        t = schema.get("type")
        if isinstance(t, list):
            non_null = [x for x in t if x != "null"]
            if len(non_null) == 1:
                cleaned["type"] = non_null[0]
            elif len(non_null) > 1:
                cleaned["type"] = non_null
            else:
                # If only "null" was in the list, default to string
                cleaned["type"] = "string"
        else:
-            cleaned["type"] = prop_type
+            cleaned["type"] = t
-    # Copy over other fields except 'default'
+    for key, value in schema.items():
-    for key, value in prop_schema.items():
+        if key == "type":
-        if key not in ["type", "default"]:  # Skip 'default' field
+            continue
-            if key == "properties" and isinstance(value, dict):
+        if key == "default":
-                # Recursively clean nested properties
+            continue
-                cleaned["properties"] = {k: _clean_property_schema(v) if isinstance(v, dict) else v for k, v in value.items()}
+
-            else:
+        if key == "properties" and isinstance(value, dict):
-                cleaned[key] = value
+            cleaned["properties"] = {k: _clean_property_schema(v) for k, v in value.items()}
        elif key == "items" and isinstance(value, dict):
            cleaned["items"] = _clean_property_schema(value)
        elif key in ("anyOf", "oneOf", "allOf") and isinstance(value, list):
            cleaned[key] = [_clean_property_schema(v) if isinstance(v, dict) else v for v in value]
        elif key in ("additionalProperties",) and isinstance(value, dict):
            cleaned[key] = _clean_property_schema(value)
        else:
            cleaned[key] = value
    return cleaned
--- a/letta/llm_api/anthropic_constants.py
+++ b/letta/llm_api/anthropic_constants.py
@@ -0,0 +1,27 @@
 # Anthropic-specific constants for the Letta LLM API
 # Allowlist of simple tools that work with Anthropic's structured outputs (strict mode).
 # These tools have few parameters and no complex nesting, making them safe for strict mode.
 # Tools with many optional params or deeply nested structures should use non-strict mode.
 #
 # Anthropic limitations for strict mode:
 # - Max 15 tools can use strict mode per request
 # - Max 24 optional parameters per tool (counted recursively in undocumented ways)
 # - Schema complexity limits
 #
 # Rather than trying to count parameters correctly, we allowlist simple tools that we know work.
 # Entries are matched by exact, case-sensitive tool name (plain set membership), so a
 # renamed tool must be updated here as well. The trailing comment on each entry records
 # the parameter shape that justified allowlisting it.
 ANTHROPIC_STRICT_MODE_ALLOWLIST = {
    "Write",  # 2 required params, no optional
    "Read",  # 1 required, 2 simple optional
    "Edit",  # 3 required, 1 simple optional
    "Glob",  # 1 required, 1 simple optional
    "KillBash",  # 1 required, no optional
    "fetch_webpage",  # 1 required, no optional
    "EnterPlanMode",  # no params
    "ExitPlanMode",  # no params
    "Skill",  # 1 required, 1 optional array
    "conversation_search",  # 1 required, 4 simple optional
 }
 # Maximum number of tools that can use strict mode in a single request
 # Once this many tools in a request have been marked strict, any further allowlisted
 # tools are sent without `strict`. The allowlist above currently has fewer entries
 # than this cap.
 ANTHROPIC_MAX_STRICT_TOOLS = 15