fix: mcp schema generation and non-strict schema validation

Co-authored-by: Jin Peng <jinjpeng@Jins-MacBook-Pro.local>
This commit is contained in:
jnjpng
2025-08-22 15:45:33 -07:00
committed by GitHub
parent c8771d6b80
commit 74029acb0d
8 changed files with 308 additions and 23 deletions

View File

@@ -608,6 +608,20 @@ def generate_tool_schema_for_mcp(
# Normalise so downstream code can treat it consistently.
parameters_schema.setdefault("required", [])
# Process properties to handle anyOf types
if "properties" in parameters_schema:
for field_name, field_props in parameters_schema["properties"].items():
# Handle anyOf types by flattening to type array
if "anyOf" in field_props and "type" not in field_props:
types = []
for option in field_props["anyOf"]:
if "type" in option:
types.append(option["type"])
if types:
field_props["type"] = types
# Remove the anyOf since we've flattened it
del field_props["anyOf"]
# Add the optional heartbeat parameter
if append_heartbeat:
parameters_schema["properties"][REQUEST_HEARTBEAT_PARAM] = {

View File

@@ -125,6 +125,14 @@ def validate_complete_json_schema(schema: Dict[str, Any]) -> Tuple[SchemaHealth,
required = []
# OpenAI strict-mode extra checks:
# Check that all properties are in required array (applies to all levels)
if props:
for prop_name in props.keys():
if prop_name not in required:
mark_non_strict(
f"{path}: property '{prop_name}' is not in required array (OpenAI strict mode requires all properties to be required)"
)
for req_key in required:
if props and req_key not in props:
mark_invalid(f"{path}: required contains '{req_key}' not found in properties")

View File

@@ -39,12 +39,10 @@ def enable_strict_mode(tool_schema: Dict[str, Any]) -> Dict[str, Any]:
# Ensure parameters is a valid dictionary
parameters = schema.get("parameters", {})
if isinstance(parameters, dict) and parameters.get("type") == "object":
# Set additionalProperties to False
parameters["additionalProperties"] = False
schema["parameters"] = parameters
# Remove the metadata fields from the schema
schema.pop(MCP_TOOL_METADATA_SCHEMA_STATUS, None)
schema.pop(MCP_TOOL_METADATA_SCHEMA_WARNINGS, None)

View File

@@ -133,7 +133,6 @@ def convert_to_structured_output(openai_function: dict, allow_optional: bool = F
structured_output["parameters"]["required"] = list(structured_output["parameters"]["properties"].keys())
else:
raise NotImplementedError("Optional parameter handling is not implemented.")
return structured_output

View File

@@ -2068,7 +2068,6 @@ class SyncServer(Server):
raise ValueError(f"No client was created for MCP server: {mcp_server_name}")
tools = await self.mcp_clients[mcp_server_name].list_tools()
# Add health information to each tool
for tool in tools:
if tool.inputSchema:

View File

@@ -68,7 +68,6 @@ class MCPManager:
# list tools
tools = await mcp_client.list_tools()
# Add health information to each tool
for tool in tools:
if tool.inputSchema:
@@ -129,7 +128,6 @@ class MCPManager:
raise ValueError(f"MCP server '{mcp_server_name}' not found")
mcp_tools = await self.list_mcp_server_tools(mcp_server_name, actor=actor)
for mcp_tool in mcp_tools:
# TODO: @jnjpng move health check to tool class
if mcp_tool.name == mcp_tool_name:

View File

@@ -22,7 +22,7 @@ class TestSchemaValidator:
"additionalProperties": False,
},
},
"required": ["name", "age"],
"required": ["name", "age", "address"], # All properties must be required for strict mode
"additionalProperties": False,
}
@@ -235,22 +235,22 @@ class TestSchemaValidator:
assert status == SchemaHealth.INVALID
assert any("dict" in reason for reason in reasons)
def test_schema_with_defaults_strict_compliant(self):
"""Test that root-level schemas without required field are STRICT_COMPLIANT."""
def test_schema_with_defaults_non_strict(self):
"""Test that root-level schemas without required field are NON_STRICT_ONLY."""
schema = {
"type": "object",
"properties": {"name": {"type": "string"}, "optional": {"type": "string"}},
# Missing "required" field at root level is OK
# Missing "required" field at root level - all properties must be required for strict mode
"additionalProperties": False,
}
status, reasons = validate_complete_json_schema(schema)
# After fix, root level without required should be STRICT_COMPLIANT
assert status == SchemaHealth.STRICT_COMPLIANT
assert reasons == []
# Root level without all properties required should be NON_STRICT_ONLY
assert status == SchemaHealth.NON_STRICT_ONLY
assert any("not in required array" in reason for reason in reasons)
def test_composio_schema_with_optional_root_properties_strict_compliant(self):
"""Test that Composio-like schemas with optional root properties are STRICT_COMPLIANT."""
def test_composio_schema_with_optional_root_properties_non_strict(self):
"""Test that Composio-like schemas with optional root properties are NON_STRICT_ONLY."""
schema = {
"type": "object",
"properties": {
@@ -264,25 +264,25 @@ class TestSchemaValidator:
}
status, reasons = validate_complete_json_schema(schema)
assert status == SchemaHealth.STRICT_COMPLIANT
assert reasons == []
assert status == SchemaHealth.NON_STRICT_ONLY
assert any("not in required array" in reason for reason in reasons)
def test_root_level_without_required_strict_compliant(self):
"""Test that root-level objects without 'required' field are STRICT_COMPLIANT."""
def test_root_level_without_required_non_strict(self):
"""Test that root-level objects without 'required' field are NON_STRICT_ONLY."""
schema = {
"type": "object",
"properties": {
"name": {"type": "string"},
"age": {"type": "integer"},
},
# No "required" field at root level
# No "required" field at root level - all properties must be required for strict mode
"additionalProperties": False,
}
status, reasons = validate_complete_json_schema(schema)
# Root level without required should be STRICT_COMPLIANT
assert status == SchemaHealth.STRICT_COMPLIANT
assert reasons == []
# Root level without required should be NON_STRICT_ONLY
assert status == SchemaHealth.NON_STRICT_ONLY
assert any("not in required array" in reason for reason in reasons)
def test_nested_object_without_required_non_strict(self):
"""Test that nested objects without 'required' remain NON_STRICT_ONLY."""
@@ -312,3 +312,35 @@ class TestSchemaValidator:
assert status == SchemaHealth.NON_STRICT_ONLY
# Should have warning about nested preferences object missing 'required'
assert any("required" in reason and "preferences" in reason for reason in reasons)
def test_user_example_schema_non_strict(self):
"""Test the user's example schema with optional properties - should be NON_STRICT_ONLY."""
schema = {
"type": "object",
"properties": {
"a": {"title": "A", "type": "integer"},
"b": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": None, "title": "B"},
},
"required": ["a"], # Only 'a' is required, 'b' is not
"additionalProperties": False,
}
status, reasons = validate_complete_json_schema(schema)
assert status == SchemaHealth.NON_STRICT_ONLY
assert any("property 'b' is not in required array" in reason for reason in reasons)
def test_all_properties_required_strict_compliant(self):
"""Test that schemas with all properties required are STRICT_COMPLIANT."""
schema = {
"type": "object",
"properties": {
"a": {"title": "A", "type": "integer"},
"b": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": None, "title": "B"},
},
"required": ["a", "b"], # All properties are required
"additionalProperties": False,
}
status, reasons = validate_complete_json_schema(schema)
assert status == SchemaHealth.STRICT_COMPLIANT
assert reasons == []

View File

@@ -0,0 +1,237 @@
"""
Test schema validation for OpenAI strict mode compliance.
"""
from letta.functions.schema_validator import SchemaHealth, validate_complete_json_schema
def test_user_example_schema_not_strict():
"""Test the user's provided example schema is correctly marked as NON_STRICT_ONLY."""
schema = {
"properties": {
"a": {"title": "A", "type": "integer"},
"b": {
"anyOf": [{"type": "integer"}, {"type": "null"}],
"default": None,
"title": "B",
},
},
"required": ["a"], # Only 'a' is required, 'b' is not
"type": "object",
"additionalProperties": False,
}
status, reasons = validate_complete_json_schema(schema)
# Should be NON_STRICT_ONLY because not all properties are required
assert status == SchemaHealth.NON_STRICT_ONLY
assert len(reasons) > 0
# Check that the reason mentions property 'b' not being required
assert any("property 'b' is not in required array" in reason for reason in reasons)
def test_all_properties_required_is_strict():
"""Test that schemas with all properties required are STRICT_COMPLIANT."""
schema = {
"type": "object",
"properties": {
"a": {"type": "integer"},
"b": {"anyOf": [{"type": "integer"}, {"type": "null"}]}, # Optional via null type
},
"required": ["a", "b"], # All properties are required
"additionalProperties": False,
}
status, reasons = validate_complete_json_schema(schema)
# Should be STRICT_COMPLIANT since all properties are required
assert status == SchemaHealth.STRICT_COMPLIANT
assert reasons == []
def test_nested_object_missing_required_not_strict():
"""Test that nested objects with missing required properties are NON_STRICT_ONLY."""
schema = {
"type": "object",
"properties": {
"config": {
"type": "object",
"properties": {
"host": {"type": "string"},
"port": {"type": "integer"},
"optional_field": {"anyOf": [{"type": "string"}, {"type": "null"}]},
},
"required": ["host", "port"], # optional_field not required
"additionalProperties": False,
}
},
"required": ["config"],
"additionalProperties": False,
}
status, reasons = validate_complete_json_schema(schema)
# Should be NON_STRICT_ONLY because nested object doesn't require all properties
assert status == SchemaHealth.NON_STRICT_ONLY
assert len(reasons) > 0
assert any("optional_field" in reason and "not in required array" in reason for reason in reasons)
def test_nested_object_all_required_is_strict():
"""Test that nested objects with all properties required are STRICT_COMPLIANT."""
schema = {
"type": "object",
"properties": {
"config": {
"type": "object",
"properties": {
"host": {"type": "string"},
"port": {"type": "integer"},
"timeout": {"anyOf": [{"type": "integer"}, {"type": "null"}]},
},
"required": ["host", "port", "timeout"], # All properties required
"additionalProperties": False,
}
},
"required": ["config"],
"additionalProperties": False,
}
status, reasons = validate_complete_json_schema(schema)
# Should be STRICT_COMPLIANT since all properties at all levels are required
assert status == SchemaHealth.STRICT_COMPLIANT
assert reasons == []
def test_empty_object_no_properties_is_strict():
"""Test that objects with no properties are STRICT_COMPLIANT."""
schema = {
"type": "object",
"properties": {},
"required": [],
"additionalProperties": False,
}
status, reasons = validate_complete_json_schema(schema)
# Empty objects with no properties should be STRICT_COMPLIANT
assert status == SchemaHealth.STRICT_COMPLIANT
assert reasons == []
def test_missing_additionalproperties_not_strict():
"""Test that missing additionalProperties makes schema NON_STRICT_ONLY."""
schema = {
"type": "object",
"properties": {
"field": {"type": "string"},
},
"required": ["field"],
# Missing additionalProperties
}
status, reasons = validate_complete_json_schema(schema)
# Should be NON_STRICT_ONLY due to missing additionalProperties
assert status == SchemaHealth.NON_STRICT_ONLY
assert any("additionalProperties" in reason and "not explicitly set" in reason for reason in reasons)
def test_additionalproperties_true_not_strict():
"""Test that additionalProperties: true makes schema NON_STRICT_ONLY."""
schema = {
"type": "object",
"properties": {
"field": {"type": "string"},
},
"required": ["field"],
"additionalProperties": True, # Allows additional properties
}
status, reasons = validate_complete_json_schema(schema)
# Should be NON_STRICT_ONLY due to additionalProperties not being false
assert status == SchemaHealth.NON_STRICT_ONLY
assert any("additionalProperties" in reason and "not false" in reason for reason in reasons)
def test_complex_schema_with_arrays():
"""Test a complex schema with arrays and nested objects."""
schema = {
"type": "object",
"properties": {
"items": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {"type": "integer"},
"name": {"type": "string"},
"tags": {
"type": "array",
"items": {"type": "string"},
},
},
"required": ["id", "name", "tags"], # All properties required
"additionalProperties": False,
},
},
"total": {"type": "integer"},
},
"required": ["items", "total"], # All properties required
"additionalProperties": False,
}
status, reasons = validate_complete_json_schema(schema)
# Should be STRICT_COMPLIANT since all properties at all levels are required
assert status == SchemaHealth.STRICT_COMPLIANT
assert reasons == []
def test_fastmcp_tool_schema_not_strict():
"""Test that a schema from FastMCP with optional field 'b' is marked as NON_STRICT_ONLY."""
# This is the exact schema format provided by the user
schema = {
"properties": {
"a": {"title": "A", "type": "integer"},
"b": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": None, "title": "B"},
},
"required": ["a"], # Only 'a' is required, violates strict mode requirement
"type": "object",
"additionalProperties": False,
}
status, reasons = validate_complete_json_schema(schema)
# Should be NON_STRICT_ONLY because 'b' is not in the required array
# Even though 'b' accepts null (making it optional), OpenAI strict mode
# requires ALL properties to be in the required array
assert status == SchemaHealth.NON_STRICT_ONLY
assert len(reasons) > 0
assert any("property 'b' is not in required array" in reason for reason in reasons)
def test_union_types_with_anyof():
"""Test that anyOf unions are handled correctly."""
schema = {
"type": "object",
"properties": {
"value": {
"anyOf": [
{"type": "string"},
{"type": "number"},
{"type": "null"},
]
}
},
"required": ["value"],
"additionalProperties": False,
}
status, reasons = validate_complete_json_schema(schema)
# Should be STRICT_COMPLIANT - anyOf is allowed and all properties are required
assert status == SchemaHealth.STRICT_COMPLIANT
assert reasons == []