From feb232f04a0c80866622fed7b9e9eae8cef6e655 Mon Sep 17 00:00:00 2001 From: jnjpng Date: Fri, 22 Aug 2025 15:45:33 -0700 Subject: [PATCH] fix: mcp schema generation and non-strict schema validation Co-authored-by: Jin Peng --- letta/functions/schema_generator.py | 14 ++ letta/functions/schema_validator.py | 8 + letta/helpers/tool_execution_helper.py | 2 - letta/llm_api/helpers.py | 1 - letta/server/server.py | 1 - letta/services/mcp_manager.py | 2 - tests/mcp_tests/test_schema_validator.py | 66 +++++-- tests/test_schema_validator.py | 237 +++++++++++++++++++++++ 8 files changed, 308 insertions(+), 23 deletions(-) create mode 100644 tests/test_schema_validator.py diff --git a/letta/functions/schema_generator.py b/letta/functions/schema_generator.py index 05ea6dd2..6d050749 100644 --- a/letta/functions/schema_generator.py +++ b/letta/functions/schema_generator.py @@ -608,6 +608,20 @@ def generate_tool_schema_for_mcp( # Normalise so downstream code can treat it consistently. parameters_schema.setdefault("required", []) + # Process properties to handle anyOf types + if "properties" in parameters_schema: + for field_name, field_props in parameters_schema["properties"].items(): + # Handle anyOf types by flattening to type array + if "anyOf" in field_props and "type" not in field_props: + types = [] + for option in field_props["anyOf"]: + if "type" in option: + types.append(option["type"]) + if types: + field_props["type"] = types + # Remove the anyOf since we've flattened it + del field_props["anyOf"] + # Add the optional heartbeat parameter if append_heartbeat: parameters_schema["properties"][REQUEST_HEARTBEAT_PARAM] = { diff --git a/letta/functions/schema_validator.py b/letta/functions/schema_validator.py index db7652ac..5a78230a 100644 --- a/letta/functions/schema_validator.py +++ b/letta/functions/schema_validator.py @@ -125,6 +125,14 @@ def validate_complete_json_schema(schema: Dict[str, Any]) -> Tuple[SchemaHealth, required = [] # OpenAI strict-mode extra checks: + # Check that all properties are in required array (applies to all levels) + if props: + for prop_name in props.keys(): + if prop_name not in required: + mark_non_strict( + f"{path}: property '{prop_name}' is not in required array (OpenAI strict mode requires all properties to be required)" + ) + for req_key in required: if props and req_key not in props: mark_invalid(f"{path}: required contains '{req_key}' not found in properties") diff --git a/letta/helpers/tool_execution_helper.py b/letta/helpers/tool_execution_helper.py index 52459391..886e5239 100644 --- a/letta/helpers/tool_execution_helper.py +++ b/letta/helpers/tool_execution_helper.py @@ -39,12 +39,10 @@ def enable_strict_mode(tool_schema: Dict[str, Any]) -> Dict[str, Any]: # Ensure parameters is a valid dictionary parameters = schema.get("parameters", {}) - if isinstance(parameters, dict) and parameters.get("type") == "object": # Set additionalProperties to False parameters["additionalProperties"] = False schema["parameters"] = parameters - # Remove the metadata fields from the schema schema.pop(MCP_TOOL_METADATA_SCHEMA_STATUS, None) schema.pop(MCP_TOOL_METADATA_SCHEMA_WARNINGS, None) diff --git a/letta/llm_api/helpers.py b/letta/llm_api/helpers.py index cff7b96a..b6f3acb8 100644 --- a/letta/llm_api/helpers.py +++ b/letta/llm_api/helpers.py @@ -133,7 +133,6 @@ def convert_to_structured_output(openai_function: dict, allow_optional: bool = F structured_output["parameters"]["required"] = list(structured_output["parameters"]["properties"].keys()) else: raise NotImplementedError("Optional parameter handling is not implemented.") - return structured_output diff --git a/letta/server/server.py b/letta/server/server.py index d8b5a45b..358f9506 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -2068,7 +2068,6 @@ class SyncServer(Server): raise ValueError(f"No client was created for MCP server: {mcp_server_name}") tools = await self.mcp_clients[mcp_server_name].list_tools() - # Add health information to each tool for tool in tools: if tool.inputSchema: diff --git a/letta/services/mcp_manager.py b/letta/services/mcp_manager.py index c645a34c..95a9e11b 100644 --- a/letta/services/mcp_manager.py +++ b/letta/services/mcp_manager.py @@ -68,7 +68,6 @@ class MCPManager: # list tools tools = await mcp_client.list_tools() - # Add health information to each tool for tool in tools: if tool.inputSchema: @@ -129,7 +128,6 @@ class MCPManager: raise ValueError(f"MCP server '{mcp_server_name}' not found") mcp_tools = await self.list_mcp_server_tools(mcp_server_name, actor=actor) - for mcp_tool in mcp_tools: # TODO: @jnjpng move health check to tool class if mcp_tool.name == mcp_tool_name: diff --git a/tests/mcp_tests/test_schema_validator.py b/tests/mcp_tests/test_schema_validator.py index 5087153f..0d675e0b 100644 --- a/tests/mcp_tests/test_schema_validator.py +++ b/tests/mcp_tests/test_schema_validator.py @@ -22,7 +22,7 @@ class TestSchemaValidator: "additionalProperties": False, }, }, - "required": ["name", "age"], + "required": ["name", "age", "address"], # All properties must be required for strict mode "additionalProperties": False, } @@ -235,22 +235,22 @@ class TestSchemaValidator: assert status == SchemaHealth.INVALID assert any("dict" in reason for reason in reasons) - def test_schema_with_defaults_strict_compliant(self): - """Test that root-level schemas without required field are STRICT_COMPLIANT.""" + def test_schema_with_defaults_non_strict(self): + """Test that root-level schemas without required field are NON_STRICT_ONLY.""" schema = { "type": "object", "properties": {"name": {"type": "string"}, "optional": {"type": "string"}}, - # Missing "required" field at root level is OK + # Missing "required" field at root level - all properties must be required for strict mode "additionalProperties": False, } status, reasons = validate_complete_json_schema(schema) - # After fix, root level without required should be STRICT_COMPLIANT - assert status == SchemaHealth.STRICT_COMPLIANT - assert reasons == [] + # Root level without all properties required should be NON_STRICT_ONLY + assert status == SchemaHealth.NON_STRICT_ONLY + assert any("not in required array" in reason for reason in reasons) - def test_composio_schema_with_optional_root_properties_strict_compliant(self): - """Test that Composio-like schemas with optional root properties are STRICT_COMPLIANT.""" + def test_composio_schema_with_optional_root_properties_non_strict(self): + """Test that Composio-like schemas with optional root properties are NON_STRICT_ONLY.""" schema = { "type": "object", "properties": { @@ -264,25 +264,25 @@ class TestSchemaValidator: } status, reasons = validate_complete_json_schema(schema) - assert status == SchemaHealth.STRICT_COMPLIANT - assert reasons == [] + assert status == SchemaHealth.NON_STRICT_ONLY + assert any("not in required array" in reason for reason in reasons) - def test_root_level_without_required_strict_compliant(self): - """Test that root-level objects without 'required' field are STRICT_COMPLIANT.""" + def test_root_level_without_required_non_strict(self): + """Test that root-level objects without 'required' field are NON_STRICT_ONLY.""" schema = { "type": "object", "properties": { "name": {"type": "string"}, "age": {"type": "integer"}, }, - # No "required" field at root level + # No "required" field at root level - all properties must be required for strict mode "additionalProperties": False, } status, reasons = validate_complete_json_schema(schema) - # Root level without required should be STRICT_COMPLIANT - assert status == SchemaHealth.STRICT_COMPLIANT - assert reasons == [] + # Root level without required should be NON_STRICT_ONLY + assert status == SchemaHealth.NON_STRICT_ONLY + assert any("not in required array" in reason for reason in reasons) def test_nested_object_without_required_non_strict(self): """Test that nested objects without 'required' remain NON_STRICT_ONLY.""" @@ -312,3 +312,35 @@ class TestSchemaValidator: assert status == SchemaHealth.NON_STRICT_ONLY # Should have warning about nested preferences object missing 'required' assert any("required" in reason and "preferences" in reason for reason in reasons) + + def test_user_example_schema_non_strict(self): + """Test the user's example schema with optional properties - should be NON_STRICT_ONLY.""" + schema = { + "type": "object", + "properties": { + "a": {"title": "A", "type": "integer"}, + "b": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": None, "title": "B"}, + }, + "required": ["a"], # Only 'a' is required, 'b' is not + "additionalProperties": False, + } + + status, reasons = validate_complete_json_schema(schema) + assert status == SchemaHealth.NON_STRICT_ONLY + assert any("property 'b' is not in required array" in reason for reason in reasons) + + def test_all_properties_required_strict_compliant(self): + """Test that schemas with all properties required are STRICT_COMPLIANT.""" + schema = { + "type": "object", + "properties": { + "a": {"title": "A", "type": "integer"}, + "b": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": None, "title": "B"}, + }, + "required": ["a", "b"], # All properties are required + "additionalProperties": False, + } + + status, reasons = validate_complete_json_schema(schema) + assert status == SchemaHealth.STRICT_COMPLIANT + assert reasons == [] diff --git a/tests/test_schema_validator.py b/tests/test_schema_validator.py new file mode 100644 index 00000000..b3ee314e --- /dev/null +++ b/tests/test_schema_validator.py @@ -0,0 +1,237 @@ +""" +Test schema validation for OpenAI strict mode compliance. +""" + +from letta.functions.schema_validator import SchemaHealth, validate_complete_json_schema + + +def test_user_example_schema_not_strict(): + """Test the user's provided example schema is correctly marked as NON_STRICT_ONLY.""" + schema = { + "properties": { + "a": {"title": "A", "type": "integer"}, + "b": { + "anyOf": [{"type": "integer"}, {"type": "null"}], + "default": None, + "title": "B", + }, + }, + "required": ["a"], # Only 'a' is required, 'b' is not + "type": "object", + "additionalProperties": False, + } + + status, reasons = validate_complete_json_schema(schema) + + # Should be NON_STRICT_ONLY because not all properties are required + assert status == SchemaHealth.NON_STRICT_ONLY + assert len(reasons) > 0 + # Check that the reason mentions property 'b' not being required + assert any("property 'b' is not in required array" in reason for reason in reasons) + + +def test_all_properties_required_is_strict(): + """Test that schemas with all properties required are STRICT_COMPLIANT.""" + schema = { + "type": "object", + "properties": { + "a": {"type": "integer"}, + "b": {"anyOf": [{"type": "integer"}, {"type": "null"}]}, # Optional via null type + }, + "required": ["a", "b"], # All properties are required + "additionalProperties": False, + } + + status, reasons = validate_complete_json_schema(schema) + + # Should be STRICT_COMPLIANT since all properties are required + assert status == SchemaHealth.STRICT_COMPLIANT + assert reasons == [] + + +def test_nested_object_missing_required_not_strict(): + """Test that nested objects with missing required properties are NON_STRICT_ONLY.""" + schema = { + "type": "object", + "properties": { + "config": { + "type": "object", + "properties": { + "host": {"type": "string"}, + "port": {"type": "integer"}, + "optional_field": {"anyOf": [{"type": "string"}, {"type": "null"}]}, + }, + "required": ["host", "port"], # optional_field not required + "additionalProperties": False, + } + }, + "required": ["config"], + "additionalProperties": False, + } + + status, reasons = validate_complete_json_schema(schema) + + # Should be NON_STRICT_ONLY because nested object doesn't require all properties + assert status == SchemaHealth.NON_STRICT_ONLY + assert len(reasons) > 0 + assert any("optional_field" in reason and "not in required array" in reason for reason in reasons) + + +def test_nested_object_all_required_is_strict(): + """Test that nested objects with all properties required are STRICT_COMPLIANT.""" + schema = { + "type": "object", + "properties": { + "config": { + "type": "object", + "properties": { + "host": {"type": "string"}, + "port": {"type": "integer"}, + "timeout": {"anyOf": [{"type": "integer"}, {"type": "null"}]}, + }, + "required": ["host", "port", "timeout"], # All properties required + "additionalProperties": False, + } + }, + "required": ["config"], + "additionalProperties": False, + } + + status, reasons = validate_complete_json_schema(schema) + + # Should be STRICT_COMPLIANT since all properties at all levels are required + assert status == SchemaHealth.STRICT_COMPLIANT + assert reasons == [] + + +def test_empty_object_no_properties_is_strict(): + """Test that objects with no properties are STRICT_COMPLIANT.""" + schema = { + "type": "object", + "properties": {}, + "required": [], + "additionalProperties": False, + } + + status, reasons = validate_complete_json_schema(schema) + + # Empty objects with no properties should be STRICT_COMPLIANT + assert status == SchemaHealth.STRICT_COMPLIANT + assert reasons == [] + + +def test_missing_additionalproperties_not_strict(): + """Test that missing additionalProperties makes schema NON_STRICT_ONLY.""" + schema = { + "type": "object", + "properties": { + "field": {"type": "string"}, + }, + "required": ["field"], + # Missing additionalProperties + } + + status, reasons = validate_complete_json_schema(schema) + + # Should be NON_STRICT_ONLY due to missing additionalProperties + assert status == SchemaHealth.NON_STRICT_ONLY + assert any("additionalProperties" in reason and "not explicitly set" in reason for reason in reasons) + + +def test_additionalproperties_true_not_strict(): + """Test that additionalProperties: true makes schema NON_STRICT_ONLY.""" + schema = { + "type": "object", + "properties": { + "field": {"type": "string"}, + }, + "required": ["field"], + "additionalProperties": True, # Allows additional properties + } + + status, reasons = validate_complete_json_schema(schema) + + # Should be NON_STRICT_ONLY due to additionalProperties not being false + assert status == SchemaHealth.NON_STRICT_ONLY + assert any("additionalProperties" in reason and "not false" in reason for reason in reasons) + + +def test_complex_schema_with_arrays(): + """Test a complex schema with arrays and nested objects.""" + schema = { + "type": "object", + "properties": { + "items": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "integer"}, + "name": {"type": "string"}, + "tags": { + "type": "array", + "items": {"type": "string"}, + }, + }, + "required": ["id", "name", "tags"], # All properties required + "additionalProperties": False, + }, + }, + "total": {"type": "integer"}, + }, + "required": ["items", "total"], # All properties required + "additionalProperties": False, + } + + status, reasons = validate_complete_json_schema(schema) + + # Should be STRICT_COMPLIANT since all properties at all levels are required + assert status == SchemaHealth.STRICT_COMPLIANT + assert reasons == [] + + +def test_fastmcp_tool_schema_not_strict(): + """Test that a schema from FastMCP with optional field 'b' is marked as NON_STRICT_ONLY.""" + # This is the exact schema format provided by the user + schema = { + "properties": { + "a": {"title": "A", "type": "integer"}, + "b": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": None, "title": "B"}, + }, + "required": ["a"], # Only 'a' is required, violates strict mode requirement + "type": "object", + "additionalProperties": False, + } + + status, reasons = validate_complete_json_schema(schema) + + # Should be NON_STRICT_ONLY because 'b' is not in the required array + # Even though 'b' accepts null (making it optional), OpenAI strict mode + # requires ALL properties to be in the required array + assert status == SchemaHealth.NON_STRICT_ONLY + assert len(reasons) > 0 + assert any("property 'b' is not in required array" in reason for reason in reasons) + + +def test_union_types_with_anyof(): + """Test that anyOf unions are handled correctly.""" + schema = { + "type": "object", + "properties": { + "value": { + "anyOf": [ + {"type": "string"}, + {"type": "number"}, + {"type": "null"}, + ] + } + }, + "required": ["value"], + "additionalProperties": False, + } + + status, reasons = validate_complete_json_schema(schema) + + # Should be STRICT_COMPLIANT - anyOf is allowed and all properties are required + assert status == SchemaHealth.STRICT_COMPLIANT + assert reasons == []