diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6f47b173..ffaa2704 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -32,6 +32,7 @@ jobs: - "test_agent_tool_graph.py" - "test_tool_execution_sandbox.py" - "test_utils.py" + - "test_tool_schema_parsing.py" services: qdrant: image: qdrant/qdrant diff --git a/letta/functions/functions.py b/letta/functions/functions.py index fae7ca16..2e55bcdb 100644 --- a/letta/functions/functions.py +++ b/letta/functions/functions.py @@ -11,23 +11,54 @@ from letta.functions.schema_generator import generate_schema def derive_openai_json_schema(source_code: str, name: Optional[str] = None) -> dict: - # auto-generate openai schema + """Derives the OpenAI JSON schema for a given function source code. + + First, attempts to execute the source code in a custom environment with only the necessary imports. + Then, it generates the schema from the function's docstring and signature. + """ try: # Define a custom environment with necessary imports - env = {"Optional": Optional, "List": List, "Dict": Dict} # Add any other required imports here - + env = { + "Optional": Optional, + "List": List, + "Dict": Dict, + # To support Pydantic models + # "BaseModel": BaseModel, + # "Field": Field, + } env.update(globals()) + + # print("About to execute source code...") exec(source_code, env) + # print("Source code executed successfully") - # get available functions - functions = [f for f in env if callable(env[f])] + functions = [f for f in env if callable(env[f]) and not f.startswith("__")] + if not functions: + raise LettaToolCreateError("No callable functions found in source code") - # TODO: not sure if this always works + # print(f"Found functions: {functions}") func = env[functions[-1]] - json_schema = generate_schema(func, name=name) - return json_schema + + if not hasattr(func, "__doc__") or not func.__doc__: + raise LettaToolCreateError(f"Function {func.__name__} missing docstring") + + # print("About to generate schema...") + try: + schema = generate_schema(func, name=name) + # print("Schema generated successfully") + return schema + except TypeError as e: + raise LettaToolCreateError(f"Type error in schema generation: {str(e)}") + except ValueError as e: + raise LettaToolCreateError(f"Value error in schema generation: {str(e)}") + except Exception as e: + raise LettaToolCreateError(f"Unexpected error in schema generation: {str(e)}") + except Exception as e: - raise LettaToolCreateError(f"Failed to derive JSON schema from source code: {e}") + import traceback + + traceback.print_exc() + raise LettaToolCreateError(f"Schema generation failed: {str(e)}") from e def parse_source_code(func) -> str: diff --git a/letta/functions/schema_generator.py b/letta/functions/schema_generator.py index 01ff969c..e36efc07 100644 --- a/letta/functions/schema_generator.py +++ b/letta/functions/schema_generator.py @@ -22,7 +22,7 @@ def optional_length(annotation): raise ValueError("The annotation is not an Optional type") -def type_to_json_schema_type(py_type): +def type_to_json_schema_type(py_type) -> dict: """ Maps a Python type to a JSON schema type. Specifically handles typing.Optional and common Python types. @@ -36,36 +36,87 @@ def type_to_json_schema_type(py_type): # Extract and map the inner type return type_to_json_schema_type(type_args[0]) + # Handle Union types (except Optional which is handled above) + if get_origin(py_type) is Union: + # TODO support mapping Unions to anyOf + raise NotImplementedError("General Union types are not yet supported") + + # Handle array types + origin = get_origin(py_type) + if py_type == list or origin in (list, List): + args = get_args(py_type) + + if args and inspect.isclass(args[0]) and issubclass(args[0], BaseModel): + # If it's a list of Pydantic models, return an array with the model schema as items + return { + "type": "array", + "items": pydantic_model_to_json_schema(args[0]), + } + + # Otherwise, recursively call the basic type checker + return { + "type": "array", + # get the type of the items in the list + "items": type_to_json_schema_type(args[0]), + } + + # Handle object types + if py_type == dict or origin in (dict, Dict): + args = get_args(py_type) + if not args: + # Generic dict without type arguments + return { + "type": "object", + # "properties": {} + } + else: + raise ValueError( + f"Dictionary types {py_type} with nested type arguments are not supported (consider using a Pydantic model instead)" + ) + + # NOTE: the below code works for generic JSON schema parsing, but there's a problem with the key inference + # when it comes to OpenAI function schema generation so it doesn't make sense to allow for dict[str, Any] type hints + # key_type, value_type = args + + # # Ensure dict keys are strings + # # Otherwise there's no JSON schema equivalent + # if key_type != str: + # raise ValueError("Dictionary keys must be strings for OpenAI function schema compatibility") + + # # Handle value type to determine property schema + # value_schema = {} + # if inspect.isclass(value_type) and issubclass(value_type, BaseModel): + # value_schema = pydantic_model_to_json_schema(value_type) + # else: + # value_schema = type_to_json_schema_type(value_type) + + # # NOTE: the problem lies here - the key is always "key_placeholder" + # return {"type": "object", "properties": {"key_placeholder": value_schema}} + + # Handle direct Pydantic models + if inspect.isclass(py_type) and issubclass(py_type, BaseModel): + return pydantic_model_to_json_schema(py_type) + # Mapping of Python types to JSON schema types type_map = { # Basic types + # Optional, Union, and collections are handled above ^ int: "integer", str: "string", bool: "boolean", float: "number", - # Collections - List[str]: "array", - List[int]: "array", - list: "array", - tuple: "array", - set: "array", - # Dictionaries - dict: "object", - Dict[str, Any]: "object", - # Special types None: "null", - type(None): "null", - # Optional types - # Optional[str]: "string", # NOTE: caught above ^ - Union[str, None]: "string", } if py_type not in type_map: raise ValueError(f"Python type {py_type} has no corresponding JSON schema type - full map: {type_map}") - - return type_map.get(py_type, "string") # Default to "string" if type not in map + else: + return {"type": type_map[py_type]} -def pydantic_model_to_open_ai(model): +def pydantic_model_to_open_ai(model: Type[BaseModel]) -> dict: + """ + Converts a Pydantic model as a singular arg to a JSON schema object for use in OpenAI function calling. + """ schema = model.model_json_schema() docstring = parse(model.__doc__ or "") parameters = {k: v for k, v in schema.items() if k not in ("title", "description")} @@ -80,7 +131,7 @@ def pydantic_model_to_open_ai(model): if docstring.short_description: schema["description"] = docstring.short_description else: - raise + raise ValueError(f"No description found in docstring or description field (model: {model}, docstring: {docstring})") return { "name": schema["title"], @@ -89,6 +140,159 @@ def pydantic_model_to_open_ai(model): } +def pydantic_model_to_json_schema(model: Type[BaseModel]) -> dict: + """ + Converts a Pydantic model (as an arg that already is annotated) to a JSON schema object for use in OpenAI function calling. + + An example of a Pydantic model as an arg: + + class Step(BaseModel): + name: str = Field( + ..., + description="Name of the step.", + ) + key: str = Field( + ..., + description="Unique identifier for the step.", + ) + description: str = Field( + ..., + description="An exhaustic description of what this step is trying to achieve and accomplish.", + ) + + def create_task_plan(steps: list[Step]): + ''' + Creates a task plan for the current task. + + Args: + steps: List of steps to add to the task plan. + ... + + Should result in: + { + "name": "create_task_plan", + "description": "Creates a task plan for the current task.", + "parameters": { + "type": "object", + "properties": { + "steps": { # <= this is the name of the arg + "type": "object", + "description": "List of steps to add to the task plan.", + "properties": { + "name": { + "type": "str", + "description": "Name of the step.", + }, + "key": { + "type": "str", + "description": "Unique identifier for the step.", + }, + "description": { + "type": "str", + "description": "An exhaustic description of what this step is trying to achieve and accomplish.", + }, + }, + "required": ["name", "key", "description"], + } + }, + "required": ["steps"], + } + } + + Specifically, the result of pydantic_model_to_json_schema(steps) (where `steps` is an instance of BaseModel) is: + { + "type": "object", + "properties": { + "name": { + "type": "str", + "description": "Name of the step." + }, + "key": { + "type": "str", + "description": "Unique identifier for the step." + }, + "description": { + "type": "str", + "description": "An exhaustic description of what this step is trying to achieve and accomplish." + }, + }, + "required": ["name", "key", "description"], + } + """ + schema = model.model_json_schema() + + def clean_property(prop: dict) -> dict: + """Clean up a property schema to match desired format""" + + if "description" not in prop: + raise ValueError(f"Property {prop} lacks a 'description' key") + + return { + "type": "string" if prop["type"] == "string" else prop["type"], + "description": prop["description"], + } + + def resolve_ref(ref: str, schema: dict) -> dict: + """Resolve a $ref reference in the schema""" + if not ref.startswith("#/$defs/"): + raise ValueError(f"Unexpected reference format: {ref}") + + model_name = ref.split("/")[-1] + if model_name not in schema.get("$defs", {}): + raise ValueError(f"Reference {model_name} not found in schema definitions") + + return schema["$defs"][model_name] + + def clean_schema(schema_part: dict, full_schema: dict) -> dict: + """Clean up a schema part, handling references and nested structures""" + # Handle $ref + if "$ref" in schema_part: + schema_part = resolve_ref(schema_part["$ref"], full_schema) + + if "type" not in schema_part: + raise ValueError(f"Schema part lacks a 'type' key: {schema_part}") + + # Handle array type + if schema_part["type"] == "array": + items_schema = schema_part["items"] + if "$ref" in items_schema: + items_schema = resolve_ref(items_schema["$ref"], full_schema) + return {"type": "array", "items": clean_schema(items_schema, full_schema), "description": schema_part.get("description", "")} + + # Handle object type + if schema_part["type"] == "object": + if "properties" not in schema_part: + raise ValueError(f"Object schema lacks 'properties' key: {schema_part}") + + properties = {} + for name, prop in schema_part["properties"].items(): + if "items" in prop: # Handle arrays + if "description" not in prop: + raise ValueError(f"Property {prop} lacks a 'description' key") + properties[name] = { + "type": "array", + "items": clean_schema(prop["items"], full_schema), + "description": prop["description"], + } + else: + properties[name] = clean_property(prop) + + pydantic_model_schema_dict = { + "type": "object", + "properties": properties, + "required": schema_part.get("required", []), + } + if "description" in schema_part: + pydantic_model_schema_dict["description"] = schema_part["description"] + + return pydantic_model_schema_dict + + # Handle primitive types + return clean_property(schema_part) + + return clean_schema(schema_part=schema, full_schema=schema) + + def generate_schema(function, name: Optional[str] = None, description: Optional[str] = None) -> dict: # Get the signature of the function sig = inspect.signature(function) @@ -126,24 +330,60 @@ def generate_schema(function, name: Optional[str] = None, description: Optional[ if not param_doc or not param_doc.description: raise ValueError(f"Parameter '{param.name}' in function '{function.__name__}' lacks a description in the docstring") - if inspect.isclass(param.annotation) and issubclass(param.annotation, BaseModel): - schema["parameters"]["properties"][param.name] = pydantic_model_to_open_ai(param.annotation) + # If the parameter is a pydantic model, we need to unpack the Pydantic model type into a JSON schema object + # if inspect.isclass(param.annotation) and issubclass(param.annotation, BaseModel): + if ( + (inspect.isclass(param.annotation) or inspect.isclass(get_origin(param.annotation) or param.annotation)) + and not get_origin(param.annotation) + and issubclass(param.annotation, BaseModel) + ): + # print("Generating schema for pydantic model:", param.annotation) + # Extract the properties from the pydantic model + schema["parameters"]["properties"][param.name] = pydantic_model_to_json_schema(param.annotation) + schema["parameters"]["properties"][param.name]["description"] = param_doc.description + + # Otherwise, we convert the Python typing to JSON schema types + # NOTE: important - if a dict or list, the internal type can be a Pydantic model itself + # however in that else: - # Add parameter details to the schema + # print("Generating schema for non-pydantic model:", param.annotation) + # Grab the description for the parameter from the extended docstring + # If it doesn't exist, we should raise an error param_doc = next((d for d in docstring.params if d.arg_name == param.name), None) - if param_doc: - schema["parameters"]["properties"][param.name] = { - # "type": "string" if param.annotation == str else str(param.annotation), - "type": type_to_json_schema_type(param.annotation) if param.annotation != inspect.Parameter.empty else "string", - "description": param_doc.description, - } - if param.default == inspect.Parameter.empty: + if not param_doc: + raise ValueError(f"Parameter '{param.name}' in function '{function.__name__}' lacks a description in the docstring") + elif not isinstance(param_doc.description, str): + raise ValueError( + f"Parameter '{param.name}' in function '{function.__name__}' has a description in the docstring that is not a string (type: {type(param_doc.description)})" + ) + else: + # If it's a string or a basic type, then all you need is: (1) type, (2) description + # If it's a more complex type, then you also need either: + # - for array, you need "items", each of which has "type" + # - for a dict, you need "properties", which has keys which each have "type" + if param.annotation != inspect.Parameter.empty: + param_generated_schema = type_to_json_schema_type(param.annotation) + else: + # TODO why are we inferring here? + param_generated_schema = {"type": "string"} + + # Add in the description + param_generated_schema["description"] = param_doc.description + + # Add the schema to the function arg key + schema["parameters"]["properties"][param.name] = param_generated_schema + + # If the parameter doesn't have a default value, it is required (so we need to add it to the required list) + if param.default == inspect.Parameter.empty and not is_optional(param.annotation): schema["parameters"]["required"].append(param.name) + # TODO what's going on here? + # If the parameter is a list of strings we need to hard cast to "string" instead of `str` if get_origin(param.annotation) is list: if get_args(param.annotation)[0] is str: schema["parameters"]["properties"][param.name]["items"] = {"type": "string"} + # TODO is this not duplicating the other append directly above? if param.annotation == inspect.Parameter.empty: schema["parameters"]["required"].append(param.name) diff --git a/letta/llm_api/helpers.py b/letta/llm_api/helpers.py index 5048af74..1244b6ff 100644 --- a/letta/llm_api/helpers.py +++ b/letta/llm_api/helpers.py @@ -11,7 +11,55 @@ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse from letta.utils import json_dumps, printd -def convert_to_structured_output(openai_function: dict) -> dict: +def _convert_to_structured_output_helper(property: dict) -> dict: + """Convert a single JSON schema property to structured output format (recursive)""" + + if "type" not in property: + raise ValueError(f"Property {property} is missing a type") + param_type = property["type"] + + if "description" not in property: + # raise ValueError(f"Property {property} is missing a description") + param_description = None + else: + param_description = property["description"] + + if param_type == "object": + if "properties" not in property: + raise ValueError(f"Property {property} of type object is missing properties") + properties = property["properties"] + property_dict = { + "type": "object", + "properties": {k: _convert_to_structured_output_helper(v) for k, v in properties.items()}, + "additionalProperties": False, + "required": list(properties.keys()), + } + if param_description is not None: + property_dict["description"] = param_description + return property_dict + + elif param_type == "array": + if "items" not in property: + raise ValueError(f"Property {property} of type array is missing items") + items = property["items"] + property_dict = { + "type": "array", + "items": _convert_to_structured_output_helper(items), + } + if param_description is not None: + property_dict["description"] = param_description + return property_dict + + else: + property_dict = { + "type": param_type, # simple type + } + if param_description is not None: + property_dict["description"] = param_description + return property_dict + + +def convert_to_structured_output(openai_function: dict, allow_optional: bool = False) -> dict: """Convert function call objects to structured output objects See: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas @@ -22,17 +70,63 @@ def convert_to_structured_output(openai_function: dict) -> dict: "name": openai_function["name"], "description": description, "strict": True, - "parameters": {"type": "object", "properties": {}, "additionalProperties": False, "required": []}, + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": False, + "required": [], + }, } + # This code needs to be able to handle nested properties + # For example, the param details may have "type" + "description", + # but if "type" is "object" we expected "properties", where each property has details + # and if "type" is "array" we expect "items": for param, details in openai_function["parameters"]["properties"].items(): - structured_output["parameters"]["properties"][param] = {"type": details["type"], "description": details["description"]} + + param_type = details["type"] + description = details["description"] + + if param_type == "object": + if "properties" not in details: + # Structured outputs requires the properties on dicts be specified ahead of time + raise ValueError(f"Property {param} of type object is missing properties") + structured_output["parameters"]["properties"][param] = { + "type": "object", + "description": description, + "properties": {k: _convert_to_structured_output_helper(v) for k, v in details["properties"].items()}, + "additionalProperties": False, + "required": list(details["properties"].keys()), + } + + elif param_type == "array": + structured_output["parameters"]["properties"][param] = { + "type": "array", + "description": description, + "items": _convert_to_structured_output_helper(details["items"]), + } + + else: + structured_output["parameters"]["properties"][param] = { + "type": param_type, # simple type + "description": description, + } if "enum" in details: structured_output["parameters"]["properties"][param]["enum"] = details["enum"] - # Add all properties to required list - structured_output["parameters"]["required"] = list(structured_output["parameters"]["properties"].keys()) + if not allow_optional: + # Add all properties to required list + structured_output["parameters"]["required"] = list(structured_output["parameters"]["properties"].keys()) + + else: + # See what parameters exist that aren't required + # Those are implied "optional" types + # For those types, turn each of them into a union type with "null" + # e.g. + # "type": "string" -> "type": ["string", "null"] + # TODO + raise NotImplementedError return structured_output diff --git a/letta/llm_api/openai.py b/letta/llm_api/openai.py index f63a66fe..e83e855b 100644 --- a/letta/llm_api/openai.py +++ b/letta/llm_api/openai.py @@ -477,7 +477,10 @@ def openai_chat_completions_request_stream( if "tools" in data: for tool in data["tools"]: # tool["strict"] = True - tool["function"] = convert_to_structured_output(tool["function"]) + try: + tool["function"] = convert_to_structured_output(tool["function"]) + except ValueError as e: + warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}") # print(f"\n\n\n\nData[tools]: {json.dumps(data['tools'], indent=2)}") @@ -533,7 +536,10 @@ def openai_chat_completions_request( if "tools" in data: for tool in data["tools"]: - tool["function"] = convert_to_structured_output(tool["function"]) + try: + tool["function"] = convert_to_structured_output(tool["function"]) + except ValueError as e: + warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}") response_json = make_post_request(url, headers, data) return ChatCompletionResponse(**response_json) diff --git a/letta/local_llm/utils.py b/letta/local_llm/utils.py index 8ded1f82..5b46c0d2 100644 --- a/letta/local_llm/utils.py +++ b/letta/local_llm/utils.py @@ -121,7 +121,7 @@ def num_tokens_from_functions(functions: List[dict], model: str = "gpt-4"): function_tokens += 3 function_tokens += len(encoding.encode(o)) else: - print(f"Warning: not supported field {field}") + warnings.warn(f"num_tokens_from_functions: Unsupported field {field} in function {function}") function_tokens += 11 num_tokens += function_tokens diff --git a/letta/services/tool_execution_sandbox.py b/letta/services/tool_execution_sandbox.py index 5f2b428a..2e7950fc 100644 --- a/letta/services/tool_execution_sandbox.py +++ b/letta/services/tool_execution_sandbox.py @@ -276,6 +276,25 @@ class ToolExecutionSandbox: return code + def _convert_param_to_value(self, param_type: str, raw_value: str) -> str: + + if param_type == "string": + value = '"' + raw_value + '"' + + elif param_type == "integer" or param_type == "boolean" or param_type == "number": + value = raw_value + + elif param_type == "array": + value = raw_value + + elif param_type == "object": + value = raw_value + + else: + raise TypeError(f"Unsupported type: {param_type}, raw_value={raw_value}") + + return str(value) + def initialize_param(self, name: str, raw_value: str) -> str: params = self.tool.json_schema["parameters"]["properties"] spec = params.get(name) @@ -287,14 +306,9 @@ class ToolExecutionSandbox: if param_type is None and spec.get("parameters"): param_type = spec["parameters"].get("type") - if param_type == "string": - value = '"' + raw_value + '"' - elif param_type == "integer" or param_type == "boolean": - value = raw_value - else: - raise TypeError(f"unsupported type: {param_type}") + value = self._convert_param_to_value(param_type, raw_value) - return name + " = " + str(value) + "\n" + return name + " = " + value + "\n" def invoke_function_call(self, inject_agent_state: bool) -> str: """ diff --git a/tests/test_tool_schema_parsing.py b/tests/test_tool_schema_parsing.py new file mode 100644 index 00000000..f6738a06 --- /dev/null +++ b/tests/test_tool_schema_parsing.py @@ -0,0 +1,178 @@ +import json +import os + +import pytest + +from letta.functions.functions import derive_openai_json_schema +from letta.llm_api.helpers import convert_to_structured_output, make_post_request + + +def _clean_diff(d1, d2): + """Utility function to clean up the diff between two dictionaries.""" + + # Keys in d1 but not in d2 + removed = {k: d1[k] for k in d1.keys() - d2.keys()} + + # Keys in d2 but not in d1 + added = {k: d2[k] for k in d2.keys() - d1.keys()} + + # Keys in both but values changed + changed = {k: (d1[k], d2[k]) for k in d1.keys() & d2.keys() if d1[k] != d2[k]} + + return {k: v for k, v in {"removed": removed, "added": added, "changed": changed}.items() if v} # Only include non-empty differences + + +def _compare_schemas(generated_schema: dict, expected_schema: dict, strip_heartbeat: bool = True): + """Compare an autogenerated schema to an expected schema.""" + + if strip_heartbeat: + # Pop out the heartbeat parameter + del generated_schema["parameters"]["properties"]["request_heartbeat"] + # Remove from the required list + generated_schema["parameters"]["required"].remove("request_heartbeat") + + # Check that the two schemas are equal + # If not, pretty print the difference by dumping with indent=4 + if generated_schema != expected_schema: + print("==== GENERATED SCHEMA ====") + print(json.dumps(generated_schema, indent=4)) + print("==== EXPECTED SCHEMA ====") + print(json.dumps(expected_schema, indent=4)) + print("==== DIFF ====") + print(json.dumps(_clean_diff(generated_schema, expected_schema), indent=4)) + raise AssertionError("Schemas are not equal") + else: + print("Schemas are equal") + + +def _run_schema_test(schema_name: str, desired_function_name: str, expect_structured_output_fail: bool = False): + """Load a file and compare the autogenerated schema to the expected schema.""" + + # Open the python file as a string + # Use the absolute path to make it easier to run the test from the root directory + with open(os.path.join(os.path.dirname(__file__), f"test_tool_schema_parsing_files/{schema_name}.py"), "r") as file: + source_code = file.read() + + # Derive the schema + schema = derive_openai_json_schema(source_code, name=desired_function_name) + + # Assert that the schema matches the expected schema + with open(os.path.join(os.path.dirname(__file__), f"test_tool_schema_parsing_files/{schema_name}.json"), "r") as file: + expected_schema = json.load(file) + + _compare_schemas(schema, expected_schema) + + # Convert to structured output and compare + if expect_structured_output_fail: + with pytest.raises(ValueError): + structured_output = convert_to_structured_output(schema) + + else: + structured_output = convert_to_structured_output(schema) + + with open(os.path.join(os.path.dirname(__file__), f"test_tool_schema_parsing_files/{schema_name}_so.json"), "r") as file: + expected_structured_output = json.load(file) + + _compare_schemas(structured_output, expected_structured_output, strip_heartbeat=False) + + +def test_derive_openai_json_schema(): + """Test that the schema generator works across a variety of example source code inputs.""" + + print("==== TESTING basic example where the arg is a pydantic model ====") + _run_schema_test("pydantic_as_single_arg_example", "create_step") + + print("==== TESTING basic example where the arg is a list of pydantic models ====") + _run_schema_test("list_of_pydantic_example", "create_task_plan") + + print("==== TESTING more complex example where the arg is a nested pydantic model ====") + _run_schema_test("nested_pydantic_as_arg_example", "create_task_plan") + + print("==== TESTING simple function with no args ====") + _run_schema_test("simple_d20", "roll_d20") + + print("==== TESTING complex function with many args ====") + _run_schema_test("all_python_complex", "check_order_status", expect_structured_output_fail=True) + + print("==== TESTING complex function with many args and no dict ====") + # TODO we should properly cast Optionals into union nulls + # Currently, we just disregard all Optional types on the conversion path + _run_schema_test("all_python_complex_nodict", "check_order_status") + + +def _openai_payload(model: str, schema: dict, structured_output: bool): + """Create an OpenAI payload with a tool call. + + Raw version of openai_chat_completions_request w/o pydantic models + """ + + if structured_output: + tool_schema = convert_to_structured_output(schema) + else: + tool_schema = schema + + api_key = os.getenv("OPENAI_API_KEY") + assert api_key is not None, "OPENAI_API_KEY must be set" + + # Simple system prompt to encourage the LLM to jump directly to a tool call + system_prompt = "You job is to test the tool that you've been provided. Don't ask for any clarification on the args, just come up with some dummy data and try executing the tool." + + url = "https://api.openai.com/v1/chat/completions" + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"} + data = { + "model": model, + "messages": [ + {"role": "system", "content": system_prompt}, + ], + "tools": [ + { + "type": "function", + "function": tool_schema, + } + ], + "tool_choice": "auto", # TODO force the tool call on the one we want + # NOTE: disabled for simplicity + "parallel_tool_calls": False, + } + + print("Request:\n", json.dumps(data, indent=2)) + + try: + make_post_request(url, headers, data) + except Exception as e: + print(f"Request failed, tool_schema=\n{json.dumps(tool_schema, indent=2)}") + print(f"Error: {e}") + raise e + + +def _load_schema_from_source_filename(filename: str) -> dict: + with open(os.path.join(os.path.dirname(__file__), f"test_tool_schema_parsing_files/{filename}.py"), "r") as file: + source_code = file.read() + + return derive_openai_json_schema(source_code) + + +# @pytest.mark.parametrize("openai_model", ["gpt-4o-mini"]) +# @pytest.mark.parametrize("structured_output", [True]) +@pytest.mark.parametrize("openai_model", ["gpt-4", "gpt-4o"]) +@pytest.mark.parametrize("structured_output", [True, False]) +def test_valid_schemas_via_openai(openai_model: str, structured_output: bool): + """Test that we can send the schemas to OpenAI and get a tool call back.""" + + for filename in [ + "pydantic_as_single_arg_example", + "list_of_pydantic_example", + "nested_pydantic_as_arg_example", + "simple_d20", + "all_python_complex", + "all_python_complex_nodict", + ]: + print(f"==== TESTING OPENAI PAYLOAD FOR {openai_model} + {filename} ====") + schema = _load_schema_from_source_filename(filename) + + # We should expect the all_python_complex one to fail when structured_output=True + if filename == "all_python_complex" and structured_output: + with pytest.raises(ValueError): + _openai_payload(openai_model, schema, structured_output) + else: + _openai_payload(openai_model, schema, structured_output) diff --git a/tests/test_tool_schema_parsing_files/all_python_complex.json b/tests/test_tool_schema_parsing_files/all_python_complex.json new file mode 100644 index 00000000..d0bd7986 --- /dev/null +++ b/tests/test_tool_schema_parsing_files/all_python_complex.json @@ -0,0 +1,37 @@ +{ + "name": "check_order_status", + "description": "Check the status for an order number (integer value).", + "parameters": { + "type": "object", + "properties": { + "order_number": { + "type": "integer", + "description": "The order number to check on." + }, + "customer_name": { + "type": "string", + "description": "The name of the customer who placed the order." + }, + "related_tickets": { + "type": "array", + "description": "A list of ticket numbers related to the order.", + "items": { + "type": "string" + } + }, + "related_ticket_reasons": { + "type": "object", + "description": "A dictionary of reasons for the related tickets." + }, + "severity": { + "type": "number", + "description": "The severity of the request (between 0 and 1)." + }, + "metadata": { + "type": "object", + "description": "Additional metadata about the order." + } + }, + "required": ["order_number", "customer_name", "related_tickets", "related_ticket_reasons", "severity"] + } + } diff --git a/tests/test_tool_schema_parsing_files/all_python_complex.py b/tests/test_tool_schema_parsing_files/all_python_complex.py new file mode 100644 index 00000000..4f7bc947 --- /dev/null +++ b/tests/test_tool_schema_parsing_files/all_python_complex.py @@ -0,0 +1,28 @@ +from typing import List, Optional + + +def check_order_status( + order_number: int, + customer_name: str, + related_tickets: List[str], + related_ticket_reasons: dict, + severity: float, + metadata: Optional[dict], +): + """ + Check the status for an order number (integer value). + + Args: + order_number (int): The order number to check on. + customer_name (str): The name of the customer who placed the order. + related_tickets (List[str]): A list of ticket numbers related to the order. + related_ticket_reasons (dict): A dictionary of reasons for the related tickets. + severity (float): The severity of the request (between 0 and 1). + metadata (Optional[dict]): Additional metadata about the order. + + Returns: + str: The status of the order (e.g. cancelled, refunded, processed, processing, shipping). + """ + # TODO replace this with a real query to a database + dummy_message = f"Order {order_number} is currently processing." + return dummy_message diff --git a/tests/test_tool_schema_parsing_files/all_python_complex_nodict.json b/tests/test_tool_schema_parsing_files/all_python_complex_nodict.json new file mode 100644 index 00000000..6e0d3867 --- /dev/null +++ b/tests/test_tool_schema_parsing_files/all_python_complex_nodict.json @@ -0,0 +1,33 @@ +{ + "name": "check_order_status", + "description": "Check the status for an order number (integer value).", + "parameters": { + "type": "object", + "properties": { + "order_number": { + "type": "integer", + "description": "The order number to check on." + }, + "customer_name": { + "type": "string", + "description": "The name of the customer who placed the order." + }, + "related_tickets": { + "type": "array", + "description": "A list of ticket numbers related to the order.", + "items": { + "type": "string" + } + }, + "severity": { + "type": "number", + "description": "The severity of the request (between 0 and 1)." + }, + "metadata": { + "type": "string", + "description": "Additional metadata about the order." + } + }, + "required": ["order_number", "customer_name", "related_tickets", "severity"] + } + } diff --git a/tests/test_tool_schema_parsing_files/all_python_complex_nodict.py b/tests/test_tool_schema_parsing_files/all_python_complex_nodict.py new file mode 100644 index 00000000..1c1bac4f --- /dev/null +++ b/tests/test_tool_schema_parsing_files/all_python_complex_nodict.py @@ -0,0 +1,26 @@ +from typing import List, Optional + + +def check_order_status( + order_number: int, + customer_name: str, + related_tickets: List[str], + severity: float, + metadata: Optional[str], +): + """ + Check the status for an order number (integer value). + + Args: + order_number (int): The order number to check on. + customer_name (str): The name of the customer who placed the order. + related_tickets (List[str]): A list of ticket numbers related to the order. + severity (float): The severity of the request (between 0 and 1). + metadata (Optional[str]): Additional metadata about the order. + + Returns: + str: The status of the order (e.g. cancelled, refunded, processed, processing, shipping). + """ + # TODO replace this with a real query to a database + dummy_message = f"Order {order_number} is currently processing." + return dummy_message diff --git a/tests/test_tool_schema_parsing_files/all_python_complex_nodict_so.json b/tests/test_tool_schema_parsing_files/all_python_complex_nodict_so.json new file mode 100644 index 00000000..36b1b49b --- /dev/null +++ b/tests/test_tool_schema_parsing_files/all_python_complex_nodict_so.json @@ -0,0 +1,35 @@ +{ + "name": "check_order_status", + "description": "Check the status for an order number (integer value).", + "strict": true, + "parameters": { + "type": "object", + "properties": { + "order_number": { + "type": "integer", + "description": "The order number to check on." + }, + "customer_name": { + "type": "string", + "description": "The name of the customer who placed the order." + }, + "related_tickets": { + "type": "array", + "description": "A list of ticket numbers related to the order.", + "items": { + "type": "string" + } + }, + "severity": { + "type": "number", + "description": "The severity of the request (between 0 and 1)." + }, + "metadata": { + "type": "string", + "description": "Additional metadata about the order." + } + }, + "additionalProperties": false, + "required": ["order_number", "customer_name", "related_tickets", "severity", "metadata"] + } + } diff --git a/tests/test_tool_schema_parsing_files/all_python_complex_so.json b/tests/test_tool_schema_parsing_files/all_python_complex_so.json new file mode 100644 index 00000000..d0bd7986 --- /dev/null +++ b/tests/test_tool_schema_parsing_files/all_python_complex_so.json @@ -0,0 +1,37 @@ +{ + "name": "check_order_status", + "description": "Check the status for an order number (integer value).", + "parameters": { + "type": "object", + "properties": { + "order_number": { + "type": "integer", + "description": "The order number to check on." + }, + "customer_name": { + "type": "string", + "description": "The name of the customer who placed the order." + }, + "related_tickets": { + "type": "array", + "description": "A list of ticket numbers related to the order.", + "items": { + "type": "string" + } + }, + "related_ticket_reasons": { + "type": "object", + "description": "A dictionary of reasons for the related tickets." + }, + "severity": { + "type": "number", + "description": "The severity of the request (between 0 and 1)." + }, + "metadata": { + "type": "object", + "description": "Additional metadata about the order." + } + }, + "required": ["order_number", "customer_name", "related_tickets", "related_ticket_reasons", "severity"] + } + } diff --git a/tests/test_tool_schema_parsing_files/list_of_pydantic_example.json b/tests/test_tool_schema_parsing_files/list_of_pydantic_example.json new file mode 100644 index 00000000..d2aeb6bd --- /dev/null +++ b/tests/test_tool_schema_parsing_files/list_of_pydantic_example.json @@ -0,0 +1,32 @@ +{ + "name": "create_task_plan", + "description": "Creates a task plan for the current task.", + "parameters": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "description": "List of steps to add to the task plan.", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the step." + }, + "key": { + "type": "string", + "description": "Unique identifier for the step." + }, + "description": { + "type": "string", + "description": "An exhaustic description of what this step is trying to achieve and accomplish." + } + }, + "required": ["name", "key", "description"] + } + } + }, + "required": ["steps"] + } +} diff --git a/tests/test_tool_schema_parsing_files/list_of_pydantic_example.py b/tests/test_tool_schema_parsing_files/list_of_pydantic_example.py new file mode 100644 index 00000000..cef1b7c9 --- /dev/null +++ b/tests/test_tool_schema_parsing_files/list_of_pydantic_example.py @@ -0,0 +1,38 @@ +from pydantic import BaseModel, Field + + +class Step(BaseModel): + name: str = Field( + ..., + description="Name of the step.", + ) + key: str = Field( + ..., + description="Unique identifier for the step.", + ) + description: str = Field( + ..., + description="An exhaustic description of what this step is trying to achieve and accomplish.", + ) + + +def create_task_plan(steps: list[Step]) -> str: + """ + Creates a task plan for the current task. + It takes in a list of steps, and updates the task with the new steps provided. + If there are any current steps, they will be overwritten. + Each step in the list should have the following format: + { + "name": -- Name of the step. + "key": -- Unique identifier for the step. + "description": -- An exhaustic description of what this step is trying to achieve and accomplish. + } + + Args: + steps: List of steps to add to the task plan. + + Returns: + str: A summary of the updated task plan after deletion + """ + DUMMY_MESSAGE = "Task plan created successfully." + return DUMMY_MESSAGE diff --git a/tests/test_tool_schema_parsing_files/list_of_pydantic_example_so.json b/tests/test_tool_schema_parsing_files/list_of_pydantic_example_so.json new file mode 100644 index 00000000..f4b8a930 --- /dev/null +++ b/tests/test_tool_schema_parsing_files/list_of_pydantic_example_so.json @@ -0,0 +1,35 @@ +{ + "name": "create_task_plan", + "description": "Creates a task plan for the current task.", + "strict": true, + "parameters": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "description": "List of steps to add to the task plan.", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the step." + }, + "key": { + "type": "string", + "description": "Unique identifier for the step." + }, + "description": { + "type": "string", + "description": "An exhaustic description of what this step is trying to achieve and accomplish." + } + }, + "additionalProperties": false, + "required": ["name", "key", "description"] + } + } + }, + "additionalProperties": false, + "required": ["steps"] + } + } diff --git a/tests/test_tool_schema_parsing_files/nested_pydantic_as_arg_example.json b/tests/test_tool_schema_parsing_files/nested_pydantic_as_arg_example.json new file mode 100644 index 00000000..53cb12d9 --- /dev/null +++ b/tests/test_tool_schema_parsing_files/nested_pydantic_as_arg_example.json @@ -0,0 +1,39 @@ +{ + "name": "create_task_plan", + "description": "Creates a task plan for the current task.", + "parameters": { + "type": "object", + "properties": { + "steps": { + "type": "object", + "description": "List of steps to add to the task plan.", + "properties": { + "steps": { + "type": "array", + "description": "A list of steps to add to the task plan.", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the step." + }, + "key": { + "type": "string", + "description": "Unique identifier for the step." + }, + "description": { + "type": "string", + "description": "An exhaustic description of what this step is trying to achieve and accomplish." + } + }, + "required": ["name", "key", "description"] + } + } + }, + "required": ["steps"] + } + }, + "required": ["steps"] + } + } diff --git a/tests/test_tool_schema_parsing_files/nested_pydantic_as_arg_example.py b/tests/test_tool_schema_parsing_files/nested_pydantic_as_arg_example.py new file mode 100644 index 00000000..50813f89 --- /dev/null +++ b/tests/test_tool_schema_parsing_files/nested_pydantic_as_arg_example.py @@ -0,0 +1,47 @@ +from pydantic import BaseModel, Field + + +class Step(BaseModel): + name: str = Field( + ..., + description="Name of the step.", + ) + key: str = Field( + ..., + description="Unique identifier for the step.", + ) + description: str = Field( + ..., + description="An exhaustic description of what this step is trying to achieve and accomplish.", + ) + + +# NOTE: this example is pretty contrived - you probably don't want to have a nested pydantic model with +# a single field that's the same as the variable name (in this case, `steps`) +class Steps(BaseModel): + steps: list[Step] = Field( + ..., + description="A list of steps to add to the task plan.", + ) + + +def create_task_plan(steps: Steps) -> str: + """ + Creates a task plan for the current task. + It takes in a list of steps, and updates the task with the new steps provided. + If there are any current steps, they will be overwritten. + Each step in the list should have the following format: + { + "name": -- Name of the step. + "key": -- Unique identifier for the step. + "description": -- An exhaustic description of what this step is trying to achieve and accomplish. + } + + Args: + steps: List of steps to add to the task plan. + + Returns: + str: A summary of the updated task plan after deletion + """ + DUMMY_MESSAGE = "Task plan created successfully." + return DUMMY_MESSAGE diff --git a/tests/test_tool_schema_parsing_files/nested_pydantic_as_arg_example_so.json b/tests/test_tool_schema_parsing_files/nested_pydantic_as_arg_example_so.json new file mode 100644 index 00000000..5f886b5d --- /dev/null +++ b/tests/test_tool_schema_parsing_files/nested_pydantic_as_arg_example_so.json @@ -0,0 +1,43 @@ +{ + "name": "create_task_plan", + "description": "Creates a task plan for the current task.", + "strict": true, + "parameters": { + "type": "object", + "properties": { + "steps": { + "type": "object", + "description": "List of steps to add to the task plan.", + "properties": { + "steps": { + "type": "array", + "description": "A list of steps to add to the task plan.", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the step." + }, + "key": { + "type": "string", + "description": "Unique identifier for the step." + }, + "description": { + "type": "string", + "description": "An exhaustic description of what this step is trying to achieve and accomplish." + } + }, + "additionalProperties": false, + "required": ["name", "key", "description"] + } + } + }, + "additionalProperties": false, + "required": ["steps"] + } + }, + "additionalProperties": false, + "required": ["steps"] + } + } diff --git a/tests/test_tool_schema_parsing_files/pydantic_as_single_arg_example.json b/tests/test_tool_schema_parsing_files/pydantic_as_single_arg_example.json new file mode 100644 index 00000000..b0a34fad --- /dev/null +++ b/tests/test_tool_schema_parsing_files/pydantic_as_single_arg_example.json @@ -0,0 +1,29 @@ +{ + "name": "create_step", + "description": "Creates a step for the current task.", + "parameters": { + "type": "object", + "properties": { + "step": { + "type": "object", + "description": "A step to add to the task plan.", + "properties": { + "name": { + "type": "string", + "description": "Name of the step." + }, + "key": { + "type": "string", + "description": "Unique identifier for the step." + }, + "description": { + "type": "string", + "description": "An exhaustic description of what this step is trying to achieve and accomplish." + } + }, + "required": ["name", "key", "description"] + } + }, + "required": ["step"] + } +} diff --git a/tests/test_tool_schema_parsing_files/pydantic_as_single_arg_example.py b/tests/test_tool_schema_parsing_files/pydantic_as_single_arg_example.py new file mode 100644 index 00000000..6a1b2264 --- /dev/null +++ b/tests/test_tool_schema_parsing_files/pydantic_as_single_arg_example.py @@ -0,0 +1,30 @@ +from pydantic import BaseModel, Field + + +class Step(BaseModel): + name: str = Field( + ..., + description="Name of the step.", + ) + key: str = Field( + ..., + description="Unique identifier for the step.", + ) + description: str = Field( + ..., + description="An exhaustic description of what this step is trying to achieve and accomplish.", + ) + + +def create_step(step: Step) -> str: + """ + Creates a step for the current task. + + Args: + step: A step to add to the task plan. + + Returns: + str: A summary of the updated task plan after deletion + """ + DUMMY_MESSAGE = "Step created successfully." + return DUMMY_MESSAGE diff --git a/tests/test_tool_schema_parsing_files/pydantic_as_single_arg_example_so.json b/tests/test_tool_schema_parsing_files/pydantic_as_single_arg_example_so.json new file mode 100644 index 00000000..0583910f --- /dev/null +++ b/tests/test_tool_schema_parsing_files/pydantic_as_single_arg_example_so.json @@ -0,0 +1,32 @@ +{ + "name": "create_step", + "description": "Creates a step for the current task.", + "strict": true, + "parameters": { + "type": "object", + "properties": { + "step": { + "type": "object", + "description": "A step to add to the task plan.", + "properties": { + "name": { + "type": "string", + "description": "Name of the step." + }, + "key": { + "type": "string", + "description": "Unique identifier for the step." + }, + "description": { + "type": "string", + "description": "An exhaustic description of what this step is trying to achieve and accomplish." + } + }, + "additionalProperties": false, + "required": ["name", "key", "description"] + } + }, + "additionalProperties": false, + "required": ["step"] + } +} diff --git a/tests/test_tool_schema_parsing_files/simple_d20.json b/tests/test_tool_schema_parsing_files/simple_d20.json new file mode 100644 index 00000000..7d660baf --- /dev/null +++ b/tests/test_tool_schema_parsing_files/simple_d20.json @@ -0,0 +1,9 @@ +{ + "name": "roll_d20", + "description": "Simulate the roll of a 20-sided die (d20).", + "parameters": { + "type": "object", + "properties": {}, + "required": [] + } + } diff --git a/tests/test_tool_schema_parsing_files/simple_d20.py b/tests/test_tool_schema_parsing_files/simple_d20.py new file mode 100644 index 00000000..242983cf --- /dev/null +++ b/tests/test_tool_schema_parsing_files/simple_d20.py @@ -0,0 +1,15 @@ +def roll_d20(): + """ + Simulate the roll of a 20-sided die (d20). + + This function generates a random integer between 1 and 20, inclusive, + which represents the outcome of a single roll of a d20. + + Returns: + str: The result of the die roll. + """ + import random + + dice_role_outcome = random.randint(1, 20) + output_string = f"You rolled a {dice_role_outcome}" + return output_string diff --git a/tests/test_tool_schema_parsing_files/simple_d20_so.json b/tests/test_tool_schema_parsing_files/simple_d20_so.json new file mode 100644 index 00000000..2f3ddeab --- /dev/null +++ b/tests/test_tool_schema_parsing_files/simple_d20_so.json @@ -0,0 +1,11 @@ +{ + "name": "roll_d20", + "description": "Simulate the roll of a 20-sided die (d20).", + "strict": true, + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": false, + "required": [] + } +}