diff --git a/fern/openapi.json b/fern/openapi.json index 24a8bafa..b8cc7e7c 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -23974,7 +23974,8 @@ } ], "title": "Response Format", - "description": "The response format for the agent." + "description": "Deprecated: Use `model_settings` field to configure response format instead. The response format for the agent.", + "deprecated": true }, "timezone": { "anyOf": [ @@ -28173,7 +28174,8 @@ } ], "title": "Response Format", - "description": "The response format for the agent." + "description": "Deprecated: Use `model_settings` field to configure response format instead. The response format for the agent.", + "deprecated": true }, "timezone": { "anyOf": [ @@ -29004,6 +29006,36 @@ "description": "Deprecated: Use model_settings to configure parallel tool calls instead. If set to True, enables parallel tool calling. Defaults to False.", "default": false, "deprecated": true + }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings." } }, "type": "object", @@ -31186,6 +31218,36 @@ "default": false, "deprecated": true }, + "response_format": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/TextResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonSchemaResponseFormat" + }, + { + "$ref": "#/components/schemas/JsonObjectResponseFormat" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "json_object": "#/components/schemas/JsonObjectResponseFormat", + "json_schema": "#/components/schemas/JsonSchemaResponseFormat", + "text": "#/components/schemas/TextResponseFormat" + } + } + }, + { + "type": "null" + } + ], + "title": "Response Format", + "description": "The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings." + }, "max_context_window": { "type": "integer", "title": "Max Context Window", @@ -37005,7 +37067,7 @@ } ], "title": "Response Format", - "description": "Deprecated: Use `model` field to configure response format instead. The response format for the agent.", + "description": "Deprecated: Use `model_settings` field to configure response format instead. The response format for the agent.", "deprecated": true }, "max_tokens": { @@ -38356,7 +38418,8 @@ } ], "title": "Response Format", - "description": "The response format for the agent." + "description": "Deprecated: Use `model_settings` field to configure response format instead. The response format for the agent.", + "deprecated": true }, "timezone": { "anyOf": [ diff --git a/letta/llm_api/helpers.py b/letta/llm_api/helpers.py index 7aa663a7..dbea6829 100644 --- a/letta/llm_api/helpers.py +++ b/letta/llm_api/helpers.py @@ -1,7 +1,8 @@ import copy import json +import logging from collections import OrderedDict -from typing import Any, List, Union +from typing import Any, List, Optional, Union import requests @@ -10,6 +11,13 @@ from letta.helpers.json_helpers import json_dumps from letta.log import get_logger from letta.schemas.message import Message from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice +from letta.schemas.response_format import ( + JsonObjectResponseFormat, + JsonSchemaResponseFormat, + ResponseFormatType, + ResponseFormatUnion, + TextResponseFormat, +) from letta.settings import summarizer_settings from letta.utils import count_tokens, printd @@ -166,6 +174,61 @@ def convert_to_structured_output(openai_function: dict, allow_optional: bool = F return structured_output +def convert_response_format_to_responses_api( + response_format: Optional["ResponseFormatUnion"], +) -> Optional[dict]: + """ + Convert Letta's ResponseFormatUnion to OpenAI Responses API text.format structure. + + The Responses API uses a different structure than Chat Completions: + text={ + "format": { + "type": "json_schema", + "name": "...", + "strict": True, + "schema": {...} + } + } + + Args: + response_format: Letta ResponseFormatUnion object + + Returns: + Dict with format structure for Responses API, or None + """ + if response_format is None: + return None + + # Text format - return None since it's the default + if isinstance(response_format, TextResponseFormat): + return None + + # JSON object format - not directly supported in Responses API + # Users should use json_schema instead + elif isinstance(response_format, JsonObjectResponseFormat): + logger.warning( + "json_object response format is not supported in Responses API. " + "Use json_schema with a proper schema instead. Skipping response_format." + ) + return None + + # JSON schema format - this is what Responses API supports + elif isinstance(response_format, JsonSchemaResponseFormat): + json_schema_dict = response_format.json_schema + + # Ensure required fields are present + if "schema" not in json_schema_dict: + logger.warning("json_schema missing 'schema' field, skipping response_format") + return None + + return { + "type": "json_schema", + "name": json_schema_dict.get("name", "response_schema"), + "schema": json_schema_dict["schema"], + "strict": json_schema_dict.get("strict", True), # Default to strict mode + } + + def make_post_request(url: str, headers: dict[str, str], data: dict[str, Any]) -> dict[str, Any]: printd(f"Sending request to {url}") try: diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py index 930e29b0..c178f993 100644 --- a/letta/llm_api/openai_client.py +++ b/letta/llm_api/openai_client.py @@ -25,7 +25,12 @@ from letta.errors import ( LLMTimeoutError, LLMUnprocessableEntityError, ) -from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, unpack_all_inner_thoughts_from_kwargs +from letta.llm_api.helpers import ( + add_inner_thoughts_to_functions, + convert_response_format_to_responses_api, + convert_to_structured_output, + unpack_all_inner_thoughts_from_kwargs, +) from letta.llm_api.llm_client_base import LLMClientBase from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST from letta.log import get_logger @@ -52,6 +57,7 @@ from letta.schemas.openai.chat_completion_response import ( UsageStatistics, ) from letta.schemas.openai.responses_request import ResponsesRequest +from letta.schemas.response_format import JsonSchemaResponseFormat from letta.settings import model_settings logger = get_logger(__name__) @@ -339,11 +345,22 @@ class OpenAIClient(LLMClientBase): parallel_tool_calls=llm_config.parallel_tool_calls if tools and supports_parallel_tool_calling(model) else False, ) + # Handle text configuration (verbosity and response format) + text_config_kwargs = {} + # Add verbosity control for GPT-5 models if supports_verbosity_control(model) and llm_config.verbosity: - # data.verbosity = llm_config.verbosity - # https://cookbook.openai.com/examples/gpt-5/gpt-5_new_params_and_tools - data.text = ResponseTextConfigParam(verbosity=llm_config.verbosity) + text_config_kwargs["verbosity"] = llm_config.verbosity + + # Add response_format support for structured outputs via text.format + if hasattr(llm_config, "response_format") and llm_config.response_format is not None: + format_dict = convert_response_format_to_responses_api(llm_config.response_format) + if format_dict is not None: + text_config_kwargs["format"] = format_dict + + # Set text config if we have any parameters + if text_config_kwargs: + data.text = ResponseTextConfigParam(**text_config_kwargs) # Add reasoning effort control for reasoning models # Only set reasoning if effort is not "none" (GPT-5.1 uses "none" to disable reasoning) @@ -501,6 +518,16 @@ class OpenAIClient(LLMClientBase): if tools and supports_parallel_tool_calling(model): data.parallel_tool_calls = False + # Add response_format support for structured outputs + if hasattr(llm_config, "response_format") and llm_config.response_format is not None: + # For Chat Completions API, we need the full nested structure + if isinstance(llm_config.response_format, JsonSchemaResponseFormat): + # Convert to the OpenAI SDK format + data.response_format = {"type": "json_schema", "json_schema": llm_config.response_format.json_schema} + else: + # For text or json_object, just pass the type + data.response_format = {"type": llm_config.response_format.type} + # always set user id for openai requests if self.actor: data.user = self.actor.id diff --git a/letta/schemas/agent.py b/letta/schemas/agent.py index 7a8c52c9..596729b6 100644 --- a/letta/schemas/agent.py +++ b/letta/schemas/agent.py @@ -299,7 +299,11 @@ class CreateAgent(BaseModel, validate_assignment=True): # description="If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case.", ) enable_sleeptime: Optional[bool] = Field(None, description="If set to True, memory management will move to a background agent thread.") - response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the agent.") + response_format: Optional[ResponseFormatUnion] = Field( + None, + description="Deprecated: Use `model_settings` field to configure response format instead. The response format for the agent.", + deprecated=True, + ) timezone: Optional[str] = Field(None, description="The timezone of the agent (IANA format).") max_files_open: Optional[int] = Field( None, @@ -447,7 +451,7 @@ class UpdateAgent(BaseModel): ) response_format: Optional[ResponseFormatUnion] = Field( None, - description="Deprecated: Use `model` field to configure response format instead. The response format for the agent.", + description="Deprecated: Use `model_settings` field to configure response format instead. The response format for the agent.", deprecated=True, ) max_tokens: Optional[int] = Field( diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index cd9d867f..33892ebb 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -6,6 +6,7 @@ from letta.constants import LETTA_MODEL_ENDPOINT from letta.errors import LettaInvalidArgumentError from letta.log import get_logger from letta.schemas.enums import AgentType, ProviderCategory +from letta.schemas.response_format import ResponseFormatUnion if TYPE_CHECKING: from letta.schemas.model import ModelSettings @@ -99,6 +100,10 @@ class LLMConfig(BaseModel): description="Deprecated: Use model_settings to configure parallel tool calls instead. If set to True, enables parallel tool calling. Defaults to False.", deprecated=True, ) + response_format: Optional[ResponseFormatUnion] = Field( + None, + description="The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings.", + ) @model_validator(mode="before") @classmethod diff --git a/letta/schemas/openai/chat_completion_request.py b/letta/schemas/openai/chat_completion_request.py index 89c67e6b..da1c2632 100644 --- a/letta/schemas/openai/chat_completion_request.py +++ b/letta/schemas/openai/chat_completion_request.py @@ -59,7 +59,13 @@ def cast_message_to_subtype(m_dict: dict) -> ChatMessage: class ResponseFormat(BaseModel): - type: str = Field(default="text", pattern="^(text|json_object)$") + """ + Response format for OpenAI Chat Completions API. + Can be a simple type string or a dict with nested json_schema. + """ + + # Allow either simple dict or complex nested structure + model_config = {"extra": "allow"} # Allow extra fields for json_schema ## tool_choice ## @@ -126,7 +132,7 @@ class ChatCompletionRequest(BaseModel): max_completion_tokens: Optional[int] = None n: Optional[int] = 1 presence_penalty: Optional[float] = 0 - response_format: Optional[ResponseFormat] = None + response_format: Optional[Union[ResponseFormat, Dict[str, Any]]] = None seed: Optional[int] = None stop: Optional[Union[str, List[str]]] = None stream: Optional[bool] = False diff --git a/tests/integration_test_send_message_v2.py b/tests/integration_test_send_message_v2.py index 66028048..60863dd5 100644 --- a/tests/integration_test_send_message_v2.py +++ b/tests/integration_test_send_message_v2.py @@ -12,7 +12,7 @@ import pytest import requests from dotenv import load_dotenv from letta_client import AsyncLetta -from letta_client.types import AgentState, MessageCreateParam, ToolReturnMessage +from letta_client.types import AgentState, JsonSchemaResponseFormat, MessageCreateParam, OpenAIModelSettings, ToolReturnMessage from letta_client.types.agents import AssistantMessage, ReasoningMessage, Run, ToolCallMessage, UserMessage from letta_client.types.agents.letta_streaming_response import LettaPing, LettaStopReason, LettaUsageStatistics @@ -901,3 +901,89 @@ async def test_tool_call( assert run_id is not None run = await client.runs.retrieve(run_id=run_id) assert run.status == ("cancelled" if cancellation == "with_cancellation" else "completed") + + +@pytest.mark.parametrize( + "model_handle,api_type", + [ + ("openai/gpt-4o", "chat_completions"), + ("openai/gpt-5", "responses"), + ], +) +@pytest.mark.asyncio(loop_scope="function") +async def test_json_schema_response_format( + disable_e2b_api_key: Any, + client: AsyncLetta, + model_handle: str, + api_type: str, +) -> None: + """ + Test JsonSchemaResponseFormat with both Chat Completions API (gpt-4o) and Responses API (gpt-5). + + This test verifies that: + 1. Agents can be created with json_schema response_format via model_settings + 2. The schema is properly stored in the agent's model_settings + 3. Messages sent to the agent produce responses conforming to the schema + 4. Both APIs (Chat Completions and Responses) handle structured outputs correctly + """ + # Define the structured output schema + response_schema = { + "name": "capital_response", + "strict": True, + "schema": { + "type": "object", + "properties": { + "response": {"type": "string", "description": "The answer to the question"}, + "justification": {"type": "string", "description": "Why this is the answer"}, + }, + "required": ["response", "justification"], + "additionalProperties": False, + }, + } + + # Create model settings with json_schema response format + model_settings = OpenAIModelSettings( + provider_type="openai", response_format=JsonSchemaResponseFormat(type="json_schema", json_schema=response_schema) + ) + + # Create agent with structured output configuration + agent_state = await client.agents.create( + name=f"test_structured_agent_{model_handle.replace('/', '_')}", + model=model_handle, + model_settings=model_settings, + embedding="openai/text-embedding-3-small", + agent_type="letta_v1_agent", + ) + + try: + # Send a message to the agent + message_response = await client.agents.messages.create( + agent_id=agent_state.id, messages=[MessageCreateParam(role="user", content="What is the capital of France?")] + ) + + # Verify we got a response + assert len(message_response.messages) > 0, "Should have received at least one message" + + # Find the assistant message and verify it contains valid JSON matching the schema + assistant_message = None + for msg in message_response.messages: + if isinstance(msg, AssistantMessage): + assistant_message = msg + break + + assert assistant_message is not None, "Should have received an AssistantMessage" + + # Parse the content as JSON + parsed_content = json.loads(assistant_message.content) + + # Verify the JSON has the required fields from our schema + assert "response" in parsed_content, "JSON should contain 'response' field" + assert "justification" in parsed_content, "JSON should contain 'justification' field" + assert isinstance(parsed_content["response"], str), "'response' field should be a string" + assert isinstance(parsed_content["justification"], str), "'justification' field should be a string" + assert len(parsed_content["response"]) > 0, "'response' field should not be empty" + assert len(parsed_content["justification"]) > 0, "'justification' field should not be empty" + + finally: + # Cleanup + await client.agents.delete(agent_state.id) diff --git a/tests/managers/test_agent_manager.py b/tests/managers/test_agent_manager.py index 6361e406..06b6798d 100644 --- a/tests/managers/test_agent_manager.py +++ b/tests/managers/test_agent_manager.py @@ -1542,6 +1542,7 @@ async def test_agent_state_schema_unchanged(server: SyncServer): "enable_reasoner", "reasoning_effort", "effort", + "response_format", "max_reasoning_tokens", "frequency_penalty", "compatibility_type",