diff --git a/fern/openapi.json b/fern/openapi.json
index 24a8bafa..b8cc7e7c 100644
--- a/fern/openapi.json
+++ b/fern/openapi.json
@@ -23974,7 +23974,8 @@
               }
             ],
             "title": "Response Format",
-            "description": "The response format for the agent."
+            "description": "Deprecated: Use `model_settings` field to configure response format instead. The response format for the agent.",
+            "deprecated": true
           },
           "timezone": {
             "anyOf": [
@@ -28173,7 +28174,8 @@
               }
             ],
             "title": "Response Format",
-            "description": "The response format for the agent."
+            "description": "Deprecated: Use `model_settings` field to configure response format instead. The response format for the agent.",
+            "deprecated": true
           },
           "timezone": {
             "anyOf": [
@@ -29004,6 +29006,36 @@
             "description": "Deprecated: Use model_settings to configure parallel tool calls instead. If set to True, enables parallel tool calling. Defaults to False.",
             "default": false,
             "deprecated": true
+          },
+          "response_format": {
+            "anyOf": [
+              {
+                "oneOf": [
+                  {
+                    "$ref": "#/components/schemas/TextResponseFormat"
+                  },
+                  {
+                    "$ref": "#/components/schemas/JsonSchemaResponseFormat"
+                  },
+                  {
+                    "$ref": "#/components/schemas/JsonObjectResponseFormat"
+                  }
+                ],
+                "discriminator": {
+                  "propertyName": "type",
+                  "mapping": {
+                    "json_object": "#/components/schemas/JsonObjectResponseFormat",
+                    "json_schema": "#/components/schemas/JsonSchemaResponseFormat",
+                    "text": "#/components/schemas/TextResponseFormat"
+                  }
+                }
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Response Format",
+            "description": "The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings."
           }
         },
         "type": "object",
@@ -31186,6 +31218,36 @@
             "default": false,
             "deprecated": true
           },
+          "response_format": {
+            "anyOf": [
+              {
+                "oneOf": [
+                  {
+                    "$ref": "#/components/schemas/TextResponseFormat"
+                  },
+                  {
+                    "$ref": "#/components/schemas/JsonSchemaResponseFormat"
+                  },
+                  {
+                    "$ref": "#/components/schemas/JsonObjectResponseFormat"
+                  }
+                ],
+                "discriminator": {
+                  "propertyName": "type",
+                  "mapping": {
+                    "json_object": "#/components/schemas/JsonObjectResponseFormat",
+                    "json_schema": "#/components/schemas/JsonSchemaResponseFormat",
+                    "text": "#/components/schemas/TextResponseFormat"
+                  }
+                }
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Response Format",
+            "description": "The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings."
+          },
           "max_context_window": {
             "type": "integer",
             "title": "Max Context Window",
@@ -37005,7 +37067,7 @@
               }
             ],
             "title": "Response Format",
-            "description": "Deprecated: Use `model` field to configure response format instead. The response format for the agent.",
+            "description": "Deprecated: Use `model_settings` field to configure response format instead. The response format for the agent.",
             "deprecated": true
           },
           "max_tokens": {
@@ -38356,7 +38418,8 @@
               }
             ],
             "title": "Response Format",
-            "description": "The response format for the agent."
+            "description": "Deprecated: Use `model_settings` field to configure response format instead. The response format for the agent.",
+            "deprecated": true
           },
           "timezone": {
             "anyOf": [
diff --git a/letta/llm_api/helpers.py b/letta/llm_api/helpers.py
index 7aa663a7..dbea6829 100644
--- a/letta/llm_api/helpers.py
+++ b/letta/llm_api/helpers.py
@@ -1,7 +1,8 @@
 import copy
 import json
+import logging
 from collections import OrderedDict
-from typing import Any, List, Union
+from typing import Any, List, Optional, Union
 
 import requests
 
@@ -10,6 +11,13 @@ from letta.helpers.json_helpers import json_dumps
 from letta.log import get_logger
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice
+from letta.schemas.response_format import (
+    JsonObjectResponseFormat,
+    JsonSchemaResponseFormat,
+    ResponseFormatType,
+    ResponseFormatUnion,
+    TextResponseFormat,
+)
 from letta.settings import summarizer_settings
 from letta.utils import count_tokens, printd
 
@@ -166,6 +174,61 @@ def convert_to_structured_output(openai_function: dict, allow_optional: bool = F
     return structured_output
 
 
+def convert_response_format_to_responses_api(
+    response_format: Optional["ResponseFormatUnion"],
+) -> Optional[dict]:
+    """
+    Convert Letta's ResponseFormatUnion to the OpenAI Responses API text.format structure.
+
+    The Responses API drops the Chat Completions `json_schema` wrapper, so the dict
+    returned here is the value placed under text={"format": ...}:
+        {"type": "json_schema", "name": "...", "schema": {...}, "strict": True}
+    A "description" key is added when the source json_schema provides a non-empty one.
+
+    Args:
+        response_format: Letta ResponseFormatUnion object, or None
+
+    Returns:
+        The format dict for a json_schema format that has a "schema" key. None when no
+        format is given, for text (the default), for json_object (a warning is logged),
+        when "schema" is missing (a warning is logged), or for any other format type.
+    """
+    # Nothing to send: no format requested, or text, which is already the default
+    if response_format is None or isinstance(response_format, TextResponseFormat):
+        return None
+
+    # JSON object format - not directly supported in Responses API
+    # Users should use json_schema instead
+    if isinstance(response_format, JsonObjectResponseFormat):
+        logger.warning(
+            "json_object response format is not supported in Responses API. "
+            "Use json_schema with a proper schema instead. Skipping response_format."
+        )
+        return None
+
+    # Only json_schema is left to handle; return None explicitly for anything else
+    # rather than falling off the end of the function
+    if not isinstance(response_format, JsonSchemaResponseFormat):
+        return None
+
+    json_schema_dict = response_format.json_schema
+    if "schema" not in json_schema_dict:
+        logger.warning("json_schema missing 'schema' field, skipping response_format")
+        return None
+
+    format_dict = {
+        "type": "json_schema",
+        "name": json_schema_dict.get("name", "response_schema"),
+        "schema": json_schema_dict["schema"],
+        "strict": json_schema_dict.get("strict", True),  # Default to strict mode
+    }
+    # Forward the optional description instead of silently dropping it
+    description = json_schema_dict.get("description")
+    if description:
+        format_dict["description"] = description
+    return format_dict
+
+
 def make_post_request(url: str, headers: dict[str, str], data: dict[str, Any]) -> dict[str, Any]:
     printd(f"Sending request to {url}")
     try:
diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index 930e29b0..c178f993 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -25,7 +25,12 @@ from letta.errors import (
     LLMTimeoutError,
     LLMUnprocessableEntityError,
 )
-from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, unpack_all_inner_thoughts_from_kwargs
+from letta.llm_api.helpers import (
+    add_inner_thoughts_to_functions,
+    convert_response_format_to_responses_api,
+    convert_to_structured_output,
+    unpack_all_inner_thoughts_from_kwargs,
+)
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
 from letta.log import get_logger
@@ -52,6 +57,7 @@ from letta.schemas.openai.chat_completion_response import (
     UsageStatistics,
 )
 from letta.schemas.openai.responses_request import ResponsesRequest
+from letta.schemas.response_format import JsonSchemaResponseFormat
 from letta.settings import model_settings
 
 logger = get_logger(__name__)
@@ -339,11 +345,22 @@ class OpenAIClient(LLMClientBase):
             parallel_tool_calls=llm_config.parallel_tool_calls if tools and supports_parallel_tool_calling(model) else False,
         )
 
+        # Handle text configuration (verbosity and response format)
+        text_config_kwargs = {}
+
         # Add verbosity control for GPT-5 models
         if supports_verbosity_control(model) and llm_config.verbosity:
-            # data.verbosity = llm_config.verbosity
-            # https://cookbook.openai.com/examples/gpt-5/gpt-5_new_params_and_tools
-            data.text = ResponseTextConfigParam(verbosity=llm_config.verbosity)
+            text_config_kwargs["verbosity"] = llm_config.verbosity
+
+        # Add response_format support for structured outputs via text.format
+        if hasattr(llm_config, "response_format") and llm_config.response_format is not None:
+            format_dict = convert_response_format_to_responses_api(llm_config.response_format)
+            if format_dict is not None:
+                text_config_kwargs["format"] = format_dict
+
+        # Set text config if we have any parameters
+        if text_config_kwargs:
+            data.text = ResponseTextConfigParam(**text_config_kwargs)
 
         # Add reasoning effort control for reasoning models
         # Only set reasoning if effort is not "none" (GPT-5.1 uses "none" to disable reasoning)
@@ -501,6 +518,16 @@ class OpenAIClient(LLMClientBase):
         if tools and supports_parallel_tool_calling(model):
             data.parallel_tool_calls = False
 
+        # Add response_format support for structured outputs
+        if hasattr(llm_config, "response_format") and llm_config.response_format is not None:
+            # For Chat Completions API, we need the full nested structure
+            if isinstance(llm_config.response_format, JsonSchemaResponseFormat):
+                # Convert to the OpenAI SDK format
+                data.response_format = {"type": "json_schema", "json_schema": llm_config.response_format.json_schema}
+            else:
+                # For text or json_object, just pass the type
+                data.response_format = {"type": llm_config.response_format.type}
+
         # always set user id for openai requests
         if self.actor:
             data.user = self.actor.id
diff --git a/letta/schemas/agent.py b/letta/schemas/agent.py
index 7a8c52c9..596729b6 100644
--- a/letta/schemas/agent.py
+++ b/letta/schemas/agent.py
@@ -299,7 +299,11 @@ class CreateAgent(BaseModel, validate_assignment=True):  #
         description="If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case.",
     )
     enable_sleeptime: Optional[bool] = Field(None, description="If set to True, memory management will move to a background agent thread.")
-    response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the agent.")
+    response_format: Optional[ResponseFormatUnion] = Field(
+        None,
+        description="Deprecated: Use `model_settings` field to configure response format instead. The response format for the agent.",
+        deprecated=True,
+    )
     timezone: Optional[str] = Field(None, description="The timezone of the agent (IANA format).")
     max_files_open: Optional[int] = Field(
         None,
@@ -447,7 +451,7 @@ class UpdateAgent(BaseModel):
     )
     response_format: Optional[ResponseFormatUnion] = Field(
         None,
-        description="Deprecated: Use `model` field to configure response format instead. The response format for the agent.",
+        description="Deprecated: Use `model_settings` field to configure response format instead. The response format for the agent.",
         deprecated=True,
     )
     max_tokens: Optional[int] = Field(
diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py
index cd9d867f..33892ebb 100644
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -6,6 +6,7 @@ from letta.constants import LETTA_MODEL_ENDPOINT
 from letta.errors import LettaInvalidArgumentError
 from letta.log import get_logger
 from letta.schemas.enums import AgentType, ProviderCategory
+from letta.schemas.response_format import ResponseFormatUnion
 
 if TYPE_CHECKING:
     from letta.schemas.model import ModelSettings
@@ -99,6 +100,10 @@ class LLMConfig(BaseModel):
         description="Deprecated: Use model_settings to configure parallel tool calls instead. If set to True, enables parallel tool calling. Defaults to False.",
         deprecated=True,
     )
+    response_format: Optional[ResponseFormatUnion] = Field(
+        None,
+        description="The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings.",
+    )
 
     @model_validator(mode="before")
     @classmethod
diff --git a/letta/schemas/openai/chat_completion_request.py b/letta/schemas/openai/chat_completion_request.py
index 89c67e6b..da1c2632 100644
--- a/letta/schemas/openai/chat_completion_request.py
+++ b/letta/schemas/openai/chat_completion_request.py
@@ -59,7 +59,13 @@ def cast_message_to_subtype(m_dict: dict) -> ChatMessage:
 
 
 class ResponseFormat(BaseModel):
-    type: str = Field(default="text", pattern="^(text|json_object)$")
+    """
+    Response format object for the OpenAI Chat Completions API.
+    `type` picks the format; extra keys such as `json_schema` are accepted as-is.
+    """
+
+    model_config = {"extra": "allow"}  # Allow extra fields for json_schema
+    type: str = Field(default="text", pattern="^(text|json_object|json_schema)$")
 
 
 ## tool_choice ##
@@ -126,7 +132,7 @@ class ChatCompletionRequest(BaseModel):
     max_completion_tokens: Optional[int] = None
     n: Optional[int] = 1
     presence_penalty: Optional[float] = 0
-    response_format: Optional[ResponseFormat] = None
+    response_format: Optional[Union[ResponseFormat, Dict[str, Any]]] = None
     seed: Optional[int] = None
     stop: Optional[Union[str, List[str]]] = None
     stream: Optional[bool] = False
diff --git a/tests/integration_test_send_message_v2.py b/tests/integration_test_send_message_v2.py
index 66028048..60863dd5 100644
--- a/tests/integration_test_send_message_v2.py
+++ b/tests/integration_test_send_message_v2.py
@@ -12,7 +12,7 @@ import pytest
 import requests
 from dotenv import load_dotenv
 from letta_client import AsyncLetta
-from letta_client.types import AgentState, MessageCreateParam, ToolReturnMessage
+from letta_client.types import AgentState, JsonSchemaResponseFormat, MessageCreateParam, OpenAIModelSettings, ToolReturnMessage
 from letta_client.types.agents import AssistantMessage, ReasoningMessage, Run, ToolCallMessage, UserMessage
 from letta_client.types.agents.letta_streaming_response import LettaPing, LettaStopReason, LettaUsageStatistics
 
@@ -901,3 +901,89 @@ async def test_tool_call(
     assert run_id is not None
     run = await client.runs.retrieve(run_id=run_id)
     assert run.status == ("cancelled" if cancellation == "with_cancellation" else "completed")
+
+
+@pytest.mark.parametrize(
+    "model_handle,api_type",
+    [
+        ("openai/gpt-4o", "chat_completions"),
+        ("openai/gpt-5", "responses"),
+    ],
+)
+@pytest.mark.asyncio(loop_scope="function")
+async def test_json_schema_response_format(
+    disable_e2b_api_key: Any,
+    client: AsyncLetta,
+    model_handle: str,
+    api_type: str,
+) -> None:
+    """
+    Exercise JsonSchemaResponseFormat end to end for one OpenAI model per parametrization.
+
+    Steps:
+    1. Create an agent whose model_settings carry a strict json_schema response_format
+    2. Send a single user message to that agent
+    3. Take the first AssistantMessage in the reply and parse its content as JSON
+    4. Check that both schema fields are present, are strings, and are non-empty
+
+    `api_type` only labels the parametrization (gpt-4o -> "chat_completions",
+    gpt-5 -> "responses"); the test body does not read it. The agent is always deleted.
+    """
+    # Schema the model's reply has to satisfy
+    answer_properties = {
+        "response": {"type": "string", "description": "The answer to the question"},
+        "justification": {"type": "string", "description": "Why this is the answer"},
+    }
+    response_schema = {
+        "name": "capital_response",
+        "strict": True,
+        "schema": {
+            "type": "object",
+            "properties": answer_properties,
+            "required": ["response", "justification"],
+            "additionalProperties": False,
+        },
+    }
+
+    # Attach the schema through the OpenAI model settings
+    structured_format = JsonSchemaResponseFormat(type="json_schema", json_schema=response_schema)
+    model_settings = OpenAIModelSettings(provider_type="openai", response_format=structured_format)
+
+    # Agent under test; the name embeds the handle with "/" replaced by "_"
+    agent_name = f"test_structured_agent_{model_handle.replace('/', '_')}"
+    agent_state = await client.agents.create(
+        name=agent_name,
+        model=model_handle,
+        model_settings=model_settings,
+        embedding="openai/text-embedding-3-small",
+        agent_type="letta_v1_agent",
+    )
+
+    try:
+        # One user turn is enough to trigger a structured reply
+        user_turn = MessageCreateParam(role="user", content="What is the capital of France?")
+        message_response = await client.agents.messages.create(agent_id=agent_state.id, messages=[user_turn])
+
+        returned = message_response.messages
+        assert len(returned) > 0, "Should have received at least one message"
+
+        # First AssistantMessage in the reply, or None when there is none
+        assistant_message = next((msg for msg in returned if isinstance(msg, AssistantMessage)), None)
+        assert assistant_message is not None, "Should have received an AssistantMessage"
+
+        # The content must be parseable JSON ...
+        parsed_content = json.loads(assistant_message.content)
+
+        # ... that carries every required field,
+        assert "response" in parsed_content, "JSON should contain 'response' field"
+        assert "justification" in parsed_content, "JSON should contain 'justification' field"
+        # each of them a string,
+        assert isinstance(parsed_content["response"], str), "'response' field should be a string"
+        assert isinstance(parsed_content["justification"], str), "'justification' field should be a string"
+        # and none of them empty.
+        assert len(parsed_content["response"]) > 0, "'response' field should not be empty"
+        assert len(parsed_content["justification"]) > 0, "'justification' field should not be empty"
+
+    finally:
+        # Remove the agent whether or not the assertions passed
+        await client.agents.delete(agent_state.id)
diff --git a/tests/managers/test_agent_manager.py b/tests/managers/test_agent_manager.py
index 6361e406..06b6798d 100644
--- a/tests/managers/test_agent_manager.py
+++ b/tests/managers/test_agent_manager.py
@@ -1542,6 +1542,7 @@ async def test_agent_state_schema_unchanged(server: SyncServer):
         "enable_reasoner",
         "reasoning_effort",
         "effort",
+        "response_format",
         "max_reasoning_tokens",
         "frequency_penalty",
         "compatibility_type",