diff --git a/fern/openapi.json b/fern/openapi.json index 4a487294..1242ea27 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -33671,6 +33671,19 @@ ], "title": "Override Model", "description": "Model handle to use instead of agent's default (e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet')" + }, + "response_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Response Schema", + "description": "JSON schema for structured output. When provided, the LLM will be forced to return a response matching this schema via tool calling. The schema should follow JSON Schema format with 'properties' and optionally 'required' fields." } }, "type": "object", diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index 13851639..383141d9 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -62,7 +62,7 @@ class AnthropicClient(LLMClientBase): def request(self, request_data: dict, llm_config: LLMConfig) -> dict: client = self._get_anthropic_client(llm_config, async_client=False) betas: list[str] = [] - + # Opus 4.6 Auto Thinking if llm_config.enable_reasoner: if llm_config.model.startswith("claude-opus-4-6"): @@ -496,7 +496,14 @@ class AnthropicClient(LLMClientBase): } # Extended Thinking - if self.is_reasoning_model(llm_config) and llm_config.enable_reasoner: + # Note: Anthropic does not allow thinking when forcing tool use with split_thread_agent + should_enable_thinking = ( + self.is_reasoning_model(llm_config) + and llm_config.enable_reasoner + and not (agent_type == AgentType.split_thread_agent and force_tool_call is not None) + ) + + if should_enable_thinking: # Opus 4.6 uses Auto Thinking (no budget tokens) if llm_config.model.startswith("claude-opus-4-6"): data["thinking"] = { @@ -556,9 +563,14 @@ class AnthropicClient(LLMClientBase): tool_choice = None elif self.is_reasoning_model(llm_config) and llm_config.enable_reasoner or 
agent_type == AgentType.letta_v1_agent: # NOTE: reasoning models currently do not allow for `any` - # NOTE: react agents should always have auto on, since the precense/absense of tool calls controls chaining - tool_choice = {"type": "auto", "disable_parallel_tool_use": True} - tools_for_request = [OpenAITool(function=f) for f in tools] + # NOTE: react agents should always have at least auto on, since the presence/absence of tool calls controls chaining + if agent_type == AgentType.split_thread_agent and force_tool_call is not None: + tool_choice = {"type": "tool", "name": force_tool_call, "disable_parallel_tool_use": True} + # When forcing a specific tool, only include that tool + tools_for_request = [OpenAITool(function=f) for f in tools if f["name"] == force_tool_call] + else: + tool_choice = {"type": "auto", "disable_parallel_tool_use": True} + tools_for_request = [OpenAITool(function=f) for f in tools] elif force_tool_call is not None: tool_choice = {"type": "tool", "name": force_tool_call, "disable_parallel_tool_use": True} tools_for_request = [OpenAITool(function=f) for f in tools if f["name"] == force_tool_call] diff --git a/letta/server/rest_api/routers/v1/agents.py b/letta/server/rest_api/routers/v1/agents.py index 34f4a27e..0adb1620 100644 --- a/letta/server/rest_api/routers/v1/agents.py +++ b/letta/server/rest_api/routers/v1/agents.py @@ -103,6 +103,15 @@ class GenerateRequest(BaseModel): description="Model handle to use instead of agent's default (e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet')", ) + response_schema: Optional[Dict[str, Any]] = Field( + None, + description=( + "JSON schema for structured output. When provided, the LLM will be forced to return " + "a response matching this schema via tool calling. The schema should follow JSON Schema " + "format with 'properties' and optionally 'required' fields." 
+ ), + ) + @field_validator("prompt") @classmethod def validate_prompt_not_empty(cls, v: str) -> str: @@ -1876,6 +1885,7 @@ async def generate_completion( system_prompt=request.system_prompt, actor=actor, override_model=request.override_model, + response_schema=request.response_schema, ) except NoResultFound: raise HTTPException(status_code=404, detail=f"Agent with ID {agent_id} not found") diff --git a/letta/services/agent_generate_completion_manager.py b/letta/services/agent_generate_completion_manager.py index e11ee8bd..9f5fd1f4 100644 --- a/letta/services/agent_generate_completion_manager.py +++ b/letta/services/agent_generate_completion_manager.py @@ -1,16 +1,20 @@ """Manager for handling direct LLM completions using agent configuration.""" -from typing import TYPE_CHECKING, Optional +import json +from typing import TYPE_CHECKING, Any, Dict, Optional from letta.errors import HandleNotFoundError, LLMError from letta.llm_api.llm_client import LLMClient from letta.log import get_logger from letta.orm.errors import NoResultFound -from letta.schemas.enums import MessageRole +from letta.schemas.enums import AgentType, MessageRole from letta.schemas.letta_message_content import TextContent from letta.schemas.message import Message from letta.schemas.usage import LettaUsageStatistics +# Tool name used for structured output via tool forcing +STRUCTURED_OUTPUT_TOOL_NAME = "structured_output" + if TYPE_CHECKING: from letta.orm import User from letta.schemas.llm_config import LLMConfig @@ -19,6 +23,27 @@ if TYPE_CHECKING: logger = get_logger(__name__) +def _schema_to_tool_definition(schema: Dict[str, Any]) -> Dict[str, Any]: + """ + Convert a JSON schema into a tool definition for forced tool calling. 
+ + Args: + schema: JSON schema object with 'properties' and optionally 'required' + + Returns: + Tool definition dict compatible with OpenAI/Anthropic function calling format + """ + return { + "name": STRUCTURED_OUTPUT_TOOL_NAME, + "description": "Returns a structured response matching the requested schema.", + "parameters": { + "type": "object", + "properties": schema.get("properties", {}), + "required": schema.get("required", list(schema.get("properties", {}).keys())), + }, + } + + class GenerateResponse: """Response from direct LLM generation.""" @@ -49,13 +74,14 @@ class AgentGenerateCompletionManager: actor: "User", system_prompt: Optional[str] = None, override_model: Optional[str] = None, + response_schema: Optional[Dict[str, Any]] = None, ) -> GenerateResponse: """ Generate a completion directly from the LLM provider using the agent's configuration. This method makes a direct request to the LLM provider without any agent processing: - No memory or context retrieval - - No tool calling + - No tool calling (unless response_schema is provided) - No message persistence - No agent state modification @@ -66,9 +92,14 @@ class AgentGenerateCompletionManager: system_prompt: Optional system prompt to prepend to the conversation override_model: Optional model handle to override the agent's default (e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet') + response_schema: Optional JSON schema for structured output. When provided, + the LLM will be forced to return a response matching this + schema via tool calling. Returns: - GenerateResponse with content, model, and usage statistics + GenerateResponse with content, model, and usage statistics. + When response_schema is provided, content will be the JSON string + matching the schema. 
Raises: NoResultFound: If agent not found @@ -99,6 +130,7 @@ class AgentGenerateCompletionManager: "override_model": override_model, "prompt_length": len(prompt), "has_system_prompt": system_prompt is not None, + "has_response_schema": response_schema is not None, "model": llm_config.model, }, ) @@ -132,13 +164,23 @@ class AgentGenerateCompletionManager: if llm_client is None: raise LLMError(f"Unsupported provider type: {llm_config.model_endpoint_type}") - # 5. Build request data (no tools, no function calling) + # 5. Build request data + # If response_schema is provided, create a tool and force the model to call it + tools = None + force_tool_call = None + if response_schema: + tools = [_schema_to_tool_definition(response_schema)] + force_tool_call = STRUCTURED_OUTPUT_TOOL_NAME + + # TODO: create a separate agent type + effective_agent_type = AgentType.split_thread_agent if response_schema else agent.agent_type + request_data = llm_client.build_request_data( - agent_type=agent.agent_type, + agent_type=effective_agent_type, messages=letta_messages, llm_config=llm_config, - tools=None, # No tools for direct generation - force_tool_call=None, + tools=tools, + force_tool_call=force_tool_call, ) # 6. 
Make direct LLM request @@ -155,7 +197,21 @@ class AgentGenerateCompletionManager: content = "" if chat_completion.choices and len(chat_completion.choices) > 0: message = chat_completion.choices[0].message - content = message.content or "" + + if response_schema: + # When using structured output, extract from tool call arguments + if message.tool_calls and len(message.tool_calls) > 0: + # The tool call arguments contain the structured output as JSON string + content = message.tool_calls[0].function.arguments + else: + # Fallback: some providers may return in content even with tool forcing + content = message.content or "" + logger.warning( + "Expected tool call for structured output but got content response", + extra={"agent_id": str(agent_id), "content_length": len(content)}, + ) + else: + content = message.content or "" # 9. Extract usage statistics usage = llm_client.extract_usage_statistics(response_data, llm_config)