diff --git a/fern/openapi.json b/fern/openapi.json index 4a487294..1242ea27 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -33671,6 +33671,19 @@ ], "title": "Override Model", "description": "Model handle to use instead of agent's default (e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet')" + }, + "response_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Response Schema", + "description": "JSON schema for structured output. When provided, the LLM will be forced to return a response matching this schema via tool calling. The schema should follow JSON Schema format with 'properties' and optionally 'required' fields." } }, "type": "object", diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index 13851639..383141d9 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -62,7 +62,7 @@ class AnthropicClient(LLMClientBase): def request(self, request_data: dict, llm_config: LLMConfig) -> dict: client = self._get_anthropic_client(llm_config, async_client=False) betas: list[str] = [] - + # Opus 4.6 Auto Thinking if llm_config.enable_reasoner: if llm_config.model.startswith("claude-opus-4-6"): @@ -496,7 +496,14 @@ class AnthropicClient(LLMClientBase): } # Extended Thinking - if self.is_reasoning_model(llm_config) and llm_config.enable_reasoner: + # Note: Anthropic does not allow thinking when forcing tool use with split_thread_agent + should_enable_thinking = ( + self.is_reasoning_model(llm_config) + and llm_config.enable_reasoner + and not (agent_type == AgentType.split_thread_agent and force_tool_call is not None) + ) + + if should_enable_thinking: # Opus 4.6 uses Auto Thinking (no budget tokens) if llm_config.model.startswith("claude-opus-4-6"): data["thinking"] = { @@ -556,9 +563,14 @@ class AnthropicClient(LLMClientBase): tool_choice = None elif self.is_reasoning_model(llm_config) and llm_config.enable_reasoner or 
agent_type == AgentType.letta_v1_agent: # NOTE: reasoning models currently do not allow for `any` - # NOTE: react agents should always have auto on, since the precense/absense of tool calls controls chaining - tool_choice = {"type": "auto", "disable_parallel_tool_use": True} - tools_for_request = [OpenAITool(function=f) for f in tools] + # NOTE: react agents should always have at least auto on, since the presence/absence of tool calls controls chaining + if agent_type == AgentType.split_thread_agent and force_tool_call is not None: + tool_choice = {"type": "tool", "name": force_tool_call, "disable_parallel_tool_use": True} + # When forcing a specific tool, only include that tool + tools_for_request = [OpenAITool(function=f) for f in tools if f["name"] == force_tool_call] + else: + tool_choice = {"type": "auto", "disable_parallel_tool_use": True} + tools_for_request = [OpenAITool(function=f) for f in tools] elif force_tool_call is not None: tool_choice = {"type": "tool", "name": force_tool_call, "disable_parallel_tool_use": True} tools_for_request = [OpenAITool(function=f) for f in tools if f["name"] == force_tool_call] diff --git a/letta/server/rest_api/routers/v1/agents.py b/letta/server/rest_api/routers/v1/agents.py index 34f4a27e..0adb1620 100644 --- a/letta/server/rest_api/routers/v1/agents.py +++ b/letta/server/rest_api/routers/v1/agents.py @@ -103,6 +103,15 @@ class GenerateRequest(BaseModel): description="Model handle to use instead of agent's default (e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet')", ) + response_schema: Optional[Dict[str, Any]] = Field( + None, + description=( + "JSON schema for structured output. When provided, the LLM will be forced to return " + "a response matching this schema via tool calling. The schema should follow JSON Schema " + "format with 'properties' and optionally 'required' fields." 
+ ), + ) + @field_validator("prompt") @classmethod def validate_prompt_not_empty(cls, v: str) -> str: @@ -1876,6 +1885,7 @@ async def generate_completion( system_prompt=request.system_prompt, actor=actor, override_model=request.override_model, + response_schema=request.response_schema, ) except NoResultFound: raise HTTPException(status_code=404, detail=f"Agent with ID {agent_id} not found") diff --git a/letta/services/agent_generate_completion_manager.py b/letta/services/agent_generate_completion_manager.py index e11ee8bd..9f5fd1f4 100644 --- a/letta/services/agent_generate_completion_manager.py +++ b/letta/services/agent_generate_completion_manager.py @@ -1,16 +1,20 @@ """Manager for handling direct LLM completions using agent configuration.""" -from typing import TYPE_CHECKING, Optional +import json +from typing import TYPE_CHECKING, Any, Dict, Optional from letta.errors import HandleNotFoundError, LLMError from letta.llm_api.llm_client import LLMClient from letta.log import get_logger from letta.orm.errors import NoResultFound -from letta.schemas.enums import MessageRole +from letta.schemas.enums import AgentType, MessageRole from letta.schemas.letta_message_content import TextContent from letta.schemas.message import Message from letta.schemas.usage import LettaUsageStatistics +# Tool name used for structured output via tool forcing +STRUCTURED_OUTPUT_TOOL_NAME = "structured_output" + if TYPE_CHECKING: from letta.orm import User from letta.schemas.llm_config import LLMConfig @@ -19,6 +23,27 @@ if TYPE_CHECKING: logger = get_logger(__name__) +def _schema_to_tool_definition(schema: Dict[str, Any]) -> Dict[str, Any]: + """ + Convert a JSON schema into a tool definition for forced tool calling. 
+ + Args: + schema: JSON schema object with 'properties' and optionally 'required' + + Returns: + Tool definition dict compatible with OpenAI/Anthropic function calling format + """ + return { + "name": STRUCTURED_OUTPUT_TOOL_NAME, + "description": "Returns a structured response matching the requested schema.", + "parameters": { + "type": "object", + "properties": schema.get("properties", {}), + "required": schema.get("required", list(schema.get("properties", {}).keys())), + }, + } + + class GenerateResponse: """Response from direct LLM generation.""" @@ -49,13 +74,14 @@ class AgentGenerateCompletionManager: actor: "User", system_prompt: Optional[str] = None, override_model: Optional[str] = None, + response_schema: Optional[Dict[str, Any]] = None, ) -> GenerateResponse: """ Generate a completion directly from the LLM provider using the agent's configuration. This method makes a direct request to the LLM provider without any agent processing: - No memory or context retrieval - - No tool calling + - No tool calling (unless response_schema is provided) - No message persistence - No agent state modification @@ -66,9 +92,14 @@ class AgentGenerateCompletionManager: system_prompt: Optional system prompt to prepend to the conversation override_model: Optional model handle to override the agent's default (e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet') + response_schema: Optional JSON schema for structured output. When provided, + the LLM will be forced to return a response matching this + schema via tool calling. Returns: - GenerateResponse with content, model, and usage statistics + GenerateResponse with content, model, and usage statistics. + When response_schema is provided, content will be the JSON string + matching the schema. 
Raises: NoResultFound: If agent not found @@ -99,6 +130,7 @@ class AgentGenerateCompletionManager: "override_model": override_model, "prompt_length": len(prompt), "has_system_prompt": system_prompt is not None, + "has_response_schema": response_schema is not None, "model": llm_config.model, }, ) @@ -132,13 +164,23 @@ class AgentGenerateCompletionManager: if llm_client is None: raise LLMError(f"Unsupported provider type: {llm_config.model_endpoint_type}") - # 5. Build request data (no tools, no function calling) + # 5. Build request data + # If response_schema is provided, create a tool and force the model to call it + tools = None + force_tool_call = None + if response_schema: + tools = [_schema_to_tool_definition(response_schema)] + force_tool_call = STRUCTURED_OUTPUT_TOOL_NAME + + # TODO: create a separate agent type + effective_agent_type = AgentType.split_thread_agent if response_schema else agent.agent_type + request_data = llm_client.build_request_data( - agent_type=agent.agent_type, + agent_type=effective_agent_type, messages=letta_messages, llm_config=llm_config, - tools=None, # No tools for direct generation - force_tool_call=None, + tools=tools, + force_tool_call=force_tool_call, ) # 6. 
Make direct LLM request @@ -155,7 +197,21 @@ class AgentGenerateCompletionManager: content = "" if chat_completion.choices and len(chat_completion.choices) > 0: message = chat_completion.choices[0].message - content = message.content or "" + + if response_schema: + # When using structured output, extract from tool call arguments + if message.tool_calls and len(message.tool_calls) > 0: + # The tool call arguments contain the structured output as JSON string + content = message.tool_calls[0].function.arguments + else: + # Fallback: some providers may return in content even with tool forcing + content = message.content or "" + logger.warning( + "Expected tool call for structured output but got content response", + extra={"agent_id": str(agent_id), "content_length": len(content)}, + ) + else: + content = message.content or "" # 9. Extract usage statistics usage = llm_client.extract_usage_statistics(response_data, llm_config)