feat: agent generate endpoint (#9304)
* base * update * clean up * update
This commit is contained in:
@@ -33671,6 +33671,19 @@
|
||||
],
|
||||
"title": "Override Model",
|
||||
"description": "Model handle to use instead of agent's default (e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet')"
|
||||
},
|
||||
"response_schema": {
|
||||
"anyOf": [
|
||||
{
|
||||
"additionalProperties": true,
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"title": "Response Schema",
|
||||
"description": "JSON schema for structured output. When provided, the LLM will be forced to return a response matching this schema via tool calling. The schema should follow JSON Schema format with 'properties' and optionally 'required' fields."
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
|
||||
@@ -496,7 +496,14 @@ class AnthropicClient(LLMClientBase):
|
||||
}
|
||||
|
||||
# Extended Thinking
|
||||
if self.is_reasoning_model(llm_config) and llm_config.enable_reasoner:
|
||||
# Note: Anthropic does not allow thinking when forcing tool use with split_thread_agent
|
||||
should_enable_thinking = (
|
||||
self.is_reasoning_model(llm_config)
|
||||
and llm_config.enable_reasoner
|
||||
and not (agent_type == AgentType.split_thread_agent and force_tool_call is not None)
|
||||
)
|
||||
|
||||
if should_enable_thinking:
|
||||
# Opus 4.6 uses Auto Thinking (no budget tokens)
|
||||
if llm_config.model.startswith("claude-opus-4-6"):
|
||||
data["thinking"] = {
|
||||
@@ -556,7 +563,12 @@ class AnthropicClient(LLMClientBase):
|
||||
tool_choice = None
|
||||
elif self.is_reasoning_model(llm_config) and llm_config.enable_reasoner or agent_type == AgentType.letta_v1_agent:
|
||||
# NOTE: reasoning models currently do not allow for `any`
|
||||
# NOTE: react agents should always have auto on, since the presence/absence of tool calls controls chaining
|
||||
# NOTE: react agents should always have at least auto on, since the presence/absence of tool calls controls chaining
|
||||
if agent_type == AgentType.split_thread_agent and force_tool_call is not None:
|
||||
tool_choice = {"type": "tool", "name": force_tool_call, "disable_parallel_tool_use": True}
|
||||
# When forcing a specific tool, only include that tool
|
||||
tools_for_request = [OpenAITool(function=f) for f in tools if f["name"] == force_tool_call]
|
||||
else:
|
||||
tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
|
||||
tools_for_request = [OpenAITool(function=f) for f in tools]
|
||||
elif force_tool_call is not None:
|
||||
|
||||
@@ -103,6 +103,15 @@ class GenerateRequest(BaseModel):
|
||||
description="Model handle to use instead of agent's default (e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet')",
|
||||
)
|
||||
|
||||
response_schema: Optional[Dict[str, Any]] = Field(
|
||||
None,
|
||||
description=(
|
||||
"JSON schema for structured output. When provided, the LLM will be forced to return "
|
||||
"a response matching this schema via tool calling. The schema should follow JSON Schema "
|
||||
"format with 'properties' and optionally 'required' fields."
|
||||
),
|
||||
)
|
||||
|
||||
@field_validator("prompt")
|
||||
@classmethod
|
||||
def validate_prompt_not_empty(cls, v: str) -> str:
|
||||
@@ -1876,6 +1885,7 @@ async def generate_completion(
|
||||
system_prompt=request.system_prompt,
|
||||
actor=actor,
|
||||
override_model=request.override_model,
|
||||
response_schema=request.response_schema,
|
||||
)
|
||||
except NoResultFound:
|
||||
raise HTTPException(status_code=404, detail=f"Agent with ID {agent_id} not found")
|
||||
|
||||
@@ -1,16 +1,20 @@
|
||||
"""Manager for handling direct LLM completions using agent configuration."""
|
||||
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
import json
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional
|
||||
|
||||
from letta.errors import HandleNotFoundError, LLMError
|
||||
from letta.llm_api.llm_client import LLMClient
|
||||
from letta.log import get_logger
|
||||
from letta.orm.errors import NoResultFound
|
||||
from letta.schemas.enums import MessageRole
|
||||
from letta.schemas.enums import AgentType, MessageRole
|
||||
from letta.schemas.letta_message_content import TextContent
|
||||
from letta.schemas.message import Message
|
||||
from letta.schemas.usage import LettaUsageStatistics
|
||||
|
||||
# Tool name used for structured output via tool forcing
|
||||
STRUCTURED_OUTPUT_TOOL_NAME = "structured_output"
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from letta.orm import User
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
@@ -19,6 +23,27 @@ if TYPE_CHECKING:
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def _schema_to_tool_definition(schema: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert a JSON schema into a tool definition for forced tool calling.

    Only the top-level 'properties' and 'required' keywords of the schema are
    carried over into the tool's parameters; any other keyword is ignored.

    Args:
        schema: JSON schema object with 'properties' and optionally 'required'.
            A missing or null 'properties' is treated as an empty mapping.
            When 'required' is missing or null, every declared property is
            marked as required. An explicit list (including an empty one) is
            passed through unchanged.

    Returns:
        Tool definition dict compatible with OpenAI/Anthropic function calling format
    """
    # Use `or` instead of a .get() default so that an explicit null is handled
    # like a missing key rather than raising AttributeError on `.keys()`.
    properties = schema.get("properties") or {}

    # Compare against None (not truthiness) so an explicit empty list is kept.
    required = schema.get("required")
    if required is None:
        required = list(properties)

    return {
        "name": STRUCTURED_OUTPUT_TOOL_NAME,
        "description": "Returns a structured response matching the requested schema.",
        "parameters": {
            "type": "object",
            "properties": properties,
            "required": required,
        },
    }
|
||||
|
||||
|
||||
class GenerateResponse:
|
||||
"""Response from direct LLM generation."""
|
||||
|
||||
@@ -49,13 +74,14 @@ class AgentGenerateCompletionManager:
|
||||
actor: "User",
|
||||
system_prompt: Optional[str] = None,
|
||||
override_model: Optional[str] = None,
|
||||
response_schema: Optional[Dict[str, Any]] = None,
|
||||
) -> GenerateResponse:
|
||||
"""
|
||||
Generate a completion directly from the LLM provider using the agent's configuration.
|
||||
|
||||
This method makes a direct request to the LLM provider without any agent processing:
|
||||
- No memory or context retrieval
|
||||
- No tool calling
|
||||
- No tool calling (unless response_schema is provided)
|
||||
- No message persistence
|
||||
- No agent state modification
|
||||
|
||||
@@ -66,9 +92,14 @@ class AgentGenerateCompletionManager:
|
||||
system_prompt: Optional system prompt to prepend to the conversation
|
||||
override_model: Optional model handle to override the agent's default
|
||||
(e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet')
|
||||
response_schema: Optional JSON schema for structured output. When provided,
|
||||
the LLM will be forced to return a response matching this
|
||||
schema via tool calling.
|
||||
|
||||
Returns:
|
||||
GenerateResponse with content, model, and usage statistics
|
||||
GenerateResponse with content, model, and usage statistics.
|
||||
When response_schema is provided, content will be the JSON string
|
||||
matching the schema.
|
||||
|
||||
Raises:
|
||||
NoResultFound: If agent not found
|
||||
@@ -99,6 +130,7 @@ class AgentGenerateCompletionManager:
|
||||
"override_model": override_model,
|
||||
"prompt_length": len(prompt),
|
||||
"has_system_prompt": system_prompt is not None,
|
||||
"has_response_schema": response_schema is not None,
|
||||
"model": llm_config.model,
|
||||
},
|
||||
)
|
||||
@@ -132,13 +164,23 @@ class AgentGenerateCompletionManager:
|
||||
if llm_client is None:
|
||||
raise LLMError(f"Unsupported provider type: {llm_config.model_endpoint_type}")
|
||||
|
||||
# 5. Build request data (no tools, no function calling)
|
||||
# 5. Build request data
|
||||
# If response_schema is provided, create a tool and force the model to call it
|
||||
tools = None
|
||||
force_tool_call = None
|
||||
if response_schema:
|
||||
tools = [_schema_to_tool_definition(response_schema)]
|
||||
force_tool_call = STRUCTURED_OUTPUT_TOOL_NAME
|
||||
|
||||
# TODO: create a separate agent type
|
||||
effective_agent_type = AgentType.split_thread_agent if response_schema else agent.agent_type
|
||||
|
||||
request_data = llm_client.build_request_data(
|
||||
agent_type=agent.agent_type,
|
||||
agent_type=effective_agent_type,
|
||||
messages=letta_messages,
|
||||
llm_config=llm_config,
|
||||
tools=None, # No tools for direct generation
|
||||
force_tool_call=None,
|
||||
tools=tools,
|
||||
force_tool_call=force_tool_call,
|
||||
)
|
||||
|
||||
# 6. Make direct LLM request
|
||||
@@ -155,6 +197,20 @@ class AgentGenerateCompletionManager:
|
||||
content = ""
|
||||
if chat_completion.choices and len(chat_completion.choices) > 0:
|
||||
message = chat_completion.choices[0].message
|
||||
|
||||
if response_schema:
|
||||
# When using structured output, extract from tool call arguments
|
||||
if message.tool_calls and len(message.tool_calls) > 0:
|
||||
# The tool call arguments contain the structured output as JSON string
|
||||
content = message.tool_calls[0].function.arguments
|
||||
else:
|
||||
# Fallback: some providers may return in content even with tool forcing
|
||||
content = message.content or ""
|
||||
logger.warning(
|
||||
"Expected tool call for structured output but got content response",
|
||||
extra={"agent_id": str(agent_id), "content_length": len(content)},
|
||||
)
|
||||
else:
|
||||
content = message.content or ""
|
||||
|
||||
# 9. Extract usage statistics
|
||||
|
||||
Reference in New Issue
Block a user