feat: agent generate endpoint (#9304)
* base * update * clean up * update
This commit is contained in:
@@ -33671,6 +33671,19 @@
|
|||||||
],
|
],
|
||||||
"title": "Override Model",
|
"title": "Override Model",
|
||||||
"description": "Model handle to use instead of agent's default (e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet')"
|
"description": "Model handle to use instead of agent's default (e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet')"
|
||||||
|
},
|
||||||
|
"response_schema": {
|
||||||
|
"anyOf": [
|
||||||
|
{
|
||||||
|
"additionalProperties": true,
|
||||||
|
"type": "object"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Response Schema",
|
||||||
|
"description": "JSON schema for structured output. When provided, the LLM will be forced to return a response matching this schema via tool calling. The schema should follow JSON Schema format with 'properties' and optionally 'required' fields."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
|||||||
@@ -62,7 +62,7 @@ class AnthropicClient(LLMClientBase):
|
|||||||
def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
|
def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
|
||||||
client = self._get_anthropic_client(llm_config, async_client=False)
|
client = self._get_anthropic_client(llm_config, async_client=False)
|
||||||
betas: list[str] = []
|
betas: list[str] = []
|
||||||
|
|
||||||
# Opus 4.6 Auto Thinking
|
# Opus 4.6 Auto Thinking
|
||||||
if llm_config.enable_reasoner:
|
if llm_config.enable_reasoner:
|
||||||
if llm_config.model.startswith("claude-opus-4-6"):
|
if llm_config.model.startswith("claude-opus-4-6"):
|
||||||
@@ -496,7 +496,14 @@ class AnthropicClient(LLMClientBase):
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Extended Thinking
|
# Extended Thinking
|
||||||
if self.is_reasoning_model(llm_config) and llm_config.enable_reasoner:
|
# Note: Anthropic does not allow thinking when forcing tool use with split_thread_agent
|
||||||
|
should_enable_thinking = (
|
||||||
|
self.is_reasoning_model(llm_config)
|
||||||
|
and llm_config.enable_reasoner
|
||||||
|
and not (agent_type == AgentType.split_thread_agent and force_tool_call is not None)
|
||||||
|
)
|
||||||
|
|
||||||
|
if should_enable_thinking:
|
||||||
# Opus 4.6 uses Auto Thinking (no budget tokens)
|
# Opus 4.6 uses Auto Thinking (no budget tokens)
|
||||||
if llm_config.model.startswith("claude-opus-4-6"):
|
if llm_config.model.startswith("claude-opus-4-6"):
|
||||||
data["thinking"] = {
|
data["thinking"] = {
|
||||||
@@ -556,9 +563,14 @@ class AnthropicClient(LLMClientBase):
|
|||||||
tool_choice = None
|
tool_choice = None
|
||||||
elif self.is_reasoning_model(llm_config) and llm_config.enable_reasoner or agent_type == AgentType.letta_v1_agent:
|
elif self.is_reasoning_model(llm_config) and llm_config.enable_reasoner or agent_type == AgentType.letta_v1_agent:
|
||||||
# NOTE: reasoning models currently do not allow for `any`
|
# NOTE: reasoning models currently do not allow for `any`
|
||||||
# NOTE: react agents should always have auto on, since the presence/absence of tool calls controls chaining
|
# NOTE: react agents should always have at least auto on, since the presence/absence of tool calls controls chaining
|
||||||
tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
|
if agent_type == AgentType.split_thread_agent and force_tool_call is not None:
|
||||||
tools_for_request = [OpenAITool(function=f) for f in tools]
|
tool_choice = {"type": "tool", "name": force_tool_call, "disable_parallel_tool_use": True}
|
||||||
|
# When forcing a specific tool, only include that tool
|
||||||
|
tools_for_request = [OpenAITool(function=f) for f in tools if f["name"] == force_tool_call]
|
||||||
|
else:
|
||||||
|
tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
|
||||||
|
tools_for_request = [OpenAITool(function=f) for f in tools]
|
||||||
elif force_tool_call is not None:
|
elif force_tool_call is not None:
|
||||||
tool_choice = {"type": "tool", "name": force_tool_call, "disable_parallel_tool_use": True}
|
tool_choice = {"type": "tool", "name": force_tool_call, "disable_parallel_tool_use": True}
|
||||||
tools_for_request = [OpenAITool(function=f) for f in tools if f["name"] == force_tool_call]
|
tools_for_request = [OpenAITool(function=f) for f in tools if f["name"] == force_tool_call]
|
||||||
|
|||||||
@@ -103,6 +103,15 @@ class GenerateRequest(BaseModel):
|
|||||||
description="Model handle to use instead of agent's default (e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet')",
|
description="Model handle to use instead of agent's default (e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet')",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
response_schema: Optional[Dict[str, Any]] = Field(
|
||||||
|
None,
|
||||||
|
description=(
|
||||||
|
"JSON schema for structured output. When provided, the LLM will be forced to return "
|
||||||
|
"a response matching this schema via tool calling. The schema should follow JSON Schema "
|
||||||
|
"format with 'properties' and optionally 'required' fields."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
@field_validator("prompt")
|
@field_validator("prompt")
|
||||||
@classmethod
|
@classmethod
|
||||||
def validate_prompt_not_empty(cls, v: str) -> str:
|
def validate_prompt_not_empty(cls, v: str) -> str:
|
||||||
@@ -1876,6 +1885,7 @@ async def generate_completion(
|
|||||||
system_prompt=request.system_prompt,
|
system_prompt=request.system_prompt,
|
||||||
actor=actor,
|
actor=actor,
|
||||||
override_model=request.override_model,
|
override_model=request.override_model,
|
||||||
|
response_schema=request.response_schema,
|
||||||
)
|
)
|
||||||
except NoResultFound:
|
except NoResultFound:
|
||||||
raise HTTPException(status_code=404, detail=f"Agent with ID {agent_id} not found")
|
raise HTTPException(status_code=404, detail=f"Agent with ID {agent_id} not found")
|
||||||
|
|||||||
@@ -1,16 +1,20 @@
|
|||||||
"""Manager for handling direct LLM completions using agent configuration."""
|
"""Manager for handling direct LLM completions using agent configuration."""
|
||||||
|
|
||||||
from typing import TYPE_CHECKING, Optional
|
import json
|
||||||
|
from typing import TYPE_CHECKING, Any, Dict, Optional
|
||||||
|
|
||||||
from letta.errors import HandleNotFoundError, LLMError
|
from letta.errors import HandleNotFoundError, LLMError
|
||||||
from letta.llm_api.llm_client import LLMClient
|
from letta.llm_api.llm_client import LLMClient
|
||||||
from letta.log import get_logger
|
from letta.log import get_logger
|
||||||
from letta.orm.errors import NoResultFound
|
from letta.orm.errors import NoResultFound
|
||||||
from letta.schemas.enums import MessageRole
|
from letta.schemas.enums import AgentType, MessageRole
|
||||||
from letta.schemas.letta_message_content import TextContent
|
from letta.schemas.letta_message_content import TextContent
|
||||||
from letta.schemas.message import Message
|
from letta.schemas.message import Message
|
||||||
from letta.schemas.usage import LettaUsageStatistics
|
from letta.schemas.usage import LettaUsageStatistics
|
||||||
|
|
||||||
|
# Tool name used for structured output via tool forcing
|
||||||
|
STRUCTURED_OUTPUT_TOOL_NAME = "structured_output"
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from letta.orm import User
|
from letta.orm import User
|
||||||
from letta.schemas.llm_config import LLMConfig
|
from letta.schemas.llm_config import LLMConfig
|
||||||
@@ -19,6 +23,27 @@ if TYPE_CHECKING:
|
|||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _schema_to_tool_definition(schema: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert a JSON schema into a tool definition for forced tool calling.

    Only the schema's 'properties' and 'required' entries are carried over;
    the resulting parameters are always declared with type "object".

    Args:
        schema: JSON schema object with 'properties' and optionally 'required'.
            A missing or null 'properties' is treated as an empty mapping.
            A missing or null 'required' defaults to every declared property
            name, so the model must fill in all fields.

    Returns:
        Tool definition dict compatible with OpenAI/Anthropic function calling
        format, with 'name' (STRUCTURED_OUTPUT_TOOL_NAME), 'description' and
        'parameters' keys.
    """
    # `or {}` (rather than a .get() default) also covers an explicit JSON null:
    # .get("properties", {}) would hand back None and break the fallback below.
    properties = schema.get("properties") or {}

    # Keep a caller-supplied list as-is (including an empty one); only fall
    # back to "all properties required" when nothing usable was provided.
    required = schema.get("required")
    if required is None:
        required = list(properties)

    return {
        "name": STRUCTURED_OUTPUT_TOOL_NAME,
        "description": "Returns a structured response matching the requested schema.",
        "parameters": {
            "type": "object",
            "properties": properties,
            "required": required,
        },
    }
|
||||||
|
|
||||||
|
|
||||||
class GenerateResponse:
|
class GenerateResponse:
|
||||||
"""Response from direct LLM generation."""
|
"""Response from direct LLM generation."""
|
||||||
|
|
||||||
@@ -49,13 +74,14 @@ class AgentGenerateCompletionManager:
|
|||||||
actor: "User",
|
actor: "User",
|
||||||
system_prompt: Optional[str] = None,
|
system_prompt: Optional[str] = None,
|
||||||
override_model: Optional[str] = None,
|
override_model: Optional[str] = None,
|
||||||
|
response_schema: Optional[Dict[str, Any]] = None,
|
||||||
) -> GenerateResponse:
|
) -> GenerateResponse:
|
||||||
"""
|
"""
|
||||||
Generate a completion directly from the LLM provider using the agent's configuration.
|
Generate a completion directly from the LLM provider using the agent's configuration.
|
||||||
|
|
||||||
This method makes a direct request to the LLM provider without any agent processing:
|
This method makes a direct request to the LLM provider without any agent processing:
|
||||||
- No memory or context retrieval
|
- No memory or context retrieval
|
||||||
- No tool calling
|
- No tool calling (unless response_schema is provided)
|
||||||
- No message persistence
|
- No message persistence
|
||||||
- No agent state modification
|
- No agent state modification
|
||||||
|
|
||||||
@@ -66,9 +92,14 @@ class AgentGenerateCompletionManager:
|
|||||||
system_prompt: Optional system prompt to prepend to the conversation
|
system_prompt: Optional system prompt to prepend to the conversation
|
||||||
override_model: Optional model handle to override the agent's default
|
override_model: Optional model handle to override the agent's default
|
||||||
(e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet')
|
(e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet')
|
||||||
|
response_schema: Optional JSON schema for structured output. When provided,
|
||||||
|
the LLM will be forced to return a response matching this
|
||||||
|
schema via tool calling.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
GenerateResponse with content, model, and usage statistics
|
GenerateResponse with content, model, and usage statistics.
|
||||||
|
When response_schema is provided, content will be the JSON string
|
||||||
|
matching the schema.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
NoResultFound: If agent not found
|
NoResultFound: If agent not found
|
||||||
@@ -99,6 +130,7 @@ class AgentGenerateCompletionManager:
|
|||||||
"override_model": override_model,
|
"override_model": override_model,
|
||||||
"prompt_length": len(prompt),
|
"prompt_length": len(prompt),
|
||||||
"has_system_prompt": system_prompt is not None,
|
"has_system_prompt": system_prompt is not None,
|
||||||
|
"has_response_schema": response_schema is not None,
|
||||||
"model": llm_config.model,
|
"model": llm_config.model,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -132,13 +164,23 @@ class AgentGenerateCompletionManager:
|
|||||||
if llm_client is None:
|
if llm_client is None:
|
||||||
raise LLMError(f"Unsupported provider type: {llm_config.model_endpoint_type}")
|
raise LLMError(f"Unsupported provider type: {llm_config.model_endpoint_type}")
|
||||||
|
|
||||||
# 5. Build request data (no tools, no function calling)
|
# 5. Build request data
|
||||||
|
# If response_schema is provided, create a tool and force the model to call it
|
||||||
|
tools = None
|
||||||
|
force_tool_call = None
|
||||||
|
if response_schema:
|
||||||
|
tools = [_schema_to_tool_definition(response_schema)]
|
||||||
|
force_tool_call = STRUCTURED_OUTPUT_TOOL_NAME
|
||||||
|
|
||||||
|
# TODO: create a separate agent type
|
||||||
|
effective_agent_type = AgentType.split_thread_agent if response_schema else agent.agent_type
|
||||||
|
|
||||||
request_data = llm_client.build_request_data(
|
request_data = llm_client.build_request_data(
|
||||||
agent_type=agent.agent_type,
|
agent_type=effective_agent_type,
|
||||||
messages=letta_messages,
|
messages=letta_messages,
|
||||||
llm_config=llm_config,
|
llm_config=llm_config,
|
||||||
tools=None, # No tools for direct generation
|
tools=tools,
|
||||||
force_tool_call=None,
|
force_tool_call=force_tool_call,
|
||||||
)
|
)
|
||||||
|
|
||||||
# 6. Make direct LLM request
|
# 6. Make direct LLM request
|
||||||
@@ -155,7 +197,21 @@ class AgentGenerateCompletionManager:
|
|||||||
content = ""
|
content = ""
|
||||||
if chat_completion.choices and len(chat_completion.choices) > 0:
|
if chat_completion.choices and len(chat_completion.choices) > 0:
|
||||||
message = chat_completion.choices[0].message
|
message = chat_completion.choices[0].message
|
||||||
content = message.content or ""
|
|
||||||
|
if response_schema:
|
||||||
|
# When using structured output, extract from tool call arguments
|
||||||
|
if message.tool_calls and len(message.tool_calls) > 0:
|
||||||
|
# The tool call arguments contain the structured output as JSON string
|
||||||
|
content = message.tool_calls[0].function.arguments
|
||||||
|
else:
|
||||||
|
# Fallback: some providers may return in content even with tool forcing
|
||||||
|
content = message.content or ""
|
||||||
|
logger.warning(
|
||||||
|
"Expected tool call for structured output but got content response",
|
||||||
|
extra={"agent_id": str(agent_id), "content_length": len(content)},
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
content = message.content or ""
|
||||||
|
|
||||||
# 9. Extract usage statistics
|
# 9. Extract usage statistics
|
||||||
usage = llm_client.extract_usage_statistics(response_data, llm_config)
|
usage = llm_client.extract_usage_statistics(response_data, llm_config)
|
||||||
|
|||||||
Reference in New Issue
Block a user