"""Manager for handling direct LLM completions using agent configuration."""
import json
from typing import TYPE_CHECKING, Any, Dict, Optional
from letta.errors import HandleNotFoundError, LLMError
from letta.llm_api.llm_client import LLMClient
from letta.log import get_logger
from letta.orm.errors import NoResultFound
from letta.schemas.enums import AgentType, MessageRole
from letta.schemas.letta_message_content import TextContent
from letta.schemas.message import Message
from letta.schemas.usage import LettaUsageStatistics
# Tool name used for structured output via tool forcing
STRUCTURED_OUTPUT_TOOL_NAME = "structured_output"
if TYPE_CHECKING:
from letta.orm import User
from letta.schemas.llm_config import LLMConfig
from letta.server.server import SyncServer
logger = get_logger(__name__)


def _schema_to_tool_definition(schema: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert a JSON schema into a tool definition for forced tool calling.

    Args:
        schema: JSON schema object with 'properties' and optionally 'required'

    Returns:
        Tool definition dict compatible with OpenAI/Anthropic function calling format
    """
    return {
        "name": STRUCTURED_OUTPUT_TOOL_NAME,
        "description": "Returns a structured response matching the requested schema.",
        "parameters": {
            "type": "object",
            "properties": schema.get("properties", {}),
            "required": schema.get("required", list(schema.get("properties", {}).keys())),
        },
    }
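

# Example (illustrative): a schema such as
#     {"properties": {"city": {"type": "string"}}, "required": ["city"]}
# is wrapped into a tool named "structured_output" whose "parameters" field
# carries those properties, so a forced tool call must return arguments
# like {"city": "..."}.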


class GenerateResponse:
    """Response from direct LLM generation."""

    def __init__(self, content: str, model: str, usage: LettaUsageStatistics):
        self.content = content
        self.model = model
        self.usage = usage


class AgentGenerateCompletionManager:
    """Manager for handling direct LLM completions using agent configuration."""

    def __init__(self, server: "SyncServer"):
        """
        Initialize the agent generate completion manager.

        Args:
            server: The SyncServer instance for accessing managers
        """
        self.server = server
        self.agent_manager = server.agent_manager
        self.provider_manager = server.provider_manager

    async def generate_completion_with_agent_config_async(
        self,
        agent_id: str,
        prompt: str,
        actor: "User",
        system_prompt: Optional[str] = None,
        override_model: Optional[str] = None,
        response_schema: Optional[Dict[str, Any]] = None,
    ) -> GenerateResponse:
        """
        Generate a completion directly from the LLM provider using the agent's configuration.

        This method makes a direct request to the LLM provider without any agent processing:
        - No memory or context retrieval
        - No tool calling (unless response_schema is provided)
        - No message persistence
        - No agent state modification

        Args:
            agent_id: The agent ID whose configuration to use
            prompt: The prompt/message to send to the LLM
            actor: The user making the request
            system_prompt: Optional system prompt to prepend to the conversation
            override_model: Optional model handle to override the agent's default
                (e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet')
            response_schema: Optional JSON schema for structured output. When provided,
                the LLM will be forced to return a response matching this
                schema via tool calling.

        Returns:
            GenerateResponse with content, model, and usage statistics.
            When response_schema is provided, content will be the JSON string
            matching the schema.

        Raises:
            NoResultFound: If agent not found
            HandleNotFoundError: If override_model is invalid
            LLMError: If LLM provider error occurs
        """
        # 1. Validate agent exists and user has access
        agent = await self.agent_manager.get_agent_by_id_async(
            agent_id,
            actor,
            include_relationships=[],
        )

        # 2. Get LLM config (with optional override)
        llm_config: "LLMConfig" = agent.llm_config
        if override_model:
            # Get full LLM config for the override model
            # This ensures we get the right provider, endpoint, credentials, etc.
            llm_config = await self.server.get_llm_config_from_handle_async(
                actor=actor,
                handle=override_model,
            )

        logger.info(
            f"Generating completion for agent {agent_id}",
            extra={
                "agent_id": str(agent_id),
                "override_model": override_model,
                "prompt_length": len(prompt),
                "has_system_prompt": system_prompt is not None,
                "has_response_schema": response_schema is not None,
                "model": llm_config.model,
            },
        )

        # 3. Build messages from prompt and optional system_prompt
        letta_messages = []

        # Always add a system message (required by some providers like Anthropic)
        # Use provided system_prompt or minimal default (empty strings not allowed with cache_control)
        letta_messages.append(
            Message(
                role=MessageRole.system,
                content=[TextContent(text=system_prompt if system_prompt else "You are a helpful assistant.")],
            )
        )

        # Add user prompt
        letta_messages.append(
            Message(
                role=MessageRole.user,
                content=[TextContent(text=prompt)],
            )
        )

        # 4. Create LLM client for the provider
        llm_client = LLMClient.create(
            provider_type=llm_config.model_endpoint_type,
            actor=actor,
        )
        if llm_client is None:
            raise LLMError(f"Unsupported provider type: {llm_config.model_endpoint_type}")

        # 5. Build request data
        # If response_schema is provided, create a tool and force the model to call it
        tools = None
        force_tool_call = None
        if response_schema:
            tools = [_schema_to_tool_definition(response_schema)]
            force_tool_call = STRUCTURED_OUTPUT_TOOL_NAME
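            # Illustrative note (an assumption; the exact wiring lives inside the
            # LLM client): for OpenAI-style APIs, forcing a tool typically maps to
            #     tool_choice={"type": "function", "function": {"name": "structured_output"}}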

        # TODO: create a separate agent type
        effective_agent_type = AgentType.split_thread_agent if response_schema else agent.agent_type

        request_data = llm_client.build_request_data(
            agent_type=effective_agent_type,
            messages=letta_messages,
            llm_config=llm_config,
            tools=tools,
            force_tool_call=force_tool_call,
        )

        # 6. Make direct LLM request
        response_data = await llm_client.request_async(request_data, llm_config)

        # 7. Convert to standard chat completion format
        chat_completion = await llm_client.convert_response_to_chat_completion(
            response_data,
            letta_messages,
            llm_config,
        )

        # 8. Extract response content
        content = ""
        if chat_completion.choices and len(chat_completion.choices) > 0:
            message = chat_completion.choices[0].message
            if response_schema:
                # When using structured output, extract from tool call arguments
                if message.tool_calls and len(message.tool_calls) > 0:
                    # The tool call arguments contain the structured output as JSON string
                    content = message.tool_calls[0].function.arguments
                else:
                    # Fallback: some providers may return in content even with tool forcing
                    content = message.content or ""
                    logger.warning(
                        "Expected tool call for structured output but got content response",
                        extra={"agent_id": str(agent_id), "content_length": len(content)},
                    )
            else:
                content = message.content or ""

        # 9. Extract usage statistics
        usage = llm_client.extract_usage_statistics(response_data, llm_config)

        # 10. Build and return response
        return GenerateResponse(
            content=content,
            model=llm_config.model,
            usage=usage,
        )
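

# Usage sketch (hypothetical caller; the `server`, `actor`, and agent id below
# are assumptions for illustration, not part of this module):
#
#     manager = AgentGenerateCompletionManager(server)
#     response = await manager.generate_completion_with_agent_config_async(
#         agent_id="agent-123",
#         prompt="Summarize the incident in one sentence.",
#         actor=actor,
#         response_schema={"properties": {"summary": {"type": "string"}}},
#     )
#     structured = json.loads(response.content)  # JSON string when a schema is given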