"""Manager for handling direct LLM completions using agent configuration.""" import json from typing import TYPE_CHECKING, Any, Dict, Optional from letta.errors import HandleNotFoundError, LLMError from letta.llm_api.llm_client import LLMClient from letta.log import get_logger from letta.orm.errors import NoResultFound from letta.schemas.enums import AgentType, MessageRole from letta.schemas.letta_message_content import TextContent from letta.schemas.message import Message from letta.schemas.usage import LettaUsageStatistics # Tool name used for structured output via tool forcing STRUCTURED_OUTPUT_TOOL_NAME = "structured_output" if TYPE_CHECKING: from letta.orm import User from letta.schemas.llm_config import LLMConfig from letta.server.server import SyncServer logger = get_logger(__name__) def _schema_to_tool_definition(schema: Dict[str, Any]) -> Dict[str, Any]: """ Convert a JSON schema into a tool definition for forced tool calling. Args: schema: JSON schema object with 'properties' and optionally 'required' Returns: Tool definition dict compatible with OpenAI/Anthropic function calling format """ return { "name": STRUCTURED_OUTPUT_TOOL_NAME, "description": "Returns a structured response matching the requested schema.", "parameters": { "type": "object", "properties": schema.get("properties", {}), "required": schema.get("required", list(schema.get("properties", {}).keys())), }, } class GenerateResponse: """Response from direct LLM generation.""" def __init__(self, content: str, model: str, usage: LettaUsageStatistics): self.content = content self.model = model self.usage = usage class AgentGenerateCompletionManager: """Manager for handling direct LLM completions using agent configuration.""" def __init__(self, server: "SyncServer"): """ Initialize the agent generate completion manager. Args: server: The SyncServer instance for accessing managers """ self.server = server self.agent_manager = server.agent_manager self.provider_manager = server.provider_manager async def generate_completion_with_agent_config_async( self, agent_id: str, prompt: str, actor: "User", system_prompt: Optional[str] = None, override_model: Optional[str] = None, response_schema: Optional[Dict[str, Any]] = None, ) -> GenerateResponse: """ Generate a completion directly from the LLM provider using the agent's configuration. This method makes a direct request to the LLM provider without any agent processing: - No memory or context retrieval - No tool calling (unless response_schema is provided) - No message persistence - No agent state modification Args: agent_id: The agent ID whose configuration to use prompt: The prompt/message to send to the LLM actor: The user making the request system_prompt: Optional system prompt to prepend to the conversation override_model: Optional model handle to override the agent's default (e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet') response_schema: Optional JSON schema for structured output. When provided, the LLM will be forced to return a response matching this schema via tool calling. Returns: GenerateResponse with content, model, and usage statistics. When response_schema is provided, content will be the JSON string matching the schema. Raises: NoResultFound: If agent not found HandleNotFoundError: If override_model is invalid LLMError: If LLM provider error occurs """ # 1. Validate agent exists and user has access agent = await self.agent_manager.get_agent_by_id_async( agent_id, actor, include_relationships=[], ) # 2. 


class GenerateResponse:
    """Response from direct LLM generation."""

    def __init__(self, content: str, model: str, usage: LettaUsageStatistics):
        self.content = content
        self.model = model
        self.usage = usage


class AgentGenerateCompletionManager:
    """Manager for handling direct LLM completions using agent configuration."""

    def __init__(self, server: "SyncServer"):
        """
        Initialize the agent generate completion manager.

        Args:
            server: The SyncServer instance for accessing managers
        """
        self.server = server
        self.agent_manager = server.agent_manager
        self.provider_manager = server.provider_manager

    async def generate_completion_with_agent_config_async(
        self,
        agent_id: str,
        prompt: str,
        actor: "User",
        system_prompt: Optional[str] = None,
        override_model: Optional[str] = None,
        response_schema: Optional[Dict[str, Any]] = None,
    ) -> GenerateResponse:
        """
        Generate a completion directly from the LLM provider using the agent's configuration.

        This method makes a direct request to the LLM provider without any agent processing:
        - No memory or context retrieval
        - No tool calling (unless response_schema is provided)
        - No message persistence
        - No agent state modification

        Args:
            agent_id: The agent ID whose configuration to use
            prompt: The prompt/message to send to the LLM
            actor: The user making the request
            system_prompt: Optional system prompt to prepend to the conversation
            override_model: Optional model handle to override the agent's default
                (e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet')
            response_schema: Optional JSON schema for structured output. When provided,
                the LLM is forced to return a response matching this schema via tool calling.

        Returns:
            GenerateResponse with content, model, and usage statistics.
            When response_schema is provided, content will be the JSON string matching the schema.

        Raises:
            NoResultFound: If the agent is not found
            HandleNotFoundError: If override_model is invalid
            LLMError: If an LLM provider error occurs
        """
        # 1. Validate agent exists and user has access
        agent = await self.agent_manager.get_agent_by_id_async(
            agent_id,
            actor,
            include_relationships=[],
        )

        # 2. Get LLM config (with optional override)
        llm_config: "LLMConfig" = agent.llm_config
        if override_model:
            # Fetch the full LLM config for the override model so we get the
            # right provider, endpoint, credentials, etc.
            llm_config = await self.server.get_llm_config_from_handle_async(
                actor=actor,
                handle=override_model,
            )

        logger.info(
            f"Generating completion for agent {agent_id}",
            extra={
                "agent_id": str(agent_id),
                "override_model": override_model,
                "prompt_length": len(prompt),
                "has_system_prompt": system_prompt is not None,
                "has_response_schema": response_schema is not None,
                "model": llm_config.model,
            },
        )

        # 3. Build messages from prompt and optional system_prompt
        letta_messages = []

        # Always add a system message (required by some providers like Anthropic).
        # Use the provided system_prompt or a minimal default (empty strings are not allowed with cache_control).
        letta_messages.append(
            Message(
                role=MessageRole.system,
                content=[TextContent(text=system_prompt if system_prompt else "You are a helpful assistant.")],
            )
        )

        # Add user prompt
        letta_messages.append(
            Message(
                role=MessageRole.user,
                content=[TextContent(text=prompt)],
            )
        )

        # 4. Create LLM client for the provider
        llm_client = LLMClient.create(
            provider_type=llm_config.model_endpoint_type,
            actor=actor,
        )
        if llm_client is None:
            raise LLMError(f"Unsupported provider type: {llm_config.model_endpoint_type}")

        # 5. Build request data.
        # If response_schema is provided, create a tool and force the model to call it.
        tools = None
        force_tool_call = None
        if response_schema:
            tools = [_schema_to_tool_definition(response_schema)]
            force_tool_call = STRUCTURED_OUTPUT_TOOL_NAME

        # TODO: create a separate agent type
        effective_agent_type = AgentType.split_thread_agent if response_schema else agent.agent_type
        request_data = llm_client.build_request_data(
            agent_type=effective_agent_type,
            messages=letta_messages,
            llm_config=llm_config,
            tools=tools,
            force_tool_call=force_tool_call,
        )

        # 6. Make direct LLM request
        response_data = await llm_client.request_async(request_data, llm_config)

        # 7. Convert to standard chat completion format
        chat_completion = await llm_client.convert_response_to_chat_completion(
            response_data,
            letta_messages,
            llm_config,
        )

        # 8. Extract response content
        content = ""
        if chat_completion.choices and len(chat_completion.choices) > 0:
            message = chat_completion.choices[0].message
            if response_schema:
                # When using structured output, extract from the tool call arguments
                if message.tool_calls and len(message.tool_calls) > 0:
                    # The tool call arguments contain the structured output as a JSON string
                    content = message.tool_calls[0].function.arguments
                else:
                    # Fallback: some providers may return the answer in content even with tool forcing
                    content = message.content or ""
                    logger.warning(
                        "Expected tool call for structured output but got content response",
                        extra={"agent_id": str(agent_id), "content_length": len(content)},
                    )
            else:
                content = message.content or ""

        # 9. Extract usage statistics
        usage = llm_client.extract_usage_statistics(response_data, llm_config)

        # 10. Build and return response
        return GenerateResponse(
            content=content,
            model=llm_config.model,
            usage=usage,
        )
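

# Illustrative usage sketch, not part of the module's public API: how a caller
# holding a live SyncServer and User might request a structured completion.
# The agent id, prompt, and schema below are hypothetical placeholders.
async def _example_structured_completion(server: "SyncServer", actor: "User") -> Dict[str, Any]:
    """Hypothetical example: force a JSON response via the structured_output tool."""
    manager = AgentGenerateCompletionManager(server)
    response = await manager.generate_completion_with_agent_config_async(
        agent_id="agent-123",  # hypothetical agent id
        prompt="Summarize the quarterly report in two sentences.",
        actor=actor,
        response_schema={
            "properties": {"summary": {"type": "string"}},
            "required": ["summary"],
        },
    )
    # With response_schema set, response.content is the JSON string produced
    # by the forced tool call, so it can be parsed directly.
    return json.loads(response.content)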