import asyncio
import json
from datetime import datetime
from typing import Annotated, Any, Dict, List, Literal, Optional, Union

import orjson
from fastapi import APIRouter, Body, Depends, File, Form, Header, HTTPException, Query, Request, UploadFile, status
from fastapi.responses import JSONResponse
from pydantic import BaseModel, ConfigDict, Field, field_validator
from starlette.responses import Response, StreamingResponse

from letta.agents.agent_loop import AgentLoop
from letta.agents.base_agent_v2 import BaseAgentV2
from letta.agents.letta_agent import LettaAgent
from letta.agents.letta_agent_v3 import LettaAgentV3
from letta.constants import DEFAULT_MAX_STEPS, DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, REDIS_RUN_ID_PREFIX
from letta.data_sources.redis_client import get_redis_client
from letta.errors import (
    HandleNotFoundError,
    LLMError,
    NoActiveRunsToCancelError,
    PendingApprovalError,
)
from letta.groups.sleeptime_multi_agent_v4 import SleeptimeMultiAgentV4
from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns
from letta.log import get_logger
from letta.orm.errors import NoResultFound
from letta.otel.context import get_ctx_attributes
from letta.otel.metric_registry import MetricRegistry
from letta.schemas.agent import AgentRelationships, AgentState, CreateAgent, UpdateAgent
from letta.schemas.agent_file import AgentFileSchema, SkillSchema
from letta.schemas.block import BlockResponse, BlockUpdate
from letta.schemas.enums import AgentType, MessageRole, RunStatus
from letta.schemas.file import AgentFileAttachment, PaginatedAgentFiles
from letta.schemas.group import Group
from letta.schemas.job import LettaRequestConfig
from letta.schemas.letta_message import LettaMessageUnion, LettaMessageUpdateUnion, MessageType
from letta.schemas.letta_message_content import TextContent
from letta.schemas.letta_request import LettaAsyncRequest, LettaRequest, LettaStreamingRequest
from letta.schemas.letta_response import LettaResponse, LettaStreamingResponse
from letta.schemas.letta_stop_reason import StopReasonType
from letta.schemas.mcp_server import ToolExecuteRequest
from letta.schemas.memory import (
    ArchivalMemorySearchResponse,
    ArchivalMemorySearchResult,
    ContextWindowOverview,
    CreateArchivalMemory,
    Memory,
)
from letta.schemas.message import Message, MessageCreate, MessageCreateType, MessageSearchRequest, MessageSearchResult
from letta.schemas.passage import Passage
from letta.schemas.run import Run as PydanticRun, RunUpdate
from letta.schemas.source import Source
from letta.schemas.tool import Tool
from letta.schemas.tool_execution_result import ToolExecutionResult
from letta.schemas.usage import LettaUsageStatistics
from letta.schemas.user import User
from letta.serialize_schemas.pydantic_agent_schema import AgentSchema
from letta.server.rest_api.dependencies import HeaderParams, get_headers, get_letta_server
from letta.server.server import SyncServer
from letta.services.lettuce import LettuceClient
from letta.services.run_manager import RunManager
from letta.services.streaming_service import StreamingService
from letta.services.summarizer.summarizer_config import CompactionSettings
from letta.settings import settings
from letta.utils import is_1_0_sdk_version, safe_create_shielded_task, safe_create_task, truncate_file_visible_content
from letta.validators import AgentId, BlockId, FileId, MessageId, SourceId, ToolId

# These can be forward refs, but because Fastapi needs them at runtime the must be imported normally


router = APIRouter(prefix="/agents", tags=["agents"])

logger = get_logger(__name__)


# Schemas for direct LLM generation endpoint
class GenerateRequest(BaseModel):
    """Request for direct LLM generation without agent processing."""

    prompt: str = Field(
        ...,
        description="The prompt/message to send to the LLM",
        min_length=1,
    )

    system_prompt: Optional[str] = Field(
        None,
        description="Optional system prompt to prepend to the conversation",
    )

    override_model: Optional[str] = Field(
        None,
        description="Model handle to use instead of agent's default (e.g., 'openai/gpt-4', 'anthropic/claude-3-5-sonnet')",
    )

    response_schema: Optional[Dict[str, Any]] = Field(
        None,
        description=(
            "JSON schema for structured output. When provided, the LLM will be forced to return "
            "a response matching this schema via tool calling. The schema should follow JSON Schema "
            "format with 'properties' and optionally 'required' fields."
        ),
    )

    @field_validator("prompt")
    @classmethod
    def validate_prompt_not_empty(cls, v: str) -> str:
        """Ensure prompt is not empty or whitespace-only."""
        if not v or not v.strip():
            raise ValueError("prompt cannot be empty or whitespace-only")
        return v


class GenerateResponse(BaseModel):
    """Response from direct LLM generation."""

    content: str = Field(..., description="The LLM's response text")
    model: str = Field(..., description="The model that generated this response")
    usage: LettaUsageStatistics = Field(..., description="Token usage statistics")


@router.get("/", response_model=list[AgentState], operation_id="list_agents")
async def list_agents(
    name: str | None = Query(None, description="Name of the agent"),
    tags: list[str] | None = Query(None, description="List of tags to filter agents by"),
    match_all_tags: bool = Query(
        False,
        description="If True, only returns agents that match ALL given tags. Otherwise, return agents that have ANY of the passed-in tags.",
    ),
    server: SyncServer = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
    before: str | None = Query(None, description="Cursor for pagination"),
    after: str | None = Query(None, description="Cursor for pagination"),
    limit: int | None = Query(50, description="Limit for pagination"),
    query_text: str | None = Query(None, description="Search agents by name"),
    project_id: str | None = Query(None, description="Search agents by project ID - this will default to your default project on cloud"),
    template_id: str | None = Query(None, description="Search agents by template ID"),
    base_template_id: str | None = Query(None, description="Search agents by base template ID"),
    identity_id: str | None = Query(None, description="Search agents by identity ID"),
    identifier_keys: list[str] | None = Query(None, description="Search agents by identifier keys"),
    include_relationships: list[str] | None = Query(
        None,
        description=(
            "Specify which relational fields (e.g., 'tools', 'sources', 'memory') to include in the response. "
            "If not provided, all relationships are loaded by default. "
            "Using this can optimize performance by reducing unnecessary joins."
            "This is a legacy parameter, and no longer supported after 1.0.0 SDK versions."
        ),
        deprecated=True,
    ),
    include: List[AgentRelationships] = Query(
        [],
        description=("Specify which relational fields to include in the response. No relationships are included by default."),
    ),
    order: Literal["asc", "desc"] = Query(
        "desc", description="Sort order for agents by creation time. 'asc' for oldest first, 'desc' for newest first"
    ),
    order_by: Literal["created_at", "last_run_completion"] = Query("created_at", description="Field to sort by"),
    ascending: bool = Query(
        False,
        description="Whether to sort agents oldest to newest (True) or newest to oldest (False, default)",
        deprecated=True,
    ),
    sort_by: str | None = Query(
        "created_at",
        description="Field to sort by. Options: 'created_at' (default), 'last_run_completion'",
        deprecated=True,
    ),
    show_hidden_agents: bool | None = Query(
        False,
        include_in_schema=False,
        description="If set to True, include agents marked as hidden in the results.",
    ),
    last_stop_reason: Optional[StopReasonType] = Query(None, description="Filter agents by their last stop reason."),
):
    """
    Get a list of all agents.
    """

    # Retrieve the actor (user) details
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    # Handle backwards compatibility - prefer new parameters over legacy ones
    final_ascending = (order == "asc") if order else ascending
    final_sort_by = order_by if order_by else sort_by
    if include_relationships is None and is_1_0_sdk_version(headers):
        include_relationships = []  # don't default include all if using new SDK version

    # Call list_agents directly without unnecessary dict handling
    return await server.agent_manager.list_agents_async(
        actor=actor,
        name=name,
        before=before,
        after=after,
        limit=limit,
        query_text=query_text,
        tags=tags,
        match_all_tags=match_all_tags,
        project_id=project_id,
        template_id=template_id,
        base_template_id=base_template_id,
        identity_id=identity_id,
        identifier_keys=identifier_keys,
        include_relationships=include_relationships,
        include=include,
        ascending=final_ascending,
        sort_by=final_sort_by,
        show_hidden_agents=show_hidden_agents,
        last_stop_reason=last_stop_reason,
    )


@router.get("/count", response_model=int, operation_id="count_agents")
async def count_agents(
    name: str | None = Query(None, description="Name of the agent"),
    tags: list[str] | None = Query(None, description="List of tags to filter agents by"),
    match_all_tags: bool = Query(
        False,
        description="If True, only counts agents that match ALL given tags. Otherwise, counts agents that have ANY of the passed-in tags.",
    ),
    query_text: str | None = Query(None, description="Search agents by name"),
    project_id: str | None = Query(None, description="Search agents by project ID - this will default to your default project on cloud"),
    template_id: str | None = Query(None, description="Search agents by template ID"),
    base_template_id: str | None = Query(None, description="Search agents by base template ID"),
    identity_id: str | None = Query(None, description="Search agents by identity ID"),
    identifier_keys: list[str] | None = Query(None, description="Search agents by identifier keys"),
    show_hidden_agents: bool | None = Query(
        False,
        include_in_schema=False,
        description="If set to True, include agents marked as hidden in the results.",
    ),
    last_stop_reason: Optional[StopReasonType] = Query(None, description="Filter agents by their last stop reason."),
    server: SyncServer = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Get the total number of agents with optional filtering.
    Supports the same filters as list_agents for consistent querying.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    # If no filters are provided, use the simpler size_async method
    if (
        all(
            param is None or param is False
            for param in [name, tags, query_text, project_id, template_id, base_template_id, identity_id, identifier_keys, last_stop_reason]
        )
        and not show_hidden_agents
    ):
        return await server.agent_manager.size_async(actor=actor)

    return await server.agent_manager.count_agents_async(
        actor=actor,
        name=name,
        tags=tags,
        match_all_tags=match_all_tags,
        query_text=query_text,
        project_id=project_id,
        template_id=template_id,
        base_template_id=base_template_id,
        identity_id=identity_id,
        identifier_keys=identifier_keys,
        show_hidden_agents=show_hidden_agents,
        last_stop_reason=last_stop_reason,
    )


class IndentedORJSONResponse(Response):
    media_type = "application/json"

    def render(self, content: Any) -> bytes:
        return orjson.dumps(content, option=orjson.OPT_INDENT_2)


@router.get("/{agent_id}/export", response_class=IndentedORJSONResponse, operation_id="export_agent")
async def export_agent(
    agent_id: str = AgentId,
    max_steps: int = Query(100, deprecated=True),
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
    use_legacy_format: bool = Query(
        False,
        description="If True, exports using the legacy single-agent 'v1' format with inline tools/blocks. If False, exports using the new multi-entity 'v2' format, with separate agents, tools, blocks, files, etc.",
        deprecated=True,
    ),
    conversation_id: Optional[str] = Query(
        None,
        description="Conversation ID to export. If provided, uses messages from this conversation instead of the agent's global message history.",
    ),
    scrub_messages: bool = Query(
        False,
        description="If True, excludes all messages from the export. Useful for sharing agent configs without conversation history.",
    ),
    # do not remove, used to autogeneration of spec
    # TODO: Think of a better way to export AgentFileSchema
    spec: AgentFileSchema | None = None,
    legacy_spec: AgentSchema | None = None,
) -> JSONResponse:
    """
    Export the serialized JSON representation of an agent, formatted with indentation.
    """
    if use_legacy_format:
        raise HTTPException(status_code=400, detail="Legacy format is not supported")
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    agent_file_schema = await server.agent_serialization_manager.export(
        agent_ids=[agent_id],
        actor=actor,
        conversation_id=conversation_id,
        scrub_messages=scrub_messages,
    )
    return agent_file_schema.model_dump()


class ExportAgentRequest(BaseModel):
    """Request body for POST /export endpoint."""

    skills: List[SkillSchema] = Field(
        default_factory=list,
        description="Skills to include in the export. Each skill must have a name and files (including SKILL.md).",
    )
    conversation_id: Optional[str] = Field(
        None,
        description="Conversation ID to export. If provided, uses messages from this conversation instead of the agent's global message history.",
    )
    scrub_messages: bool = Field(
        default=False,
        description="If True, excludes all messages from the export. Useful for sharing agent configs without conversation history.",
    )


@router.post("/{agent_id}/export", response_class=IndentedORJSONResponse, operation_id="export_agent_with_skills")
async def export_agent_with_skills(
    agent_id: str = AgentId,
    request: Optional[ExportAgentRequest] = Body(default=None),
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
) -> JSONResponse:
    """
    Export the serialized JSON representation of an agent with optional skills.

    This POST endpoint allows including skills in the export by providing them in the request body.
    Skills are resolved client-side and passed as SkillSchema objects containing the skill files.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    # Use defaults if no request body provided
    skills = request.skills if request else []
    conversation_id = request.conversation_id if request else None
    scrub_messages = request.scrub_messages if request else False

    agent_file_schema = await server.agent_serialization_manager.export(
        agent_ids=[agent_id],
        actor=actor,
        conversation_id=conversation_id,
        skills=skills,
        scrub_messages=scrub_messages,
    )
    return agent_file_schema.model_dump()


class ImportedAgentsResponse(BaseModel):
    """Response model for imported agents"""

    agent_ids: List[str] = Field(..., description="List of IDs of the imported agents")


def import_agent_legacy(
    agent_json: dict,
    server: "SyncServer",
    actor: User,
    append_copy_suffix: bool = True,
    override_existing_tools: bool = True,
    project_id: str | None = None,
    strip_messages: bool = False,
    env_vars: Optional[dict[str, Any]] = None,
) -> List[str]:
    """
    Import an agent using the legacy AgentSchema format.
    """
    # Validate the JSON against AgentSchema before passing it to deserialize
    agent_schema = AgentSchema.model_validate(agent_json)

    new_agent = server.agent_manager.deserialize(
        serialized_agent=agent_schema,  # Ensure we're passing a validated AgentSchema
        actor=actor,
        append_copy_suffix=append_copy_suffix,
        override_existing_tools=override_existing_tools,
        project_id=project_id,
        strip_messages=strip_messages,
        env_vars=env_vars,
    )
    return [new_agent.id]


async def _import_agent(
    agent_file_json: dict,
    server: "SyncServer",
    actor: User,
    # TODO: Support these fields for new agent file
    append_copy_suffix: bool = True,
    override_name: Optional[str] = None,
    override_existing_tools: bool = True,
    project_id: str | None = None,
    strip_messages: bool = False,
    env_vars: Optional[dict[str, Any]] = None,
    override_embedding_handle: Optional[str] = None,
    override_model_handle: Optional[str] = None,
) -> List[str]:
    """
    Import an agent using the new AgentFileSchema format.
    """
    agent_schema = AgentFileSchema.model_validate(agent_file_json)

    if override_embedding_handle:
        embedding_config_override = await server.get_embedding_config_from_handle_async(actor=actor, handle=override_embedding_handle)
    else:
        embedding_config_override = None

    if override_model_handle:
        llm_config_override = await server.get_llm_config_from_handle_async(actor=actor, handle=override_model_handle)
    else:
        llm_config_override = None

    import_result = await server.agent_serialization_manager.import_file(
        schema=agent_schema,
        actor=actor,
        append_copy_suffix=append_copy_suffix,
        override_name=override_name,
        override_existing_tools=override_existing_tools,
        env_vars=env_vars,
        override_embedding_config=embedding_config_override,
        override_llm_config=llm_config_override,
        project_id=project_id,
    )

    if not import_result.success:
        from letta.errors import AgentFileImportError

        raise AgentFileImportError(f"Import failed: {import_result.message}. Errors: {', '.join(import_result.errors)}")

    return import_result.imported_agent_ids


@router.post("/import", response_model=ImportedAgentsResponse, operation_id="import_agent")
async def import_agent(
    file: UploadFile = File(...),
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
    x_override_embedding_model: str | None = Header(None, alias="x-override-embedding-model"),
    # New fields (all optional)
    override_existing_tools: bool = Form(
        True,
        description="If set to True, existing tools can get their source code overwritten by the uploaded tool definitions. Note that Letta core tools can never be updated externally.",
    ),
    strip_messages: bool = Form(
        False,
        description="If set to True, strips all messages from the agent before importing.",
    ),
    secrets: Optional[str] = Form(None, description="Secrets as a JSON string to pass to the agent for tool execution."),
    name: Optional[str] = Form(
        None,
        description="If provided, overrides the agent name with this value.",
    ),
    embedding: Optional[str] = Form(
        None,
        description="Embedding handle to override with.",
    ),
    model: Optional[str] = Form(
        None,
        description="Model handle to override the agent's default model. This allows the imported agent to use a different model while keeping other defaults (e.g., context size) from the original configuration.",
    ),
    # Deprecated fields (maintain backward compatibility)
    append_copy_suffix: bool = Form(
        True,
        description='If set to True, appends "_copy" to the end of the agent name.',
        deprecated=True,
    ),
    override_name: Optional[str] = Form(
        None,
        description="If provided, overrides the agent name with this value. Use 'name' instead.",
        deprecated=True,
    ),
    override_embedding_handle: Optional[str] = Form(
        None,
        description="Override import with specific embedding handle. Use 'embedding' instead.",
        deprecated=True,
    ),
    override_model_handle: Optional[str] = Form(
        None,
        description="Model handle to override the agent's default model. Use 'model' instead.",
        deprecated=True,
    ),
    project_id: str | None = Form(
        None, description="The project ID to associate the uploaded agent with. This is now passed via headers.", deprecated=True
    ),
    env_vars_json: Optional[str] = Form(
        None,
        description="Environment variables as a JSON string to pass to the agent for tool execution. Use 'secrets' instead.",
        deprecated=True,
    ),
):
    """
    Import a serialized agent file and recreate the agent(s) in the system.
    Returns the IDs of all imported agents.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    try:
        serialized_data = await file.read()
        file_size_mb = len(serialized_data) / (1024 * 1024)
        logger.info(f"Agent import: loaded {file_size_mb:.2f} MB into memory")
        agent_json = json.loads(serialized_data)

        # Handle double-encoded JSON (if the result is a string, parse it again)
        if isinstance(agent_json, str):
            agent_json = json.loads(agent_json)
    except json.JSONDecodeError:
        raise HTTPException(status_code=400, detail="Corrupted agent file format.")

    # Handle backward compatibility: prefer new field names over deprecated ones
    final_name = name or override_name
    final_embedding_handle = embedding or override_embedding_handle or x_override_embedding_model
    final_model_handle = model or override_model_handle

    # Parse secrets (new) or env_vars_json (deprecated)
    env_vars = None
    secrets_json = secrets or env_vars_json
    if secrets_json:
        try:
            env_vars = json.loads(secrets_json)
        except json.JSONDecodeError:
            raise HTTPException(status_code=400, detail="secrets must be a valid JSON string")

        if not isinstance(env_vars, dict):
            raise HTTPException(status_code=400, detail="secrets must be a valid JSON string")

    # Get project_id from headers (preferred) or fall back to form data for backward compatibility
    # In cloud environments, project_id should be passed via headers
    final_project_id = headers.project_id or project_id

    # Check if the JSON is AgentFileSchema or AgentSchema
    # TODO: This is kind of hacky, but should work as long as dont' change the schema
    if "agents" in agent_json and isinstance(agent_json.get("agents"), list):
        # This is an AgentFileSchema
        agent_ids = await _import_agent(
            agent_file_json=agent_json,
            server=server,
            actor=actor,
            append_copy_suffix=append_copy_suffix,
            override_name=final_name,
            override_existing_tools=override_existing_tools,
            project_id=final_project_id,
            strip_messages=strip_messages,
            env_vars=env_vars,
            override_embedding_handle=final_embedding_handle,
            override_model_handle=final_model_handle,
        )
    else:
        # This is a legacy AgentSchema
        raise HTTPException(
            status_code=400,
            detail="Legacy AgentSchema format is deprecated. Please use the new AgentFileSchema format with 'agents' field.",
        )

    return ImportedAgentsResponse(agent_ids=agent_ids)


@router.get("/{agent_id}/context", response_model=ContextWindowOverview, operation_id="retrieve_agent_context_window", deprecated=True)
async def retrieve_agent_context_window(
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
    conversation_id: Optional[str] = Query(
        None, description="Conversation ID to get context window for. If provided, uses messages from this conversation."
    ),
):
    """
    Retrieve the context window of a specific agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    return await server.agent_manager.get_context_window(agent_id=agent_id, actor=actor, conversation_id=conversation_id)


class CreateAgentRequest(CreateAgent):
    """
    CreateAgent model specifically for POST request body, excluding user_id which comes from headers
    """

    # Override the user_id field to exclude it from the request body validation
    actor_id: str | None = Field(None, exclude=True)


@router.post("/", response_model=AgentState, operation_id="create_agent")
async def create_agent(
    agent: CreateAgentRequest = Body(...),
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
    x_project: str | None = Header(
        None, alias="X-Project", description="The project slug to associate with the agent (cloud only)."
    ),  # Only handled by next js middleware
):
    """
    Create an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    return await server.create_agent_async(agent, actor=actor)


@router.patch("/{agent_id}", response_model=AgentState, operation_id="modify_agent")
async def modify_agent(
    agent_id: AgentId,
    update_agent: UpdateAgent = Body(...),
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """Update an existing agent."""
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    return await server.update_agent_async(agent_id=agent_id, request=update_agent, actor=actor)


@router.get("/{agent_id}/tools", response_model=list[Tool], operation_id="list_tools_for_agent")
async def list_tools_for_agent(
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
    before: Optional[str] = Query(
        None, description="Tool ID cursor for pagination. Returns tools that come before this tool ID in the specified sort order"
    ),
    after: Optional[str] = Query(
        None, description="Tool ID cursor for pagination. Returns tools that come after this tool ID in the specified sort order"
    ),
    limit: Optional[int] = Query(10, description="Maximum number of tools to return"),
    order: Literal["asc", "desc"] = Query(
        "desc", description="Sort order for tools by creation time. 'asc' for oldest first, 'desc' for newest first"
    ),
    order_by: Literal["created_at"] = Query("created_at", description="Field to sort by"),
):
    """Get tools from an existing agent."""
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    return await server.agent_manager.list_attached_tools_async(
        agent_id=agent_id,
        actor=actor,
        before=before,
        after=after,
        limit=limit,
        ascending=(order == "asc"),
    )


@router.patch("/{agent_id}/tools/attach/{tool_id}", response_model=Optional[AgentState], operation_id="attach_tool_to_agent")
async def attach_tool_to_agent(
    tool_id: ToolId,
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Attach a tool to an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    await server.agent_manager.attach_tool_async(agent_id=agent_id, tool_id=tool_id, actor=actor)
    if is_1_0_sdk_version(headers):
        return None
    # TODO: Unfortunately we need this to preserve our current API behavior
    return await server.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)


@router.patch("/{agent_id}/tools/detach/{tool_id}", response_model=Optional[AgentState], operation_id="detach_tool_from_agent")
async def detach_tool_from_agent(
    tool_id: ToolId,
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Detach a tool from an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    await server.agent_manager.detach_tool_async(agent_id=agent_id, tool_id=tool_id, actor=actor)
    if is_1_0_sdk_version(headers):
        return None
    # TODO: Unfortunately we need this to preserve our current API behavior
    return await server.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)


class ModifyApprovalRequest(BaseModel):
    """Request body for modifying tool approval requirements."""

    requires_approval: bool = Field(..., description="Whether the tool requires approval before execution")

    model_config = ConfigDict(extra="forbid")


@router.patch("/{agent_id}/tools/approval/{tool_name}", response_model=Optional[AgentState], operation_id="modify_approval_for_tool")
async def modify_approval_for_tool(
    tool_name: str,
    agent_id: AgentId,
    requires_approval: bool | None = Query(None, description="Whether the tool requires approval before execution", deprecated=True),
    request: ModifyApprovalRequest | None = Body(None),
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Modify the approval requirement for a tool attached to an agent.

    Accepts requires_approval via request body (preferred) or query parameter (deprecated).
    """
    # Prefer body over query param for backwards compatibility
    if request is not None:
        approval_value = request.requires_approval
    elif requires_approval is not None:
        approval_value = requires_approval
    else:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="requires_approval must be provided either in request body or as query parameter",
        )

    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    await server.agent_manager.modify_approvals_async(agent_id=agent_id, tool_name=tool_name, requires_approval=approval_value, actor=actor)
    if is_1_0_sdk_version(headers):
        return None
    # TODO: Unfortunately we need this to preserve our current API behavior
    return await server.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)


@router.post("/{agent_id}/tools/{tool_name}/run", response_model=ToolExecutionResult, operation_id="run_tool_for_agent")
async def run_tool_for_agent(
    agent_id: AgentId,
    tool_name: str,
    request: ToolExecuteRequest = Body(default=ToolExecuteRequest()),
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Trigger a tool by name on a specific agent, providing the necessary arguments.

    This endpoint executes a tool that is attached to the agent, using the agent's
    state and environment variables for execution context.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    # Get agent with all relationships
    agent = await server.agent_manager.get_agent_by_id_async(
        agent_id,
        actor,
        include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools", "tags"],
    )

    # Find the tool by name among attached tools
    tool = None
    if agent.tools:
        for t in agent.tools:
            if t.name == tool_name:
                tool = t
                break

    if tool is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Tool '{tool_name}' not found or not attached to agent '{agent_id}'",
        )

    # Build environment variables dict from agent secrets
    # Use pre-decrypted value field (populated in from_orm_async)
    sandbox_env_vars = {}
    if agent.tool_exec_environment_variables:
        for env_var in agent.tool_exec_environment_variables:
            sandbox_env_vars[env_var.key] = env_var.value or ""

    # Create tool execution manager and execute the tool
    from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager

    tool_execution_manager = ToolExecutionManager(
        agent_state=agent,
        message_manager=server.message_manager,
        agent_manager=server.agent_manager,
        block_manager=server.block_manager,
        run_manager=server.run_manager,
        passage_manager=server.passage_manager,
        actor=actor,
        sandbox_env_vars=sandbox_env_vars,
    )

    tool_execution_result = await tool_execution_manager.execute_tool_async(
        function_name=tool_name,
        function_args=request.args,
        tool=tool,
    )

    # don't return a result if the tool execution failed
    if tool_execution_result.status == "error":
        tool_execution_result.func_return = None
    # remove deprecated agent_state field
    tool_execution_result.agent_state = None
    return tool_execution_result


@router.patch("/{agent_id}/sources/attach/{source_id}", response_model=AgentState, operation_id="attach_source_to_agent", deprecated=True)
async def attach_source(
    source_id: SourceId,
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Attach a source to an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    agent_state = await server.agent_manager.attach_source_async(agent_id=agent_id, source_id=source_id, actor=actor)

    # Check if the agent is missing any files tools
    agent_state = await server.agent_manager.attach_missing_files_tools_async(agent_state=agent_state, actor=actor)

    files = await server.file_manager.list_files(source_id, actor, include_content=True)
    if files:
        await server.agent_manager.insert_files_into_context_window(agent_state=agent_state, file_metadata_with_content=files, actor=actor)

    if agent_state.enable_sleeptime:
        source = await server.source_manager.get_source_by_id(source_id=source_id, actor=actor)
        safe_create_task(server.sleeptime_document_ingest_async(agent_state, source, actor), label="sleeptime_document_ingest_async")

    return agent_state


@router.patch("/{agent_id}/folders/attach/{folder_id}", response_model=Optional[AgentState], operation_id="attach_folder_to_agent")
async def attach_folder_to_agent(
    folder_id: SourceId,
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Attach a folder to an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    agent_state = await server.agent_manager.attach_source_async(agent_id=agent_id, source_id=folder_id, actor=actor)

    # Check if the agent is missing any files tools
    agent_state = await server.agent_manager.attach_missing_files_tools_async(agent_state=agent_state, actor=actor)

    files = await server.file_manager.list_files(folder_id, actor, include_content=True)
    if files:
        await server.agent_manager.insert_files_into_context_window(agent_state=agent_state, file_metadata_with_content=files, actor=actor)

    if agent_state.enable_sleeptime:
        source = await server.source_manager.get_source_by_id(source_id=folder_id, actor=actor)
        safe_create_task(server.sleeptime_document_ingest_async(agent_state, source, actor), label="sleeptime_document_ingest_async")

    if is_1_0_sdk_version(headers):
        return None
    return agent_state


@router.patch("/{agent_id}/sources/detach/{source_id}", response_model=AgentState, operation_id="detach_source_from_agent", deprecated=True)
async def detach_source(
    source_id: SourceId,
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Detach a source from an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    agent_state = await server.agent_manager.detach_source_async(agent_id=agent_id, source_id=source_id, actor=actor)

    if not agent_state.sources:
        agent_state = await server.agent_manager.detach_all_files_tools_async(agent_state=agent_state, actor=actor)

    # Query files_agents directly to get exactly what was attached, regardless of source changes
    file_ids = await server.file_agent_manager.get_file_ids_for_agent_by_source(agent_id=agent_id, source_id=source_id, actor=actor)
    if file_ids:
        await server.remove_files_from_context_window(agent_state=agent_state, file_ids=file_ids, actor=actor)

    if agent_state.enable_sleeptime:
        try:
            source = await server.source_manager.get_source_by_id(source_id=source_id, actor=actor)
            block = await server.agent_manager.get_block_with_label_async(agent_id=agent_state.id, block_label=source.name, actor=actor)
            await server.block_manager.delete_block_async(block.id, actor)
        except Exception:
            pass

    return agent_state


@router.patch("/{agent_id}/folders/detach/{folder_id}", response_model=Optional[AgentState], operation_id="detach_folder_from_agent")
async def detach_folder_from_agent(
    folder_id: SourceId,
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Detach a folder from an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    agent_state = await server.agent_manager.detach_source_async(agent_id=agent_id, source_id=folder_id, actor=actor)

    if not agent_state.sources:
        agent_state = await server.agent_manager.detach_all_files_tools_async(agent_state=agent_state, actor=actor)

    # Query files_agents directly to get exactly what was attached, regardless of source changes
    file_ids = await server.file_agent_manager.get_file_ids_for_agent_by_source(agent_id=agent_id, source_id=folder_id, actor=actor)
    if file_ids:
        await server.remove_files_from_context_window(agent_state=agent_state, file_ids=file_ids, actor=actor)

    if agent_state.enable_sleeptime:
        try:
            source = await server.source_manager.get_source_by_id(source_id=folder_id, actor=actor)
            block = await server.agent_manager.get_block_with_label_async(agent_id=agent_state.id, block_label=source.name, actor=actor)
            await server.block_manager.delete_block_async(block.id, actor)
        except Exception:
            pass

    if is_1_0_sdk_version(headers):
        return None
    return agent_state


@router.patch("/{agent_id}/files/close-all", response_model=List[str], operation_id="close_all_files_for_agent")
async def close_all_files_for_agent(
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Closes all currently open files for a given agent.

    This endpoint updates the file state for the agent so that no files are marked as open.
    Typically used to reset the working memory view for the agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    return await server.file_agent_manager.close_all_other_files(agent_id=agent_id, keep_file_names=[], actor=actor)


@router.patch("/{agent_id}/files/{file_id}/open", response_model=List[str], operation_id="open_file_for_agent")
async def open_file_for_agent(
    file_id: FileId,
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Opens a specific file for a given agent.

    This endpoint marks a specific file as open in the agent's file state.
    The file will be included in the agent's working memory view.
    Returns a list of file names that were closed due to LRU eviction.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    # Get the agent to access files configuration
    per_file_view_window_char_limit, max_files_open = await server.agent_manager.get_agent_files_config_async(
        agent_id=agent_id, actor=actor
    )

    # Get file metadata
    file_metadata = await server.file_manager.get_file_by_id(file_id=file_id, actor=actor, include_content=True)
    if not file_metadata:
        raise HTTPException(status_code=404, detail=f"File with id={file_id} not found")

    # Process file content with line numbers using LineChunker
    from letta.services.file_processor.chunker.line_chunker import LineChunker

    content_lines = LineChunker().chunk_text(file_metadata=file_metadata, validate_range=False)
    visible_content = "\n".join(content_lines)

    # Truncate if needed
    visible_content = truncate_file_visible_content(visible_content, True, per_file_view_window_char_limit)

    # Use enforce_max_open_files_and_open for efficient LRU handling
    closed_files, _was_already_open, _ = await server.file_agent_manager.enforce_max_open_files_and_open(
        agent_id=agent_id,
        file_id=file_id,
        file_name=file_metadata.file_name,
        source_id=file_metadata.source_id,
        actor=actor,
        visible_content=visible_content,
        max_files_open=max_files_open,
    )

    return closed_files


@router.patch("/{agent_id}/files/{file_id}/close", response_model=None, operation_id="close_file_for_agent")
async def close_file_for_agent(
    file_id: FileId,
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Closes a specific file for a given agent.

    This endpoint marks a specific file as closed in the agent's file state.
    The file will be removed from the agent's working memory view.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    # Use update_file_agent_by_id to close the file
    await server.file_agent_manager.update_file_agent_by_id(
        agent_id=agent_id,
        file_id=file_id,
        actor=actor,
        is_open=False,
    )
    return JSONResponse(status_code=status.HTTP_200_OK, content={"message": f"File id={file_id} successfully closed"})


@router.get("/{agent_id}", response_model=AgentState, operation_id="retrieve_agent")
async def retrieve_agent(
    agent_id: AgentId,
    include_relationships: list[str] | None = Query(
        None,
        description=(
            "Specify which relational fields (e.g., 'tools', 'sources', 'memory') to include in the response. "
            "If not provided, all relationships are loaded by default. "
            "Using this can optimize performance by reducing unnecessary joins."
            "This is a legacy parameter, and no longer supported after 1.0.0 SDK versions."
        ),
        deprecated=True,
    ),
    include: List[AgentRelationships] = Query(
        [],
        description=("Specify which relational fields to include in the response. No relationships are included by default."),
    ),
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Get the state of the agent.
    """

    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    if include_relationships is None and is_1_0_sdk_version(headers):
        include_relationships = []  # don't default include all if using new SDK version
    return await server.agent_manager.get_agent_by_id_async(
        agent_id=agent_id, include_relationships=include_relationships, include=include, actor=actor
    )


@router.delete("/{agent_id}", response_model=None, operation_id="delete_agent")
async def delete_agent(
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Delete an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    await server.agent_manager.delete_agent_async(agent_id=agent_id, actor=actor)
    return JSONResponse(status_code=status.HTTP_200_OK, content={"message": f"Agent id={agent_id} successfully deleted"})


@router.get("/{agent_id}/sources", response_model=list[Source], operation_id="list_agent_sources", deprecated=True)
async def list_agent_sources(
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
    before: Optional[str] = Query(
        None, description="Source ID cursor for pagination. Returns sources that come before this source ID in the specified sort order"
    ),
    after: Optional[str] = Query(
        None, description="Source ID cursor for pagination. Returns sources that come after this source ID in the specified sort order"
    ),
    limit: Optional[int] = Query(100, description="Maximum number of sources to return"),
    order: Literal["asc", "desc"] = Query(
        "desc", description="Sort order for sources by creation time. 'asc' for oldest first, 'desc' for newest first"
    ),
    order_by: Literal["created_at"] = Query("created_at", description="Field to sort by"),
):
    """
    Get the sources associated with an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    return await server.agent_manager.list_attached_sources_async(
        agent_id=agent_id,
        actor=actor,
        before=before,
        after=after,
        limit=limit,
        ascending=(order == "asc"),
    )


@router.get("/{agent_id}/folders", response_model=list[Source], operation_id="list_folders_for_agent")
async def list_folders_for_agent(
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
    before: Optional[str] = Query(
        None, description="Source ID cursor for pagination. Returns sources that come before this source ID in the specified sort order"
    ),
    after: Optional[str] = Query(
        None, description="Source ID cursor for pagination. Returns sources that come after this source ID in the specified sort order"
    ),
    limit: Optional[int] = Query(100, description="Maximum number of sources to return"),
    order: Literal["asc", "desc"] = Query(
        "desc", description="Sort order for sources by creation time. 'asc' for oldest first, 'desc' for newest first"
    ),
    order_by: Literal["created_at"] = Query("created_at", description="Field to sort by"),
):
    """
    Get the folders associated with an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    return await server.agent_manager.list_attached_sources_async(
        agent_id=agent_id,
        actor=actor,
        before=before,
        after=after,
        limit=limit,
        ascending=(order == "asc"),
    )


@router.get("/{agent_id}/files", response_model=PaginatedAgentFiles, operation_id="list_files_for_agent")
async def list_files_for_agent(
    agent_id: AgentId,
    before: Optional[str] = Query(
        None, description="File ID cursor for pagination. Returns files that come before this file ID in the specified sort order"
    ),
    after: Optional[str] = Query(
        None, description="File ID cursor for pagination. Returns files that come after this file ID in the specified sort order"
    ),
    limit: Optional[int] = Query(100, description="Maximum number of files to return"),
    order: Literal["asc", "desc"] = Query(
        "desc", description="Sort order for files by creation time. 'asc' for oldest first, 'desc' for newest first"
    ),
    order_by: Literal["created_at"] = Query("created_at", description="Field to sort by"),
    cursor: Optional[str] = Query(
        None, description="Pagination cursor from previous response (deprecated, use before/after)", deprecated=True
    ),
    is_open: Optional[bool] = Query(None, description="Filter by open status (true for open files, false for closed files)"),
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Get the files attached to an agent with their open/closed status.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    effective_limit = limit or 20

    # get paginated file-agent relationships for this agent
    file_agents, next_cursor, has_more = await server.file_agent_manager.list_files_for_agent_paginated(
        agent_id=agent_id,
        actor=actor,
        cursor=cursor,  # keep for backwards compatibility
        limit=effective_limit,
        is_open=is_open,
        before=before,
        after=after,
        ascending=(order == "asc"),
    )

    # enrich with file and source metadata
    enriched_files = []
    for fa in file_agents:
        # get source/folder metadata
        source = await server.source_manager.get_source_by_id(source_id=fa.source_id, actor=actor)

        # build response object
        attachment = AgentFileAttachment(
            id=fa.id,
            file_id=fa.file_id,
            file_name=fa.file_name,
            folder_id=fa.source_id,
            folder_name=source.name if source else "Unknown",
            is_open=fa.is_open,
            last_accessed_at=fa.last_accessed_at,
            visible_content=fa.visible_content,
            start_line=fa.start_line,
            end_line=fa.end_line,
        )
        enriched_files.append(attachment)

    return PaginatedAgentFiles(files=enriched_files, next_cursor=next_cursor, has_more=has_more)


# TODO: remove? can also get with agent blocks
@router.get("/{agent_id}/core-memory", response_model=Memory, operation_id="retrieve_agent_memory", deprecated=True)
async def retrieve_agent_memory(
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Retrieve the memory state of a specific agent.
    This endpoint fetches the current memory state of the agent identified by the user ID and agent ID.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    return await server.get_agent_memory_async(agent_id=agent_id, actor=actor)


@router.get("/{agent_id}/core-memory/blocks/{block_label}", response_model=BlockResponse, operation_id="retrieve_core_memory_block")
async def retrieve_block_for_agent(
    block_label: str,
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Retrieve a core memory block from an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    return await server.agent_manager.get_block_with_label_async(agent_id=agent_id, block_label=block_label, actor=actor)


@router.get("/{agent_id}/core-memory/blocks", response_model=list[BlockResponse], operation_id="list_core_memory_blocks")
async def list_blocks_for_agent(
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
    before: Optional[str] = Query(
        None, description="Block ID cursor for pagination. Returns blocks that come before this block ID in the specified sort order"
    ),
    after: Optional[str] = Query(
        None, description="Block ID cursor for pagination. Returns blocks that come after this block ID in the specified sort order"
    ),
    limit: Optional[int] = Query(100, description="Maximum number of blocks to return"),
    order: Literal["asc", "desc"] = Query(
        "desc", description="Sort order for blocks by creation time. 'asc' for oldest first, 'desc' for newest first"
    ),
    order_by: Literal["created_at"] = Query("created_at", description="Field to sort by"),
):
    """
    Retrieve the core memory blocks of a specific agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    return await server.agent_manager.list_agent_blocks_async(
        agent_id=agent_id,
        actor=actor,
        before=before,
        after=after,
        limit=limit,
        ascending=(order == "asc"),
    )


@router.patch("/{agent_id}/core-memory/blocks/{block_label}", response_model=BlockResponse, operation_id="modify_core_memory_block")
async def modify_block_for_agent(
    block_label: str,
    agent_id: AgentId,
    block_update: BlockUpdate = Body(...),
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Updates a core memory block of an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    block = await server.agent_manager.modify_block_by_label_async(
        agent_id=agent_id, block_label=block_label, block_update=block_update, actor=actor
    )

    # This should also trigger a system prompt change in the agent
    await server.agent_manager.rebuild_system_prompt_async(agent_id=agent_id, actor=actor, force=True, update_timestamp=False)

    return block


@router.post(
    "/{agent_id}/recompile",
    response_model=str,
    operation_id="recompile_agent",
)
async def recompile_agent(
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
    update_timestamp: bool = Query(
        False,
        description="If True, update the in-context memory last edit timestamp embedded in the system prompt.",
    ),
    dry_run: bool = Query(
        False,
        description="If True, do not persist changes; still returns the compiled system prompt.",
    ),
):
    """Manually trigger system prompt recompilation for an agent."""
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    _, system_message, _, _ = await server.agent_manager.rebuild_system_prompt_async(
        agent_id=agent_id,
        actor=actor,
        force=True,
        update_timestamp=update_timestamp,
        dry_run=dry_run,
    )

    if system_message is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"No system message found for agent '{agent_id}'")

    return system_message.to_openai_dict().get("content", "")


@router.post(
    "/{agent_id}/system-prompt/recompile",
    response_model=str,
    operation_id="recompile_agent_system_prompt",
    deprecated=True,
)
async def recompile_agent_system_prompt(
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
    update_timestamp: bool = Query(
        False,
        description="If True, update the in-context memory last edit timestamp embedded in the system prompt.",
    ),
    dry_run: bool = Query(
        False,
        description="If True, do not persist changes; still returns the compiled system prompt.",
    ),
):
    """Deprecated alias for POST /v1/agents/{agent_id}/recompile."""
    return await recompile_agent(
        agent_id=agent_id,
        server=server,
        headers=headers,
        update_timestamp=update_timestamp,
        dry_run=dry_run,
    )


@router.patch("/{agent_id}/core-memory/blocks/attach/{block_id}", response_model=AgentState, operation_id="attach_core_memory_block")
async def attach_block_to_agent(
    block_id: BlockId,
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Attach a core memory block to an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    return await server.agent_manager.attach_block_async(agent_id=agent_id, block_id=block_id, actor=actor)


@router.patch("/{agent_id}/core-memory/blocks/detach/{block_id}", response_model=AgentState, operation_id="detach_core_memory_block")
async def detach_block_from_agent(
    block_id: BlockId,
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Detach a core memory block from an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    return await server.agent_manager.detach_block_async(agent_id=agent_id, block_id=block_id, actor=actor)


@router.patch("/{agent_id}/archives/attach/{archive_id}", response_model=None, operation_id="attach_archive_to_agent")
async def attach_archive_to_agent(
    archive_id: str,
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Attach an archive to an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    await server.archive_manager.attach_agent_to_archive_async(
        agent_id=agent_id,
        archive_id=archive_id,
        actor=actor,
    )
    return None


@router.patch("/{agent_id}/archives/detach/{archive_id}", response_model=None, operation_id="detach_archive_from_agent")
async def detach_archive_from_agent(
    archive_id: str,
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Detach an archive from an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    await server.archive_manager.detach_agent_from_archive_async(
        agent_id=agent_id,
        archive_id=archive_id,
        actor=actor,
    )
    return None


@router.patch("/{agent_id}/identities/attach/{identity_id}", response_model=None, operation_id="attach_identity_to_agent")
async def attach_identity_to_agent(
    identity_id: str,
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Attach an identity to an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    await server.identity_manager.attach_agent_async(
        identity_id=identity_id,
        agent_id=agent_id,
        actor=actor,
    )
    return None


@router.patch("/{agent_id}/identities/detach/{identity_id}", response_model=None, operation_id="detach_identity_from_agent")
async def detach_identity_from_agent(
    identity_id: str,
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Detach an identity from an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    await server.identity_manager.detach_agent_async(
        identity_id=identity_id,
        agent_id=agent_id,
        actor=actor,
    )
    return None


@router.get("/{agent_id}/archival-memory", response_model=list[Passage], operation_id="list_passages")
async def list_passages(
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    after: str | None = Query(None, description="Unique ID of the memory to start the query range at."),
    before: str | None = Query(None, description="Unique ID of the memory to end the query range at."),
    limit: int | None = Query(100, description="How many results to include in the response."),
    search: str | None = Query(None, description="Search passages by text"),
    ascending: bool | None = Query(
        True, description="Whether to sort passages oldest to newest (True, default) or newest to oldest (False)"
    ),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Retrieve the memories in an agent's archival memory store (paginated query).
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    return await server.get_agent_archival_async(
        agent_id=agent_id,
        actor=actor,
        after=after,
        before=before,
        query_text=search,
        limit=limit,
        ascending=ascending,
    )


@router.post("/{agent_id}/archival-memory", response_model=list[Passage], operation_id="create_passage")
async def create_passage(
    agent_id: AgentId,
    request: CreateArchivalMemory = Body(...),
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Insert a memory into an agent's archival memory store.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    return await server.insert_archival_memory_async(
        agent_id=agent_id, memory_contents=request.text, actor=actor, tags=request.tags, created_at=request.created_at
    )


@router.get(
    "/{agent_id}/archival-memory/search",
    response_model=ArchivalMemorySearchResponse,
    operation_id="search_archival_memory",
)
async def search_archival_memory(
    agent_id: AgentId,
    query: str = Query(..., description="String to search for using semantic similarity"),
    tags: Optional[List[str]] = Query(None, description="Optional list of tags to filter search results"),
    tag_match_mode: Literal["any", "all"] = Query(
        "any", description="How to match tags - 'any' to match passages with any of the tags, 'all' to match only passages with all tags"
    ),
    top_k: Optional[int] = Query(None, description="Maximum number of results to return. Uses system default if not specified"),
    start_datetime: Optional[datetime] = Query(None, description="Filter results to passages created after this datetime"),
    end_datetime: Optional[datetime] = Query(None, description="Filter results to passages created before this datetime"),
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Search archival memory using semantic (embedding-based) search with optional temporal filtering.

    This endpoint allows manual triggering of archival memory searches, enabling users to query
    an agent's archival memory store directly via the API. The search uses the same functionality
    as the agent's archival_memory_search tool but is accessible for external API usage.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    # convert datetime to string in ISO 8601 format
    start_datetime = start_datetime.isoformat() if start_datetime else None
    end_datetime = end_datetime.isoformat() if end_datetime else None

    # Use the shared agent manager method
    formatted_results = await server.agent_manager.search_agent_archival_memory_async(
        agent_id=agent_id,
        actor=actor,
        query=query,
        tags=tags,
        tag_match_mode=tag_match_mode,
        top_k=top_k,
        start_datetime=start_datetime,
        end_datetime=end_datetime,
    )

    # Convert to proper response schema
    search_results = [ArchivalMemorySearchResult(**result) for result in formatted_results]

    return ArchivalMemorySearchResponse(results=search_results, count=len(formatted_results))


# TODO(ethan): query or path parameter for memory_id?
# @router.delete("/{agent_id}/archival")
@router.delete("/{agent_id}/archival-memory/{memory_id}", response_model=None, operation_id="delete_passage")
async def delete_passage(
    memory_id: str,
    agent_id: AgentId,
    # memory_id: str = Query(..., description="Unique ID of the memory to be deleted."),
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Delete a memory from an agent's archival memory store.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    await server.delete_archival_memory_async(memory_id=memory_id, actor=actor)
    return JSONResponse(status_code=status.HTTP_200_OK, content={"message": f"Memory id={memory_id} successfully deleted"})


AgentMessagesResponse = Annotated[
    list[LettaMessageUnion], Field(json_schema_extra={"type": "array", "items": {"$ref": "#/components/schemas/LettaMessageUnion"}})
]


@router.get("/{agent_id}/messages", response_model=AgentMessagesResponse, operation_id="list_messages")
async def list_messages(
    agent_id: AgentId,
    server: "SyncServer" = Depends(get_letta_server),
    before: Optional[str] = Query(
        None, description="Message ID cursor for pagination. Returns messages that come before this message ID in the specified sort order"
    ),
    after: Optional[str] = Query(
        None, description="Message ID cursor for pagination. Returns messages that come after this message ID in the specified sort order"
    ),
    limit: Optional[int] = Query(100, description="Maximum number of messages to return"),
    order: Literal["asc", "desc"] = Query(
        "desc", description="Sort order for messages by creation time. 'asc' for oldest first, 'desc' for newest first"
    ),
    order_by: Literal["created_at"] = Query("created_at", description="Field to sort by"),
    group_id: str | None = Query(None, description="Group ID to filter messages by."),
    conversation_id: str | None = Query(None, description="Conversation ID to filter messages by."),
    use_assistant_message: bool = Query(True, description="Whether to use assistant messages", deprecated=True),
    assistant_message_tool_name: str = Query(DEFAULT_MESSAGE_TOOL, description="The name of the designated message tool.", deprecated=True),
    assistant_message_tool_kwarg: str = Query(DEFAULT_MESSAGE_TOOL_KWARG, description="The name of the message argument.", deprecated=True),
    include_err: bool | None = Query(
        None, description="Whether to include error messages and error statuses. For debugging purposes only."
    ),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Retrieve message history for an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    return await server.get_agent_recall_async(
        agent_id=agent_id,
        after=after,
        before=before,
        limit=limit,
        group_id=group_id,
        conversation_id=conversation_id,
        reverse=(order == "desc"),
        return_message_object=False,
        use_assistant_message=use_assistant_message,
        assistant_message_tool_name=assistant_message_tool_name,
        assistant_message_tool_kwarg=assistant_message_tool_kwarg,
        include_err=include_err,
        actor=actor,
    )


@router.patch("/{agent_id}/messages/{message_id}", response_model=LettaMessageUnion, operation_id="modify_message")
async def modify_message(
    agent_id: AgentId,  # backwards compatible. Consider removing for v1
    message_id: MessageId,
    request: LettaMessageUpdateUnion = Body(...),
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Update the details of a message associated with an agent.
    """
    # TODO: support modifying tool calls/returns
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    return await server.message_manager.update_message_by_letta_message_async(
        message_id=message_id, letta_message_update=request, actor=actor
    )


# noinspection PyInconsistentReturns
@router.post(
    "/{agent_id}/messages",
    response_model=LettaResponse,
    operation_id="send_message",
    responses={
        200: {
            "description": "Successful response",
            "content": {
                "application/json": {"schema": {"$ref": "#/components/schemas/LettaResponse"}},
                "text/event-stream": {"description": "Server-Sent Events stream (when streaming=true in request body)"},
            },
        }
    },
)
async def send_message(
    request_obj: Request,  # FastAPI Request
    agent_id: AgentId,
    server: SyncServer = Depends(get_letta_server),
    request: LettaStreamingRequest = Body(...),
    headers: HeaderParams = Depends(get_headers),
) -> StreamingResponse | LettaResponse:
    """
    Process a user message and return the agent's response.
    This endpoint accepts a message from a user and processes it through the agent.

    **Note:** Sending multiple concurrent requests to the same agent can lead to undefined behavior.
    Each agent processes messages sequentially, and concurrent requests may interleave in unexpected ways.
    Wait for each request to complete before sending the next one. Use separate agents or conversations for parallel processing.

    The response format is controlled by the `streaming` field in the request body:
    - If `streaming=false` (default): Returns a complete LettaResponse with all messages
    - If `streaming=true`: Returns a Server-Sent Events (SSE) stream

    Additional streaming options (only used when streaming=true):
    - `stream_tokens`: Stream individual tokens instead of complete steps
    - `include_pings`: Include keepalive pings to prevent connection timeouts
    - `background`: Process the request in the background
    """
    # After validation, messages should always be set (converted from input if needed)
    if not request.messages or len(request.messages) == 0:
        raise HTTPException(status_code=422, detail="Messages must not be empty")

    # Validate streaming-specific options are only set when streaming=true
    if not request.streaming:
        errors = []

        if request.stream_tokens is True:
            errors.append("stream_tokens can only be true when streaming=true")

        if request.include_pings is False:
            errors.append("include_pings can only be set to false when streaming=true")

        if request.background is True:
            errors.append("background can only be true when streaming=true")

        if errors:
            raise HTTPException(
                status_code=422,
                detail=f"Streaming options set without streaming enabled. {'; '.join(errors)}. "
                "Either set streaming=true or use default values for streaming options.",
            )

    is_1_0_sdk = is_1_0_sdk_version(headers)
    if request.streaming and not is_1_0_sdk:
        raise HTTPException(status_code=422, detail="streaming=true is only supported for SDK v1.0+ clients.")

    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    if request.streaming and is_1_0_sdk:
        streaming_service = StreamingService(server)
        run, result = await streaming_service.create_agent_stream(
            agent_id=agent_id,
            actor=actor,
            request=request,
            run_type="send_message",
        )
        return result

    request_start_timestamp_ns = get_utc_timestamp_ns()
    MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
    # TODO: This is redundant, remove soon
    agent = await server.agent_manager.get_agent_by_id_async(
        agent_id,
        actor,
        include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools", "tags"],
    )

    # Handle model override if specified in the request
    if request.override_model:
        override_llm_config = await server.get_llm_config_from_handle_async(
            actor=actor,
            handle=request.override_model,
        )
        # Create a copy of agent state with the overridden llm_config
        agent = agent.model_copy(update={"llm_config": override_llm_config})

    # Create a new run for execution tracking
    if settings.track_agent_run:
        runs_manager = RunManager()
        run = await runs_manager.create_run(
            pydantic_run=PydanticRun(
                agent_id=agent_id,
                background=False,
                metadata={
                    "run_type": "send_message",
                },
                request_config=LettaRequestConfig.from_letta_request(request),
            ),
            actor=actor,
        )
    else:
        run = None

    # TODO (cliandy): clean this up
    redis_client = await get_redis_client()
    await redis_client.set(f"{REDIS_RUN_ID_PREFIX}:{agent_id}", run.id if run else None)

    run_update_metadata = None
    result = None
    run_status = RunStatus.failed  # Default to failed, updated on success
    try:
        # Handle request-level logprobs override
        if request.return_logprobs or request.return_token_ids:
            agent = agent.model_copy(
                update={
                    "llm_config": agent.llm_config.model_copy(
                        update={
                            "return_logprobs": request.return_logprobs,
                            "top_logprobs": request.top_logprobs,
                            "return_token_ids": request.return_token_ids,
                        }
                    )
                }
            )

        agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
        result = await agent_loop.step(
            request.messages,
            max_steps=request.max_steps,
            run_id=run.id if run else None,
            use_assistant_message=request.use_assistant_message,
            request_start_timestamp_ns=request_start_timestamp_ns,
            include_return_message_types=request.include_return_message_types,
            client_tools=request.client_tools,
            include_compaction_messages=request.include_compaction_messages,
        )
        run_status = result.stop_reason.stop_reason.run_status
        return result
    except PendingApprovalError as e:
        run_update_metadata = {"error": str(e)}
        run_status = RunStatus.failed
        raise HTTPException(
            status_code=409, detail={"code": "PENDING_APPROVAL", "message": str(e), "pending_request_id": e.pending_request_id}
        )
    except Exception as e:
        run_update_metadata = {"error": str(e)}
        run_status = RunStatus.failed
        raise
    finally:
        if settings.track_agent_run:
            if result:
                stop_reason = result.stop_reason.stop_reason
            else:
                # NOTE: we could also consider this an error?
                stop_reason = None
            await server.run_manager.update_run_by_id_async(
                run_id=run.id,
                update=RunUpdate(
                    status=run_status,
                    metadata=run_update_metadata,
                    stop_reason=stop_reason,
                ),
                actor=actor,
            )


# noinspection PyInconsistentReturns
@router.post(
    "/{agent_id}/messages/stream",
    response_model=LettaStreamingResponse,
    operation_id="create_agent_message_stream",
    responses={
        200: {
            "description": "Successful response",
            "content": {
                "text/event-stream": {"description": "Server-Sent Events stream"},
            },
        }
    },
    deprecated=True,
)
async def send_message_streaming(
    request_obj: Request,  # FastAPI Request
    agent_id: AgentId,
    server: SyncServer = Depends(get_letta_server),
    request: LettaStreamingRequest = Body(...),
    headers: HeaderParams = Depends(get_headers),
) -> StreamingResponse | LettaResponse:
    """
    Process a user message and return the agent's response.

    Deprecated: Use the `POST /{agent_id}/messages` endpoint with `streaming=true` in the request body instead.

    **Note:** Sending multiple concurrent requests to the same agent can lead to undefined behavior.
    Each agent processes messages sequentially, and concurrent requests may interleave in unexpected ways.
    Wait for each request to complete before sending the next one. Use separate agents or conversations for parallel processing.

    This endpoint accepts a message from a user and processes it through the agent.
    It will stream the steps of the response always, and stream the tokens if 'stream_tokens' is set to True.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    # Since this is the dedicated streaming endpoint, ensure streaming is enabled
    request.streaming = True

    # use the streaming service for unified stream handling
    streaming_service = StreamingService(server)

    _run, result = await streaming_service.create_agent_stream(
        agent_id=agent_id,
        actor=actor,
        request=request,
        run_type="send_message_streaming",
    )

    return result


class CancelAgentRunRequest(BaseModel):
    run_ids: list[str] | None = Field(None, description="Optional list of run IDs to cancel")


@router.post("/{agent_id}/messages/cancel", operation_id="cancel_message")
async def cancel_message(
    agent_id: AgentId,
    request: CancelAgentRunRequest = Body(None),
    server: SyncServer = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
) -> dict:
    """
    Cancel runs associated with an agent. If run_ids are passed in, cancel those in particular.

    Note to cancel active runs associated with an agent, redis is required.
    """
    # TODO: WHY DOES THIS CANCEL A LIST OF RUNS?
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    if not settings.track_agent_run:
        raise HTTPException(status_code=400, detail="Agent run tracking is disabled")
    run_ids = request.run_ids if request else None
    if not run_ids:
        run_id = None
        try:
            redis_client = await get_redis_client()
            run_id = await redis_client.get(f"{REDIS_RUN_ID_PREFIX}:{agent_id}")
        except Exception as e:
            # Redis is optional; fall back to DB to avoid surfacing 5XXs for cancellation.
            logger.warning(f"Failed to look up run to cancel in redis for agent {agent_id}, falling back to DB: {e}")

        if run_id is None:
            logger.warning("Cannot find run associated with agent to cancel in redis, fetching from db.")
            runs = await server.run_manager.list_runs(
                actor=actor,
                statuses=[RunStatus.created, RunStatus.running],
                ascending=False,
                agent_id=agent_id,  # NOTE: this will override agent_ids if provided
                limit=100,  # Limit to 100 most recent active runs for cancellation
            )
            run_ids = [run.id for run in runs]
        else:
            run_ids = [run_id]

    if not run_ids:
        raise NoActiveRunsToCancelError(agent_id=agent_id)

    results = {}
    for run_id in run_ids:
        try:
            run = await server.run_manager.get_run_by_id(run_id=run_id, actor=actor)
            if run.metadata and run.metadata.get("lettuce"):
                try:
                    lettuce_client = await LettuceClient.create()
                    await lettuce_client.cancel(run_id)
                except Exception as e:
                    # Do not surface cancellation failures as 5XXs.
                    logger.error(f"Failed to cancel Lettuce run {run_id}: {e}")

            await server.run_manager.cancel_run(actor=actor, agent_id=agent_id, run_id=run_id)
        except Exception as e:
            results[run_id] = "failed"
            # Cancellation failures should not raise errors back to the client.
            logger.error(f"Failed to cancel run {run_id}: {str(e)}")
            continue
        results[run_id] = "cancelled"
        logger.info(f"Cancelled run {run_id}")
    return results


@router.post(
    "/{agent_id}/generate",
    operation_id="generate_completion",
    responses={
        200: {"description": "Successful generation"},
        404: {"description": "Agent not found"},
        422: {"description": "Invalid request parameters"},
        502: {"description": "LLM provider error"},
    },
)
async def generate_completion(
    agent_id: AgentId,
    server: SyncServer = Depends(get_letta_server),
    request: GenerateRequest = Body(...),
    headers: HeaderParams = Depends(get_headers),
) -> GenerateResponse:
    """
    Generate a completion directly from the LLM provider using the agent's configuration.

    This endpoint makes a direct request to the LLM provider without any agent processing:
    - No memory or context retrieval
    - No tool calling
    - No message persistence
    - No agent state modification

    Simply provide a prompt, and the endpoint formats it as a user message.
    Optionally include a system_prompt for context/instructions.

    The agent's LLM configuration (model, credentials, settings) is used by default.
    Use override_model to switch to a different model/provider while still using
    the organization's configured providers.

    Example use cases:
    - Quick LLM queries without agent overhead
    - Testing different models with the same prompt
    - Simple chat completions using agent's credentials
    - Comparing model outputs on identical prompts
    """
    # Get actor for permissions
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    # Call the manager to generate the completion
    try:
        service_response = await server.agent_generate_completion_manager.generate_completion_with_agent_config_async(
            agent_id=str(agent_id),
            prompt=request.prompt,
            system_prompt=request.system_prompt,
            actor=actor,
            override_model=request.override_model,
            response_schema=request.response_schema,
        )
    except NoResultFound:
        raise HTTPException(status_code=404, detail=f"Agent with ID {agent_id} not found")
    except HandleNotFoundError:
        raise HTTPException(status_code=404, detail=f"Model '{request.override_model}' not found or not accessible")
    except LLMError as e:
        raise HTTPException(status_code=502, detail=f"LLM provider error: {str(e)}")
    except Exception as e:
        logger.error(f"Failed to process LLM response: {str(e)}")
        raise HTTPException(status_code=502, detail=f"Failed to process LLM response: {str(e)}")

    # Convert service response to API response model
    return GenerateResponse(
        content=service_response.content,
        model=service_response.model,
        usage=service_response.usage,
    )


@router.post("/messages/search", response_model=List[MessageSearchResult], operation_id="search_messages")
async def search_messages(
    request: MessageSearchRequest = Body(...),
    server: SyncServer = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Search messages across the entire organization with optional project and template filtering. Returns messages with FTS/vector ranks and total RRF score.

    This is a cloud-only feature.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    # get embedding config from the default agent if needed
    # check if any agents exist in the org
    agent_count = await server.agent_manager.size_async(actor=actor)
    if agent_count == 0:
        raise HTTPException(status_code=400, detail="No agents found in organization to derive embedding configuration from")

    results = await server.message_manager.search_messages_org_async(
        actor=actor,
        query_text=request.query,
        search_mode=request.search_mode,
        roles=request.roles,
        agent_id=request.agent_id,
        project_id=request.project_id,
        template_id=request.template_id,
        conversation_id=request.conversation_id,
        limit=request.limit,
        start_date=request.start_date,
        end_date=request.end_date,
    )
    return results


async def _process_message_background(
    run_id: str,
    server: SyncServer,
    actor: User,
    agent_id: str,
    messages: list[MessageCreate],
    use_assistant_message: bool,
    assistant_message_tool_name: str,
    assistant_message_tool_kwarg: str,
    max_steps: int = DEFAULT_MAX_STEPS,
    include_return_message_types: list[MessageType] | None = None,
    override_model: str | None = None,
    include_compaction_messages: bool = False,
) -> None:
    """Background task to process the message and update run status."""
    request_start_timestamp_ns = get_utc_timestamp_ns()
    agent_loop = None
    result = None

    try:
        agent = await server.agent_manager.get_agent_by_id_async(
            agent_id,
            actor,
            include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools", "tags"],
        )

        # Handle model override if specified
        if override_model:
            override_llm_config = await server.get_llm_config_from_handle_async(
                actor=actor,
                handle=override_model,
            )
            # Create a copy of agent state with the overridden llm_config
            agent = agent.model_copy(update={"llm_config": override_llm_config})

        agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
        result = await agent_loop.step(
            messages,
            max_steps=max_steps,
            run_id=run_id,
            use_assistant_message=use_assistant_message,
            request_start_timestamp_ns=request_start_timestamp_ns,
            include_return_message_types=include_return_message_types,
            include_compaction_messages=include_compaction_messages,
        )
        runs_manager = RunManager()
        from letta.schemas.enums import RunStatus
        from letta.schemas.letta_stop_reason import StopReasonType

        # Handle cases where stop_reason might be None (defensive)
        if result.stop_reason and result.stop_reason.stop_reason == "cancelled":
            run_status = RunStatus.cancelled
            stop_reason = result.stop_reason.stop_reason
        elif result.stop_reason:
            run_status = RunStatus.completed
            stop_reason = result.stop_reason.stop_reason
        else:
            # Fallback: no stop_reason set (shouldn't happen but defensive)
            logger.error(f"Run {run_id} completed without stop_reason in result, defaulting to end_turn")
            run_status = RunStatus.completed
            stop_reason = StopReasonType.end_turn

        await runs_manager.update_run_by_id_async(
            run_id=run_id,
            update=RunUpdate(status=run_status, stop_reason=stop_reason),
            actor=actor,
        )

    except PendingApprovalError as e:
        # Update run status to failed with specific error info
        runs_manager = RunManager()
        from letta.schemas.enums import RunStatus
        from letta.schemas.letta_stop_reason import StopReasonType

        await runs_manager.update_run_by_id_async(
            run_id=run_id,
            update=RunUpdate(status=RunStatus.failed, stop_reason=StopReasonType.error, metadata={"error": str(e)}),
            actor=actor,
        )
    except Exception as e:
        # Update run status to failed
        runs_manager = RunManager()
        from letta.schemas.enums import RunStatus
        from letta.schemas.letta_stop_reason import StopReasonType

        await runs_manager.update_run_by_id_async(
            run_id=run_id,
            update=RunUpdate(status=RunStatus.failed, stop_reason=StopReasonType.error, metadata={"error": str(e)}),
            actor=actor,
        )
    finally:
        # Critical: Explicit resource cleanup to prevent accumulation
        if agent_loop and result:
            await _cleanup_background_task_resources(agent_loop, result)


async def _cleanup_background_task_resources(agent_loop: BaseAgentV2 | LettaAgent, result: StreamingResponse | LettaResponse) -> None:
    """
    Explicit cleanup of resources created during background message processing.

    Proper cleanup of:
    - Agent instances and their internal state
    - Message buffers and response accumulation
    - Any database connections or sessions
    - LLM client resources
    """
    import gc

    try:
        if agent_loop is not None:
            if agent_loop.response_messages:
                # Clear response message buffer to prevent accumulation
                agent_loop.response_messages.clear()
            # Clean up agent loop resources
            del agent_loop

        if result is not None:
            del result  # Clear result data to free memory

        # Force garbage collection to clean up references and release memory
        gc.collect()
    except Exception as e:
        # Handle errors for logging but don't fail the background task
        logger.warning(f"Error during background task resource cleanup: {e}")
        pass


@router.post(
    "/{agent_id}/messages/async",
    response_model=PydanticRun,
    operation_id="create_agent_message_async",
)
async def send_message_async(
    agent_id: AgentId,
    server: SyncServer = Depends(get_letta_server),
    request: LettaAsyncRequest = Body(...),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Asynchronously process a user message and return a run object.
    The actual processing happens in the background, and the status can be checked using the run ID.

    This is "asynchronous" in the sense that it's a background run and explicitly must be fetched by the run ID.

    **Note:** Sending multiple concurrent requests to the same agent can lead to undefined behavior.
    Each agent processes messages sequentially, and concurrent requests may interleave in unexpected ways.
    Wait for each request to complete before sending the next one. Use separate agents or conversations for parallel processing.
    """
    MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)

    try:
        is_message_input = request.messages[0].type == MessageCreateType.message
    except Exception:
        is_message_input = True
    use_lettuce = headers.experimental_params.message_async and is_message_input

    # Create a new run
    run = PydanticRun(
        callback_url=request.callback_url,
        agent_id=agent_id,
        background=True,  # Async endpoints are always background
        metadata={
            "run_type": "send_message_async",
            "lettuce": use_lettuce,
        },
        request_config=LettaRequestConfig.from_letta_request(request),
    )
    run = await server.run_manager.create_run(
        pydantic_run=run,
        actor=actor,
    )

    if use_lettuce:
        agent_state = await server.agent_manager.get_agent_by_id_async(
            agent_id,
            actor,
            include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools", "tags"],
        )
        # Allow V1 agents only if the message async flag is enabled
        is_v1_message_async_enabled = (
            agent_state.agent_type == AgentType.letta_v1_agent and headers.experimental_params.letta_v1_agent_message_async
        )
        if agent_state.multi_agent_group is None and (agent_state.agent_type != AgentType.letta_v1_agent or is_v1_message_async_enabled):
            lettuce_client = await LettuceClient.create()
            run_id_from_lettuce = await lettuce_client.step(
                agent_state=agent_state,
                actor=actor,
                input_messages=request.messages,
                max_steps=request.max_steps,
                run_id=run.id,
                use_assistant_message=request.use_assistant_message,
                include_return_message_types=request.include_return_message_types,
            )
            if run_id_from_lettuce:
                return run

    # Create asyncio task for background processing (shielded to prevent cancellation)
    task = safe_create_shielded_task(
        _process_message_background(
            run_id=run.id,
            server=server,
            actor=actor,
            agent_id=agent_id,
            messages=request.messages,
            use_assistant_message=request.use_assistant_message,
            assistant_message_tool_name=request.assistant_message_tool_name,
            assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
            max_steps=request.max_steps,
            include_return_message_types=request.include_return_message_types,
            override_model=request.override_model,
            include_compaction_messages=request.include_compaction_messages,
        ),
        label=f"process_message_background_{run.id}",
    )

    def handle_task_completion(t):
        try:
            t.result()
        except asyncio.CancelledError:
            # Note: With shielded tasks, cancellation attempts don't actually stop the task
            logger.info(f"Cancellation attempted on shielded background task for run {run.id}, but task continues running")
            # Don't mark as failed since the shielded task is still running
        except Exception as e:
            logger.error(f"Unhandled exception in background task for run {run.id}: {e}")
            from letta.services.run_manager import RunManager

            error_str = str(e)

            async def update_failed_run():
                runs_manager = RunManager()
                from letta.schemas.enums import RunStatus
                from letta.schemas.letta_stop_reason import StopReasonType

                await runs_manager.update_run_by_id_async(
                    run_id=run.id,
                    update=RunUpdate(status=RunStatus.failed, stop_reason=StopReasonType.error, metadata={"error": error_str}),
                    actor=actor,
                )

            safe_create_task(
                update_failed_run(),
                label=f"update_failed_run_{run.id}",
            )

    task.add_done_callback(handle_task_completion)

    return run


class ResetMessagesRequest(BaseModel):
    """Request body for resetting messages on an agent."""

    add_default_initial_messages: bool = Field(
        False,
        description="If true, adds the default initial messages after resetting.",
    )


@router.patch("/{agent_id}/reset-messages", response_model=Optional[AgentState], operation_id="reset_messages")
async def reset_messages(
    agent_id: AgentId,
    request: ResetMessagesRequest = Body(...),
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """Resets the messages for an agent"""
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    return await server.agent_manager.reset_messages_async(
        agent_id=agent_id,
        actor=actor,
        add_default_initial_messages=request.add_default_initial_messages,
        needs_agent_state=not is_1_0_sdk_version(headers),
        rebuild_system_prompt=True,
    )


@router.get("/{agent_id}/groups", response_model=list[Group], operation_id="list_groups_for_agent")
async def list_groups_for_agent(
    agent_id: AgentId,
    manager_type: str | None = Query(None, description="Manager type to filter groups by"),
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
    before: Optional[str] = Query(
        None, description="Group ID cursor for pagination. Returns groups that come before this group ID in the specified sort order"
    ),
    after: Optional[str] = Query(
        None, description="Group ID cursor for pagination. Returns groups that come after this group ID in the specified sort order"
    ),
    limit: Optional[int] = Query(100, description="Maximum number of groups to return"),
    order: Literal["asc", "desc"] = Query(
        "desc", description="Sort order for groups by creation time. 'asc' for oldest first, 'desc' for newest first"
    ),
    order_by: Literal["created_at"] = Query("created_at", description="Field to sort by"),
):
    """Lists the groups for an agent."""
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    logger.info("in list agents with manager_type: %s", manager_type)
    return await server.agent_manager.list_groups_async(
        agent_id=agent_id,
        manager_type=manager_type,
        actor=actor,
        before=before,
        after=after,
        limit=limit,
        ascending=(order == "asc"),
    )


@router.post(
    "/{agent_id}/messages/preview-raw-payload",
    response_model=Dict[str, Any],
    operation_id="preview_model_request",
)
async def preview_model_request(
    agent_id: AgentId,
    request: Union[LettaRequest, LettaStreamingRequest] = Body(...),
    server: SyncServer = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Inspect the raw LLM request payload without sending it.

    This endpoint processes the message through the agent loop up until
    the LLM request, then returns the raw request payload that would
    be sent to the LLM provider. Useful for debugging and inspection.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    agent = await server.agent_manager.get_agent_by_id_async(
        agent_id, actor, include_relationships=["multi_agent_group", "memory", "sources"]
    )
    agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
    return await agent_loop.build_request(
        input_messages=request.messages,
    )


class CompactionRequest(BaseModel):
    compaction_settings: Optional[CompactionSettings] = Field(
        default=None,
        description="Optional compaction settings to use for this summarization request. If not provided, the agent's default settings will be used.",
    )


class CompactionResponse(BaseModel):
    summary: str
    num_messages_before: int
    num_messages_after: int


@router.post("/{agent_id}/summarize", response_model=CompactionResponse, operation_id="summarize_messages")
async def summarize_messages(
    agent_id: AgentId,
    request: Optional[CompactionRequest] = Body(default=None),
    server: SyncServer = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Summarize an agent's conversation history.
    """

    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group", "tools"])

    agent_loop = LettaAgentV3(agent_state=agent, actor=actor)
    in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor)

    # Early return if there's nothing to compact (only system message, or system + summary)
    non_system_summary_messages = [m for m in in_context_messages if m.role not in (MessageRole.system, MessageRole.summary)]
    if not non_system_summary_messages:
        existing_summary = None
        for m in in_context_messages:
            if m.role == MessageRole.summary and m.content:
                try:
                    summary_json = json.loads(m.content[0].text)
                    existing_summary = summary_json.get("message")
                except (json.JSONDecodeError, IndexError, AttributeError):
                    existing_summary = m.content[0].text if m.content else None
                break
        return CompactionResponse(
            summary=existing_summary,
            num_messages_before=len(in_context_messages),
            num_messages_after=len(in_context_messages),
        )

    # Merge request compaction_settings with agent's settings (request overrides agent)
    if agent.compaction_settings and request and request.compaction_settings:
        # Start with agent's settings, override with new values from request
        # Use model_fields_set to get the fields that were changed in the request (want to ignore the defaults that get set automatically)
        compaction_settings = agent.compaction_settings.copy()  # do not mutate original agent compaction settings
        changed_fields = request.compaction_settings.model_fields_set
        for field in changed_fields:
            setattr(compaction_settings, field, getattr(request.compaction_settings, field))

        # If mode changed from agent's original settings and prompt not explicitly set in request, then use the default prompt for the new mode
        # Ex: previously was sliding_window, now is all, so we need to use the default prompt for all mode
        if (
            "mode" in changed_fields
            and "prompt" not in changed_fields
            and agent.compaction_settings.mode != request.compaction_settings.mode
        ):
            from letta.services.summarizer.summarizer_config import get_default_prompt_for_mode

            compaction_settings.prompt = get_default_prompt_for_mode(compaction_settings.mode)
    else:
        compaction_settings = (request and request.compaction_settings) or agent.compaction_settings
    num_messages_before = len(in_context_messages)
    summary_message, messages, summary = await agent_loop.compact(
        messages=in_context_messages,
        compaction_settings=compaction_settings,
        use_summary_role=True,
    )
    num_messages_after = len(messages)

    # update the agent state
    logger.info(f"Summarized {num_messages_before} messages to {num_messages_after}")
    if num_messages_before <= num_messages_after:
        logger.warning(f"Summarization failed to reduce the number of messages. {num_messages_before} messages -> {num_messages_after}.")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Summarization failed to reduce the number of messages. You may not have enough messages to compact or need to use a different CompactionSettings (e.g. using `all` mode).",
        )
    await agent_loop._checkpoint_messages(run_id=None, step_id=None, new_messages=[summary_message], in_context_messages=messages)
    return CompactionResponse(
        summary=summary,
        num_messages_before=num_messages_before,
        num_messages_after=num_messages_after,
    )


class CaptureMessagesRequest(BaseModel):
    provider: str
    model: str
    request_messages: list[dict[str, Any]]
    response_dict: dict[str, Any]


@router.post("/{agent_id}/messages/capture", response_model=str, operation_id="capture_messages", include_in_schema=False)
async def capture_messages(
    agent_id: AgentId,
    request: CaptureMessagesRequest = Body(...),
    server: "SyncServer" = Depends(get_letta_server),
    headers: HeaderParams = Depends(get_headers),
):
    """
    Capture a list of messages for an agent.
    """
    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
    agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])

    messages_to_persist = []

    # Input user messages
    for message in request.request_messages:
        if message["role"] == "user":
            messages_to_persist.append(
                Message(
                    role=MessageRole.user,
                    content=[TextContent(text=message["content"])],
                    agent_id=agent_id,
                    tool_calls=None,
                    tool_call_id=None,
                    created_at=get_utc_time(),
                )
            )

    # Assistant response
    messages_to_persist.append(
        Message(
            role=MessageRole.assistant,
            content=[TextContent(text=request.response_dict["content"])],
            agent_id=agent_id,
            model=request.model,
            tool_calls=None,
            tool_call_id=None,
            created_at=get_utc_time(),
        )
    )

    response_messages = await server.message_manager.create_many_messages_async(messages_to_persist, actor=actor)

    run_ids = []
    sleeptime_group = agent.multi_agent_group if agent.multi_agent_group and agent.multi_agent_group.manager_type == "sleeptime" else None
    if sleeptime_group:
        sleeptime_agent_loop = SleeptimeMultiAgentV4(agent_state=agent, actor=actor, group=sleeptime_group)
        sleeptime_agent_loop.response_messages = response_messages
        run_ids = await sleeptime_agent_loop.run_sleeptime_agents()

    return JSONResponse({"success": True, "messages_created": len(response_messages), "run_ids": run_ids})