Files
letta-server/letta/server/rest_api/routers/v1/runs.py
Kian Jones 25d54dd896 chore: enable F821, F401, W293 (#9503)
* auto fixes

* auto fix pt2 and transitive deps and undefined var checking locals()

* manual fixes (ignored or letta-code fixed)

* fix circular import
2026-02-24 10:55:08 -08:00

412 lines
16 KiB
Python

from datetime import timedelta
from typing import Annotated, Any, List, Literal, Optional
from fastapi import APIRouter, Body, Depends, HTTPException, Query
from pydantic import Field
from letta.data_sources.redis_client import NoopAsyncRedisClient, get_redis_client
from letta.errors import LettaExpiredError, LettaInvalidArgumentError
from letta.helpers.datetime_helpers import get_utc_time
from letta.schemas.enums import RunStatus
from letta.schemas.letta_message import LettaMessageUnion
from letta.schemas.letta_request import RetrieveStreamRequest
from letta.schemas.letta_stop_reason import StopReasonType
from letta.schemas.openai.chat_completion_response import UsageStatistics
from letta.schemas.run import Run
from letta.schemas.run_metrics import RunMetrics
from letta.schemas.step import Step
from letta.server.rest_api.dependencies import HeaderParams, get_headers, get_letta_server
from letta.server.rest_api.redis_stream_manager import redis_sse_stream_generator
from letta.server.rest_api.streaming_response import (
StreamingResponseWithStatusCode,
add_keepalive_to_stream,
)
from letta.server.server import SyncServer
from letta.services.clickhouse_otel_traces import ClickhouseOtelTracesReader
from letta.services.run_manager import RunManager
from letta.settings import settings
router = APIRouter(prefix="/runs", tags=["runs"])
def convert_statuses_to_enum(statuses: Optional[List[str]]) -> Optional[List[RunStatus]]:
"""Convert a list of status strings to RunStatus enum values.
Args:
statuses: List of status strings or None
Returns:
List of RunStatus enum values or None if input is None
"""
if statuses is None:
return None
return [RunStatus(status) for status in statuses]
@router.get("/", response_model=List[Run], operation_id="list_runs")
async def list_runs(
server: "SyncServer" = Depends(get_letta_server),
agent_id: Optional[str] = Query(None, description="The unique identifier of the agent associated with the run."),
agent_ids: Optional[List[str]] = Query(
None,
description="The unique identifiers of the agents associated with the run. Deprecated in favor of agent_id field.",
deprecated=True,
),
statuses: Optional[List[str]] = Query(None, description="Filter runs by status. Can specify multiple statuses."),
background: Optional[bool] = Query(None, description="If True, filters for runs that were created in background mode."),
stop_reason: Optional[StopReasonType] = Query(None, description="Filter runs by stop reason."),
conversation_id: Optional[str] = Query(None, description="Filter runs by conversation ID."),
before: Optional[str] = Query(
None, description="Run ID cursor for pagination. Returns runs that come before this run ID in the specified sort order"
),
after: Optional[str] = Query(
None, description="Run ID cursor for pagination. Returns runs that come after this run ID in the specified sort order"
),
limit: Optional[int] = Query(100, description="Maximum number of runs to return", ge=1, le=1000),
order: Literal["asc", "desc"] = Query(
"desc", description="Sort order for runs by creation time. 'asc' for oldest first, 'desc' for newest first"
),
order_by: Literal["created_at"] = Query("created_at", description="Field to sort by"),
active: bool = Query(False, description="Filter for active runs."),
ascending: bool = Query(
False,
description="Whether to sort agents oldest to newest (True) or newest to oldest (False, default). Deprecated in favor of order field.",
deprecated=True,
),
headers: HeaderParams = Depends(get_headers),
):
"""
List all runs.
"""
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
runs_manager = RunManager()
# Handle backwards compatibility: if statuses not provided but active=True, filter by active statuses
if statuses is None and active:
statuses = [RunStatus.created, RunStatus.running]
if agent_id:
# NOTE: we are deprecating agent_ids so this will the primary path soon
agent_ids = [agent_id]
# Handle backward compatibility: if ascending is explicitly set, use it; otherwise use order
if ascending is not False:
# ascending was explicitly set to True
sort_ascending = ascending
else:
# Use the new order parameter
sort_ascending = order == "asc"
# Convert string statuses to RunStatus enum
parsed_statuses = convert_statuses_to_enum(statuses)
runs = await runs_manager.list_runs(
actor=actor,
agent_ids=agent_ids,
statuses=parsed_statuses,
limit=limit,
before=before,
after=after,
ascending=sort_ascending,
stop_reason=stop_reason,
background=background,
conversation_id=conversation_id,
)
return runs
@router.get("/active", response_model=List[Run], operation_id="list_active_runs", deprecated=True)
async def list_active_runs(
server: "SyncServer" = Depends(get_letta_server),
agent_id: Optional[str] = Query(None, description="The unique identifier of the agent associated with the run."),
background: Optional[bool] = Query(None, description="If True, filters for runs that were created in background mode."),
headers: HeaderParams = Depends(get_headers),
):
"""
List all active runs.
"""
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
runs_manager = RunManager()
if agent_id:
agent_ids = [agent_id]
else:
agent_ids = None
active_runs = await runs_manager.list_runs(
actor=actor, statuses=[RunStatus.created, RunStatus.running], agent_ids=agent_ids, background=background, limit=100
)
return active_runs
@router.get("/{run_id}", response_model=Run, operation_id="retrieve_run")
async def retrieve_run(
run_id: str,
headers: HeaderParams = Depends(get_headers),
server: "SyncServer" = Depends(get_letta_server),
):
"""
Get the status of a run.
"""
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
runs_manager = RunManager()
return await runs_manager.get_run_with_status(run_id=run_id, actor=actor)
RunMessagesResponse = Annotated[
List[LettaMessageUnion], Field(json_schema_extra={"type": "array", "items": {"$ref": "#/components/schemas/LettaMessageUnion"}})
]
@router.get(
"/{run_id}/messages",
response_model=RunMessagesResponse,
operation_id="list_messages_for_run",
)
async def list_messages_for_run(
run_id: str,
server: "SyncServer" = Depends(get_letta_server),
headers: HeaderParams = Depends(get_headers),
before: Optional[str] = Query(
None, description="Message ID cursor for pagination. Returns messages that come before this message ID in the specified sort order"
),
after: Optional[str] = Query(
None, description="Message ID cursor for pagination. Returns messages that come after this message ID in the specified sort order"
),
limit: Optional[int] = Query(100, description="Maximum number of messages to return"),
order: Literal["asc", "desc"] = Query(
"asc", description="Sort order for messages by creation time. 'asc' for oldest first, 'desc' for newest first"
),
order_by: Literal["created_at"] = Query("created_at", description="Field to sort by"),
):
"""Get response messages associated with a run."""
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
return await server.run_manager.get_run_messages(run_id=run_id, actor=actor, before=before, after=after, limit=limit, order=order)
@router.get("/{run_id}/usage", response_model=UsageStatistics, operation_id="retrieve_usage_for_run")
async def retrieve_usage_for_run(
run_id: str,
headers: HeaderParams = Depends(get_headers),
server: "SyncServer" = Depends(get_letta_server),
):
"""
Get usage statistics for a run.
"""
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
runs_manager = RunManager()
return await runs_manager.get_run_usage(run_id=run_id, actor=actor)
@router.get("/{run_id}/metrics", response_model=RunMetrics, operation_id="retrieve_metrics_for_run")
async def retrieve_metrics_for_run(
run_id: str,
headers: HeaderParams = Depends(get_headers),
server: "SyncServer" = Depends(get_letta_server),
):
"""
Get run metrics by run ID.
"""
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
runs_manager = RunManager()
return await runs_manager.get_run_metrics_async(run_id=run_id, actor=actor)
@router.get("/{run_id}/metrics", response_model=RunMetrics, operation_id="retrieve_metrics_for_run")
async def retrieve_metrics_for_run(
run_id: str,
headers: HeaderParams = Depends(get_headers),
server: "SyncServer" = Depends(get_letta_server),
):
"""
Get run metrics by run ID.
"""
try:
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
runs_manager = RunManager()
return await runs_manager.get_run_metrics_async(run_id=run_id, actor=actor)
except NoResultFound:
raise HTTPException(status_code=404, detail="Run metrics not found")
@router.get(
"/{run_id}/steps",
response_model=List[Step],
operation_id="list_steps_for_run",
)
async def list_steps_for_run(
run_id: str,
server: "SyncServer" = Depends(get_letta_server),
headers: HeaderParams = Depends(get_headers),
before: Optional[str] = Query(None, description="Cursor for pagination"),
after: Optional[str] = Query(None, description="Cursor for pagination"),
limit: Optional[int] = Query(100, description="Maximum number of messages to return"),
order: Literal["asc", "desc"] = Query(
"desc", description="Sort order for steps by creation time. 'asc' for oldest first, 'desc' for newest first"
),
order_by: Literal["created_at"] = Query("created_at", description="Field to sort by"),
):
"""
Get steps associated with a run with filtering options.
"""
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
runs_manager = RunManager()
return await runs_manager.get_run_steps(
run_id=run_id,
actor=actor,
limit=limit,
before=before,
after=after,
ascending=(order == "asc"),
)
@router.get(
"/{run_id}/trace",
response_model=List[dict[str, Any]],
operation_id="retrieve_trace_for_run",
)
async def retrieve_trace_for_run(
run_id: str,
server: "SyncServer" = Depends(get_letta_server),
headers: HeaderParams = Depends(get_headers),
limit: int = Query(1000, description="Maximum number of spans to return", ge=1, le=5000),
):
"""
Retrieve OTEL trace spans for a run.
Returns a filtered set of spans relevant for observability:
- agent_step: Individual agent reasoning steps
- tool executions: Tool call spans
- Root span: The top-level request span
- time_to_first_token: TTFT measurement span
Requires ClickHouse to be configured for trace storage.
"""
# OTEL traces are only available when ClickHouse is configured
if not settings.clickhouse_endpoint:
raise HTTPException(
status_code=501,
detail="OTEL traces require ClickHouse. Set LETTA_CLICKHOUSE_ENDPOINT and configure ClickHouse connection.",
)
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
runs_manager = RunManager()
# We assume trace_id is stable across all steps in a run, but individual step rows may
# lack trace_id (e.g. older data). Grab a few and pick the first populated value.
steps = await runs_manager.get_run_steps(run_id=run_id, actor=actor, limit=25)
trace_id = next((s.trace_id for s in steps if s.trace_id), None)
if not trace_id:
return []
# Only return spans relevant to the trace viewer UI (agent_step, tool executions, root span, TTFT)
return await ClickhouseOtelTracesReader().get_traces_by_trace_id_async(trace_id=trace_id, limit=limit, filter_ui_spans=True)
@router.delete("/{run_id}", response_model=None, operation_id="delete_run")
async def delete_run(
run_id: str,
headers: HeaderParams = Depends(get_headers),
server: "SyncServer" = Depends(get_letta_server),
):
"""
Delete a run by its run_id.
"""
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
runs_manager = RunManager()
return await runs_manager.delete_run_by_id(run_id=run_id, actor=actor)
@router.post(
"/{run_id}/stream",
response_model=None,
operation_id="retrieve_stream_for_run",
responses={
200: {
"description": "Successful response",
"content": {
# Align streaming schema with agents.create_stream so SDKs accept approval messages
"text/event-stream": {
"description": "Server-Sent Events stream",
"schema": {
"oneOf": [
{"$ref": "#/components/schemas/SystemMessage"},
{"$ref": "#/components/schemas/UserMessage"},
{"$ref": "#/components/schemas/ReasoningMessage"},
{"$ref": "#/components/schemas/HiddenReasoningMessage"},
{"$ref": "#/components/schemas/ToolCallMessage"},
{"$ref": "#/components/schemas/ToolReturnMessage"},
{"$ref": "#/components/schemas/AssistantMessage"},
{"$ref": "#/components/schemas/ApprovalRequestMessage"},
{"$ref": "#/components/schemas/ApprovalResponseMessage"},
{"$ref": "#/components/schemas/LettaPing"},
{"$ref": "#/components/schemas/LettaErrorMessage"},
{"$ref": "#/components/schemas/LettaStopReason"},
{"$ref": "#/components/schemas/LettaUsageStatistics"},
]
},
},
},
}
},
)
async def retrieve_stream_for_run(
run_id: str,
request: RetrieveStreamRequest = Body(None),
headers: HeaderParams = Depends(get_headers),
server: "SyncServer" = Depends(get_letta_server),
):
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
runs_manager = RunManager()
run = await runs_manager.get_run_by_id(run_id=run_id, actor=actor)
if not run.background:
raise LettaInvalidArgumentError("Run was not created in background mode, so it cannot be retrieved.")
if run.created_at < get_utc_time() - timedelta(hours=3):
raise LettaExpiredError("Run was created more than 3 hours ago, and is now expired.")
redis_client = await get_redis_client()
if isinstance(redis_client, NoopAsyncRedisClient):
raise HTTPException(
status_code=503,
detail=(
"Background streaming requires Redis to be running. "
"Please ensure Redis is properly configured. "
f"LETTA_REDIS_HOST: {settings.redis_host}, LETTA_REDIS_PORT: {settings.redis_port}"
),
)
stream = redis_sse_stream_generator(
redis_client=redis_client,
run_id=run_id,
starting_after=request.starting_after,
poll_interval=request.poll_interval,
batch_size=request.batch_size,
)
if settings.enable_cancellation_aware_streaming:
from letta.server.rest_api.streaming_response import cancellation_aware_stream_wrapper, get_cancellation_event_for_run
stream = cancellation_aware_stream_wrapper(
stream_generator=stream,
run_manager=server.run_manager,
run_id=run_id,
actor=actor,
cancellation_event=get_cancellation_event_for_run(run_id),
)
if request.include_pings and settings.enable_keepalive:
stream = add_keepalive_to_stream(stream, keepalive_interval=settings.keepalive_interval, run_id=run_id)
return StreamingResponseWithStatusCode(
stream,
media_type="text/event-stream",
)