Files
letta-server/tests/managers/test_run_manager.py
Kian Jones 25d54dd896 chore: enable F821, F401, W293 (#9503)
* auto fixes

* auto fix pt2 and transitive deps and undefined var checking locals()

* manual fixes (ignored or letta-code fixed)

* fix circular import
2026-02-24 10:55:08 -08:00

2087 lines
80 KiB
Python

import uuid
from datetime import datetime, timedelta, timezone
from unittest.mock import AsyncMock, patch
import pytest
# Import shared fixtures and constants from conftest
from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall as OpenAIToolCall, Function as OpenAIFunction
from letta.errors import LettaInvalidArgumentError
from letta.orm.errors import NoResultFound
from letta.schemas.enums import (
MessageRole,
RunStatus,
)
from letta.schemas.job import LettaRequestConfig
from letta.schemas.letta_message_content import TextContent
from letta.schemas.letta_stop_reason import StopReasonType
from letta.schemas.message import Message, Message as PydanticMessage, ToolReturn
from letta.schemas.openai.chat_completion_response import UsageStatistics
from letta.schemas.run import Run as PydanticRun, RunUpdate
from letta.schemas.user import User as PydanticUser
from letta.server.server import SyncServer
from letta.services.step_manager import FeedbackType
# ======================================================================================================================
# RunManager Tests
# ======================================================================================================================
@pytest.mark.asyncio
async def test_create_run(server: SyncServer, sarah_agent, default_user):
"""Test creating a run."""
run_data = PydanticRun(
metadata={"type": "test"},
agent_id=sarah_agent.id,
)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
# Assertions to ensure the created run matches the expected values
assert created_run.agent_id == sarah_agent.id
assert created_run.created_at
assert created_run.status == RunStatus.created
assert created_run.metadata == {"type": "test"}
@pytest.mark.asyncio
async def test_get_run_by_id(server: SyncServer, sarah_agent, default_user):
"""Test fetching a run by ID."""
# Create a run
run_data = PydanticRun(
metadata={"type": "test"},
agent_id=sarah_agent.id,
)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
# Fetch the run by ID
fetched_run = await server.run_manager.get_run_by_id(created_run.id, actor=default_user)
# Assertions to ensure the fetched run matches the created run
assert fetched_run.id == created_run.id
assert fetched_run.status == RunStatus.created
assert fetched_run.metadata == {"type": "test"}
@pytest.mark.asyncio
async def test_list_runs(server: SyncServer, sarah_agent, default_user):
"""Test listing runs."""
# Create multiple runs
for i in range(3):
run_data = PydanticRun(
metadata={"type": f"test-{i}"},
agent_id=sarah_agent.id,
)
await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
# List runs
runs = await server.run_manager.list_runs(actor=default_user)
# Assertions to check that the created runs are listed
assert len(runs) == 3
assert all(run.agent_id == sarah_agent.id for run in runs)
assert all(run.metadata["type"].startswith("test") for run in runs)
@pytest.mark.asyncio
async def test_list_runs_with_metadata(server: SyncServer, sarah_agent, default_user):
for i in range(3):
run_data = PydanticRun(agent_id=sarah_agent.id)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
if i == 1:
await server.run_manager.update_run_by_id_async(created_run.id, RunUpdate(status=RunStatus.completed), actor=default_user)
runs = await server.run_manager.list_runs(actor=default_user, statuses=[RunStatus.completed])
assert len(runs) == 1
assert runs[0].status == RunStatus.completed
runs = await server.run_manager.list_runs(actor=default_user)
assert len(runs) == 3
@pytest.mark.asyncio
async def test_update_run_by_id(server: SyncServer, sarah_agent, default_user):
"""Test updating a run by its ID."""
# Create a run
run_data = PydanticRun(
metadata={"type": "test"},
agent_id=sarah_agent.id,
)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
# Update the run
updated_run = await server.run_manager.update_run_by_id_async(created_run.id, RunUpdate(status=RunStatus.completed), actor=default_user)
# Assertions to ensure the run was updated
assert updated_run.status == RunStatus.completed
@pytest.mark.asyncio
async def test_update_run_metadata_persistence(server: SyncServer, sarah_agent, default_user):
"""Test that metadata is properly persisted when updating a run."""
# Create a run with initial metadata
run_data = PydanticRun(
metadata={"type": "test", "initial": "value"},
agent_id=sarah_agent.id,
)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
# Verify initial metadata
assert created_run.metadata == {"type": "test", "initial": "value"}
# Update the run with error metadata (simulating what happens in streaming service)
error_data = {
"error": {"type": "llm_timeout", "message": "The LLM request timed out. Please try again.", "detail": "Timeout after 30s"}
}
updated_run = await server.run_manager.update_run_by_id_async(
created_run.id,
RunUpdate(status=RunStatus.failed, stop_reason=StopReasonType.llm_api_error, metadata=error_data),
actor=default_user,
)
# Verify metadata was properly updated (metadata should merge, not overwrite)
assert updated_run.status == RunStatus.failed
assert updated_run.stop_reason == StopReasonType.llm_api_error
assert updated_run.metadata["type"] == "test"
assert updated_run.metadata["initial"] == "value"
assert "error" in updated_run.metadata
assert updated_run.metadata["error"]["type"] == "llm_timeout"
# Fetch the run again to ensure it's persisted in DB
fetched_run = await server.run_manager.get_run_by_id(created_run.id, actor=default_user)
assert fetched_run.metadata["type"] == "test"
assert fetched_run.metadata["initial"] == "value"
assert "error" in fetched_run.metadata
assert fetched_run.metadata["error"]["type"] == "llm_timeout"
@pytest.mark.asyncio
async def test_update_run_updates_agent_last_stop_reason(server: SyncServer, sarah_agent, default_user):
"""Test that completing a run updates the agent's last_stop_reason."""
# Verify agent starts with no last_stop_reason
agent = await server.agent_manager.get_agent_by_id_async(agent_id=sarah_agent.id, actor=default_user)
initial_stop_reason = agent.last_stop_reason
# Create a run
run_data = PydanticRun(agent_id=sarah_agent.id)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
# Complete the run with end_turn stop reason
await server.run_manager.update_run_by_id_async(
created_run.id, RunUpdate(status=RunStatus.completed, stop_reason=StopReasonType.end_turn), actor=default_user
)
# Verify agent's last_stop_reason was updated to end_turn
updated_agent = await server.agent_manager.get_agent_by_id_async(agent_id=sarah_agent.id, actor=default_user)
assert updated_agent.last_stop_reason == StopReasonType.end_turn
# Create another run and complete with different stop reason
run_data2 = PydanticRun(agent_id=sarah_agent.id)
created_run2 = await server.run_manager.create_run(pydantic_run=run_data2, actor=default_user)
# Complete with error stop reason
await server.run_manager.update_run_by_id_async(
created_run2.id, RunUpdate(status=RunStatus.failed, stop_reason=StopReasonType.error), actor=default_user
)
# Verify agent's last_stop_reason was updated to error
final_agent = await server.agent_manager.get_agent_by_id_async(agent_id=sarah_agent.id, actor=default_user)
assert final_agent.last_stop_reason == StopReasonType.error
@pytest.mark.asyncio
async def test_delete_run_by_id(server: SyncServer, sarah_agent, default_user):
"""Test deleting a run by its ID."""
# Create a run
run_data = PydanticRun(
metadata={"type": "test"},
agent_id=sarah_agent.id,
)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
print("created_run to delete", created_run.id)
# Delete the run
await server.run_manager.delete_run(created_run.id, actor=default_user)
# Fetch the run by ID
with pytest.raises(NoResultFound):
await server.run_manager.get_run_by_id(created_run.id, actor=default_user)
# List runs to ensure the run was deleted
runs = await server.run_manager.list_runs(actor=default_user)
assert len(runs) == 0
@pytest.mark.asyncio
async def test_update_run_auto_complete(server: SyncServer, default_user, sarah_agent):
"""Test that updating a run's status to 'completed' automatically sets completed_at."""
# Create a run
run_data = PydanticRun(
metadata={"type": "test"},
agent_id=sarah_agent.id,
)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
assert created_run.completed_at is None
# Update the run to completed status
updated_run = await server.run_manager.update_run_by_id_async(created_run.id, RunUpdate(status=RunStatus.completed), actor=default_user)
# Check that completed_at was automatically set
assert updated_run.completed_at is not None
assert isinstance(updated_run.completed_at, datetime)
@pytest.mark.asyncio
async def test_get_run_not_found(server: SyncServer, default_user):
"""Test fetching a non-existent run."""
non_existent_run_id = "nonexistent-id"
with pytest.raises(LettaInvalidArgumentError):
await server.run_manager.get_run_by_id(non_existent_run_id, actor=default_user)
@pytest.mark.asyncio
async def test_delete_run_not_found(server: SyncServer, default_user):
"""Test deleting a non-existent run."""
non_existent_run_id = "nonexistent-id"
with pytest.raises(LettaInvalidArgumentError):
await server.run_manager.delete_run(non_existent_run_id, actor=default_user)
@pytest.mark.asyncio
async def test_list_runs_pagination(server: SyncServer, sarah_agent, default_user):
"""Test listing runs with pagination."""
# Create multiple runs
for i in range(10):
run_data = PydanticRun(agent_id=sarah_agent.id)
await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
# List runs with a limit
runs = await server.run_manager.list_runs(actor=default_user, limit=5)
assert len(runs) == 5
assert all(run.agent_id == sarah_agent.id for run in runs)
# Test cursor-based pagination
first_page = await server.run_manager.list_runs(actor=default_user, limit=3, ascending=True)
assert len(first_page) == 3
assert first_page[0].created_at <= first_page[1].created_at <= first_page[2].created_at
last_page = await server.run_manager.list_runs(actor=default_user, limit=3, ascending=False)
assert len(last_page) == 3
assert last_page[0].created_at >= last_page[1].created_at >= last_page[2].created_at
first_page_ids = set(run.id for run in first_page)
last_page_ids = set(run.id for run in last_page)
assert first_page_ids.isdisjoint(last_page_ids)
# Test pagination with "before" cursor in descending order (UI's default behavior)
# This is the critical scenario that was broken - clicking "Next" in the UI
second_page_desc = await server.run_manager.list_runs(
actor=default_user,
before=last_page[-1].id, # Use last (oldest) item from first page as cursor
limit=3,
ascending=False,
)
assert len(second_page_desc) == 3
# CRITICAL: Verify no overlap with first page (this was the bug - there was overlap before)
second_page_desc_ids = set(run.id for run in second_page_desc)
assert second_page_desc_ids.isdisjoint(last_page_ids), "Second page should not overlap with first page"
# Verify descending order is maintained
assert second_page_desc[0].created_at >= second_page_desc[1].created_at >= second_page_desc[2].created_at
# Verify second page contains older items than first page
assert second_page_desc[0].created_at < last_page[-1].created_at
@pytest.mark.asyncio
async def test_list_runs_by_status(server: SyncServer, default_user, sarah_agent):
"""Test listing runs filtered by status."""
# Create multiple runs with different statuses
run_data_created = PydanticRun(
status=RunStatus.created,
metadata={"type": "test-created"},
agent_id=sarah_agent.id,
)
run_data_in_progress = PydanticRun(
status=RunStatus.running,
metadata={"type": "test-running"},
agent_id=sarah_agent.id,
)
run_data_completed = PydanticRun(
status=RunStatus.completed,
metadata={"type": "test-completed"},
agent_id=sarah_agent.id,
)
await server.run_manager.create_run(pydantic_run=run_data_created, actor=default_user)
await server.run_manager.create_run(pydantic_run=run_data_in_progress, actor=default_user)
await server.run_manager.create_run(pydantic_run=run_data_completed, actor=default_user)
# List runs filtered by status
created_runs = await server.run_manager.list_runs(actor=default_user, statuses=[RunStatus.created])
in_progress_runs = await server.run_manager.list_runs(actor=default_user, statuses=[RunStatus.running])
completed_runs = await server.run_manager.list_runs(actor=default_user, statuses=[RunStatus.completed])
# Assertions
assert len(created_runs) == 1
assert created_runs[0].metadata["type"] == run_data_created.metadata["type"]
assert len(in_progress_runs) == 1
assert in_progress_runs[0].metadata["type"] == run_data_in_progress.metadata["type"]
assert len(completed_runs) == 1
assert completed_runs[0].metadata["type"] == run_data_completed.metadata["type"]
@pytest.mark.asyncio
async def test_list_runs_by_stop_reason(server: SyncServer, sarah_agent, default_user):
"""Test listing runs by stop reason."""
run_pydantic = PydanticRun(
agent_id=sarah_agent.id,
stop_reason=StopReasonType.requires_approval,
background=True,
)
run = await server.run_manager.create_run(pydantic_run=run_pydantic, actor=default_user)
assert run.stop_reason == StopReasonType.requires_approval
assert run.background == True
assert run.agent_id == sarah_agent.id
# list runs by stop reason
runs = await server.run_manager.list_runs(actor=default_user, stop_reason=StopReasonType.requires_approval)
assert len(runs) == 1
assert runs[0].id == run.id
# list runs by background
runs = await server.run_manager.list_runs(actor=default_user, background=True)
assert len(runs) == 1
assert runs[0].id == run.id
# list runs by agent_id
runs = await server.run_manager.list_runs(actor=default_user, agent_ids=[sarah_agent.id])
assert len(runs) == 1
assert runs[0].id == run.id
@pytest.mark.asyncio
async def test_list_runs_by_tools_used(server: SyncServer, sarah_agent, default_user):
"""Test listing runs filtered by tools used."""
# Seed tools first
from letta.services.tool_manager import ToolManager
tool_manager = ToolManager()
await tool_manager.upsert_base_tools_async(default_user)
web_search_tool_id = await tool_manager.get_tool_id_by_name_async("web_search", default_user)
run_code_tool_id = await tool_manager.get_tool_id_by_name_async("run_code", default_user)
if not web_search_tool_id or not run_code_tool_id:
pytest.skip("Required tools (web_search, run_code) are not available in the database")
# Create run with web_search tool
run_web = await server.run_manager.create_run(
pydantic_run=PydanticRun(agent_id=sarah_agent.id),
actor=default_user,
)
await server.message_manager.create_many_messages_async(
[
PydanticMessage(
agent_id=sarah_agent.id,
role=MessageRole.assistant,
content=[TextContent(text="Using web search")],
tool_calls=[
OpenAIToolCall(
id="call_web",
type="function",
function=OpenAIFunction(name="web_search", arguments="{}"),
)
],
run_id=run_web.id,
)
],
actor=default_user,
)
# Create run with run_code tool
run_code = await server.run_manager.create_run(
pydantic_run=PydanticRun(agent_id=sarah_agent.id),
actor=default_user,
)
await server.message_manager.create_many_messages_async(
[
PydanticMessage(
agent_id=sarah_agent.id,
role=MessageRole.assistant,
content=[TextContent(text="Using run code")],
tool_calls=[
OpenAIToolCall(
id="call_code",
type="function",
function=OpenAIFunction(name="run_code", arguments="{}"),
)
],
run_id=run_code.id,
)
],
actor=default_user,
)
# Complete runs to populate tools_used
await server.run_manager.update_run_by_id_async(
run_web.id, RunUpdate(status=RunStatus.completed, stop_reason=StopReasonType.end_turn), actor=default_user
)
await server.run_manager.update_run_by_id_async(
run_code.id, RunUpdate(status=RunStatus.completed, stop_reason=StopReasonType.end_turn), actor=default_user
)
# Test filtering by single tool
runs_web = await server.run_manager.list_runs(
actor=default_user,
agent_id=sarah_agent.id,
tools_used=[web_search_tool_id],
)
assert len(runs_web) == 1
assert runs_web[0].id == run_web.id
# Test filtering by multiple tools
runs_multi = await server.run_manager.list_runs(
actor=default_user,
agent_id=sarah_agent.id,
tools_used=[web_search_tool_id, run_code_tool_id],
)
assert len(runs_multi) == 2
assert {r.id for r in runs_multi} == {run_web.id, run_code.id}
@pytest.mark.asyncio
async def test_list_runs_by_step_count(server: SyncServer, sarah_agent, default_user):
"""Test listing runs filtered by step count."""
from letta.schemas.enums import ComparisonOperator
# Create runs with different numbers of steps
runs_data = []
# Run with 0 steps
run_0 = await server.run_manager.create_run(
pydantic_run=PydanticRun(
agent_id=sarah_agent.id,
metadata={"steps": 0},
),
actor=default_user,
)
runs_data.append((run_0, 0))
# Run with 2 steps
run_2 = await server.run_manager.create_run(
pydantic_run=PydanticRun(
agent_id=sarah_agent.id,
metadata={"steps": 2},
),
actor=default_user,
)
for i in range(2):
await server.step_manager.log_step_async(
agent_id=sarah_agent.id,
provider_name="openai",
provider_category="base",
model="gpt-4o-mini",
model_endpoint="https://api.openai.com/v1",
context_window_limit=8192,
usage=UsageStatistics(
completion_tokens=100,
prompt_tokens=50,
total_tokens=150,
),
run_id=run_2.id,
actor=default_user,
project_id=sarah_agent.project_id,
)
runs_data.append((run_2, 2))
# Run with 5 steps
run_5 = await server.run_manager.create_run(
pydantic_run=PydanticRun(
agent_id=sarah_agent.id,
metadata={"steps": 5},
),
actor=default_user,
)
for i in range(5):
await server.step_manager.log_step_async(
agent_id=sarah_agent.id,
provider_name="openai",
provider_category="base",
model="gpt-4o-mini",
model_endpoint="https://api.openai.com/v1",
context_window_limit=8192,
usage=UsageStatistics(
completion_tokens=100,
prompt_tokens=50,
total_tokens=150,
),
run_id=run_5.id,
actor=default_user,
project_id=sarah_agent.project_id,
)
runs_data.append((run_5, 5))
# Update all runs to trigger metrics update
for run, _ in runs_data:
await server.run_manager.update_run_by_id_async(
run.id,
RunUpdate(status=RunStatus.completed, stop_reason=StopReasonType.end_turn),
actor=default_user,
)
# Test EQ operator - exact match
runs_eq_2 = await server.run_manager.list_runs(
actor=default_user,
agent_id=sarah_agent.id,
step_count=2,
step_count_operator=ComparisonOperator.EQ,
)
assert len(runs_eq_2) == 1
assert runs_eq_2[0].id == run_2.id
# Test GTE operator - greater than or equal
runs_gte_2 = await server.run_manager.list_runs(
actor=default_user,
agent_id=sarah_agent.id,
step_count=2,
step_count_operator=ComparisonOperator.GTE,
)
assert len(runs_gte_2) == 2
run_ids_gte = {run.id for run in runs_gte_2}
assert run_2.id in run_ids_gte
assert run_5.id in run_ids_gte
# Test LTE operator - less than or equal
runs_lte_2 = await server.run_manager.list_runs(
actor=default_user,
agent_id=sarah_agent.id,
step_count=2,
step_count_operator=ComparisonOperator.LTE,
)
assert len(runs_lte_2) == 2
run_ids_lte = {run.id for run in runs_lte_2}
assert run_0.id in run_ids_lte
assert run_2.id in run_ids_lte
# Test GTE with 0 - should return all runs
runs_gte_0 = await server.run_manager.list_runs(
actor=default_user,
agent_id=sarah_agent.id,
step_count=0,
step_count_operator=ComparisonOperator.GTE,
)
assert len(runs_gte_0) == 3
# Test LTE with 0 - should return only run with 0 steps
runs_lte_0 = await server.run_manager.list_runs(
actor=default_user,
agent_id=sarah_agent.id,
step_count=0,
step_count_operator=ComparisonOperator.LTE,
)
assert len(runs_lte_0) == 1
assert runs_lte_0[0].id == run_0.id
@pytest.mark.asyncio
async def test_list_runs_by_base_template_id(server: SyncServer, sarah_agent, default_user):
"""Test listing runs by template family."""
run_data = PydanticRun(
agent_id=sarah_agent.id,
base_template_id="test-template-family",
)
await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
runs = await server.run_manager.list_runs(actor=default_user, template_family="test-template-family")
assert len(runs) == 1
async def test_e2e_run_callback(monkeypatch, server: SyncServer, default_user, sarah_agent):
"""Test that run callbacks are properly dispatched when a run is completed."""
captured = {}
# Create a simple mock for the async HTTP client
class MockAsyncResponse:
status_code = 202
async def mock_post(url, json, timeout):
captured["url"] = url
captured["json"] = json
return MockAsyncResponse()
class MockAsyncClient:
async def __aenter__(self):
return self
async def __aexit__(self, *args):
pass
async def post(self, url, json, timeout):
return await mock_post(url, json, timeout)
# Patch the AsyncClient
import letta.services.run_manager as run_manager_module
monkeypatch.setattr(run_manager_module, "AsyncClient", MockAsyncClient)
run_in = PydanticRun(
status=RunStatus.created, metadata={"foo": "bar"}, agent_id=sarah_agent.id, callback_url="http://example.test/webhook/runs"
)
created = await server.run_manager.create_run(pydantic_run=run_in, actor=default_user)
assert created.callback_url == "http://example.test/webhook/runs"
# Update the run status to completed, which should trigger the callback
updated = await server.run_manager.update_run_by_id_async(
created.id, RunUpdate(status=RunStatus.completed, stop_reason=StopReasonType.end_turn), actor=default_user
)
# Verify the callback was triggered with the correct parameters
assert captured["url"] == created.callback_url, "Callback URL doesn't match"
assert captured["json"]["run_id"] == created.id, "Run ID in callback doesn't match"
assert captured["json"]["status"] == RunStatus.completed.value, "Run status in callback doesn't match"
# Verify the completed_at timestamp is reasonable
actual_dt = datetime.fromisoformat(captured["json"]["completed_at"]).replace(tzinfo=None)
# Remove timezone from updated.completed_at for comparison (it comes from DB as timezone-aware)
assert abs((actual_dt - updated.completed_at).total_seconds()) < 1, "Timestamp difference is too large"
assert isinstance(updated.callback_sent_at, datetime)
assert updated.callback_status_code == 202
@pytest.mark.asyncio
async def test_run_callback_only_on_terminal_status(server: SyncServer, sarah_agent, default_user, monkeypatch):
"""
Regression: ensure a non-terminal update (running) does NOT set completed_at or trigger callback,
and that a subsequent terminal update (completed) does trigger the callback exactly once.
"""
# Capture callback invocations
captured = {"count": 0, "url": None, "json": None}
class MockAsyncResponse:
status_code = 202
async def mock_post(url, json, timeout):
captured["count"] += 1
captured["url"] = url
captured["json"] = json
return MockAsyncResponse()
class MockAsyncClient:
async def __aenter__(self):
return self
async def __aexit__(self, *args):
pass
async def post(self, url, json, timeout):
return await mock_post(url, json, timeout)
# Patch the AsyncClient in run_manager module
import letta.services.run_manager as run_manager_module
monkeypatch.setattr(run_manager_module, "AsyncClient", MockAsyncClient)
# Create run with a callback URL
run_in = PydanticRun(
status=RunStatus.created,
metadata={"foo": "bar"},
agent_id=sarah_agent.id,
callback_url="http://example.test/webhook/runs",
)
created = await server.run_manager.create_run(pydantic_run=run_in, actor=default_user)
assert created.callback_url == "http://example.test/webhook/runs"
# 1) Non-terminal update: running
updated_running = await server.run_manager.update_run_by_id_async(created.id, RunUpdate(status=RunStatus.running), actor=default_user)
# Should not set completed_at or trigger callback
assert updated_running.completed_at is None
assert captured["count"] == 0
# 2) Terminal update: completed
updated_completed = await server.run_manager.update_run_by_id_async(
created.id, RunUpdate(status=RunStatus.completed, stop_reason=StopReasonType.end_turn), actor=default_user
)
# Should trigger exactly one callback with expected payload
assert captured["count"] == 1
assert captured["url"] == created.callback_url
assert captured["json"]["run_id"] == created.id
assert captured["json"]["status"] == RunStatus.completed.value
# completed_at should be set and align closely with callback payload
assert updated_completed.completed_at is not None
actual_dt = datetime.fromisoformat(captured["json"]["completed_at"]).replace(tzinfo=None)
assert abs((actual_dt - updated_completed.completed_at).total_seconds()) < 1
assert isinstance(updated_completed.callback_sent_at, datetime)
assert updated_completed.callback_status_code == 202
# ======================================================================================================================
# RunManager Tests - Messages
# ======================================================================================================================
@pytest.mark.asyncio
async def test_run_messages_pagination(server: SyncServer, default_run, default_user, sarah_agent):
"""Test pagination of run messages."""
# create the run
run_pydantic = PydanticRun(
agent_id=sarah_agent.id,
status=RunStatus.created,
metadata={"foo": "bar"},
)
run = await server.run_manager.create_run(pydantic_run=run_pydantic, actor=default_user)
assert run.status == RunStatus.created
# Create multiple messages
message_ids = []
for i in range(5):
message = PydanticMessage(
agent_id=sarah_agent.id,
role=MessageRole.user,
content=[TextContent(text=f"Test message {i}")],
run_id=run.id,
)
msg = await server.message_manager.create_many_messages_async([message], actor=default_user)
message_ids.append(msg[0].id)
# Test pagination with limit
messages = await server.message_manager.list_messages(
run_id=run.id,
actor=default_user,
limit=2,
)
assert len(messages) == 2
assert messages[0].id == message_ids[0]
assert messages[1].id == message_ids[1]
# Test pagination with cursor
first_page = await server.message_manager.list_messages(
run_id=run.id,
actor=default_user,
limit=2,
ascending=True, # [M0, M1]
)
assert len(first_page) == 2
assert first_page[0].id == message_ids[0]
assert first_page[1].id == message_ids[1]
assert first_page[0].created_at <= first_page[1].created_at
last_page = await server.message_manager.list_messages(
run_id=run.id,
actor=default_user,
limit=2,
ascending=False, # [M4, M3]
)
assert len(last_page) == 2
assert last_page[0].id == message_ids[4]
assert last_page[1].id == message_ids[3]
assert last_page[0].created_at >= last_page[1].created_at
first_page_ids = set(msg.id for msg in first_page)
last_page_ids = set(msg.id for msg in last_page)
assert first_page_ids.isdisjoint(last_page_ids)
# Test middle page using both before and after
middle_page = await server.message_manager.list_messages(
run_id=run.id,
actor=default_user,
before=last_page[-1].id, # M3
after=first_page[0].id, # M0
ascending=True, # [M1, M2]
)
assert len(middle_page) == 2 # Should include message between first and last pages
assert middle_page[0].id == message_ids[1]
assert middle_page[1].id == message_ids[2]
head_tail_msgs = first_page_ids.union(last_page_ids)
assert middle_page[1].id not in head_tail_msgs
assert middle_page[0].id in first_page_ids
# Test descending order for middle page
middle_page = await server.message_manager.list_messages(
run_id=run.id,
actor=default_user,
before=last_page[-1].id, # M3
after=first_page[0].id, # M0
ascending=False, # [M2, M1]
)
assert len(middle_page) == 2 # Should include message between first and last pages
assert middle_page[0].id == message_ids[2]
assert middle_page[1].id == message_ids[1]
# Test getting earliest messages
msg_3 = last_page[-1].id
earliest_msgs = await server.message_manager.list_messages(
run_id=run.id,
actor=default_user,
ascending=False,
before=msg_3, # Get messages after M3 in descending order
)
assert len(earliest_msgs) == 3 # Should get M2, M1, M0
assert all(m.id not in last_page_ids for m in earliest_msgs)
assert earliest_msgs[0].created_at > earliest_msgs[1].created_at > earliest_msgs[2].created_at
# Test getting earliest messages with ascending order
earliest_msgs_ascending = await server.message_manager.list_messages(
run_id=run.id,
actor=default_user,
ascending=True,
before=msg_3, # Get messages before M3 in ascending order
)
assert len(earliest_msgs_ascending) == 3 # Should get M0, M1, M2
assert all(m.id not in last_page_ids for m in earliest_msgs_ascending)
assert earliest_msgs_ascending[0].created_at < earliest_msgs_ascending[1].created_at < earliest_msgs_ascending[2].created_at
@pytest.mark.asyncio
async def test_run_messages_ordering(server: SyncServer, default_run, default_user, sarah_agent):
"""Test that messages are ordered by created_at."""
# Create messages with different timestamps
base_time = datetime.now(timezone.utc)
message_times = [
base_time - timedelta(minutes=2),
base_time - timedelta(minutes=1),
base_time,
]
# create the run
run_pydantic = PydanticRun(
agent_id=sarah_agent.id,
)
run = await server.run_manager.create_run(pydantic_run=run_pydantic, actor=default_user)
assert run.status == RunStatus.created
for i, created_at in enumerate(message_times):
message = PydanticMessage(
role=MessageRole.user,
content=[TextContent(text="Test message")],
agent_id=sarah_agent.id,
created_at=created_at,
run_id=run.id,
)
msg = await server.message_manager.create_many_messages_async([message], actor=default_user)
# Verify messages are returned in chronological order
returned_messages = await server.message_manager.list_messages(
run_id=run.id,
actor=default_user,
)
assert len(returned_messages) == 3
assert returned_messages[0].created_at < returned_messages[1].created_at
assert returned_messages[1].created_at < returned_messages[2].created_at
# Verify messages are returned in descending order
returned_messages = await server.message_manager.list_messages(
run_id=run.id,
actor=default_user,
ascending=False,
)
assert len(returned_messages) == 3
assert returned_messages[0].created_at > returned_messages[1].created_at
assert returned_messages[1].created_at > returned_messages[2].created_at
@pytest.mark.asyncio
async def test_job_messages_empty(server: SyncServer, default_run, default_user):
"""Test getting messages for a job with no messages."""
messages = await server.message_manager.list_messages(
run_id=default_run.id,
actor=default_user,
)
assert len(messages) == 0
@pytest.mark.asyncio
async def test_job_messages_filter(server: SyncServer, default_run, default_user, sarah_agent):
"""Test getting messages associated with a job."""
# Create the run
run_pydantic = PydanticRun(
agent_id=sarah_agent.id,
)
run = await server.run_manager.create_run(pydantic_run=run_pydantic, actor=default_user)
assert run.status == RunStatus.created
# Create test messages with different roles and tool calls
messages = [
PydanticMessage(
role=MessageRole.user,
content=[TextContent(text="Hello")],
agent_id=sarah_agent.id,
run_id=default_run.id,
),
PydanticMessage(
role=MessageRole.assistant,
content=[TextContent(text="Hi there!")],
agent_id=sarah_agent.id,
run_id=default_run.id,
),
PydanticMessage(
role=MessageRole.assistant,
content=[TextContent(text="Let me help you with that")],
agent_id=sarah_agent.id,
tool_calls=[
OpenAIToolCall(
id="call_1",
type="function",
function=OpenAIFunction(
name="test_tool",
arguments='{"arg1": "value1"}',
),
)
],
run_id=default_run.id,
),
]
await server.message_manager.create_many_messages_async(messages, actor=default_user)
# Test getting all messages
all_messages = await server.message_manager.list_messages(
run_id=default_run.id,
actor=default_user,
)
assert len(all_messages) == 3
# Test filtering by role
user_messages = await server.message_manager.list_messages(run_id=default_run.id, actor=default_user, roles=[MessageRole.user])
assert len(user_messages) == 1
assert user_messages[0].role == MessageRole.user
# Test limit
limited_messages = await server.message_manager.list_messages(run_id=default_run.id, actor=default_user, limit=2)
assert len(limited_messages) == 2
@pytest.mark.asyncio
async def test_get_run_messages(server: SyncServer, default_user: PydanticUser, sarah_agent):
"""Test getting messages for a run with request config."""
# Create a run with custom request config
run = await server.run_manager.create_run(
pydantic_run=PydanticRun(
agent_id=sarah_agent.id,
status=RunStatus.created,
request_config=LettaRequestConfig(
use_assistant_message=False, assistant_message_tool_name="custom_tool", assistant_message_tool_kwarg="custom_arg"
),
),
actor=default_user,
)
# Add some messages
messages = []
for i in range(4):
if i % 2 == 0:
# tool return message
messages.append(
PydanticMessage(
agent_id=sarah_agent.id,
role=MessageRole.tool,
content=[TextContent(text='{"status": "OK"}')],
tool_call_id=f"call_{i // 2}",
tool_returns=[
ToolReturn(
tool_call_id=f"call_{i // 2}",
status="success",
func_response='{"status": "OK", "message": "Tool executed successfully"}',
)
],
run_id=run.id,
)
)
else:
# assistant message with tool call
messages.append(
PydanticMessage(
agent_id=sarah_agent.id,
role=MessageRole.assistant,
content=[TextContent(text=f"Test message {i}")],
tool_calls=[
{
"type": "function",
"id": f"call_{i // 2}",
"function": {"name": "custom_tool", "arguments": '{"custom_arg": "test"}'},
}
],
run_id=run.id,
)
)
created_msg = await server.message_manager.create_many_messages_async(messages, actor=default_user)
# Get messages and verify they're converted correctly
result = await server.message_manager.list_messages(run_id=run.id, actor=default_user)
result = Message.to_letta_messages_from_list(result)
# Verify correct number of messages. Assistant messages should be parsed
assert len(result) == 6
# Verify assistant messages are parsed according to request config
tool_call_messages = [msg for msg in result if msg.message_type == "tool_call_message"]
reasoning_messages = [msg for msg in result if msg.message_type == "reasoning_message"]
assert len(tool_call_messages) == 2
assert len(reasoning_messages) == 2
for msg in tool_call_messages:
assert msg.tool_call is not None
assert msg.tool_call.name == "custom_tool"
@pytest.mark.asyncio
async def test_get_run_messages_with_assistant_message(server: SyncServer, default_user: PydanticUser, sarah_agent):
"""Test getting messages for a run with request config."""
# Create a run with custom request config
run = await server.run_manager.create_run(
pydantic_run=PydanticRun(
agent_id=sarah_agent.id,
status=RunStatus.created,
request_config=LettaRequestConfig(
use_assistant_message=True, assistant_message_tool_name="custom_tool", assistant_message_tool_kwarg="custom_arg"
),
),
actor=default_user,
)
# Add some messages
messages = []
for i in range(4):
if i % 2 == 0:
# tool return message
messages.append(
PydanticMessage(
agent_id=sarah_agent.id,
role=MessageRole.tool,
content=[TextContent(text='{"status": "OK"}')],
tool_call_id=f"call_{i // 2}",
tool_returns=[
ToolReturn(
tool_call_id=f"call_{i // 2}",
status="success",
func_response='{"status": "OK", "message": "Tool executed successfully"}',
)
],
run_id=run.id,
)
)
else:
# assistant message with tool call
messages.append(
PydanticMessage(
agent_id=sarah_agent.id,
role=MessageRole.assistant,
content=[TextContent(text=f"Test message {i}")],
tool_calls=[
{
"type": "function",
"id": f"call_{i // 2}",
"function": {"name": "custom_tool", "arguments": '{"custom_arg": "test"}'},
}
],
run_id=run.id,
)
)
created_msg = await server.message_manager.create_many_messages_async(messages, actor=default_user)
# Get messages and verify they're converted correctly
result = await server.message_manager.list_messages(run_id=run.id, actor=default_user)
result = Message.to_letta_messages_from_list(
result, assistant_message_tool_name="custom_tool", assistant_message_tool_kwarg="custom_arg"
)
# Verify correct number of messages. Assistant messages should be parsed
assert len(result) == 4
# Verify assistant messages are parsed according to request config
assistant_messages = [msg for msg in result if msg.message_type == "assistant_message"]
reasoning_messages = [msg for msg in result if msg.message_type == "reasoning_message"]
assert len(assistant_messages) == 2
assert len(reasoning_messages) == 2
for msg in assistant_messages:
assert msg.content == "test"
for msg in reasoning_messages:
assert "Test message" in msg.reasoning
# ======================================================================================================================
# RunManager Tests - Usage Statistics -
# ======================================================================================================================
@pytest.mark.asyncio
async def test_run_usage_stats_add_and_get(server: SyncServer, sarah_agent, default_run, default_user):
"""Test adding and retrieving run usage statistics."""
run_manager = server.run_manager
step_manager = server.step_manager
# Add usage statistics
await step_manager.log_step_async(
agent_id=sarah_agent.id,
provider_name="openai",
provider_category="base",
model="gpt-4o-mini",
model_endpoint="https://api.openai.com/v1",
context_window_limit=8192,
run_id=default_run.id,
usage=UsageStatistics(
completion_tokens=100,
prompt_tokens=50,
total_tokens=150,
),
actor=default_user,
project_id=sarah_agent.project_id,
)
# Get usage statistics
usage_stats = await run_manager.get_run_usage(run_id=default_run.id, actor=default_user)
# Verify the statistics
assert usage_stats.completion_tokens == 100
assert usage_stats.prompt_tokens == 50
assert usage_stats.total_tokens == 150
# get steps
steps = await step_manager.list_steps_async(run_id=default_run.id, actor=default_user)
assert len(steps) == 1
@pytest.mark.asyncio
async def test_run_usage_stats_get_no_stats(server: SyncServer, default_run, default_user):
"""Test getting usage statistics for a job with no stats."""
run_manager = server.run_manager
# Get usage statistics for a job with no stats
usage_stats = await run_manager.get_run_usage(run_id=default_run.id, actor=default_user)
# Verify default values
assert usage_stats.completion_tokens == 0
assert usage_stats.prompt_tokens == 0
assert usage_stats.total_tokens == 0
# get steps
steps = await server.step_manager.list_steps_async(run_id=default_run.id, actor=default_user)
assert len(steps) == 0
@pytest.mark.asyncio
async def test_run_usage_stats_add_multiple(server: SyncServer, sarah_agent, default_run, default_user):
"""Test adding multiple usage statistics entries for a job."""
run_manager = server.run_manager
step_manager = server.step_manager
# Add first usage statistics entry
await step_manager.log_step_async(
agent_id=sarah_agent.id,
provider_name="openai",
provider_category="base",
model="gpt-4o-mini",
model_endpoint="https://api.openai.com/v1",
context_window_limit=8192,
usage=UsageStatistics(
completion_tokens=100,
prompt_tokens=50,
total_tokens=150,
),
actor=default_user,
project_id=sarah_agent.project_id,
run_id=default_run.id,
)
# Add second usage statistics entry
await step_manager.log_step_async(
agent_id=sarah_agent.id,
provider_name="openai",
provider_category="base",
model="gpt-4o-mini",
model_endpoint="https://api.openai.com/v1",
context_window_limit=8192,
usage=UsageStatistics(
completion_tokens=200,
prompt_tokens=100,
total_tokens=300,
),
actor=default_user,
project_id=sarah_agent.project_id,
run_id=default_run.id,
)
# Get usage statistics (should return the latest entry)
usage_stats = await run_manager.get_run_usage(run_id=default_run.id, actor=default_user)
# Verify we get the most recent statistics
assert usage_stats.completion_tokens == 300
assert usage_stats.prompt_tokens == 150
assert usage_stats.total_tokens == 450
assert usage_stats.step_count == 2
# get steps
steps = await step_manager.list_steps_async(run_id=default_run.id, actor=default_user)
assert len(steps) == 2
# get agent steps
steps = await step_manager.list_steps_async(agent_id=sarah_agent.id, actor=default_user)
assert len(steps) == 2
# add step feedback
step_manager = server.step_manager
# Add feedback to first step
await step_manager.add_feedback_async(step_id=steps[0].id, feedback=FeedbackType.POSITIVE, actor=default_user)
# Test has_feedback filtering
steps_with_feedback = await step_manager.list_steps_async(agent_id=sarah_agent.id, has_feedback=True, actor=default_user)
assert len(steps_with_feedback) == 1
steps_without_feedback = await step_manager.list_steps_async(agent_id=sarah_agent.id, actor=default_user)
assert len(steps_without_feedback) == 2
@pytest.mark.asyncio
async def test_run_usage_stats_get_nonexistent_run(server: SyncServer, default_user):
"""Test getting usage statistics for a nonexistent run."""
run_manager = server.run_manager
with pytest.raises(LettaInvalidArgumentError):
await run_manager.get_run_usage(run_id="nonexistent_run", actor=default_user)
@pytest.mark.asyncio
async def test_get_run_request_config(server: SyncServer, sarah_agent, default_user):
"""Test getting request config from a run."""
request_config = LettaRequestConfig(
use_assistant_message=True, assistant_message_tool_name="send_message", assistant_message_tool_kwarg="message"
)
run_data = PydanticRun(
agent_id=sarah_agent.id,
request_config=request_config,
)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
retrieved_config = await server.run_manager.get_run_request_config(created_run.id, actor=default_user)
assert retrieved_config is not None
assert retrieved_config.use_assistant_message == request_config.use_assistant_message
assert retrieved_config.assistant_message_tool_name == request_config.assistant_message_tool_name
assert retrieved_config.assistant_message_tool_kwarg == request_config.assistant_message_tool_kwarg
@pytest.mark.asyncio
async def test_get_run_request_config_none(server: SyncServer, sarah_agent, default_user):
"""Test getting request config from a run with no config."""
run_data = PydanticRun(agent_id=sarah_agent.id)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
retrieved_config = await server.run_manager.get_run_request_config(created_run.id, actor=default_user)
assert retrieved_config is None
@pytest.mark.asyncio
async def test_get_run_request_config_nonexistent_run(server: SyncServer, default_user):
"""Test getting request config for a nonexistent run."""
with pytest.raises(LettaInvalidArgumentError):
await server.run_manager.get_run_request_config("nonexistent_run", actor=default_user)
# ======================================================================================================================
# RunManager Tests - Run Metrics
# ======================================================================================================================
@pytest.mark.asyncio
async def test_run_metrics_creation(server: SyncServer, sarah_agent, default_user):
"""Test that run metrics are created when a run is created."""
# Create a run
run_data = PydanticRun(
metadata={"type": "test_metrics"},
agent_id=sarah_agent.id,
)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
# Get the run metrics
metrics = await server.run_manager.get_run_metrics_async(run_id=created_run.id, actor=default_user)
# Assertions
assert metrics is not None
assert metrics.id == created_run.id
assert metrics.agent_id == sarah_agent.id
assert metrics.organization_id == default_user.organization_id
# project_id may be None or set from the agent
assert metrics.run_start_ns is not None
assert metrics.run_start_ns > 0
assert metrics.run_ns is None # Should be None until run completes
assert metrics.num_steps is not None
assert metrics.num_steps == 0 # Should be 0 initially
@pytest.mark.asyncio
async def test_run_metrics_timestamp_tracking(server: SyncServer, sarah_agent, default_user):
"""Test that run_start_ns is properly tracked."""
import time
# Record time before creation
before_ns = int(time.time() * 1e9)
# Create a run
run_data = PydanticRun(
metadata={"type": "test_timestamp"},
agent_id=sarah_agent.id,
)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
# Record time after creation
after_ns = int(time.time() * 1e9)
# Get the run metrics
metrics = await server.run_manager.get_run_metrics_async(run_id=created_run.id, actor=default_user)
# Verify timestamp is within expected range
assert metrics.run_start_ns is not None
assert before_ns <= metrics.run_start_ns <= after_ns, f"Expected {before_ns} <= {metrics.run_start_ns} <= {after_ns}"
@pytest.mark.asyncio
async def test_run_metrics_duration_calculation(server: SyncServer, sarah_agent, default_user):
"""Test that run duration (run_ns) is calculated when run completes."""
import asyncio
# Create a run
run_data = PydanticRun(
metadata={"type": "test_duration"},
agent_id=sarah_agent.id,
)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
# Get initial metrics
initial_metrics = await server.run_manager.get_run_metrics_async(run_id=created_run.id, actor=default_user)
assert initial_metrics.run_ns is None # Should be None initially
assert initial_metrics.run_start_ns is not None
# Wait a bit to ensure there's measurable duration
await asyncio.sleep(0.1) # Wait 100ms
# Update the run to completed
updated_run = await server.run_manager.update_run_by_id_async(
created_run.id, RunUpdate(status=RunStatus.completed, stop_reason=StopReasonType.end_turn), actor=default_user
)
# Get updated metrics
final_metrics = await server.run_manager.get_run_metrics_async(run_id=created_run.id, actor=default_user)
# Assertions
assert final_metrics.run_ns is not None
assert final_metrics.run_ns > 0
# Duration should be at least 100ms (100_000_000 nanoseconds)
assert final_metrics.run_ns >= 100_000_000, f"Expected run_ns >= 100_000_000, got {final_metrics.run_ns}"
# Duration should be reasonable (less than 10 seconds)
assert final_metrics.run_ns < 10_000_000_000, f"Expected run_ns < 10_000_000_000, got {final_metrics.run_ns}"
@pytest.mark.asyncio
async def test_run_metrics_num_steps_tracking(server: SyncServer, sarah_agent, default_user):
"""Test that num_steps is properly tracked in run metrics."""
# Create a run
run_data = PydanticRun(
metadata={"type": "test_num_steps"},
agent_id=sarah_agent.id,
)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
# Initial metrics should have 0 steps
initial_metrics = await server.run_manager.get_run_metrics_async(run_id=created_run.id, actor=default_user)
assert initial_metrics.num_steps == 0
# Add some steps
for i in range(3):
await server.step_manager.log_step_async(
agent_id=sarah_agent.id,
provider_name="openai",
provider_category="base",
model="gpt-4o-mini",
model_endpoint="https://api.openai.com/v1",
context_window_limit=8192,
usage=UsageStatistics(
completion_tokens=100 + i * 10,
prompt_tokens=50 + i * 5,
total_tokens=150 + i * 15,
),
run_id=created_run.id,
actor=default_user,
project_id=sarah_agent.project_id,
)
# Update the run to trigger metrics update
await server.run_manager.update_run_by_id_async(
created_run.id, RunUpdate(status=RunStatus.completed, stop_reason=StopReasonType.end_turn), actor=default_user
)
# Get updated metrics
final_metrics = await server.run_manager.get_run_metrics_async(run_id=created_run.id, actor=default_user)
# Verify num_steps was updated
assert final_metrics.num_steps == 3
@pytest.mark.asyncio
async def test_run_metrics_not_found(server: SyncServer, default_user):
"""Test getting metrics for non-existent run."""
with pytest.raises(LettaInvalidArgumentError):
await server.run_manager.get_run_metrics_async(run_id="nonexistent_run", actor=default_user)
@pytest.mark.asyncio
async def test_run_metrics_partial_update(server: SyncServer, sarah_agent, default_user):
"""Test that non-terminal updates don't calculate run_ns."""
# Create a run
run_data = PydanticRun(
metadata={"type": "test_partial"},
agent_id=sarah_agent.id,
)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
# Add a step
await server.step_manager.log_step_async(
agent_id=sarah_agent.id,
provider_name="openai",
provider_category="base",
model="gpt-4o-mini",
model_endpoint="https://api.openai.com/v1",
context_window_limit=8192,
usage=UsageStatistics(
completion_tokens=100,
prompt_tokens=50,
total_tokens=150,
),
run_id=created_run.id,
actor=default_user,
project_id=sarah_agent.project_id,
)
# Update to running (non-terminal)
await server.run_manager.update_run_by_id_async(created_run.id, RunUpdate(status=RunStatus.running), actor=default_user)
# Get metrics
metrics = await server.run_manager.get_run_metrics_async(run_id=created_run.id, actor=default_user)
# Verify run_ns is still None (not calculated for non-terminal updates)
assert metrics.run_ns is None
# But num_steps should be updated
assert metrics.num_steps == 1
@pytest.mark.asyncio
async def test_run_metrics_integration_with_run_steps(server: SyncServer, sarah_agent, default_user):
"""Test integration between run metrics and run steps."""
# Create a run
run_data = PydanticRun(
metadata={"type": "test_integration"},
agent_id=sarah_agent.id,
)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
# Add multiple steps
step_ids = []
for i in range(5):
step = await server.step_manager.log_step_async(
agent_id=sarah_agent.id,
provider_name="openai",
provider_category="base",
model="gpt-4o-mini",
model_endpoint="https://api.openai.com/v1",
context_window_limit=8192,
usage=UsageStatistics(
completion_tokens=100,
prompt_tokens=50,
total_tokens=150,
),
run_id=created_run.id,
actor=default_user,
project_id=sarah_agent.project_id,
)
step_ids.append(step.id)
# Get run steps
run_steps = await server.run_manager.get_run_steps(run_id=created_run.id, actor=default_user)
# Verify steps are returned correctly
assert len(run_steps) == 5
assert all(step.run_id == created_run.id for step in run_steps)
# Update run to completed
await server.run_manager.update_run_by_id_async(
created_run.id, RunUpdate(status=RunStatus.completed, stop_reason=StopReasonType.end_turn), actor=default_user
)
# Get final metrics
metrics = await server.run_manager.get_run_metrics_async(run_id=created_run.id, actor=default_user)
# Verify metrics reflect the steps
assert metrics.num_steps == 5
assert metrics.run_ns is not None
# TODO: add back once metrics are added
# @pytest.mark.asyncio
# async def test_record_ttft(server: SyncServer, default_user):
# """Test recording time to first token for a job."""
# # Create a job
# job_data = PydanticJob(
# status=RunStatus.created,
# metadata={"type": "test_timing"},
# )
# created_job = await server.job_manager.create_job_async(pydantic_job=job_data, actor=default_user)
#
# # Record TTFT
# ttft_ns = 1_500_000_000 # 1.5 seconds in nanoseconds
# await server.job_manager.record_ttft(created_job.id, ttft_ns, default_user)
#
# # Fetch the job and verify TTFT was recorded
# updated_job = await server.job_manager.get_job_by_id_async(created_job.id, default_user)
# assert updated_job.ttft_ns == ttft_ns
#
#
# @pytest.mark.asyncio
# async def test_record_response_duration(server: SyncServer, default_user):
# """Test recording total response duration for a job."""
# # Create a job
# job_data = PydanticJob(
# status=RunStatus.created,
# metadata={"type": "test_timing"},
# )
# created_job = await server.job_manager.create_job_async(pydantic_job=job_data, actor=default_user)
#
# # Record response duration
# duration_ns = 5_000_000_000 # 5 seconds in nanoseconds
# await server.job_manager.record_response_duration(created_job.id, duration_ns, default_user)
#
# # Fetch the job and verify duration was recorded
# updated_job = await server.job_manager.get_job_by_id_async(created_job.id, default_user)
# assert updated_job.total_duration_ns == duration_ns
#
#
# @pytest.mark.asyncio
# async def test_record_timing_metrics_together(server: SyncServer, default_user):
# """Test recording both TTFT and response duration for a job."""
# # Create a job
# job_data = PydanticJob(
# status=RunStatus.created,
# metadata={"type": "test_timing_combined"},
# )
# created_job = await server.job_manager.create_job_async(pydantic_job=job_data, actor=default_user)
#
# # Record both metrics
# ttft_ns = 2_000_000_000 # 2 seconds in nanoseconds
# duration_ns = 8_500_000_000 # 8.5 seconds in nanoseconds
#
# await server.job_manager.record_ttft(created_job.id, ttft_ns, default_user)
# await server.job_manager.record_response_duration(created_job.id, duration_ns, default_user)
#
# # Fetch the job and verify both metrics were recorded
# updated_job = await server.job_manager.get_job_by_id_async(created_job.id, default_user)
# assert updated_job.ttft_ns == ttft_ns
# assert updated_job.total_duration_ns == duration_ns
#
#
# @pytest.mark.asyncio
# async def test_record_timing_invalid_job(server: SyncServer, default_user):
# """Test recording timing metrics for non-existent job fails gracefully."""
# # Try to record TTFT for non-existent job - should not raise exception but log warning
# await server.job_manager.record_ttft("nonexistent_job_id", 1_000_000_000, default_user)
#
# # Try to record response duration for non-existent job - should not raise exception but log warning
# await server.job_manager.record_response_duration("nonexistent_job_id", 2_000_000_000, default_user)
#
# ======================================================================================================================
# convert_statuses_to_enum Tests
# ======================================================================================================================
def test_convert_statuses_to_enum_with_none():
"""Test that convert_statuses_to_enum returns None when input is None."""
from letta.server.rest_api.routers.v1.runs import convert_statuses_to_enum
result = convert_statuses_to_enum(None)
assert result is None
def test_convert_statuses_to_enum_with_single_status():
"""Test converting a single status string to RunStatus enum."""
from letta.server.rest_api.routers.v1.runs import convert_statuses_to_enum
result = convert_statuses_to_enum(["completed"])
assert result == [RunStatus.completed]
assert len(result) == 1
def test_convert_statuses_to_enum_with_multiple_statuses():
"""Test converting multiple status strings to RunStatus enums."""
from letta.server.rest_api.routers.v1.runs import convert_statuses_to_enum
result = convert_statuses_to_enum(["created", "running", "completed"])
assert result == [RunStatus.created, RunStatus.running, RunStatus.completed]
assert len(result) == 3
def test_convert_statuses_to_enum_with_all_statuses():
"""Test converting all possible status strings."""
from letta.server.rest_api.routers.v1.runs import convert_statuses_to_enum
all_statuses = ["created", "running", "completed", "failed", "cancelled"]
result = convert_statuses_to_enum(all_statuses)
assert result == [RunStatus.created, RunStatus.running, RunStatus.completed, RunStatus.failed, RunStatus.cancelled]
assert len(result) == 5
def test_convert_statuses_to_enum_with_empty_list():
"""Test converting an empty list."""
from letta.server.rest_api.routers.v1.runs import convert_statuses_to_enum
result = convert_statuses_to_enum([])
assert result == []
def test_convert_statuses_to_enum_with_invalid_status():
"""Test that invalid status strings raise ValueError."""
from letta.server.rest_api.routers.v1.runs import convert_statuses_to_enum
with pytest.raises(ValueError):
convert_statuses_to_enum(["invalid_status"])
@pytest.mark.asyncio
async def test_list_runs_with_multiple_statuses(server: SyncServer, sarah_agent, default_user):
"""Test listing runs with multiple status filters."""
# Create runs with different statuses
run_created = await server.run_manager.create_run(
pydantic_run=PydanticRun(
status=RunStatus.created,
agent_id=sarah_agent.id,
metadata={"type": "created"},
),
actor=default_user,
)
run_running = await server.run_manager.create_run(
pydantic_run=PydanticRun(
status=RunStatus.running,
agent_id=sarah_agent.id,
metadata={"type": "running"},
),
actor=default_user,
)
run_completed = await server.run_manager.create_run(
pydantic_run=PydanticRun(
status=RunStatus.completed,
agent_id=sarah_agent.id,
metadata={"type": "completed"},
),
actor=default_user,
)
run_failed = await server.run_manager.create_run(
pydantic_run=PydanticRun(
status=RunStatus.failed,
agent_id=sarah_agent.id,
metadata={"type": "failed"},
),
actor=default_user,
)
# Test filtering by multiple statuses
active_runs = await server.run_manager.list_runs(
actor=default_user, statuses=[RunStatus.created, RunStatus.running], agent_id=sarah_agent.id
)
assert len(active_runs) == 2
assert all(run.status in [RunStatus.created, RunStatus.running] for run in active_runs)
# Test filtering by terminal statuses
terminal_runs = await server.run_manager.list_runs(
actor=default_user, statuses=[RunStatus.completed, RunStatus.failed], agent_id=sarah_agent.id
)
assert len(terminal_runs) == 2
assert all(run.status in [RunStatus.completed, RunStatus.failed] for run in terminal_runs)


@pytest.mark.asyncio
async def test_list_runs_with_no_status_filter_returns_all(server: SyncServer, sarah_agent, default_user):
"""Test that not providing statuses parameter returns all runs."""
# Create runs with different statuses
await server.run_manager.create_run(pydantic_run=PydanticRun(status=RunStatus.created, agent_id=sarah_agent.id), actor=default_user)
await server.run_manager.create_run(pydantic_run=PydanticRun(status=RunStatus.running, agent_id=sarah_agent.id), actor=default_user)
await server.run_manager.create_run(pydantic_run=PydanticRun(status=RunStatus.completed, agent_id=sarah_agent.id), actor=default_user)
await server.run_manager.create_run(pydantic_run=PydanticRun(status=RunStatus.failed, agent_id=sarah_agent.id), actor=default_user)
await server.run_manager.create_run(pydantic_run=PydanticRun(status=RunStatus.cancelled, agent_id=sarah_agent.id), actor=default_user)
# List all runs without status filter
all_runs = await server.run_manager.list_runs(actor=default_user, agent_id=sarah_agent.id)
    # Should return at least the 5 runs created above (earlier tests may have created more for this agent)
assert len(all_runs) >= 5
# Verify we have all statuses represented
statuses_found = {run.status for run in all_runs}
assert RunStatus.created in statuses_found
assert RunStatus.running in statuses_found
assert RunStatus.completed in statuses_found
assert RunStatus.failed in statuses_found
    assert RunStatus.cancelled in statuses_found


# ======================================================================================================================
# RunManager Tests - Duration Filtering
# ======================================================================================================================


@pytest.mark.asyncio
async def test_list_runs_by_duration_gt(server: SyncServer, sarah_agent, default_user):
"""Test listing runs filtered by duration greater than a threshold."""
import asyncio
# Create runs with different durations
runs_data = []
# Fast run (< 100ms)
run_fast = await server.run_manager.create_run(
pydantic_run=PydanticRun(agent_id=sarah_agent.id, metadata={"speed": "fast"}),
actor=default_user,
)
await asyncio.sleep(0.05) # 50ms
await server.run_manager.update_run_by_id_async(
run_fast.id, RunUpdate(status=RunStatus.completed, stop_reason=StopReasonType.end_turn), actor=default_user
)
runs_data.append(run_fast)
# Medium run (~150ms)
run_medium = await server.run_manager.create_run(
pydantic_run=PydanticRun(agent_id=sarah_agent.id, metadata={"speed": "medium"}),
actor=default_user,
)
await asyncio.sleep(0.15) # 150ms
await server.run_manager.update_run_by_id_async(
run_medium.id, RunUpdate(status=RunStatus.completed, stop_reason=StopReasonType.end_turn), actor=default_user
)
runs_data.append(run_medium)
# Slow run (~250ms)
run_slow = await server.run_manager.create_run(
pydantic_run=PydanticRun(agent_id=sarah_agent.id, metadata={"speed": "slow"}),
actor=default_user,
)
await asyncio.sleep(0.25) # 250ms
await server.run_manager.update_run_by_id_async(
run_slow.id, RunUpdate(status=RunStatus.completed, stop_reason=StopReasonType.end_turn), actor=default_user
)
runs_data.append(run_slow)
# Filter runs with duration > 100ms (100,000,000 ns)
filtered_runs = await server.run_manager.list_runs(
actor=default_user,
agent_id=sarah_agent.id,
duration_filter={"value": 100_000_000, "operator": "gt"},
)
# Should return medium and slow runs
assert len(filtered_runs) >= 2
run_ids = {run.id for run in filtered_runs}
assert run_medium.id in run_ids
assert run_slow.id in run_ids
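

# duration_filter values are nanoseconds, which makes thresholds easy to misread. A small
# illustrative helper (hypothetical, not part of RunManager's API) for building the filter
# dict used above from a millisecond threshold:
def _duration_filter_from_ms(threshold_ms: float, operator: str = "gt") -> dict:
    # 1 ms == 1_000_000 ns, so 100 ms becomes the 100_000_000 ns used in the test above
    return {"value": int(threshold_ms * 1_000_000), "operator": operator}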


@pytest.mark.asyncio
async def test_list_runs_by_duration_lt(server: SyncServer, sarah_agent, default_user):
"""Test listing runs filtered by duration less than a threshold."""
import asyncio
# Create runs with different durations
# Fast run
run_fast = await server.run_manager.create_run(
pydantic_run=PydanticRun(agent_id=sarah_agent.id, metadata={"speed": "fast"}),
actor=default_user,
)
await asyncio.sleep(0.05) # 50ms
await server.run_manager.update_run_by_id_async(
run_fast.id, RunUpdate(status=RunStatus.completed, stop_reason=StopReasonType.end_turn), actor=default_user
)
# Slow run
run_slow = await server.run_manager.create_run(
pydantic_run=PydanticRun(agent_id=sarah_agent.id, metadata={"speed": "slow"}),
actor=default_user,
)
await asyncio.sleep(0.30) # 300ms
await server.run_manager.update_run_by_id_async(
run_slow.id, RunUpdate(status=RunStatus.completed, stop_reason=StopReasonType.end_turn), actor=default_user
)
# Get actual durations to set a threshold between them
fast_metrics = await server.run_manager.get_run_metrics_async(run_id=run_fast.id, actor=default_user)
slow_metrics = await server.run_manager.get_run_metrics_async(run_id=run_slow.id, actor=default_user)
# Set threshold between the two durations
threshold = (fast_metrics.run_ns + slow_metrics.run_ns) // 2
# Filter runs with duration < threshold
filtered_runs = await server.run_manager.list_runs(
actor=default_user,
agent_id=sarah_agent.id,
duration_filter={"value": threshold, "operator": "lt"},
)
    # Should include the fast run (other short runs from earlier tests may also match)
assert len(filtered_runs) >= 1
assert run_fast.id in [run.id for run in filtered_runs]
# Verify slow run is not included
assert run_slow.id not in [run.id for run in filtered_runs]


@pytest.mark.asyncio
async def test_list_runs_by_duration_percentile(server: SyncServer, sarah_agent, default_user):
"""Test listing runs filtered by duration percentile."""
import asyncio
# Create runs with varied durations
run_ids = []
durations_ms = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500]
for i, duration_ms in enumerate(durations_ms):
run = await server.run_manager.create_run(
pydantic_run=PydanticRun(agent_id=sarah_agent.id, metadata={"index": i}),
actor=default_user,
)
await asyncio.sleep(duration_ms / 1000.0) # Convert to seconds
await server.run_manager.update_run_by_id_async(
run.id, RunUpdate(status=RunStatus.completed, stop_reason=StopReasonType.end_turn), actor=default_user
)
run_ids.append(run.id)
# Filter runs in top 20% (80th percentile)
# This should return approximately the slowest 20% of runs
filtered_runs = await server.run_manager.list_runs(
actor=default_user,
agent_id=sarah_agent.id,
duration_percentile=80,
)
# Should return at least 2 runs (approximately 20% of 10)
assert len(filtered_runs) >= 2
# Verify the slowest run is definitely included
filtered_ids = {run.id for run in filtered_runs}
assert run_ids[-1] in filtered_ids # Slowest run (500ms)
# Verify that filtered runs are among the slower runs
    # At least two should be from the slowest 3
slowest_3_ids = set(run_ids[-3:])
assert len(filtered_ids & slowest_3_ids) >= 2, "Expected at least 2 of the slowest 3 runs"


@pytest.mark.asyncio
async def test_list_runs_by_duration_with_order_by(server: SyncServer, sarah_agent, default_user):
"""Test listing runs filtered by duration with different order_by options."""
import asyncio
# Create runs with different durations
runs = []
for i, duration_ms in enumerate([100, 200, 300]):
run = await server.run_manager.create_run(
pydantic_run=PydanticRun(agent_id=sarah_agent.id, metadata={"index": i}),
actor=default_user,
)
await asyncio.sleep(duration_ms / 1000.0)
await server.run_manager.update_run_by_id_async(
run.id, RunUpdate(status=RunStatus.completed, stop_reason=StopReasonType.end_turn), actor=default_user
)
runs.append(run)
# Test order_by="duration" with ascending order
filtered_runs_asc = await server.run_manager.list_runs(
actor=default_user,
agent_id=sarah_agent.id,
order_by="duration",
ascending=True,
)
# Should be ordered from fastest to slowest
assert len(filtered_runs_asc) >= 3
# Get metrics to verify ordering
metrics_asc = []
for run in filtered_runs_asc[:3]:
metrics = await server.run_manager.get_run_metrics_async(run_id=run.id, actor=default_user)
metrics_asc.append(metrics.run_ns)
# Verify ascending order
assert metrics_asc[0] <= metrics_asc[1] <= metrics_asc[2]
# Test order_by="duration" with descending order (default)
filtered_runs_desc = await server.run_manager.list_runs(
actor=default_user,
agent_id=sarah_agent.id,
order_by="duration",
ascending=False,
)
# Should be ordered from slowest to fastest
assert len(filtered_runs_desc) >= 3
# Get metrics to verify ordering
metrics_desc = []
for run in filtered_runs_desc[:3]:
metrics = await server.run_manager.get_run_metrics_async(run_id=run.id, actor=default_user)
metrics_desc.append(metrics.run_ns)
# Verify descending order
assert metrics_desc[0] >= metrics_desc[1] >= metrics_desc[2]


@pytest.mark.asyncio
async def test_list_runs_combined_duration_filter_and_percentile(server: SyncServer, sarah_agent, default_user):
"""Test combining duration filter with percentile filter."""
import asyncio
# Create runs with varied durations
runs = []
for i, duration_ms in enumerate([50, 100, 150, 200, 250, 300, 350, 400]):
run = await server.run_manager.create_run(
pydantic_run=PydanticRun(agent_id=sarah_agent.id, metadata={"index": i}),
actor=default_user,
)
await asyncio.sleep(duration_ms / 1000.0)
await server.run_manager.update_run_by_id_async(
run.id, RunUpdate(status=RunStatus.completed, stop_reason=StopReasonType.end_turn), actor=default_user
)
runs.append(run)
# Filter runs that are:
# 1. In top 50% slowest (duration_percentile=50)
# 2. AND greater than 200ms (duration_filter > 200_000_000 ns)
filtered_runs = await server.run_manager.list_runs(
actor=default_user,
agent_id=sarah_agent.id,
duration_percentile=50,
duration_filter={"value": 200_000_000, "operator": "gt"},
)
# Should return runs that satisfy both conditions
assert len(filtered_runs) >= 2
# Verify all returned runs meet both criteria
for run in filtered_runs:
metrics = await server.run_manager.get_run_metrics_async(run_id=run.id, actor=default_user)
# Should be greater than 200ms
assert metrics.run_ns > 200_000_000
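

# When duration_percentile and duration_filter are combined, the test above treats them as an
# AND: a run must fall in the slowest 50% and exceed the absolute 200ms threshold. A tiny
# predicate mirroring that interpretation (illustrative only, not RunManager internals):
def _passes_combined_filter_sketch(run_ns: int, percentile_cutoff_ns: int, threshold_ns: int) -> bool:
    # both conditions must hold for a run to be returned
    return run_ns >= percentile_cutoff_ns and run_ns > threshold_ns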


@pytest.mark.asyncio
async def test_get_run_with_status_no_lettuce(server: SyncServer, sarah_agent, default_user):
"""Test getting a run without Lettuce metadata."""
# Create a run without Lettuce metadata
run_data = PydanticRun(
metadata={"type": "test"},
agent_id=sarah_agent.id,
)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
# Get run with status
fetched_run = await server.run_manager.get_run_with_status(run_id=created_run.id, actor=default_user)
# Verify run is returned correctly without Lettuce status check
assert fetched_run.id == created_run.id
assert fetched_run.status == RunStatus.created
assert fetched_run.metadata == {"type": "test"}


@pytest.mark.asyncio
async def test_get_run_with_status_lettuce_success(server: SyncServer, sarah_agent, default_user, monkeypatch):
"""Test getting a run with Lettuce metadata and successful status fetch."""
# Create a run with Lettuce metadata
run_data = PydanticRun(
metadata={"lettuce": True},
agent_id=sarah_agent.id,
status=RunStatus.running,
)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
# Mock LettuceClient
mock_client = AsyncMock()
mock_client.get_status = AsyncMock(return_value="COMPLETED")
mock_lettuce_class = AsyncMock()
mock_lettuce_class.create = AsyncMock(return_value=mock_client)
# Patch LettuceClient where it's imported from
with patch("letta.services.lettuce.LettuceClient", mock_lettuce_class):
# Get run with status
fetched_run = await server.run_manager.get_run_with_status(run_id=created_run.id, actor=default_user)
# Verify status was updated from Lettuce
assert fetched_run.id == created_run.id
assert fetched_run.status == RunStatus.completed
mock_client.get_status.assert_called_once_with(run_id=created_run.id)


@pytest.mark.asyncio
async def test_get_run_with_status_lettuce_failure(server: SyncServer, sarah_agent, default_user, monkeypatch):
"""Test getting a run when Lettuce status fetch fails."""
# Create a run with Lettuce metadata
run_data = PydanticRun(
metadata={"lettuce": True},
agent_id=sarah_agent.id,
status=RunStatus.running,
)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
# Mock LettuceClient to raise an exception
mock_lettuce_class = AsyncMock()
mock_lettuce_class.create = AsyncMock(side_effect=Exception("Lettuce connection failed"))
# Patch LettuceClient where it's imported from
with patch("letta.services.lettuce.LettuceClient", mock_lettuce_class):
# Get run with status - should gracefully handle error
fetched_run = await server.run_manager.get_run_with_status(run_id=created_run.id, actor=default_user)
# Verify run is returned with DB status (error was logged but not raised)
assert fetched_run.id == created_run.id
assert fetched_run.status == RunStatus.running # Original status from DB


@pytest.mark.asyncio
async def test_get_run_with_status_lettuce_terminal_status(server: SyncServer, sarah_agent, default_user, monkeypatch):
"""Test that Lettuce status is not fetched for runs with terminal status."""
# Create a run with Lettuce metadata but terminal status
run_data = PydanticRun(
metadata={"lettuce": True},
agent_id=sarah_agent.id,
status=RunStatus.completed,
)
created_run = await server.run_manager.create_run(pydantic_run=run_data, actor=default_user)
# Mock LettuceClient - should not be called
mock_client = AsyncMock()
mock_client.get_status = AsyncMock()
mock_lettuce_class = AsyncMock()
mock_lettuce_class.create = AsyncMock(return_value=mock_client)
# Patch LettuceClient where it's imported from
with patch("letta.services.lettuce.LettuceClient", mock_lettuce_class):
# Get run with status
fetched_run = await server.run_manager.get_run_with_status(run_id=created_run.id, actor=default_user)
# Verify status remains unchanged and Lettuce was not called
assert fetched_run.id == created_run.id
assert fetched_run.status == RunStatus.completed
mock_client.get_status.assert_not_called()
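

# The three Lettuce tests above repeat the same mock wiring: an AsyncMock client whose
# get_status returns a workflow state, wrapped in a class-like mock whose create() returns
# that client. A hypothetical helper capturing that shape (not used by the tests, shown only
# to document the pattern that gets patched in at "letta.services.lettuce.LettuceClient"):
def _make_mock_lettuce(status: str = "COMPLETED"):
    mock_client = AsyncMock()
    mock_client.get_status = AsyncMock(return_value=status)
    mock_lettuce_class = AsyncMock()
    mock_lettuce_class.create = AsyncMock(return_value=mock_client)
    return mock_lettuce_class, mock_client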


@pytest.mark.asyncio
async def test_get_run_with_status_not_found(server: SyncServer, default_user):
"""Test getting a non-existent run with get_run_with_status."""
    # Use a properly formatted run ID that does not exist
non_existent_run_id = f"run-{uuid.uuid4()}"
with pytest.raises(NoResultFound):
await server.run_manager.get_run_with_status(run_id=non_existent_run_id, actor=default_user)