Files
letta-server/tests/managers/test_passage_manager.py
Kian Jones 25d54dd896 chore: enable F821, F401, W293 (#9503)
* auto fixes

* auto fix pt2 and transitive deps and undefined var checking locals()

* manual fixes (ignored or letta-code fixed)

* fix circular import
2026-02-24 10:55:08 -08:00

1329 lines
51 KiB
Python

import json
import os
from datetime import datetime, timedelta, timezone
from unittest.mock import Mock
import pytest
from _pytest.python_api import approx
# Import shared fixtures and constants from conftest
from conftest import (
DEFAULT_EMBEDDING_CONFIG,
)
from letta.orm.errors import NoResultFound
from letta.schemas.enums import (
TagMatchMode,
)
from letta.schemas.passage import Passage as PydanticPassage
from letta.server.server import SyncServer
# ======================================================================================================================
# Agent Manager - Passages Tests
# ======================================================================================================================
@pytest.mark.asyncio
async def test_agent_list_passages_basic(server, default_user, sarah_agent, agent_passages_setup, disable_turbopuffer):
"""Test basic listing functionality of agent passages"""
all_passages = await server.agent_manager.list_passages_async(actor=default_user, agent_id=sarah_agent.id)
assert len(all_passages) == 5 # 3 source + 2 agent passages
source_passages = await server.agent_manager.query_source_passages_async(actor=default_user, agent_id=sarah_agent.id)
assert len(source_passages) == 3 # 3 source + 2 agent passages
@pytest.mark.asyncio
async def test_agent_list_passages_ordering(server, default_user, sarah_agent, agent_passages_setup, disable_turbopuffer):
"""Test ordering of agent passages"""
# Test ascending order
asc_passages = await server.agent_manager.list_passages_async(actor=default_user, agent_id=sarah_agent.id, ascending=True)
assert len(asc_passages) == 5
for i in range(1, len(asc_passages)):
assert asc_passages[i - 1].created_at <= asc_passages[i].created_at
# Test descending order
desc_passages = await server.agent_manager.list_passages_async(actor=default_user, agent_id=sarah_agent.id, ascending=False)
assert len(desc_passages) == 5
for i in range(1, len(desc_passages)):
assert desc_passages[i - 1].created_at >= desc_passages[i].created_at
@pytest.mark.asyncio
async def test_agent_list_passages_pagination(server, default_user, sarah_agent, agent_passages_setup, disable_turbopuffer):
"""Test pagination of agent passages"""
# Test limit
limited_passages = await server.agent_manager.list_passages_async(actor=default_user, agent_id=sarah_agent.id, limit=3)
assert len(limited_passages) == 3
# Test cursor-based pagination
first_page = await server.agent_manager.list_passages_async(actor=default_user, agent_id=sarah_agent.id, limit=2, ascending=True)
assert len(first_page) == 2
second_page = await server.agent_manager.list_passages_async(
actor=default_user, agent_id=sarah_agent.id, after=first_page[-1].id, limit=2, ascending=True
)
assert len(second_page) == 2
assert first_page[-1].id != second_page[0].id
assert first_page[-1].created_at <= second_page[0].created_at
"""
[1] [2]
* * | * *
[mid]
* | * * | *
"""
middle_page = await server.agent_manager.list_passages_async(
actor=default_user, agent_id=sarah_agent.id, before=second_page[-1].id, after=first_page[0].id, ascending=True
)
assert len(middle_page) == 2
assert middle_page[0].id == first_page[-1].id
assert middle_page[1].id == second_page[0].id
middle_page_desc = await server.agent_manager.list_passages_async(
actor=default_user, agent_id=sarah_agent.id, before=second_page[-1].id, after=first_page[0].id, ascending=False
)
assert len(middle_page_desc) == 2
assert middle_page_desc[0].id == second_page[0].id
assert middle_page_desc[1].id == first_page[-1].id
@pytest.mark.asyncio
async def test_agent_list_passages_text_search(server, default_user, sarah_agent, agent_passages_setup, disable_turbopuffer):
"""Test text search functionality of agent passages"""
# Test text search for source passages
source_text_passages = await server.agent_manager.list_passages_async(
actor=default_user, agent_id=sarah_agent.id, query_text="Source passage"
)
assert len(source_text_passages) == 3
# Test text search for agent passages
agent_text_passages = await server.agent_manager.list_passages_async(
actor=default_user, agent_id=sarah_agent.id, query_text="Agent passage"
)
assert len(agent_text_passages) == 2
@pytest.mark.asyncio
async def test_agent_list_passages_agent_only(server, default_user, sarah_agent, agent_passages_setup, disable_turbopuffer):
"""Test text search functionality of agent passages"""
# Test text search for agent passages
agent_text_passages = await server.agent_manager.list_passages_async(actor=default_user, agent_id=sarah_agent.id, agent_only=True)
assert len(agent_text_passages) == 2
@pytest.mark.asyncio
async def test_agent_list_passages_filtering(server, default_user, sarah_agent, default_source, agent_passages_setup, disable_turbopuffer):
"""Test filtering functionality of agent passages"""
# Test source filtering
source_filtered = await server.agent_manager.list_passages_async(
actor=default_user, agent_id=sarah_agent.id, source_id=default_source.id
)
assert len(source_filtered) == 3
# Test date filtering
now = datetime.now(timezone.utc)
future_date = now + timedelta(days=1)
past_date = now - timedelta(days=1)
date_filtered = await server.agent_manager.list_passages_async(
actor=default_user, agent_id=sarah_agent.id, start_date=past_date, end_date=future_date
)
assert len(date_filtered) == 5
@pytest.mark.asyncio
async def test_agent_query_passages_time_only(server, default_user, default_archive, disable_turbopuffer):
"""Test querying passages with date filters and no query text."""
now = datetime.now(timezone.utc)
older_date = now - timedelta(days=2)
newer_date = now - timedelta(hours=2)
older_passage = await server.passage_manager.create_agent_passage_async(
PydanticPassage(
organization_id=default_user.organization_id,
archive_id=default_archive.id,
text="Older passage",
embedding=[0.1],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
created_at=older_date,
),
actor=default_user,
)
newer_passage = await server.passage_manager.create_agent_passage_async(
PydanticPassage(
organization_id=default_user.organization_id,
archive_id=default_archive.id,
text="Newer passage",
embedding=[0.1],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
created_at=newer_date,
),
actor=default_user,
)
results = await server.agent_manager.query_agent_passages_async(
actor=default_user,
archive_id=default_archive.id,
start_date=now - timedelta(days=1),
end_date=now + timedelta(minutes=1),
)
assert len(results) == 1
passage, _, _ = results[0]
assert passage.id == newer_passage.id
assert passage.id != older_passage.id
assert passage.created_at >= now - timedelta(days=1)
assert passage.created_at <= now + timedelta(minutes=1)
@pytest.fixture
def mock_embeddings():
"""Load mock embeddings from JSON file"""
fixture_path = os.path.join(os.path.dirname(__file__), "data", "test_embeddings.json")
with open(fixture_path, "r") as f:
return json.load(f)
@pytest.fixture
def mock_embed_model(mock_embeddings):
"""Mock embedding model that returns predefined embeddings"""
mock_model = Mock()
mock_model.get_text_embedding = lambda text: mock_embeddings.get(text, [0.0] * 1536)
return mock_model
async def test_agent_list_passages_vector_search(
server, default_user, sarah_agent, default_source, default_file, mock_embed_model, disable_turbopuffer
):
"""Test vector search functionality of agent passages"""
embed_model = mock_embed_model
# Get or create default archive for the agent
archive = await server.archive_manager.get_or_create_default_archive_for_agent_async(agent_state=sarah_agent, actor=default_user)
# Create passages with known embeddings
passages = []
# Create passages with different embeddings
test_passages = [
"I like red",
"random text",
"blue shoes",
]
await server.agent_manager.attach_source_async(agent_id=sarah_agent.id, source_id=default_source.id, actor=default_user)
for i, text in enumerate(test_passages):
embedding = embed_model.get_text_embedding(text)
if i % 2 == 0:
# Create agent passage
passage = PydanticPassage(
text=text,
organization_id=default_user.organization_id,
archive_id=archive.id,
embedding_config=DEFAULT_EMBEDDING_CONFIG,
embedding=embedding,
)
created_passage = await server.passage_manager.create_agent_passage_async(passage, default_user)
else:
# Create source passage
passage = PydanticPassage(
text=text,
organization_id=default_user.organization_id,
source_id=default_source.id,
file_id=default_file.id,
embedding_config=DEFAULT_EMBEDDING_CONFIG,
embedding=embedding,
)
created_passage = await server.passage_manager.create_source_passage_async(passage, default_file, default_user)
passages.append(created_passage)
# Query vector similar to "red" embedding
query_key = "What's my favorite color?"
# Test vector search with all passages
results = await server.agent_manager.list_passages_async(
actor=default_user,
agent_id=sarah_agent.id,
query_text=query_key,
embedding_config=DEFAULT_EMBEDDING_CONFIG,
embed_query=True,
)
# Verify results are ordered by similarity
assert len(results) == 3
assert results[0].text == "I like red"
assert "random" in results[1].text or "random" in results[2].text
assert "blue" in results[1].text or "blue" in results[2].text
# Test vector search with agent_only=True
agent_only_results = await server.agent_manager.list_passages_async(
actor=default_user,
agent_id=sarah_agent.id,
query_text=query_key,
embedding_config=DEFAULT_EMBEDDING_CONFIG,
embed_query=True,
agent_only=True,
)
# Verify agent-only results
assert len(agent_only_results) == 2
assert agent_only_results[0].text == "I like red"
assert agent_only_results[1].text == "blue shoes"
@pytest.mark.asyncio
async def test_list_source_passages_only(server: SyncServer, default_user, default_source, agent_passages_setup):
"""Test listing passages from a source without specifying an agent."""
# List passages by source_id without agent_id
source_passages = await server.agent_manager.list_passages_async(
actor=default_user,
source_id=default_source.id,
)
# Verify we get only source passages (3 from agent_passages_setup)
assert len(source_passages) == 3
assert all(p.source_id == default_source.id for p in source_passages)
assert all(p.archive_id is None for p in source_passages)
# ======================================================================================================================
# Passage Manager Tests
# ======================================================================================================================
@pytest.mark.asyncio
async def test_passage_create_agentic(server: SyncServer, agent_passage_fixture, default_user):
"""Test creating a passage using agent_passage_fixture fixture"""
assert agent_passage_fixture.id is not None
assert agent_passage_fixture.text == "Hello, I am an agent passage"
# Verify we can retrieve it
retrieved = await server.passage_manager.get_passage_by_id_async(
agent_passage_fixture.id,
actor=default_user,
)
assert retrieved is not None
assert retrieved.id == agent_passage_fixture.id
assert retrieved.text == agent_passage_fixture.text
@pytest.mark.asyncio
async def test_passage_create_source(server: SyncServer, source_passage_fixture, default_user):
"""Test creating a source passage."""
assert source_passage_fixture is not None
assert source_passage_fixture.text == "Hello, I am a source passage"
# Verify we can retrieve it
retrieved = await server.passage_manager.get_passage_by_id_async(
source_passage_fixture.id,
actor=default_user,
)
assert retrieved is not None
assert retrieved.id == source_passage_fixture.id
assert retrieved.text == source_passage_fixture.text
@pytest.mark.asyncio
async def test_passage_create_invalid(server: SyncServer, agent_passage_fixture, default_user):
"""Test creating an agent passage."""
assert agent_passage_fixture is not None
assert agent_passage_fixture.text == "Hello, I am an agent passage"
# Try to create an invalid passage (with both archive_id and source_id)
with pytest.raises(AssertionError):
await server.passage_manager.create_passage_async(
PydanticPassage(
text="Invalid passage",
archive_id="123",
source_id="456",
organization_id=default_user.organization_id,
embedding=[0.1] * 1024,
embedding_config=DEFAULT_EMBEDDING_CONFIG,
),
actor=default_user,
)
@pytest.mark.asyncio
async def test_passage_get_by_id(server: SyncServer, agent_passage_fixture, source_passage_fixture, default_user):
"""Test retrieving a passage by ID"""
retrieved = await server.passage_manager.get_passage_by_id_async(agent_passage_fixture.id, actor=default_user)
assert retrieved is not None
assert retrieved.id == agent_passage_fixture.id
assert retrieved.text == agent_passage_fixture.text
retrieved = await server.passage_manager.get_passage_by_id_async(source_passage_fixture.id, actor=default_user)
assert retrieved is not None
assert retrieved.id == source_passage_fixture.id
assert retrieved.text == source_passage_fixture.text
@pytest.mark.asyncio
async def test_passage_cascade_deletion(
server: SyncServer, agent_passage_fixture, source_passage_fixture, default_user, default_source, sarah_agent
):
"""Test that passages are deleted when their parent (agent or source) is deleted."""
# Verify passages exist
agent_passage = await server.passage_manager.get_passage_by_id_async(agent_passage_fixture.id, default_user)
source_passage = await server.passage_manager.get_passage_by_id_async(source_passage_fixture.id, default_user)
assert agent_passage is not None
assert source_passage is not None
# Delete agent and verify its passages are deleted
await server.agent_manager.delete_agent_async(sarah_agent.id, default_user)
agentic_passages = await server.agent_manager.list_passages_async(actor=default_user, agent_id=sarah_agent.id, agent_only=True)
assert len(agentic_passages) == 0
@pytest.mark.asyncio
async def test_create_agent_passage_specific(server: SyncServer, default_user, sarah_agent):
"""Test creating an agent passage using the new agent-specific method."""
# Get or create default archive for the agent
archive = await server.archive_manager.get_or_create_default_archive_for_agent_async(agent_state=sarah_agent, actor=default_user)
passage = await server.passage_manager.create_agent_passage_async(
PydanticPassage(
text="Test agent passage via specific method",
archive_id=archive.id,
organization_id=default_user.organization_id,
embedding=[0.1],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
metadata={"type": "test_specific"},
tags=["python", "test", "agent"],
),
actor=default_user,
)
assert passage.id is not None
assert passage.text == "Test agent passage via specific method"
assert passage.archive_id == archive.id
assert passage.source_id is None
assert sorted(passage.tags) == sorted(["python", "test", "agent"])
@pytest.mark.asyncio
async def test_create_source_passage_specific(server: SyncServer, default_user, default_file, default_source):
"""Test creating a source passage using the new source-specific method."""
passage = await server.passage_manager.create_source_passage_async(
PydanticPassage(
text="Test source passage via specific method",
source_id=default_source.id,
file_id=default_file.id,
organization_id=default_user.organization_id,
embedding=[0.1],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
metadata={"type": "test_specific"},
tags=["document", "test", "source"],
),
file_metadata=default_file,
actor=default_user,
)
assert passage.id is not None
assert passage.text == "Test source passage via specific method"
assert passage.source_id == default_source.id
assert passage.archive_id is None
assert sorted(passage.tags) == sorted(["document", "test", "source"])
@pytest.mark.asyncio
async def test_create_agent_passage_validation(server: SyncServer, default_user, default_source, sarah_agent):
"""Test that agent passage creation validates inputs correctly."""
# Should fail if archive_id is missing
with pytest.raises(ValueError, match="Agent passage must have archive_id"):
await server.passage_manager.create_agent_passage_async(
PydanticPassage(
text="Invalid agent passage",
organization_id=default_user.organization_id,
embedding=[0.1],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
),
actor=default_user,
)
# Get or create default archive for the agent
archive = await server.archive_manager.get_or_create_default_archive_for_agent_async(agent_state=sarah_agent, actor=default_user)
# Should fail if source_id is present
with pytest.raises(ValueError, match="Agent passage cannot have source_id"):
await server.passage_manager.create_agent_passage_async(
PydanticPassage(
text="Invalid agent passage",
archive_id=archive.id,
source_id=default_source.id,
organization_id=default_user.organization_id,
embedding=[0.1],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
),
actor=default_user,
)
@pytest.mark.asyncio
async def test_create_source_passage_validation(server: SyncServer, default_user, default_file, default_source, sarah_agent):
"""Test that source passage creation validates inputs correctly."""
# Should fail if source_id is missing
with pytest.raises(ValueError, match="Source passage must have source_id"):
await server.passage_manager.create_source_passage_async(
PydanticPassage(
text="Invalid source passage",
organization_id=default_user.organization_id,
embedding=[0.1],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
),
file_metadata=default_file,
actor=default_user,
)
# Get or create default archive for the agent
archive = await server.archive_manager.get_or_create_default_archive_for_agent_async(agent_state=sarah_agent, actor=default_user)
# Should fail if archive_id is present
with pytest.raises(ValueError, match="Source passage cannot have archive_id"):
await server.passage_manager.create_source_passage_async(
PydanticPassage(
text="Invalid source passage",
source_id=default_source.id,
archive_id=archive.id,
organization_id=default_user.organization_id,
embedding=[0.1],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
),
file_metadata=default_file,
actor=default_user,
)
@pytest.mark.asyncio
async def test_get_agent_passage_by_id_specific(server: SyncServer, default_user, sarah_agent):
"""Test retrieving an agent passage using the new agent-specific method."""
# Get or create default archive for the agent
archive = await server.archive_manager.get_or_create_default_archive_for_agent_async(agent_state=sarah_agent, actor=default_user)
# Create an agent passage
passage = await server.passage_manager.create_agent_passage_async(
PydanticPassage(
text="Agent passage for retrieval test",
archive_id=archive.id,
organization_id=default_user.organization_id,
embedding=[0.1],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
),
actor=default_user,
)
# Retrieve it using the specific method
retrieved = await server.passage_manager.get_agent_passage_by_id_async(passage.id, actor=default_user)
assert retrieved is not None
assert retrieved.id == passage.id
assert retrieved.text == passage.text
assert retrieved.archive_id == archive.id
@pytest.mark.asyncio
async def test_get_source_passage_by_id_specific(server: SyncServer, default_user, default_file, default_source):
"""Test retrieving a source passage using the new source-specific method."""
# Create a source passage
passage = await server.passage_manager.create_source_passage_async(
PydanticPassage(
text="Source passage for retrieval test",
source_id=default_source.id,
file_id=default_file.id,
organization_id=default_user.organization_id,
embedding=[0.1],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
),
file_metadata=default_file,
actor=default_user,
)
# Retrieve it using the specific method
retrieved = await server.passage_manager.get_source_passage_by_id_async(passage.id, actor=default_user)
assert retrieved is not None
assert retrieved.id == passage.id
assert retrieved.text == passage.text
assert retrieved.source_id == default_source.id
@pytest.mark.asyncio
async def test_get_wrong_passage_type_fails(server: SyncServer, default_user, sarah_agent, default_file, default_source):
"""Test that trying to get the wrong passage type with specific methods fails."""
# Create an agent passage
# Get or create default archive for the agent
archive = await server.archive_manager.get_or_create_default_archive_for_agent_async(agent_state=sarah_agent, actor=default_user)
agent_passage = await server.passage_manager.create_agent_passage_async(
PydanticPassage(
text="Agent passage",
archive_id=archive.id,
organization_id=default_user.organization_id,
embedding=[0.1],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
),
actor=default_user,
)
# Create a source passage
source_passage = await server.passage_manager.create_source_passage_async(
PydanticPassage(
text="Source passage",
source_id=default_source.id,
file_id=default_file.id,
organization_id=default_user.organization_id,
embedding=[0.1],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
),
file_metadata=default_file,
actor=default_user,
)
# Trying to get agent passage with source method should fail
with pytest.raises(NoResultFound):
await server.passage_manager.get_source_passage_by_id_async(agent_passage.id, actor=default_user)
# Trying to get source passage with agent method should fail
with pytest.raises(NoResultFound):
await server.passage_manager.get_agent_passage_by_id_async(source_passage.id, actor=default_user)
@pytest.mark.asyncio
async def test_update_agent_passage_specific(server: SyncServer, default_user, sarah_agent):
"""Test updating an agent passage using the new agent-specific method."""
# Get or create default archive for the agent
archive = await server.archive_manager.get_or_create_default_archive_for_agent_async(agent_state=sarah_agent, actor=default_user)
# Create an agent passage
passage = await server.passage_manager.create_agent_passage_async(
PydanticPassage(
text="Original agent passage text",
archive_id=archive.id,
organization_id=default_user.organization_id,
embedding=[0.1],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
),
actor=default_user,
)
# Update it
updated_passage = await server.passage_manager.update_agent_passage_by_id_async(
passage.id,
PydanticPassage(
text="Updated agent passage text",
archive_id=archive.id,
organization_id=default_user.organization_id,
embedding=[0.2],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
),
actor=default_user,
)
assert updated_passage.text == "Updated agent passage text"
assert updated_passage.embedding[0] == approx(0.2)
assert updated_passage.id == passage.id
@pytest.mark.asyncio
async def test_update_source_passage_specific(server: SyncServer, default_user, default_file, default_source):
"""Test updating a source passage using the new source-specific method."""
# Create a source passage
passage = await server.passage_manager.create_source_passage_async(
PydanticPassage(
text="Original source passage text",
source_id=default_source.id,
file_id=default_file.id,
organization_id=default_user.organization_id,
embedding=[0.1],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
),
file_metadata=default_file,
actor=default_user,
)
# Update it
updated_passage = await server.passage_manager.update_source_passage_by_id_async(
passage.id,
PydanticPassage(
text="Updated source passage text",
source_id=default_source.id,
file_id=default_file.id,
organization_id=default_user.organization_id,
embedding=[0.2],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
),
actor=default_user,
)
assert updated_passage.text == "Updated source passage text"
assert updated_passage.embedding[0] == approx(0.2)
assert updated_passage.id == passage.id
@pytest.mark.asyncio
async def test_delete_agent_passage_specific(server: SyncServer, default_user, sarah_agent):
"""Test deleting an agent passage using the new agent-specific method."""
# Get or create default archive for the agent
archive = await server.archive_manager.get_or_create_default_archive_for_agent_async(agent_state=sarah_agent, actor=default_user)
# Create an agent passage
passage = await server.passage_manager.create_agent_passage_async(
PydanticPassage(
text="Agent passage to delete",
archive_id=archive.id,
organization_id=default_user.organization_id,
embedding=[0.1],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
),
actor=default_user,
)
# Verify it exists
retrieved = await server.passage_manager.get_agent_passage_by_id_async(passage.id, actor=default_user)
assert retrieved is not None
# Delete it
result = await server.passage_manager.delete_agent_passage_by_id_async(passage.id, actor=default_user)
assert result is True
# Verify it's gone
with pytest.raises(NoResultFound):
await server.passage_manager.get_agent_passage_by_id_async(passage.id, actor=default_user)
@pytest.mark.asyncio
async def test_delete_source_passage_specific(server: SyncServer, default_user, default_file, default_source):
"""Test deleting a source passage using the new source-specific method."""
# Create a source passage
passage = await server.passage_manager.create_source_passage_async(
PydanticPassage(
text="Source passage to delete",
source_id=default_source.id,
file_id=default_file.id,
organization_id=default_user.organization_id,
embedding=[0.1],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
),
file_metadata=default_file,
actor=default_user,
)
# Verify it exists
retrieved = await server.passage_manager.get_source_passage_by_id_async(passage.id, actor=default_user)
assert retrieved is not None
# Delete it
result = await server.passage_manager.delete_source_passage_by_id_async(passage.id, actor=default_user)
assert result is True
# Verify it's gone
with pytest.raises(NoResultFound):
await server.passage_manager.get_source_passage_by_id_async(passage.id, actor=default_user)
@pytest.mark.asyncio
async def test_create_many_agent_passages_async(server: SyncServer, default_user, sarah_agent):
"""Test creating multiple agent passages using the new batch method."""
# Get or create default archive for the agent
archive = await server.archive_manager.get_or_create_default_archive_for_agent_async(agent_state=sarah_agent, actor=default_user)
passages = [
PydanticPassage(
text=f"Batch agent passage {i}",
archive_id=archive.id, # Now archive is a PydanticArchive object
organization_id=default_user.organization_id,
embedding=[0.1 * i],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
tags=["batch", f"item{i}"] if i % 2 == 0 else ["batch", "odd"],
)
for i in range(3)
]
created_passages = await server.passage_manager.create_many_archival_passages_async(passages, actor=default_user)
assert len(created_passages) == 3
for i, passage in enumerate(created_passages):
assert passage.text == f"Batch agent passage {i}"
assert passage.archive_id == archive.id
assert passage.source_id is None
expected_tags = ["batch", f"item{i}"] if i % 2 == 0 else ["batch", "odd"]
assert passage.tags == expected_tags
@pytest.mark.asyncio
async def test_create_many_source_passages_async(server: SyncServer, default_user, default_file, default_source):
"""Test creating multiple source passages using the new batch method."""
passages = [
PydanticPassage(
text=f"Batch source passage {i}",
source_id=default_source.id,
file_id=default_file.id,
organization_id=default_user.organization_id,
embedding=[0.1 * i],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
)
for i in range(3)
]
created_passages = await server.passage_manager.create_many_source_passages_async(
passages, file_metadata=default_file, actor=default_user
)
assert len(created_passages) == 3
for i, passage in enumerate(created_passages):
assert passage.text == f"Batch source passage {i}"
assert passage.source_id == default_source.id
assert passage.archive_id is None
@pytest.mark.asyncio
async def test_agent_passage_size(server: SyncServer, default_user, sarah_agent):
"""Test counting agent passages using the new agent-specific size method."""
initial_size = await server.passage_manager.agent_passage_size_async(actor=default_user, agent_id=sarah_agent.id)
# Get or create default archive for the agent
archive = await server.archive_manager.get_or_create_default_archive_for_agent_async(agent_state=sarah_agent, actor=default_user)
# Create some agent passages
for i in range(3):
await server.passage_manager.create_agent_passage_async(
PydanticPassage(
text=f"Agent passage {i} for size test",
archive_id=archive.id,
organization_id=default_user.organization_id,
embedding=[0.1],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
),
actor=default_user,
)
final_size = await server.passage_manager.agent_passage_size_async(actor=default_user, agent_id=sarah_agent.id)
assert final_size == initial_size + 3
@pytest.mark.asyncio
async def test_passage_tags_functionality(disable_turbopuffer, server: SyncServer, default_user, sarah_agent):
"""Test comprehensive tag functionality for passages."""
from letta.schemas.enums import TagMatchMode
# Get or create default archive for the agent
archive = await server.archive_manager.get_or_create_default_archive_for_agent_async(agent_state=sarah_agent, actor=default_user)
# Create passages with different tag combinations
test_passages = [
{"text": "Python programming tutorial", "tags": ["python", "tutorial", "programming"]},
{"text": "Machine learning with Python", "tags": ["python", "ml", "ai"]},
{"text": "JavaScript web development", "tags": ["javascript", "web", "frontend"]},
{"text": "Python data science guide", "tags": ["python", "tutorial", "data"]},
{"text": "No tags passage", "tags": None},
]
created_passages = []
for test_data in test_passages:
passage = await server.passage_manager.create_agent_passage_async(
PydanticPassage(
text=test_data["text"],
archive_id=archive.id,
organization_id=default_user.organization_id,
embedding=[0.1, 0.2, 0.3],
embedding_config=DEFAULT_EMBEDDING_CONFIG,
tags=test_data["tags"],
),
actor=default_user,
)
created_passages.append(passage)
# Test that tags are properly stored (deduplicated)
for i, passage in enumerate(created_passages):
expected_tags = test_passages[i]["tags"]
if expected_tags:
assert set(passage.tags) == set(expected_tags)
else:
assert passage.tags is None
# Test querying with tag filtering (if Turbopuffer is enabled)
if hasattr(server.agent_manager, "query_agent_passages_async"):
# Test querying with python tag (should find 3 passages)
python_results = await server.agent_manager.query_agent_passages_async(
actor=default_user,
agent_id=sarah_agent.id,
tags=["python"],
tag_match_mode=TagMatchMode.ANY,
)
python_texts = [p.text for p, _, _ in python_results]
assert len([t for t in python_texts if "Python" in t]) >= 2
# Test querying with multiple tags using ALL mode
tutorial_python_results = await server.agent_manager.query_agent_passages_async(
actor=default_user,
agent_id=sarah_agent.id,
tags=["python", "tutorial"],
tag_match_mode=TagMatchMode.ALL,
)
tutorial_texts = [p.text for p, _, _ in tutorial_python_results]
expected_matches = [t for t in tutorial_texts if "tutorial" in t and "Python" in t]
assert len(expected_matches) >= 1
@pytest.mark.asyncio
async def test_comprehensive_tag_functionality(disable_turbopuffer, server: SyncServer, sarah_agent, default_user):
"""Comprehensive test for tag functionality including dual storage and junction table."""
# Test 1: Create passages with tags and verify they're stored in both places
passages_with_tags = []
test_tags = {
"passage1": ["important", "documentation", "python"],
"passage2": ["important", "testing"],
"passage3": ["documentation", "api"],
"passage4": ["python", "testing", "api"],
"passage5": [], # Test empty tags
}
for i, (passage_key, tags) in enumerate(test_tags.items(), 1):
text = f"Test passage {i} for comprehensive tag testing"
created_passages = await server.passage_manager.insert_passage(
agent_state=sarah_agent,
text=text,
actor=default_user,
tags=tags if tags else None,
)
assert len(created_passages) == 1
passage = created_passages[0]
# Verify tags are stored in the JSON column (deduplicated)
if tags:
assert set(passage.tags) == set(tags)
else:
assert passage.tags is None
passages_with_tags.append(passage)
# Test 2: Verify unique tags for archive
archive = await server.archive_manager.get_or_create_default_archive_for_agent_async(
agent_state=sarah_agent,
actor=default_user,
)
unique_tags = await server.passage_manager.get_unique_tags_for_archive_async(
archive_id=archive.id,
actor=default_user,
)
# Should have all unique tags: "important", "documentation", "python", "testing", "api"
expected_unique_tags = {"important", "documentation", "python", "testing", "api"}
assert set(unique_tags) == expected_unique_tags
assert len(unique_tags) == 5
# Test 3: Verify tag counts
tag_counts = await server.passage_manager.get_tag_counts_for_archive_async(
archive_id=archive.id,
actor=default_user,
)
# Verify counts
assert tag_counts["important"] == 2 # passage1 and passage2
assert tag_counts["documentation"] == 2 # passage1 and passage3
assert tag_counts["python"] == 2 # passage1 and passage4
assert tag_counts["testing"] == 2 # passage2 and passage4
assert tag_counts["api"] == 2 # passage3 and passage4
# Test 4: Query passages with ANY tag matching
any_results = await server.agent_manager.query_agent_passages_async(
agent_id=sarah_agent.id,
query_text="test",
limit=10,
tags=["important", "api"],
tag_match_mode=TagMatchMode.ANY,
actor=default_user,
)
# Should match passages with "important" OR "api" tags (passages 1, 2, 3, 4)
[p.text for p, _, _ in any_results]
assert len(any_results) >= 4
# Test 5: Query passages with ALL tag matching
all_results = await server.agent_manager.query_agent_passages_async(
agent_id=sarah_agent.id,
query_text="test",
limit=10,
tags=["python", "testing"],
tag_match_mode=TagMatchMode.ALL,
actor=default_user,
)
# Should only match passage4 which has both "python" AND "testing"
all_passage_texts = [p.text for p, _, _ in all_results]
assert any("Test passage 4" in text for text in all_passage_texts)
# Test 6: Query with non-existent tags
no_results = await server.agent_manager.query_agent_passages_async(
agent_id=sarah_agent.id,
query_text="test",
limit=10,
tags=["nonexistent", "missing"],
tag_match_mode=TagMatchMode.ANY,
actor=default_user,
)
# Should return no results
assert len(no_results) == 0
# Test 7: Verify tags CAN be updated (with junction table properly maintained)
first_passage = passages_with_tags[0]
new_tags = ["updated", "modified", "changed"]
update_data = PydanticPassage(
id=first_passage.id,
text="Updated text",
tags=new_tags,
organization_id=first_passage.organization_id,
archive_id=first_passage.archive_id,
embedding=first_passage.embedding,
embedding_config=first_passage.embedding_config,
)
# Update should work and tags should be updated
updated = await server.passage_manager.update_agent_passage_by_id_async(
passage_id=first_passage.id,
passage=update_data,
actor=default_user,
)
# Both text and tags should be updated
assert updated.text == "Updated text"
assert set(updated.tags) == set(new_tags)
# Verify tags are properly updated in junction table
updated_unique_tags = await server.passage_manager.get_unique_tags_for_archive_async(
archive_id=archive.id,
actor=default_user,
)
# Should include new tags and not include old "important", "documentation", "python" from passage1
# But still have tags from other passages
assert "updated" in updated_unique_tags
assert "modified" in updated_unique_tags
assert "changed" in updated_unique_tags
# Test 8: Delete a passage and verify cascade deletion of tags
passage_to_delete = passages_with_tags[1] # passage2 with ["important", "testing"]
await server.passage_manager.delete_agent_passage_by_id_async(
passage_id=passage_to_delete.id,
actor=default_user,
)
# Get updated tag counts
updated_tag_counts = await server.passage_manager.get_tag_counts_for_archive_async(
archive_id=archive.id,
actor=default_user,
)
# "important" no longer exists (was in passage1 which was updated and passage2 which was deleted)
assert "important" not in updated_tag_counts
# "testing" count should decrease from 2 to 1 (only in passage4 now)
assert updated_tag_counts["testing"] == 1
# Test 9: Batch create passages with tags
batch_texts = [
"Batch passage 1",
"Batch passage 2",
"Batch passage 3",
]
batch_tags = ["batch", "test", "multiple"]
batch_passages = []
for text in batch_texts:
passages = await server.passage_manager.insert_passage(
agent_state=sarah_agent,
text=text,
actor=default_user,
tags=batch_tags,
)
batch_passages.extend(passages)
# Verify all batch passages have the same tags
for passage in batch_passages:
assert set(passage.tags) == set(batch_tags)
# Test 10: Verify tag counts include batch passages
final_tag_counts = await server.passage_manager.get_tag_counts_for_archive_async(
archive_id=archive.id,
actor=default_user,
)
assert final_tag_counts["batch"] == 3
assert final_tag_counts["test"] == 3
assert final_tag_counts["multiple"] == 3
# Test 11: Complex query with multiple tags and ALL matching
complex_all_results = await server.agent_manager.query_agent_passages_async(
agent_id=sarah_agent.id,
query_text="batch",
limit=10,
tags=["batch", "test", "multiple"],
tag_match_mode=TagMatchMode.ALL,
actor=default_user,
)
# Should match all 3 batch passages
assert len(complex_all_results) >= 3
# Test 12: Empty tag list should return all passages
all_passages = await server.agent_manager.query_agent_passages_async(
agent_id=sarah_agent.id,
query_text="passage",
limit=50,
tags=[],
tag_match_mode=TagMatchMode.ANY,
actor=default_user,
)
# Should return passages based on text search only
assert len(all_passages) > 0
@pytest.mark.asyncio
async def test_tag_edge_cases(disable_turbopuffer, server: SyncServer, sarah_agent, default_user):
"""Test edge cases for tag functionality."""
# Test 1: Very long tag names
long_tag = "a" * 500 # 500 character tag
passages = await server.passage_manager.insert_passage(
agent_state=sarah_agent,
text="Testing long tag names",
actor=default_user,
tags=[long_tag, "normal_tag"],
)
assert len(passages) == 1
assert long_tag in passages[0].tags
# Test 2: Special characters in tags
special_tags = [
"tag-with-dash",
"tag_with_underscore",
"tag.with.dots",
"tag/with/slash",
"tag:with:colon",
"tag@with@at",
"tag#with#hash",
"tag with spaces",
"CamelCaseTag",
"数字标签",
]
passages_special = await server.passage_manager.insert_passage(
agent_state=sarah_agent,
text="Testing special character tags",
actor=default_user,
tags=special_tags,
)
assert len(passages_special) == 1
assert set(passages_special[0].tags) == set(special_tags)
# Verify unique tags includes all special character tags
archive = await server.archive_manager.get_or_create_default_archive_for_agent_async(
agent_state=sarah_agent,
actor=default_user,
)
unique_tags = await server.passage_manager.get_unique_tags_for_archive_async(
archive_id=archive.id,
actor=default_user,
)
for tag in special_tags:
assert tag in unique_tags
# Test 3: Duplicate tags in input (should be deduplicated)
duplicate_tags = ["tag1", "tag2", "tag1", "tag3", "tag2", "tag1"]
passages_dup = await server.passage_manager.insert_passage(
agent_state=sarah_agent,
text="Testing duplicate tags",
actor=default_user,
tags=duplicate_tags,
)
# Should only have unique tags (duplicates removed)
assert len(passages_dup) == 1
assert set(passages_dup[0].tags) == {"tag1", "tag2", "tag3"}
assert len(passages_dup[0].tags) == 3 # Should be deduplicated
# Test 4: Case sensitivity in tags
case_tags = ["Tag", "tag", "TAG", "tAg"]
passages_case = await server.passage_manager.insert_passage(
agent_state=sarah_agent,
text="Testing case sensitive tags",
actor=default_user,
tags=case_tags,
)
# All variations should be preserved (case-sensitive)
assert len(passages_case) == 1
assert set(passages_case[0].tags) == set(case_tags)
@pytest.mark.asyncio
async def test_search_agent_archival_memory_async(disable_turbopuffer, server: SyncServer, default_user, sarah_agent):
"""Test the search_agent_archival_memory_async method that powers both the agent tool and API endpoint."""
# Get or create default archive for the agent
archive = await server.archive_manager.get_or_create_default_archive_for_agent_async(agent_state=sarah_agent, actor=default_user)
# Create test passages with various content and tags
test_data = [
{
"text": "Python is a powerful programming language used for data science and web development.",
"tags": ["python", "programming", "data-science", "web"],
"created_at": datetime(2024, 1, 15, 10, 30, tzinfo=timezone.utc),
},
{
"text": "Machine learning algorithms can be implemented in Python using libraries like scikit-learn.",
"tags": ["python", "machine-learning", "algorithms"],
"created_at": datetime(2024, 1, 16, 14, 45, tzinfo=timezone.utc),
},
{
"text": "JavaScript is essential for frontend web development and modern web applications.",
"tags": ["javascript", "frontend", "web"],
"created_at": datetime(2024, 1, 17, 9, 15, tzinfo=timezone.utc),
},
{
"text": "Database design principles are important for building scalable applications.",
"tags": ["database", "design", "scalability"],
"created_at": datetime(2024, 1, 18, 16, 20, tzinfo=timezone.utc),
},
{
"text": "The weather today is sunny and warm, perfect for outdoor activities.",
"tags": ["weather", "outdoor"],
"created_at": datetime(2024, 1, 19, 11, 0, tzinfo=timezone.utc),
},
]
# Create passages in the database
created_passages = []
for data in test_data:
passage = await server.passage_manager.create_agent_passage_async(
PydanticPassage(
text=data["text"],
archive_id=archive.id,
organization_id=default_user.organization_id,
embedding=[0.1, 0.2, 0.3], # Mock embedding
embedding_config=DEFAULT_EMBEDDING_CONFIG,
tags=data["tags"],
created_at=data["created_at"],
),
actor=default_user,
)
created_passages.append(passage)
# Test 1: Basic search by query text
results = await server.agent_manager.search_agent_archival_memory_async(
agent_id=sarah_agent.id, actor=default_user, query="Python programming"
)
assert len(results) > 0
# Check structure of results
for result in results:
assert "timestamp" in result
assert "content" in result
assert "tags" in result
assert isinstance(result["tags"], list)
# Test 2: Search with tag filtering - single tag
results = await server.agent_manager.search_agent_archival_memory_async(
agent_id=sarah_agent.id, actor=default_user, query="programming", tags=["python"]
)
assert len(results) > 0
# All results should have "python" tag
for result in results:
assert "python" in result["tags"]
# Test 3: Search with tag filtering - multiple tags with "any" mode
results = await server.agent_manager.search_agent_archival_memory_async(
agent_id=sarah_agent.id, actor=default_user, query="development", tags=["web", "database"], tag_match_mode="any"
)
assert len(results) > 0
# All results should have at least one of the specified tags
for result in results:
assert any(tag in result["tags"] for tag in ["web", "database"])
# Test 4: Search with tag filtering - multiple tags with "all" mode
results = await server.agent_manager.search_agent_archival_memory_async(
agent_id=sarah_agent.id, actor=default_user, query="Python", tags=["python", "web"], tag_match_mode="all"
)
# Should only return results that have BOTH tags
for result in results:
assert "python" in result["tags"]
assert "web" in result["tags"]
# Test 5: Search with top_k limit
results = await server.agent_manager.search_agent_archival_memory_async(
agent_id=sarah_agent.id, actor=default_user, query="programming", top_k=2
)
assert len(results) <= 2
# Test 6: Search with datetime filtering
results = await server.agent_manager.search_agent_archival_memory_async(
agent_id=sarah_agent.id, actor=default_user, query="programming", start_datetime="2024-01-16", end_datetime="2024-01-17"
)
# Should only include passages created between those dates
for result in results:
# Parse timestamp to verify it's in range
timestamp_str = result["timestamp"]
# Basic validation that timestamp exists and has expected format
assert "2024-01-16" in timestamp_str or "2024-01-17" in timestamp_str
# Test 7: Search with ISO datetime format
results = await server.agent_manager.search_agent_archival_memory_async(
agent_id=sarah_agent.id,
actor=default_user,
query="algorithms",
start_datetime="2024-01-16T14:00:00",
end_datetime="2024-01-16T15:00:00",
)
# Should include the machine learning passage created at 14:45
assert len(results) >= 0 # Might be 0 if no results, but shouldn't error
# Test 8: Search with non-existent agent should raise error
non_existent_agent_id = "agent-00000000-0000-4000-8000-000000000000"
with pytest.raises(Exception): # Should raise NoResultFound or similar
await server.agent_manager.search_agent_archival_memory_async(agent_id=non_existent_agent_id, actor=default_user, query="test")
# Test 9: Search with invalid datetime format should raise ValueError
with pytest.raises(ValueError, match="Invalid start_datetime format"):
await server.agent_manager.search_agent_archival_memory_async(
agent_id=sarah_agent.id, actor=default_user, query="test", start_datetime="invalid-date"
)
# Test 10: Empty query should return empty results
results = await server.agent_manager.search_agent_archival_memory_async(agent_id=sarah_agent.id, actor=default_user, query="")
assert len(results) == 0 # Empty query should return 0 results
# Test 11: Whitespace-only query should also return empty results
results = await server.agent_manager.search_agent_archival_memory_async(agent_id=sarah_agent.id, actor=default_user, query=" \n\t ")
assert len(results) == 0 # Whitespace-only query should return 0 results
# Cleanup - delete the created passages
for passage in created_passages:
await server.passage_manager.delete_agent_passage_by_id_async(passage_id=passage.id, actor=default_user)