feat: add include_detailed_per_source_metadata flag to sources metadata endpoint (#3371)

This commit is contained in:
Matthew Zhou
2025-07-16 17:10:41 -07:00
committed by GitHub
parent f76cfa7e8d
commit 8a8aafb3fe
3 changed files with 54 additions and 32 deletions

View File

@@ -99,6 +99,7 @@ async def get_source_id_by_name(
async def get_sources_metadata(
server: "SyncServer" = Depends(get_letta_server),
actor_id: Optional[str] = Header(None, alias="user_id"),
+    include_detailed_per_source_metadata: bool = False,
):
"""
Get aggregated metadata for all sources in an organization.
@@ -107,10 +108,12 @@ async def get_sources_metadata(
- Total number of sources
- Total number of files across all sources
- Total size of all files
-    - Per-source breakdown with file details (file_name, file_size per file)
+    - Per-source breakdown with file details (file_name, file_size per file) if include_detailed_per_source_metadata is True
"""
actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
-    return await server.file_manager.get_organization_sources_metadata(actor=actor)
+    return await server.file_manager.get_organization_sources_metadata(
+        actor=actor, include_detailed_per_source_metadata=include_detailed_per_source_metadata
+    )
@router.get("/", response_model=List[Source], operation_id="list_sources")

View File

@@ -357,7 +357,9 @@ class FileManager:
@enforce_types
@trace_method
-    async def get_organization_sources_metadata(self, actor: PydanticUser) -> OrganizationSourcesStats:
+    async def get_organization_sources_metadata(
+        self, actor: PydanticUser, include_detailed_per_source_metadata: bool = False
+    ) -> OrganizationSourcesStats:
"""
Get aggregated metadata for all sources in an organization with optimized queries.
@@ -365,7 +367,7 @@ class FileManager:
- Total number of sources
- Total number of files across all sources
- Total size of all files
-        - Per-source breakdown with file details
+        - Per-source breakdown with file details (if include_detailed_per_source_metadata is True)
"""
async with db_registry.async_session() as session:
# Import here to avoid circular imports
@@ -395,31 +397,33 @@ class FileManager:
for row in source_aggregations:
source_id, source_name, file_count, total_size = row
-                # Get individual file details for this source
-                files_query = (
-                    select(FileMetadataModel.id, FileMetadataModel.file_name, FileMetadataModel.file_size)
-                    .where(
-                        FileMetadataModel.source_id == source_id,
-                        FileMetadataModel.organization_id == actor.organization_id,
-                        FileMetadataModel.is_deleted == False,
-                    )
-                    .order_by(FileMetadataModel.file_name)
-                )
-                files_result = await session.execute(files_query)
-                files_rows = files_result.fetchall()
-                # Build file stats
-                files = [FileStats(file_id=file_row[0], file_name=file_row[1], file_size=file_row[2]) for file_row in files_rows]
-                # Build source metadata
-                source_metadata = SourceStats(
-                    source_id=source_id, source_name=source_name, file_count=file_count, total_size=total_size, files=files
-                )
-                metadata.sources.append(source_metadata)
+                if include_detailed_per_source_metadata:
+                    # Get individual file details for this source
+                    files_query = (
+                        select(FileMetadataModel.id, FileMetadataModel.file_name, FileMetadataModel.file_size)
+                        .where(
+                            FileMetadataModel.source_id == source_id,
+                            FileMetadataModel.organization_id == actor.organization_id,
+                            FileMetadataModel.is_deleted == False,
+                        )
+                        .order_by(FileMetadataModel.file_name)
+                    )
+                    files_result = await session.execute(files_query)
+                    files_rows = files_result.fetchall()
+                    # Build file stats
+                    files = [FileStats(file_id=file_row[0], file_name=file_row[1], file_size=file_row[2]) for file_row in files_rows]
+                    # Build source metadata
+                    source_metadata = SourceStats(
+                        source_id=source_id, source_name=source_name, file_count=file_count, total_size=total_size, files=files
+                    )
+                    metadata.sources.append(source_metadata)
                 metadata.total_files += file_count
                 metadata.total_size += total_size
-            metadata.total_sources = len(metadata.sources)
+            metadata.total_sources = len(source_aggregations)
return metadata

View File

@@ -5492,17 +5492,32 @@ async def test_get_organization_sources_metadata(server, default_user):
)
file3 = await server.file_manager.create_file(file_metadata=file3_meta, actor=default_user)
-    # Get organization metadata
-    metadata = await server.file_manager.get_organization_sources_metadata(actor=default_user)
+    # Test 1: Get organization metadata without detailed per-source metadata (default behavior)
+    metadata_summary = await server.file_manager.get_organization_sources_metadata(
+        actor=default_user, include_detailed_per_source_metadata=False
+    )
-    # Verify top-level aggregations
-    assert metadata.total_sources >= 2  # May have other sources from other tests
-    assert metadata.total_files >= 3
-    assert metadata.total_size >= 3584
+    # Verify top-level aggregations are present
+    assert metadata_summary.total_sources >= 2  # May have other sources from other tests
+    assert metadata_summary.total_files >= 3
+    assert metadata_summary.total_size >= 3584
-    # Find our test sources in the results
-    source1_meta = next((s for s in metadata.sources if s.source_id == source1.id), None)
-    source2_meta = next((s for s in metadata.sources if s.source_id == source2.id), None)
+    # Verify sources list is empty when include_detailed_per_source_metadata=False
+    assert len(metadata_summary.sources) == 0
+    # Test 2: Get organization metadata with detailed per-source metadata
+    metadata_detailed = await server.file_manager.get_organization_sources_metadata(
+        actor=default_user, include_detailed_per_source_metadata=True
+    )
+    # Verify top-level aggregations are the same
+    assert metadata_detailed.total_sources == metadata_summary.total_sources
+    assert metadata_detailed.total_files == metadata_summary.total_files
+    assert metadata_detailed.total_size == metadata_summary.total_size
+    # Find our test sources in the detailed results
+    source1_meta = next((s for s in metadata_detailed.sources if s.source_id == source1.id), None)
+    source2_meta = next((s for s in metadata_detailed.sources if s.source_id == source2.id), None)
assert source1_meta is not None
assert source1_meta.source_name == "test_source_1"