From 8a8aafb3febe43fa8c97d8d867b04c204680299a Mon Sep 17 00:00:00 2001 From: Matthew Zhou Date: Wed, 16 Jul 2025 17:10:41 -0700 Subject: [PATCH] feat: add include_detailed_per_source_metadata flag to sources metadata endpoint (#3371) --- letta/server/rest_api/routers/v1/sources.py | 7 +++- letta/services/file_manager.py | 46 +++++++++++---------- tests/test_managers.py | 33 +++++++++++---- 3 files changed, 54 insertions(+), 32 deletions(-) diff --git a/letta/server/rest_api/routers/v1/sources.py b/letta/server/rest_api/routers/v1/sources.py index 3997d578..082fe6e1 100644 --- a/letta/server/rest_api/routers/v1/sources.py +++ b/letta/server/rest_api/routers/v1/sources.py @@ -99,6 +99,7 @@ async def get_source_id_by_name( async def get_sources_metadata( server: "SyncServer" = Depends(get_letta_server), actor_id: Optional[str] = Header(None, alias="user_id"), + include_detailed_per_source_metadata: bool = False, ): """ Get aggregated metadata for all sources in an organization. @@ -107,10 +108,12 @@ async def get_sources_metadata( - Total number of sources - Total number of files across all sources - Total size of all files - - Per-source breakdown with file details (file_name, file_size per file) + - Per-source breakdown with file details (file_name, file_size per file) if include_detailed_per_source_metadata is True """ actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id) - return await server.file_manager.get_organization_sources_metadata(actor=actor) + return await server.file_manager.get_organization_sources_metadata( + actor=actor, include_detailed_per_source_metadata=include_detailed_per_source_metadata + ) @router.get("/", response_model=List[Source], operation_id="list_sources") diff --git a/letta/services/file_manager.py b/letta/services/file_manager.py index 6aa86b16..2d4c0d81 100644 --- a/letta/services/file_manager.py +++ b/letta/services/file_manager.py @@ -357,7 +357,9 @@ class FileManager: @enforce_types @trace_method - async def get_organization_sources_metadata(self, actor: PydanticUser) -> OrganizationSourcesStats: + async def get_organization_sources_metadata( + self, actor: PydanticUser, include_detailed_per_source_metadata: bool = False + ) -> OrganizationSourcesStats: """ Get aggregated metadata for all sources in an organization with optimized queries. @@ -365,7 +367,7 @@ class FileManager: - Total number of sources - Total number of files across all sources - Total size of all files - - Per-source breakdown with file details + - Per-source breakdown with file details (if include_detailed_per_source_metadata is True) """ async with db_registry.async_session() as session: # Import here to avoid circular imports @@ -395,31 +397,33 @@ class FileManager: for row in source_aggregations: source_id, source_name, file_count, total_size = row - # Get individual file details for this source - files_query = ( - select(FileMetadataModel.id, FileMetadataModel.file_name, FileMetadataModel.file_size) - .where( - FileMetadataModel.source_id == source_id, - FileMetadataModel.organization_id == actor.organization_id, - FileMetadataModel.is_deleted == False, + if include_detailed_per_source_metadata: + # Get individual file details for this source + files_query = ( + select(FileMetadataModel.id, FileMetadataModel.file_name, FileMetadataModel.file_size) + .where( + FileMetadataModel.source_id == source_id, + FileMetadataModel.organization_id == actor.organization_id, + FileMetadataModel.is_deleted == False, + ) + .order_by(FileMetadataModel.file_name) ) - .order_by(FileMetadataModel.file_name) - ) - files_result = await session.execute(files_query) - files_rows = files_result.fetchall() + files_result = await session.execute(files_query) + files_rows = files_result.fetchall() - # Build file stats - files = [FileStats(file_id=file_row[0], file_name=file_row[1], file_size=file_row[2]) for file_row in files_rows] + # Build file stats + files = [FileStats(file_id=file_row[0], file_name=file_row[1], file_size=file_row[2]) for file_row in files_rows] - # Build source metadata - source_metadata = SourceStats( - source_id=source_id, source_name=source_name, file_count=file_count, total_size=total_size, files=files - ) + # Build source metadata + source_metadata = SourceStats( + source_id=source_id, source_name=source_name, file_count=file_count, total_size=total_size, files=files + ) + + metadata.sources.append(source_metadata) - metadata.sources.append(source_metadata) metadata.total_files += file_count metadata.total_size += total_size - metadata.total_sources = len(metadata.sources) + metadata.total_sources = len(source_aggregations) return metadata diff --git a/tests/test_managers.py b/tests/test_managers.py index 10f00a54..f4590280 100644 --- a/tests/test_managers.py +++ b/tests/test_managers.py @@ -5492,17 +5492,32 @@ async def test_get_organization_sources_metadata(server, default_user): ) file3 = await server.file_manager.create_file(file_metadata=file3_meta, actor=default_user) - # Get organization metadata - metadata = await server.file_manager.get_organization_sources_metadata(actor=default_user) + # Test 1: Get organization metadata without detailed per-source metadata (default behavior) + metadata_summary = await server.file_manager.get_organization_sources_metadata( + actor=default_user, include_detailed_per_source_metadata=False + ) - # Verify top-level aggregations - assert metadata.total_sources >= 2 # May have other sources from other tests - assert metadata.total_files >= 3 - assert metadata.total_size >= 3584 + # Verify top-level aggregations are present + assert metadata_summary.total_sources >= 2 # May have other sources from other tests + assert metadata_summary.total_files >= 3 + assert metadata_summary.total_size >= 3584 - # Find our test sources in the results - source1_meta = next((s for s in metadata.sources if s.source_id == source1.id), None) - source2_meta = next((s for s in metadata.sources if s.source_id == source2.id), None) + # Verify sources list is empty when include_detailed_per_source_metadata=False + assert len(metadata_summary.sources) == 0 + + # Test 2: Get organization metadata with detailed per-source metadata + metadata_detailed = await server.file_manager.get_organization_sources_metadata( + actor=default_user, include_detailed_per_source_metadata=True + ) + + # Verify top-level aggregations are the same + assert metadata_detailed.total_sources == metadata_summary.total_sources + assert metadata_detailed.total_files == metadata_summary.total_files + assert metadata_detailed.total_size == metadata_summary.total_size + + # Find our test sources in the detailed results + source1_meta = next((s for s in metadata_detailed.sources if s.source_id == source1.id), None) + source2_meta = next((s for s in metadata_detailed.sources if s.source_id == source2.id), None) assert source1_meta is not None assert source1_meta.source_name == "test_source_1"