diff --git a/letta/server/rest_api/routers/v1/sources.py b/letta/server/rest_api/routers/v1/sources.py index f224d0a7..992eb064 100644 --- a/letta/server/rest_api/routers/v1/sources.py +++ b/letta/server/rest_api/routers/v1/sources.py @@ -431,11 +431,9 @@ async def get_file_metadata( else: file_status = FileProcessingStatus.COMPLETED try: - print("GETTING PINECONE!!!") file_metadata = await server.file_manager.update_file_status( file_id=file_metadata.id, actor=actor, chunks_embedded=len(ids), processing_status=file_status ) - print(file_metadata) except ValueError as e: # state transition was blocked - this is a race condition # log it but don't fail the request since we're just reading metadata diff --git a/letta/services/file_processor/file_processor.py b/letta/services/file_processor/file_processor.py index 2084e108..d27597a4 100644 --- a/letta/services/file_processor/file_processor.py +++ b/letta/services/file_processor/file_processor.py @@ -69,6 +69,15 @@ class FileProcessor: raise ValueError("No chunks created from text") all_chunks.extend(chunks) + # Update with chunks length + file_metadata = await self.file_manager.update_file_status( + file_id=file_metadata.id, + actor=self.actor, + processing_status=FileProcessingStatus.EMBEDDING, + total_chunks=len(all_chunks), + chunks_embedded=0, + ) + all_passages = await self.embedder.generate_embedded_passages( file_id=file_metadata.id, source_id=source_id, @@ -205,11 +214,6 @@ class FileProcessor: ) # Chunk and embed with fallback logic - if not self.using_pinecone: - file_metadata = await self.file_manager.update_file_status( - file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.EMBEDDING - ) - all_passages = await self._chunk_and_embed_with_fallback( file_metadata=file_metadata, ocr_response=ocr_response, @@ -244,18 +248,8 @@ class FileProcessor: file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.COMPLETED, + chunks_embedded=len(all_passages), ) - 
else: - print("UPDATING HERE!!!!") - - file_metadata = await self.file_manager.update_file_status( - file_id=file_metadata.id, - actor=self.actor, - total_chunks=len(all_passages), - chunks_embedded=0, - processing_status=FileProcessingStatus.EMBEDDING, - ) - print(file_metadata) return all_passages @@ -293,6 +287,7 @@ class FileProcessor: document_annotation=None, ) + # TODO: This file state machine is out of date; align its status transitions with the updated upload-processing flow above @trace_method async def process_imported_file(self, file_metadata: FileMetadata, source_id: str) -> List[Passage]: """Process an imported file that already has content - skip OCR, do chunking/embedding"""