import uuid
from typing import TYPE_CHECKING, Optional

from sqlalchemy import ForeignKey, Index, Integer, String, Text, UniqueConstraint, desc
from sqlalchemy.ext.asyncio import AsyncAttrs
from sqlalchemy.orm import Mapped, mapped_column, relationship

from letta.orm.mixins import OrganizationMixin, SourceMixin
from letta.orm.sqlalchemy_base import SqlalchemyBase
from letta.schemas.enums import FileProcessingStatus
from letta.schemas.file import FileMetadata as PydanticFileMetadata

if TYPE_CHECKING:
    pass


# TODO: Note that this is NOT organization scoped, this is potentially dangerous if we misuse this
# TODO: This should ONLY be manipulated internally in relation to FileMetadata.content
# TODO: Leaving organization_id out of this for now for simplicity
class FileContent(SqlalchemyBase):
    """Holds the full text content of a file (potentially large).

    Each row belongs to exactly one row of ``files``: ``file_id`` carries a
    unique constraint and is deleted together with its parent
    (``ondelete="CASCADE"``).
    """

    __tablename__ = "file_contents"
    __table_args__ = (UniqueConstraint("file_id", name="uq_file_contents_file_id"),)

    # TODO: We want to migrate all the ORM models to do this, so we will need to move this to the SqlalchemyBase
    # TODO: Some still rely on the Pydantic object to do this
    # Primary key is generated client-side as "file_content-<uuid4>".
    id: Mapped[str] = mapped_column(String, primary_key=True, default=lambda: f"file_content-{uuid.uuid4()}")
    file_id: Mapped[str] = mapped_column(ForeignKey("files.id", ondelete="CASCADE"), nullable=False, doc="Foreign key to files table.")
    text: Mapped[str] = mapped_column(Text, nullable=False, doc="Full plain-text content of the file (e.g., extracted from a PDF).")

    # back-reference to FileMetadata
    file: Mapped["FileMetadata"] = relationship(back_populates="content", lazy="selectin")


class FileMetadata(SqlalchemyBase, OrganizationMixin, SourceMixin, AsyncAttrs):
    """Represents an uploaded file.

    The (potentially large) text body lives in the separate ``FileContent``
    table and is reachable through the ``content`` relationship, which is
    configured with ``lazy="raise"``: it must be eagerly loaded or awaited via
    ``awaitable_attrs`` (see ``to_pydantic_async``).
    """

    __tablename__ = "files"
    __pydantic_model__ = PydanticFileMetadata
    __table_args__ = (
        Index("ix_files_org_created", "organization_id", desc("created_at")),
        Index("ix_files_source_created", "source_id", desc("created_at")),
        Index("ix_files_processing_status", "processing_status"),
    )

    file_name: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The name of the file.")
    original_file_name: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The original name of the file as uploaded.")
    file_path: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The file path on the system.")
    file_type: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The type of the file.")
    file_size: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, doc="The size of the file in bytes.")
    file_creation_date: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The creation date of the file.")
    file_last_modified_date: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The last modified date of the file.")
    processing_status: Mapped[FileProcessingStatus] = mapped_column(
        String, default=FileProcessingStatus.PENDING, nullable=False, doc="The current processing status of the file."
    )

    error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True, doc="Any error message encountered during processing.")
    total_chunks: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, doc="Total number of chunks for the file.")
    chunks_embedded: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, doc="Number of chunks that have been embedded.")

    # relationships
    content: Mapped[Optional["FileContent"]] = relationship(
        "FileContent",
        uselist=False,
        back_populates="file",
        lazy="raise",  # raises if you access without eager load
        cascade="all, delete-orphan",
    )

    def _resolve_file_name(self, strip_directory_prefix: bool) -> Optional[str]:
        """Return ``file_name``, optionally without its first path segment.

        Args:
            strip_directory_prefix: When true and the name contains a ``/``,
                drop everything up to and including the first ``/``.

        Returns:
            The (possibly shortened) file name, or ``None`` when the column is
            NULL.
        """
        file_name = self.file_name
        # file_name is nullable; `"/" in None` would raise TypeError, so guard first.
        if strip_directory_prefix and file_name is not None and "/" in file_name:
            # Equivalent to "/".join(file_name.split("/")[1:]).
            return file_name.split("/", 1)[1]
        return file_name

    def _build_pydantic(self, file_name: Optional[str], content: Optional[str]) -> PydanticFileMetadata:
        """Assemble the Pydantic model from column values plus the given name and content."""
        return PydanticFileMetadata(
            id=self.id,
            organization_id=self.organization_id,
            source_id=self.source_id,
            file_name=file_name,
            original_file_name=self.original_file_name,
            file_path=self.file_path,
            file_type=self.file_type,
            file_size=self.file_size,
            file_creation_date=self.file_creation_date,
            file_last_modified_date=self.file_last_modified_date,
            processing_status=self.processing_status,
            error_message=self.error_message,
            total_chunks=self.total_chunks,
            chunks_embedded=self.chunks_embedded,
            created_at=self.created_at,
            updated_at=self.updated_at,
            content=content,
        )

    def to_pydantic(self, strip_directory_prefix: bool = False) -> PydanticFileMetadata:
        """
        Convert to Pydantic model without any relationship loading.

        Args:
            strip_directory_prefix: When true, remove the leading ``<segment>/``
                from ``file_name`` if it contains a ``/``.

        Returns:
            The Pydantic file metadata with ``content`` always set to ``None``.
        """
        return self._build_pydantic(
            file_name=self._resolve_file_name(strip_directory_prefix),
            content=None,
        )

    async def to_pydantic_async(self, include_content: bool = False, strip_directory_prefix: bool = False) -> PydanticFileMetadata:
        """
        Async version of `to_pydantic` that supports optional relationship loading
        without requiring `expire_on_commit=False`.

        Args:
            include_content: When true, await the ``content`` relationship and
                include its text; otherwise ``content`` is ``None``.
            strip_directory_prefix: When true, remove the leading ``<segment>/``
                from ``file_name`` if it contains a ``/``.

        Returns:
            The Pydantic file metadata, with ``content`` populated only when
            requested and a related ``FileContent`` row exists.
        """
        content_text: Optional[str] = None

        # Load content relationship if requested
        if include_content:
            content_obj = await self.awaitable_attrs.content
            if content_obj:
                content_text = content_obj.text

        return self._build_pydantic(
            file_name=self._resolve_file_name(strip_directory_prefix),
            content=content_text,
        )