chore: officially migrate to submodule (#4502)

* remove apps/core and apps/fern * fix precommit * add submodule updates in workflows * submodule * remove core tests * update core revision * Add submodules: true to all GitHub workflows - Ensure all workflows can access git submodules - Add submodules support to deployment, test, and CI workflows - Fix YAML syntax issues in workflow files 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com> * remove core-lint * upgrade core with latest main of oss --------- Co-authored-by: Claude <noreply@anthropic.com>
2025-09-09 12:45:53 -07:00
parent 48b5722095
commit 22f70ca07c
953 changed files with 0 additions and 181472 deletions
--- a/letta/services/file_processor/parser/markitdown_parser.py
+++ b/letta/services/file_processor/parser/markitdown_parser.py
@@ -1,95 +0,0 @@
-import logging
-import os
-import tempfile
-
-from markitdown import MarkItDown
-from mistralai import OCRPageObject, OCRResponse, OCRUsageInfo
-
-from letta.log import get_logger
-from letta.otel.tracing import trace_method
-from letta.services.file_processor.file_types import is_simple_text_mime_type
-from letta.services.file_processor.parser.base_parser import FileParser
-
-logger = get_logger(__name__)
-
-# Suppress pdfminer warnings that occur during PDF processing
-logging.getLogger("pdfminer.pdffont").setLevel(logging.ERROR)
-logging.getLogger("pdfminer.pdfinterp").setLevel(logging.ERROR)
-logging.getLogger("pdfminer.pdfpage").setLevel(logging.ERROR)
-logging.getLogger("pdfminer.converter").setLevel(logging.ERROR)
-
-
-class MarkitdownFileParser(FileParser):
-    """Markitdown-based file parsing for documents"""
-
-    def __init__(self, model: str = "markitdown"):
-        self.model = model
-
-    @trace_method
-    async def extract_text(self, content: bytes, mime_type: str) -> OCRResponse:
-        """Extract text using markitdown."""
-        try:
-            # Handle simple text files directly
-            if is_simple_text_mime_type(mime_type):
-                logger.info(f"Extracting text directly (no processing needed): {self.model}")
-                text = content.decode("utf-8", errors="replace")
-                return OCRResponse(
-                    model=self.model,
-                    pages=[
-                        OCRPageObject(
-                            index=0,
-                            markdown=text,
-                            images=[],
-                            dimensions=None,
-                        )
-                    ],
-                    usage_info=OCRUsageInfo(pages_processed=1),
-                    document_annotation=None,
-                )
-
-            logger.info(f"Extracting text using markitdown: {self.model}")
-
-            # Create temporary file to pass to markitdown
-            with tempfile.NamedTemporaryFile(delete=False, suffix=self._get_file_extension(mime_type)) as temp_file:
-                temp_file.write(content)
-                temp_file_path = temp_file.name
-
-            try:
-                md = MarkItDown(enable_plugins=False)
-                result = md.convert(temp_file_path)
-
-                return OCRResponse(
-                    model=self.model,
-                    pages=[
-                        OCRPageObject(
-                            index=0,
-                            markdown=result.text_content,
-                            images=[],
-                            dimensions=None,
-                        )
-                    ],
-                    usage_info=OCRUsageInfo(pages_processed=1),
-                    document_annotation=None,
-                )
-            finally:
-                # Clean up temporary file
-                os.unlink(temp_file_path)
-
-        except Exception as e:
-            logger.error(f"Markitdown text extraction failed: {str(e)}")
-            raise
-
-    def _get_file_extension(self, mime_type: str) -> str:
-        """Get file extension based on MIME type for markitdown processing."""
-        mime_to_ext = {
-            "application/pdf": ".pdf",
-            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
-            "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
-            "application/vnd.ms-excel": ".xls",
-            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
-            "text/csv": ".csv",
-            "application/json": ".json",
-            "text/xml": ".xml",
-            "application/xml": ".xml",
-        }
-        return mime_to_ext.get(mime_type, ".txt")