Files
letta-server/letta/data_sources/connectors_helper.py
Matthew Zhou 2a9a96a7ba chore: Various updates and improvements (#1247)
Co-authored-by: Charles Packer <packercharles@gmail.com>
Co-authored-by: cthomas <caren@letta.com>
Co-authored-by: Shubham Naik <shubham.naik10@gmail.com>
Co-authored-by: Shubham Naik <shub@memgpt.ai>
Co-authored-by: mlong93 <35275280+mlong93@users.noreply.github.com>
Co-authored-by: Mindy Long <mindy@letta.com>
Co-authored-by: Kevin Lin <klin5061@gmail.com>
Co-authored-by: Stephan Fitzpatrick <stephan@knowsuchagency.com>
Co-authored-by: dboyliao <qmalliao@gmail.com>
Co-authored-by: Sarah Wooders <sarahwooders@gmail.com>
Co-authored-by: Jyotirmaya Mahanta <jyotirmaya.mahanta@gmail.com>
Co-authored-by: Nicholas <102550462+ndisalvio3@users.noreply.github.com>
Co-authored-by: tarunkumark <tkksctwo@gmail.com>
Co-authored-by: Miao <one.lemorage@gmail.com>
Co-authored-by: Krishnakumar R (KK) <65895020+kk-src@users.noreply.github.com>
Co-authored-by: Shubham Naik <shub@letta.com>
Co-authored-by: Will Sargent <will.sargent@gmail.com>
2025-03-11 14:46:45 -07:00

98 lines
3.3 KiB
Python

import mimetypes
import os
from datetime import datetime
from pathlib import Path
from typing import List, Optional
def extract_file_metadata(file_path) -> dict:
    """Build a metadata dictionary describing one file on disk.

    Args:
        file_path: Path of the file to inspect.
    Returns:
        dict: The keys "file_name", "file_path", "file_type", "file_size",
            "file_creation_date" and "file_last_modified_date". The type is the
            MIME type guessed from the path, or "unknown" when no guess is
            available. Both dates are formatted as YYYY-MM-DD.
    Raises:
        FileNotFoundError: If nothing exists at ``file_path``.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(file_path)

    day_format = "%Y-%m-%d"

    base_name = os.path.basename(file_path)
    # guess_type returns (type, encoding); only the type is reported.
    guessed_type, _encoding = mimetypes.guess_type(file_path)
    size_in_bytes = os.path.getsize(file_path)
    # NOTE(review): what getctime reports is platform-dependent per the
    # os.path docs; it is passed through here unchanged.
    created_at = datetime.fromtimestamp(os.path.getctime(file_path))
    modified_at = datetime.fromtimestamp(os.path.getmtime(file_path))

    return {
        "file_name": base_name,
        "file_path": file_path,
        "file_type": guessed_type or "unknown",
        "file_size": size_in_bytes,
        "file_creation_date": created_at.strftime(day_format),
        "file_last_modified_date": modified_at.strftime(day_format),
    }
def extract_metadata_from_files(file_list):
    """Collect the metadata dictionary of every path in ``file_list``.

    Each path is passed to ``extract_file_metadata`` in order; any exception
    it raises propagates to the caller. Falsy results are left out of the
    returned list.
    """
    per_file = (extract_file_metadata(path) for path in file_list)
    return [entry for entry in per_file if entry]
def get_filenames_in_dir(
    input_dir: str, recursive: bool = True, required_exts: Optional[List[str]] = None, exclude: Optional[List[str]] = None
) -> List[str]:
    """
    Lists the files under a directory that pass the required_exts and exclude filters.
    Args:
        input_dir (str): The directory to scan for files.
        recursive (bool): If True, subdirectories are scanned as well; otherwise only
            the direct children of input_dir are considered.
        required_exts (list): List of file extensions to include, with or without a
            leading dot (e.g., ['pdf', '.txt']). If None or empty, matches any file extension.
        exclude (list): List of file patterns to exclude (e.g., ['*png', '*jpg']).
            Patterns are matched against each file's name, not its full path.
    Returns:
        list: A list of matching file paths (as strings), in the order produced by Path.glob.
    Raises:
        ValueError: If the same string appears in both required_exts and exclude.
    """
    required_exts = required_exts or []
    exclude = exclude or []

    # Ensure required_exts and exclude do not overlap (compared as given by the caller).
    overlap = set(required_exts) & set(exclude)
    if overlap:
        raise ValueError(f"Extensions in required_exts and exclude overlap: {overlap}")

    # File suffixes are compared without their leading dot, so strip it from the
    # requested extensions too; otherwise '.pdf' would silently match nothing.
    allowed_exts = {ext.lstrip(".") for ext in required_exts}

    def is_excluded(file_name):
        """Check if a file name matches any pattern in the exclude list."""
        return any(Path(file_name).match(pattern) for pattern in exclude)

    search_pattern = "**/*" if recursive else "*"
    files = []
    for file_path in Path(input_dir).glob(search_pattern):
        if not file_path.is_file() or is_excluded(file_path.name):
            continue
        # An empty extension filter means every remaining file is accepted.
        if not allowed_exts or file_path.suffix.lstrip(".") in allowed_exts:
            files.append(str(file_path))
    return files
def assert_all_files_exist_locally(file_paths: List[str]) -> bool:
    """
    Verifies that every path in the given list exists on the local filesystem.
    Args:
        file_paths (List[str]): List of file paths to check.
    Returns:
        bool: True when no path is missing (including when the list is empty).
    Raises:
        FileNotFoundError: If at least one path does not exist; the exception's
            argument is the list of missing paths, in input order.
    """
    missing_files = []
    for candidate in file_paths:
        if Path(candidate).exists():
            continue
        missing_files.append(candidate)

    if not missing_files:
        return True
    raise FileNotFoundError(missing_files)