Files
letta-server/letta/cli/cli_load.py
Matthew Zhou dd1f4d5cbe chore: Various updates and improvements (#1247)
Co-authored-by: Charles Packer <packercharles@gmail.com>
Co-authored-by: cthomas <caren@letta.com>
Co-authored-by: Shubham Naik <shubham.naik10@gmail.com>
Co-authored-by: Shubham Naik <shub@memgpt.ai>
Co-authored-by: mlong93 <35275280+mlong93@users.noreply.github.com>
Co-authored-by: Mindy Long <mindy@letta.com>
Co-authored-by: Kevin Lin <klin5061@gmail.com>
Co-authored-by: Stephan Fitzpatrick <stephan@knowsuchagency.com>
Co-authored-by: dboyliao <qmalliao@gmail.com>
Co-authored-by: Sarah Wooders <sarahwooders@gmail.com>
Co-authored-by: Jyotirmaya Mahanta <jyotirmaya.mahanta@gmail.com>
Co-authored-by: Nicholas <102550462+ndisalvio3@users.noreply.github.com>
Co-authored-by: tarunkumark <tkksctwo@gmail.com>
Co-authored-by: Miao <one.lemorage@gmail.com>
Co-authored-by: Krishnakumar R (KK) <65895020+kk-src@users.noreply.github.com>
Co-authored-by: Shubham Naik <shub@letta.com>
Co-authored-by: Will Sargent <will.sargent@gmail.com>
2025-03-11 14:46:45 -07:00

69 lines
2.6 KiB
Python

"""
This file contains functions for loading data into Letta's archival storage.
Data can be loaded with the following command, once a load function is defined:
```
letta load <data-connector-type> --name <dataset-name> [ADDITIONAL ARGS]
```
"""
import uuid
from typing import Annotated, List, Optional
import questionary
import typer
from letta import create_client
from letta.data_sources.connectors import DirectoryConnector
app = typer.Typer()
default_extensions = "txt,md,pdf"
@app.command("directory")
def load_directory(
name: Annotated[str, typer.Option(help="Name of dataset to load.")],
input_dir: Annotated[Optional[str], typer.Option(help="Path to directory containing dataset.")] = None,
input_files: Annotated[List[str], typer.Option(help="List of paths to files containing dataset.")] = [],
recursive: Annotated[bool, typer.Option(help="Recursively search for files in directory.")] = False,
extensions: Annotated[str, typer.Option(help="Comma separated list of file extensions to load")] = default_extensions,
user_id: Annotated[Optional[uuid.UUID], typer.Option(help="User ID to associate with dataset.")] = None, # TODO: remove
description: Annotated[Optional[str], typer.Option(help="Description of the source.")] = None,
):
client = create_client()
# create connector
connector = DirectoryConnector(input_files=input_files, input_directory=input_dir, recursive=recursive, extensions=extensions)
# choose form list of embedding configs
embedding_configs = client.list_embedding_configs()
embedding_options = [embedding_config.embedding_model for embedding_config in embedding_configs]
embedding_choices = [
questionary.Choice(title=embedding_config.pretty_print(), value=embedding_config) for embedding_config in embedding_configs
]
# select model
if len(embedding_options) == 0:
raise ValueError("No embedding models found. Please enable a provider.")
elif len(embedding_options) == 1:
embedding_model_name = embedding_options[0]
else:
embedding_model_name = questionary.select("Select embedding model:", choices=embedding_choices).ask().embedding_model
embedding_config = [
embedding_config for embedding_config in embedding_configs if embedding_config.embedding_model == embedding_model_name
][0]
# create source
source = client.create_source(name=name, embedding_config=embedding_config)
# load data
try:
client.load_data(connector, source_name=name)
except Exception as e:
typer.secho(f"Failed to load data from provided information.\n{e}", fg=typer.colors.RED)
client.delete_source(source.id)