letta-server/memgpt/config.py

import glob
import inspect
import random
import string
import json
import os
import uuid
import textwrap
from dataclasses import dataclass
import configparser


import questionary

from colorama import Fore, Style

from typing import List, Type

import memgpt.utils as utils
import memgpt.interface as interface
from memgpt.personas.personas import get_persona_text
from memgpt.humans.humans import get_human_text
from memgpt.constants import MEMGPT_DIR, LLM_MAX_TOKENS
import memgpt.constants as constants
import memgpt.personas.personas as personas
import memgpt.humans.humans as humans
from memgpt.presets.presets import DEFAULT_PRESET, preset_options


model_choices = [
    questionary.Choice("gpt-4"),
    questionary.Choice(
        "gpt-4-turbo (developer preview)",
        value="gpt-4-1106-preview",
    ),
    questionary.Choice(
        "gpt-3.5-turbo (experimental! function-calling performance is not quite at the level of gpt-4 yet)",
        value="gpt-3.5-turbo-16k",
    ),
]


@dataclass
class MemGPTConfig:
    config_path: str = os.path.join(MEMGPT_DIR, "config")
    anon_clientid: str = None

    # preset
    preset: str = DEFAULT_PRESET

    # model parameters
    # provider: str = "openai"  # openai, azure, local (TODO)
    model_endpoint: str = "openai"
    model: str = "gpt-4"  # gpt-4, gpt-3.5-turbo, local
    context_window: int = LLM_MAX_TOKENS[model] if model in LLM_MAX_TOKENS else LLM_MAX_TOKENS["DEFAULT"]

    # model parameters: openai
    openai_key: str = None

    # model parameters: azure
    azure_key: str = None
    azure_endpoint: str = None
    azure_version: str = None
    azure_deployment: str = None
    azure_embedding_deployment: str = None

    # persona parameters
    default_persona: str = personas.DEFAULT
    default_human: str = humans.DEFAULT
    default_agent: str = None

    # embedding parameters
    embedding_model: str = "openai"
    embedding_dim: int = 1536
    embedding_chunk_size: int = 300  # number of tokens

    # database configs: archival
    archival_storage_type: str = "local"  # local, db
    archival_storage_path: str = None  # TODO: set to memgpt dir
    archival_storage_uri: str = None  # TODO: eventually allow external vector DB

    # database configs: recall
    recall_storage_type: str = "local"  # local, db
    recall_storage_path: str = None  # TODO: set to memgpt dir
    recall_storage_uri: str = None  # TODO: eventually allow external vector DB

    # database configs: agent state
    persistence_manager_type: str = None  # in-memory, db
    persistence_manager_save_file: str = None  # local file
    persistence_manager_uri: str = None  # db URI

    @staticmethod
    def generate_uuid() -> str:
        return uuid.UUID(int=uuid.getnode()).hex

    @classmethod
    def load(cls) -> "MemGPTConfig":
        config = configparser.ConfigParser()

        # allow overriding with env variables
        if os.getenv("MEMGPT_CONFIG_PATH"):
            config_path = os.getenv("MEMGPT_CONFIG_PATH")
        else:
            config_path = MemGPTConfig.config_path

        if os.path.exists(config_path):
            config.read(config_path)

            # read config values
            model = config.get("defaults", "model")
            context_window = (
                config.get("defaults", "context_window") if config.has_option("defaults", "context_window") else LLM_MAX_TOKENS["DEFAULT"]
            )
            preset = config.get("defaults", "preset")
            model_endpoint = config.get("defaults", "model_endpoint")
            default_persona = config.get("defaults", "persona")
            default_human = config.get("defaults", "human")
            default_agent = config.get("defaults", "agent") if config.has_option("defaults", "agent") else None

            openai_key, openai_model = None, None
            if "openai" in config:
                openai_key = config.get("openai", "key")

            azure_key, azure_endpoint, azure_version, azure_deployment, azure_embedding_deployment = None, None, None, None, None
            if "azure" in config:
                azure_key = config.get("azure", "key")
                azure_endpoint = config.get("azure", "endpoint")
                azure_version = config.get("azure", "version")
                azure_deployment = config.get("azure", "deployment") if config.has_option("azure", "deployment") else None
                azure_embedding_deployment = (
                    config.get("azure", "embedding_deployment") if config.has_option("azure", "embedding_deployment") else None
                )

            embedding_model = config.get("embedding", "model")
            embedding_dim = config.getint("embedding", "dim")
            embedding_chunk_size = config.getint("embedding", "chunk_size")

            # archival storage
            archival_storage_type, archival_storage_path, archival_storage_uri = "local", None, None
            if "archival_storage" in config:
                archival_storage_type = config.get("archival_storage", "type")
                archival_storage_path = config.get("archival_storage", "path") if config.has_option("archival_storage", "path") else None
                archival_storage_uri = config.get("archival_storage", "uri") if config.has_option("archival_storage", "uri") else None

            anon_clientid = config.get("client", "anon_clientid")

            return cls(
                model=model,
                context_window=context_window,
                preset=preset,
                model_endpoint=model_endpoint,
                default_persona=default_persona,
                default_human=default_human,
                default_agent=default_agent,
                openai_key=openai_key,
                azure_key=azure_key,
                azure_endpoint=azure_endpoint,
                azure_version=azure_version,
                azure_deployment=azure_deployment,
                azure_embedding_deployment=azure_embedding_deployment,
                embedding_model=embedding_model,
                embedding_dim=embedding_dim,
                embedding_chunk_size=embedding_chunk_size,
                archival_storage_type=archival_storage_type,
                archival_storage_path=archival_storage_path,
                archival_storage_uri=archival_storage_uri,
                anon_clientid=anon_clientid,
                config_path=config_path,
            )

        anon_clientid = MemGPTConfig.generate_uuid()
        config = cls(anon_clientid=anon_clientid, config_path=config_path)
        config.save()  # save updated config
        return config

    def save(self):
        config = configparser.ConfigParser()

        # CLI defaults
        config.add_section("defaults")
        config.set("defaults", "model", self.model)
        config.set("defaults", "context_window", str(self.context_window))
        config.set("defaults", "preset", self.preset)
        assert self.model_endpoint is not None, "Endpoint must be set"
        config.set("defaults", "model_endpoint", self.model_endpoint)
        config.set("defaults", "persona", self.default_persona)
        config.set("defaults", "human", self.default_human)
        if self.default_agent:
            config.set("defaults", "agent", self.default_agent)

        # security credentials
        if self.openai_key:
            config.add_section("openai")
            config.set("openai", "key", self.openai_key)

        if self.azure_key:
            config.add_section("azure")
            config.set("azure", "key", self.azure_key)
            config.set("azure", "endpoint", self.azure_endpoint)
            config.set("azure", "version", self.azure_version)
            if self.azure_deployment:
                config.set("azure", "deployment", self.azure_deployment)
                config.set("azure", "embedding_deployment", self.azure_embedding_deployment)

        # embeddings
        config.add_section("embedding")
        config.set("embedding", "model", self.embedding_model)
        config.set("embedding", "dim", str(self.embedding_dim))
        config.set("embedding", "chunk_size", str(self.embedding_chunk_size))

        # archival storage
        config.add_section("archival_storage")
        # print("archival storage", self.archival_storage_type)
        config.set("archival_storage", "type", self.archival_storage_type)
        if self.archival_storage_path:
            config.set("archival_storage", "path", self.archival_storage_path)
        if self.archival_storage_uri:
            config.set("archival_storage", "uri", self.archival_storage_uri)

        # client
        config.add_section("client")
        if not self.anon_clientid:
            self.anon_clientid = self.generate_uuid()
        config.set("client", "anon_clientid", self.anon_clientid)

        if not os.path.exists(MEMGPT_DIR):
            os.makedirs(MEMGPT_DIR, exist_ok=True)
        with open(self.config_path, "w") as f:
            config.write(f)

    @staticmethod
    def exists():
        # allow overriding with env variables
        if os.getenv("MEMGPT_CONFIG_PATH"):
            config_path = os.getenv("MEMGPT_CONFIG_PATH")
        else:
            config_path = MemGPTConfig.config_path

        assert not os.path.isdir(config_path), f"Config path {config_path} cannot be set to a directory."
        return os.path.exists(config_path)

    @staticmethod
    def create_config_dir():
        if not os.path.exists(MEMGPT_DIR):
            os.makedirs(MEMGPT_DIR, exist_ok=True)

        folders = ["personas", "humans", "archival", "agents", "functions", "system_prompts", "presets"]
        for folder in folders:
            if not os.path.exists(os.path.join(MEMGPT_DIR, folder)):
                os.makedirs(os.path.join(MEMGPT_DIR, folder))


@dataclass
class AgentConfig:
    """
    Configuration for a specific instance of an agent
    """

    def __init__(
        self,
        persona,
        human,
        model,
        context_window=None,
        preset=DEFAULT_PRESET,
        name=None,
        data_sources=[],
        agent_config_path=None,
        create_time=None,
        data_source=None,
    ):
        if name is None:
            self.name = f"agent_{self.generate_agent_id()}"
        else:
            self.name = name
        self.persona = persona
        self.human = human
        self.model = model
        self.context_window = context_window
        self.preset = preset
        self.data_sources = data_sources
        self.create_time = create_time if create_time is not None else utils.get_local_time()
        self.data_source = None  # deprecated

        if context_window is None:
            self.context_window = LLM_MAX_TOKENS[self.model] if self.model in LLM_MAX_TOKENS else LLM_MAX_TOKENS["DEFAULT"]
        else:
            self.context_window = context_window

        # save agent config
        self.agent_config_path = (
            os.path.join(MEMGPT_DIR, "agents", self.name, "config.json") if agent_config_path is None else agent_config_path
        )
        # assert not os.path.exists(self.agent_config_path), f"Agent config file already exists at {self.agent_config_path}"
        self.save()

    def generate_agent_id(self, length=6):
        ## random character based
        # characters = string.ascii_lowercase + string.digits
        # return ''.join(random.choices(characters, k=length))

        # count based
        agent_count = len(utils.list_agent_config_files())
        return str(agent_count + 1)

    def attach_data_source(self, data_source: str):
        # TODO: add warning that only once source can be attached
        # i.e. previous source will be overriden
        self.data_sources.append(data_source)
        self.save()

    def save_state_dir(self):
        # directory to save agent state
        return os.path.join(MEMGPT_DIR, "agents", self.name, "agent_state")

    def save_persistence_manager_dir(self):
        # directory to save persistent manager state
        return os.path.join(MEMGPT_DIR, "agents", self.name, "persistence_manager")

    def save_agent_index_dir(self):
        # save llama index inside of persistent manager directory
        return os.path.join(self.save_persistence_manager_dir(), "index")

    def save(self):
        # save state of persistence manager
        os.makedirs(os.path.join(MEMGPT_DIR, "agents", self.name), exist_ok=True)
        with open(self.agent_config_path, "w") as f:
            json.dump(vars(self), f, indent=4)

    @staticmethod
    def exists(name: str):
        """Check if agent config exists"""
        agent_config_path = os.path.join(MEMGPT_DIR, "agents", name)
        return os.path.exists(agent_config_path)

    @classmethod
    def load(cls, name: str):
        """Load agent config from JSON file"""
        agent_config_path = os.path.join(MEMGPT_DIR, "agents", name, "config.json")
        assert os.path.exists(agent_config_path), f"Agent config file does not exist at {agent_config_path}"
        with open(agent_config_path, "r") as f:
            agent_config = json.load(f)

        # allow compatibility accross versions
        class_args = inspect.getargspec(cls.__init__).args
        agent_fields = list(agent_config.keys())
        for key in agent_fields:
            if key not in class_args:
                utils.printd(f"Removing missing argument {key} from agent config")
                del agent_config[key]

        return cls(**agent_config)


class Config:
    personas_dir = os.path.join("memgpt", "personas", "examples")
    custom_personas_dir = os.path.join(MEMGPT_DIR, "personas")
    humans_dir = os.path.join("memgpt", "humans", "examples")
    custom_humans_dir = os.path.join(MEMGPT_DIR, "humans")
    configs_dir = os.path.join(MEMGPT_DIR, "configs")

    def __init__(self):
        os.makedirs(Config.custom_personas_dir, exist_ok=True)
        os.makedirs(Config.custom_humans_dir, exist_ok=True)
        self.load_type = None
        self.archival_storage_files = None
        self.compute_embeddings = False
        self.agent_save_file = None
        self.persistence_manager_save_file = None
        self.host = os.getenv("OPENAI_API_BASE")
        self.index = None
        self.config_file = None
        self.preload_archival = False

    @classmethod
    def legacy_flags_init(
        cls: Type["Config"],
        model: str,
        memgpt_persona: str,
        human_persona: str,
        load_type: str = None,
        archival_storage_files: str = None,
        archival_storage_index: str = None,
        compute_embeddings: bool = False,
    ):
        self = cls()
        self.model = model
        self.memgpt_persona = memgpt_persona
        self.human_persona = human_persona
        self.load_type = load_type
        self.archival_storage_files = archival_storage_files
        self.archival_storage_index = archival_storage_index
        self.compute_embeddings = compute_embeddings
        recompute_embeddings = self.compute_embeddings
        if self.archival_storage_index:
            recompute_embeddings = False  # TODO Legacy support -- can't recompute embeddings on a path that's not specified.
        if self.archival_storage_files:
            self.configure_archival_storage(recompute_embeddings)
        return self

    @classmethod
    def config_init(cls: Type["Config"], config_file: str = None):
        self = cls()
        self.config_file = config_file
        if self.config_file is None:
            cfg = Config.get_most_recent_config()
            use_cfg = False
            if cfg:
                print(f"{Style.BRIGHT}{Fore.MAGENTA}⚙️ Found saved config file.{Style.RESET_ALL}")
                use_cfg = questionary.confirm(f"Use most recent config file '{cfg}'?").ask()
            if use_cfg:
                self.config_file = cfg

        if self.config_file:
            self.load_config(self.config_file)
            recompute_embeddings = False
            if self.compute_embeddings:
                if self.archival_storage_index:
                    recompute_embeddings = questionary.confirm(
                        f"Would you like to recompute embeddings? Do this if your files have changed.\n    Files: {self.archival_storage_files}",
                        default=False,
                    ).ask()
                else:
                    recompute_embeddings = True
            if self.load_type:
                self.configure_archival_storage(recompute_embeddings)
                self.write_config()
            return self

        # print("No settings file found, configuring MemGPT...")
        print(f"{Style.BRIGHT}{Fore.MAGENTA}⚙️ No settings file found, configuring MemGPT...{Style.RESET_ALL}")

        self.model = questionary.select(
            "Which model would you like to use?",
            model_choices,
            default=model_choices[0],
        ).ask()

        self.memgpt_persona = questionary.select(
            "Which persona would you like MemGPT to use?",
            Config.get_memgpt_personas(),
        ).ask()
        print(self.memgpt_persona)

        self.human_persona = questionary.select(
            "Which user would you like to use?",
            Config.get_user_personas(),
        ).ask()

        self.archival_storage_index = None
        self.preload_archival = questionary.confirm(
            "Would you like to preload anything into MemGPT's archival memory?", default=False
        ).ask()
        if self.preload_archival:
            self.load_type = questionary.select(
                "What would you like to load?",
                choices=[
                    questionary.Choice("A folder or file", value="folder"),
                    questionary.Choice("A SQL database", value="sql"),
                    questionary.Choice("A glob pattern", value="glob"),
                ],
            ).ask()
            if self.load_type == "folder" or self.load_type == "sql":
                archival_storage_path = questionary.path("Please enter the folder or file (tab for autocomplete):").ask()
                if os.path.isdir(archival_storage_path):
                    self.archival_storage_files = os.path.join(archival_storage_path, "*")
                else:
                    self.archival_storage_files = archival_storage_path
            else:
                self.archival_storage_files = questionary.path("Please enter the glob pattern (tab for autocomplete):").ask()
            self.compute_embeddings = questionary.confirm(
                "Would you like to compute embeddings over these files to enable embeddings search?"
            ).ask()
            self.configure_archival_storage(self.compute_embeddings)

        self.write_config()
        return self

    def configure_archival_storage(self, recompute_embeddings):
        if recompute_embeddings:
            if self.host:
                interface.warning_message(
                    "⛔️ Embeddings on a non-OpenAI endpoint are not yet supported, falling back to substring matching search."
                )
            else:
                self.archival_storage_index = utils.prepare_archival_index_from_files_compute_embeddings(self.archival_storage_files)
        if self.compute_embeddings and self.archival_storage_index:
            self.index, self.archival_database = utils.prepare_archival_index(self.archival_storage_index)
        else:
            self.archival_database = utils.prepare_archival_index_from_files(self.archival_storage_files)

    def to_dict(self):
        return {
            "model": self.model,
            "memgpt_persona": self.memgpt_persona,
            "human_persona": self.human_persona,
            "preload_archival": self.preload_archival,
            "archival_storage_files": self.archival_storage_files,
            "archival_storage_index": self.archival_storage_index,
            "compute_embeddings": self.compute_embeddings,
            "load_type": self.load_type,
            "agent_save_file": self.agent_save_file,
            "persistence_manager_save_file": self.persistence_manager_save_file,
            "host": self.host,
        }

    def load_config(self, config_file):
        with open(config_file, "rt") as f:
            cfg = json.load(f)
        self.model = cfg["model"]
        self.memgpt_persona = cfg["memgpt_persona"]
        self.human_persona = cfg["human_persona"]
        self.preload_archival = cfg["preload_archival"]
        self.archival_storage_files = cfg["archival_storage_files"]
        self.archival_storage_index = cfg["archival_storage_index"]
        self.compute_embeddings = cfg["compute_embeddings"]
        self.load_type = cfg["load_type"]
        self.agent_save_file = cfg["agent_save_file"]
        self.persistence_manager_save_file = cfg["persistence_manager_save_file"]
        self.host = cfg["host"]

    def write_config(self, configs_dir=None):
        if configs_dir is None:
            configs_dir = Config.configs_dir
        os.makedirs(configs_dir, exist_ok=True)
        if self.config_file is None:
            filename = os.path.join(configs_dir, utils.get_local_time().replace(" ", "_").replace(":", "_"))
            self.config_file = f"{filename}.json"
        with open(self.config_file, "wt") as f:
            json.dump(self.to_dict(), f, indent=4)
        print(f"{Style.BRIGHT}{Fore.MAGENTA}⚙️ Saved config file to {self.config_file}.{Style.RESET_ALL}")

    @staticmethod
    def is_valid_config_file(file: str):
        cfg = Config()
        try:
            cfg.load_config(file)
        except Exception:
            return False
        return cfg.memgpt_persona is not None and cfg.human_persona is not None  # TODO: more validation for configs

    @staticmethod
    def get_memgpt_personas():
        dir_path = Config.personas_dir
        all_personas = Config.get_personas(dir_path)
        default_personas = [
            "sam",
            "sam_pov",
            "memgpt_starter",
            "memgpt_doc",
            "sam_simple_pov_gpt35",
        ]
        custom_personas_in_examples = list(set(all_personas) - set(default_personas))
        custom_personas = Config.get_personas(Config.custom_personas_dir)
        return (
            Config.get_persona_choices(
                [p for p in custom_personas],
                get_persona_text,
                Config.custom_personas_dir,
            )
            + Config.get_persona_choices(
                [p for p in custom_personas_in_examples + default_personas],
                get_persona_text,
                None,
                # Config.personas_dir,
            )
            + [
                questionary.Separator(),
                questionary.Choice(
                    f"📝 You can create your own personas by adding .txt files to {Config.custom_personas_dir}.",
                    disabled=True,
                ),
            ]
        )

    @staticmethod
    def get_user_personas():
        dir_path = Config.humans_dir
        all_personas = Config.get_personas(dir_path)
        default_personas = ["basic", "cs_phd"]
        custom_personas_in_examples = list(set(all_personas) - set(default_personas))
        custom_personas = Config.get_personas(Config.custom_humans_dir)
        return (
            Config.get_persona_choices(
                [p for p in custom_personas],
                get_human_text,
                Config.custom_humans_dir,
            )
            + Config.get_persona_choices(
                [p for p in custom_personas_in_examples + default_personas],
                get_human_text,
                None,
                # Config.humans_dir,
            )
            + [
                questionary.Separator(),
                questionary.Choice(
                    f"📝 You can create your own human profiles by adding .txt files to {Config.custom_humans_dir}.",
                    disabled=True,
                ),
            ]
        )

    @staticmethod
    def get_personas(dir_path) -> List[str]:
        files = sorted(glob.glob(os.path.join(dir_path, "*.txt")))
        stems = []
        for f in files:
            filename = os.path.basename(f)
            stem, _ = os.path.splitext(filename)
            stems.append(stem)
        return stems

    @staticmethod
    def get_persona_choices(personas, text_getter, dir):
        return [
            questionary.Choice(
                title=[
                    ("class:question", f"{p}"),
                    ("class:text", f"\n{indent(text_getter(p, dir))}"),
                ],
                value=(p, dir),
            )
            for p in personas
        ]

    @staticmethod
    def get_most_recent_config(configs_dir=None):
        if configs_dir is None:
            configs_dir = Config.configs_dir
        os.makedirs(configs_dir, exist_ok=True)
        files = [
            os.path.join(configs_dir, f)
            for f in os.listdir(configs_dir)
            if os.path.isfile(os.path.join(configs_dir, f)) and Config.is_valid_config_file(os.path.join(configs_dir, f))
        ]
        # Return the file with the most recent modification time
        if len(files) == 0:
            return None
        return max(files, key=os.path.getmtime)


def indent(text, num_lines=5):
    lines = textwrap.fill(text, width=100).split("\n")
    if len(lines) > num_lines:
        lines = lines[: num_lines - 1] + ["... (truncated)", lines[-1]]
    return "     " + "\n     ".join(lines)