diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index ce02993e..15107b51 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -42,7 +42,7 @@ jobs:
         PGVECTOR_TEST_DB_URL: ${{ secrets.PGVECTOR_TEST_DB_URL }}
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
-        poetry install -E dev -E postgres -E local -E legacy
+        poetry install -E dev -E postgres -E local
 
     - name: Set Poetry config
       env:
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 31f836db..8a4b0f35 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -29,7 +29,7 @@ Once Poetry is installed, navigate to the MemGPT directory and install the MemGP
 ```shell
 cd MemGPT
 poetry shell
-poetry install -E dev -E postgres -E local -E legacy
+poetry install -E dev -E postgres -E local
 ```
 
 Now when you want to use `memgpt`, make sure you first activate the `poetry` environment using poetry shell:
@@ -54,7 +54,7 @@ python3 -m venv venv
 
 Once you've activated your virtual environment and are in the MemGPT project directory, you can install the dependencies with `pip`:
 ```shell
-pip install -e '.[dev,postgres,local,legacy]'
+pip install -e '.[dev,postgres,local]'
 ```
 
 Now, you should be able to run `memgpt` from the command-line using the downloaded source code (if you used a virtual environment, you have to activate the virtual environment to access `memgpt`):
@@ -105,8 +105,8 @@ pytest -s tests
 ### Creating new tests
 If you added a major feature change, please add new tests in the `tests/` directory.
 
-## 4. 🧩 Adding new dependencies 
-If you need to add a new dependency to MemGPT, please add the package via `poetry add <PACKAGE_NAME>`. This will update the `pyproject.toml` and `poetry.lock` files. If the dependency does not need to be installed by all users, make sure to mark the dependency as optional in the `pyproject.toml` file and if needed, create a new extra under `[tool.poetry.extras]`. 
+## 4. 🧩 Adding new dependencies
+If you need to add a new dependency to MemGPT, please add the package via `poetry add <PACKAGE_NAME>`. This will update the `pyproject.toml` and `poetry.lock` files. If the dependency does not need to be installed by all users, make sure to mark the dependency as optional in the `pyproject.toml` file and if needed, create a new extra under `[tool.poetry.extras]`.
 
 ## 5. 🚀 Submitting Changes
 
diff --git a/docs/contributing.md b/docs/contributing.md
index 70f9a752..80101bca 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -13,7 +13,7 @@ Once Poetry is installed, navigate to the MemGPT directory and install the MemGP
 ```shell
 cd MemGPT
 poetry shell
-poetry install -E dev -E postgres -E local -E legacy
+poetry install -E dev -E postgres -E local 
 ```
 
 Now when you want to use `memgpt`, make sure you first activate the `poetry` environment using poetry shell:
@@ -38,7 +38,7 @@ python3 -m venv venv
 
 Once you've activated your virtual environment and are in the MemGPT project directory, you can install the dependencies with `pip`:
 ```shell
-pip install -e '.[dev,postgres,local,legacy]'
+pip install -e '.[dev,postgres,local]'
 ```
 
 Now, you should be able to run `memgpt` from the command-line using the downloaded source code (if you used a virtual environment, you have to activate the virtual environment to access `memgpt`):
diff --git a/main.py b/main.py
index 88b67b5c..6e3ee9e0 100644
--- a/main.py
+++ b/main.py
@@ -1,3 +1,7 @@
 from memgpt.main import app
+import typer
 
-app()
+typer.secho(
+    "Command `python main.py` no longer supported. Please run `memgpt run`. See https://memgpt.readthedocs.io/en/latest/quickstart/.",
+    fg=typer.colors.YELLOW,
+)
diff --git a/memgpt/agent.py b/memgpt/agent.py
index e9ace060..46b74f7e 100644
--- a/memgpt/agent.py
+++ b/memgpt/agent.py
@@ -6,14 +6,12 @@ import traceback
 
 from memgpt.persistence_manager import LocalStateManager
 from memgpt.config import AgentConfig, MemGPTConfig
-from .system import get_login_event, package_function_response, package_summarize_message, get_initial_boot_messages
-from .memory import CoreMemory as Memory, summarize_messages
-from .openai_tools import completions_with_backoff as create, is_context_overflow_error
-from memgpt.openai_tools import chat_completion_with_backoff
-from .utils import get_local_time, parse_json, united_diff, printd, count_tokens, get_schema_diff
-from .constants import (
+from memgpt.system import get_login_event, package_function_response, package_summarize_message, get_initial_boot_messages
+from memgpt.memory import CoreMemory as Memory, summarize_messages
+from memgpt.openai_tools import create, is_context_overflow_error
+from memgpt.utils import get_local_time, parse_json, united_diff, printd, count_tokens, get_schema_diff
+from memgpt.constants import (
     FIRST_MESSAGE_ATTEMPTS,
-    MAX_PAUSE_HEARTBEATS,
     MESSAGE_SUMMARY_WARNING_FRAC,
     MESSAGE_SUMMARY_TRUNC_TOKEN_FRAC,
     MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST,
@@ -759,33 +757,8 @@ class Agent(object):
         function_call="auto",
     ):
         """Get response from LLM API"""
-
-        # TODO: Legacy code - delete
-        if self.config is None:
-            try:
-                response = create(
-                    model=self.model,
-                    context_window=self.context_window,
-                    messages=message_sequence,
-                    functions=self.functions,
-                    function_call=function_call,
-                )
-
-                # special case for 'length'
-                if response.choices[0].finish_reason == "length":
-                    raise Exception("Finish reason was length (maximum context length)")
-
-                # catches for soft errors
-                if response.choices[0].finish_reason not in ["stop", "function_call"]:
-                    raise Exception(f"API call finish with bad finish reason: {response}")
-
-                # unpack with response.choices[0].message.content
-                return response
-            except Exception as e:
-                raise e
-
         try:
-            response = chat_completion_with_backoff(
+            response = create(
                 agent_config=self.config,
                 messages=message_sequence,
                 functions=self.functions,
diff --git a/memgpt/cli/cli.py b/memgpt/cli/cli.py
index 8a942c4f..0a3bcebb 100644
--- a/memgpt/cli/cli.py
+++ b/memgpt/cli/cli.py
@@ -3,21 +3,14 @@ import json
 import sys
 import io
 import logging
-import os
-from prettytable import PrettyTable
 import questionary
 
 from llama_index import set_global_service_context
-from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
+from llama_index import ServiceContext
 
 from memgpt.interface import CLIInterface as interface  # for printing to terminal
 from memgpt.cli.cli_config import configure
-import memgpt.agent as agent
-import memgpt.system as system
 import memgpt.presets.presets as presets
-import memgpt.constants as constants
-import memgpt.personas.personas as personas
-import memgpt.humans.humans as humans
 import memgpt.utils as utils
 from memgpt.utils import printd
 from memgpt.persistence_manager import LocalStateManager
@@ -25,10 +18,6 @@ from memgpt.config import MemGPTConfig, AgentConfig
 from memgpt.constants import MEMGPT_DIR
 from memgpt.agent import Agent
 from memgpt.embeddings import embedding_model
-from memgpt.openai_tools import (
-    configure_azure_support,
-    check_azure_embeddings,
-)
 
 
 def run(
@@ -196,11 +185,6 @@ def run(
     # start event loop
     from memgpt.main import run_agent_loop
 
-    # setup azure if using
-    # TODO: cleanup this code
-    if config.model_endpoint == "azure":
-        configure_azure_support()
-
     run_agent_loop(memgpt_agent, first, no_verify, config)  # TODO: add back no_verify
 
 
diff --git a/memgpt/cli/cli_config.py b/memgpt/cli/cli_config.py
index 889bfd50..fcd5b279 100644
--- a/memgpt/cli/cli_config.py
+++ b/memgpt/cli/cli_config.py
@@ -4,14 +4,11 @@ from prettytable import PrettyTable
 import typer
 import os
 import shutil
-from collections import defaultdict
 
 # from memgpt.cli import app
 from memgpt import utils
 
-import memgpt.humans.humans as humans
-import memgpt.personas.personas as personas
-from memgpt.config import MemGPTConfig, AgentConfig, Config
+from memgpt.config import MemGPTConfig, AgentConfig
 from memgpt.constants import MEMGPT_DIR
 from memgpt.connectors.storage import StorageConnector
 from memgpt.constants import LLM_MAX_TOKENS
diff --git a/memgpt/config.py b/memgpt/config.py
index 3326e040..39af5c19 100644
--- a/memgpt/config.py
+++ b/memgpt/config.py
@@ -1,44 +1,14 @@
-import glob
 import inspect
-import random
-import string
 import json
 import os
 import uuid
-import textwrap
 from dataclasses import dataclass
 import configparser
 
-
-import questionary
-
-from colorama import Fore, Style
-
-from typing import List, Type
-
 import memgpt
 import memgpt.utils as utils
-from memgpt.interface import CLIInterface as interface
-from memgpt.personas.personas import get_persona_text
-from memgpt.humans.humans import get_human_text
-from memgpt.constants import MEMGPT_DIR, LLM_MAX_TOKENS
-import memgpt.constants as constants
-import memgpt.personas.personas as personas
-import memgpt.humans.humans as humans
-from memgpt.presets.presets import DEFAULT_PRESET, preset_options
-
-
-model_choices = [
-    questionary.Choice("gpt-4"),
-    questionary.Choice(
-        "gpt-4-turbo (developer preview)",
-        value="gpt-4-1106-preview",
-    ),
-    questionary.Choice(
-        "gpt-3.5-turbo (experimental! function-calling performance is not quite at the level of gpt-4 yet)",
-        value="gpt-3.5-turbo-16k",
-    ),
-]
+from memgpt.constants import MEMGPT_DIR, LLM_MAX_TOKENS, DEFAULT_HUMAN, DEFAULT_PERSONA
+from memgpt.presets.presets import DEFAULT_PRESET
 
 
 # helper functions for writing to configs
@@ -85,8 +55,8 @@ class MemGPTConfig:
     azure_embedding_deployment: str = None
 
     # persona parameters
-    persona: str = personas.DEFAULT
-    human: str = humans.DEFAULT
+    persona: str = DEFAULT_PERSONA
+    human: str = DEFAULT_HUMAN
     agent: str = None
 
     # embedding parameters
@@ -377,298 +347,3 @@ class AgentConfig:
                 utils.printd(f"Removing missing argument {key} from agent config")
                 del agent_config[key]
         return cls(**agent_config)
-
-
-class Config:
-    personas_dir = os.path.join("memgpt", "personas", "examples")
-    custom_personas_dir = os.path.join(MEMGPT_DIR, "personas")
-    humans_dir = os.path.join("memgpt", "humans", "examples")
-    custom_humans_dir = os.path.join(MEMGPT_DIR, "humans")
-    configs_dir = os.path.join(MEMGPT_DIR, "configs")
-
-    def __init__(self):
-        os.makedirs(Config.custom_personas_dir, exist_ok=True)
-        os.makedirs(Config.custom_humans_dir, exist_ok=True)
-        self.load_type = None
-        self.archival_storage_files = None
-        self.compute_embeddings = False
-        self.agent_save_file = None
-        self.persistence_manager_save_file = None
-        self.host = os.getenv("OPENAI_API_BASE")
-        self.index = None
-        self.config_file = None
-        self.preload_archival = False
-
-    @classmethod
-    def legacy_flags_init(
-        cls: Type["Config"],
-        model: str,
-        memgpt_persona: str,
-        human_persona: str,
-        load_type: str = None,
-        archival_storage_files: str = None,
-        archival_storage_index: str = None,
-        compute_embeddings: bool = False,
-    ):
-        self = cls()
-        self.model = model
-        self.memgpt_persona = memgpt_persona
-        self.human_persona = human_persona
-        self.load_type = load_type
-        self.archival_storage_files = archival_storage_files
-        self.archival_storage_index = archival_storage_index
-        self.compute_embeddings = compute_embeddings
-        recompute_embeddings = self.compute_embeddings
-        if self.archival_storage_index:
-            recompute_embeddings = False  # TODO Legacy support -- can't recompute embeddings on a path that's not specified.
-        if self.archival_storage_files:
-            self.configure_archival_storage(recompute_embeddings)
-        return self
-
-    @classmethod
-    def config_init(cls: Type["Config"], config_file: str = None):
-        self = cls()
-        self.config_file = config_file
-        if self.config_file is None:
-            cfg = Config.get_most_recent_config()
-            use_cfg = False
-            if cfg:
-                print(f"{Style.BRIGHT}{Fore.MAGENTA}⚙️ Found saved config file.{Style.RESET_ALL}")
-                use_cfg = questionary.confirm(f"Use most recent config file '{cfg}'?").ask()
-            if use_cfg:
-                self.config_file = cfg
-
-        if self.config_file:
-            self.load_config(self.config_file)
-            recompute_embeddings = False
-            if self.compute_embeddings:
-                if self.archival_storage_index:
-                    recompute_embeddings = questionary.confirm(
-                        f"Would you like to recompute embeddings? Do this if your files have changed.\n    Files: {self.archival_storage_files}",
-                        default=False,
-                    ).ask()
-                else:
-                    recompute_embeddings = True
-            if self.load_type:
-                self.configure_archival_storage(recompute_embeddings)
-                self.write_config()
-            return self
-
-        # print("No settings file found, configuring MemGPT...")
-        print(f"{Style.BRIGHT}{Fore.MAGENTA}⚙️ No settings file found, configuring MemGPT...{Style.RESET_ALL}")
-
-        self.model = questionary.select(
-            "Which model would you like to use?",
-            model_choices,
-            default=model_choices[0],
-        ).ask()
-
-        self.memgpt_persona = questionary.select(
-            "Which persona would you like MemGPT to use?",
-            Config.get_memgpt_personas(),
-        ).ask()
-        print(self.memgpt_persona)
-
-        self.human_persona = questionary.select(
-            "Which user would you like to use?",
-            Config.get_user_personas(),
-        ).ask()
-
-        self.archival_storage_index = None
-        self.preload_archival = questionary.confirm(
-            "Would you like to preload anything into MemGPT's archival memory?", default=False
-        ).ask()
-        if self.preload_archival:
-            self.load_type = questionary.select(
-                "What would you like to load?",
-                choices=[
-                    questionary.Choice("A folder or file", value="folder"),
-                    questionary.Choice("A SQL database", value="sql"),
-                    questionary.Choice("A glob pattern", value="glob"),
-                ],
-            ).ask()
-            if self.load_type == "folder" or self.load_type == "sql":
-                archival_storage_path = questionary.path("Please enter the folder or file (tab for autocomplete):").ask()
-                if os.path.isdir(archival_storage_path):
-                    self.archival_storage_files = os.path.join(archival_storage_path, "*")
-                else:
-                    self.archival_storage_files = archival_storage_path
-            else:
-                self.archival_storage_files = questionary.path("Please enter the glob pattern (tab for autocomplete):").ask()
-            self.compute_embeddings = questionary.confirm(
-                "Would you like to compute embeddings over these files to enable embeddings search?"
-            ).ask()
-            self.configure_archival_storage(self.compute_embeddings)
-
-        self.write_config()
-        return self
-
-    def configure_archival_storage(self, recompute_embeddings):
-        if recompute_embeddings:
-            if self.host:
-                interface.warning_message(
-                    "⛔️ Embeddings on a non-OpenAI endpoint are not yet supported, falling back to substring matching search."
-                )
-            else:
-                self.archival_storage_index = utils.prepare_archival_index_from_files_compute_embeddings(self.archival_storage_files)
-        if self.compute_embeddings and self.archival_storage_index:
-            self.index, self.archival_database = utils.prepare_archival_index(self.archival_storage_index)
-        else:
-            self.archival_database = utils.prepare_archival_index_from_files(self.archival_storage_files)
-
-    def to_dict(self):
-        return {
-            "model": self.model,
-            "memgpt_persona": self.memgpt_persona,
-            "human_persona": self.human_persona,
-            "preload_archival": self.preload_archival,
-            "archival_storage_files": self.archival_storage_files,
-            "archival_storage_index": self.archival_storage_index,
-            "compute_embeddings": self.compute_embeddings,
-            "load_type": self.load_type,
-            "agent_save_file": self.agent_save_file,
-            "persistence_manager_save_file": self.persistence_manager_save_file,
-            "host": self.host,
-        }
-
-    def load_config(self, config_file):
-        with open(config_file, "rt") as f:
-            cfg = json.load(f)
-        self.model = cfg["model"]
-        self.memgpt_persona = cfg["memgpt_persona"]
-        self.human_persona = cfg["human_persona"]
-        self.preload_archival = cfg["preload_archival"]
-        self.archival_storage_files = cfg["archival_storage_files"]
-        self.archival_storage_index = cfg["archival_storage_index"]
-        self.compute_embeddings = cfg["compute_embeddings"]
-        self.load_type = cfg["load_type"]
-        self.agent_save_file = cfg["agent_save_file"]
-        self.persistence_manager_save_file = cfg["persistence_manager_save_file"]
-        self.host = cfg["host"]
-
-    def write_config(self, configs_dir=None):
-        if configs_dir is None:
-            configs_dir = Config.configs_dir
-        os.makedirs(configs_dir, exist_ok=True)
-        if self.config_file is None:
-            filename = os.path.join(configs_dir, utils.get_local_time().replace(" ", "_").replace(":", "_"))
-            self.config_file = f"{filename}.json"
-        with open(self.config_file, "wt") as f:
-            json.dump(self.to_dict(), f, indent=4)
-        print(f"{Style.BRIGHT}{Fore.MAGENTA}⚙️ Saved config file to {self.config_file}.{Style.RESET_ALL}")
-
-    @staticmethod
-    def is_valid_config_file(file: str):
-        cfg = Config()
-        try:
-            cfg.load_config(file)
-        except Exception:
-            return False
-        return cfg.memgpt_persona is not None and cfg.human_persona is not None  # TODO: more validation for configs
-
-    @staticmethod
-    def get_memgpt_personas():
-        dir_path = Config.personas_dir
-        all_personas = Config.get_personas(dir_path)
-        default_personas = [
-            "sam",
-            "sam_pov",
-            "memgpt_starter",
-            "memgpt_doc",
-            "sam_simple_pov_gpt35",
-        ]
-        custom_personas_in_examples = list(set(all_personas) - set(default_personas))
-        custom_personas = Config.get_personas(Config.custom_personas_dir)
-        return (
-            Config.get_persona_choices(
-                [p for p in custom_personas],
-                get_persona_text,
-                Config.custom_personas_dir,
-            )
-            + Config.get_persona_choices(
-                [p for p in custom_personas_in_examples + default_personas],
-                get_persona_text,
-                None,
-                # Config.personas_dir,
-            )
-            + [
-                questionary.Separator(),
-                questionary.Choice(
-                    f"📝 You can create your own personas by adding .txt files to {Config.custom_personas_dir}.",
-                    disabled=True,
-                ),
-            ]
-        )
-
-    @staticmethod
-    def get_user_personas():
-        dir_path = Config.humans_dir
-        all_personas = Config.get_personas(dir_path)
-        default_personas = ["basic", "cs_phd"]
-        custom_personas_in_examples = list(set(all_personas) - set(default_personas))
-        custom_personas = Config.get_personas(Config.custom_humans_dir)
-        return (
-            Config.get_persona_choices(
-                [p for p in custom_personas],
-                get_human_text,
-                Config.custom_humans_dir,
-            )
-            + Config.get_persona_choices(
-                [p for p in custom_personas_in_examples + default_personas],
-                get_human_text,
-                None,
-                # Config.humans_dir,
-            )
-            + [
-                questionary.Separator(),
-                questionary.Choice(
-                    f"📝 You can create your own human profiles by adding .txt files to {Config.custom_humans_dir}.",
-                    disabled=True,
-                ),
-            ]
-        )
-
-    @staticmethod
-    def get_personas(dir_path) -> List[str]:
-        files = sorted(glob.glob(os.path.join(dir_path, "*.txt")))
-        stems = []
-        for f in files:
-            filename = os.path.basename(f)
-            stem, _ = os.path.splitext(filename)
-            stems.append(stem)
-        return stems
-
-    @staticmethod
-    def get_persona_choices(personas, text_getter, dir):
-        return [
-            questionary.Choice(
-                title=[
-                    ("class:question", f"{p}"),
-                    ("class:text", f"\n{indent(text_getter(p, dir))}"),
-                ],
-                value=(p, dir),
-            )
-            for p in personas
-        ]
-
-    @staticmethod
-    def get_most_recent_config(configs_dir=None):
-        if configs_dir is None:
-            configs_dir = Config.configs_dir
-        os.makedirs(configs_dir, exist_ok=True)
-        files = [
-            os.path.join(configs_dir, f)
-            for f in os.listdir(configs_dir)
-            if os.path.isfile(os.path.join(configs_dir, f)) and Config.is_valid_config_file(os.path.join(configs_dir, f))
-        ]
-        # Return the file with the most recent modification time
-        if len(files) == 0:
-            return None
-        return max(files, key=os.path.getmtime)
-
-
-def indent(text, num_lines=5):
-    lines = textwrap.fill(text, width=100).split("\n")
-    if len(lines) > num_lines:
-        lines = lines[: num_lines - 1] + ["... (truncated)", lines[-1]]
-    return "     " + "\n     ".join(lines)
diff --git a/memgpt/constants.py b/memgpt/constants.py
index 6fb35a62..dcbd009f 100644
--- a/memgpt/constants.py
+++ b/memgpt/constants.py
@@ -3,6 +3,8 @@ import os
 MEMGPT_DIR = os.path.join(os.path.expanduser("~"), ".memgpt")
 
 DEFAULT_MEMGPT_MODEL = "gpt-4"
+DEFAULT_PERSONA = "sam_pov"
+DEFAULT_HUMAN = "basic"
 
 FIRST_MESSAGE_ATTEMPTS = 10
 
diff --git a/memgpt/functions/function_sets/extras.py b/memgpt/functions/function_sets/extras.py
index eda4d53c..21ef96d5 100644
--- a/memgpt/functions/function_sets/extras.py
+++ b/memgpt/functions/function_sets/extras.py
@@ -5,7 +5,7 @@ import requests
 
 
 from memgpt.constants import MESSAGE_CHATGPT_FUNCTION_MODEL, MESSAGE_CHATGPT_FUNCTION_SYSTEM_MESSAGE, MAX_PAUSE_HEARTBEATS
-from memgpt.openai_tools import completions_with_backoff as create
+from memgpt.openai_tools import create
 
 
 def message_chatgpt(self, message: str):
diff --git a/memgpt/humans/humans.py b/memgpt/humans/humans.py
deleted file mode 100644
index 8cd99f1f..00000000
--- a/memgpt/humans/humans.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import os
-
-DEFAULT = "cs_phd"
-
-
-def get_human_text(key=DEFAULT, dir=None):
-    if dir is None:
-        dir = os.path.join(os.path.dirname(__file__), "examples")
-    filename = key if key.endswith(".txt") else f"{key}.txt"
-    file_path = os.path.join(dir, filename)
-
-    if os.path.exists(file_path):
-        with open(file_path, "r") as file:
-            return file.read().strip()
-    else:
-        raise FileNotFoundError(f"No file found for key {key}, path={file_path}")
diff --git a/memgpt/main.py b/memgpt/main.py
index e493f14d..d585744f 100644
--- a/memgpt/main.py
+++ b/memgpt/main.py
@@ -20,28 +20,10 @@ console = Console()
 from memgpt.interface import CLIInterface as interface  # for printing to terminal
 import memgpt.agent as agent
 import memgpt.system as system
-import memgpt.utils as utils
-import memgpt.presets.presets as presets
 import memgpt.constants as constants
-import memgpt.personas.personas as personas
-import memgpt.humans.humans as humans
-from memgpt.persistence_manager import (
-    LocalStateManager,
-    InMemoryStateManager,
-    InMemoryStateManagerWithPreloadedArchivalMemory,
-    InMemoryStateManagerWithFaiss,
-)
 from memgpt.cli.cli import run, attach, version
 from memgpt.cli.cli_config import configure, list, add
 from memgpt.cli.cli_load import app as load_app
-from memgpt.config import Config, MemGPTConfig, AgentConfig
-from memgpt.constants import MEMGPT_DIR
-from memgpt.agent import Agent
-from memgpt.openai_tools import (
-    configure_azure_support,
-    check_azure_embeddings,
-    get_set_azure_env_vars,
-)
 from memgpt.connectors.storage import StorageConnector
 
 app = typer.Typer(pretty_exceptions_enable=False)
@@ -65,313 +47,7 @@ def clear_line(strip_ui=False):
         sys.stdout.flush()
 
 
-def save(memgpt_agent, cfg):
-    filename = utils.get_local_time().replace(" ", "_").replace(":", "_")
-    filename = f"{filename}.json"
-    directory = os.path.join(MEMGPT_DIR, "saved_state")
-    filename = os.path.join(directory, filename)
-    try:
-        if not os.path.exists(directory):
-            os.makedirs(directory)
-        memgpt_agent.save_to_json_file(filename)
-        print(f"Saved checkpoint to: {filename}")
-        cfg.agent_save_file = filename
-    except Exception as e:
-        print(f"Saving state to {filename} failed with: {e}")
-
-    # save the persistence manager too
-    filename = filename.replace(".json", ".persistence.pickle")
-    try:
-        memgpt_agent.persistence_manager.save(filename)
-        print(f"Saved persistence manager to: {filename}")
-        cfg.persistence_manager_save_file = filename
-    except Exception as e:
-        print(f"Saving persistence manager to {filename} failed with: {e}")
-    cfg.write_config()
-
-
-def load(memgpt_agent, filename):
-    if filename is not None:
-        if filename[-5:] != ".json":
-            filename += ".json"
-        try:
-            memgpt_agent.load_from_json_file_inplace(filename)
-            print(f"Loaded checkpoint {filename}")
-        except Exception as e:
-            print(f"Loading {filename} failed with: {e}")
-    else:
-        # Load the latest file
-        save_path = os.path.join(constants.MEMGPT_DIR, "saved_state")
-        print(f"/load warning: no checkpoint specified, loading most recent checkpoint from {save_path} instead")
-        json_files = glob.glob(os.path.join(save_path, "*.json"))  # This will list all .json files in the current directory.
-
-        # Check if there are any json files.
-        if not json_files:
-            print(f"/load error: no .json checkpoint files found")
-            return
-        else:
-            # Sort files based on modified timestamp, with the latest file being the first.
-            filename = max(json_files, key=os.path.getmtime)
-            try:
-                memgpt_agent.load_from_json_file_inplace(filename)
-                print(f"Loaded checkpoint {filename}")
-            except Exception as e:
-                print(f"Loading {filename} failed with: {e}")
-
-    # need to load persistence manager too
-    filename = filename.replace(".json", ".persistence.pickle")
-    try:
-        memgpt_agent.persistence_manager = InMemoryStateManager.load(
-            filename
-        )  # TODO(fixme):for different types of persistence managers that require different load/save methods
-        print(f"Loaded persistence manager from {filename}")
-    except Exception as e:
-        print(f"/load warning: loading persistence manager from {filename} failed with: {e}")
-
-
-@app.callback(invoke_without_command=True)  # make default command
-# @app.command("legacy-run")
-def legacy_run(
-    ctx: typer.Context,
-    persona: str = typer.Option(None, help="Specify persona"),
-    human: str = typer.Option(None, help="Specify human"),
-    model: str = typer.Option(constants.DEFAULT_MEMGPT_MODEL, help="Specify the LLM model"),
-    first: bool = typer.Option(False, "--first", help="Use --first to send the first message in the sequence"),
-    strip_ui: bool = typer.Option(False, "--strip_ui", help="Remove all the bells and whistles in CLI output (helpful for testing)"),
-    debug: bool = typer.Option(False, "--debug", help="Use --debug to enable debugging output"),
-    no_verify: bool = typer.Option(False, "--no_verify", help="Bypass message verification"),
-    archival_storage_faiss_path: str = typer.Option(
-        "",
-        "--archival_storage_faiss_path",
-        help="Specify archival storage with FAISS index to load (a folder with a .index and .json describing documents to be loaded)",
-    ),
-    archival_storage_files: str = typer.Option(
-        "",
-        "--archival_storage_files",
-        help="Specify files to pre-load into archival memory (glob pattern)",
-    ),
-    archival_storage_files_compute_embeddings: str = typer.Option(
-        "",
-        "--archival_storage_files_compute_embeddings",
-        help="Specify files to pre-load into archival memory (glob pattern), and compute embeddings over them",
-    ),
-    archival_storage_sqldb: str = typer.Option(
-        "",
-        "--archival_storage_sqldb",
-        help="Specify SQL database to pre-load into archival memory",
-    ),
-    use_azure_openai: bool = typer.Option(
-        False,
-        "--use_azure_openai",
-        help="Use Azure OpenAI (requires additional environment variables)",
-    ),  # TODO: just pass in?
-):
-    if ctx.invoked_subcommand is not None:
-        return
-
-    typer.secho(
-        "Warning: Running legacy run command. You may need to `pip install pymemgpt[legacy] -U`. Run `memgpt run` instead.",
-        fg=typer.colors.RED,
-        bold=True,
-    )
-    if not questionary.confirm("Continue with legacy CLI?", default=False).ask():
-        return
-
-    main(
-        persona,
-        human,
-        model,
-        first,
-        debug,
-        no_verify,
-        archival_storage_faiss_path,
-        archival_storage_files,
-        archival_storage_files_compute_embeddings,
-        archival_storage_sqldb,
-        use_azure_openai,
-        strip_ui,
-    )
-
-
-def main(
-    persona,
-    human,
-    model,
-    first,
-    debug,
-    no_verify,
-    archival_storage_faiss_path,
-    archival_storage_files,
-    archival_storage_files_compute_embeddings,
-    archival_storage_sqldb,
-    use_azure_openai,
-    strip_ui,
-):
-    interface.STRIP_UI = strip_ui
-    utils.DEBUG = debug
-    logging.getLogger().setLevel(logging.CRITICAL)
-    if debug:
-        logging.getLogger().setLevel(logging.DEBUG)
-
-    # Azure OpenAI support
-    if use_azure_openai:
-        configure_azure_support()
-        check_azure_embeddings()
-    else:
-        azure_vars = get_set_azure_env_vars()
-        if len(azure_vars) > 0:
-            print(f"Error: Environment variables {', '.join([x[0] for x in azure_vars])} should not be set if --use_azure_openai is False")
-            return
-
-    if any(
-        (
-            persona,
-            human,
-            model != constants.DEFAULT_MEMGPT_MODEL,
-            archival_storage_faiss_path,
-            archival_storage_files,
-            archival_storage_files_compute_embeddings,
-            archival_storage_sqldb,
-        )
-    ):
-        interface.important_message("⚙️ Using legacy command line arguments.")
-        model = model
-        if model is None:
-            model = constants.DEFAULT_MEMGPT_MODEL
-        memgpt_persona = persona
-        if memgpt_persona is None:
-            memgpt_persona = (
-                personas.GPT35_DEFAULT if (model is not None and "gpt-3.5" in model) else personas.DEFAULT,
-                None,  # represents the personas dir in pymemgpt package
-            )
-        else:
-            try:
-                personas.get_persona_text(memgpt_persona, Config.custom_personas_dir)
-                memgpt_persona = (memgpt_persona, Config.custom_personas_dir)
-            except FileNotFoundError:
-                personas.get_persona_text(memgpt_persona)
-                memgpt_persona = (memgpt_persona, None)
-
-        human_persona = human
-        if human_persona is None:
-            human_persona = (humans.DEFAULT, None)
-        else:
-            try:
-                humans.get_human_text(human_persona, Config.custom_humans_dir)
-                human_persona = (human_persona, Config.custom_humans_dir)
-            except FileNotFoundError:
-                humans.get_human_text(human_persona)
-                human_persona = (human_persona, None)
-
-        print(persona, model, memgpt_persona)
-        if archival_storage_files:
-            cfg = Config.legacy_flags_init(
-                model,
-                memgpt_persona,
-                human_persona,
-                load_type="folder",
-                archival_storage_files=archival_storage_files,
-                compute_embeddings=False,
-            )
-        elif archival_storage_faiss_path:
-            cfg = Config.legacy_flags_init(
-                model,
-                memgpt_persona,
-                human_persona,
-                load_type="folder",
-                archival_storage_files=archival_storage_faiss_path,
-                archival_storage_index=archival_storage_faiss_path,
-                compute_embeddings=True,
-            )
-        elif archival_storage_files_compute_embeddings:
-            print(model)
-            print(memgpt_persona)
-            print(human_persona)
-            cfg = Config.legacy_flags_init(
-                model,
-                memgpt_persona,
-                human_persona,
-                load_type="folder",
-                archival_storage_files=archival_storage_files_compute_embeddings,
-                compute_embeddings=True,
-            )
-        elif archival_storage_sqldb:
-            cfg = Config.legacy_flags_init(
-                model,
-                memgpt_persona,
-                human_persona,
-                load_type="sql",
-                archival_storage_files=archival_storage_sqldb,
-                compute_embeddings=False,
-            )
-        else:
-            cfg = Config.legacy_flags_init(
-                model,
-                memgpt_persona,
-                human_persona,
-            )
-    else:
-        cfg = Config.config_init()
-
-    interface.important_message("Running... [exit by typing '/exit', list available commands with '/help']")
-    if cfg.model != constants.DEFAULT_MEMGPT_MODEL:
-        interface.warning_message(
-            f"⛔️ Warning - you are running MemGPT with {cfg.model}, which is not officially supported (yet). Expect bugs!"
-        )
-
-    if cfg.index:
-        persistence_manager = InMemoryStateManagerWithFaiss(cfg.index, cfg.archival_database)
-    elif cfg.archival_storage_files:
-        print(f"Preloaded {len(cfg.archival_database)} chunks into archival memory.")
-        persistence_manager = InMemoryStateManagerWithPreloadedArchivalMemory(cfg.archival_database)
-    else:
-        persistence_manager = InMemoryStateManager()
-
-    if archival_storage_files_compute_embeddings:
-        interface.important_message(
-            f"(legacy) To avoid computing embeddings next time, replace --archival_storage_files_compute_embeddings={archival_storage_files_compute_embeddings} with\n\t --archival_storage_faiss_path={cfg.archival_storage_index} (if your files haven't changed)."
-        )
-
-    # Moved defaults out of FLAGS so that we can dynamically select the default persona based on model
-    chosen_human = cfg.human_persona
-    chosen_persona = cfg.memgpt_persona
-
-    memgpt_agent = presets.use_preset(
-        presets.DEFAULT_PRESET,
-        None,  # no agent config to provide
-        cfg.model,
-        personas.get_persona_text(*chosen_persona),
-        humans.get_human_text(*chosen_human),
-        interface,
-        persistence_manager,
-    )
-
-    print_messages = interface.print_messages
-    print_messages(memgpt_agent.messages)
-
-    if cfg.load_type == "sql":  # TODO: move this into config.py in a clean manner
-        if not os.path.exists(cfg.archival_storage_files):
-            print(f"File {cfg.archival_storage_files} does not exist")
-            return
-        # Ingest data from file into archival storage
-        else:
-            print(f"Database found! Loading database into archival memory")
-            data_list = utils.read_database_as_list(cfg.archival_storage_files)
-            user_message = f"Your archival memory has been loaded with a SQL database called {data_list[0]}, which contains schema {data_list[1]}. Remember to refer to this first while answering any user questions!"
-            for row in data_list:
-                memgpt_agent.persistence_manager.archival_memory.insert(row)
-            print(f"Database loaded into archival memory.")
-
-    if cfg.agent_save_file:
-        load_save_file = questionary.confirm(f"Load in saved agent '{cfg.agent_save_file}'?").ask()
-        if load_save_file:
-            load(memgpt_agent, cfg.agent_save_file)
-
-    # run agent loop
-    run_agent_loop(memgpt_agent, first, no_verify, cfg, strip_ui, legacy=True)
-
-
-def run_agent_loop(memgpt_agent, first, no_verify=False, cfg=None, strip_ui=False, legacy=False):
+def run_agent_loop(memgpt_agent, first, no_verify=False, cfg=None, strip_ui=False):
     counter = 0
     user_input = None
     skip_next_user_input = False
@@ -412,49 +88,14 @@ def run_agent_loop(memgpt_agent, first, no_verify=False, cfg=None, strip_ui=Fals
             # Handle CLI commands
             # Commands to not get passed as input to MemGPT
             if user_input.startswith("/"):
-                if legacy:
-                    # legacy agent save functions (TODO: eventually remove)
-                    if user_input.lower() == "/load" or user_input.lower().startswith("/load "):
-                        command = user_input.strip().split()
-                        filename = command[1] if len(command) > 1 else None
-                        load(memgpt_agent=memgpt_agent, filename=filename)
-                        continue
-                    elif user_input.lower() == "/exit":
-                        # autosave
-                        save(memgpt_agent=memgpt_agent, cfg=cfg)
-                        break
-
-                    elif user_input.lower() == "/savechat":
-                        filename = utils.get_local_time().replace(" ", "_").replace(":", "_")
-                        filename = f"{filename}.pkl"
-                        directory = os.path.join(MEMGPT_DIR, "saved_chats")
-                        try:
-                            if not os.path.exists(directory):
-                                os.makedirs(directory)
-                            with open(os.path.join(directory, filename), "wb") as f:
-                                pickle.dump(memgpt_agent.messages, f)
-                                print(f"Saved messages to: {filename}")
-                        except Exception as e:
-                            print(f"Saving chat to {filename} failed with: {e}")
-                        continue
-
-                    elif user_input.lower() == "/save":
-                        save(memgpt_agent=memgpt_agent, cfg=cfg)
-                        continue
-                else:
-                    # updated agent save functions
-                    if user_input.lower() == "/exit":
-                        memgpt_agent.save()
-                        break
-                    elif user_input.lower() == "/save" or user_input.lower() == "/savechat":
-                        memgpt_agent.save()
-                        continue
-
-                if user_input.lower() == "/attach":
-                    if legacy:
-                        typer.secho("Error: /attach is not supported in legacy mode.", fg=typer.colors.RED, bold=True)
-                        continue
-
+                # updated agent save functions
+                if user_input.lower() == "/exit":
+                    memgpt_agent.save()
+                    break
+                elif user_input.lower() == "/save" or user_input.lower() == "/savechat":
+                    memgpt_agent.save()
+                    continue
+                elif user_input.lower() == "/attach":
                     # TODO: check if agent already has it
                     data_source_options = StorageConnector.list_loaded_data()
                     if len(data_source_options) == 0:
diff --git a/memgpt/memory.py b/memgpt/memory.py
index 38cdcb5a..ff681508 100644
--- a/memgpt/memory.py
+++ b/memgpt/memory.py
@@ -1,30 +1,15 @@
 from abc import ABC, abstractmethod
-import os
 import datetime
 import re
 from typing import Optional, List, Tuple
 
-from memgpt.constants import MESSAGE_SUMMARY_WARNING_FRAC, MEMGPT_DIR
-from memgpt.utils import cosine_similarity, get_local_time, printd, count_tokens
+from memgpt.constants import MESSAGE_SUMMARY_WARNING_FRAC
+from memgpt.utils import get_local_time, printd, count_tokens
 from memgpt.prompts.gpt_summarize import SYSTEM as SUMMARY_PROMPT_SYSTEM
-from memgpt import utils
-from memgpt.openai_tools import get_embedding_with_backoff, chat_completion_with_backoff
-from llama_index import (
-    VectorStoreIndex,
-    EmptyIndex,
-    get_response_synthesizer,
-    load_index_from_storage,
-    StorageContext,
-    Document,
-)
+from memgpt.openai_tools import create
+from llama_index import Document
 from llama_index.node_parser import SimpleNodeParser
 from llama_index.node_parser import SimpleNodeParser
-from llama_index.retrievers import VectorIndexRetriever
-from llama_index.query_engine import RetrieverQueryEngine
-from llama_index.indices.postprocessor import SimilarityPostprocessor
-
-from memgpt.embeddings import embedding_model
-from memgpt.config import MemGPTConfig
 
 from memgpt.embeddings import embedding_model
 from memgpt.config import MemGPTConfig
@@ -138,7 +123,7 @@ def summarize_messages(
         {"role": "user", "content": summary_input},
     ]
 
-    response = chat_completion_with_backoff(
+    response = create(
         agent_config=agent_config,
         messages=message_sequence,
     )
@@ -178,206 +163,6 @@ class ArchivalMemory(ABC):
         pass
 
 
-class DummyArchivalMemory(ArchivalMemory):
-    """Dummy in-memory version of an archival memory database (eg run on MongoDB)
-
-    Archival Memory: A more structured and deep storage space for the AI's reflections,
-    insights, or any other data that doesn't fit into the active memory but
-    is essential enough not to be left only to the recall memory.
-    """
-
-    def __init__(self, archival_memory_database=None):
-        self._archive = [] if archival_memory_database is None else archival_memory_database  # consists of {'content': str} dicts
-
-    def __len__(self):
-        return len(self._archive)
-
-    def __repr__(self) -> str:
-        if len(self._archive) == 0:
-            memory_str = "<empty>"
-        else:
-            memory_str = "\n".join([d["content"] for d in self._archive])
-        return f"\n### ARCHIVAL MEMORY ###" + f"\n{memory_str}"
-
-    def insert(self, memory_string):
-        self._archive.append(
-            {
-                # can eventually upgrade to adding semantic tags, etc
-                "timestamp": get_local_time(),
-                "content": memory_string,
-            }
-        )
-
-    def search(self, query_string, count=None, start=None):
-        """Simple text-based search"""
-        # in the dummy version, run an (inefficient) case-insensitive match search
-        # printd(f"query_string: {query_string}")
-        matches = [s for s in self._archive if query_string.lower() in s["content"].lower()]
-        # printd(f"archive_memory.search (text-based): search for query '{query_string}' returned the following results (limit 5):\n{[str(d['content']) d in matches[:5]]}")
-        printd(
-            f"archive_memory.search (text-based): search for query '{query_string}' returned the following results (limit 5):\n{[matches[start:count]]}"
-        )
-
-        # start/count support paging through results
-        if start is not None and count is not None:
-            return matches[start : start + count], len(matches)
-        elif start is None and count is not None:
-            return matches[:count], len(matches)
-        elif start is not None and count is None:
-            return matches[start:], len(matches)
-        else:
-            return matches, len(matches)
-
-
-class DummyArchivalMemoryWithEmbeddings(DummyArchivalMemory):
-    """Same as dummy in-memory archival memory, but with bare-bones embedding support"""
-
-    def __init__(self, archival_memory_database=None, embedding_model="text-embedding-ada-002"):
-        self._archive = [] if archival_memory_database is None else archival_memory_database  # consists of {'content': str} dicts
-        self.embedding_model = embedding_model
-
-    def __len__(self):
-        return len(self._archive)
-
-    def _insert(self, memory_string, embedding):
-        # Get the embedding
-        embedding_meta = {"model": self.embedding_model}
-        printd(f"Got an embedding, type {type(embedding)}, len {len(embedding)}")
-
-        self._archive.append(
-            {
-                "timestamp": get_local_time(),
-                "content": memory_string,
-                "embedding": embedding,
-                "embedding_metadata": embedding_meta,
-            }
-        )
-
-    def insert(self, memory_string):
-        embedding = get_embedding_with_backoff(memory_string, model=self.embedding_model)
-        return self._insert(memory_string, embedding)
-
-    def search(self, query_string, count, start):
-        """Simple embedding-based search (inefficient, no caching)"""
-        # see: https://github.com/openai/openai-cookbook/blob/main/examples/Semantic_text_search_using_embeddings.ipynb
-        query_embedding = get_embedding_with_backoff(query_string, model=self.embedding_model)
-
-        # query_embedding = get_embedding(query_string, model=self.embedding_model)
-        # our wrapped version supports backoff/rate-limits
-        similarity_scores = [cosine_similarity(memory["embedding"], query_embedding) for memory in self._archive]
-
-        # Sort the archive based on similarity scores
-        sorted_archive_with_scores = sorted(
-            zip(self._archive, similarity_scores),
-            key=lambda pair: pair[1],  # Sort by the similarity score
-            reverse=True,  # We want the highest similarity first
-        )
-        printd(
-            f"archive_memory.search (vector-based): search for query '{query_string}' returned the following results (limit 5) and scores:\n{str([str(t[0]['content']) + '- score ' + str(t[1]) for t in sorted_archive_with_scores[:5]])}"
-        )
-
-        # Extract the sorted archive without the scores
-        matches = [item[0] for item in sorted_archive_with_scores]
-
-        # start/count support paging through results
-        if start is not None and count is not None:
-            return matches[start : start + count], len(matches)
-        elif start is None and count is not None:
-            return matches[:count], len(matches)
-        elif start is not None and count is None:
-            return matches[start:], len(matches)
-        else:
-            return matches, len(matches)
-
-
-class DummyArchivalMemoryWithFaiss(DummyArchivalMemory):
-    """Dummy in-memory version of an archival memory database, using a FAISS
-    index for fast nearest-neighbors embedding search.
-
-    Archival memory is effectively "infinite" overflow for core memory,
-    and is read-only via string queries.
-
-    Archival Memory: A more structured and deep storage space for the AI's reflections,
-    insights, or any other data that doesn't fit into the active memory but
-    is essential enough not to be left only to the recall memory.
-    """
-
-    def __init__(self, index=None, archival_memory_database=None, embedding_model="text-embedding-ada-002", k=100):
-        if index is None:
-            import faiss
-
-            self.index = faiss.IndexFlatL2(1536)  # openai embedding vector size.
-        else:
-            self.index = index
-        self.k = k
-        self._archive = [] if archival_memory_database is None else archival_memory_database  # consists of {'content': str} dicts
-        self.embedding_model = embedding_model
-        self.embeddings_dict = {}
-        self.search_results = {}
-
-    def __len__(self):
-        return len(self._archive)
-
-    def insert(self, memory_string):
-        import numpy as np
-
-        # Get the embedding
-        embedding = get_embedding_with_backoff(memory_string, model=self.embedding_model)
-
-        print(f"Got an embedding, type {type(embedding)}, len {len(embedding)}")
-
-        self._archive.append(
-            {
-                # can eventually upgrade to adding semantic tags, etc
-                "timestamp": get_local_time(),
-                "content": memory_string,
-            }
-        )
-        embedding = np.array([embedding]).astype("float32")
-        self.index.add(embedding)
-
-    def search(self, query_string, count=None, start=None):
-        """Simple embedding-based search (inefficient, no caching)"""
-        # see: https://github.com/openai/openai-cookbook/blob/main/examples/Semantic_text_search_using_embeddings.ipynb
-
-        # query_embedding = get_embedding(query_string, model=self.embedding_model)
-        # our wrapped version supports backoff/rate-limits
-        import numpy as np
-
-        if query_string in self.embeddings_dict:
-            search_result = self.search_results[query_string]
-        else:
-            query_embedding = get_embedding_with_backoff(query_string, model=self.embedding_model)
-            _, indices = self.index.search(np.array([np.array(query_embedding, dtype=np.float32)]), self.k)
-            search_result = [self._archive[idx] if idx < len(self._archive) else "" for idx in indices[0]]
-            self.embeddings_dict[query_string] = query_embedding
-            self.search_results[query_string] = search_result
-
-        if start is not None and count is not None:
-            toprint = search_result[start : start + count]
-        else:
-            if len(search_result) >= 5:
-                toprint = search_result[:5]
-            else:
-                toprint = search_result
-        printd(
-            f"archive_memory.search (vector-based): search for query '{query_string}' returned the following results ({start}--{start+5}/{len(search_result)}) and scores:\n{str([t[:60] if len(t) > 60 else t for t in toprint])}"
-        )
-
-        # Extract the sorted archive without the scores
-        matches = search_result
-
-        # start/count support paging through results
-        if start is not None and count is not None:
-            return matches[start : start + count], len(matches)
-        elif start is None and count is not None:
-            return matches[:count], len(matches)
-        elif start is not None and count is None:
-            return matches[start:], len(matches)
-        else:
-            return matches, len(matches)
-
-
 class RecallMemory(ABC):
     @abstractmethod
     def text_search(self, query_string, count=None, start=None):
@@ -402,6 +187,8 @@ class DummyRecallMemory(RecallMemory):
     effectively allowing it to 'remember' prior engagements with a user.
     """
 
+    # TODO: replace this with StorageConnector based implementation
+
     def __init__(self, message_database=None, restrict_search_to_summaries=False):
         self._message_logs = [] if message_database is None else message_database  # consists of full message dicts
 
@@ -508,150 +295,6 @@ class DummyRecallMemory(RecallMemory):
             return matches, len(matches)
 
 
-class DummyRecallMemoryWithEmbeddings(DummyRecallMemory):
-    """Lazily manage embeddings by keeping a string->embed dict"""
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.embeddings = dict()
-        self.embedding_model = "text-embedding-ada-002"
-        self.only_use_preloaded_embeddings = False
-
-    def text_search(self, query_string, count, start):
-        # in the dummy version, run an (inefficient) case-insensitive match search
-        message_pool = [d for d in self._message_logs if d["message"]["role"] not in ["system", "function"]]
-
-        # first, go through and make sure we have all the embeddings we need
-        message_pool_filtered = []
-        for d in message_pool:
-            message_str = d["message"]["content"]
-            if self.only_use_preloaded_embeddings:
-                if message_str not in self.embeddings:
-                    printd(f"recall_memory.text_search -- '{message_str}' was not in embedding dict, skipping.")
-                else:
-                    message_pool_filtered.append(d)
-            elif message_str not in self.embeddings:
-                printd(f"recall_memory.text_search -- '{message_str}' was not in embedding dict, computing now")
-                self.embeddings[message_str] = get_embedding_with_backoff(message_str, model=self.embedding_model)
-                message_pool_filtered.append(d)
-
-        # our wrapped version supports backoff/rate-limits
-        query_embedding = get_embedding_with_backoff(query_string, model=self.embedding_model)
-        similarity_scores = [cosine_similarity(self.embeddings[d["message"]["content"]], query_embedding) for d in message_pool_filtered]
-
-        # Sort the archive based on similarity scores
-        sorted_archive_with_scores = sorted(
-            zip(message_pool_filtered, similarity_scores),
-            key=lambda pair: pair[1],  # Sort by the similarity score
-            reverse=True,  # We want the highest similarity first
-        )
-        printd(
-            f"recall_memory.text_search (vector-based): search for query '{query_string}' returned the following results (limit 5) and scores:\n{str([str(t[0]['message']['content']) + '- score ' + str(t[1]) for t in sorted_archive_with_scores[:5]])}"
-        )
-
-        # Extract the sorted archive without the scores
-        matches = [item[0] for item in sorted_archive_with_scores]
-
-        # start/count support paging through results
-        if start is not None and count is not None:
-            return matches[start : start + count], len(matches)
-        elif start is None and count is not None:
-            return matches[:count], len(matches)
-        elif start is not None and count is None:
-            return matches[start:], len(matches)
-        else:
-            return matches, len(matches)
-
-
-class LocalArchivalMemory(ArchivalMemory):
-    """Archival memory built on top of Llama Index"""
-
-    def __init__(self, agent_config, top_k: Optional[int] = 100):
-        """Init function for archival memory
-
-        :param archiva_memory_database: name of dataset to pre-fill archival with
-        :type archival_memory_database: str
-        """
-
-        self.top_k = top_k
-        self.agent_config = agent_config
-
-        # locate saved index
-        # if self.agent_config.data_source is not None:  # connected data source
-        #    directory = f"{MEMGPT_DIR}/archival/{self.agent_config.data_source}"
-        #    assert os.path.exists(directory), f"Archival memory database {self.agent_config.data_source} does not exist"
-        # elif self.agent_config.name is not None:
-        if self.agent_config.name is not None:
-            directory = agent_config.save_agent_index_dir()
-            if not os.path.exists(directory):
-                # no existing archival storage
-                directory = None
-
-        # load/create index
-        if directory:
-            storage_context = StorageContext.from_defaults(persist_dir=directory)
-            self.index = load_index_from_storage(storage_context)
-        else:
-            self.index = EmptyIndex()
-
-        # create retriever
-        if isinstance(self.index, EmptyIndex):
-            self.retriever = None  # cant create retriever over empty indes
-        else:
-            self.retriever = VectorIndexRetriever(
-                index=self.index,  # does this get refreshed?
-                similarity_top_k=self.top_k,
-            )
-
-        # TODO: have some mechanism for cleanup otherwise will lead to OOM
-        self.cache = {}
-
-    def save(self):
-        """Save the index to disk"""
-        # if self.agent_config.data_sources:  # update original archival index
-        #    # TODO: this corrupts the originally loaded data. do we want to do this?
-        #    utils.save_index(self.index, self.agent_config.data_sources)
-        # else:
-
-        # don't need to save data source, since we assume data source data is already loaded into the agent index
-        utils.save_agent_index(self.index, self.agent_config)
-
-    def insert(self, memory_string):
-        self.index.insert(memory_string)
-
-        # TODO: figure out if this needs to be refreshed (probably not)
-        self.retriever = VectorIndexRetriever(
-            index=self.index,
-            similarity_top_k=self.top_k,
-        )
-
-    def search(self, query_string, count=None, start=None):
-        print("searching with local")
-        if self.retriever is None:
-            print("Warning: archival memory is empty")
-            return [], 0
-
-        start = start if start else 0
-        count = count if count else self.top_k
-        count = min(count + start, self.top_k)
-
-        if query_string not in self.cache:
-            self.cache[query_string] = self.retriever.retrieve(query_string)
-
-        results = self.cache[query_string][start : start + count]
-        results = [{"timestamp": get_local_time(), "content": node.node.text} for node in results]
-        # from pprint import pprint
-        # pprint(results)
-        return results, len(results)
-
-    def __repr__(self) -> str:
-        if isinstance(self.index, EmptyIndex):
-            memory_str = "<empty>"
-        else:
-            memory_str = self.index.ref_doc_info
-        return f"\n### ARCHIVAL MEMORY ###" + f"\n{memory_str}"
-
-
 class EmbeddingArchivalMemory(ArchivalMemory):
     """Archival memory with embedding based search"""
 
diff --git a/memgpt/openai_tools.py b/memgpt/openai_tools.py
index 72fd6a9d..14f8c237 100644
--- a/memgpt/openai_tools.py
+++ b/memgpt/openai_tools.py
@@ -311,39 +311,14 @@ def retry_with_exponential_backoff(
     return wrapper
 
 
-# TODO: delete/ignore --legacy
 @retry_with_exponential_backoff
-def completions_with_backoff(**kwargs):
-    # Local model
-    if HOST_TYPE is not None:
-        return get_chat_completion(**kwargs)
-
-    # OpenAI / Azure model
-    else:
-        if using_azure():
-            azure_openai_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT")
-            if azure_openai_deployment is not None:
-                kwargs["deployment_id"] = azure_openai_deployment
-            else:
-                kwargs["engine"] = MODEL_TO_AZURE_ENGINE[kwargs["model"]]
-                kwargs.pop("model")
-        if "context_window" in kwargs:
-            kwargs.pop("context_window")
-
-        api_url = "https://api.openai.com/v1"
-        api_key = os.get_env("OPENAI_API_KEY")
-        if api_key is None:
-            raise Exception("OPENAI_API_KEY is not defined - please set it")
-        return openai_chat_completions_request(api_url, api_key, data=kwargs)
-
-
-@retry_with_exponential_backoff
-def chat_completion_with_backoff(
+def create(
     agent_config,
     messages,
     functions=None,
     function_call="auto",
 ):
+    """Return response to chat completion with backoff"""
     from memgpt.utils import printd
     from memgpt.config import MemGPTConfig
 
@@ -392,91 +367,3 @@ def chat_completion_with_backoff(
             wrapper=agent_config.model_wrapper,
             user=config.anon_clientid,
         )
-
-
-# TODO: deprecate
-@retry_with_exponential_backoff
-def create_embedding_with_backoff(**kwargs):
-    if using_azure():
-        azure_openai_deployment = os.getenv("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT")
-        if azure_openai_deployment is not None:
-            kwargs["deployment_id"] = azure_openai_deployment
-        else:
-            kwargs["engine"] = kwargs["model"]
-            kwargs.pop("model")
-
-        api_key = os.get_env("AZURE_OPENAI_KEY")
-        if api_key is None:
-            raise Exception("AZURE_OPENAI_API_KEY is not defined - please set it")
-        # TODO check
-        # api_version???
-        # resource_name???
-        # "engine" instead of "model"???
-        return azure_openai_embeddings_request(
-            resource_name=None, deployment_id=azure_openai_deployment, api_version=None, api_key=api_key, data=kwargs
-        )
-
-    else:
-        # return openai.Embedding.create(**kwargs)
-        api_url = "https://api.openai.com/v1"
-        api_key = os.get_env("OPENAI_API_KEY")
-        if api_key is None:
-            raise Exception("OPENAI_API_KEY is not defined - please set it")
-        return openai_embeddings_request(url=api_url, api_key=api_key, data=kwargs)
-
-
-def get_embedding_with_backoff(text, model="text-embedding-ada-002"):
-    text = text.replace("\n", " ")
-    response = create_embedding_with_backoff(input=[text], model=model)
-    embedding = response["data"][0]["embedding"]
-    return embedding
-
-
-MODEL_TO_AZURE_ENGINE = {
-    "gpt-4-1106-preview": "gpt-4-1106-preview",  # TODO check
-    "gpt-4": "gpt-4",
-    "gpt-4-32k": "gpt-4-32k",
-    "gpt-3.5": "gpt-35-turbo",  # diff
-    "gpt-3.5-turbo": "gpt-35-turbo",  # diff
-    "gpt-3.5-turbo-16k": "gpt-35-turbo-16k",  # diff
-}
-
-
-def get_set_azure_env_vars():
-    azure_env_variables = [
-        ("AZURE_OPENAI_KEY", os.getenv("AZURE_OPENAI_KEY")),
-        ("AZURE_OPENAI_ENDPOINT", os.getenv("AZURE_OPENAI_ENDPOINT")),
-        ("AZURE_OPENAI_VERSION", os.getenv("AZURE_OPENAI_VERSION")),
-        ("AZURE_OPENAI_DEPLOYMENT", os.getenv("AZURE_OPENAI_DEPLOYMENT")),
-        (
-            "AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT",
-            os.getenv("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"),
-        ),
-    ]
-    return [x for x in azure_env_variables if x[1] is not None]
-
-
-def using_azure():
-    return len(get_set_azure_env_vars()) > 0
-
-
-def configure_azure_support():
-    azure_openai_key = os.getenv("AZURE_OPENAI_KEY")
-    azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
-    azure_openai_version = os.getenv("AZURE_OPENAI_VERSION")
-    if None in [
-        azure_openai_key,
-        azure_openai_endpoint,
-        azure_openai_version,
-    ]:
-        print(f"Error: missing Azure OpenAI environment variables. Please see README section on Azure.")
-        return
-
-
-def check_azure_embeddings():
-    azure_openai_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT")
-    azure_openai_embedding_deployment = os.getenv("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT")
-    if azure_openai_deployment is not None and azure_openai_embedding_deployment is None:
-        raise ValueError(
-            f"Error: It looks like you are using Azure deployment ids and computing embeddings, make sure you are setting one for embeddings as well. Please see README section on Azure"
-        )
diff --git a/memgpt/persistence_manager.py b/memgpt/persistence_manager.py
index 8efd74ac..1138a109 100644
--- a/memgpt/persistence_manager.py
+++ b/memgpt/persistence_manager.py
@@ -1,16 +1,11 @@
 from abc import ABC, abstractmethod
-import os
 import pickle
 from memgpt.config import AgentConfig
-from .memory import (
+from memgpt.memory import (
     DummyRecallMemory,
-    DummyRecallMemoryWithEmbeddings,
-    DummyArchivalMemory,
-    DummyArchivalMemoryWithEmbeddings,
-    DummyArchivalMemoryWithFaiss,
     EmbeddingArchivalMemory,
 )
-from .utils import get_local_time, printd
+from memgpt.utils import get_local_time, printd
 
 
 class PersistenceManager(ABC):
@@ -35,73 +30,6 @@ class PersistenceManager(ABC):
         pass
 
 
-class InMemoryStateManager(PersistenceManager):
-    """In-memory state manager has nothing to manage, all agents are held in-memory"""
-
-    recall_memory_cls = DummyRecallMemory
-    archival_memory_cls = DummyArchivalMemory
-
-    def __init__(self):
-        # Memory held in-state useful for debugging stateful versions
-        self.memory = None
-        self.messages = []
-        self.all_messages = []
-
-    @staticmethod
-    def load(filename):
-        with open(filename, "rb") as f:
-            return pickle.load(f)
-
-    def save(self, filename):
-        with open(filename, "wb") as fh:
-            pickle.dump(self, fh, protocol=pickle.HIGHEST_PROTOCOL)
-
-    def init(self, agent):
-        printd(f"Initializing {self.__class__.__name__} with agent object")
-        self.all_messages = [{"timestamp": get_local_time(), "message": msg} for msg in agent.messages.copy()]
-        self.messages = [{"timestamp": get_local_time(), "message": msg} for msg in agent.messages.copy()]
-        self.memory = agent.memory
-        printd(f"InMemoryStateManager.all_messages.len = {len(self.all_messages)}")
-        printd(f"InMemoryStateManager.messages.len = {len(self.messages)}")
-
-        # Persistence manager also handles DB-related state
-        self.recall_memory = self.recall_memory_cls(message_database=self.all_messages)
-        self.archival_memory_db = []
-        self.archival_memory = self.archival_memory_cls(archival_memory_database=self.archival_memory_db)
-
-    def trim_messages(self, num):
-        # printd(f"InMemoryStateManager.trim_messages")
-        self.messages = [self.messages[0]] + self.messages[num:]
-
-    def prepend_to_messages(self, added_messages):
-        # first tag with timestamps
-        added_messages = [{"timestamp": get_local_time(), "message": msg} for msg in added_messages]
-
-        printd(f"{self.__class__.__name__}.prepend_to_message")
-        self.messages = [self.messages[0]] + added_messages + self.messages[1:]
-        self.all_messages.extend(added_messages)
-
-    def append_to_messages(self, added_messages):
-        # first tag with timestamps
-        added_messages = [{"timestamp": get_local_time(), "message": msg} for msg in added_messages]
-
-        printd(f"{self.__class__.__name__}.append_to_messages")
-        self.messages = self.messages + added_messages
-        self.all_messages.extend(added_messages)
-
-    def swap_system_message(self, new_system_message):
-        # first tag with timestamps
-        new_system_message = {"timestamp": get_local_time(), "message": new_system_message}
-
-        printd(f"{self.__class__.__name__}.swap_system_message")
-        self.messages[0] = new_system_message
-        self.all_messages.append(new_system_message)
-
-    def update_memory(self, new_memory):
-        printd(f"{self.__class__.__name__}.update_memory")
-        self.memory = new_memory
-
-
 class LocalStateManager(PersistenceManager):
     """In-memory state manager has nothing to manage, all agents are held in-memory"""
 
@@ -189,54 +117,3 @@ class LocalStateManager(PersistenceManager):
     def update_memory(self, new_memory):
         printd(f"{self.__class__.__name__}.update_memory")
         self.memory = new_memory
-
-
-class InMemoryStateManagerWithPreloadedArchivalMemory(InMemoryStateManager):
-    archival_memory_cls = DummyArchivalMemory
-    recall_memory_cls = DummyRecallMemory
-
-    def __init__(self, archival_memory_db):
-        self.archival_memory_db = archival_memory_db
-
-    def init(self, agent):
-        print(f"Initializing {self.__class__.__name__} with agent object")
-        self.all_messages = [{"timestamp": get_local_time(), "message": msg} for msg in agent.messages.copy()]
-        self.messages = [{"timestamp": get_local_time(), "message": msg} for msg in agent.messages.copy()]
-        self.memory = agent.memory
-        print(f"{self.__class__.__name__}.all_messages.len = {len(self.all_messages)}")
-        print(f"{self.__class__.__name__}.messages.len = {len(self.messages)}")
-        self.recall_memory = self.recall_memory_cls(message_database=self.all_messages)
-        self.archival_memory = self.archival_memory_cls(archival_memory_database=self.archival_memory_db)
-
-
-class InMemoryStateManagerWithEmbeddings(InMemoryStateManager):
-    archival_memory_cls = DummyArchivalMemoryWithEmbeddings
-    recall_memory_cls = DummyRecallMemoryWithEmbeddings
-
-
-class InMemoryStateManagerWithFaiss(InMemoryStateManager):
-    archival_memory_cls = DummyArchivalMemoryWithFaiss
-    recall_memory_cls = DummyRecallMemoryWithEmbeddings
-
-    def __init__(self, archival_index, archival_memory_db, a_k=100):
-        super().__init__()
-        self.archival_index = archival_index
-        self.archival_memory_db = archival_memory_db
-        self.a_k = a_k
-
-    def save(self, _filename):
-        raise NotImplementedError
-
-    def init(self, agent):
-        print(f"Initializing {self.__class__.__name__} with agent object")
-        self.all_messages = [{"timestamp": get_local_time(), "message": msg} for msg in agent.messages.copy()]
-        self.messages = [{"timestamp": get_local_time(), "message": msg} for msg in agent.messages.copy()]
-        self.memory = agent.memory
-        print(f"{self.__class__.__name__}.all_messages.len = {len(self.all_messages)}")
-        print(f"{self.__class__.__name__}.messages.len = {len(self.messages)}")
-
-        # Persistence manager also handles DB-related state
-        self.recall_memory = self.recall_memory_cls(message_database=self.all_messages)
-        self.archival_memory = self.archival_memory_cls(
-            index=self.archival_index, archival_memory_database=self.archival_memory_db, k=self.a_k
-        )
diff --git a/memgpt/personas/examples/docqa/README.md b/memgpt/personas/examples/docqa/README.md
deleted file mode 100644
index e4404165..00000000
--- a/memgpt/personas/examples/docqa/README.md
+++ /dev/null
@@ -1,35 +0,0 @@
-# MemGPT over LlamaIndex API Docs
-
-MemGPT enables you to chat with your data -- try running this example to talk to the LlamaIndex API docs!
-
-1.
-    a. Download LlamaIndex API docs and FAISS index from [Hugging Face](https://huggingface.co/datasets/MemGPT/llamaindex-api-docs).
-   ```bash
-   # Make sure you have git-lfs installed (https://git-lfs.com)
-   git lfs install
-   git clone https://huggingface.co/datasets/MemGPT/llamaindex-api-docs
-   ```
-
-    **-- OR --**
-
-    b. Build the index:
-    1. Build `llama_index` API docs with `make text`. Instructions [here](https://github.com/run-llama/llama_index/blob/main/docs/DOCS_README.md). Copy over the generated `_build/text` folder to this directory.
-    2. Generate embeddings and FAISS index.
-        ```bash
-        python3 scrape_docs.py
-        python3 generate_embeddings_for_docs.py all_docs.jsonl
-        python3 build_index.py --embedding_files all_docs.embeddings.jsonl --output_index_file all_docs.index
-        ```
-
-2. In the root `MemGPT` directory, run
-    ```bash
-    python3 main.py --archival_storage_faiss_path=<ARCHIVAL_STORAGE_FAISS_PATH> --persona=memgpt_doc --human=basic
-    ```
-    where `ARCHIVAL_STORAGE_FAISS_PATH` is the directory where `all_docs.jsonl` and `all_docs.index` are located.
-   If you downloaded from Hugging Face, it will be `memgpt/personas/docqa/llamaindex-api-docs`.
-   If you built the index yourself, it will be `memgpt/personas/docqa`.
-
-## Demo
-<div align="center">
-    <img src="https://memgpt.ai/assets/img/docqa_demo.gif" alt="MemGPT demo video for llamaindex api docs search" width="800">
-</div>
diff --git a/memgpt/personas/examples/docqa/build_index.py b/memgpt/personas/examples/docqa/build_index.py
deleted file mode 100644
index 94802395..00000000
--- a/memgpt/personas/examples/docqa/build_index.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import faiss
-from glob import glob
-from tqdm import tqdm
-import numpy as np
-import argparse
-import json
-
-
-def build_index(embedding_files: str, index_name: str):
-    index = faiss.IndexFlatL2(1536)
-    file_list = sorted(glob(embedding_files))
-
-    for embedding_file in file_list:
-        print(embedding_file)
-        with open(embedding_file, "rt", encoding="utf-8") as file:
-            embeddings = []
-            l = 0
-            for line in tqdm(file):
-                # Parse each JSON line
-                data = json.loads(line)
-                embeddings.append(data)
-                l += 1
-            data = np.array(embeddings).astype("float32")
-            print(data.shape)
-            try:
-                index.add(data)
-            except Exception as e:
-                print(data)
-                raise e
-
-    faiss.write_index(index, index_name)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument("--embedding_files", type=str, help="embedding_filepaths glob expression")
-    parser.add_argument("--output_index_file", type=str, help="output filepath")
-    args = parser.parse_args()
-
-    build_index(embedding_files=args.embedding_files, index_name=args.output_index_file)
diff --git a/memgpt/personas/examples/docqa/generate_embeddings_for_docs.py b/memgpt/personas/examples/docqa/generate_embeddings_for_docs.py
deleted file mode 100644
index 0c2d9479..00000000
--- a/memgpt/personas/examples/docqa/generate_embeddings_for_docs.py
+++ /dev/null
@@ -1,134 +0,0 @@
-import asyncio
-import json
-import os
-import logging
-import sys
-import argparse
-
-from tqdm import tqdm
-import openai
-
-try:
-    from dotenv import load_dotenv
-
-    load_dotenv()
-except ModuleNotFoundError:
-    pass
-openai.api_key = os.getenv("OPENAI_API_KEY")
-
-sys.path.append("../../../")
-from openai_tools import async_get_embedding_with_backoff
-from openai_parallel_request_processor import process_api_requests_from_file
-
-
-# some settings specific to our own OpenAI org limits
-# (specific to text-embedding-ada-002)
-TPM_LIMIT = 1000000
-RPM_LIMIT = 3000
-
-DEFAULT_FILE = "iclr/data/qa_data/30_total_documents/nq-open-30_total_documents_gold_at_0.jsonl.gz"
-EMBEDDING_MODEL = "text-embedding-ada-002"
-
-
-async def generate_requests_file(filename):
-    """Generate a file of requests, which we can feed to a pre-made openai cookbook function"""
-    base_name = os.path.splitext(filename)[0]
-    requests_filename = f"{base_name}_embedding_requests.jsonl"
-
-    with open(filename, "r") as f:
-        all_data = [json.loads(line) for line in f]
-
-    with open(requests_filename, "w") as f:
-        for data in all_data:
-            documents = data
-            for idx, doc in enumerate(documents):
-                title = doc["title"]
-                text = doc["text"]
-                document_string = f"Document [{idx+1}] (Title: {title}) {text}"
-                request = {"model": EMBEDDING_MODEL, "input": document_string}
-                json_string = json.dumps(request)
-                f.write(json_string + "\n")
-
-    # Run your parallel processing function
-    input(f"Generated requests file ({requests_filename}), continue with embedding batch requests? (hit enter)")
-    await process_api_requests_from_file(
-        requests_filepath=requests_filename,
-        save_filepath=f"{base_name}.embeddings.jsonl.gz",  # Adjust as necessary
-        request_url="https://api.openai.com/v1/embeddings",
-        api_key=os.getenv("OPENAI_API_KEY"),
-        max_requests_per_minute=RPM_LIMIT,
-        max_tokens_per_minute=TPM_LIMIT,
-        token_encoding_name=EMBEDDING_MODEL,
-        max_attempts=5,
-        logging_level=logging.INFO,
-    )
-
-
-async def generate_embedding_file(filename, parallel_mode=False):
-    if parallel_mode:
-        await generate_requests_file(filename)
-        return
-
-    # Derive the sister filename
-    # base_name = os.path.splitext(filename)[0]
-    base_name = filename.rsplit(".jsonl", 1)[0]
-    sister_filename = f"{base_name}.embeddings.jsonl"
-
-    # Check if the sister file already exists
-    if os.path.exists(sister_filename):
-        print(f"{sister_filename} already exists. Skipping embedding generation.")
-        return
-
-    with open(filename, "rt") as f:
-        all_data = [json.loads(line) for line in f]
-
-    embedding_data = []
-    total_documents = sum(len(data) for data in all_data)
-
-    # Outer loop progress bar
-    for i, data in enumerate(tqdm(all_data, desc="Processing data", total=len(all_data))):
-        documents = data
-        # Inner loop progress bar
-        for idx, doc in enumerate(
-            tqdm(documents, desc=f"Embedding documents for data {i+1}/{len(all_data)}", total=len(documents), leave=False)
-        ):
-            title = doc["title"]
-            text = doc["text"]
-            document_string = f"[Title: {title}] {text}"
-            try:
-                embedding = await async_get_embedding_with_backoff(document_string, model=EMBEDDING_MODEL)
-            except Exception as e:
-                print(document_string)
-                raise e
-            embedding_data.append(embedding)
-
-    # Save the embeddings to the sister file
-    # with gzip.open(sister_filename, 'wt') as f:
-    with open(sister_filename, "wb") as f:
-        for embedding in embedding_data:
-            # f.write(json.dumps(embedding) + '\n')
-            f.write((json.dumps(embedding) + "\n").encode("utf-8"))
-
-    print(f"Embeddings saved to {sister_filename}")
-
-
-async def main():
-    if len(sys.argv) > 1:
-        filename = sys.argv[1]
-    else:
-        filename = DEFAULT_FILE
-    await generate_embedding_file(filename)
-
-
-async def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("filename", nargs="?", default=DEFAULT_FILE, help="Path to the input file")
-    parser.add_argument("--parallel", action="store_true", help="Enable parallel mode")
-    args = parser.parse_args()
-
-    await generate_embedding_file(args.filename, parallel_mode=args.parallel)
-
-
-if __name__ == "__main__":
-    loop = asyncio.get_event_loop()
-    loop.run_until_complete(main())
diff --git a/memgpt/personas/examples/docqa/openai_parallel_request_processor.py b/memgpt/personas/examples/docqa/openai_parallel_request_processor.py
deleted file mode 100644
index 169bfd37..00000000
--- a/memgpt/personas/examples/docqa/openai_parallel_request_processor.py
+++ /dev/null
@@ -1,456 +0,0 @@
-"""
-API REQUEST PARALLEL PROCESSOR
-
-Using the OpenAI API to process lots of text quickly takes some care.
-If you trickle in a million API requests one by one, they'll take days to complete.
-If you flood a million API requests in parallel, they'll exceed the rate limits and fail with errors.
-To maximize throughput, parallel requests need to be throttled to stay under rate limits.
-
-This script parallelizes requests to the OpenAI API while throttling to stay under rate limits.
-
-Features:
-- Streams requests from file, to avoid running out of memory for giant jobs
-- Makes requests concurrently, to maximize throughput
-- Throttles request and token usage, to stay under rate limits
-- Retries failed requests up to {max_attempts} times, to avoid missing data
-- Logs errors, to diagnose problems with requests
-
-Example command to call script:
-```
-python examples/api_request_parallel_processor.py \
-  --requests_filepath examples/data/example_requests_to_parallel_process.jsonl \
-  --save_filepath examples/data/example_requests_to_parallel_process_results.jsonl \
-  --request_url https://api.openai.com/v1/embeddings \
-  --max_requests_per_minute 1500 \
-  --max_tokens_per_minute 6250000 \
-  --token_encoding_name cl100k_base \
-  --max_attempts 5 \
-  --logging_level 20
-```
-
-Inputs:
-- requests_filepath : str
-    - path to the file containing the requests to be processed
-    - file should be a jsonl file, where each line is a json object with API parameters and an optional metadata field
-    - e.g., {"model": "text-embedding-ada-002", "input": "embed me", "metadata": {"row_id": 1}}
-    - as with all jsonl files, take care that newlines in the content are properly escaped (json.dumps does this automatically)
-    - an example file is provided at examples/data/example_requests_to_parallel_process.jsonl
-    - the code to generate the example file is appended to the bottom of this script
-- save_filepath : str, optional
-    - path to the file where the results will be saved
-    - file will be a jsonl file, where each line is an array with the original request plus the API response
-    - e.g., [{"model": "text-embedding-ada-002", "input": "embed me"}, {...}]
-    - if omitted, results will be saved to {requests_filename}_results.jsonl
-- request_url : str, optional
-    - URL of the API endpoint to call
-    - if omitted, will default to "https://api.openai.com/v1/embeddings"
-- api_key : str, optional
-    - API key to use
-    - if omitted, the script will attempt to read it from an environment variable {os.getenv("OPENAI_API_KEY")}
-- max_requests_per_minute : float, optional
-    - target number of requests to make per minute (will make less if limited by tokens)
-    - leave headroom by setting this to 50% or 75% of your limit
-    - if requests are limiting you, try batching multiple embeddings or completions into one request
-    - if omitted, will default to 1,500
-- max_tokens_per_minute : float, optional
-    - target number of tokens to use per minute (will use less if limited by requests)
-    - leave headroom by setting this to 50% or 75% of your limit
-    - if omitted, will default to 125,000
-- token_encoding_name : str, optional
-    - name of the token encoding used, as defined in the `tiktoken` package
-    - if omitted, will default to "cl100k_base" (used by `text-embedding-ada-002`)
-- max_attempts : int, optional
-    - number of times to retry a failed request before giving up
-    - if omitted, will default to 5
-- logging_level : int, optional
-    - level of logging to use; higher numbers will log fewer messages
-    - 40 = ERROR; will log only when requests fail after all retries
-    - 30 = WARNING; will log when requests his rate limits or other errors
-    - 20 = INFO; will log when requests start and the status at finish
-    - 10 = DEBUG; will log various things as the loop runs to see when they occur
-    - if omitted, will default to 20 (INFO).
-
-The script is structured as follows:
-    - Imports
-    - Define main()
-        - Initialize things
-        - In main loop:
-            - Get next request if one is not already waiting for capacity
-            - Update available token & request capacity
-            - If enough capacity available, call API
-            - The loop pauses if a rate limit error is hit
-            - The loop breaks when no tasks remain
-    - Define dataclasses
-        - StatusTracker (stores script metadata counters; only one instance is created)
-        - APIRequest (stores API inputs, outputs, metadata; one method to call API)
-    - Define functions
-        - api_endpoint_from_url (extracts API endpoint from request URL)
-        - append_to_jsonl (writes to results file)
-        - num_tokens_consumed_from_request (bigger function to infer token usage from request)
-        - task_id_generator_function (yields 1, 2, 3, ...)
-    - Run main()
-"""
-
-# imports
-import aiohttp  # for making API calls concurrently
-import argparse  # for running script from command line
-import asyncio  # for running API calls concurrently
-import json  # for saving results to a jsonl file
-import logging  # for logging rate limit warnings and other messages
-import os  # for reading API key
-import re  # for matching endpoint from request URL
-import tiktoken  # for counting tokens
-import time  # for sleeping after rate limit is hit
-from dataclasses import (
-    dataclass,
-    field,
-)  # for storing API inputs, outputs, and metadata
-
-
-async def process_api_requests_from_file(
-    requests_filepath: str,
-    save_filepath: str,
-    request_url: str,
-    api_key: str,
-    max_requests_per_minute: float,
-    max_tokens_per_minute: float,
-    token_encoding_name: str,
-    max_attempts: int,
-    logging_level: int,
-):
-    """Processes API requests in parallel, throttling to stay under rate limits."""
-    # constants
-    seconds_to_pause_after_rate_limit_error = 15
-    seconds_to_sleep_each_loop = 0.001  # 1 ms limits max throughput to 1,000 requests per second
-
-    # initialize logging
-    logging.basicConfig(level=logging_level)
-    logging.debug(f"Logging initialized at level {logging_level}")
-
-    # infer API endpoint and construct request header
-    api_endpoint = api_endpoint_from_url(request_url)
-    request_header = {"Authorization": f"Bearer {api_key}"}
-
-    # initialize trackers
-    queue_of_requests_to_retry = asyncio.Queue()
-    task_id_generator = task_id_generator_function()  # generates integer IDs of 1, 2, 3, ...
-    status_tracker = StatusTracker()  # single instance to track a collection of variables
-    next_request = None  # variable to hold the next request to call
-
-    # initialize available capacity counts
-    available_request_capacity = max_requests_per_minute
-    available_token_capacity = max_tokens_per_minute
-    last_update_time = time.time()
-
-    # initialize flags
-    file_not_finished = True  # after file is empty, we'll skip reading it
-    logging.debug(f"Initialization complete.")
-
-    # initialize file reading
-    with open(requests_filepath) as file:
-        # `requests` will provide requests one at a time
-        requests = file.__iter__()
-        logging.debug(f"File opened. Entering main loop")
-        async with aiohttp.ClientSession() as session:  # Initialize ClientSession here
-            while True:
-                # get next request (if one is not already waiting for capacity)
-                if next_request is None:
-                    if not queue_of_requests_to_retry.empty():
-                        next_request = queue_of_requests_to_retry.get_nowait()
-                        logging.debug(f"Retrying request {next_request.task_id}: {next_request}")
-                    elif file_not_finished:
-                        try:
-                            # get new request
-                            request_json = json.loads(next(requests))
-                            next_request = APIRequest(
-                                task_id=next(task_id_generator),
-                                request_json=request_json,
-                                token_consumption=num_tokens_consumed_from_request(request_json, api_endpoint, token_encoding_name),
-                                attempts_left=max_attempts,
-                                metadata=request_json.pop("metadata", None),
-                            )
-                            status_tracker.num_tasks_started += 1
-                            status_tracker.num_tasks_in_progress += 1
-                            logging.debug(f"Reading request {next_request.task_id}: {next_request}")
-                        except StopIteration:
-                            # if file runs out, set flag to stop reading it
-                            logging.debug("Read file exhausted")
-                            file_not_finished = False
-
-                # update available capacity
-                current_time = time.time()
-                seconds_since_update = current_time - last_update_time
-                available_request_capacity = min(
-                    available_request_capacity + max_requests_per_minute * seconds_since_update / 60.0,
-                    max_requests_per_minute,
-                )
-                available_token_capacity = min(
-                    available_token_capacity + max_tokens_per_minute * seconds_since_update / 60.0,
-                    max_tokens_per_minute,
-                )
-                last_update_time = current_time
-
-                # if enough capacity available, call API
-                if next_request:
-                    next_request_tokens = next_request.token_consumption
-                    if available_request_capacity >= 1 and available_token_capacity >= next_request_tokens:
-                        # update counters
-                        available_request_capacity -= 1
-                        available_token_capacity -= next_request_tokens
-                        next_request.attempts_left -= 1
-
-                        # call API
-                        asyncio.create_task(
-                            next_request.call_api(
-                                session=session,
-                                request_url=request_url,
-                                request_header=request_header,
-                                retry_queue=queue_of_requests_to_retry,
-                                save_filepath=save_filepath,
-                                status_tracker=status_tracker,
-                            )
-                        )
-                        next_request = None  # reset next_request to empty
-
-                # if all tasks are finished, break
-                if status_tracker.num_tasks_in_progress == 0:
-                    break
-
-                # main loop sleeps briefly so concurrent tasks can run
-                await asyncio.sleep(seconds_to_sleep_each_loop)
-
-                # if a rate limit error was hit recently, pause to cool down
-                seconds_since_rate_limit_error = time.time() - status_tracker.time_of_last_rate_limit_error
-                if seconds_since_rate_limit_error < seconds_to_pause_after_rate_limit_error:
-                    remaining_seconds_to_pause = seconds_to_pause_after_rate_limit_error - seconds_since_rate_limit_error
-                    await asyncio.sleep(remaining_seconds_to_pause)
-                    # ^e.g., if pause is 15 seconds and final limit was hit 5 seconds ago
-                    logging.warn(
-                        f"Pausing to cool down until {time.ctime(status_tracker.time_of_last_rate_limit_error + seconds_to_pause_after_rate_limit_error)}"
-                    )
-
-        # after finishing, log final status
-        logging.info(f"""Parallel processing complete. Results saved to {save_filepath}""")
-        if status_tracker.num_tasks_failed > 0:
-            logging.warning(
-                f"{status_tracker.num_tasks_failed} / {status_tracker.num_tasks_started} requests failed. Errors logged to {save_filepath}."
-            )
-        if status_tracker.num_rate_limit_errors > 0:
-            logging.warning(f"{status_tracker.num_rate_limit_errors} rate limit errors received. Consider running at a lower rate.")
-
-
-# dataclasses
-
-
-@dataclass
-class StatusTracker:
-    """Stores metadata about the script's progress. Only one instance is created."""
-
-    num_tasks_started: int = 0
-    num_tasks_in_progress: int = 0  # script ends when this reaches 0
-    num_tasks_succeeded: int = 0
-    num_tasks_failed: int = 0
-    num_rate_limit_errors: int = 0
-    num_api_errors: int = 0  # excluding rate limit errors, counted above
-    num_other_errors: int = 0
-    time_of_last_rate_limit_error: int = 0  # used to cool off after hitting rate limits
-
-
-@dataclass
-class APIRequest:
-    """Stores an API request's inputs, outputs, and other metadata. Contains a method to make an API call."""
-
-    task_id: int
-    request_json: dict
-    token_consumption: int
-    attempts_left: int
-    metadata: dict
-    result: list = field(default_factory=list)
-
-    async def call_api(
-        self,
-        session: aiohttp.ClientSession,
-        request_url: str,
-        request_header: dict,
-        retry_queue: asyncio.Queue,
-        save_filepath: str,
-        status_tracker: StatusTracker,
-    ):
-        """Calls the OpenAI API and saves results."""
-        logging.info(f"Starting request #{self.task_id}")
-        error = None
-        try:
-            async with session.post(url=request_url, headers=request_header, json=self.request_json) as response:
-                response = await response.json()
-            if "error" in response:
-                logging.warning(f"Request {self.task_id} failed with error {response['error']}")
-                status_tracker.num_api_errors += 1
-                error = response
-                if "Rate limit" in response["error"].get("message", ""):
-                    status_tracker.time_of_last_rate_limit_error = time.time()
-                    status_tracker.num_rate_limit_errors += 1
-                    status_tracker.num_api_errors -= 1  # rate limit errors are counted separately
-
-        except Exception as e:  # catching naked exceptions is bad practice, but in this case we'll log & save them
-            logging.warning(f"Request {self.task_id} failed with Exception {e}")
-            status_tracker.num_other_errors += 1
-            error = e
-        if error:
-            self.result.append(error)
-            if self.attempts_left:
-                retry_queue.put_nowait(self)
-            else:
-                logging.error(f"Request {self.request_json} failed after all attempts. Saving errors: {self.result}")
-                data = (
-                    [self.request_json, [str(e) for e in self.result], self.metadata]
-                    if self.metadata
-                    else [self.request_json, [str(e) for e in self.result]]
-                )
-                append_to_jsonl(data, save_filepath)
-                status_tracker.num_tasks_in_progress -= 1
-                status_tracker.num_tasks_failed += 1
-        else:
-            data = [self.request_json, response, self.metadata] if self.metadata else [self.request_json, response]
-            append_to_jsonl(data, save_filepath)
-            status_tracker.num_tasks_in_progress -= 1
-            status_tracker.num_tasks_succeeded += 1
-            logging.debug(f"Request {self.task_id} saved to {save_filepath}")
-
-
-# functions
-
-
-def api_endpoint_from_url(request_url):
-    """Extract the API endpoint from the request URL."""
-    match = re.search("^https://[^/]+/v\\d+/(.+)$", request_url)
-    return match[1]
-
-
-def append_to_jsonl(data, filename: str) -> None:
-    """Append a json payload to the end of a jsonl file."""
-    json_string = json.dumps(data)
-    with open(filename, "a") as f:
-        f.write(json_string + "\n")
-
-
-def num_tokens_consumed_from_request(
-    request_json: dict,
-    api_endpoint: str,
-    token_encoding_name: str,
-):
-    """Count the number of tokens in the request. Only supports completion and embedding requests."""
-    if token_encoding_name == "text-embedding-ada-002":
-        encoding = tiktoken.get_encoding("cl100k_base")
-    else:
-        encoding = tiktoken.get_encoding(token_encoding_name)
-    # if completions request, tokens = prompt + n * max_tokens
-    if api_endpoint.endswith("completions"):
-        max_tokens = request_json.get("max_tokens", 15)
-        n = request_json.get("n", 1)
-        completion_tokens = n * max_tokens
-
-        # chat completions
-        if api_endpoint.startswith("chat/"):
-            num_tokens = 0
-            for message in request_json["messages"]:
-                num_tokens += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
-                for key, value in message.items():
-                    num_tokens += len(encoding.encode(value))
-                    if key == "name":  # if there's a name, the role is omitted
-                        num_tokens -= 1  # role is always required and always 1 token
-            num_tokens += 2  # every reply is primed with <im_start>assistant
-            return num_tokens + completion_tokens
-        # normal completions
-        else:
-            prompt = request_json["prompt"]
-            if isinstance(prompt, str):  # single prompt
-                prompt_tokens = len(encoding.encode(prompt))
-                num_tokens = prompt_tokens + completion_tokens
-                return num_tokens
-            elif isinstance(prompt, list):  # multiple prompts
-                prompt_tokens = sum([len(encoding.encode(p)) for p in prompt])
-                num_tokens = prompt_tokens + completion_tokens * len(prompt)
-                return num_tokens
-            else:
-                raise TypeError('Expecting either string or list of strings for "prompt" field in completion request')
-    # if embeddings request, tokens = input tokens
-    elif api_endpoint == "embeddings":
-        input = request_json["input"]
-        if isinstance(input, str):  # single input
-            num_tokens = len(encoding.encode(input))
-            return num_tokens
-        elif isinstance(input, list):  # multiple inputs
-            num_tokens = sum([len(encoding.encode(i)) for i in input])
-            return num_tokens
-        else:
-            raise TypeError('Expecting either string or list of strings for "inputs" field in embedding request')
-    # more logic needed to support other API calls (e.g., edits, inserts, DALL-E)
-    else:
-        raise NotImplementedError(f'API endpoint "{api_endpoint}" not implemented in this script')
-
-
-def task_id_generator_function():
-    """Generate integers 0, 1, 2, and so on."""
-    task_id = 0
-    while True:
-        yield task_id
-        task_id += 1
-
-
-# run script
-
-
-if __name__ == "__main__":
-    # parse command line arguments
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--requests_filepath")
-    parser.add_argument("--save_filepath", default=None)
-    parser.add_argument("--request_url", default="https://api.openai.com/v1/embeddings")
-    parser.add_argument("--api_key", default=os.getenv("OPENAI_API_KEY"))
-    parser.add_argument("--max_requests_per_minute", type=int, default=3_000 * 0.5)
-    parser.add_argument("--max_tokens_per_minute", type=int, default=250_000 * 0.5)
-    parser.add_argument("--token_encoding_name", default="cl100k_base")
-    parser.add_argument("--max_attempts", type=int, default=5)
-    parser.add_argument("--logging_level", default=logging.INFO)
-    args = parser.parse_args()
-
-    if args.save_filepath is None:
-        args.save_filepath = args.requests_filepath.replace(".jsonl", "_results.jsonl")
-
-    # run script
-    asyncio.run(
-        process_api_requests_from_file(
-            requests_filepath=args.requests_filepath,
-            save_filepath=args.save_filepath,
-            request_url=args.request_url,
-            api_key=args.api_key,
-            max_requests_per_minute=float(args.max_requests_per_minute),
-            max_tokens_per_minute=float(args.max_tokens_per_minute),
-            token_encoding_name=args.token_encoding_name,
-            max_attempts=int(args.max_attempts),
-            logging_level=int(args.logging_level),
-        )
-    )
-
-
-"""
-APPENDIX
-
-The example requests file at openai-cookbook/examples/data/example_requests_to_parallel_process.jsonl contains 10,000 requests to text-embedding-ada-002.
-
-It was generated with the following code:
-
-```python
-import json
-
-filename = "data/example_requests_to_parallel_process.jsonl"
-n_requests = 10_000
-jobs = [{"model": "text-embedding-ada-002", "input": str(x) + "\n"} for x in range(n_requests)]
-with open(filename, "w") as f:
-    for job in jobs:
-        json_string = json.dumps(job)
-        f.write(json_string + "\n")
-```
-
-As with all jsonl files, take care that newlines in the content are properly escaped (json.dumps does this automatically).
-"""
diff --git a/memgpt/personas/examples/docqa/scrape_docs.py b/memgpt/personas/examples/docqa/scrape_docs.py
deleted file mode 100644
index f02df414..00000000
--- a/memgpt/personas/examples/docqa/scrape_docs.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import os
-import re
-import tiktoken
-import json
-
-# Define the directory where the documentation resides
-docs_dir = "text"
-
-encoding = tiktoken.encoding_for_model("gpt-4")
-PASSAGE_TOKEN_LEN = 800
-
-
-def extract_text_from_sphinx_txt(file_path):
-    lines = []
-    title = ""
-    with open(file_path, "r", encoding="utf-8") as file:
-        for line in file:
-            if not title:
-                title = line.strip()
-                continue
-            if line and re.match(r"^.*\S.*$", line) and not re.match(r"^[-=*]+$", line):
-                lines.append(line)
-    passages = []
-    curr_passage = []
-    curr_token_ct = 0
-    for line in lines:
-        try:
-            line_token_ct = len(encoding.encode(line, allowed_special={"<|endoftext|>"}))
-        except Exception as e:
-            print("line", line)
-            raise e
-        if line_token_ct > PASSAGE_TOKEN_LEN:
-            passages.append(
-                {
-                    "title": title,
-                    "text": line[:3200],
-                    "num_tokens": curr_token_ct,
-                }
-            )
-            continue
-        curr_token_ct += line_token_ct
-        curr_passage.append(line)
-        if curr_token_ct > PASSAGE_TOKEN_LEN:
-            passages.append({"title": title, "text": "".join(curr_passage), "num_tokens": curr_token_ct})
-            curr_passage = []
-            curr_token_ct = 0
-
-    if len(curr_passage) > 0:
-        passages.append({"title": title, "text": "".join(curr_passage), "num_tokens": curr_token_ct})
-    return passages
-
-
-# Iterate over all files in the directory and its subdirectories
-passages = []
-total_files = 0
-for subdir, _, files in os.walk(docs_dir):
-    for file in files:
-        if file.endswith(".txt"):
-            file_path = os.path.join(subdir, file)
-            passages.append(extract_text_from_sphinx_txt(file_path))
-            total_files += 1
-print("total .txt files:", total_files)
-
-# Save to a new text file or process as needed
-with open("all_docs.jsonl", "w", encoding="utf-8") as file:
-    for p in passages:
-        file.write(json.dumps(p))
-        file.write("\n")
diff --git a/memgpt/personas/examples/preload_archival/README.md b/memgpt/personas/examples/preload_archival/README.md
deleted file mode 100644
index 126d4bfe..00000000
--- a/memgpt/personas/examples/preload_archival/README.md
+++ /dev/null
@@ -1,19 +0,0 @@
-# Preloading Archival Memory with Files
-MemGPT enables you to chat with your data locally -- this example gives the workflow for loading documents into MemGPT's archival memory.
-
-To run our example where you can search over the SEC 10-K filings of Uber, Lyft, and Airbnb,
-
-1. Download the .txt files from [Hugging Face](https://huggingface.co/datasets/MemGPT/example-sec-filings/tree/main) and place them in this directory.
-
-2. In the root `MemGPT` directory, run
-    ```bash
-    python3 main.py --archival_storage_files="memgpt/personas/examples/preload_archival/*.txt" --persona=memgpt_doc --human=basic
-    ```
-
-
-If you would like to load your own local files into MemGPT's archival memory, run the command above but replace `--archival_storage_files="memgpt/personas/examples/preload_archival/*.txt"` with your own file glob expression (enclosed in quotes).
-
-## Demo
-<div align="center">
-    <img src="https://memgpt.ai/assets/img/preload_archival_demo.gif" alt="MemGPT demo video for searching through preloaded files" width="800">
-</div>
diff --git a/memgpt/personas/personas.py b/memgpt/personas/personas.py
deleted file mode 100644
index 9d15938e..00000000
--- a/memgpt/personas/personas.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import os
-
-DEFAULT = "sam_pov"
-GPT35_DEFAULT = "sam_simple_pov_gpt35"
-
-
-def get_persona_text(key=DEFAULT, dir=None):
-    if dir is None:
-        dir = os.path.join(os.path.dirname(__file__), "examples")
-    filename = key if key.endswith(".txt") else f"{key}.txt"
-    file_path = os.path.join(dir, filename)
-
-    if os.path.exists(file_path):
-        with open(file_path, "r") as file:
-            return file.read().strip()
-    else:
-        raise FileNotFoundError(f"No file found for key {key}, path={file_path}")
diff --git a/memgpt/utils.py b/memgpt/utils.py
index c6b371dd..c9c57529 100644
--- a/memgpt/utils.py
+++ b/memgpt/utils.py
@@ -1,24 +1,16 @@
 from datetime import datetime
-import csv
 import difflib
 import demjson3 as demjson
-import numpy as np
 import json
 import pytz
 import os
 import tiktoken
-import glob
-import sqlite3
-import fitz
-from tqdm import tqdm
-import typer
 import memgpt
-from memgpt.openai_tools import get_embedding_with_backoff
 from memgpt.constants import MEMGPT_DIR
-from llama_index import set_global_service_context, ServiceContext, VectorStoreIndex, load_index_from_storage, StorageContext
-from llama_index.embeddings import OpenAIEmbedding
 
-from concurrent.futures import ThreadPoolExecutor, as_completed
+# TODO: what is this?
+# DEBUG = True
+DEBUG = False
 
 
 def count_tokens(s: str, model: str = "gpt-4") -> int:
@@ -26,19 +18,11 @@ def count_tokens(s: str, model: str = "gpt-4") -> int:
     return len(encoding.encode(s))
 
 
-# DEBUG = True
-DEBUG = False
-
-
 def printd(*args, **kwargs):
     if DEBUG:
         print(*args, **kwargs)
 
 
-def cosine_similarity(a, b):
-    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
-
-
 def united_diff(str1, str2):
     lines1 = str1.splitlines(True)
     lines2 = str2.splitlines(True)
@@ -88,6 +72,7 @@ def get_local_time(timezone=None):
 
 
 def parse_json(string):
+    """Parse JSON string into JSON with both json and demjson"""
     result = None
     try:
         result = json.loads(string)
@@ -103,273 +88,6 @@ def parse_json(string):
         raise e
 
 
-def prepare_archival_index(folder):
-    import faiss
-
-    index_file = os.path.join(folder, "all_docs.index")
-    index = faiss.read_index(index_file)
-
-    archival_database_file = os.path.join(folder, "all_docs.jsonl")
-    archival_database = []
-    with open(archival_database_file, "rt") as f:
-        all_data = [json.loads(line) for line in f]
-    for doc in all_data:
-        total = len(doc)
-        for i, passage in enumerate(doc):
-            archival_database.append(
-                {
-                    "content": f"[Title: {passage['title']}, {i}/{total}] {passage['text']}",
-                    "timestamp": get_local_time(),
-                }
-            )
-    return index, archival_database
-
-
-def read_in_chunks(file_object, chunk_size):
-    while True:
-        data = file_object.read(chunk_size)
-        if not data:
-            break
-        yield data
-
-
-def read_pdf_in_chunks(file, chunk_size):
-    doc = fitz.open(file)
-    for page in doc:
-        text = page.get_text()
-        yield text
-
-
-def read_in_rows_csv(file_object, chunk_size):
-    csvreader = csv.reader(file_object)
-    header = next(csvreader)
-    for row in csvreader:
-        next_row_terms = []
-        for h, v in zip(header, row):
-            next_row_terms.append(f"{h}={v}")
-        next_row_str = ", ".join(next_row_terms)
-        yield next_row_str
-
-
-def prepare_archival_index_from_files(glob_pattern, tkns_per_chunk=300, model="gpt-4"):
-    encoding = tiktoken.encoding_for_model(model)
-    files = glob.glob(glob_pattern, recursive=True)
-    return chunk_files(files, tkns_per_chunk, model)
-
-
-def total_bytes(pattern):
-    total = 0
-    for filename in glob.glob(pattern, recursive=True):
-        if os.path.isfile(filename):  # ensure it's a file and not a directory
-            total += os.path.getsize(filename)
-    return total
-
-
-def chunk_file(file, tkns_per_chunk=300, model="gpt-4"):
-    encoding = tiktoken.encoding_for_model(model)
-
-    if file.endswith(".db"):
-        return  # can't read the sqlite db this way, will get handled in main.py
-
-    with open(file, "r") as f:
-        if file.endswith(".pdf"):
-            lines = [l for l in read_pdf_in_chunks(file, tkns_per_chunk * 8)]
-            if len(lines) == 0:
-                print(f"Warning: {file} did not have any extractable text.")
-        elif file.endswith(".csv"):
-            lines = [l for l in read_in_rows_csv(f, tkns_per_chunk * 8)]
-        else:
-            lines = [l for l in read_in_chunks(f, tkns_per_chunk * 4)]
-    curr_chunk = []
-    curr_token_ct = 0
-    for i, line in enumerate(lines):
-        line = line.rstrip()
-        line = line.lstrip()
-        line += "\n"
-        try:
-            line_token_ct = len(encoding.encode(line))
-        except Exception as e:
-            line_token_ct = len(line.split(" ")) / 0.75
-            print(f"Could not encode line {i}, estimating it to be {line_token_ct} tokens")
-            print(e)
-        if line_token_ct > tkns_per_chunk:
-            if len(curr_chunk) > 0:
-                yield "".join(curr_chunk)
-                curr_chunk = []
-                curr_token_ct = 0
-            yield line[:3200]
-            continue
-        curr_token_ct += line_token_ct
-        curr_chunk.append(line)
-        if curr_token_ct > tkns_per_chunk:
-            yield "".join(curr_chunk)
-            curr_chunk = []
-            curr_token_ct = 0
-
-    if len(curr_chunk) > 0:
-        yield "".join(curr_chunk)
-
-
-def chunk_files(files, tkns_per_chunk=300, model="gpt-4"):
-    archival_database = []
-    for file in files:
-        timestamp = os.path.getmtime(file)
-        formatted_time = datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %I:%M:%S %p %Z%z")
-        file_stem = file.split(os.sep)[-1]
-        chunks = [c for c in chunk_file(file, tkns_per_chunk, model)]
-        for i, chunk in enumerate(chunks):
-            archival_database.append(
-                {
-                    "content": f"[File: {file_stem} Part {i}/{len(chunks)}] {chunk}",
-                    "timestamp": formatted_time,
-                }
-            )
-    return archival_database
-
-
-def chunk_files_for_jsonl(files, tkns_per_chunk=300, model="gpt-4"):
-    ret = []
-    for file in files:
-        file_stem = file.split(os.sep)[-1]
-        curr_file = []
-        for chunk in chunk_file(file, tkns_per_chunk, model):
-            curr_file.append(
-                {
-                    "title": file_stem,
-                    "text": chunk,
-                }
-            )
-        ret.append(curr_file)
-    return ret
-
-
-def process_chunk(i, chunk, model):
-    try:
-        return i, get_embedding_with_backoff(chunk["content"], model=model)
-    except Exception as e:
-        print(chunk)
-        raise e
-
-
-def process_concurrently(archival_database, model, concurrency=10):
-    embedding_data = [0 for _ in archival_database]
-    with ThreadPoolExecutor(max_workers=concurrency) as executor:
-        # Submit tasks to the executor
-        future_to_chunk = {executor.submit(process_chunk, i, chunk, model): i for i, chunk in enumerate(archival_database)}
-
-        # As each task completes, process the results
-        for future in tqdm(as_completed(future_to_chunk), total=len(archival_database), desc="Processing file chunks"):
-            i, result = future.result()
-            embedding_data[i] = result
-    return embedding_data
-
-
-def prepare_archival_index_from_files_compute_embeddings(
-    glob_pattern,
-    tkns_per_chunk=300,
-    model="gpt-4",
-    embeddings_model="text-embedding-ada-002",
-):
-    files = sorted(glob.glob(glob_pattern, recursive=True))
-    save_dir = os.path.join(
-        MEMGPT_DIR,
-        "archival_index_from_files_" + get_local_time().replace(" ", "_").replace(":", "_"),
-    )
-    os.makedirs(save_dir, exist_ok=True)
-    total_tokens = total_bytes(glob_pattern) / 3
-    price_estimate = total_tokens / 1000 * 0.0001
-    confirm = input(f"Computing embeddings over {len(files)} files. This will cost ~${price_estimate:.2f}. Continue? [y/n] ")
-    if confirm != "y":
-        raise Exception("embeddings were not computed")
-
-    # chunk the files, make embeddings
-    archival_database = chunk_files(files, tkns_per_chunk, model)
-    embedding_data = process_concurrently(archival_database, embeddings_model)
-    embeddings_file = os.path.join(save_dir, "embeddings.json")
-    with open(embeddings_file, "w") as f:
-        print(f"Saving embeddings to {embeddings_file}")
-        json.dump(embedding_data, f)
-
-    # make all_text.json
-    archival_storage_file = os.path.join(save_dir, "all_docs.jsonl")
-    chunks_by_file = chunk_files_for_jsonl(files, tkns_per_chunk, model)
-    with open(archival_storage_file, "w") as f:
-        print(f"Saving archival storage with preloaded files to {archival_storage_file}")
-        for c in chunks_by_file:
-            json.dump(c, f)
-            f.write("\n")
-
-    # make the faiss index
-    import faiss
-
-    index = faiss.IndexFlatL2(1536)
-    data = np.array(embedding_data).astype("float32")
-    try:
-        index.add(data)
-    except Exception as e:
-        print(data)
-        raise e
-    index_file = os.path.join(save_dir, "all_docs.index")
-    print(f"Saving faiss index {index_file}")
-    faiss.write_index(index, index_file)
-    return save_dir
-
-
-def read_database_as_list(database_name):
-    result_list = []
-
-    try:
-        conn = sqlite3.connect(database_name)
-        cursor = conn.cursor()
-        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
-        table_names = cursor.fetchall()
-        for table_name in table_names:
-            cursor.execute(f"PRAGMA table_info({table_name[0]});")
-            schema_rows = cursor.fetchall()
-            columns = [row[1] for row in schema_rows]
-            cursor.execute(f"SELECT * FROM {table_name[0]};")
-            rows = cursor.fetchall()
-            result_list.append(f"Table: {table_name[0]}")  # Add table name to the list
-            schema_row = "\t".join(columns)
-            result_list.append(schema_row)
-            for row in rows:
-                data_row = "\t".join(map(str, row))
-                result_list.append(data_row)
-        conn.close()
-    except sqlite3.Error as e:
-        result_list.append(f"Error reading database: {str(e)}")
-    except Exception as e:
-        result_list.append(f"Error: {str(e)}")
-    return result_list
-
-
-def estimate_openai_cost(docs):
-    """Estimate OpenAI embedding cost
-
-    :param docs: Documents to be embedded
-    :type docs: List[Document]
-    :return: Estimated cost
-    :rtype: float
-    """
-    from llama_index import MockEmbedding
-    from llama_index.callbacks import CallbackManager, TokenCountingHandler
-    import tiktoken
-
-    embed_model = MockEmbedding(embed_dim=1536)
-
-    token_counter = TokenCountingHandler(tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode)
-
-    callback_manager = CallbackManager([token_counter])
-
-    set_global_service_context(ServiceContext.from_defaults(embed_model=embed_model, callback_manager=callback_manager))
-    index = VectorStoreIndex.from_documents(docs)
-
-    # estimate cost
-    cost = 0.0001 * token_counter.total_embedding_token_count / 1000
-    token_counter.reset_counts()
-    return cost
-
-
 def list_agent_config_files():
     """List all agent config files, ignoring dotfiles."""
     files = os.listdir(os.path.join(MEMGPT_DIR, "agents"))
diff --git a/poetry.lock b/poetry.lock
index ecb8d852..8184c389 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
@@ -566,40 +566,6 @@ files = [
 [package.extras]
 test = ["pytest (>=6)"]
 
-[[package]]
-name = "faiss-cpu"
-version = "1.7.4"
-description = "A library for efficient similarity search and clustering of dense vectors."
-optional = true
-python-versions = "*"
-files = [
-    {file = "faiss-cpu-1.7.4.tar.gz", hash = "sha256:265dc31b0c079bf4433303bf6010f73922490adff9188b915e2d3f5e9c82dd0a"},
-    {file = "faiss_cpu-1.7.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:50d4ebe7f1869483751c558558504f818980292a9b55be36f9a1ee1009d9a686"},
-    {file = "faiss_cpu-1.7.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7b1db7fae7bd8312aeedd0c41536bcd19a6e297229e1dce526bde3a73ab8c0b5"},
-    {file = "faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17b7fa7194a228a84929d9e6619d0e7dbf00cc0f717e3462253766f5e3d07de8"},
-    {file = "faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dca531952a2e3eac56f479ff22951af4715ee44788a3fe991d208d766d3f95f3"},
-    {file = "faiss_cpu-1.7.4-cp310-cp310-win_amd64.whl", hash = "sha256:7173081d605e74766f950f2e3d6568a6f00c53f32fd9318063e96728c6c62821"},
-    {file = "faiss_cpu-1.7.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d0bbd6f55d7940cc0692f79e32a58c66106c3c950cee2341b05722de9da23ea3"},
-    {file = "faiss_cpu-1.7.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e13c14280376100f143767d0efe47dcb32618f69e62bbd3ea5cd38c2e1755926"},
-    {file = "faiss_cpu-1.7.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c521cb8462f3b00c0c7dfb11caff492bb67816528b947be28a3b76373952c41d"},
-    {file = "faiss_cpu-1.7.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afdd9fe1141117fed85961fd36ee627c83fc3b9fd47bafb52d3c849cc2f088b7"},
-    {file = "faiss_cpu-1.7.4-cp311-cp311-win_amd64.whl", hash = "sha256:2ff7f57889ea31d945e3b87275be3cad5d55b6261a4e3f51c7aba304d76b81fb"},
-    {file = "faiss_cpu-1.7.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:eeaf92f27d76249fb53c1adafe617b0f217ab65837acf7b4ec818511caf6e3d8"},
-    {file = "faiss_cpu-1.7.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:102b1bd763e9b0c281ac312590af3eaf1c8b663ccbc1145821fe6a9f92b8eaaf"},
-    {file = "faiss_cpu-1.7.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5512da6707c967310c46ff712b00418b7ae28e93cb609726136e826e9f2f14fa"},
-    {file = "faiss_cpu-1.7.4-cp37-cp37m-win_amd64.whl", hash = "sha256:0c2e5b9d8c28c99f990e87379d5bbcc6c914da91ebb4250166864fd12db5755b"},
-    {file = "faiss_cpu-1.7.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:43f67f325393145d360171cd98786fcea6120ce50397319afd3bb78be409fb8a"},
-    {file = "faiss_cpu-1.7.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6a4e4af194b8fce74c4b770cad67ad1dd1b4673677fc169723e4c50ba5bd97a8"},
-    {file = "faiss_cpu-1.7.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31bfb7b9cffc36897ae02a983e04c09fe3b8c053110a287134751a115334a1df"},
-    {file = "faiss_cpu-1.7.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52d7de96abef2340c0d373c1f5cbc78026a3cebb0f8f3a5920920a00210ead1f"},
-    {file = "faiss_cpu-1.7.4-cp38-cp38-win_amd64.whl", hash = "sha256:699feef85b23c2c729d794e26ca69bebc0bee920d676028c06fd0e0becc15c7e"},
-    {file = "faiss_cpu-1.7.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:559a0133f5ed44422acb09ee1ac0acffd90c6666d1bc0d671c18f6e93ad603e2"},
-    {file = "faiss_cpu-1.7.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ea1d71539fe3dc0f1bed41ef954ca701678776f231046bf0ca22ccea5cf5bef6"},
-    {file = "faiss_cpu-1.7.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:12d45e0157024eb3249842163162983a1ac8b458f1a8b17bbf86f01be4585a99"},
-    {file = "faiss_cpu-1.7.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f0eab359e066d32c874f51a7d4bf6440edeec068b7fe47e6d803c73605a8b4c"},
-    {file = "faiss_cpu-1.7.4-cp39-cp39-win_amd64.whl", hash = "sha256:98459ceeeb735b9df1a5b94572106ffe0a6ce740eb7e4626715dd218657bb4dc"},
-]
-
 [[package]]
 name = "filelock"
 version = "3.13.1"
@@ -1046,13 +1012,13 @@ tests = ["pandas (>=1.4)", "pytest", "pytest-asyncio", "pytest-mock", "requests"
 
 [[package]]
 name = "langchain"
-version = "0.0.342"
+version = "0.0.343"
 description = "Building applications with LLMs through composability"
 optional = false
 python-versions = ">=3.8.1,<4.0"
 files = [
-    {file = "langchain-0.0.342-py3-none-any.whl", hash = "sha256:83c37898226666e0176d093f57fa49e176486608ef4c617a65aadf0b038ba0ec"},
-    {file = "langchain-0.0.342.tar.gz", hash = "sha256:06341ee0b034847cbcea4b40a0a26b270abb6fd1237437735187c44d30a7a24d"},
+    {file = "langchain-0.0.343-py3-none-any.whl", hash = "sha256:1959336b6076066bf233dd99dce44be2e9adccb53d799bff92c653098178b347"},
+    {file = "langchain-0.0.343.tar.gz", hash = "sha256:166924d771a463009277f688f6dfc829a3af2d9cd5b41a64a7a6bd7860280e81"},
 ]
 
 [package.dependencies]
@@ -1071,14 +1037,14 @@ SQLAlchemy = ">=1.4,<3"
 tenacity = ">=8.1.0,<9.0.0"
 
 [package.extras]
-all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.8.3,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.13.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<4)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.6.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"]
+all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.8.3,<4.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.13.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<4)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.6.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"]
 azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (<2)"]
 clarifai = ["clarifai (>=9.1.0)"]
 cli = ["typer (>=0.9.0,<0.10.0)"]
 cohere = ["cohere (>=4,<5)"]
 docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"]
 embeddings = ["sentence-transformers (>=2,<3)"]
-extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.6.0,<0.7.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
+extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.6.0,<0.7.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
 javascript = ["esprima (>=4.0.1,<5.0.0)"]
 llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"]
 openai = ["openai (<2)", "tiktoken (>=0.3.2,<0.6.0)"]
@@ -2344,64 +2310,6 @@ benchmarks = ["pytest-benchmark"]
 tests = ["duckdb", "ml_dtypes", "pandas (>=1.4,<2.1)", "polars[pandas,pyarrow]", "pytest", "semver", "tensorflow", "tqdm"]
 torch = ["torch"]
 
-[[package]]
-name = "pymupdf"
-version = "1.23.6"
-description = "A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents."
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "PyMuPDF-1.23.6-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:c4eb71b88a22c1008f764b3121b36a9d25340f9920b870508356050a365d9ca1"},
-    {file = "PyMuPDF-1.23.6-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:3ce2d3678dbf822cff213b1902f2e59756313e543efd516a2b4f15bb0353bd6c"},
-    {file = "PyMuPDF-1.23.6-cp310-none-manylinux2014_aarch64.whl", hash = "sha256:2e27857a15c8a810d0b66455b8c8a79013640b6267a9b4ea808a5fe1f47711f2"},
-    {file = "PyMuPDF-1.23.6-cp310-none-manylinux2014_x86_64.whl", hash = "sha256:5cd05700c8f18c9dafef63ac2ed3b1099ca06017ca0c32deea13093cea1b8671"},
-    {file = "PyMuPDF-1.23.6-cp310-none-win32.whl", hash = "sha256:951d280c1daafac2fd6a664b031f7f98b27eb2def55d39c92a19087bd8041c5d"},
-    {file = "PyMuPDF-1.23.6-cp310-none-win_amd64.whl", hash = "sha256:19d1711d5908c4527ad2deef5af2d066649f3f9a12950faf30be5f7251d18abc"},
-    {file = "PyMuPDF-1.23.6-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:3f0f9b76bc4f039e7587003cbd40684d93a98441549dd033cab38ca07d61988d"},
-    {file = "PyMuPDF-1.23.6-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:e047571d799b30459ad7ee0bc6e68900a7f6b928876f956c976f279808814e72"},
-    {file = "PyMuPDF-1.23.6-cp311-none-manylinux2014_aarch64.whl", hash = "sha256:1cbcf05c06f314fdf3042ceee674e9a0ac7fae598347d5442e2138c6046d4e82"},
-    {file = "PyMuPDF-1.23.6-cp311-none-manylinux2014_x86_64.whl", hash = "sha256:e33f8ec5ba7265fe78b30332840b8f454184addfa79f9c27f160f19789aa5ffd"},
-    {file = "PyMuPDF-1.23.6-cp311-none-win32.whl", hash = "sha256:2c141f33e2733e48de8524dfd2de56d889feef0c7773b20a8cd216c03ab24793"},
-    {file = "PyMuPDF-1.23.6-cp311-none-win_amd64.whl", hash = "sha256:8fd9c4ee1dd4744a515b9190d8ba9133348b0d94c362293ed77726aa1c13b0a6"},
-    {file = "PyMuPDF-1.23.6-cp312-none-macosx_10_9_x86_64.whl", hash = "sha256:4d06751d5cd213e96f84f2faaa71a51cf4d641851e07579247ca1190121f173b"},
-    {file = "PyMuPDF-1.23.6-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:526b26a5207e923aab65877ad305644402851823a352cb92d362053426899354"},
-    {file = "PyMuPDF-1.23.6-cp312-none-manylinux2014_aarch64.whl", hash = "sha256:0f852d125defc26716878b1796f4d68870e9065041d00cf46bde317fd8d30e68"},
-    {file = "PyMuPDF-1.23.6-cp312-none-manylinux2014_x86_64.whl", hash = "sha256:5bdf7020b90987412381acc42427dd1b7a03d771ee9ec273de003e570164ec1a"},
-    {file = "PyMuPDF-1.23.6-cp312-none-win32.whl", hash = "sha256:e2d64799c6d9a3735be9e162a5d11061c0b7fbcb1e5fc7446e0993d0f815a93a"},
-    {file = "PyMuPDF-1.23.6-cp312-none-win_amd64.whl", hash = "sha256:c8ea81964c1433ea163ad4b53c56053a87a9ef6e1bd7a879d4d368a3988b60d1"},
-    {file = "PyMuPDF-1.23.6-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:761501a4965264e81acdd8f2224f993020bf24474e9b34fcdb5805a6826eda1c"},
-    {file = "PyMuPDF-1.23.6-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:fd8388e82b6045807d19addf310d8119d32908e89f76cc8bbf8cf1ec36fce947"},
-    {file = "PyMuPDF-1.23.6-cp38-none-manylinux2014_aarch64.whl", hash = "sha256:4ac9673a6d6ee7e80cb242dacb43f9ca097b502d9c5e44687dbdffc2bce7961a"},
-    {file = "PyMuPDF-1.23.6-cp38-none-manylinux2014_x86_64.whl", hash = "sha256:6e319c1f49476e07b9a12017c2d031687617713f8a46b7adcec03c636ed04607"},
-    {file = "PyMuPDF-1.23.6-cp38-none-win32.whl", hash = "sha256:1103eea4ab727e32b9cb93347b35f71562033018c333a7f3a17d115e980fea4a"},
-    {file = "PyMuPDF-1.23.6-cp38-none-win_amd64.whl", hash = "sha256:991a37e1cba43775ce094da87cf0bf72172a5532a09644003276bc8bfdfe9f1a"},
-    {file = "PyMuPDF-1.23.6-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:57725e15872f7ab67a9fb3e06e5384d1047b2121e85755c93a6d4266d3ca8983"},
-    {file = "PyMuPDF-1.23.6-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:224c341fe254adda97c8f06a4c5838cdbcf609fa89e70b1fb179752533378f2f"},
-    {file = "PyMuPDF-1.23.6-cp39-none-manylinux2014_aarch64.whl", hash = "sha256:271bdf6059bb8347f9c9c6b721329bd353a933681b1fc62f43241b410e7ab7ae"},
-    {file = "PyMuPDF-1.23.6-cp39-none-manylinux2014_x86_64.whl", hash = "sha256:57e22bea69690450197b34dcde16bd9fe0265ac4425b4033535ccc5c044246fb"},
-    {file = "PyMuPDF-1.23.6-cp39-none-win32.whl", hash = "sha256:2885a26220a32fb45ea443443b72194bb7107d6862d8d546b59e4ad0c8a1f2c9"},
-    {file = "PyMuPDF-1.23.6-cp39-none-win_amd64.whl", hash = "sha256:361cab1be45481bd3dc4e00ec82628ebc189b4f4b6fd9bd78a00cfeed54e0034"},
-    {file = "PyMuPDF-1.23.6.tar.gz", hash = "sha256:618b8e884190ac1cca9df1c637f87669d2d532d421d4ee7e4763c848dc4f3a1e"},
-]
-
-[package.dependencies]
-PyMuPDFb = "1.23.6"
-
-[[package]]
-name = "pymupdfb"
-version = "1.23.6"
-description = "MuPDF shared libraries for PyMuPDF."
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "PyMuPDFb-1.23.6-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:e5af77580aad3d1103aeec57009d156bfca429cecda14a17c573fcbe97bafb30"},
-    {file = "PyMuPDFb-1.23.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9925816cbe3e05e920f9be925e5752c2eef42b793885b62075bb0f6a69178598"},
-    {file = "PyMuPDFb-1.23.6-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:009e2cff166059e13bf71f93919e688f46b8fc11d122433574cfb0cc9134690e"},
-    {file = "PyMuPDFb-1.23.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7132b30e6ad6ff2013344e3a481b2287fe0be3710d80694807dd6e0d8635f085"},
-    {file = "PyMuPDFb-1.23.6-py3-none-win32.whl", hash = "sha256:9d24ddadc204e895bee5000ddc7507c801643548e59f5a56aad6d32981d17eeb"},
-    {file = "PyMuPDFb-1.23.6-py3-none-win_amd64.whl", hash = "sha256:7bef75988e6979b10ca804cf9487f817aae43b0fff1c6e315b3b9ee0cf1cc32f"},
-]
-
 [[package]]
 name = "pytest"
 version = "7.4.3"
@@ -3885,11 +3793,10 @@ multidict = ">=4.0"
 autogen = ["pyautogen"]
 dev = ["black", "datasets", "pre-commit", "pytest"]
 lancedb = ["lancedb"]
-legacy = ["faiss-cpu", "numpy"]
 local = ["huggingface-hub", "torch", "transformers"]
 postgres = ["pg8000", "pgvector", "psycopg", "psycopg-binary", "psycopg2-binary"]
 
 [metadata]
 lock-version = "2.0"
 python-versions = "<3.12,>=3.9"
-content-hash = "a1d04a1b10676fcb84fbce5440800706a2ae14cbe2a10bb7d59667b7c36b7709"
+content-hash = "61614071518e8b09eb7396b9f56caef3c08bd6c3c587c0048569d887e3d85601"
diff --git a/pyproject.toml b/pyproject.toml
index d06b2793..ddb32cf8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,12 +22,7 @@ memgpt = "memgpt.main:app"
 python = "<3.12,>=3.9"
 typer = {extras = ["all"], version = "^0.9.0"}
 questionary = "^2.0.1"
-demjson3 = "^3.0.6"
-numpy = "^1.26.1"
 pytz = "^2023.3.post1"
-faiss-cpu = { version = "^1.7.4", optional = true }
-tiktoken = "^0.5.1"
-pymupdf = "^1.23.5"
 tqdm = "^4.66.1"
 black = { version = "^23.10.1", optional = true }
 pytest = { version = "^7.4.3", optional = true }
@@ -49,10 +44,12 @@ docstring-parser = "^0.15"
 lancedb = {version = "^0.3.3", optional = true}
 httpx = "^0.25.2"
 pyautogen = {version = "0.1.14", optional = true}
+numpy = "^1.26.2"
+demjson3 = "^3.0.6"
+tiktoken = "^0.5.1"
 python-box = "^7.1.1"
 
 [tool.poetry.extras]
-legacy = ["faiss-cpu", "numpy"]
 local = ["torch", "huggingface-hub", "transformers"]
 lancedb = ["lancedb"]
 postgres = ["pgvector", "psycopg", "psycopg-binary", "psycopg2-binary", "pg8000"]
diff --git a/tests/test_cli.py b/tests/test_cli.py
index c0aa8287..a7a7774c 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -43,4 +43,3 @@ def test_save_load():
 if __name__ == "__main__":
     test_configure_memgpt()
     test_save_load()
-    # test_legacy_cli_sequence()
diff --git a/tests/test_questionary.py b/tests/test_questionary.py
deleted file mode 100644
index 4f3cdb15..00000000
--- a/tests/test_questionary.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import subprocess
-import sys
-
-subprocess.check_call([sys.executable, "-m", "pip", "install", "pexpect"])
-import pexpect
-
-
-TIMEOUT = 30  # seconds
-
-
-def test_legacy_cli_sequence():
-    # Start the CLI process
-    child = pexpect.spawn("memgpt --first --strip_ui")
-
-    child.expect("Continue with legacy CLI?", timeout=TIMEOUT)
-    # Send 'Y' followed by newline
-    child.sendline("Y")
-
-    # Since .memgpt is empty, should jump immediately to "Which model?"
-    child.expect("Which model would you like to use?", timeout=TIMEOUT)
-    child.sendline()
-
-    child.expect("Which persona would you like MemGPT to use?", timeout=TIMEOUT)
-    child.sendline()
-
-    child.expect("Which user would you like to use?", timeout=TIMEOUT)
-    child.sendline()
-
-    child.expect("Would you like to preload anything into MemGPT's archival memory?", timeout=TIMEOUT)
-    child.sendline()  # Default No
-
-    child.expect("Testing messaging functionality", timeout=TIMEOUT)
-    child.expect("Enter your message", timeout=TIMEOUT)
-    child.sendline()  # Send empty message
-
-    child.expect("Try again!", timeout=TIMEOUT)  # Empty message
-    child.sendline("/save")
-
-    child.expect("Saved checkpoint", timeout=TIMEOUT)
-    child.sendline("/load")
-
-    child.expect("Loaded persistence manager", timeout=TIMEOUT)
-
-    child.sendline("/dump")  # just testing no-crash
-    # child.expect("", timeout=TIMEOUT)
-    child.sendline("/dump 3")  # just testing no-crash
-
-    child.sendline("/exit")
-    child.expect("Finished.", timeout=TIMEOUT)
-
-    child.expect(pexpect.EOF, timeout=TIMEOUT)  # Wait for child to exit
-    child.close()
-    assert child.isalive() is False, "CLI should have terminated."
-    assert child.exitstatus == 0, "CLI did not exit cleanly."
-
-
-if __name__ == "__main__":
-    test_legacy_cli_sequence()
diff --git a/tests/test_websocket_interface.py b/tests/test_websocket_interface.py
index 30b7d9f8..e966cb05 100644
--- a/tests/test_websocket_interface.py
+++ b/tests/test_websocket_interface.py
@@ -1,18 +1,12 @@
-import argparse
-import os
-import subprocess
-import sys
-
 import pytest
 from unittest.mock import Mock, AsyncMock, MagicMock
 
 from memgpt.config import MemGPTConfig, AgentConfig
 from memgpt.server.websocket_interface import SyncWebSocketInterface
 import memgpt.presets as presets
-import memgpt.personas.personas as personas
-import memgpt.humans.humans as humans
+import memgpt.utils as utils
 import memgpt.system as system
-from memgpt.persistence_manager import InMemoryStateManager
+from memgpt.persistence_manager import LocalStateManager
 
 
 # def test_websockets():
@@ -59,17 +53,20 @@ async def test_websockets():
     # Register the mock websocket as a client
     ws_interface.register_client(mock_websocket)
 
-    # Mock the persistence manager
-    persistence_manager = InMemoryStateManager()
-
     # Create an agent and hook it up to the WebSocket interface
     config = MemGPTConfig()
+
+    # Mock the persistence manager
+    # create agents with defaults
+    agent_config = AgentConfig(persona="sam_pov", human="basic", model="gpt-4-1106-preview")
+    persistence_manager = LocalStateManager(agent_config=agent_config)
+
     memgpt_agent = presets.use_preset(
         presets.DEFAULT_PRESET,
         config,  # no agent config to provide
         "gpt-4-1106-preview",
-        personas.get_persona_text("sam_pov"),
-        humans.get_human_text("basic"),
+        utils.get_persona_text("sam_pov"),
+        utils.get_human_text("basic"),
         ws_interface,
         persistence_manager,
     )