From 388a1262b1aa93a607cda6b2318b3274c7fa61aa Mon Sep 17 00:00:00 2001
From: Vivian Fang <hi@vivi.sh>
Date: Sun, 22 Oct 2023 20:46:02 -0700
Subject: [PATCH 01/37] run main.py through black formatter

---
 main.py | 221 +++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 164 insertions(+), 57 deletions(-)

diff --git a/main.py b/main.py
index 6e031be7..cb8c5673 100644
--- a/main.py
+++ b/main.py
@@ -8,6 +8,7 @@ import pickle
 import readline
 
 from rich.console import Console
+
 console = Console()
 
 import interface  # for printing to terminal
@@ -18,25 +19,68 @@ import memgpt.presets as presets
 import memgpt.constants as constants
 import memgpt.personas.personas as personas
 import memgpt.humans.humans as humans
-from memgpt.persistence_manager import InMemoryStateManager, InMemoryStateManagerWithPreloadedArchivalMemory, InMemoryStateManagerWithFaiss
+from memgpt.persistence_manager import (
+    InMemoryStateManager,
+    InMemoryStateManagerWithPreloadedArchivalMemory,
+    InMemoryStateManagerWithFaiss,
+)
 
 FLAGS = flags.FLAGS
 flags.DEFINE_string("persona", default=None, required=False, help="Specify persona")
 flags.DEFINE_string("human", default=None, required=False, help="Specify human")
-flags.DEFINE_string("model", default=constants.DEFAULT_MEMGPT_MODEL, required=False, help="Specify the LLM model")
-flags.DEFINE_boolean("first", default=False, required=False, help="Use -first to send the first message in the sequence")
-flags.DEFINE_boolean("debug", default=False, required=False, help="Use -debug to enable debugging output")
-flags.DEFINE_boolean("no_verify", default=False, required=False, help="Bypass message verification")
-flags.DEFINE_string("archival_storage_faiss_path", default="", required=False, help="Specify archival storage with FAISS index to load (a folder with a .index and .json describing documents to be loaded)")
-flags.DEFINE_string("archival_storage_files", default="", required=False, help="Specify files to pre-load into archival memory (glob pattern)")
-flags.DEFINE_string("archival_storage_files_compute_embeddings", default="", required=False, help="Specify files to pre-load into archival memory (glob pattern), and compute embeddings over them")
-flags.DEFINE_string("archival_storage_sqldb", default="", required=False, help="Specify SQL database to pre-load into archival memory")
+flags.DEFINE_string(
+    "model",
+    default=constants.DEFAULT_MEMGPT_MODEL,
+    required=False,
+    help="Specify the LLM model",
+)
+flags.DEFINE_boolean(
+    "first",
+    default=False,
+    required=False,
+    help="Use -first to send the first message in the sequence",
+)
+flags.DEFINE_boolean(
+    "debug", default=False, required=False, help="Use -debug to enable debugging output"
+)
+flags.DEFINE_boolean(
+    "no_verify", default=False, required=False, help="Bypass message verification"
+)
+flags.DEFINE_string(
+    "archival_storage_faiss_path",
+    default="",
+    required=False,
+    help="Specify archival storage with FAISS index to load (a folder with a .index and .json describing documents to be loaded)",
+)
+flags.DEFINE_string(
+    "archival_storage_files",
+    default="",
+    required=False,
+    help="Specify files to pre-load into archival memory (glob pattern)",
+)
+flags.DEFINE_string(
+    "archival_storage_files_compute_embeddings",
+    default="",
+    required=False,
+    help="Specify files to pre-load into archival memory (glob pattern), and compute embeddings over them",
+)
+flags.DEFINE_string(
+    "archival_storage_sqldb",
+    default="",
+    required=False,
+    help="Specify SQL database to pre-load into archival memory",
+)
 # Support for Azure OpenAI (see: https://github.com/openai/openai-python#microsoft-azure-endpoints)
-flags.DEFINE_boolean("use_azure_openai", default=False, required=False, help="Use Azure OpenAI (requires additional environment variables)")
+flags.DEFINE_boolean(
+    "use_azure_openai",
+    default=False,
+    required=False,
+    help="Use Azure OpenAI (requires additional environment variables)",
+)
 
 
 def clear_line():
-    if os.name == 'nt':  # for windows
+    if os.name == "nt":  # for windows
         console.print("\033[A\033[K", end="")
     else:  # for linux
         sys.stdout.write("\033[2K\033[G")
@@ -44,9 +88,9 @@ def clear_line():
 
 
 def save(memgpt_agent):
-    filename = utils.get_local_time().replace(' ', '_').replace(':', '_')
+    filename = utils.get_local_time().replace(" ", "_").replace(":", "_")
     filename = f"{filename}.json"
-    filename = os.path.join('saved_state', filename)
+    filename = os.path.join("saved_state", filename)
     try:
         if not os.path.exists("saved_state"):
             os.makedirs("saved_state")
@@ -56,7 +100,7 @@ def save(memgpt_agent):
         print(f"Saving state to {filename} failed with: {e}")
 
     # save the persistence manager too
-    filename = filename.replace('.json', '.persistence.pickle')
+    filename = filename.replace(".json", ".persistence.pickle")
     try:
         memgpt_agent.persistence_manager.save(filename)
         print(f"Saved persistence manager to: {filename}")
@@ -66,8 +110,8 @@ def save(memgpt_agent):
 
 def load(memgpt_agent, filename):
     if filename is not None:
-        if filename[-5:] != '.json':
-            filename += '.json'
+        if filename[-5:] != ".json":
+            filename += ".json"
         try:
             memgpt_agent.load_from_json_file_inplace(filename)
             print(f"Loaded checkpoint {filename}")
@@ -75,8 +119,12 @@ def load(memgpt_agent, filename):
             print(f"Loading {filename} failed with: {e}")
     else:
         # Load the latest file
-        print(f"/load warning: no checkpoint specified, loading most recent checkpoint instead")
-        json_files = glob.glob("saved_state/*.json")  # This will list all .json files in the current directory.
+        print(
+            f"/load warning: no checkpoint specified, loading most recent checkpoint instead"
+        )
+        json_files = glob.glob(
+            "saved_state/*.json"
+        )  # This will list all .json files in the current directory.
 
         # Check if there are any json files.
         if not json_files:
@@ -91,12 +139,16 @@ def load(memgpt_agent, filename):
                 print(f"Loading {filename} failed with: {e}")
 
     # need to load persistence manager too
-    filename = filename.replace('.json', '.persistence.pickle')
+    filename = filename.replace(".json", ".persistence.pickle")
     try:
-        memgpt_agent.persistence_manager = InMemoryStateManager.load(filename)  # TODO(fixme):for different types of persistence managers that require different load/save methods
+        memgpt_agent.persistence_manager = InMemoryStateManager.load(
+            filename
+        )  # TODO(fixme):for different types of persistence managers that require different load/save methods
         print(f"Loaded persistence manager from {filename}")
     except Exception as e:
-        print(f"/load warning: loading persistence manager from {filename} failed with: {e}")
+        print(
+            f"/load warning: loading persistence manager from {filename} failed with: {e}"
+        )
 
 
 async def main():
@@ -108,39 +160,63 @@ async def main():
 
     # Azure OpenAI support
     if FLAGS.use_azure_openai:
-        azure_openai_key = os.getenv('AZURE_OPENAI_KEY')
-        azure_openai_endpoint = os.getenv('AZURE_OPENAI_ENDPOINT')
-        azure_openai_version = os.getenv('AZURE_OPENAI_VERSION')
-        azure_openai_deployment = os.getenv('AZURE_OPENAI_DEPLOYMENT')
-        if None in [azure_openai_key, azure_openai_endpoint, azure_openai_version, azure_openai_deployment]:
-            print(f"Error: missing Azure OpenAI environment variables. Please see README section on Azure.")
+        azure_openai_key = os.getenv("AZURE_OPENAI_KEY")
+        azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
+        azure_openai_version = os.getenv("AZURE_OPENAI_VERSION")
+        azure_openai_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT")
+        if None in [
+            azure_openai_key,
+            azure_openai_endpoint,
+            azure_openai_version,
+            azure_openai_deployment,
+        ]:
+            print(
+                f"Error: missing Azure OpenAI environment variables. Please see README section on Azure."
+            )
             return
 
         import openai
+
         openai.api_type = "azure"
         openai.api_key = azure_openai_key
         openai.api_base = azure_openai_endpoint
         openai.api_version = azure_openai_version
         # deployment gets passed into chatcompletion
     else:
-        azure_openai_deployment = os.getenv('AZURE_OPENAI_DEPLOYMENT')
+        azure_openai_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT")
         if azure_openai_deployment is not None:
-            print(f"Error: AZURE_OPENAI_DEPLOYMENT should not be set if --use_azure_openai is False")
+            print(
+                f"Error: AZURE_OPENAI_DEPLOYMENT should not be set if --use_azure_openai is False"
+            )
             return
 
     if FLAGS.model != constants.DEFAULT_MEMGPT_MODEL:
-      interface.important_message(f"Warning - you are running MemGPT with {FLAGS.model}, which is not officially supported (yet). Expect bugs!")
+        interface.important_message(
+            f"Warning - you are running MemGPT with {FLAGS.model}, which is not officially supported (yet). Expect bugs!"
+        )
 
     if FLAGS.archival_storage_faiss_path:
-        index, archival_database = utils.prepare_archival_index(FLAGS.archival_storage_faiss_path)
+        index, archival_database = utils.prepare_archival_index(
+            FLAGS.archival_storage_faiss_path
+        )
         persistence_manager = InMemoryStateManagerWithFaiss(index, archival_database)
     elif FLAGS.archival_storage_files:
-        archival_database = utils.prepare_archival_index_from_files(FLAGS.archival_storage_files)
+        archival_database = utils.prepare_archival_index_from_files(
+            FLAGS.archival_storage_files
+        )
         print(f"Preloaded {len(archival_database)} chunks into archival memory.")
-        persistence_manager = InMemoryStateManagerWithPreloadedArchivalMemory(archival_database)
+        persistence_manager = InMemoryStateManagerWithPreloadedArchivalMemory(
+            archival_database
+        )
     elif FLAGS.archival_storage_files_compute_embeddings:
-        faiss_save_dir = await utils.prepare_archival_index_from_files_compute_embeddings(FLAGS.archival_storage_files_compute_embeddings)
-        interface.important_message(f"To avoid computing embeddings next time, replace --archival_storage_files_compute_embeddings={FLAGS.archival_storage_files_compute_embeddings} with\n\t --archival_storage_faiss_path={faiss_save_dir} (if your files haven't changed).")
+        faiss_save_dir = (
+            await utils.prepare_archival_index_from_files_compute_embeddings(
+                FLAGS.archival_storage_files_compute_embeddings
+            )
+        )
+        interface.important_message(
+            f"To avoid computing embeddings next time, replace --archival_storage_files_compute_embeddings={FLAGS.archival_storage_files_compute_embeddings} with\n\t --archival_storage_faiss_path={faiss_save_dir} (if your files haven't changed)."
+        )
         index, archival_database = utils.prepare_archival_index(faiss_save_dir)
         persistence_manager = InMemoryStateManagerWithFaiss(index, archival_database)
     else:
@@ -148,13 +224,23 @@ async def main():
 
     # Moved defaults out of FLAGS so that we can dynamically select the default persona based on model
     chosen_human = FLAGS.human if FLAGS.human is not None else humans.DEFAULT
-    chosen_persona = FLAGS.persona if FLAGS.persona is not None else (personas.GPT35_DEFAULT if 'gpt-3.5' in FLAGS.model else personas.DEFAULT)
+    chosen_persona = (
+        FLAGS.persona
+        if FLAGS.persona is not None
+        else (personas.GPT35_DEFAULT if "gpt-3.5" in FLAGS.model else personas.DEFAULT)
+    )
 
-    memgpt_agent = presets.use_preset(presets.DEFAULT, FLAGS.model, personas.get_persona_text(chosen_persona), humans.get_human_text(chosen_human), interface, persistence_manager)
+    memgpt_agent = presets.use_preset(
+        presets.DEFAULT,
+        FLAGS.model,
+        personas.get_persona_text(chosen_persona),
+        humans.get_human_text(chosen_human),
+        interface,
+        persistence_manager,
+    )
     print_messages = interface.print_messages
     await print_messages(memgpt_agent.messages)
 
-
     counter = 0
     user_input = None
     skip_next_user_input = False
@@ -179,18 +265,19 @@ async def main():
         return
 
     if not USER_GOES_FIRST:
-        console.input('[bold cyan]Hit enter to begin (will request first MemGPT message)[/bold cyan]')
+        console.input(
+            "[bold cyan]Hit enter to begin (will request first MemGPT message)[/bold cyan]"
+        )
         clear_line()
         print()
 
     while True:
         if not skip_next_user_input and (counter > 0 or USER_GOES_FIRST):
-
             # Ask for user input
             user_input = console.input("[bold cyan]Enter your message:[/bold cyan] ")
             clear_line()
 
-            if user_input.startswith('!'):
+            if user_input.startswith("!"):
                 print(f"Commands for CLI begin with '/' not '!'")
                 continue
 
@@ -201,8 +288,7 @@ async def main():
 
             # Handle CLI commands
             # Commands to not get passed as input to MemGPT
-            if user_input.startswith('/'):
-
+            if user_input.startswith("/"):
                 if user_input == "//":
                     print("Entering multiline mode, type // when done")
                     user_input_list = []
@@ -215,7 +301,9 @@ async def main():
                             user_input_list.append(user_input)
 
                     # pass multiline inputs to MemGPT
-                    user_message = system.package_user_message("\n".join(user_input_list))
+                    user_message = system.package_user_message(
+                        "\n".join(user_input_list)
+                    )
 
                 elif user_input.lower() == "/exit":
                     # autosave
@@ -223,12 +311,14 @@ async def main():
                     break
 
                 elif user_input.lower() == "/savechat":
-                    filename = utils.get_local_time().replace(' ', '_').replace(':', '_')
+                    filename = (
+                        utils.get_local_time().replace(" ", "_").replace(":", "_")
+                    )
                     filename = f"{filename}.pkl"
                     try:
                         if not os.path.exists("saved_chats"):
                             os.makedirs("saved_chats")
-                        with open(os.path.join('saved_chats', filename), 'wb') as f:
+                        with open(os.path.join("saved_chats", filename), "wb") as f:
                             pickle.dump(memgpt_agent.messages, f)
                             print(f"Saved messages to: {filename}")
                     except Exception as e:
@@ -239,7 +329,9 @@ async def main():
                     save(memgpt_agent=memgpt_agent)
                     continue
 
-                elif user_input.lower() == "/load" or user_input.lower().startswith("/load "):
+                elif user_input.lower() == "/load" or user_input.lower().startswith(
+                    "/load "
+                ):
                     command = user_input.strip().split()
                     filename = command[1] if len(command) > 1 else None
                     load(memgpt_agent=memgpt_agent, filename=filename)
@@ -265,17 +357,23 @@ async def main():
                     continue
 
                 elif user_input.lower() == "/model":
-                    if memgpt_agent.model == 'gpt-4':
-                        memgpt_agent.model = 'gpt-3.5-turbo'
-                    elif memgpt_agent.model == 'gpt-3.5-turbo':
-                        memgpt_agent.model = 'gpt-4'
+                    if memgpt_agent.model == "gpt-4":
+                        memgpt_agent.model = "gpt-3.5-turbo"
+                    elif memgpt_agent.model == "gpt-3.5-turbo":
+                        memgpt_agent.model = "gpt-4"
                     print(f"Updated model to:\n{str(memgpt_agent.model)}")
                     continue
 
-                elif user_input.lower() == "/pop" or user_input.lower().startswith("/pop "):
+                elif user_input.lower() == "/pop" or user_input.lower().startswith(
+                    "/pop "
+                ):
                     # Check if there's an additional argument that's an integer
                     command = user_input.strip().split()
-                    amount = int(command[1]) if len(command) > 1 and command[1].isdigit() else 2
+                    amount = (
+                        int(command[1])
+                        if len(command) > 1 and command[1].isdigit()
+                        else 2
+                    )
                     print(f"Popping last {amount} messages from stack")
                     for _ in range(min(amount, len(memgpt_agent.messages))):
                         memgpt_agent.messages.pop()
@@ -304,14 +402,23 @@ async def main():
         skip_next_user_input = False
 
         with console.status("[bold cyan]Thinking...") as status:
-            new_messages, heartbeat_request, function_failed, token_warning = await memgpt_agent.step(user_message, first_message=False, skip_verify=FLAGS.no_verify)
+            (
+                new_messages,
+                heartbeat_request,
+                function_failed,
+                token_warning,
+            ) = await memgpt_agent.step(
+                user_message, first_message=False, skip_verify=FLAGS.no_verify
+            )
 
             # Skip user inputs if there's a memory warning, function execution failed, or the agent asked for control
             if token_warning:
                 user_message = system.get_token_limit_warning()
                 skip_next_user_input = True
             elif function_failed:
-                user_message = system.get_heartbeat(constants.FUNC_FAILED_HEARTBEAT_MESSAGE)
+                user_message = system.get_heartbeat(
+                    constants.FUNC_FAILED_HEARTBEAT_MESSAGE
+                )
                 skip_next_user_input = True
             elif heartbeat_request:
                 user_message = system.get_heartbeat(constants.REQ_HEARTBEAT_MESSAGE)
@@ -322,10 +429,10 @@ async def main():
     print("Finished.")
 
 
-if __name__ ==  '__main__':
+if __name__ == "__main__":
 
     def run(argv):
         loop = asyncio.get_event_loop()
         loop.run_until_complete(main())
 
-    app.run(run)
\ No newline at end of file
+    app.run(run)

From 0a0fd1c0e44b3dfe8bf28ee1d93de768151cdfeb Mon Sep 17 00:00:00 2001
From: Surav Shrestha <suravshresth@gmail.com>
Date: Mon, 23 Oct 2023 11:11:18 +0545
Subject: [PATCH 02/37] fix: typo in memgpt/autogen/memgpt_agent.py

---
 memgpt/autogen/memgpt_agent.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/memgpt/autogen/memgpt_agent.py b/memgpt/autogen/memgpt_agent.py
index a1dce92f..0c22beb1 100644
--- a/memgpt/autogen/memgpt_agent.py
+++ b/memgpt/autogen/memgpt_agent.py
@@ -80,7 +80,7 @@ class MemGPTAgent(ConversableAgent):
     def pretty_concat(messages):
         """AutoGen expects a single response, but MemGPT may take many steps.
         
-        To accomadate AutoGen, concatenate all of MemGPT's steps into one and return as a single message.
+        To accommodate AutoGen, concatenate all of MemGPT's steps into one and return as a single message.
         """
         ret = {
             'role': 'assistant',

From 0705d2464d5800a2351902d1529a151589b865cd Mon Sep 17 00:00:00 2001
From: Vivian Fang <hi@vivi.sh>
Date: Sun, 22 Oct 2023 22:29:15 -0700
Subject: [PATCH 03/37] cli improvements using questionary

---
 .gitignore       |   3 +
 config.py        | 280 +++++++++++++++++++++++++++++++++++++++++++++++
 main.py          | 173 +++++++++++++++++++----------
 requirements.txt |   1 +
 4 files changed, 401 insertions(+), 56 deletions(-)
 create mode 100644 config.py

diff --git a/.gitignore b/.gitignore
index 3ba9ba08..709e8f50 100644
--- a/.gitignore
+++ b/.gitignore
@@ -79,3 +79,6 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+# MemGPT config files
+configs/
diff --git a/config.py b/config.py
new file mode 100644
index 00000000..2c0d0be6
--- /dev/null
+++ b/config.py
@@ -0,0 +1,280 @@
+import asyncio
+import glob
+import json
+import os
+import textwrap
+
+import interface
+
+import questionary
+
+from colorama import Fore, Style, init
+from rich.console import Console
+
+console = Console()
+
+from typing import List, Type
+
+import memgpt.utils as utils
+
+from memgpt.personas.personas import get_persona_text
+from memgpt.humans.humans import get_human_text
+
+model_choices = [
+    questionary.Choice("gpt-4"),
+    questionary.Choice(
+        "gpt-3.5-turbo (experimental! function-calling performance is not quite at the level of gpt-4 yet)",
+        value="gpt-3.5-turbo",
+    ),
+]
+
+
+class Config:
+    personas_dir = os.path.join("memgpt", "personas", "examples")
+    humans_dir = os.path.join("memgpt", "humans", "examples")
+    configs_dir = "configs"
+
+    def __init__(self):
+        self.load_type = None
+        self.archival_storage_files = None
+        self.compute_embeddings = False
+        self.agent_save_file = None
+        self.persistence_manager_save_file = None
+
+    @classmethod
+    async def legacy_flags_init(
+        cls: Type["config"],
+        model: str,
+        memgpt_persona: str,
+        human_persona: str,
+        load_type: str = None,
+        archival_storage_files: str = None,
+        archival_storage_index: str = None,
+        compute_embeddings: bool = False,
+    ):
+        self = cls()
+        self.model = model
+        self.memgpt_persona = memgpt_persona
+        self.human_persona = human_persona
+        self.load_type = load_type
+        self.archival_storage_files = archival_storage_files
+        self.archival_storage_index = archival_storage_index
+        self.compute_embeddings = compute_embeddings
+        recompute_embeddings = self.compute_embeddings
+        if self.archival_storage_index:
+            recompute_embeddings = questionary.confirm(
+                f"Would you like to recompute embeddings? Do this if your files have changed.\nFiles:{self.archival_storage_files}",
+                default=False,
+            )
+        await self.configure_archival_storage(recompute_embeddings)
+        return self
+
+    @classmethod
+    async def config_init(cls: Type["Config"], config_file: str = None):
+        self = cls()
+        self.config_file = config_file
+        if self.config_file is None:
+            cfg = Config.get_most_recent_config()
+            use_cfg = False
+            if cfg:
+                print(
+                    f"{Style.BRIGHT}{Fore.MAGENTA}⚙️ Found saved config file.{Style.RESET_ALL}"
+                )
+                use_cfg = await questionary.confirm(
+                    f"Use most recent config file '{cfg}'?"
+                ).ask_async()
+            if use_cfg:
+                self.config_file = cfg
+
+        if self.config_file:
+            self.load_config(self.config_file)
+            recompute_embeddings = False
+            if self.compute_embeddings:
+                if self.archival_storage_index:
+                    recompute_embeddings = await questionary.confirm(
+                        f"Would you like to recompute embeddings? Do this if your files have changed.\n    Files: {self.archival_storage_files}",
+                        default=False,
+                    ).ask_async()
+                else:
+                    recompute_embeddings = True
+            if self.load_type:
+                await self.configure_archival_storage(recompute_embeddings)
+                self.write_config()
+            return self
+
+        # print("No settings file found, configuring MemGPT...")
+        print(
+            f"{Style.BRIGHT}{Fore.MAGENTA}⚙️ No settings file found, configuring MemGPT...{Style.RESET_ALL}"
+        )
+
+        self.model = await questionary.select(
+            "Which model would you like to use?",
+            model_choices,
+            default=model_choices[0],
+        ).ask_async()
+
+        self.memgpt_persona = await questionary.select(
+            "Which persona would you like MemGPT to use?",
+            Config.get_memgpt_personas(),
+        ).ask_async()
+        print(self.memgpt_persona)
+
+        self.human_persona = await questionary.select(
+            "Which persona would you like to use?",
+            Config.get_user_personas(),
+        ).ask_async()
+
+        self.archival_storage_index = None
+        self.preload_archival = await questionary.confirm(
+            "Would you like to preload anything into MemGPT's archival memory?"
+        ).ask_async()
+        if self.preload_archival:
+            self.load_type = await questionary.select(
+                "What would you like to load?",
+                choices=[
+                    questionary.Choice("A folder or file", value="folder"),
+                    questionary.Choice("A SQL database", value="sql"),
+                    questionary.Choice("A glob pattern", value="glob"),
+                ],
+            ).ask_async()
+            if self.load_type == "folder" or self.load_type == "sql":
+                archival_storage_path = await questionary.path(
+                    "Please enter the folder or file (tab for autocomplete):"
+                ).ask_async()
+                if os.path.isdir(archival_storage_path):
+                    self.archival_storage_files = os.path.join(
+                        archival_storage_path, "*"
+                    )
+                else:
+                    self.archival_storage_files = archival_storage_path
+            else:
+                self.archival_storage_files = await questionary.path(
+                    "Please enter the glob pattern (tab for autocomplete):"
+                ).ask_async()
+            self.compute_embeddings = await questionary.confirm(
+                "Would you like to compute embeddings over these files to enable embeddings search?"
+            ).ask_async()
+            await self.configure_archival_storage(self.compute_embeddings)
+
+        self.write_config()
+        return self
+
+    async def configure_archival_storage(self, recompute_embeddings):
+        if recompute_embeddings:
+            self.archival_storage_index = (
+                await utils.prepare_archival_index_from_files_compute_embeddings(
+                    self.archival_storage_files
+                )
+            )
+        if self.compute_embeddings and self.archival_storage_index:
+            self.index, self.archival_database = utils.prepare_archival_index(
+                self.archival_storage_index
+            )
+        else:
+            self.archival_database = utils.prepare_archival_index_from_files(
+                self.archival_storage_files
+            )
+
+    def to_dict(self):
+        return {
+            "model": self.model,
+            "memgpt_persona": self.memgpt_persona,
+            "human_persona": self.human_persona,
+            "preload_archival": self.preload_archival,
+            "archival_storage_files": self.archival_storage_files,
+            "archival_storage_index": self.archival_storage_index,
+            "compute_embeddings": self.compute_embeddings,
+            "load_type": self.load_type,
+            "agent_save_file": self.agent_save_file,
+            "persistence_manager_save_file": self.persistence_manager_save_file,
+        }
+
+    def load_config(self, config_file):
+        with open(config_file, "rt") as f:
+            cfg = json.load(f)
+        self.model = cfg["model"]
+        self.memgpt_persona = cfg["memgpt_persona"]
+        self.human_persona = cfg["human_persona"]
+        self.preload_archival = cfg["preload_archival"]
+        self.archival_storage_files = cfg["archival_storage_files"]
+        self.archival_storage_index = cfg["archival_storage_index"]
+        self.compute_embeddings = cfg["compute_embeddings"]
+        self.load_type = cfg["load_type"]
+        self.agent_save_file = cfg["agent_save_file"]
+        self.persistence_manager_save_file = cfg["persistence_manager_save_file"]
+
+    def write_config(self, configs_dir=None):
+        if configs_dir is None:
+            configs_dir = Config.configs_dir
+        os.makedirs(configs_dir, exist_ok=True)
+        if self.config_file is None:
+            filename = os.path.join(
+                configs_dir, utils.get_local_time().replace(" ", "_").replace(":", "_")
+            )
+            self.config_file = f"{filename}.json"
+        with open(self.config_file, "wt") as f:
+            json.dump(self.to_dict(), f, indent=4)
+        print(
+            f"{Style.BRIGHT}{Fore.MAGENTA}⚙️ Saved config file to {self.config_file}.{Style.RESET_ALL}"
+        )
+
+    @staticmethod
+    def get_memgpt_personas(dir_path=None):
+        if dir_path is None:
+            dir_path = Config.personas_dir
+        all_personas = Config.get_personas(dir_path)
+        return Config.get_persona_choices([p for p in all_personas], get_persona_text)
+
+    @staticmethod
+    def get_user_personas(dir_path=None):
+        if dir_path is None:
+            dir_path = Config.humans_dir
+        all_personas = Config.get_personas(dir_path)
+        return Config.get_persona_choices([p for p in all_personas], get_human_text)
+
+    @staticmethod
+    def get_personas(dir_path) -> List[str]:
+        files = sorted(glob.glob(os.path.join(dir_path, "*.txt")))
+        stems = []
+        for f in files:
+            filename = os.path.basename(f)
+            stem, _ = os.path.splitext(filename)
+            stems.append(stem)
+        return stems
+
+    @staticmethod
+    def get_persona_choices(personas, text_getter):
+        return [
+            questionary.Choice(
+                title=[
+                    ("class:question", f"{p}"),
+                    ("class:text", f"\n{indent(text_getter(p))}"),
+                ],
+                value=p,
+            )
+            for p in personas
+        ]
+
+    @staticmethod
+    def get_most_recent_config(configs_dir=None):
+        if configs_dir is None:
+            configs_dir = Config.configs_dir
+        files = [
+            os.path.join(configs_dir, f)
+            for f in os.listdir(configs_dir)
+            if os.path.isfile(os.path.join(configs_dir, f))
+        ]
+        # Return the file with the most recent modification time
+        if len(files) == 0:
+            return None
+        return max(files, key=os.path.getmtime)
+
+
+def indent(text, num_lines=5):
+    lines = textwrap.fill(text, width=100).split("\n")
+    if len(lines) > num_lines:
+        lines = lines[: num_lines - 1] + ["... (truncated)", lines[-1]]
+    return "     " + "\n     ".join(lines)
+
+
+config = Config()
diff --git a/main.py b/main.py
index cb8c5673..ade7aa9b 100644
--- a/main.py
+++ b/main.py
@@ -5,7 +5,8 @@ import glob
 import os
 import sys
 import pickle
-import readline
+
+import questionary
 
 from rich.console import Console
 
@@ -25,6 +26,8 @@ from memgpt.persistence_manager import (
     InMemoryStateManagerWithFaiss,
 )
 
+from config import Config
+
 FLAGS = flags.FLAGS
 flags.DEFINE_string("persona", default=None, required=False, help="Specify persona")
 flags.DEFINE_string("human", default=None, required=False, help="Specify human")
@@ -87,7 +90,7 @@ def clear_line():
         sys.stdout.flush()
 
 
-def save(memgpt_agent):
+def save(memgpt_agent, cfg):
     filename = utils.get_local_time().replace(" ", "_").replace(":", "_")
     filename = f"{filename}.json"
     filename = os.path.join("saved_state", filename)
@@ -96,6 +99,7 @@ def save(memgpt_agent):
             os.makedirs("saved_state")
         memgpt_agent.save_to_json_file(filename)
         print(f"Saved checkpoint to: {filename}")
+        cfg.agent_save_file = filename
     except Exception as e:
         print(f"Saving state to {filename} failed with: {e}")
 
@@ -104,8 +108,10 @@ def save(memgpt_agent):
     try:
         memgpt_agent.persistence_manager.save(filename)
         print(f"Saved persistence manager to: {filename}")
+        cfg.persistence_manager_save_file = filename
     except Exception as e:
         print(f"Saving persistence manager to {filename} failed with: {e}")
+    cfg.write_config()
 
 
 def load(memgpt_agent, filename):
@@ -156,6 +162,79 @@ async def main():
     logging.getLogger().setLevel(logging.CRITICAL)
     if FLAGS.debug:
         logging.getLogger().setLevel(logging.DEBUG)
+
+    if any(
+        (
+            FLAGS.persona,
+            FLAGS.human,
+            FLAGS.model != constants.DEFAULT_MEMGPT_MODEL,
+            FLAGS.archival_storage_faiss_path,
+            FLAGS.archival_storage_files,
+            FLAGS.archival_storage_files_compute_embeddings,
+            FLAGS.archival_storage_sqldb,
+        )
+    ):
+        interface.important_message("⚙️ Using legacy command line arguments.")
+        model = FLAGS.model
+        if model is None:
+            model = constants.DEFAULT_MEMGPT_MODEL
+        memgpt_persona = FLAGS.persona
+        if memgpt_persona is None:
+            memgpt_persona = (
+                personas.GPT35_DEFAULT if "gpt-3.5" in model else personas.DEFAULT
+            )
+        human_persona = FLAGS.human
+        if human_persona is None:
+            human_persona = humans.DEFAULT
+
+        if FLAGS.archival_storage_files:
+            cfg = await Config.legacy_flags_init(
+                model,
+                memgpt_persona,
+                human_persona,
+                load_type="folder",
+                archival_storage_files=FLAGS.archival_storage_files,
+                compute_embeddings=False,
+            )
+        elif FLAGS.archival_storage_faiss_path:
+            cfg = await Config.legacy_flags_init(
+                model,
+                memgpt_persona,
+                human_persona,
+                load_type="folder",
+                archival_storage_index=FLAGS.archival_storage_faiss_path,
+                compute_embeddings=False,
+            )
+        elif FLAGS.archival_storage_files_compute_embeddings:
+            print(model)
+            print(memgpt_persona)
+            print(human_persona)
+            cfg = await Config.legacy_flags_init(
+                model,
+                memgpt_persona,
+                human_persona,
+                load_type="folder",
+                archival_storage_files=FLAGS.archival_storage_files_compute_embeddings,
+                compute_embeddings=True,
+            )
+        elif FLAGS.archival_storage_sqldb:
+            cfg = await Config.legacy_flags_init(
+                model,
+                memgpt_persona,
+                human_persona,
+                load_type="sql",
+                archival_storage_files=FLAGS.archival_storage_sqldb,
+                compute_embeddings=False,
+            )
+        else:
+            cfg = await Config.legacy_flags_init(
+                model,
+                memgpt_persona,
+                human_persona,
+            )
+    else:
+        cfg = await Config.config_init()
+
     print("Running... [exit by typing '/exit']")
 
     # Azure OpenAI support
@@ -190,49 +269,35 @@ async def main():
             )
             return
 
-    if FLAGS.model != constants.DEFAULT_MEMGPT_MODEL:
+    if cfg.model != constants.DEFAULT_MEMGPT_MODEL:
         interface.important_message(
-            f"Warning - you are running MemGPT with {FLAGS.model}, which is not officially supported (yet). Expect bugs!"
+            f"Warning - you are running MemGPT with {cfg.model}, which is not officially supported (yet). Expect bugs!"
         )
 
-    if FLAGS.archival_storage_faiss_path:
-        index, archival_database = utils.prepare_archival_index(
-            FLAGS.archival_storage_faiss_path
+    if cfg.archival_storage_index:
+        persistence_manager = InMemoryStateManagerWithFaiss(
+            cfg.index, cfg.archival_database
         )
-        persistence_manager = InMemoryStateManagerWithFaiss(index, archival_database)
-    elif FLAGS.archival_storage_files:
-        archival_database = utils.prepare_archival_index_from_files(
-            FLAGS.archival_storage_files
-        )
-        print(f"Preloaded {len(archival_database)} chunks into archival memory.")
+    elif cfg.archival_storage_files:
+        print(f"Preloaded {len(cfg.archival_database)} chunks into archival memory.")
         persistence_manager = InMemoryStateManagerWithPreloadedArchivalMemory(
-            archival_database
+            cfg.archival_database
         )
-    elif FLAGS.archival_storage_files_compute_embeddings:
-        faiss_save_dir = (
-            await utils.prepare_archival_index_from_files_compute_embeddings(
-                FLAGS.archival_storage_files_compute_embeddings
-            )
-        )
-        interface.important_message(
-            f"To avoid computing embeddings next time, replace --archival_storage_files_compute_embeddings={FLAGS.archival_storage_files_compute_embeddings} with\n\t --archival_storage_faiss_path={faiss_save_dir} (if your files haven't changed)."
-        )
-        index, archival_database = utils.prepare_archival_index(faiss_save_dir)
-        persistence_manager = InMemoryStateManagerWithFaiss(index, archival_database)
     else:
         persistence_manager = InMemoryStateManager()
 
+    if FLAGS.archival_storage_files_compute_embeddings:
+        interface.important_message(
+            f"(legacy) To avoid computing embeddings next time, replace --archival_storage_files_compute_embeddings={FLAGS.archival_storage_files_compute_embeddings} with\n\t --archival_storage_faiss_path={cfg.archival_storage_index} (if your files haven't changed)."
+        )
+
     # Moved defaults out of FLAGS so that we can dynamically select the default persona based on model
-    chosen_human = FLAGS.human if FLAGS.human is not None else humans.DEFAULT
-    chosen_persona = (
-        FLAGS.persona
-        if FLAGS.persona is not None
-        else (personas.GPT35_DEFAULT if "gpt-3.5" in FLAGS.model else personas.DEFAULT)
-    )
+    chosen_human = cfg.human_persona
+    chosen_persona = cfg.memgpt_persona
 
     memgpt_agent = presets.use_preset(
         presets.DEFAULT,
-        FLAGS.model,
+        cfg.model,
         personas.get_persona_text(chosen_persona),
         humans.get_human_text(chosen_human),
         interface,
@@ -247,19 +312,26 @@ async def main():
     user_message = None
     USER_GOES_FIRST = FLAGS.first
 
-    if FLAGS.archival_storage_sqldb:
-        if not os.path.exists(FLAGS.archival_storage_sqldb):
-            print(f"File {FLAGS.archival_storage_sqldb} does not exist")
+    if cfg.load_type == "sql":  # TODO: move this into config.py in a clean manner
+        if not os.path.exists(cfg.archival_storage_files):
+            print(f"File {cfg.archival_storage_files} does not exist")
             return
         # Ingest data from file into archival storage
         else:
             print(f"Database found! Loading database into archival memory")
-            data_list = utils.read_database_as_list(FLAGS.archival_storage_sqldb)
+            data_list = utils.read_database_as_list(cfg.archival_storage_files)
             user_message = f"Your archival memory has been loaded with a SQL database called {data_list[0]}, which contains schema {data_list[1]}. Remember to refer to this first while answering any user questions!"
             for row in data_list:
                 await memgpt_agent.persistence_manager.archival_memory.insert(row)
             print(f"Database loaded into archival memory.")
 
+    if cfg.agent_save_file:
+        load_save_file = await questionary.confirm(
+            f"Load in saved agent '{cfg.agent_save_file}'?"
+        ).ask_async()
+        if load_save_file:
+            load(memgpt_agent, cfg.agent_save_file)
+
     # auto-exit for
     if "GITHUB_ACTIONS" in os.environ:
         return
@@ -274,7 +346,12 @@ async def main():
     while True:
         if not skip_next_user_input and (counter > 0 or USER_GOES_FIRST):
             # Ask for user input
-            user_input = console.input("[bold cyan]Enter your message:[/bold cyan] ")
+            # user_input = console.input("[bold cyan]Enter your message:[/bold cyan] ")
+            user_input = await questionary.text(
+                "Enter your message:",
+                multiline=True,
+                qmark=">",
+            ).ask_async()
             clear_line()
 
             if user_input.startswith("!"):
@@ -289,25 +366,9 @@ async def main():
             # Handle CLI commands
             # Commands to not get passed as input to MemGPT
             if user_input.startswith("/"):
-                if user_input == "//":
-                    print("Entering multiline mode, type // when done")
-                    user_input_list = []
-                    while True:
-                        user_input = console.input("[bold cyan]>[/bold cyan] ")
-                        clear_line()
-                        if user_input == "//":
-                            break
-                        else:
-                            user_input_list.append(user_input)
-
-                    # pass multiline inputs to MemGPT
-                    user_message = system.package_user_message(
-                        "\n".join(user_input_list)
-                    )
-
-                elif user_input.lower() == "/exit":
+                if user_input.lower() == "/exit":
                     # autosave
-                    save(memgpt_agent=memgpt_agent)
+                    save(memgpt_agent=memgpt_agent, cfg=cfg)
                     break
 
                 elif user_input.lower() == "/savechat":
@@ -326,7 +387,7 @@ async def main():
                     continue
 
                 elif user_input.lower() == "/save":
-                    save(memgpt_agent=memgpt_agent)
+                    save(memgpt_agent=memgpt_agent, cfg=cfg)
                     continue
 
                 elif user_input.lower() == "/load" or user_input.lower().startswith(
diff --git a/requirements.txt b/requirements.txt
index 484bc1e2..cc1c5688 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,6 +9,7 @@ pybars3
 pymupdf
 python-dotenv
 pytz
+questionary
 rich
 tiktoken
 timezonefinder

From be10a0fbd4237172672c4a234ab2244637517559 Mon Sep 17 00:00:00 2001
From: Vivian Fang <hi@vivi.sh>
Date: Sun, 22 Oct 2023 22:31:54 -0700
Subject: [PATCH 04/37] remove readline

---
 README.md | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/README.md b/README.md
index 347dc39f..887b6429 100644
--- a/README.md
+++ b/README.md
@@ -75,13 +75,6 @@ Install dependencies:
 pip install -r requirements.txt
 ```
 
-Extra step for Windows:
-
-```sh
-# only needed on Windows
-pip install pyreadline3
-```
-
 Add your OpenAI API key to your environment:
 
 ```sh

From 619bde8e6d826a4babd9f6835ad79c86177a83b1 Mon Sep 17 00:00:00 2001
From: Vivian Fang <hi@vivi.sh>
Date: Sun, 22 Oct 2023 22:48:02 -0700
Subject: [PATCH 05/37] update README.md to reflect new cli

---
 README.md | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 887b6429..540497c8 100644
--- a/README.md
+++ b/README.md
@@ -108,6 +108,12 @@ python main.py --use_azure_openai
 
 To create a new starter user or starter persona (that MemGPT gets initialized with), create a new `.txt` file in [/memgpt/humans/examples](/memgpt/humans/examples) or [/memgpt/personas/examples](/memgpt/personas/examples), then use the `--persona` or `--human` flag when running `main.py`. For example:
 
+```sh
+# assuming you created a new file /memgpt/humans/examples/me.txt
+python main.py
+# Select me.txt during configuration process
+```
+-- OR --
 ```sh
 # assuming you created a new file /memgpt/humans/examples/me.txt
 python main.py --human me.txt
@@ -116,6 +122,11 @@ python main.py --human me.txt
 ### GPT-3.5 support
 You can run MemGPT with GPT-3.5 as the LLM instead of GPT-4:
 ```sh
+python main.py
+# Select gpt-3.5 during configuration process
+```
+-- OR --
+```sh
 python main.py --model gpt-3.5-turbo
 ```
 
@@ -124,6 +135,15 @@ python main.py --model gpt-3.5-turbo
 Please report any bugs you encounter regarding MemGPT running on GPT-3.5 to  https://github.com/cpacker/MemGPT/issues/59.
 
 ### `main.py` flags
+```text
+--first
+  allows you to send the first message in the chat (by default, MemGPT will send the first message)
+--debug
+  enables debugging output
+```
+
+<details>
+<summary>Configure via legacy flags</summary>
 
 ```text
 --model
@@ -132,10 +152,6 @@ Please report any bugs you encounter regarding MemGPT running on GPT-3.5 to  htt
   load a specific persona file
 --human
   load a specific human file
---first
-  allows you to send the first message in the chat (by default, MemGPT will send the first message)
---debug
-  enables debugging output
 --archival_storage_faiss_path=<ARCHIVAL_STORAGE_FAISS_PATH>
   load in document database (backed by FAISS index)
 --archival_storage_files="<ARCHIVAL_STORAGE_FILES_GLOB_PATTERN>"
@@ -145,6 +161,8 @@ Please report any bugs you encounter regarding MemGPT running on GPT-3.5 to  htt
 --archival_storage_sqldb=<SQLDB_PATH>
   load in SQL database
 ```
+</details>
+
 
 ### Interactive CLI commands
 
@@ -153,8 +171,6 @@ These are the commands for the CLI, **not the Discord bot**! The Discord bot has
 While using MemGPT via the CLI (not Discord!) you can run various commands:
 
 ```text
-//
-  enter multiline input mode (type // again when done)
 /exit
   exit the CLI
 /save

From f1ec95aff6e780f0feee19016bacbd15d7e999d8 Mon Sep 17 00:00:00 2001
From: Vivian Fang <hi@vivi.sh>
Date: Sun, 22 Oct 2023 22:50:07 -0700
Subject: [PATCH 06/37] create configs directory

---
 config.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/config.py b/config.py
index 2c0d0be6..c567c118 100644
--- a/config.py
+++ b/config.py
@@ -259,6 +259,7 @@ class Config:
     def get_most_recent_config(configs_dir=None):
         if configs_dir is None:
             configs_dir = Config.configs_dir
+        os.makedirs(configs_dir, exist_ok=True)
         files = [
             os.path.join(configs_dir, f)
             for f in os.listdir(configs_dir)

From 8484f0557d76cc84ed4ae9dd8eb1f86322463b3e Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Sun, 22 Oct 2023 22:52:24 -0700
Subject: [PATCH 07/37] basic proof of concept tested on airoboros 70b 2.1

---
 memgpt/local_llm/README.md                    |   3 +
 memgpt/local_llm/__init__.py                  |   0
 memgpt/local_llm/chat_completion_proxy.py     |  88 +++++++++++
 .../llm_chat_completion_wrappers/__init__.py  |   0
 .../llm_chat_completion_wrappers/airoboros.py | 146 ++++++++++++++++++
 .../wrapper_base.py                           |  14 ++
 memgpt/local_llm/webui_settings.py            |  54 +++++++
 memgpt/openai_tools.py                        |  21 ++-
 8 files changed, 322 insertions(+), 4 deletions(-)
 create mode 100644 memgpt/local_llm/README.md
 create mode 100644 memgpt/local_llm/__init__.py
 create mode 100644 memgpt/local_llm/chat_completion_proxy.py
 create mode 100644 memgpt/local_llm/llm_chat_completion_wrappers/__init__.py
 create mode 100644 memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py
 create mode 100644 memgpt/local_llm/llm_chat_completion_wrappers/wrapper_base.py
 create mode 100644 memgpt/local_llm/webui_settings.py

diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md
new file mode 100644
index 00000000..d81a58e7
--- /dev/null
+++ b/memgpt/local_llm/README.md
@@ -0,0 +1,3 @@
+## TODO
+
+Instructions on how to add additional support for other function calling LLMs + other LLM backends
\ No newline at end of file
diff --git a/memgpt/local_llm/__init__.py b/memgpt/local_llm/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/memgpt/local_llm/chat_completion_proxy.py b/memgpt/local_llm/chat_completion_proxy.py
new file mode 100644
index 00000000..39f69109
--- /dev/null
+++ b/memgpt/local_llm/chat_completion_proxy.py
@@ -0,0 +1,88 @@
+"""MemGPT sends a ChatCompletion request
+
+Under the hood, we use the functions argument to turn the request into a single prompt string for a local LLM
+"""
+
+
+"""Key idea: create drop-in replacement for agent's ChatCompletion call that runs on an OpenLLM backend"""
+
+import os
+import json
+import requests
+
+from .webui_settings import DETERMINISTIC, SIMPLE
+from .llm_chat_completion_wrappers import airoboros
+
+HOST = os.getenv('OPENAI_API_BASE')
+HOST_TYPE = os.getenv('BACKEND_TYPE')  # default None == ChatCompletion
+
+
+class DotDict(dict):
+    """Allow dot access on properties similar to OpenAI response object"""
+
+    def __getattr__(self, attr):
+        return self.get(attr)
+
+    def __setattr__(self, key, value):
+        self[key] = value
+
+
+async def get_chat_completion(
+        model,  # no model, since the model is fixed to whatever you set in your own backend
+        messages,
+        functions,
+        function_call="auto",
+    ):
+    if function_call != "auto":
+        raise ValueError(f"function_call == {function_call} not supported (auto only)")
+
+    if True or model == 'airoboros_v2.1':
+        llm_wrapper = airoboros.Airoboros21Wrapper()
+
+    # First step: turn the message sequence into a prompt that the model expects
+    prompt = llm_wrapper.chat_completion_to_prompt(messages, functions)
+    # print(prompt)
+
+    if HOST_TYPE != 'webui':
+        raise ValueError(HOST_TYPE)
+
+    request = dict(SIMPLE)  # copy so the prompt is not written into the shared module-level settings
+    request['prompt'] = prompt
+
+    try:
+
+        URI = f'{HOST}/v1/generate'
+        response = requests.post(URI, json=request)
+        if response.status_code == 200:
+            # result = response.json()['results'][0]['history']
+            result = response.json()
+            # print(f"raw API response: {result}")
+            result = result['results'][0]['text']
+            print(f"json API response.text: {result}")
+        else:
+            raise Exception(f"API call got non-200 response code")
+
+        # cleaned_result, chatcompletion_result = parse_st_json_output(result)
+        chat_completion_result = llm_wrapper.output_to_chat_completion_response(result)
+        print(json.dumps(chat_completion_result, indent=2))
+        # print(cleaned_result)
+
+        # unpack with response.choices[0].message.content
+        response = DotDict({
+            'model': None,
+            'choices': [DotDict({
+                'message': DotDict(chat_completion_result),
+                'finish_reason': 'stop',  # TODO vary based on webui response
+            })],
+            'usage': DotDict({
+                # TODO fix
+                'prompt_tokens': 0,
+                'completion_tokens': 0,
+                'total_tokens': 0,
+            })
+        })
+        return response
+
+    except Exception as e:
+        # TODO
+        raise e
diff --git a/memgpt/local_llm/llm_chat_completion_wrappers/__init__.py b/memgpt/local_llm/llm_chat_completion_wrappers/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py b/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py
new file mode 100644
index 00000000..303e2d37
--- /dev/null
+++ b/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py
@@ -0,0 +1,146 @@
+import json
+
+from .wrapper_base import LLMChatCompletionWrapper
+
+
+class Airoboros21Wrapper(LLMChatCompletionWrapper):
+    """Wrapper for Airoboros 70b v2.1: https://huggingface.co/jondurbin/airoboros-l2-70b-2.1
+    """
+
+    def __init__(self, simplify_json_content=True, include_assistant_prefix=True, clean_function_args=True):
+        self.simplify_json_content = simplify_json_content
+        self.include_assistant_prefix = include_assistant_prefix
+        self.clean_func_args = clean_function_args
+
+    def chat_completion_to_prompt(self, messages, functions):
+        """Example for airoboros: https://huggingface.co/jondurbin/airoboros-l2-70b-2.1#prompt-format
+
+        A chat.
+        USER: {prompt}
+        ASSISTANT:
+
+        Functions support: https://huggingface.co/jondurbin/airoboros-l2-70b-2.1#agentfunction-calling
+
+            As an AI assistant, please select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format.
+
+            Input: I want to know how many times 'Python' is mentioned in my text file.
+
+            Available functions:
+            file_analytics:
+              description: This tool performs various operations on a text file.
+              params:
+                action: The operation we want to perform on the data, such as "count_occurrences", "find_line", etc.
+                filters:
+                  keyword: The word or phrase we want to search for.
+
+        OpenAI functions schema style:
+
+            {
+                "name": "send_message",
+                "description": "Sends a message to the human user",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        # https://json-schema.org/understanding-json-schema/reference/array.html
+                        "message": {
+                            "type": "string",
+                            "description": "Message contents. All unicode (including emojis) are supported.",
+                        },
+                    },
+                    "required": ["message"],
+                }
+            },
+        """
+        prompt = ""
+
+        # System instructions go first
+        assert messages[0]['role'] == 'system'
+        prompt += messages[0]['content']
+
+        # Next is the functions preamble
+        def create_function_description(schema):
+            # airoboros style
+            func_str = ""
+            func_str += f"{schema['name']}:"
+            func_str += f"\n  description: {schema['description']}"
+            func_str += f"\n  params:"
+            for param_k, param_v in schema['parameters']['properties'].items():
+                # TODO we're ignoring type
+                func_str += f"\n    {param_k}: {param_v['description']}"
+            # TODO we're ignoring schema['parameters']['required']
+            return func_str
+
+        prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format."
+        prompt += f"\nAvailable functions:"
+        for function_dict in functions:
+            prompt += f"\n{create_function_description(function_dict)}"
+
+        # Last are the user/assistant messages
+        for message in messages[1:]:
+            assert message['role'] in ['user', 'assistant', 'function'], message
+
+            if message['role'] == 'user':
+                user_text = message['content']
+                if self.simplify_json_content:
+                    try:  # content may be a JSON object carrying the text under 'message'
+                        user_text = json.loads(user_text)['message']
+                    except (ValueError, KeyError, TypeError):
+                        pass  # not such a JSON object: fall back to the raw content
+                prompt += f"\nUSER: {user_text}"
+            elif message['role'] == 'assistant':
+                prompt += f"\nASSISTANT: {message['content']}"
+            elif message['role'] == 'function':
+                # TODO
+                continue
+                # prompt += f"\nASSISTANT: (function return) {message['content']}"
+            else:
+                raise ValueError(message)
+
+        if self.include_assistant_prefix:
+            # prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format."
+            prompt += f"\nASSISTANT:"
+
+        return prompt
+
+    def clean_function_args(self, function_name, function_args):
+        """Some basic MemGPT-specific cleaning of function args"""
+        cleaned_function_name = function_name
+        cleaned_function_args = function_args.copy()
+
+        if function_name == 'send_message':
+            # strip request_heartbeat
+            cleaned_function_args.pop('request_heartbeat', None)
+
+        # TODO more cleaning to fix errors LLM makes
+        return cleaned_function_name, cleaned_function_args
+
+    def output_to_chat_completion_response(self, raw_llm_output):
+        """Turn raw LLM output into a ChatCompletion style response with:
+        "message" = {
+            "role": "assistant",
+            "content": ...,
+            "function_call": {
+                "name": ...
+                "arguments": {
+                    "arg1": val1,
+                    ...
+                }
+            }
+        }
+        """
+        function_json_output = json.loads(raw_llm_output)
+        function_name = function_json_output['function']
+        function_parameters = function_json_output['params']
+
+        if self.clean_func_args:
+            function_name, function_parameters = self.clean_function_args(function_name, function_parameters)
+
+        message = {
+            'role': 'assistant',
+            'content': None,
+            'function_call': {
+                'name': function_name,
+                'arguments': json.dumps(function_parameters),
+            }
+        }
+        return message
diff --git a/memgpt/local_llm/llm_chat_completion_wrappers/wrapper_base.py b/memgpt/local_llm/llm_chat_completion_wrappers/wrapper_base.py
new file mode 100644
index 00000000..d2e7584e
--- /dev/null
+++ b/memgpt/local_llm/llm_chat_completion_wrappers/wrapper_base.py
@@ -0,0 +1,14 @@
+from abc import ABC, abstractmethod
+
+
+class LLMChatCompletionWrapper(ABC):
+
+    @abstractmethod
+    def chat_completion_to_prompt(self, messages, functions):
+        """Go from ChatCompletion to a single prompt string"""
+        pass
+
+    @abstractmethod
+    def output_to_chat_completion_response(self, raw_llm_output):
+        """Turn the LLM output string into a ChatCompletion response"""
+        pass
diff --git a/memgpt/local_llm/webui_settings.py b/memgpt/local_llm/webui_settings.py
new file mode 100644
index 00000000..dc578084
--- /dev/null
+++ b/memgpt/local_llm/webui_settings.py
@@ -0,0 +1,54 @@
+DETERMINISTIC = {
+      'max_new_tokens': 250,
+      'do_sample': False,
+      'temperature': 0,
+      'top_p': 0,
+      'typical_p': 1,
+      'repetition_penalty': 1.18,
+      'repetition_penalty_range': 0,
+      'encoder_repetition_penalty': 1,
+      'top_k': 1,
+      'min_length': 0,
+      'no_repeat_ngram_size': 0,
+      'num_beams': 1,
+      'penalty_alpha': 0,
+      'length_penalty': 1,
+      'early_stopping': False,
+      'guidance_scale': 1,
+      'negative_prompt': '',
+      'seed': -1,
+      'add_bos_token': True,
+      'stopping_strings': [
+        '\nUSER:',
+        '\nASSISTANT:',
+        # '\n' +
+        # '</s>',
+        # '<|',
+        # '\n#',
+        # '\n\n\n',
+      ],
+      'truncation_length': 4096,
+      'ban_eos_token': False,
+      'skip_special_tokens': True,
+      'top_a': 0,
+      'tfs': 1,
+      'epsilon_cutoff': 0,
+      'eta_cutoff': 0,
+      'mirostat_mode': 2,
+      'mirostat_tau': 4,
+      'mirostat_eta': 0.1,
+      'use_mancer': False
+    }
+
+SIMPLE = {
+      'stopping_strings': [
+        '\nUSER:',
+        '\nASSISTANT:',
+        # '\n' +
+        # '</s>',
+        # '<|',
+        # '\n#',
+        # '\n\n\n',
+      ],
+      'truncation_length': 4096,
+}
\ No newline at end of file
diff --git a/memgpt/openai_tools.py b/memgpt/openai_tools.py
index 98444878..7729ae15 100644
--- a/memgpt/openai_tools.py
+++ b/memgpt/openai_tools.py
@@ -3,7 +3,13 @@ import random
 import os
 import time
 
+from .local_llm.chat_completion_proxy import get_chat_completion
+HOST = os.getenv('OPENAI_API_BASE')
+HOST_TYPE = os.getenv('BACKEND_TYPE')  # default None == ChatCompletion
+
 import openai
+if HOST is not None:
+    openai.api_base = HOST
 
 
 def retry_with_exponential_backoff(
@@ -102,10 +108,17 @@ def aretry_with_exponential_backoff(
 
 @aretry_with_exponential_backoff
 async def acompletions_with_backoff(**kwargs):
-    azure_openai_deployment = os.getenv('AZURE_OPENAI_DEPLOYMENT')
-    if azure_openai_deployment is not None:
-        kwargs['deployment_id'] = azure_openai_deployment
-    return await openai.ChatCompletion.acreate(**kwargs)
+
+    # Local model
+    if HOST_TYPE is not None:
+        return await get_chat_completion(**kwargs)
+
+    # OpenAI / Azure model
+    else:
+        azure_openai_deployment = os.getenv('AZURE_OPENAI_DEPLOYMENT')
+        if azure_openai_deployment is not None:
+            kwargs['deployment_id'] = azure_openai_deployment
+        return await openai.ChatCompletion.acreate(**kwargs)
 
 
 @aretry_with_exponential_backoff

From 63a003468590e845c1e9b11b9038dabfe4fd553d Mon Sep 17 00:00:00 2001
From: Vivian Fang <hi@vivi.sh>
Date: Sun, 22 Oct 2023 22:53:00 -0700
Subject: [PATCH 08/37] remove stray Config()

---
 config.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/config.py b/config.py
index c567c118..abe86b2b 100644
--- a/config.py
+++ b/config.py
@@ -276,6 +276,3 @@ def indent(text, num_lines=5):
     if len(lines) > num_lines:
         lines = lines[: num_lines - 1] + ["... (truncated)", lines[-1]]
     return "     " + "\n     ".join(lines)
-
-
-config = Config()

From 172ddc4423a6e0b5253e47ebecc94e9b0fc00d91 Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Sun, 22 Oct 2023 23:09:41 -0700
Subject: [PATCH 09/37] Update README.md

---
 memgpt/local_llm/README.md | 36 ++++++++++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md
index d81a58e7..f23bbc70 100644
--- a/memgpt/local_llm/README.md
+++ b/memgpt/local_llm/README.md
@@ -1,3 +1,35 @@
-## TODO
+## tl;dr - how to connect MemGPT to non-OpenAI LLMs
 
-Instructions on how to add additional support for other function calling LLMs + other LLM backends
\ No newline at end of file
+**If you have a hosted ChatCompletion-compatible endpoint that works with function calling**:
+  - simply set `OPENAI_API_BASE` to the IP+port of your endpoint:
+
+```sh
+export OPENAI_API_BASE=...
+```
+
+Note: for this to work, the endpoint MUST support function calls. As of 10/22/2023, most ChatCompletion endpoints do NOT support function calls, so if you want to play with MemGPT and open models, follow the instructions below.
+
+**If you have a hosted local model that is function-call finetuned**:
+  - implement a wrapper class for that model
+    - the wrapper class needs to implement two functions:
+      - one to go from ChatCompletion messages/functions schema to a prompt string
+      - and one to go from raw LLM outputs to a ChatCompletion response
+  - put that model behind a server (e.g. using WebUI) and set `OPENAI_API_BASE`
+
+To help you get started, we've implemented an example wrapper class for a popular llama2 model finetuned on function calling (airoboros). We want MemGPT to run well on open models as much as you do, so we'll be actively updating this page with more examples. Additionally, we welcome contributions from the community! If you find an open LLM that works well with MemGPT, please open a PR with a model wrapper and we'll merge it ASAP.
+
+## Status of ChatCompletion w/ function calling and open LLMs
+
+MemGPT uses function calling to do memory management. With OpenAI's ChatCompletion API, you can pass in a function schema in the ‘functions' keyword arg, and the API response will include a ‘function_call’ field that includes the function name and the function arguments (generated JSON). How this works under the hood is your ‘functions’ keyword is combined with the ‘messages’ and ‘system' to form one big string input to the transformer, and the output of the transformer is parsed to extract the JSON function call.
+
+In the future, more open LLMs and LLM servers (that can host OpenAI-compatable ChatCompletion endpoints) may start including parsing code to do this automatically as standard practice. However, in the meantime, when you see a model that says it supports “function calling”, like Airoboros, it doesn't mean that you can just load Airoboros into a ChatCompletion-compatable endpoint like FastChat, and then use the same OpenAI API call and it'll just work.
+
+(1) When an open LLM says it supports function calling, they probably mean that the model was finetuned on some function call data. Remember, transformers are just string-in-string-out, so there are many ways to format this function call data. Airoboros formats the function schema in YAML style (see https://huggingface.co/jondurbin/airoboros-l2-70b-3.1.2#agentfunction-calling)) and the output is in JSON style. To get this to work behind a ChatCompletion API, you still have to do the parsing from ‘functions’ keyword arg (containing the schema) to the model's expected schema style in the prompt (YAML for Airoboros), and you have to run some code to extract the function call (JSON for Airoboros) and package it cleanly as a ‘function_call’ field in the response.
+
+(2) Partly because of how complex it is to support function calling, most (all?) of the community projects that do OpenAI ChatCompletion endpoints for arbitrary open LLMs do not support function calling, because if they did, they would need to write model-specific parsing code for each one.
+
+## How can you run MemGPT with open LLMs that support function calling?
+
+Because of the poor state of function calling support in existing ChatCompletion API serving code, we instead provide a light wrapper on top of ChatCompletion that uses a parser specific to Airoboros. We hope that this example code will help the community add additional compatibility of MemGPT with more function-calling LLMs - we will also add more model support as we test more models and find those that work well enough to run MemGPT's function set.
+
+To run the example of MemGPT with Airoboros, you'll need to host the model with some open LLM hosting code, for example Oobabooga's text-generation-webui (see https://github.com/oobabooga/text-generation-webui). Then, all you need to do is point MemGPT to this API endpoint. Now, instead of calling ChatCompletion on OpenAI's API, MemGPT will use its own ChatCompletion wrapper that parses the system, messages, and function arguments into a format that Airoboros has been finetuned on, and once Airoboros generates a string output, MemGPT will parse the response to extract a potential function call (knowing what we know about Airoboros' expected function call output).

From e4add84bbe62ff9472dc0a7c89c9b219940b92bc Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Sun, 22 Oct 2023 23:13:01 -0700
Subject: [PATCH 10/37] Update README.md

---
 memgpt/local_llm/README.md | 44 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md
index f23bbc70..a1f7b759 100644
--- a/memgpt/local_llm/README.md
+++ b/memgpt/local_llm/README.md
@@ -9,6 +9,8 @@ export OPENAI_API_BASE=...
 
 Note: for this to work, the endpoint MUST support function calls. As of 10/22/2023, most ChatCompletion endpoints do NOT support function calls, so if you want to play with MemGPT and open models, follow the instructions below.
 
+## Integrating a function-call finetuned LLM with MemGPT
+
 **If you have a hosted local model that is function-call finetuned**:
   - implement a wrapper class for that model
     - the wrapper class needs to implement two functions:
@@ -16,8 +18,50 @@ Note: for this to work, the endpoint MUST support function calls. As of 10/22/20
       - and one to go from raw LLM outputs to a ChatCompletion response
   - put that model behind a server (e.g. using WebUI) and set `OPENAI_API_BASE`
 
+```python
+class LLMChatCompletionWrapper(ABC):
+
+    @abstractmethod
+    def chat_completion_to_prompt(self, messages, functions):
+        """Go from ChatCompletion to a single prompt string"""
+        pass
+
+    @abstractmethod
+    def output_to_chat_completion_response(self, raw_llm_output):
+        """Turn the LLM output string into a ChatCompletion response"""
+        pass
+```
+
 To help you get started, we've implemented an example wrapper class for a popular llama2 model finetuned on function calling (airoboros). We want MemGPT to run well on open models as much as you do, so we'll be actively updating this page with more examples. Additionally, we welcome contributions from the community! If you find an open LLM that works well with MemGPT, please open a PR with a model wrapper and we'll merge it ASAP.
 
+```python
+class Airoboros21Wrapper(LLMChatCompletionWrapper):
+    """Wrapper for Airoboros 70b v2.1: https://huggingface.co/jondurbin/airoboros-l2-70b-2.1"""
+
+    def chat_completion_to_prompt(self, messages, functions):
+        """
+        Examples for how airoboros expects its prompt inputs: https://huggingface.co/jondurbin/airoboros-l2-70b-2.1#prompt-format
+        Examples for how airoboros expects to see function schemas: https://huggingface.co/jondurbin/airoboros-l2-70b-2.1#agentfunction-calling
+        """
+
+    def output_to_chat_completion_response(self, raw_llm_output):
+        """Turn raw LLM output into a ChatCompletion style response with:
+        "message" = {
+            "role": "assistant",
+            "content": ...,
+            "function_call": {
+                "name": ...
+                "arguments": {
+                    "arg1": val1,
+                    ...
+                }
+            }
+        }
+        """
+```
+
+---
+
 ## Status of ChatCompletion w/ function calling and open LLMs
 
 MemGPT uses function calling to do memory management. With OpenAI's ChatCompletion API, you can pass in a function schema in the ‘functions' keyword arg, and the API response will include a ‘function_call’ field that includes the function name and the function arguments (generated JSON). How this works under the hood is your ‘functions’ keyword is combined with the ‘messages’ and ‘system' to form one big string input to the transformer, and the output of the transformer is parsed to extract the JSON function call.

From c8b89e25d068d38e113c210c17ed9a00d13fb1e2 Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Sun, 22 Oct 2023 23:13:49 -0700
Subject: [PATCH 11/37] Update README.md

---
 memgpt/local_llm/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md
index a1f7b759..a8c2304a 100644
--- a/memgpt/local_llm/README.md
+++ b/memgpt/local_llm/README.md
@@ -1,4 +1,4 @@
-## tl;dr - how to connect MemGPT to non-OpenAI LLMs
+## How to connect MemGPT to non-OpenAI LLMs
 
 **If you have a hosted ChatCompletion-compatible endpoint that works with function calling**:
   - simply set `OPENAI_API_BASE` to the IP+port of your endpoint:
@@ -7,7 +7,7 @@
 export OPENAI_API_BASE=...
 ```
 
-Note: for this to work, the endpoint MUST support function calls. As of 10/22/2023, most ChatCompletion endpoints do NOT support function calls, so if you want to play with MemGPT and open models, follow the instructions below.
+Note: for this to work, the endpoint **MUST** support function calls. As of 10/22/2023, most ChatCompletion endpoints do **NOT** support function calls, so if you want to play with MemGPT and open models, you probably need to follow the instructions below.
 
 ## Integrating a function-call finetuned LLM with MemGPT
 

From 6f293c90f465f6f165038d7f41bd548bebfdc53e Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Sun, 22 Oct 2023 23:15:01 -0700
Subject: [PATCH 12/37] Update README.md

---
 memgpt/local_llm/README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md
index a8c2304a..a82e2127 100644
--- a/memgpt/local_llm/README.md
+++ b/memgpt/local_llm/README.md
@@ -32,6 +32,8 @@ class LLMChatCompletionWrapper(ABC):
         pass
 ```
 
+## Example with Airoboros LLM
+
 To help you get started, we've implemented an example wrapper class for a popular llama2 model finetuned on function calling (airoboros). We want MemGPT to run well on open models as much as you do, so we'll be actively updating this page with more examples. Additionally, we welcome contributions from the community! If you find an open LLM that works well with MemGPT, please open a PR with a model wrapper and we'll merge it ASAP.
 
 ```python

From 7e103fcb63e50b6a731c9e59ff2ec25c17e016c6 Mon Sep 17 00:00:00 2001
From: Vivian Fang <hi@vivi.sh>
Date: Sun, 22 Oct 2023 23:35:52 -0700
Subject: [PATCH 13/37] Update README.md

---
 memgpt/local_llm/README.md | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md
index a82e2127..69165305 100644
--- a/memgpt/local_llm/README.md
+++ b/memgpt/local_llm/README.md
@@ -7,16 +7,18 @@
 export OPENAI_API_BASE=...
 ```
 
-Note: for this to work, the endpoint **MUST** support function calls. As of 10/22/2023, most ChatCompletion endpoints do **NOT** support function calls, so if you want to play with MemGPT and open models, you probably need to follow the instructions below.
+For this to work, the endpoint **MUST** support function calls.
+
+**As of 10/22/2023, most ChatCompletion endpoints do *NOT* support function calls, so if you want to play with MemGPT and open models, you probably need to follow the instructions below.**
 
 ## Integrating a function-call finetuned LLM with MemGPT
 
 **If you have a hosted local model that is function-call finetuned**:
-  - implement a wrapper class for that model
-    - the wrapper class needs to implement two functions:
-      - one to go from ChatCompletion messages/functions schema to a prompt string
-      - and one to go from raw LLM outputs to a ChatCompletion response
-  - put that model behind a server (e.g. using WebUI) and set `OPENAI_API_BASE`
+  - Implement a wrapper class for that model
+    - The wrapper class needs to implement two functions:
+      - One to go from ChatCompletion messages/functions schema to a prompt string
+      - And one to go from raw LLM outputs to a ChatCompletion response
+  - Put that model behind a server (e.g. using WebUI) and set `OPENAI_API_BASE`
 
 ```python
 class LLMChatCompletionWrapper(ABC):
@@ -61,18 +63,19 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper):
         }
         """
 ```
+See full file [here](llm_chat_completion_wrappers/airoboros.py).
 
 ---
 
 ## Status of ChatCompletion w/ function calling and open LLMs
 
-MemGPT uses function calling to do memory management. With OpenAI's ChatCompletion API, you can pass in a function schema in the ‘functions' keyword arg, and the API response will include a ‘function_call’ field that includes the function name and the function arguments (generated JSON). How this works under the hood is your ‘functions’ keyword is combined with the ‘messages’ and ‘system' to form one big string input to the transformer, and the output of the transformer is parsed to extract the JSON function call.
+MemGPT uses function calling to do memory management. With OpenAI's ChatCompletion API, you can pass in a function schema in the `functions` keyword arg, and the API response will include a `function_call` field that includes the function name and the function arguments (generated JSON). How this works under the hood is your `functions` keyword is combined with the `messages` and `system` to form one big string input to the transformer, and the output of the transformer is parsed to extract the JSON function call.
 
 In the future, more open LLMs and LLM servers (that can host OpenAI-compatable ChatCompletion endpoints) may start including parsing code to do this automatically as standard practice. However, in the meantime, when you see a model that says it supports “function calling”, like Airoboros, it doesn't mean that you can just load Airoboros into a ChatCompletion-compatable endpoint like FastChat, and then use the same OpenAI API call and it'll just work.
 
-(1) When an open LLM says it supports function calling, they probably mean that the model was finetuned on some function call data. Remember, transformers are just string-in-string-out, so there are many ways to format this function call data. Airoboros formats the function schema in YAML style (see https://huggingface.co/jondurbin/airoboros-l2-70b-3.1.2#agentfunction-calling)) and the output is in JSON style. To get this to work behind a ChatCompletion API, you still have to do the parsing from ‘functions’ keyword arg (containing the schema) to the model's expected schema style in the prompt (YAML for Airoboros), and you have to run some code to extract the function call (JSON for Airoboros) and package it cleanly as a ‘function_call’ field in the response.
+1. When an open LLM says it supports function calling, they probably mean that the model was finetuned on some function call data. Remember, transformers are just string-in-string-out, so there are many ways to format this function call data. Airoboros formats the function schema in YAML style (see https://huggingface.co/jondurbin/airoboros-l2-70b-3.1.2#agentfunction-calling) and the output is in JSON style. To get this to work behind a ChatCompletion API, you still have to do the parsing from the `functions` keyword arg (containing the schema) to the model's expected schema style in the prompt (YAML for Airoboros), and you have to run some code to extract the function call (JSON for Airoboros) and package it cleanly as a `function_call` field in the response.
 
-(2) Partly because of how complex it is to support function calling, most (all?) of the community projects that do OpenAI ChatCompletion endpoints for arbitrary open LLMs do not support function calling, because if they did, they would need to write model-specific parsing code for each one.
+2. Partly because of how complex it is to support function calling, most (all?) of the community projects that do OpenAI ChatCompletion endpoints for arbitrary open LLMs do not support function calling, because if they did, they would need to write model-specific parsing code for each one.
 
 ## How can you run MemGPT with open LLMs that support function calling?
 

From f4ae08f6f5fa0cf15d874091df6769f68b9495de Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Sun, 22 Oct 2023 23:59:46 -0700
Subject: [PATCH 14/37] add comment about no inner mono + blackified the code

---
 memgpt/local_llm/chat_completion_proxy.py     | 59 +++++++------
 .../llm_chat_completion_wrappers/airoboros.py | 51 +++++++-----
 .../wrapper_base.py                           |  1 -
 memgpt/local_llm/webui_settings.py            | 82 +++++++++----------
 memgpt/openai_tools.py                        | 21 ++---
 5 files changed, 115 insertions(+), 99 deletions(-)

diff --git a/memgpt/local_llm/chat_completion_proxy.py b/memgpt/local_llm/chat_completion_proxy.py
index 39f69109..ea5b904f 100644
--- a/memgpt/local_llm/chat_completion_proxy.py
+++ b/memgpt/local_llm/chat_completion_proxy.py
@@ -13,8 +13,8 @@ import requests
 from .webui_settings import DETERMINISTIC, SIMPLE
 from .llm_chat_completion_wrappers import airoboros
 
-HOST = os.getenv('OPENAI_API_BASE')
-HOST_TYPE = os.getenv('BACKEND_TYPE')  # default None == ChatCompletion
+HOST = os.getenv("OPENAI_API_BASE")
+HOST_TYPE = os.getenv("BACKEND_TYPE")  # default None == ChatCompletion
 
 
 class DotDict(dict):
@@ -28,36 +28,35 @@ class DotDict(dict):
 
 
 async def get_chat_completion(
-        model,  # no model, since the model is fixed to whatever you set in your own backend
-        messages,
-        functions,
-        function_call="auto",
-    ):
+    model,  # no model, since the model is fixed to whatever you set in your own backend
+    messages,
+    functions,
+    function_call="auto",
+):
     if function_call != "auto":
         raise ValueError(f"function_call == {function_call} not supported (auto only)")
 
-    if True or model == 'airoboros_v2.1':
+    if True or model == "airoboros_v2.1":
         llm_wrapper = airoboros.Airoboros21Wrapper()
 
     # First step: turn the message sequence into a prompt that the model expects
     prompt = llm_wrapper.chat_completion_to_prompt(messages, functions)
     # print(prompt)
 
-    if HOST_TYPE != 'webui':
+    if HOST_TYPE != "webui":
         raise ValueError(HOST_TYPE)
 
     request = SIMPLE
-    request['prompt'] = prompt
+    request["prompt"] = prompt
 
     try:
-
-        URI = f'{HOST}/v1/generate'
+        URI = f"{HOST}/v1/generate"
         response = requests.post(URI, json=request)
         if response.status_code == 200:
             # result = response.json()['results'][0]['history']
             result = response.json()
             # print(f"raw API response: {result}")
-            result = result['results'][0]['text']
+            result = result["results"][0]["text"]
             print(f"json API response.text: {result}")
         else:
             raise Exception(f"API call got non-200 response code")
@@ -68,19 +67,27 @@ async def get_chat_completion(
         # print(cleaned_result)
 
         # unpack with response.choices[0].message.content
-        response = DotDict({
-            'model': None,
-            'choices': [DotDict({
-                'message': DotDict(chat_completion_result),
-                'finish_reason': 'stop',  # TODO vary based on webui response
-            })],
-            'usage': DotDict({
-                # TODO fix
-                'prompt_tokens': 0,
-                'completion_tokens': 0,
-                'total_tokens': 0,
-            })
-        })
+        response = DotDict(
+            {
+                "model": None,
+                "choices": [
+                    DotDict(
+                        {
+                            "message": DotDict(chat_completion_result),
+                            "finish_reason": "stop",  # TODO vary based on webui response
+                        }
+                    )
+                ],
+                "usage": DotDict(
+                    {
+                        # TODO fix
+                        "prompt_tokens": 0,
+                        "completion_tokens": 0,
+                        "total_tokens": 0,
+                    }
+                ),
+            }
+        )
         return response
 
     except Exception as e:
diff --git a/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py b/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py
index 303e2d37..6b3a117f 100644
--- a/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py
+++ b/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py
@@ -5,9 +5,16 @@ from .wrapper_base import LLMChatCompletionWrapper
 
 class Airoboros21Wrapper(LLMChatCompletionWrapper):
     """Wrapper for Airoboros 70b v2.1: https://huggingface.co/jondurbin/airoboros-l2-70b-2.1
+
+    Note: this wrapper formats a prompt that only generates JSON, no inner thoughts
     """
 
-    def __init__(self, simplify_json_content=True, include_assistant_prefix=True, clean_function_args=True):
+    def __init__(
+        self,
+        simplify_json_content=True,
+        include_assistant_prefix=True,
+        clean_function_args=True,
+    ):
         self.simplify_json_content = simplify_json_content
         self.include_assistant_prefix = include_assistant_prefix
         self.clean_func_args = clean_function_args
@@ -54,8 +61,8 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper):
         prompt = ""
 
         # System insturctions go first
-        assert messages[0]['role'] == 'system'
-        prompt += messages[0]['content']
+        assert messages[0]["role"] == "system"
+        prompt += messages[0]["content"]
 
         # Next is the functions preamble
         def create_function_description(schema):
@@ -64,7 +71,7 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper):
             func_str += f"{schema['name']}:"
             func_str += f"\n  description: {schema['description']}"
             func_str += f"\n  params:"
-            for param_k, param_v in schema['parameters']['properties'].items():
+            for param_k, param_v in schema["parameters"]["properties"].items():
                 # TODO we're ignoring type
                 func_str += f"\n    {param_k}: {param_v['description']}"
             # TODO we're ignoring schema['parameters']['required']
@@ -77,19 +84,19 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper):
 
         # Last are the user/assistant messages
         for message in messages[1:]:
-            assert message['role'] in ['user', 'assistant', 'function'], message
+            assert message["role"] in ["user", "assistant", "function"], message
 
-            if message['role'] == 'user':
+            if message["role"] == "user":
                 if self.simplify_json_content:
                     try:
-                        content_json = json.loads(message['content'])
-                        content_simple = content_json['message']
+                        content_json = json.loads(message["content"])
+                        content_simple = content_json["message"]
                         prompt += f"\nUSER: {content_simple}"
                     except:
                         prompt += f"\nUSER: {message['content']}"
-            elif message['role'] == 'assistant':
+            elif message["role"] == "assistant":
                 prompt += f"\nASSISTANT: {message['content']}"
-            elif message['role'] == 'function':
+            elif message["role"] == "function":
                 # TODO
                 continue
                 # prompt += f"\nASSISTANT: (function return) {message['content']}"
@@ -107,9 +114,9 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper):
         cleaned_function_name = function_name
         cleaned_function_args = function_args.copy()
 
-        if function_name == 'send_message':
+        if function_name == "send_message":
             # strip request_heartbeat
-            cleaned_function_args.pop('request_heartbeat', None)
+            cleaned_function_args.pop("request_heartbeat", None)
 
         # TODO more cleaning to fix errors LLM makes
         return cleaned_function_name, cleaned_function_args
@@ -129,18 +136,20 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper):
         }
         """
         function_json_output = json.loads(raw_llm_output)
-        function_name = function_json_output['function']
-        function_parameters = function_json_output['params']
+        function_name = function_json_output["function"]
+        function_parameters = function_json_output["params"]
 
         if self.clean_func_args:
-            function_name, function_parameters = self.clean_function_args(function_name, function_parameters)
+            function_name, function_parameters = self.clean_function_args(
+                function_name, function_parameters
+            )
 
         message = {
-            'role': 'assistant',
-            'content': None,
-            'function_call': {
-                'name': function_name,
-                'arguments': json.dumps(function_parameters),
-            }
+            "role": "assistant",
+            "content": None,
+            "function_call": {
+                "name": function_name,
+                "arguments": json.dumps(function_parameters),
+            },
         }
         return message
diff --git a/memgpt/local_llm/llm_chat_completion_wrappers/wrapper_base.py b/memgpt/local_llm/llm_chat_completion_wrappers/wrapper_base.py
index d2e7584e..b1186c46 100644
--- a/memgpt/local_llm/llm_chat_completion_wrappers/wrapper_base.py
+++ b/memgpt/local_llm/llm_chat_completion_wrappers/wrapper_base.py
@@ -2,7 +2,6 @@ from abc import ABC, abstractmethod
 
 
 class LLMChatCompletionWrapper(ABC):
-
     @abstractmethod
     def chat_completion_to_prompt(self, messages, functions):
         """Go from ChatCompletion to a single prompt string"""
diff --git a/memgpt/local_llm/webui_settings.py b/memgpt/local_llm/webui_settings.py
index dc578084..2601f642 100644
--- a/memgpt/local_llm/webui_settings.py
+++ b/memgpt/local_llm/webui_settings.py
@@ -1,54 +1,54 @@
 DETERMINISTIC = {
-      'max_new_tokens': 250,
-      'do_sample': False,
-      'temperature': 0,
-      'top_p': 0,
-      'typical_p': 1,
-      'repetition_penalty': 1.18,
-      'repetition_penalty_range': 0,
-      'encoder_repetition_penalty': 1,
-      'top_k': 1,
-      'min_length': 0,
-      'no_repeat_ngram_size': 0,
-      'num_beams': 1,
-      'penalty_alpha': 0,
-      'length_penalty': 1,
-      'early_stopping': False,
-      'guidance_scale': 1,
-      'negative_prompt': '',
-      'seed': -1,
-      'add_bos_token': True,
-      'stopping_strings': [
-        '\nUSER:',
-        '\nASSISTANT:',
+    "max_new_tokens": 250,
+    "do_sample": False,
+    "temperature": 0,
+    "top_p": 0,
+    "typical_p": 1,
+    "repetition_penalty": 1.18,
+    "repetition_penalty_range": 0,
+    "encoder_repetition_penalty": 1,
+    "top_k": 1,
+    "min_length": 0,
+    "no_repeat_ngram_size": 0,
+    "num_beams": 1,
+    "penalty_alpha": 0,
+    "length_penalty": 1,
+    "early_stopping": False,
+    "guidance_scale": 1,
+    "negative_prompt": "",
+    "seed": -1,
+    "add_bos_token": True,
+    "stopping_strings": [
+        "\nUSER:",
+        "\nASSISTANT:",
         # '\n' +
         # '</s>',
         # '<|',
         # '\n#',
         # '\n\n\n',
-      ],
-      'truncation_length': 4096,
-      'ban_eos_token': False,
-      'skip_special_tokens': True,
-      'top_a': 0,
-      'tfs': 1,
-      'epsilon_cutoff': 0,
-      'eta_cutoff': 0,
-      'mirostat_mode': 2,
-      'mirostat_tau': 4,
-      'mirostat_eta': 0.1,
-      'use_mancer': False
-    }
+    ],
+    "truncation_length": 4096,
+    "ban_eos_token": False,
+    "skip_special_tokens": True,
+    "top_a": 0,
+    "tfs": 1,
+    "epsilon_cutoff": 0,
+    "eta_cutoff": 0,
+    "mirostat_mode": 2,
+    "mirostat_tau": 4,
+    "mirostat_eta": 0.1,
+    "use_mancer": False,
+}
 
 SIMPLE = {
-      'stopping_strings': [
-        '\nUSER:',
-        '\nASSISTANT:',
+    "stopping_strings": [
+        "\nUSER:",
+        "\nASSISTANT:",
         # '\n' +
         # '</s>',
         # '<|',
         # '\n#',
         # '\n\n\n',
-      ],
-      'truncation_length': 4096,
-}
\ No newline at end of file
+    ],
+    "truncation_length": 4096,
+}
diff --git a/memgpt/openai_tools.py b/memgpt/openai_tools.py
index 7729ae15..3d63d134 100644
--- a/memgpt/openai_tools.py
+++ b/memgpt/openai_tools.py
@@ -4,10 +4,12 @@ import os
 import time
 
 from .local_llm.chat_completion_proxy import get_chat_completion
-HOST = os.getenv('OPENAI_API_BASE')
-HOST_TYPE = os.getenv('BACKEND_TYPE')  # default None == ChatCompletion
+
+HOST = os.getenv("OPENAI_API_BASE")
+HOST_TYPE = os.getenv("BACKEND_TYPE")  # default None == ChatCompletion
 
 import openai
+
 if HOST is not None:
     openai.api_base = HOST
 
@@ -108,25 +110,24 @@ def aretry_with_exponential_backoff(
 
 @aretry_with_exponential_backoff
 async def acompletions_with_backoff(**kwargs):
-
     # Local model
     if HOST_TYPE is not None:
         return await get_chat_completion(**kwargs)
 
     # OpenAI / Azure model
     else:
-        azure_openai_deployment = os.getenv('AZURE_OPENAI_DEPLOYMENT')
+        azure_openai_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT")
         if azure_openai_deployment is not None:
-            kwargs['deployment_id'] = azure_openai_deployment
+            kwargs["deployment_id"] = azure_openai_deployment
         return await openai.ChatCompletion.acreate(**kwargs)
 
 
 @aretry_with_exponential_backoff
 async def acreate_embedding_with_backoff(**kwargs):
     """Wrapper around Embedding.acreate w/ backoff"""
-    azure_openai_deployment = os.getenv('AZURE_OPENAI_DEPLOYMENT')
+    azure_openai_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT")
     if azure_openai_deployment is not None:
-        kwargs['deployment_id'] = azure_openai_deployment
+        kwargs["deployment_id"] = azure_openai_deployment
     return await openai.Embedding.acreate(**kwargs)
 
 
@@ -134,6 +135,6 @@ async def async_get_embedding_with_backoff(text, model="text-embedding-ada-002")
     """To get text embeddings, import/call this function
     It specifies defaults + handles rate-limiting + is async"""
     text = text.replace("\n", " ")
-    response = await acreate_embedding_with_backoff(input = [text], model=model)
-    embedding = response['data'][0]['embedding']
-    return embedding
\ No newline at end of file
+    response = await acreate_embedding_with_backoff(input=[text], model=model)
+    embedding = response["data"][0]["embedding"]
+    return embedding

From cc352e7312080ab75d420763a3c96fac73776529 Mon Sep 17 00:00:00 2001
From: Vivian Fang <hi@vivi.sh>
Date: Mon, 23 Oct 2023 00:07:12 -0700
Subject: [PATCH 15/37] address comments, run black on interface.py

---
 config.py    |  35 +++++++++++--
 interface.py | 143 +++++++++++++++++++++++++++++++++------------------
 main.py      |  11 ++--
 3 files changed, 128 insertions(+), 61 deletions(-)

diff --git a/config.py b/config.py
index abe86b2b..14833fb4 100644
--- a/config.py
+++ b/config.py
@@ -66,7 +66,8 @@ class Config:
                 f"Would you like to recompute embeddings? Do this if your files have changed.\nFiles:{self.archival_storage_files}",
                 default=False,
             )
-        await self.configure_archival_storage(recompute_embeddings)
+        if self.archival_storage_files:
+            await self.configure_archival_storage(recompute_embeddings)
         return self
 
     @classmethod
@@ -120,7 +121,7 @@ class Config:
         print(self.memgpt_persona)
 
         self.human_persona = await questionary.select(
-            "Which persona would you like to use?",
+            "Which user would you like to use?",
             Config.get_user_personas(),
         ).ask_async()
 
@@ -223,14 +224,40 @@ class Config:
         if dir_path is None:
             dir_path = Config.personas_dir
         all_personas = Config.get_personas(dir_path)
-        return Config.get_persona_choices([p for p in all_personas], get_persona_text)
+        default_personas = [
+            "sam",
+            "sam_pov",
+            "memgpt_starter",
+            "memgpt_doc",
+            "sam_simple_pov_gpt35",
+        ]
+        custom_personas = list(set(all_personas) - set(default_personas))
+        return Config.get_persona_choices(
+            [p for p in custom_personas + default_personas], get_persona_text
+        ) + [
+            questionary.Separator(),
+            questionary.Choice(
+                f"📝 You can create your own personas by adding .txt files to {dir_path}.",
+                disabled=True,
+            ),
+        ]
 
     @staticmethod
     def get_user_personas(dir_path=None):
         if dir_path is None:
             dir_path = Config.humans_dir
         all_personas = Config.get_personas(dir_path)
-        return Config.get_persona_choices([p for p in all_personas], get_human_text)
+        default_personas = ["basic", "cs_phd"]
+        custom_personas = list(set(all_personas) - set(default_personas))
+        return Config.get_persona_choices(
+            [p for p in custom_personas + default_personas], get_human_text
+        ) + [
+            questionary.Separator(),
+            questionary.Choice(
+                f"📝 You can create your own human profiles by adding .txt files to {dir_path}.",
+                disabled=True,
+            ),
+        ]
 
     @staticmethod
     def get_personas(dir_path) -> List[str]:
diff --git a/interface.py b/interface.py
index af5cfc46..0e66af08 100644
--- a/interface.py
+++ b/interface.py
@@ -10,131 +10,172 @@ init(autoreset=True)
 # DEBUG = True  # puts full message outputs in the terminal
 DEBUG = False  # only dumps important messages in the terminal
 
+
 def important_message(msg):
-    print(f'{Fore.MAGENTA}{Style.BRIGHT}{msg}{Style.RESET_ALL}')
+    print(f"{Fore.MAGENTA}{Style.BRIGHT}{msg}{Style.RESET_ALL}")
+
+
+def warning_message(msg):
+    print(f"{Fore.RED}{Style.BRIGHT}{msg}{Style.RESET_ALL}")
+
 
 async def internal_monologue(msg):
     # ANSI escape code for italic is '\x1B[3m'
-    print(f'\x1B[3m{Fore.LIGHTBLACK_EX}💭 {msg}{Style.RESET_ALL}')
+    print(f"\x1B[3m{Fore.LIGHTBLACK_EX}💭 {msg}{Style.RESET_ALL}")
+
 
 async def assistant_message(msg):
-    print(f'{Fore.YELLOW}{Style.BRIGHT}🤖 {Fore.YELLOW}{msg}{Style.RESET_ALL}')
+    print(f"{Fore.YELLOW}{Style.BRIGHT}🤖 {Fore.YELLOW}{msg}{Style.RESET_ALL}")
+
 
 async def memory_message(msg):
-    print(f'{Fore.LIGHTMAGENTA_EX}{Style.BRIGHT}🧠 {Fore.LIGHTMAGENTA_EX}{msg}{Style.RESET_ALL}')
+    print(
+        f"{Fore.LIGHTMAGENTA_EX}{Style.BRIGHT}🧠 {Fore.LIGHTMAGENTA_EX}{msg}{Style.RESET_ALL}"
+    )
+
 
 async def system_message(msg):
-    printd(f'{Fore.MAGENTA}{Style.BRIGHT}🖥️ [system] {Fore.MAGENTA}{msg}{Style.RESET_ALL}')
+    printd(
+        f"{Fore.MAGENTA}{Style.BRIGHT}🖥️ [system] {Fore.MAGENTA}{msg}{Style.RESET_ALL}"
+    )
+
 
 async def user_message(msg, raw=False):
     if isinstance(msg, str):
         if raw:
-            printd(f'{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg}{Style.RESET_ALL}')
+            printd(f"{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg}{Style.RESET_ALL}")
             return
         else:
             try:
                 msg_json = json.loads(msg)
             except:
                 printd(f"Warning: failed to parse user message into json")
-                printd(f'{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg}{Style.RESET_ALL}')
+                printd(
+                    f"{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg}{Style.RESET_ALL}"
+                )
                 return
 
-    if msg_json['type'] == 'user_message':
-        msg_json.pop('type')
-        printd(f'{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg_json}{Style.RESET_ALL}')
-    elif msg_json['type'] == 'heartbeat':
+    if msg_json["type"] == "user_message":
+        msg_json.pop("type")
+        printd(f"{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg_json}{Style.RESET_ALL}")
+    elif msg_json["type"] == "heartbeat":
         if DEBUG:
-            msg_json.pop('type')
-            printd(f'{Fore.GREEN}{Style.BRIGHT}💓 {Fore.GREEN}{msg_json}{Style.RESET_ALL}')
-    elif msg_json['type'] == 'system_message':
-        msg_json.pop('type')
-        printd(f'{Fore.GREEN}{Style.BRIGHT}🖥️ {Fore.GREEN}{msg_json}{Style.RESET_ALL}')
+            msg_json.pop("type")
+            printd(
+                f"{Fore.GREEN}{Style.BRIGHT}💓 {Fore.GREEN}{msg_json}{Style.RESET_ALL}"
+            )
+    elif msg_json["type"] == "system_message":
+        msg_json.pop("type")
+        printd(f"{Fore.GREEN}{Style.BRIGHT}🖥️ {Fore.GREEN}{msg_json}{Style.RESET_ALL}")
     else:
-        printd(f'{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg_json}{Style.RESET_ALL}')
+        printd(f"{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg_json}{Style.RESET_ALL}")
+
 
 async def function_message(msg):
-
     if isinstance(msg, dict):
-        printd(f'{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}')
+        printd(f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}")
         return
 
-    if msg.startswith('Success: '):
-        printd(f'{Fore.RED}{Style.BRIGHT}⚡🟢 [function] {Fore.RED}{msg}{Style.RESET_ALL}')
-    elif msg.startswith('Error: '):
-        printd(f'{Fore.RED}{Style.BRIGHT}⚡🔴 [function] {Fore.RED}{msg}{Style.RESET_ALL}')
-    elif msg.startswith('Running '):
+    if msg.startswith("Success: "):
+        printd(
+            f"{Fore.RED}{Style.BRIGHT}⚡🟢 [function] {Fore.RED}{msg}{Style.RESET_ALL}"
+        )
+    elif msg.startswith("Error: "):
+        printd(
+            f"{Fore.RED}{Style.BRIGHT}⚡🔴 [function] {Fore.RED}{msg}{Style.RESET_ALL}"
+        )
+    elif msg.startswith("Running "):
         if DEBUG:
-            printd(f'{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}')
+            printd(
+                f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}"
+            )
         else:
-            if 'memory' in msg:
-                match = re.search(r'Running (\w+)\((.*)\)', msg)
+            if "memory" in msg:
+                match = re.search(r"Running (\w+)\((.*)\)", msg)
                 if match:
                     function_name = match.group(1)
                     function_args = match.group(2)
-                    print(f'{Fore.RED}{Style.BRIGHT}⚡🧠 [function] {Fore.RED}updating memory with {function_name}{Style.RESET_ALL}:')
+                    print(
+                        f"{Fore.RED}{Style.BRIGHT}⚡🧠 [function] {Fore.RED}updating memory with {function_name}{Style.RESET_ALL}:"
+                    )
                     try:
                         msg_dict = eval(function_args)
-                        if function_name == 'archival_memory_search':
-                            print(f'{Fore.RED}\tquery: {msg_dict["query"]}, page: {msg_dict["page"]}')
+                        if function_name == "archival_memory_search":
+                            print(
+                                f'{Fore.RED}\tquery: {msg_dict["query"]}, page: {msg_dict["page"]}'
+                            )
                         else:
-                            print(f'{Fore.RED}{Style.BRIGHT}\t{Fore.RED} {msg_dict["old_content"]}\n\t{Fore.GREEN}→ {msg_dict["new_content"]}')
+                            print(
+                                f'{Fore.RED}{Style.BRIGHT}\t{Fore.RED} {msg_dict["old_content"]}\n\t{Fore.GREEN}→ {msg_dict["new_content"]}'
+                            )
                     except Exception as e:
                         printd(e)
                         printd(msg_dict)
                         pass
                 else:
                     printd(f"Warning: did not recognize function message")
-                    printd(f'{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}')
-            elif 'send_message' in msg:
+                    printd(
+                        f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}"
+                    )
+            elif "send_message" in msg:
                 # ignore in debug mode
                 pass
             else:
-                printd(f'{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}')
+                printd(
+                    f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}"
+                )
     else:
         try:
             msg_dict = json.loads(msg)
             if "status" in msg_dict and msg_dict["status"] == "OK":
-                printd(f'{Fore.GREEN}{Style.BRIGHT}⚡ [function] {Fore.GREEN}{msg}{Style.RESET_ALL}')
+                printd(
+                    f"{Fore.GREEN}{Style.BRIGHT}⚡ [function] {Fore.GREEN}{msg}{Style.RESET_ALL}"
+                )
         except Exception:
             printd(f"Warning: did not recognize function message {type(msg)} {msg}")
-            printd(f'{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}')
+            printd(
+                f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}"
+            )
+
 
 async def print_messages(message_sequence):
     for msg in message_sequence:
-        role = msg['role']
-        content = msg['content']
+        role = msg["role"]
+        content = msg["content"]
 
-        if role == 'system':
+        if role == "system":
             await system_message(content)
-        elif role == 'assistant':
+        elif role == "assistant":
             # Differentiate between internal monologue, function calls, and messages
-            if msg.get('function_call'):
+            if msg.get("function_call"):
                 if content is not None:
                     await internal_monologue(content)
-                await function_message(msg['function_call'])
+                await function_message(msg["function_call"])
                 # assistant_message(content)
             else:
                 await internal_monologue(content)
-        elif role == 'user':
+        elif role == "user":
             await user_message(content)
-        elif role == 'function':
+        elif role == "function":
             await function_message(content)
         else:
-            print(f'Unknown role: {content}')
+            print(f"Unknown role: {content}")
+
 
 async def print_messages_simple(message_sequence):
     for msg in message_sequence:
-        role = msg['role']
-        content = msg['content']
+        role = msg["role"]
+        content = msg["content"]
 
-        if role == 'system':
+        if role == "system":
             await system_message(content)
-        elif role == 'assistant':
+        elif role == "assistant":
             await assistant_message(content)
-        elif role == 'user':
+        elif role == "user":
             await user_message(content, raw=True)
         else:
-            print(f'Unknown role: {content}')
+            print(f"Unknown role: {content}")
+
 
 async def print_messages_raw(message_sequence):
     for msg in message_sequence:
diff --git a/main.py b/main.py
index ade7aa9b..c0a92941 100644
--- a/main.py
+++ b/main.py
@@ -235,7 +235,11 @@ async def main():
     else:
         cfg = await Config.config_init()
 
-    print("Running... [exit by typing '/exit']")
+    interface.important_message("Running... [exit by typing '/exit']")
+    if cfg.model != constants.DEFAULT_MEMGPT_MODEL:
+        interface.warning_message(
+            f"⛔️ Warning - you are running MemGPT with {cfg.model}, which is not officially supported (yet). Expect bugs!"
+        )
 
     # Azure OpenAI support
     if FLAGS.use_azure_openai:
@@ -269,11 +273,6 @@ async def main():
             )
             return
 
-    if cfg.model != constants.DEFAULT_MEMGPT_MODEL:
-        interface.important_message(
-            f"Warning - you are running MemGPT with {cfg.model}, which is not officially supported (yet). Expect bugs!"
-        )
-
     if cfg.archival_storage_index:
         persistence_manager = InMemoryStateManagerWithFaiss(
             cfg.index, cfg.archival_database

From 30609cbac7d56a5721cddbaec94f98ea67e156aa Mon Sep 17 00:00:00 2001
From: Vivian Fang <hi@vivi.sh>
Date: Mon, 23 Oct 2023 00:16:19 -0700
Subject: [PATCH 16/37] warn on computing embeddings with non-openai endpoint

---
 config.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/config.py b/config.py
index 14833fb4..5ebebd24 100644
--- a/config.py
+++ b/config.py
@@ -1,4 +1,3 @@
-import asyncio
 import glob
 import json
 import os
@@ -8,10 +7,7 @@ import interface
 
 import questionary
 
-from colorama import Fore, Style, init
-from rich.console import Console
-
-console = Console()
+from colorama import Fore, Style
 
 from typing import List, Type
 
@@ -40,10 +36,11 @@ class Config:
         self.compute_embeddings = False
         self.agent_save_file = None
         self.persistence_manager_save_file = None
+        self.host = os.getenv("OPENAI_API_BASE")
 
     @classmethod
     async def legacy_flags_init(
-        cls: Type["config"],
+        cls: Type["Config"],
         model: str,
         memgpt_persona: str,
         human_persona: str,
@@ -162,11 +159,16 @@ class Config:
 
     async def configure_archival_storage(self, recompute_embeddings):
         if recompute_embeddings:
-            self.archival_storage_index = (
-                await utils.prepare_archival_index_from_files_compute_embeddings(
-                    self.archival_storage_files
+            if self.host:
+                interface.warning_message(
+                    "⛔️ Embeddings on a non-OpenAI endpoint are not yet supported, falling back to substring matching search."
+                )
+            else:
+                self.archival_storage_index = (
+                    await utils.prepare_archival_index_from_files_compute_embeddings(
+                        self.archival_storage_files
+                    )
                 )
-            )
         if self.compute_embeddings and self.archival_storage_index:
             self.index, self.archival_database = utils.prepare_archival_index(
                 self.archival_storage_index
@@ -188,6 +190,7 @@ class Config:
             "load_type": self.load_type,
             "agent_save_file": self.agent_save_file,
             "persistence_manager_save_file": self.persistence_manager_save_file,
+            "host": self.host,
         }
 
     def load_config(self, config_file):
@@ -203,6 +206,7 @@ class Config:
         self.load_type = cfg["load_type"]
         self.agent_save_file = cfg["agent_save_file"]
         self.persistence_manager_save_file = cfg["persistence_manager_save_file"]
+        self.host = cfg["host"]
 
     def write_config(self, configs_dir=None):
         if configs_dir is None:

From 40a93abc851d8e19ba918aa3c8de319fb4c9b787 Mon Sep 17 00:00:00 2001
From: Vivian Fang <hi@vivi.sh>
Date: Mon, 23 Oct 2023 00:20:20 -0700
Subject: [PATCH 17/37] fix github workflow: pipe input into main.py

---
 .github/workflows/main.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 89bd5422..13fea8e5 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -23,5 +23,6 @@ jobs:
         python -m pip install --upgrade pip
         pip install -r requirements.txt
 
-    - name: Run main.py
-      run: python main.py
+    - name: Run main.py with input
+      run: |
+        echo -e "\n\n\nn" | python main.py

From faaa9a04fa80c1b686b5a3afc97fa607a41af9c1 Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 23 Oct 2023 00:41:10 -0700
Subject: [PATCH 18/37] refactored + updated the airoboros wrapper a bit

---
 memgpt/local_llm/chat_completion_proxy.py     | 104 +++++++-----------
 .../llm_chat_completion_wrappers/airoboros.py |  63 +++++++++--
 memgpt/local_llm/utils.py                     |   8 ++
 memgpt/local_llm/webui/api.py                 |  33 ++++++
 memgpt/local_llm/webui/settings.py            |  12 ++
 memgpt/local_llm/webui_settings.py            |  54 ---------
 6 files changed, 150 insertions(+), 124 deletions(-)
 create mode 100644 memgpt/local_llm/utils.py
 create mode 100644 memgpt/local_llm/webui/api.py
 create mode 100644 memgpt/local_llm/webui/settings.py
 delete mode 100644 memgpt/local_llm/webui_settings.py

diff --git a/memgpt/local_llm/chat_completion_proxy.py b/memgpt/local_llm/chat_completion_proxy.py
index ea5b904f..ae983339 100644
--- a/memgpt/local_llm/chat_completion_proxy.py
+++ b/memgpt/local_llm/chat_completion_proxy.py
@@ -1,30 +1,16 @@
-"""MemGPT sends a ChatCompletion request
-
-Under the hood, we use the functions argument to turn
-"""
-
-
 """Key idea: create drop-in replacement for agent's ChatCompletion call that runs on an OpenLLM backend"""
 
 import os
-import json
 import requests
+import json
 
-from .webui_settings import DETERMINISTIC, SIMPLE
+from .webui.api import get_webui_completion
 from .llm_chat_completion_wrappers import airoboros
+from .utils import DotDict
 
 HOST = os.getenv("OPENAI_API_BASE")
 HOST_TYPE = os.getenv("BACKEND_TYPE")  # default None == ChatCompletion
-
-
-class DotDict(dict):
-    """Allow dot access on properties similar to OpenAI response object"""
-
-    def __getattr__(self, attr):
-        return self.get(attr)
-
-    def __setattr__(self, key, value):
-        self[key] = value
+DEBUG = True
 
 
 async def get_chat_completion(
@@ -36,60 +22,52 @@ async def get_chat_completion(
     if function_call != "auto":
         raise ValueError(f"function_call == {function_call} not supported (auto only)")
 
-    if True or model == "airoboros_v2.1":
+    if model == "airoboros_v2.1":
+        llm_wrapper = airoboros.Airoboros21Wrapper()
+    else:
+        # Warn the user that we're using the fallback
+        print(
+            f"Warning: could not find an LLM wrapper for {model}, using the airoboros wrapper"
+        )
         llm_wrapper = airoboros.Airoboros21Wrapper()
 
     # First step: turn the message sequence into a prompt that the model expects
     prompt = llm_wrapper.chat_completion_to_prompt(messages, functions)
-    # print(prompt)
-
-    if HOST_TYPE != "webui":
-        raise ValueError(HOST_TYPE)
-
-    request = SIMPLE
-    request["prompt"] = prompt
+    if DEBUG:
+        print(prompt)
 
     try:
-        URI = f"{HOST}/v1/generate"
-        response = requests.post(URI, json=request)
-        if response.status_code == 200:
-            # result = response.json()['results'][0]['history']
-            result = response.json()
-            # print(f"raw API response: {result}")
-            result = result["results"][0]["text"]
-            print(f"json API response.text: {result}")
+        if HOST_TYPE == "webui":
+            result = get_webui_completion(prompt)
         else:
-            raise Exception(f"API call got non-200 response code")
+            raise ValueError(HOST_TYPE)
+    except requests.exceptions.ConnectionError as e:
+        raise ValueError(f"Was unable to connect to host {HOST}")
 
-        # cleaned_result, chatcompletion_result = parse_st_json_output(result)
-        chat_completion_result = llm_wrapper.output_to_chat_completion_response(result)
+    chat_completion_result = llm_wrapper.output_to_chat_completion_response(result)
+    if DEBUG:
         print(json.dumps(chat_completion_result, indent=2))
-        # print(cleaned_result)
 
-        # unpack with response.choices[0].message.content
-        response = DotDict(
-            {
-                "model": None,
-                "choices": [
-                    DotDict(
-                        {
-                            "message": DotDict(chat_completion_result),
-                            "finish_reason": "stop",  # TODO vary based on webui response
-                        }
-                    )
-                ],
-                "usage": DotDict(
+    # unpack with response.choices[0].message.content
+    response = DotDict(
+        {
+            "model": None,
+            "choices": [
+                DotDict(
                     {
-                        # TODO fix
-                        "prompt_tokens": 0,
-                        "completion_tokens": 0,
-                        "total_tokens": 0,
+                        "message": DotDict(chat_completion_result),
+                        "finish_reason": "stop",  # TODO vary based on backend response
                     }
-                ),
-            }
-        )
-        return response
-
-    except Exception as e:
-        # TODO
-        raise e
+                )
+            ],
+            "usage": DotDict(
+                {
+                    # TODO fix, actually use real info
+                    "prompt_tokens": 0,
+                    "completion_tokens": 0,
+                    "total_tokens": 0,
+                }
+            ),
+        }
+    )
+    return response
diff --git a/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py b/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py
index 6b3a117f..98d3625e 100644
--- a/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py
+++ b/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py
@@ -12,12 +12,16 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper):
     def __init__(
         self,
         simplify_json_content=True,
-        include_assistant_prefix=True,
         clean_function_args=True,
+        include_assistant_prefix=True,
+        include_opening_brace_in_prefix=True,
+        include_section_separators=True,
     ):
         self.simplify_json_content = simplify_json_content
-        self.include_assistant_prefix = include_assistant_prefix
         self.clean_func_args = clean_function_args
+        self.include_assistant_prefix = include_assistant_prefix
+        self.include_opening_brance_in_prefix = include_opening_brace_in_prefix
+        self.include_section_separators = include_section_separators
 
     def chat_completion_to_prompt(self, messages, functions):
         """Example for airoboros: https://huggingface.co/jondurbin/airoboros-l2-70b-2.1#prompt-format
@@ -77,11 +81,41 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper):
             # TODO we're ignoring schema['parameters']['required']
             return func_str
 
-        prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format."
+        # prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format."
+        prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the ongoing conversation. Provide your response in JSON format."
         prompt += f"\nAvailable functions:"
         for function_dict in functions:
             prompt += f"\n{create_function_description(function_dict)}"
 
+        def create_function_call(function_call):
+            """Go from ChatCompletion to Airoboros style function trace (in prompt)
+
+            ChatCompletion data (inside message['function_call']):
+                "function_call": {
+                    "name": ...
+                    "arguments": {
+                        "arg1": val1,
+                        ...
+                    }
+
+            Airoboros output:
+                {
+                  "function": "send_message",
+                  "params": {
+                    "message": "Hello there! I am Sam, an AI developed by Liminal Corp. How can I assist you today?"
+                  }
+                }
+            """
+            airo_func_call = {
+                "function": function_call["name"],
+                "params": json.loads(function_call["arguments"]),
+            }
+            return json.dumps(airo_func_call, indent=2)
+
+        # Add a sep for the conversation
+        if self.include_section_separators:
+            prompt += "\n### INPUT"
+
         # Last are the user/assistant messages
         for message in messages[1:]:
             assert message["role"] in ["user", "assistant", "function"], message
@@ -96,16 +130,25 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper):
                         prompt += f"\nUSER: {message['content']}"
             elif message["role"] == "assistant":
                 prompt += f"\nASSISTANT: {message['content']}"
+                # need to add the function call if there was one
+                if message["function_call"]:
+                    prompt += f"\n{create_function_call(message['function_call'])}"
             elif message["role"] == "function":
-                # TODO
-                continue
+                # TODO find a good way to add this
                 # prompt += f"\nASSISTANT: (function return) {message['content']}"
+                prompt += f"\nFUNCTION RETURN: {message['content']}"
+                continue
             else:
                 raise ValueError(message)
 
+        # Add a sep for the response
+        if self.include_section_separators:
+            prompt += "\n### RESPONSE"
+
         if self.include_assistant_prefix:
-            # prompt += f"\nPlease select the most suitable function and parameters from the list of available functions below, based on the user's input. Provide your response in JSON format."
             prompt += f"\nASSISTANT:"
+            if self.include_opening_brance_in_prefix:
+                prompt += "\n{"
 
         return prompt
 
@@ -135,7 +178,13 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper):
             }
         }
         """
-        function_json_output = json.loads(raw_llm_output)
+        if self.include_opening_brance_in_prefix and raw_llm_output[0] != "{":
+            raw_llm_output = "{" + raw_llm_output
+
+        try:
+            function_json_output = json.loads(raw_llm_output)
+        except Exception as e:
+            raise Exception(f"Failed to decode JSON from LLM output:\n{raw_llm_output}")
         function_name = function_json_output["function"]
         function_parameters = function_json_output["params"]
 
diff --git a/memgpt/local_llm/utils.py b/memgpt/local_llm/utils.py
new file mode 100644
index 00000000..42a0ce27
--- /dev/null
+++ b/memgpt/local_llm/utils.py
@@ -0,0 +1,8 @@
+class DotDict(dict):
+    """Allow dot access on properties similar to OpenAI response object"""
+
+    def __getattr__(self, attr):
+        return self.get(attr)
+
+    def __setattr__(self, key, value):
+        self[key] = value
diff --git a/memgpt/local_llm/webui/api.py b/memgpt/local_llm/webui/api.py
new file mode 100644
index 00000000..3cff08e0
--- /dev/null
+++ b/memgpt/local_llm/webui/api.py
@@ -0,0 +1,33 @@
+import os
+import requests
+
+from .settings import SIMPLE
+
+HOST = os.getenv("OPENAI_API_BASE")
+HOST_TYPE = os.getenv("BACKEND_TYPE")  # default None == ChatCompletion
+WEBUI_API_SUFFIX = "/v1/generate"
+DEBUG = True
+
+
+def get_webui_completion(prompt, settings=SIMPLE):
+    """See https://github.com/oobabooga/text-generation-webui for instructions on how to run the LLM web server"""
+
+    # Settings for the generation, includes the prompt + stop tokens, max length, etc
+    request = settings
+    request["prompt"] = prompt
+
+    try:
+        URI = f"{HOST}{WEBUI_API_SUFFIX}"
+        response = requests.post(URI, json=request)
+        if response.status_code == 200:
+            result = response.json()
+            result = result["results"][0]["text"]
+            if DEBUG:
+                print(f"json API response.text: {result}")
+        else:
+            raise Exception(f"API call got non-200 response code")
+    except:
+        # TODO handle gracefully
+        raise
+
+    return result
diff --git a/memgpt/local_llm/webui/settings.py b/memgpt/local_llm/webui/settings.py
new file mode 100644
index 00000000..2e9ecbce
--- /dev/null
+++ b/memgpt/local_llm/webui/settings.py
@@ -0,0 +1,12 @@
+SIMPLE = {
+    "stopping_strings": [
+        "\nUSER:",
+        "\nASSISTANT:",
+        # '\n' +
+        # '</s>',
+        # '<|',
+        # '\n#',
+        # '\n\n\n',
+    ],
+    "truncation_length": 4096,  # assuming llama2 models
+}
diff --git a/memgpt/local_llm/webui_settings.py b/memgpt/local_llm/webui_settings.py
deleted file mode 100644
index 2601f642..00000000
--- a/memgpt/local_llm/webui_settings.py
+++ /dev/null
@@ -1,54 +0,0 @@
-DETERMINISTIC = {
-    "max_new_tokens": 250,
-    "do_sample": False,
-    "temperature": 0,
-    "top_p": 0,
-    "typical_p": 1,
-    "repetition_penalty": 1.18,
-    "repetition_penalty_range": 0,
-    "encoder_repetition_penalty": 1,
-    "top_k": 1,
-    "min_length": 0,
-    "no_repeat_ngram_size": 0,
-    "num_beams": 1,
-    "penalty_alpha": 0,
-    "length_penalty": 1,
-    "early_stopping": False,
-    "guidance_scale": 1,
-    "negative_prompt": "",
-    "seed": -1,
-    "add_bos_token": True,
-    "stopping_strings": [
-        "\nUSER:",
-        "\nASSISTANT:",
-        # '\n' +
-        # '</s>',
-        # '<|',
-        # '\n#',
-        # '\n\n\n',
-    ],
-    "truncation_length": 4096,
-    "ban_eos_token": False,
-    "skip_special_tokens": True,
-    "top_a": 0,
-    "tfs": 1,
-    "epsilon_cutoff": 0,
-    "eta_cutoff": 0,
-    "mirostat_mode": 2,
-    "mirostat_tau": 4,
-    "mirostat_eta": 0.1,
-    "use_mancer": False,
-}
-
-SIMPLE = {
-    "stopping_strings": [
-        "\nUSER:",
-        "\nASSISTANT:",
-        # '\n' +
-        # '</s>',
-        # '<|',
-        # '\n#',
-        # '\n\n\n',
-    ],
-    "truncation_length": 4096,
-}

From 3d2b4c74891c078461e38cf8fa099a9015c70b90 Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 23 Oct 2023 00:43:47 -0700
Subject: [PATCH 19/37] default to webui if BACKEND_TYPE is not set

---
 memgpt/local_llm/chat_completion_proxy.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/memgpt/local_llm/chat_completion_proxy.py b/memgpt/local_llm/chat_completion_proxy.py
index ae983339..f4fe7b81 100644
--- a/memgpt/local_llm/chat_completion_proxy.py
+++ b/memgpt/local_llm/chat_completion_proxy.py
@@ -40,7 +40,8 @@ async def get_chat_completion(
         if HOST_TYPE == "webui":
             result = get_webui_completion(prompt)
         else:
-            raise ValueError(HOST_TYPE)
+            print(f"Warning: HOST_TYPE was not set, defaulting to webui")
+            result = get_webui_completion(prompt)
     except requests.exceptions.ConnectionError as e:
         raise ValueError(f"Was unable to connect to host {HOST}")
 

From a49731d71439bf63b3347396649fe71bdf7b0c6d Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 23 Oct 2023 00:44:43 -0700
Subject: [PATCH 20/37] typo

---
 memgpt/local_llm/chat_completion_proxy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/memgpt/local_llm/chat_completion_proxy.py b/memgpt/local_llm/chat_completion_proxy.py
index f4fe7b81..a5290717 100644
--- a/memgpt/local_llm/chat_completion_proxy.py
+++ b/memgpt/local_llm/chat_completion_proxy.py
@@ -40,7 +40,7 @@ async def get_chat_completion(
         if HOST_TYPE == "webui":
             result = get_webui_completion(prompt)
         else:
-            print(f"Warning: HOST_TYPE was not set, defaulting to webui")
+            print(f"Warning: BACKEND_TYPE was not set, defaulting to webui")
             result = get_webui_completion(prompt)
     except requests.exceptions.ConnectionError as e:
         raise ValueError(f"Was unable to connect to host {HOST}")

From ed52ea6aafe4d16d810268edc69e8e3b7cc2a8ad Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 23 Oct 2023 00:56:02 -0700
Subject: [PATCH 21/37] Update README.md

---
 memgpt/local_llm/README.md | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md
index 69165305..7f54b9a2 100644
--- a/memgpt/local_llm/README.md
+++ b/memgpt/local_llm/README.md
@@ -1,19 +1,14 @@
-## How to connect MemGPT to non-OpenAI LLMs
+⁉️ Need help configuring local LLMs with MemGPT? Ask for help on [our Discord](https://discord.gg/9GEQrxmVyE) or [post on the GitHub discussion](https://github.com/cpacker/MemGPT/discussions/67).
 
-**If you have a hosted ChatCompletion-compatible endpoint that works with function calling**:
-  - simply set `OPENAI_API_BASE` to the IP+port of your endpoint:
+👀 If you have a hosted ChatCompletion-compatible endpoint that works with function calling, you can simply set `OPENAI_API_BASE` (`export OPENAI_API_BASE=...`) to the IP+port of your endpoint. **As of 10/22/2023, most ChatCompletion endpoints do *NOT* support function calls, so if you want to play with MemGPT and open models, you probably need to follow the instructions below.**
 
-```sh
-export OPENAI_API_BASE=...
-```
+🙋 Our examples assume that you're using [oobabooga web UI](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui) to put your LLMs behind a web server. If you need help setting this up, check the instructions [here](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui). More LLM web server support to come soon (tell us what you use and we'll add it)!
 
-For this to work, the endpoint **MUST** support function calls.
+---
 
-**As of 10/22/2023, most ChatCompletion endpoints do *NOT* support function calls, so if you want to play with MemGPT and open models, you probably need to follow the instructions below.**
+# How to connect MemGPT to non-OpenAI LLMs
 
-## Integrating a function-call finetuned LLM with MemGPT
-
-**If you have a hosted local model that is function-call finetuned**:
+**If you have an LLM that is function-call finetuned**:
   - Implement a wrapper class for that model
     - The wrapper class needs to implement two functions:
       - One to go from ChatCompletion messages/functions schema to a prompt string
@@ -34,9 +29,9 @@ class LLMChatCompletionWrapper(ABC):
         pass
 ```
 
-## Example with Airoboros LLM
+## Example with [Airoboros](https://huggingface.co/jondurbin/airoboros-l2-70b-2.1) (llama2 finetune)
 
-To help you get started, we've implemented an example wrapper class for a popular llama2 model finetuned on function calling (airoboros). We want MemGPT to run well on open models as much as you do, so we'll be actively updating this page with more examples. Additionally, we welcome contributions from the community! If you find an open LLM that works well with MemGPT, please open a PR with a model wrapper and we'll merge it ASAP.
+To help you get started, we've implemented an example wrapper class for a popular llama2 model **finetuned on function calling** (airoboros). We want MemGPT to run well on open models as much as you do, so we'll be actively updating this page with more examples. Additionally, we welcome contributions from the community! If you find an open LLM that works well with MemGPT, please open a PR with a model wrapper and we'll merge it ASAP.
 
 ```python
 class Airoboros21Wrapper(LLMChatCompletionWrapper):
@@ -77,8 +72,8 @@ In the future, more open LLMs and LLM servers (that can host OpenAI-compatable C
 
 2. Partly because of how complex it is to support function calling, most (all?) of the community projects that do OpenAI ChatCompletion endpoints for arbitrary open LLMs do not support function calling, because if they did, they would need to write model-specific parsing code for each one.
 
-## How can you run MemGPT with open LLMs that support function calling?
+## What is this all this extra code for?
 
-Because of the poor state of function calling support in existing ChatCompletion API serving code, we instead provide a light wrapper on top of ChatCompletion that uses a parser specific to Airoboros. We hope that this example code will help the community add additional compatability of MemGPT with more function-calling LLMs - we will also add more model support as we test more models and find those that work well enough to run MemGPT's function set.
+Because of the poor state of function calling support in existing ChatCompletion API serving code, we instead provide a light wrapper on top of ChatCompletion that add parsers to handle function calling support. These parsers need to be specific to the model you're using (or at least specific to the way it was trained on function calling). We hope that our example code will help the community add additional compatability of MemGPT with more function-calling LLMs - we will also add more model support as we test more models and find those that work well enough to run MemGPT's function set.
 
-To run the example of MemGPT with Airoboros, you'll need to host the model with some open LLM hosting code, for example Oobagooba (see here). Then, all you need to do is point MemGPT to this API endpoint. Now, instead of calling ChatCompletion on OpenAI's API, MemGPT will use it's own ChatCompletion wrapper that parses the system, messages, and function arguments into a format that Airoboros has been finetuned on, and once Airoboros generates a string output, MemGPT will parse the response to extract a potential function call (knowing what we know about Airoboros expected function call output).
+To run the example of MemGPT with Airoboros, you'll need to host the model behind some LLM web server (for example [webui](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui)). Then, all you need to do is point MemGPT to this API endpoint by setting `OPENAI_API_BASE` and `BACKEND_TYPE`. Now, instead of calling ChatCompletion on OpenAI's API, MemGPT will use it's own ChatCompletion wrapper that parses the system, messages, and function arguments into a format that Airoboros has been finetuned on, and once Airoboros generates a string output, MemGPT will parse the response to extract a potential function call (knowing what we know about Airoboros expected function call output).

From 0478a7a49ed8687026be56ea98fe0842ff22877e Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 23 Oct 2023 00:57:37 -0700
Subject: [PATCH 22/37] Update README.md

---
 memgpt/local_llm/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md
index 7f54b9a2..fce5899b 100644
--- a/memgpt/local_llm/README.md
+++ b/memgpt/local_llm/README.md
@@ -68,7 +68,7 @@ MemGPT uses function calling to do memory management. With OpenAI's ChatCompleti
 
 In the future, more open LLMs and LLM servers (that can host OpenAI-compatable ChatCompletion endpoints) may start including parsing code to do this automatically as standard practice. However, in the meantime, when you see a model that says it supports “function calling”, like Airoboros, it doesn't mean that you can just load Airoboros into a ChatCompletion-compatable endpoint like FastChat, and then use the same OpenAI API call and it'll just work.
 
-1. When an open LLM says it supports function calling, they probably mean that the model was finetuned on some function call data. Remember, transformers are just string-in-string-out, so there are many ways to format this function call data. Airoboros formats the function schema in YAML style (see https://huggingface.co/jondurbin/airoboros-l2-70b-3.1.2#agentfunction-calling) and the output is in JSON style. To get this to work behind a ChatCompletion API, you still have to do the parsing from ‘functions’ keyword arg (containing the schema) to the model's expected schema style in the prompt (YAML for Airoboros), and you have to run some code to extract the function call (JSON for Airoboros) and package it cleanly as a ‘function_call’ field in the response.
+1. When a model page says it supports function calling, they probably mean that the model was finetuned on some function call data (not that you can just use ChatCompletion with functions out-of-the-box). Remember, LLMs are just string-in-string-out, so there are many ways to format the function call data. E.g. Airoboros formats the function schema in YAML style (see https://huggingface.co/jondurbin/airoboros-l2-70b-3.1.2#agentfunction-calling) and the output is in JSON style. To get this to work behind a ChatCompletion API, you still have to do the parsing from ‘functions’ keyword arg (containing the schema) to the model's expected schema style in the prompt (YAML for Airoboros), and you have to run some code to extract the function call (JSON for Airoboros) and package it cleanly as a ‘function_call’ field in the response.
 
 2. Partly because of how complex it is to support function calling, most (all?) of the community projects that do OpenAI ChatCompletion endpoints for arbitrary open LLMs do not support function calling, because if they did, they would need to write model-specific parsing code for each one.
 

From ab1f75a368c26e253f26e05267c43e0c62bc7939 Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 23 Oct 2023 01:01:41 -0700
Subject: [PATCH 23/37] Update README.md

---
 memgpt/local_llm/README.md | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md
index fce5899b..a62bfe24 100644
--- a/memgpt/local_llm/README.md
+++ b/memgpt/local_llm/README.md
@@ -60,6 +60,30 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper):
 ```
 See full file [here](llm_chat_completion_wrappers/airoboros.py).
 
+Example running the code (airoboros is able to properly call `send_message`:
+```sh
+# running airoboros behind a textgen webui server
+export OPENAI_API_BASE = <pointing at webui server>
+export BACKEND_TYPE = webui
+
+# using --no_verify because this airoboros example does not output inner monologue, just functions
+$ python3 main.py --no_verify
+
+Running... [exit by typing '/exit']
+💭 Bootup sequence complete. Persona activated. Testing messaging functionality.
+
+💭 None
+🤖 Welcome! My name is Sam. How can I assist you today?
+Enter your message: My name is Brad, not Chad...
+
+💭 None
+⚡🧠 [function] updating memory with core_memory_replace:
+         First name: Chad
+        → First name: Brad
+```
+
+WebUI exposes a lot of parameters that can dramatically change LLM outputs, to change these you can modify the [WebUI settings file](/memgpt/local_llm/webui/settings.py).
+
 ---
 
 ## Status of ChatCompletion w/ function calling and open LLMs

From 34f5a74f62b051fd468595430add1f725845771e Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 23 Oct 2023 01:02:25 -0700
Subject: [PATCH 24/37] Update README.md

---
 memgpt/local_llm/README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md
index a62bfe24..60ca74be 100644
--- a/memgpt/local_llm/README.md
+++ b/memgpt/local_llm/README.md
@@ -60,13 +60,15 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper):
 ```
 See full file [here](llm_chat_completion_wrappers/airoboros.py).
 
-Example running the code (airoboros is able to properly call `send_message`:
+### Running the example
+
 ```sh
 # running airoboros behind a textgen webui server
 export OPENAI_API_BASE = <pointing at webui server>
 export BACKEND_TYPE = webui
 
 # using --no_verify because this airoboros example does not output inner monologue, just functions
+# airoboros is able to properly call `send_message`
 $ python3 main.py --no_verify
 
 Running... [exit by typing '/exit']

From d8c0092a3eb4afd13c50cb38400106a1e0c93716 Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 23 Oct 2023 01:02:40 -0700
Subject: [PATCH 25/37] Update README.md

---
 memgpt/local_llm/README.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md
index 60ca74be..5bbe2162 100644
--- a/memgpt/local_llm/README.md
+++ b/memgpt/local_llm/README.md
@@ -58,7 +58,7 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper):
         }
         """
 ```
-See full file [here](llm_chat_completion_wrappers/airoboros.py).
+See full file [here](llm_chat_completion_wrappers/airoboros.py). WebUI exposes a lot of parameters that can dramatically change LLM outputs; to change these, you can modify the [WebUI settings file](/memgpt/local_llm/webui/settings.py).
 
 ### Running the example
 
@@ -84,8 +84,6 @@ Enter your message: My name is Brad, not Chad...
         → First name: Brad
 ```
 
-WebUI exposes a lot of parameters that can dramatically change LLM outputs, to change these you can modify the [WebUI settings file](/memgpt/local_llm/webui/settings.py).
-
 ---
 
 ## Status of ChatCompletion w/ function calling and open LLMs

From a2b824ecb538f25f5429b3d27a314580e803dfe0 Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 23 Oct 2023 01:04:02 -0700
Subject: [PATCH 26/37] Update README.md

---
 memgpt/local_llm/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md
index 5bbe2162..001d4a58 100644
--- a/memgpt/local_llm/README.md
+++ b/memgpt/local_llm/README.md
@@ -88,7 +88,7 @@ Enter your message: My name is Brad, not Chad...
 
 ## Status of ChatCompletion w/ function calling and open LLMs
 
-MemGPT uses function calling to do memory management. With OpenAI's ChatCompletion API, you can pass in a function schema in the `functions` keyword arg, and the API response will include a `function_call` field that includes the function name and the function arguments (generated JSON). How this works under the hood is your `functions` keyword is combined with the `messages` and `system` to form one big string input to the transformer, and the output of the transformer is parsed to extract the JSON function call.
+MemGPT uses function calling to do memory management. With [OpenAI's ChatCompletion API](https://platform.openai.com/docs/api-reference/chat/), you can pass in a function schema in the `functions` keyword arg, and the API response will include a `function_call` field that includes the function name and the function arguments (generated JSON). How this works under the hood is your `functions` keyword is combined with the `messages` and `system` to form one big string input to the transformer, and the output of the transformer is parsed to extract the JSON function call.
 
 In the future, more open LLMs and LLM servers (that can host OpenAI-compatable ChatCompletion endpoints) may start including parsing code to do this automatically as standard practice. However, in the meantime, when you see a model that says it supports “function calling”, like Airoboros, it doesn't mean that you can just load Airoboros into a ChatCompletion-compatable endpoint like FastChat, and then use the same OpenAI API call and it'll just work.
 

From 489981240c3bd0fa0413adc3781778f1ba212916 Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 23 Oct 2023 01:04:45 -0700
Subject: [PATCH 27/37] Update README.md

---
 memgpt/local_llm/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md
index 001d4a58..e5c91950 100644
--- a/memgpt/local_llm/README.md
+++ b/memgpt/local_llm/README.md
@@ -90,7 +90,7 @@ Enter your message: My name is Brad, not Chad...
 
 MemGPT uses function calling to do memory management. With [OpenAI's ChatCompletion API](https://platform.openai.com/docs/api-reference/chat/), you can pass in a function schema in the `functions` keyword arg, and the API response will include a `function_call` field that includes the function name and the function arguments (generated JSON). How this works under the hood is your `functions` keyword is combined with the `messages` and `system` to form one big string input to the transformer, and the output of the transformer is parsed to extract the JSON function call.
 
-In the future, more open LLMs and LLM servers (that can host OpenAI-compatable ChatCompletion endpoints) may start including parsing code to do this automatically as standard practice. However, in the meantime, when you see a model that says it supports “function calling”, like Airoboros, it doesn't mean that you can just load Airoboros into a ChatCompletion-compatable endpoint like FastChat, and then use the same OpenAI API call and it'll just work.
+In the future, more open LLMs and LLM servers (that can host OpenAI-compatable ChatCompletion endpoints) may start including parsing code to do this automatically as standard practice. However, in the meantime, when you see a model that says it supports “function calling”, like Airoboros, it doesn't mean that you can just load Airoboros into a ChatCompletion-compatable endpoint like WebUI, and then use the same OpenAI API call and it'll just work.
 
 1. When a model page says it supports function calling, they probably mean that the model was finetuned on some function call data (not that you can just use ChatCompletion with functions out-of-the-box). Remember, LLMs are just string-in-string-out, so there are many ways to format the function call data. E.g. Airoboros formats the function schema in YAML style (see https://huggingface.co/jondurbin/airoboros-l2-70b-3.1.2#agentfunction-calling) and the output is in JSON style. To get this to work behind a ChatCompletion API, you still have to do the parsing from ‘functions’ keyword arg (containing the schema) to the model's expected schema style in the prompt (YAML for Airoboros), and you have to run some code to extract the function call (JSON for Airoboros) and package it cleanly as a ‘function_call’ field in the response.
 

From 33551b1106d9d6effa895f287c9e14f24dbd9255 Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 23 Oct 2023 01:05:22 -0700
Subject: [PATCH 28/37] Update README.md

---
 memgpt/local_llm/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md
index e5c91950..f45c16c4 100644
--- a/memgpt/local_llm/README.md
+++ b/memgpt/local_llm/README.md
@@ -92,7 +92,7 @@ MemGPT uses function calling to do memory management. With [OpenAI's ChatComplet
 
 In the future, more open LLMs and LLM servers (that can host OpenAI-compatable ChatCompletion endpoints) may start including parsing code to do this automatically as standard practice. However, in the meantime, when you see a model that says it supports “function calling”, like Airoboros, it doesn't mean that you can just load Airoboros into a ChatCompletion-compatable endpoint like WebUI, and then use the same OpenAI API call and it'll just work.
 
-1. When a model page says it supports function calling, they probably mean that the model was finetuned on some function call data (not that you can just use ChatCompletion with functions out-of-the-box). Remember, LLMs are just string-in-string-out, so there are many ways to format the function call data. E.g. Airoboros formats the function schema in YAML style (see https://huggingface.co/jondurbin/airoboros-l2-70b-3.1.2#agentfunction-calling) and the output is in JSON style. To get this to work behind a ChatCompletion API, you still have to do the parsing from ‘functions’ keyword arg (containing the schema) to the model's expected schema style in the prompt (YAML for Airoboros), and you have to run some code to extract the function call (JSON for Airoboros) and package it cleanly as a ‘function_call’ field in the response.
+1. When a model page says it supports function calling, they probably mean that the model was finetuned on some function call data (not that you can just use ChatCompletion with functions out-of-the-box). Remember, LLMs are just string-in-string-out, so there are many ways to format the function call data. E.g. Airoboros formats the function schema in YAML style (see https://huggingface.co/jondurbin/airoboros-l2-70b-3.1.2#agentfunction-calling) and the output is in JSON style. To get this to work behind a ChatCompletion API, you still have to do the parsing from `functions` keyword arg (containing the schema) to the model's expected schema style in the prompt (YAML for Airoboros), and you have to run some code to extract the function call (JSON for Airoboros) and package it cleanly as a `function_call` field in the response.
 
 2. Partly because of how complex it is to support function calling, most (all?) of the community projects that do OpenAI ChatCompletion endpoints for arbitrary open LLMs do not support function calling, because if they did, they would need to write model-specific parsing code for each one.
 

From 7721cad39257af122d52e3b66b72e968f10d27db Mon Sep 17 00:00:00 2001
From: Vivian Fang <hi@vivi.sh>
Date: Mon, 23 Oct 2023 01:07:13 -0700
Subject: [PATCH 29/37] Update README.md

---
 memgpt/local_llm/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md
index f45c16c4..a79c0f9e 100644
--- a/memgpt/local_llm/README.md
+++ b/memgpt/local_llm/README.md
@@ -31,7 +31,7 @@ class LLMChatCompletionWrapper(ABC):
 
 ## Example with [Airoboros](https://huggingface.co/jondurbin/airoboros-l2-70b-2.1) (llama2 finetune)
 
-To help you get started, we've implemented an example wrapper class for a popular llama2 model **finetuned on function calling** (airoboros). We want MemGPT to run well on open models as much as you do, so we'll be actively updating this page with more examples. Additionally, we welcome contributions from the community! If you find an open LLM that works well with MemGPT, please open a PR with a model wrapper and we'll merge it ASAP.
+To help you get started, we've implemented an example wrapper class for a popular llama2 model **finetuned on function calling** (Airoboros). We want MemGPT to run well on open models as much as you do, so we'll be actively updating this page with more examples. Additionally, we welcome contributions from the community! If you find an open LLM that works well with MemGPT, please open a PR with a model wrapper and we'll merge it ASAP.
 
 ```python
 class Airoboros21Wrapper(LLMChatCompletionWrapper):
@@ -98,6 +98,6 @@ In the future, more open LLMs and LLM servers (that can host OpenAI-compatable C
 
 ## What is this all this extra code for?
 
-Because of the poor state of function calling support in existing ChatCompletion API serving code, we instead provide a light wrapper on top of ChatCompletion that add parsers to handle function calling support. These parsers need to be specific to the model you're using (or at least specific to the way it was trained on function calling). We hope that our example code will help the community add additional compatability of MemGPT with more function-calling LLMs - we will also add more model support as we test more models and find those that work well enough to run MemGPT's function set.
+Because of the poor state of function calling support in existing ChatCompletion API serving code, we instead provide a light wrapper on top of ChatCompletion that adds parsers to handle function calling support. These parsers need to be specific to the model you're using (or at least specific to the way it was trained on function calling). We hope that our example code will help the community add additional compatibility of MemGPT with more function-calling LLMs - we will also add more model support as we test more models and find those that work well enough to run MemGPT's function set.
 
-To run the example of MemGPT with Airoboros, you'll need to host the model behind some LLM web server (for example [webui](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui)). Then, all you need to do is point MemGPT to this API endpoint by setting `OPENAI_API_BASE` and `BACKEND_TYPE`. Now, instead of calling ChatCompletion on OpenAI's API, MemGPT will use it's own ChatCompletion wrapper that parses the system, messages, and function arguments into a format that Airoboros has been finetuned on, and once Airoboros generates a string output, MemGPT will parse the response to extract a potential function call (knowing what we know about Airoboros expected function call output).
+To run the example of MemGPT with Airoboros, you'll need to host the model behind some LLM web server (for example [webui](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui)). Then, all you need to do is point MemGPT to this API endpoint by setting the environment variables `OPENAI_API_BASE` and `BACKEND_TYPE`. Now, instead of calling ChatCompletion on OpenAI's API, MemGPT will use its own ChatCompletion wrapper that parses the system, messages, and function arguments into a format that Airoboros has been finetuned on, and once Airoboros generates a string output, MemGPT will parse the response to extract a potential function call (knowing what we know about Airoboros' expected function call output).

From cbbe8a3ce6844ace409b1c4ad35dca4485a462f2 Mon Sep 17 00:00:00 2001
From: Vivian Fang <hi@vivi.sh>
Date: Mon, 23 Oct 2023 01:09:44 -0700
Subject: [PATCH 30/37] Update README.md

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 347dc39f..f85e8d72 100644
--- a/README.md
+++ b/README.md
@@ -130,6 +130,9 @@ python main.py --model gpt-3.5-turbo
 
 Please report any bugs you encounter regarding MemGPT running on GPT-3.5 to  https://github.com/cpacker/MemGPT/issues/59.
 
+### Local LLM support
+You can run MemGPT with local LLMs too. See [instructions here](/memgpt/local_llm) and report any bugs/improvements here https://github.com/cpacker/MemGPT/discussions/67.
+
 ### `main.py` flags
 
 ```text

From 326bdb10a91fa3fae2a3e44326dd78f5e0ec8d2c Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 23 Oct 2023 01:13:02 -0700
Subject: [PATCH 31/37] Update README.md

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index f85e8d72..2362d4e8 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,8 @@
 <div align="center">
 
  <strong>Try out our MemGPT chatbot on <a href="https://discord.gg/9GEQrxmVyE">Discord</a>!</strong>
+ 
+ <strong>⭐ NEW: You can now run MemGPT with <a href="https://github.com/cpacker/MemGPT/discussions/67">local LLMs</a>! ⭐ </strong>
 
 [![Discord](https://img.shields.io/discord/1161736243340640419?label=Discord&logo=discord&logoColor=5865F2&style=flat-square&color=5865F2)](https://discord.gg/9GEQrxmVyE)
 [![arXiv 2310.08560](https://img.shields.io/badge/arXiv-2310.08560-B31B1B?logo=arxiv&style=flat-square)](https://arxiv.org/abs/2310.08560)

From 2b2da823077c25cb24cef8109ed025e0f0ea5ebc Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 23 Oct 2023 02:10:55 -0700
Subject: [PATCH 32/37] hotfix

---
 memgpt/local_llm/chat_completion_proxy.py | 2 +-
 memgpt/local_llm/webui/api.py             | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/memgpt/local_llm/chat_completion_proxy.py b/memgpt/local_llm/chat_completion_proxy.py
index a5290717..cab33c89 100644
--- a/memgpt/local_llm/chat_completion_proxy.py
+++ b/memgpt/local_llm/chat_completion_proxy.py
@@ -10,7 +10,7 @@ from .utils import DotDict
 
 HOST = os.getenv("OPENAI_API_BASE")
 HOST_TYPE = os.getenv("BACKEND_TYPE")  # default None == ChatCompletion
-DEBUG = True
+DEBUG = False
 
 
 async def get_chat_completion(
diff --git a/memgpt/local_llm/webui/api.py b/memgpt/local_llm/webui/api.py
index 3cff08e0..2614050d 100644
--- a/memgpt/local_llm/webui/api.py
+++ b/memgpt/local_llm/webui/api.py
@@ -5,8 +5,8 @@ from .settings import SIMPLE
 
 HOST = os.getenv("OPENAI_API_BASE")
 HOST_TYPE = os.getenv("BACKEND_TYPE")  # default None == ChatCompletion
-WEBUI_API_SUFFIX = "/v1/generate"
-DEBUG = True
+WEBUI_API_SUFFIX = "/api/v1/generate"
+DEBUG = False
 
 
 def get_webui_completion(prompt, settings=SIMPLE):
@@ -25,7 +25,7 @@ def get_webui_completion(prompt, settings=SIMPLE):
             if DEBUG:
                 print(f"json API response.text: {result}")
         else:
-            raise Exception(f"API call got non-200 response code")
+            raise Exception(f"API call got non-200 response code for address: {URI}")
     except:
         # TODO handle gracefully
         raise

From 0e1fdcd360172205c447e6432ad1decaf744bda4 Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 23 Oct 2023 02:20:56 -0700
Subject: [PATCH 33/37] hotfix 2

---
 memgpt/local_llm/chat_completion_proxy.py | 3 +++
 memgpt/local_llm/webui/api.py             | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/memgpt/local_llm/chat_completion_proxy.py b/memgpt/local_llm/chat_completion_proxy.py
index cab33c89..de9da221 100644
--- a/memgpt/local_llm/chat_completion_proxy.py
+++ b/memgpt/local_llm/chat_completion_proxy.py
@@ -45,6 +45,9 @@ async def get_chat_completion(
     except requests.exceptions.ConnectionError as e:
         raise ValueError(f"Was unable to connect to host {HOST}")
 
+    if result is None or result == "":
+        raise Exception(f"Got back an empty response string from {HOST}")
+
     chat_completion_result = llm_wrapper.output_to_chat_completion_response(result)
     if DEBUG:
         print(json.dumps(chat_completion_result, indent=2))
diff --git a/memgpt/local_llm/webui/api.py b/memgpt/local_llm/webui/api.py
index 2614050d..547377b1 100644
--- a/memgpt/local_llm/webui/api.py
+++ b/memgpt/local_llm/webui/api.py
@@ -17,7 +17,7 @@ def get_webui_completion(prompt, settings=SIMPLE):
     request["prompt"] = prompt
 
     try:
-        URI = f"{HOST}{WEBUI_API_SUFFIX}"
+        URI = f"{HOST.strip('/')}{WEBUI_API_SUFFIX}"
         response = requests.post(URI, json=request)
         if response.status_code == 200:
             result = response.json()

From 7225d4b9b7de31e1f699a0dd0657b7ffd837448a Mon Sep 17 00:00:00 2001
From: Vivian Fang <hi@vivi.sh>
Date: Mon, 23 Oct 2023 02:31:13 -0700
Subject: [PATCH 34/37] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index b7c1a3b7..cd5f761b 100644
--- a/README.md
+++ b/README.md
@@ -307,6 +307,6 @@ Datasets used in our [paper](https://arxiv.org/abs/2310.08560) can be downloaded
 - [x] Integration tests
 - [x] Integrate with AutoGen ([discussion](https://github.com/cpacker/MemGPT/discussions/65))
 - [x] Add official gpt-3.5-turbo support ([discussion](https://github.com/cpacker/MemGPT/discussions/66))
+- [x] CLI UI improvements ([issue](https://github.com/cpacker/MemGPT/issues/11))
+- [x] Add support for other LLM backends ([issue](https://github.com/cpacker/MemGPT/issues/18), [discussion](https://github.com/cpacker/MemGPT/discussions/67))
 - [ ] Release MemGPT family of open models (eg finetuned Mistral) ([discussion](https://github.com/cpacker/MemGPT/discussions/67))
-- [ ] CLI UI improvements ([issue](https://github.com/cpacker/MemGPT/issues/11))
-- [ ] Add support for other LLM backends ([issue](https://github.com/cpacker/MemGPT/issues/18))

From 00179f9e38e911a94b1214ed6e008079738accdd Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 23 Oct 2023 13:01:47 -0700
Subject: [PATCH 35/37] Update README.md

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index cd5f761b..14fddba6 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,8 @@
  <strong>Try out our MemGPT chatbot on <a href="https://discord.gg/9GEQrxmVyE">Discord</a>!</strong>
  
  <strong>⭐ NEW: You can now run MemGPT with <a href="https://github.com/cpacker/MemGPT/discussions/67">local LLMs</a>! ⭐ </strong>
+ 
+ <strong>⭐ NEW: You can now run MemGPT with <a href="https://github.com/cpacker/MemGPT/discussions/65">AutoGen</a>! ⭐ </strong>
 
 [![Discord](https://img.shields.io/discord/1161736243340640419?label=Discord&logo=discord&logoColor=5865F2&style=flat-square&color=5865F2)](https://discord.gg/9GEQrxmVyE)
 [![arXiv 2310.08560](https://img.shields.io/badge/arXiv-2310.08560-B31B1B?logo=arxiv&style=flat-square)](https://arxiv.org/abs/2310.08560)

From 68e422f56f7654ead0ccda7091db0e53285a24a5 Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 23 Oct 2023 13:02:20 -0700
Subject: [PATCH 36/37] Update README.md

---
 README.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 14fddba6..04d3048a 100644
--- a/README.md
+++ b/README.md
@@ -6,10 +6,8 @@
 
  <strong>Try out our MemGPT chatbot on <a href="https://discord.gg/9GEQrxmVyE">Discord</a>!</strong>
  
- <strong>⭐ NEW: You can now run MemGPT with <a href="https://github.com/cpacker/MemGPT/discussions/67">local LLMs</a>! ⭐ </strong>
+ <strong>⭐ NEW: You can now run MemGPT with <a href="https://github.com/cpacker/MemGPT/discussions/67">local LLMs</a> and <a href="https://github.com/cpacker/MemGPT/discussions/65">AutoGen</a>! ⭐ </strong>
  
- <strong>⭐ NEW: You can now run MemGPT with <a href="https://github.com/cpacker/MemGPT/discussions/65">AutoGen</a>! ⭐ </strong>
-
 [![Discord](https://img.shields.io/discord/1161736243340640419?label=Discord&logo=discord&logoColor=5865F2&style=flat-square&color=5865F2)](https://discord.gg/9GEQrxmVyE)
 [![arXiv 2310.08560](https://img.shields.io/badge/arXiv-2310.08560-B31B1B?logo=arxiv&style=flat-square)](https://arxiv.org/abs/2310.08560)
 

From 39b545f869ba5b9a79a321f9225b781e9727c100 Mon Sep 17 00:00:00 2001
From: Vivian Fang <hi@vivi.sh>
Date: Mon, 23 Oct 2023 13:48:05 -0700
Subject: [PATCH 37/37] cli hotfix: fix legacy FAISS archival storage loading

---
 config.py | 6 ++----
 main.py   | 7 ++++---
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/config.py b/config.py
index 5ebebd24..b900e3a3 100644
--- a/config.py
+++ b/config.py
@@ -37,6 +37,7 @@ class Config:
         self.agent_save_file = None
         self.persistence_manager_save_file = None
         self.host = os.getenv("OPENAI_API_BASE")
+        self.index = None
 
     @classmethod
     async def legacy_flags_init(
@@ -59,10 +60,7 @@ class Config:
         self.compute_embeddings = compute_embeddings
         recompute_embeddings = self.compute_embeddings
         if self.archival_storage_index:
-            recompute_embeddings = questionary.confirm(
-                f"Would you like to recompute embeddings? Do this if your files have changed.\nFiles:{self.archival_storage_files}",
-                default=False,
-            )
+            recompute_embeddings = False  # TODO Legacy support -- can't recompute embeddings on a path that's not specified.
         if self.archival_storage_files:
             await self.configure_archival_storage(recompute_embeddings)
         return self
diff --git a/main.py b/main.py
index c0a92941..f7743200 100644
--- a/main.py
+++ b/main.py
@@ -202,8 +202,9 @@ async def main():
                 memgpt_persona,
                 human_persona,
                 load_type="folder",
-                archival_storage_index=FLAGS.archival_storage_index,
-                compute_embeddings=False,
+                archival_storage_files=FLAGS.archival_storage_faiss_path,
+                archival_storage_index=FLAGS.archival_storage_faiss_path,
+                compute_embeddings=True,
             )
         elif FLAGS.archival_storage_files_compute_embeddings:
             print(model)
@@ -273,7 +274,7 @@ async def main():
             )
             return
 
-    if cfg.archival_storage_index:
+    if cfg.index:
         persistence_manager = InMemoryStateManagerWithFaiss(
             cfg.index, cfg.archival_database
         )