From f63419c78b139466fb75c67900e1c5b858285468 Mon Sep 17 00:00:00 2001 From: Charles Packer Date: Wed, 15 Nov 2023 01:02:57 -0800 Subject: [PATCH] Update documentation [local LLMs, presets] (#453) * updated local llm documentation * updated cli flags to be consistent with documentation * added preset documentation * update test to use new arg * update test to use new arg --- docs/config.md | 1 + docs/koboldcpp.md | 16 +++++++++---- docs/llamacpp.md | 16 +++++++++---- docs/lmstudio.md | 18 ++++++++++----- docs/local_llm.md | 57 ++++++++++++++++++++++++++++++++++++----------- docs/ollama.md | 24 +++++++++++++------- docs/presets.md | 21 +++++++++++++++++ docs/webui.md | 14 ++++++++---- memgpt/cli/cli.py | 8 +++---- mkdocs.yml | 3 +++ tests/test_cli.py | 4 ++-- 11 files changed, 136 insertions(+), 46 deletions(-) create mode 100644 docs/presets.md diff --git a/docs/config.md b/docs/config.md index b8e70b29..907dd179 100644 --- a/docs/config.md +++ b/docs/config.md @@ -14,6 +14,7 @@ The `memgpt run` command supports the following optional flags (if set, will ove * `--yes`/`-y`: (bool) Skip confirmation prompt and use defaults (default=False) You can override the parameters you set with `memgpt configure` with the following additional flags specific to local LLMs: + * `--model-wrapper`: (str) Model wrapper used by backend (e.g. `airoboros_xxx`) * `--model-endpoint-type`: (str) Model endpoint backend type (e.g. lmstudio, ollama) * `--model-endpoint`: (str) Model endpoint url (e.g. 
+If you have an existing agent that you want to move to the koboldcpp backend, add extra flags to `memgpt run`: +```sh +memgpt run --agent your_agent --model-endpoint-type koboldcpp --model-endpoint http://localhost:5001 +```
+If you have an existing agent that you want to move to the llama.cpp backend, add extra flags to `memgpt run`: +```sh +memgpt run --agent your_agent --model-endpoint-type llamacpp --model-endpoint http://localhost:8080 +```
Run `memgpt configure` and when prompted select your backend/endpoint type and endpoint address (a default will be provided but you may have to override it) -For example, if we are running web UI (which defaults to port 5000) on the same computer as MemGPT, we would do the following: -```sh -# set this to the backend we're using, eg 'webui', 'lmstudio', 'llamacpp', 'koboldcpp' -export BACKEND_TYPE=webui -# set this to the base address of llm web server -export OPENAI_API_BASE=http://127.0.0.1:5000 +For example, if we are running web UI (which defaults to port 5000) on the same computer as MemGPT, running `memgpt configure` would look like this: +``` +? Select LLM inference provider: local +? Select LLM backend (select 'openai' if you have an OpenAI compatible proxy): webui +? Enter default endpoint: http://localhost:5000 +? Select default model wrapper (recommended: airoboros-l2-70b-2.1): airoboros-l2-70b-2.1 +? Select your model's context window (for Mistral 7B models, this is probably 8k / 8192): 8192 +? Select embedding provider: local +? Select default preset: memgpt_chat +? Select default persona: sam_pov +? Select default human: cs_phd +? Select storage backend for archival data: local +Saving config to /home/user/.memgpt/config ``` -Now when we run MemGPT, it will use the LLM running on the local web server. +Now when we do `memgpt run`, it will use the LLM running on the local web server. 
+ » airoboros-l2-70b-2.1
For example, the `dolphin-2.1-mistral-7b` model works better with the `airoboros-l2-70b-2.1` wrapper than the `dolphin-2.1-mistral-7b` wrapper. The model you load inside your LLM backend (e.g. LM Studio) determines what model is actually run, the `--model` flag just determines how the prompt is formatted before it is passed to the LLM backend. +Note: the wrapper name does **not** have to match the model name. For example, the `dolphin-2.1-mistral-7b` model works better with the `airoboros-l2-70b-2.1` wrapper than the `dolphin-2.1-mistral-7b` wrapper. The model you load inside your LLM backend (e.g. LM Studio) determines what model is actually run, the `--model-wrapper` flag just determines how the prompt is formatted before it is passed to the LLM backend. ### Grammars @@ -46,6 +75,8 @@ Grammar-based sampling can help improve the performance of MemGPT when using loc To use grammar-based sampling, make sure you're using a backend that supports it: webui, llama.cpp, or koboldcpp, then you should specify one of the new wrappers that implements grammars, eg: `airoboros-l2-70b-2.1-grammar`. +Note that even though grammar-based sampling can reduce the mistakes your LLM makes, it can also make your model inference significantly slower. + ### Supported backends Currently, MemGPT supports the following backends: diff --git a/docs/ollama.md b/docs/ollama.md index 95515fd6..7047ed73 100644 --- a/docs/ollama.md +++ b/docs/ollama.md @@ -28,12 +28,20 @@ removing any unused layers success ``` -In your terminal where you're running MemGPT, run: -```sh -# By default, Ollama runs an API server on port 11434 -export OPENAI_API_BASE=http://localhost:11434 -export BACKEND_TYPE=ollama - -# Make sure to add the tag! 
+If you have an existing agent that you want to move to the Ollama backend, add extra flags to `memgpt run`:
+ +For example, if I create a new system prompt and place it in `~/.memgpt/system_prompts/custom_prompt.txt`, I can then create a preset that uses this system prompt by creating a new file `~/.memgpt/presets/custom_preset.yaml`: +```yaml +system_prompt: "custom_prompt" +functions: + - "send_message" + - "pause_heartbeats" + - "core_memory_append" + - "core_memory_replace" + - "conversation_search" + - "conversation_search_date" + - "archival_memory_insert" + - "archival_memory_search" +``` + +This preset uses the same base function set as the default presets. You can see the example presets provided [here](https://github.com/cpacker/MemGPT/tree/main/memgpt/presets/examples), and you can see example system prompts [here](https://github.com/cpacker/MemGPT/tree/main/memgpt/prompts/system). \ No newline at end of file diff --git a/docs/webui.md b/docs/webui.md index 30f5cdd7..9038f0e7 100644 --- a/docs/webui.md +++ b/docs/webui.md @@ -16,12 +16,18 @@ For the purposes of this example, we're going to serve (host) the LLMs using [oo 4. If the model was loaded successfully, you should be able to access it via the API (if local, this is probably on port `5000`) 5. Assuming steps 1-4 went correctly, the LLM is now properly hosted on a port you can point MemGPT to! -In your terminal where you're running MemGPT, run: +In your terminal where you're running MemGPT, run `memgpt configure` to set the default backend for MemGPT to point at web UI: +``` +# if you are running web UI locally, the default IP address + port will be http://localhost:5000 +? Select LLM inference provider: local +? Select LLM backend (select 'openai' if you have an OpenAI compatible proxy): webui +? Enter default endpoint: http://localhost:5000 +... 
+``` +If you have an existing agent that you want to move to the web UI backend, add extra flags to `memgpt run`: ```sh -# if you are running web UI locally, the default port will be 5000 -export OPENAI_API_BASE=http://127.0.0.1:5000 -export BACKEND_TYPE=webui +memgpt run --agent your_agent --model-endpoint-type webui --model-endpoint http://localhost:5000 ``` Text gen web UI exposes a lot of parameters that can dramatically change LLM outputs, to change these you can modify the [web UI settings file](https://github.com/cpacker/MemGPT/blob/main/memgpt/local_llm/webui/settings.py). diff --git a/memgpt/cli/cli.py b/memgpt/cli/cli.py index abcf3118..64b33ef1 100644 --- a/memgpt/cli/cli.py +++ b/memgpt/cli/cli.py @@ -42,14 +42,12 @@ def run( model_wrapper: str = typer.Option(None, help="Specify the LLM model wrapper"), model_endpoint: str = typer.Option(None, help="Specify the LLM model endpoint"), model_endpoint_type: str = typer.Option(None, help="Specify the LLM model endpoint type"), - context_window: int = typer.Option( - None, "--context_window", help="The context window of the LLM you are using (e.g. 8k for most Mistral 7B variants)" - ), + context_window: int = typer.Option(None, help="The context window of the LLM you are using (e.g. 
8k for most Mistral 7B variants)"), # other first: bool = typer.Option(False, "--first", help="Use --first to send the first message in the sequence"), - strip_ui: bool = typer.Option(False, "--strip_ui", help="Remove all the bells and whistles in CLI output (helpful for testing)"), + strip_ui: bool = typer.Option(False, help="Remove all the bells and whistles in CLI output (helpful for testing)"), debug: bool = typer.Option(False, "--debug", help="Use --debug to enable debugging output"), - no_verify: bool = typer.Option(False, "--no_verify", help="Bypass message verification"), + no_verify: bool = typer.Option(False, help="Bypass message verification"), yes: bool = typer.Option(False, "-y", help="Skip confirmation prompt and use defaults"), ): """Start chatting with an MemGPT agent diff --git a/mkdocs.yml b/mkdocs.yml index 6ca2b38f..24776b9b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -23,6 +23,9 @@ nav: - 'koboldcpp': koboldcpp.md - 'ollama': ollama.md - 'Troubleshooting': local_llm_faq.md + - 'Customizing MemGPT': + - 'Creating new MemGPT presets': presets.md + - 'Giving MemGPT more tools': functions.md - 'Integrations': - 'Autogen': autogen.md - 'Advanced': diff --git a/tests/test_cli.py b/tests/test_cli.py index 90a55821..c0aa8287 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -14,7 +14,7 @@ def test_configure_memgpt(): def test_save_load(): # configure_memgpt() # rely on configure running first^ - child = pexpect.spawn("memgpt run --agent test_save_load --first --strip_ui") + child = pexpect.spawn("memgpt run --agent test_save_load --first --strip-ui") child.expect("Enter your message:", timeout=TIMEOUT) child.sendline() @@ -30,7 +30,7 @@ def test_save_load(): assert child.isalive() is False, "CLI should have terminated." assert child.exitstatus == 0, "CLI did not exit cleanly." 
- child = pexpect.spawn("memgpt run --agent test_save_load --first --strip_ui") + child = pexpect.spawn("memgpt run --agent test_save_load --first --strip-ui") child.expect("Using existing agent test_save_load", timeout=TIMEOUT) child.expect("Enter your message:", timeout=TIMEOUT) child.sendline("/exit")