From 942666d3b541627d2148a971c2351646beaf40fc Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Thu, 26 Oct 2023 11:44:47 -0700 Subject: [PATCH 01/12] initial llama index data loading implementation --- memgpt/connectors/connector.py | 66 ++++++++++++++++++++++++++ memgpt/main.py | 2 + memgpt/utils.py | 85 +++++++++++++++++++++++++++++++++- 3 files changed, 151 insertions(+), 2 deletions(-) create mode 100644 memgpt/connectors/connector.py diff --git a/memgpt/connectors/connector.py b/memgpt/connectors/connector.py new file mode 100644 index 00000000..2b3c22a2 --- /dev/null +++ b/memgpt/connectors/connector.py @@ -0,0 +1,66 @@ +""" +This file contains functions for loading data into MemGPT's archival storage. + +Data can be loaded with the following command, once a load function is defined: +``` +memgpt load --name [ADDITIONAL ARGS] +``` + +""" + +from llama_index import download_loader +from llama_index.embeddings import OpenAIEmbedding +from typing import List +import os +import typer +from memgpt.constants import MEMGPT_DIR +from memgpt.utils import estimate_openai_cost, index_docs, save_index + +app = typer.Typer() + + + +@app.command("directory") +def load_directory( + name: str = typer.Option(help="Name of dataset to load."), + input_dir: str = typer.Option(None, help="Path to directory containing dataset."), + input_files: List[str] = typer.Option(None, help="List of paths to files containing dataset."), + recursive: bool = typer.Option(False, help="Recursively search for files in directory."), +): + from llama_index import SimpleDirectoryReader + + if recursive: + assert input_dir is not None, "Must provide input directory if recursive is True." 
+ reader = SimpleDirectoryReader( + input_dir=input_dir, + recursive=True, + ) + else: + reader = SimpleDirectoryReader( + input_files=input_files + ) + + # load docs + print("Loading data...") + docs = reader.load_data() + + # embed docs + print("Indexing documents...") + index = index_docs(docs) + # save connector information into .memgpt metadata file + save_index(index, name) + +@app.command("webpage") +def load_webpage( + name: str = typer.Option(help="Name of dataset to load."), + urls: List[str] = typer.Option(None, help="List of urls to load."), +): + from llama_index import SimpleWebPageReader + docs = SimpleWebPageReader(html_to_text=True).load_data(urls) + + # embed docs + print("Indexing documents...") + index = index_docs(docs) + # save connector information into .memgpt metadata file + save_index(index, name) + diff --git a/memgpt/main.py b/memgpt/main.py index e0bc5bc4..b27c2e03 100644 --- a/memgpt/main.py +++ b/memgpt/main.py @@ -28,9 +28,11 @@ from memgpt.persistence_manager import ( from memgpt.config import Config from memgpt.constants import MEMGPT_DIR +from memgpt.connectors import connector import asyncio app = typer.Typer() +app.add_typer(connector.app, name="load") def clear_line(): diff --git a/memgpt/utils.py b/memgpt/utils.py index 441fb50e..92c82978 100644 --- a/memgpt/utils.py +++ b/memgpt/utils.py @@ -1,5 +1,4 @@ from datetime import datetime - import asyncio import csv import difflib @@ -14,9 +13,11 @@ import glob import sqlite3 import fitz from tqdm import tqdm +import typer from memgpt.openai_tools import async_get_embedding_with_backoff from memgpt.constants import MEMGPT_DIR - +from llama_index import set_global_service_context, ServiceContext, VectorStoreIndex +from llama_index.embeddings import OpenAIEmbedding def count_tokens(s: str, model: str = "gpt-4") -> int: encoding = tiktoken.encoding_for_model(model) @@ -338,3 +339,83 @@ def read_database_as_list(database_name): except Exception as e: result_list.append(f"Error: 
{str(e)}") return result_list + + + +def estimate_openai_cost(docs): + """ Estimate OpenAI embedding cost + + :param docs: Documents to be embedded + :type docs: List[Document] + :return: Estimated cost + :rtype: float + """ + from llama_index import MockEmbedding + from llama_index.callbacks import CallbackManager, TokenCountingHandler + import tiktoken + + embed_model = MockEmbedding(embed_dim=1536) + + token_counter = TokenCountingHandler( + tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode + ) + + callback_manager = CallbackManager([token_counter]) + + set_global_service_context( + ServiceContext.from_defaults( + embed_model=embed_model, + callback_manager=callback_manager + ) + ) + index = VectorStoreIndex.from_documents(docs) + + # estimate cost + cost = 0.0001 * token_counter.total_embedding_token_count / 1000 + token_counter.reset_counts() + return cost + +def index_docs(docs): + + """ Index documents + + :param docs: Documents to be embedded + :type docs: List[Document] + """ + + # TODO: support configurable embeddings + # TODO: read from config how to index (open ai vs. 
local): then embed_mode="local" + + estimated_cost = estimate_openai_cost(docs) + # TODO: prettier cost formatting + confirm = typer.confirm(typer.style(f"Open AI embedding cost will be approximately ${estimated_cost} - continue?", fg="yellow"), default=True) + + if not confirm: + typer.secho("Aborting.", fg="red") + exit() + + embed_model = OpenAIEmbedding() + service_context = ServiceContext.from_defaults(embed_model=embed_model, chunk_size = 300) + set_global_service_context(service_context) + + # index documents + index = VectorStoreIndex.from_documents(docs) + return index + +def save_index(index, name): + + """ Save index to a specificed name in ~/.memgpt + + :param index: Index to save + :type index: VectorStoreIndex + :param name: Name of index + :type name: str + """ + # save + # TODO: load directory from config + # TODO: save to vectordb/local depending on config + dir = f"{MEMGPT_DIR}/archival/{name}" + # create directory, even if it already exists + os.makedirs(dir, exist_ok=True) + index.storage_context.persist(dir) + print(dir) From f484436c4385a67d14e627e5c039d4c6ea6d6a0d Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Thu, 26 Oct 2023 12:02:29 -0700 Subject: [PATCH 02/12] add db connector --- memgpt/connectors/connector.py | 31 ++++++++++++++++++++++++++++++- memgpt/utils.py | 7 +++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/memgpt/connectors/connector.py b/memgpt/connectors/connector.py index 2b3c22a2..8e2b828c 100644 --- a/memgpt/connectors/connector.py +++ b/memgpt/connectors/connector.py @@ -9,7 +9,6 @@ memgpt load --name [ADDITIONAL ARGS] """ from llama_index import download_loader -from llama_index.embeddings import OpenAIEmbedding from typing import List import os import typer @@ -64,3 +63,33 @@ def load_webpage( # save connector information into .memgpt metadata file save_index(index, name) + +@app.command("database") +def load_database( + name: str = typer.Option(help="Name of dataset to load."), + scheme: str = 
typer.Option(help="Database scheme."), + host: str = typer.Option(help="Database host."), + port: int = typer.Option(help="Database port."), + user: str = typer.Option(help="Database user."), + password: str = typer.Option(help="Database password."), + dbname: str = typer.Option(help="Database name."), + query: str = typer.Option(None, help="Database query."), +): + from llama_index.readers.database import DatabaseReader + + db = DatabaseReader( + scheme="postgresql", # Database Scheme + host="localhost", # Database Host + port="5432", # Database Port + user="postgres", # Database User + password="FakeExamplePassword", # Database Password + dbname="postgres", # Database Name + ) + + # load data + docs = db.load_data(query=query) + + index = index_docs(docs) + save_index(index, name) + + diff --git a/memgpt/utils.py b/memgpt/utils.py index 92c82978..a8b5a07a 100644 --- a/memgpt/utils.py +++ b/memgpt/utils.py @@ -415,6 +415,13 @@ def save_index(index, name): # TODO: load directory from config # TODO: save to vectordb/local depending on config dir = f"{MEMGPT_DIR}/archival/{name}" + + # check if directory exists + if os.path.exists(dir): + confirm = typer.confirm(typer.style(f"Index with name {name} already exists -- overwrite?", fg="red"), default=False) + if not confirm: + typer.secho("Aborting.", fg="red") + exit() # create directory, even if it already exists os.makedirs(dir, exist_ok=True) index.storage_context.persist(dir) From 380a92941c47b07763b1de00d4df5d658d56ea2a Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Thu, 26 Oct 2023 12:04:58 -0700 Subject: [PATCH 03/12] fix scheme --- memgpt/connectors/connector.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/memgpt/connectors/connector.py b/memgpt/connectors/connector.py index 8e2b828c..470cb208 100644 --- a/memgpt/connectors/connector.py +++ b/memgpt/connectors/connector.py @@ -78,12 +78,12 @@ def load_database( from llama_index.readers.database import DatabaseReader db = 
DatabaseReader( - scheme="postgresql", # Database Scheme - host="localhost", # Database Host - port="5432", # Database Port - user="postgres", # Database User - password="FakeExamplePassword", # Database Password - dbname="postgres", # Database Name + scheme=scheme, # Database Scheme + host=host, # Database Host + port=port, # Database Port + user=user, # Database User + password=password, # Database Password + dbname=dbname, # Database Name ) # load data From b45b5b6a753f1e785e3ff4ccbf0c4e12b133c22a Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Thu, 26 Oct 2023 14:25:35 -0700 Subject: [PATCH 04/12] add llama index querying --- memgpt/connectors/connector.py | 6 +- memgpt/memory.py | 101 ++++++++++++++++++++++++++++++++- memgpt/persistence_manager.py | 12 ++-- memgpt/utils.py | 29 +++++++--- 4 files changed, 130 insertions(+), 18 deletions(-) diff --git a/memgpt/connectors/connector.py b/memgpt/connectors/connector.py index 470cb208..e06d3418 100644 --- a/memgpt/connectors/connector.py +++ b/memgpt/connectors/connector.py @@ -13,7 +13,7 @@ from typing import List import os import typer from memgpt.constants import MEMGPT_DIR -from memgpt.utils import estimate_openai_cost, index_docs, save_index +from memgpt.utils import estimate_openai_cost, get_index, save_index app = typer.Typer() @@ -45,7 +45,7 @@ def load_directory( # embed docs print("Indexing documents...") - index = index_docs(docs) + index = get_index(name, docs) # save connector information into .memgpt metadata file save_index(index, name) @@ -89,7 +89,7 @@ def load_database( # load data docs = db.load_data(query=query) - index = index_docs(docs) + index = get_index(name, docs) save_index(index, name) diff --git a/memgpt/memory.py b/memgpt/memory.py index 659076d5..c2dcaf6f 100644 --- a/memgpt/memory.py +++ b/memgpt/memory.py @@ -1,14 +1,26 @@ from abc import ABC, abstractmethod +import os import datetime import re import faiss import numpy as np +from typing import Optional, List, Tuple -from 
.constants import MESSAGE_SUMMARY_WARNING_TOKENS +from .constants import MESSAGE_SUMMARY_WARNING_TOKENS, MEMGPT_DIR from .utils import cosine_similarity, get_local_time, printd, count_tokens from .prompts.gpt_summarize import SYSTEM as SUMMARY_PROMPT_SYSTEM from .openai_tools import acompletions_with_backoff as acreate, async_get_embedding_with_backoff +from llama_index import ( + VectorStoreIndex, + get_response_synthesizer, + load_index_from_storage, + StorageContext, +) +from llama_index.retrievers import VectorIndexRetriever +from llama_index.query_engine import RetrieverQueryEngine +from llama_index.indices.postprocessor import SimilarityPostprocessor + class CoreMemory(object): """Held in-context inside the system message @@ -130,10 +142,26 @@ class ArchivalMemory(ABC): @abstractmethod def insert(self, memory_string): + """ Insert new archival memory + + :param memory_string: Memory string to insert + :type memory_string: str + """ pass @abstractmethod - def search(self, query_string, count=None, start=None): + def search(self, query_string, count=None, start=None) -> Tuple[List[str], int]: + """ Search archival memory + + :param query_string: Query string + :type query_string: str + :param count: Number of results to return (None for all) + :type count: Optional[int] + :param start: Offset to start returning results from (None if 0) + :type start: Optional[int] + + :return: Tuple of (list of results, total number of results) + """ pass @abstractmethod @@ -501,3 +529,72 @@ class DummyRecallMemoryWithEmbeddings(DummyRecallMemory): return matches[start:], len(matches) else: return matches, len(matches) + + +class LocalArchivalMemory(ArchivalMemory): + """ Archival memory built on top of Llama Index """ + + def __init__(self, archival_memory_database: Optional[str] = None, top_k: Optional[int] = 100): + """ Init function for archival memory + + :param archiva_memory_database: name of dataset to pre-fill archival with + :type archival_memory_database: str + """ + 
+ if archival_memory_database is not None: + # TODO: load form ~/.memgpt/archival + directory = f"{MEMGPT_DIR}/archival/{archival_memory_database}" + assert os.path.exists(directory), f"Archival memory database {archival_memory_database} does not exist" + storage_context = StorageContext.from_defaults(persist_dir=directory) + self.index = load_index_from_storage(storage_context) + else: + self.index = VectorIndex() + self.top_k = top_k + self.retriever = VectorIndexRetriever( + index=self.index, # does this get refreshed? + similarity_top_k=self.top_k, + ) + + # configure response synthesizer + response_synthesizer = get_response_synthesizer() + + # assemble query engine + self.query_engine = RetrieverQueryEngine( + retriever=self.retriever, + #response_synthesizer=response_synthesizer, + #node_postprocessors=[ + # SimilarityPostprocessor(similarity_cutoff=0) # TODO: tune this + #] + ) + + # cache for repeated queries + # TODO: have some mechanism for cleanup otherwise will lead to OOM + self.cache = {} + + async def insert(self, memory_string): + self.index.insert(memory_string) + + async def search(self, query_string, count=None, start=None): + + start = start if start else 0 + count = count if count else self.top_k + count = min(count + start, self.top_k) + + if query_string not in self.cache: + #self.cache[query_string] = self.query_engine.query(query_string) + self.cache[query_string] = self.retriever.retrieve(query_string) + + results = self.cache[query_string][start:start+count] + results = [ + {'timestamp': get_local_time(), 'content': node.node.text} + for node in results + ] + #from pprint import pprint + #pprint(results) + return results, len(results) + + def __repr__(self) -> str: + print(self.index.ref_doc_info) + return "" + + diff --git a/memgpt/persistence_manager.py b/memgpt/persistence_manager.py index 4874fe02..3e6bafaf 100644 --- a/memgpt/persistence_manager.py +++ b/memgpt/persistence_manager.py @@ -1,7 +1,7 @@ from abc import ABC, 
abstractmethod import pickle -from .memory import DummyRecallMemory, DummyRecallMemoryWithEmbeddings, DummyArchivalMemory, DummyArchivalMemoryWithEmbeddings, DummyArchivalMemoryWithFaiss +from .memory import DummyRecallMemory, DummyRecallMemoryWithEmbeddings, DummyArchivalMemory, DummyArchivalMemoryWithEmbeddings, DummyArchivalMemoryWithFaiss, LocalArchivalMemory from .utils import get_local_time, printd @@ -32,13 +32,14 @@ class InMemoryStateManager(PersistenceManager): """In-memory state manager has nothing to manage, all agents are held in-memory""" recall_memory_cls = DummyRecallMemory - archival_memory_cls = DummyArchivalMemory + archival_memory_cls = LocalArchivalMemory - def __init__(self): + def __init__(self, archival_memory_db=None): # Memory held in-state useful for debugging stateful versions self.memory = None self.messages = [] self.all_messages = [] + self.archival_memory = LocalArchivalMemory(archival_memory_database=archival_memory_db) @staticmethod def load(filename): @@ -59,8 +60,8 @@ class InMemoryStateManager(PersistenceManager): # Persistence manager also handles DB-related state self.recall_memory = self.recall_memory_cls(message_database=self.all_messages) - self.archival_memory_db = [] - self.archival_memory = self.archival_memory_cls(archival_memory_database=self.archival_memory_db) + + # TODO: init archival memory here? 
def trim_messages(self, num): # printd(f"InMemoryStateManager.trim_messages") @@ -142,3 +143,4 @@ class InMemoryStateManagerWithFaiss(InMemoryStateManager): # Persistence manager also handles DB-related state self.recall_memory = self.recall_memory_cls(message_database=self.all_messages) self.archival_memory = self.archival_memory_cls(index=self.archival_index, archival_memory_database=self.archival_memory_db, k=self.a_k) + diff --git a/memgpt/utils.py b/memgpt/utils.py index a8b5a07a..560f544f 100644 --- a/memgpt/utils.py +++ b/memgpt/utils.py @@ -16,7 +16,7 @@ from tqdm import tqdm import typer from memgpt.openai_tools import async_get_embedding_with_backoff from memgpt.constants import MEMGPT_DIR -from llama_index import set_global_service_context, ServiceContext, VectorStoreIndex +from llama_index import set_global_service_context, ServiceContext, VectorStoreIndex, load_index_from_storage, StorageContext from llama_index.embeddings import OpenAIEmbedding def count_tokens(s: str, model: str = "gpt-4") -> int: @@ -375,7 +375,8 @@ def estimate_openai_cost(docs): token_counter.reset_counts() return cost -def index_docs(docs): + +def get_index(name, docs): """ Index documents @@ -383,6 +384,15 @@ def index_docs(docs): :type docs: List[Document] """ + # check if directory exists + dir = f"{MEMGPT_DIR}/archival/{name}" + if os.path.exists(dir): + confirm = typer.confirm(typer.style(f"Index with name {name} already exists -- re-index?", fg="yellow"), default=False) + if not confirm: + # return existing index + storage_context = StorageContext.from_defaults(persist_dir=dir) + return load_index_from_storage(storage_context) + # TODO: support configurable embeddings # TODO: read from config how to index (open ai vs. 
local): then embed_mode="local" @@ -414,14 +424,17 @@ def save_index(index, name): # save # TODO: load directory from config # TODO: save to vectordb/local depending on config + dir = f"{MEMGPT_DIR}/archival/{name}" - # check if directory exists - if os.path.exists(dir): - confirm = typer.confirm(typer.style(f"Index with name {name} already exists -- overwrite?", fg="red"), default=False) - if not confirm: - typer.secho("Aborting.", fg="red") - exit() + ## Avoid overwriting + ## check if directory exists + #if os.path.exists(dir): + # confirm = typer.confirm(typer.style(f"Index with name {name} already exists -- overwrite?", fg="red"), default=False) + # if not confirm: + # typer.secho("Aborting.", fg="red") + # exit() + # create directory, even if it already exists os.makedirs(dir, exist_ok=True) index.storage_context.persist(dir) From bfa4f28566b4d84aa58bd6e50ca6a6390534d2f2 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Thu, 26 Oct 2023 14:25:46 -0700 Subject: [PATCH 05/12] add archival memory test --- tests/test_load_archival.py | 60 +++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 tests/test_load_archival.py diff --git a/tests/test_load_archival.py b/tests/test_load_archival.py new file mode 100644 index 00000000..d825b7a0 --- /dev/null +++ b/tests/test_load_archival.py @@ -0,0 +1,60 @@ +import tempfile +import asyncio +import os +from memgpt.connectors.connector import load_directory +import memgpt.agent as agent +import memgpt.system as system +import memgpt.utils as utils +import memgpt.presets as presets +import memgpt.constants as constants +import memgpt.personas.personas as personas +import memgpt.humans.humans as humans +from memgpt.persistence_manager import ( + InMemoryStateManager +) +from memgpt.config import Config +from memgpt.constants import MEMGPT_DIR, DEFAULT_MEMGPT_MODEL +from memgpt.connectors import connector +import memgpt.interface # for printing to terminal +import asyncio +from datasets 
import load_dataset + +def test_archival(): + # downloading hugging face dataset (if does not exist) + dataset = load_dataset("MemGPT/example_short_stories") + + cache_dir = os.getenv("HF_DATASETS_CACHE") + + if cache_dir is None: + # Construct the default path if the environment variable is not set. + cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "huggingface", "datasets") + + # load directory + print("Loading dataset into index...") + print(cache_dir) + load_directory( + name="tmp_hf_dataset", + input_dir=cache_dir, + recursive=True, + ) + + # create state manager based off loaded data + persistence_manager = InMemoryStateManager(archival_memory_db="tmp_hf_dataset") + + # create agent + memgpt_agent = presets.use_preset( + presets.DEFAULT, + DEFAULT_MEMGPT_MODEL, + personas.get_persona_text(personas.DEFAULT), + humans.get_human_text(humans.DEFAULT), + memgpt.interface, + persistence_manager, + ) + def query(q): + res = asyncio.run(memgpt_agent.archival_memory_search(q)) + return res + + results = query("cinderella be getting sick") + assert "Cinderella" in results, f"Expected 'Cinderella' in results, but got {results}" + +test_archival() \ No newline at end of file From 85ac22ff9e1eab38446ed3fa30de7185ce38d529 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Thu, 26 Oct 2023 14:35:49 -0700 Subject: [PATCH 06/12] try to avoid changing current cli logic flow --- memgpt/persistence_manager.py | 67 ++++++++++++++++++++++++++++++++++- tests/test_load_archival.py | 5 +-- 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/memgpt/persistence_manager.py b/memgpt/persistence_manager.py index 3e6bafaf..74f8d1d9 100644 --- a/memgpt/persistence_manager.py +++ b/memgpt/persistence_manager.py @@ -27,10 +27,75 @@ class PersistenceManager(ABC): def update_memory(self, new_memory): pass - class InMemoryStateManager(PersistenceManager): """In-memory state manager has nothing to manage, all agents are held in-memory""" + recall_memory_cls = 
DummyRecallMemory + archival_memory_cls = DummyArchivalMemory + + def __init__(self): + # Memory held in-state useful for debugging stateful versions + self.memory = None + self.messages = [] + self.all_messages = [] + + @staticmethod + def load(filename): + with open(filename, 'rb') as f: + return pickle.load(f) + + def save(self, filename): + with open(filename, 'wb') as fh: + pickle.dump(self, fh, protocol=pickle.HIGHEST_PROTOCOL) + + def init(self, agent): + printd(f"Initializing InMemoryStateManager with agent object") + self.all_messages = [{'timestamp': get_local_time(), 'message': msg} for msg in agent.messages.copy()] + self.messages = [{'timestamp': get_local_time(), 'message': msg} for msg in agent.messages.copy()] + self.memory = agent.memory + printd(f"InMemoryStateManager.all_messages.len = {len(self.all_messages)}") + printd(f"InMemoryStateManager.messages.len = {len(self.messages)}") + + # Persistence manager also handles DB-related state + self.recall_memory = self.recall_memory_cls(message_database=self.all_messages) + self.archival_memory_db = [] + self.archival_memory = self.archival_memory_cls(archival_memory_database=self.archival_memory_db) + + def trim_messages(self, num): + # printd(f"InMemoryStateManager.trim_messages") + self.messages = [self.messages[0]] + self.messages[num:] + + def prepend_to_messages(self, added_messages): + # first tag with timestamps + added_messages = [{'timestamp': get_local_time(), 'message': msg} for msg in added_messages] + + printd(f"InMemoryStateManager.prepend_to_message") + self.messages = [self.messages[0]] + added_messages + self.messages[1:] + self.all_messages.extend(added_messages) + + def append_to_messages(self, added_messages): + # first tag with timestamps + added_messages = [{'timestamp': get_local_time(), 'message': msg} for msg in added_messages] + + printd(f"InMemoryStateManager.append_to_messages") + self.messages = self.messages + added_messages + self.all_messages.extend(added_messages) + + 
def swap_system_message(self, new_system_message): + # first tag with timestamps + new_system_message = {'timestamp': get_local_time(), 'message': new_system_message} + + printd(f"InMemoryStateManager.swap_system_message") + self.messages[0] = new_system_message + self.all_messages.append(new_system_message) + + def update_memory(self, new_memory): + printd(f"InMemoryStateManager.update_memory") + self.memory = new_memory + +class LocalStateManager(PersistenceManager): + """In-memory state manager has nothing to manage, all agents are held in-memory""" + recall_memory_cls = DummyRecallMemory archival_memory_cls = LocalArchivalMemory diff --git a/tests/test_load_archival.py b/tests/test_load_archival.py index d825b7a0..ddfa1f50 100644 --- a/tests/test_load_archival.py +++ b/tests/test_load_archival.py @@ -10,7 +10,8 @@ import memgpt.constants as constants import memgpt.personas.personas as personas import memgpt.humans.humans as humans from memgpt.persistence_manager import ( - InMemoryStateManager + InMemoryStateManager, + LocalStateManager ) from memgpt.config import Config from memgpt.constants import MEMGPT_DIR, DEFAULT_MEMGPT_MODEL @@ -39,7 +40,7 @@ def test_archival(): ) # create state manager based off loaded data - persistence_manager = InMemoryStateManager(archival_memory_db="tmp_hf_dataset") + persistence_manager = LocalStateManager(archival_memory_db="tmp_hf_dataset") # create agent memgpt_agent = presets.use_preset( From bbacf0fb3325f80bc7490864283d3bf5a750fba9 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Thu, 26 Oct 2023 15:30:31 -0700 Subject: [PATCH 07/12] add database test --- memgpt/config.py | 21 ++++++++++++ memgpt/connectors/connector.py | 52 +++++++++++++++++++----------- memgpt/memory.py | 15 --------- tests/test_load_archival.py | 58 ++++++++++++++++++++++++++++++++-- 4 files changed, 110 insertions(+), 36 deletions(-) diff --git a/memgpt/config.py b/memgpt/config.py index 3fa8804d..857872e9 100644 --- a/memgpt/config.py +++ 
b/memgpt/config.py @@ -25,6 +25,27 @@ model_choices = [ ] +class MemGPTConfig: + + # Model configuration + openai_key: str = None + azure_key: str = None + azure_endpoint: str = None + model_endpoint: str = None + + # Storage (archival/recall) configuration + storage_type: str = "local" # ["local", "vectordb"] + storage_url: str = None + + # Persona configuration + default_person = "" + + # Human configuration + default_human = "" + + + + class Config: personas_dir = os.path.join("memgpt", "personas", "examples") custom_personas_dir = os.path.join(MEMGPT_DIR, "personas") diff --git a/memgpt/connectors/connector.py b/memgpt/connectors/connector.py index e06d3418..549c2d7d 100644 --- a/memgpt/connectors/connector.py +++ b/memgpt/connectors/connector.py @@ -59,32 +59,48 @@ def load_webpage( # embed docs print("Indexing documents...") - index = index_docs(docs) + index = get_index(docs) # save connector information into .memgpt metadata file save_index(index, name) - @app.command("database") def load_database( name: str = typer.Option(help="Name of dataset to load."), - scheme: str = typer.Option(help="Database scheme."), - host: str = typer.Option(help="Database host."), - port: int = typer.Option(help="Database port."), - user: str = typer.Option(help="Database user."), - password: str = typer.Option(help="Database password."), - dbname: str = typer.Option(help="Database name."), - query: str = typer.Option(None, help="Database query."), + query: str = typer.Option(help="Database query."), + dump_path: str = typer.Option(None, help="Path to dump file."), + scheme: str = typer.Option(None, help="Database scheme."), + host: str = typer.Option(None, help="Database host."), + port: int = typer.Option(None, help="Database port."), + user: str = typer.Option(None, help="Database user."), + password: str = typer.Option(None, help="Database password."), + dbname: str = typer.Option(None, help="Database name."), ): from llama_index.readers.database import DatabaseReader - - 
db = DatabaseReader( - scheme=scheme, # Database Scheme - host=host, # Database Host - port=port, # Database Port - user=user, # Database User - password=password, # Database Password - dbname=dbname, # Database Name - ) + print(dump_path, scheme) + + if dump_path is not None: + # read from database dump file + from sqlalchemy import create_engine, MetaData + engine = create_engine(f'sqlite:///{dump_path}') + + db = DatabaseReader(engine=engine) + else: + assert dump_path is None, "Cannot provide both dump_path and database connection parameters." + assert scheme is not None, "Must provide database scheme." + assert host is not None, "Must provide database host." + assert port is not None, "Must provide database port." + assert user is not None, "Must provide database user." + assert password is not None, "Must provide database password." + assert dbname is not None, "Must provide database name." + + db = DatabaseReader( + scheme=scheme, # Database Scheme + host=host, # Database Host + port=port, # Database Port + user=user, # Database User + password=password, # Database Password + dbname=dbname, # Database Name + ) # load data docs = db.load_data(query=query) diff --git a/memgpt/memory.py b/memgpt/memory.py index c2dcaf6f..f4fa09ec 100644 --- a/memgpt/memory.py +++ b/memgpt/memory.py @@ -554,20 +554,6 @@ class LocalArchivalMemory(ArchivalMemory): index=self.index, # does this get refreshed? 
similarity_top_k=self.top_k, ) - - # configure response synthesizer - response_synthesizer = get_response_synthesizer() - - # assemble query engine - self.query_engine = RetrieverQueryEngine( - retriever=self.retriever, - #response_synthesizer=response_synthesizer, - #node_postprocessors=[ - # SimilarityPostprocessor(similarity_cutoff=0) # TODO: tune this - #] - ) - - # cache for repeated queries # TODO: have some mechanism for cleanup otherwise will lead to OOM self.cache = {} @@ -581,7 +567,6 @@ class LocalArchivalMemory(ArchivalMemory): count = min(count + start, self.top_k) if query_string not in self.cache: - #self.cache[query_string] = self.query_engine.query(query_string) self.cache[query_string] = self.retriever.retrieve(query_string) results = self.cache[query_string][start:start+count] diff --git a/tests/test_load_archival.py b/tests/test_load_archival.py index ddfa1f50..dc857372 100644 --- a/tests/test_load_archival.py +++ b/tests/test_load_archival.py @@ -1,7 +1,7 @@ import tempfile import asyncio import os -from memgpt.connectors.connector import load_directory +from memgpt.connectors.connector import load_directory, load_database, load_webpage import memgpt.agent as agent import memgpt.system as system import memgpt.utils as utils @@ -20,7 +20,7 @@ import memgpt.interface # for printing to terminal import asyncio from datasets import load_dataset -def test_archival(): +def test_load_directory(): # downloading hugging face dataset (if does not exist) dataset = load_dataset("MemGPT/example_short_stories") @@ -58,4 +58,56 @@ def test_archival(): results = query("cinderella be getting sick") assert "Cinderella" in results, f"Expected 'Cinderella' in results, but got {results}" -test_archival() \ No newline at end of file +def test_load_webpage(): + pass + +def test_load_database(): + + from sqlalchemy import create_engine, MetaData + import pandas as pd + + db_path = "memgpt/personas/examples/sqldb/test.db" + engine = create_engine(f'sqlite:///{db_path}') 
+ + # Create a MetaData object and reflect the database to get table information. + metadata = MetaData() + metadata.reflect(bind=engine) + + # Get a list of table names from the reflected metadata. + table_names = metadata.tables.keys() + + print(table_names) + + # Define a SQL query to retrieve data from a table (replace 'your_table_name' with your actual table name). + query = f"SELECT * FROM {list(table_names)[0]}" + + # Use Pandas to read data from the database into a DataFrame. + df = pd.read_sql_query(query, engine) + print(df) + + load_database( + name="tmp_db_dataset", + #engine=engine, + dump_path=db_path, + query=f"SELECT * FROM {list(table_names)[0]}", + ) + + persistence_manager = LocalStateManager(archival_memory_db="tmp_db_dataset") + + # create agent + memgpt_agent = presets.use_preset( + presets.DEFAULT, + DEFAULT_MEMGPT_MODEL, + personas.get_persona_text(personas.DEFAULT), + humans.get_human_text(humans.DEFAULT), + memgpt.interface, + persistence_manager, + ) + print("Successfully loaded into index") + assert True + + + + +#test_load_directory() +test_load_database() \ No newline at end of file From 1bc8e7a601f87803d23ea2ae97a94cb6329c6b28 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Thu, 26 Oct 2023 15:31:21 -0700 Subject: [PATCH 08/12] remove config --- memgpt/config.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/memgpt/config.py b/memgpt/config.py index 857872e9..d22ee281 100644 --- a/memgpt/config.py +++ b/memgpt/config.py @@ -24,28 +24,6 @@ model_choices = [ ), ] - -class MemGPTConfig: - - # Model configuration - openai_key: str = None - azure_key: str = None - azure_endpoint: str = None - model_endpoint: str = None - - # Storage (archival/recall) configuration - storage_type: str = "local" # ["local", "vectordb"] - storage_url: str = None - - # Persona configuration - default_person = "" - - # Human configuration - default_human = "" - - - - class Config: personas_dir = os.path.join("memgpt", "personas", 
"examples") custom_personas_dir = os.path.join(MEMGPT_DIR, "personas") From 0ab3d098d287b8781fc1c7857200e14bdb35fef9 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Thu, 26 Oct 2023 16:08:25 -0700 Subject: [PATCH 09/12] reformat --- memgpt/__main__.py | 1 + memgpt/agent.py | 260 ++++++++++-------- memgpt/agent_base.py | 3 +- memgpt/autogen/interface.py | 96 ++----- memgpt/autogen/memgpt_agent.py | 28 +- memgpt/config.py | 58 +--- memgpt/connectors/connector.py | 30 +- memgpt/constants.py | 8 +- memgpt/interface.py | 52 +--- .../llm_chat_completion_wrappers/airoboros.py | 8 +- .../llm_chat_completion_wrappers/dolphin.py | 8 +- memgpt/main.py | 78 ++---- memgpt/memory.py | 254 ++++++++--------- memgpt/openai_tools.py | 17 +- memgpt/persistence_manager.py | 57 ++-- memgpt/personas/examples/docqa/build_index.py | 20 +- .../docqa/generate_embeddings_for_docs.py | 52 ++-- .../openai_parallel_request_processor.py | 101 ++----- memgpt/personas/examples/docqa/scrape_docs.py | 42 ++- memgpt/presets.py | 31 ++- memgpt/prompts/gpt_functions.py | 69 ++--- memgpt/prompts/gpt_summarize.py | 5 +- memgpt/prompts/gpt_system.py | 6 +- memgpt/system.py | 78 +++--- memgpt/utils.py | 61 ++-- tests/test_load_archival.py | 28 +- 26 files changed, 600 insertions(+), 851 deletions(-) diff --git a/memgpt/__main__.py b/memgpt/__main__.py index 2310408d..89f11424 100644 --- a/memgpt/__main__.py +++ b/memgpt/__main__.py @@ -1,2 +1,3 @@ from .main import app + app() diff --git a/memgpt/agent.py b/memgpt/agent.py index 6e5de9f7..85ce0df3 100644 --- a/memgpt/agent.py +++ b/memgpt/agent.py @@ -11,10 +11,15 @@ from .system import get_heartbeat, get_login_event, package_function_response, p from .memory import CoreMemory as Memory, summarize_messages from .openai_tools import acompletions_with_backoff as acreate from .utils import get_local_time, parse_json, united_diff, printd, count_tokens -from .constants import \ - FIRST_MESSAGE_ATTEMPTS, MAX_PAUSE_HEARTBEATS, \ - 
MESSAGE_CHATGPT_FUNCTION_MODEL, MESSAGE_CHATGPT_FUNCTION_SYSTEM_MESSAGE, MESSAGE_SUMMARY_WARNING_TOKENS, \ - CORE_MEMORY_HUMAN_CHAR_LIMIT, CORE_MEMORY_PERSONA_CHAR_LIMIT +from .constants import ( + FIRST_MESSAGE_ATTEMPTS, + MAX_PAUSE_HEARTBEATS, + MESSAGE_CHATGPT_FUNCTION_MODEL, + MESSAGE_CHATGPT_FUNCTION_SYSTEM_MESSAGE, + MESSAGE_SUMMARY_WARNING_TOKENS, + CORE_MEMORY_HUMAN_CHAR_LIMIT, + CORE_MEMORY_PERSONA_CHAR_LIMIT, +) def initialize_memory(ai_notes, human_notes): @@ -28,52 +33,57 @@ def initialize_memory(ai_notes, human_notes): return memory -def construct_system_with_memory( - system, memory, memory_edit_timestamp, - archival_memory=None, recall_memory=None - ): - full_system_message = "\n".join([ - system, - "\n", - f"### Memory [last modified: {memory_edit_timestamp}", - f"{len(recall_memory) if recall_memory else 0} previous messages between you and the user are stored in recall memory (use functions to access them)", - f"{len(archival_memory) if archival_memory else 0} total memories you created are stored in archival memory (use functions to access them)", - "\nCore memory shown below (limited in size, additional information stored in archival / recall memory):", - "", - memory.persona, - "", - "", - memory.human, - "", - ]) +def construct_system_with_memory(system, memory, memory_edit_timestamp, archival_memory=None, recall_memory=None): + full_system_message = "\n".join( + [ + system, + "\n", + f"### Memory [last modified: {memory_edit_timestamp}", + f"{len(recall_memory) if recall_memory else 0} previous messages between you and the user are stored in recall memory (use functions to access them)", + f"{len(archival_memory) if archival_memory else 0} total memories you created are stored in archival memory (use functions to access them)", + "\nCore memory shown below (limited in size, additional information stored in archival / recall memory):", + "", + memory.persona, + "", + "", + memory.human, + "", + ] + ) return full_system_message def 
initialize_message_sequence( - model, - system, - memory, - archival_memory=None, - recall_memory=None, - memory_edit_timestamp=None, - include_initial_boot_message=True, - ): + model, + system, + memory, + archival_memory=None, + recall_memory=None, + memory_edit_timestamp=None, + include_initial_boot_message=True, +): if memory_edit_timestamp is None: memory_edit_timestamp = get_local_time() - full_system_message = construct_system_with_memory(system, memory, memory_edit_timestamp, archival_memory=archival_memory, recall_memory=recall_memory) + full_system_message = construct_system_with_memory( + system, memory, memory_edit_timestamp, archival_memory=archival_memory, recall_memory=recall_memory + ) first_user_message = get_login_event() # event letting MemGPT know the user just logged in if include_initial_boot_message: - if 'gpt-3.5' in model: - initial_boot_messages = get_initial_boot_messages('startup_with_send_message_gpt35') + if "gpt-3.5" in model: + initial_boot_messages = get_initial_boot_messages("startup_with_send_message_gpt35") else: - initial_boot_messages = get_initial_boot_messages('startup_with_send_message') - messages = [ - {"role": "system", "content": full_system_message}, - ] + initial_boot_messages + [ - {"role": "user", "content": first_user_message}, - ] + initial_boot_messages = get_initial_boot_messages("startup_with_send_message") + messages = ( + [ + {"role": "system", "content": full_system_message}, + ] + + initial_boot_messages + + [ + {"role": "user", "content": first_user_message}, + ] + ) else: messages = [ @@ -85,11 +95,11 @@ def initialize_message_sequence( async def get_ai_reply_async( - model, - message_sequence, - functions, - function_call="auto", - ): + model, + message_sequence, + functions, + function_call="auto", +): """Base call to GPT API w/ functions""" try: @@ -101,11 +111,11 @@ async def get_ai_reply_async( ) # special case for 'length' - if response.choices[0].finish_reason == 'length': - raise Exception('Finish 
reason was length (maximum context length)') + if response.choices[0].finish_reason == "length": + raise Exception("Finish reason was length (maximum context length)") # catches for soft errors - if response.choices[0].finish_reason not in ['stop', 'function_call']: + if response.choices[0].finish_reason not in ["stop", "function_call"]: raise Exception(f"API call finish with bad finish reason: {response}") # unpack with response.choices[0].message.content @@ -118,7 +128,19 @@ async def get_ai_reply_async( class AgentAsync(object): """Core logic for a MemGPT agent""" - def __init__(self, model, system, functions, interface, persistence_manager, persona_notes, human_notes, messages_total=None, persistence_manager_init=True, first_message_verify_mono=True): + def __init__( + self, + model, + system, + functions, + interface, + persistence_manager, + persona_notes, + human_notes, + messages_total=None, + persistence_manager_init=True, + first_message_verify_mono=True, + ): # gpt-4, gpt-3.5-turbo self.model = model # Store the system instructions (used to rebuild memory) @@ -173,7 +195,7 @@ class AgentAsync(object): @messages.setter def messages(self, value): - raise Exception('Modifying message list directly not allowed') + raise Exception("Modifying message list directly not allowed") def trim_messages(self, num): """Trim messages from the front, not including the system message""" @@ -196,16 +218,16 @@ class AgentAsync(object): # strip extra metadata if it exists for msg in added_messages: - msg.pop('api_response', None) - msg.pop('api_args', None) + msg.pop("api_response", None) + msg.pop("api_args", None) new_messages = self.messages + added_messages # append self._messages = new_messages self.messages_total += len(added_messages) def swap_system_message(self, new_system_message): - assert new_system_message['role'] == 'system', new_system_message - assert self.messages[0]['role'] == 'system', self.messages + assert new_system_message["role"] == "system", 
new_system_message + assert self.messages[0]["role"] == "system", self.messages self.persistence_manager.swap_system_message(new_system_message) @@ -223,7 +245,7 @@ class AgentAsync(object): recall_memory=self.persistence_manager.recall_memory, )[0] - diff = united_diff(curr_system_message['content'], new_system_message['content']) + diff = united_diff(curr_system_message["content"], new_system_message["content"]) printd(f"Rebuilding system with new memory...\nDiff:\n{diff}") # Store the memory change (if stateful) @@ -235,32 +257,32 @@ class AgentAsync(object): ### Local state management def to_dict(self): return { - 'model': self.model, - 'system': self.system, - 'functions': self.functions, - 'messages': self.messages, - 'messages_total': self.messages_total, - 'memory': self.memory.to_dict(), + "model": self.model, + "system": self.system, + "functions": self.functions, + "messages": self.messages, + "messages_total": self.messages_total, + "memory": self.memory.to_dict(), } def save_to_json_file(self, filename): - with open(filename, 'w') as file: + with open(filename, "w") as file: json.dump(self.to_dict(), file) @classmethod def load(cls, state, interface, persistence_manager): - model = state['model'] - system = state['system'] - functions = state['functions'] - messages = state['messages'] + model = state["model"] + system = state["system"] + functions = state["functions"] + messages = state["messages"] try: - messages_total = state['messages_total'] + messages_total = state["messages_total"] except KeyError: messages_total = len(messages) - 1 # memory requires a nested load - memory_dict = state['memory'] - persona_notes = memory_dict['persona'] - human_notes = memory_dict['human'] + memory_dict = state["memory"] + persona_notes = memory_dict["persona"] + human_notes = memory_dict["human"] # Two-part load new_agent = cls( @@ -278,18 +300,18 @@ class AgentAsync(object): return new_agent def load_inplace(self, state): - self.model = state['model'] - 
self.system = state['system'] - self.functions = state['functions'] + self.model = state["model"] + self.system = state["system"] + self.functions = state["functions"] # memory requires a nested load - memory_dict = state['memory'] - persona_notes = memory_dict['persona'] - human_notes = memory_dict['human'] + memory_dict = state["memory"] + persona_notes = memory_dict["persona"] + human_notes = memory_dict["human"] self.memory = initialize_memory(persona_notes, human_notes) # messages also - self._messages = state['messages'] + self._messages = state["messages"] try: - self.messages_total = state['messages_total'] + self.messages_total = state["messages_total"] except KeyError: self.messages_total = len(self.messages) - 1 # -system @@ -300,14 +322,14 @@ class AgentAsync(object): @classmethod def load_from_json_file(cls, json_file, interface, persistence_manager): - with open(json_file, 'r') as file: + with open(json_file, "r") as file: state = json.load(file) return cls.load(state, interface, persistence_manager) def load_from_json_file_inplace(self, json_file): # Load in-place # No interface arg needed, we can use the current one - with open(json_file, 'r') as file: + with open(json_file, "r") as file: state = json.load(file) self.load_inplace(state) @@ -317,7 +339,6 @@ class AgentAsync(object): # Step 2: check if LLM wanted to call a function if response_message.get("function_call"): - # The content if then internal monologue, not chat await self.interface.internal_monologue(response_message.content) messages.append(response_message) # extend conversation with assistant's reply @@ -348,7 +369,7 @@ class AgentAsync(object): try: function_to_call = available_functions[function_name] except KeyError as e: - error_msg = f'No function named {function_name}' + error_msg = f"No function named {function_name}" function_response = package_function_response(False, error_msg) messages.append( { @@ -357,7 +378,7 @@ class AgentAsync(object): "content": function_response, } ) 
# extend conversation with function response - await self.interface.function_message(f'Error: {error_msg}') + await self.interface.function_message(f"Error: {error_msg}") return messages, None, True # force a heartbeat to allow agent to handle error # Failure case 2: function name is OK, but function args are bad JSON @@ -374,18 +395,20 @@ class AgentAsync(object): "content": function_response, } ) # extend conversation with function response - await self.interface.function_message(f'Error: {error_msg}') + await self.interface.function_message(f"Error: {error_msg}") return messages, None, True # force a heartbeat to allow agent to handle error # (Still parsing function args) # Handle requests for immediate heartbeat - heartbeat_request = function_args.pop('request_heartbeat', None) + heartbeat_request = function_args.pop("request_heartbeat", None) if not (isinstance(heartbeat_request, bool) or heartbeat_request is None): - printd(f"Warning: 'request_heartbeat' arg parsed was not a bool or None, type={type(heartbeat_request)}, value={heartbeat_request}") + printd( + f"Warning: 'request_heartbeat' arg parsed was not a bool or None, type={type(heartbeat_request)}, value={heartbeat_request}" + ) heartbeat_request = None # Failure case 3: function failed during execution - await self.interface.function_message(f'Running {function_name}({function_args})') + await self.interface.function_message(f"Running {function_name}({function_args})") try: function_response_string = await function_to_call(**function_args) function_response = package_function_response(True, function_response_string) @@ -401,12 +424,12 @@ class AgentAsync(object): "content": function_response, } ) # extend conversation with function response - await self.interface.function_message(f'Error: {error_msg}') + await self.interface.function_message(f"Error: {error_msg}") return messages, None, True # force a heartbeat to allow agent to handle error # If no failures happened along the way: ... 
# Step 4: send the info on the function call and function response to GPT - await self.interface.function_message(f'Success: {function_response_string}') + await self.interface.function_message(f"Success: {function_response_string}") messages.append( { "role": "function", @@ -434,25 +457,29 @@ class AgentAsync(object): return False function_name = response_message["function_call"]["name"] - if require_send_message and function_name != 'send_message': + if require_send_message and function_name != "send_message": printd(f"First message function call wasn't send_message: {response_message}") return False - if require_monologue and (not response_message.get("content") or response_message["content"] is None or response_message["content"] == ""): + if require_monologue and ( + not response_message.get("content") or response_message["content"] is None or response_message["content"] == "" + ): printd(f"First message missing internal monologue: {response_message}") return False if response_message.get("content"): ### Extras monologue = response_message.get("content") + def contains_special_characters(s): special_characters = '(){}[]"' return any(char in s for char in special_characters) + if contains_special_characters(monologue): printd(f"First message internal monologue contained special characters: {response_message}") return False # if 'functions' in monologue or 'send_message' in monologue or 'inner thought' in monologue.lower(): - if 'functions' in monologue or 'send_message' in monologue: + if "functions" in monologue or "send_message" in monologue: # Sometimes the syntax won't be correct and internal syntax will leak into message.context printd(f"First message internal monologue contained reserved words: {response_message}") return False @@ -466,12 +493,12 @@ class AgentAsync(object): # Step 0: add user message if user_message is not None: await self.interface.user_message(user_message) - packed_user_message = {'role': 'user', 'content': user_message} + 
packed_user_message = {"role": "user", "content": user_message} input_message_sequence = self.messages + [packed_user_message] else: input_message_sequence = self.messages - if len(input_message_sequence) > 1 and input_message_sequence[-1]['role'] != 'user': + if len(input_message_sequence) > 1 and input_message_sequence[-1]["role"] != "user": printd(f"WARNING: attempting to run ChatCompletion without user as the last message in the queue") # Step 1: send the conversation and available functions to GPT @@ -479,14 +506,13 @@ class AgentAsync(object): printd(f"This is the first message. Running extra verifier on AI response.") counter = 0 while True: - response = await get_ai_reply_async(model=self.model, message_sequence=input_message_sequence, functions=self.functions) if self.verify_first_message_correctness(response, require_monologue=self.first_message_verify_mono): break counter += 1 if counter > first_message_retry_limit: - raise Exception(f'Hit first message retry limit ({first_message_retry_limit})') + raise Exception(f"Hit first message retry limit ({first_message_retry_limit})") else: response = await get_ai_reply_async(model=self.model, message_sequence=input_message_sequence, functions=self.functions) @@ -500,13 +526,13 @@ class AgentAsync(object): # Add the extra metadata to the assistant response # (e.g. 
enough metadata to enable recreating the API call) - assert 'api_response' not in all_response_messages[0] - all_response_messages[0]['api_response'] = response_message_copy - assert 'api_args' not in all_response_messages[0] - all_response_messages[0]['api_args'] = { - 'model': self.model, - 'messages': input_message_sequence, - 'functions': self.functions, + assert "api_response" not in all_response_messages[0] + all_response_messages[0]["api_response"] = response_message_copy + assert "api_args" not in all_response_messages[0] + all_response_messages[0]["api_args"] = { + "model": self.model, + "messages": input_message_sequence, + "functions": self.functions, } # Step 4: extend the message history @@ -516,7 +542,7 @@ class AgentAsync(object): all_new_messages = all_response_messages # Check the memory pressure and potentially issue a memory pressure warning - current_total_tokens = response['usage']['total_tokens'] + current_total_tokens = response["usage"]["total_tokens"] active_memory_warning = False if current_total_tokens > MESSAGE_SUMMARY_WARNING_TOKENS: printd(f"WARNING: last response total_tokens ({current_total_tokens}) > {MESSAGE_SUMMARY_WARNING_TOKENS}") @@ -534,7 +560,7 @@ class AgentAsync(object): printd(f"step() failed\nuser_message = {user_message}\nerror = {e}") # If we got a context alert, try trimming the messages length, then try again - if 'maximum context length' in str(e): + if "maximum context length" in str(e): # A separate API call to run a summarizer await self.summarize_messages_inplace() @@ -546,21 +572,21 @@ class AgentAsync(object): async def summarize_messages_inplace(self, cutoff=None): if cutoff is None: - tokens_so_far = 0 # Smart cutoff -- just below the max. + tokens_so_far = 0 # Smart cutoff -- just below the max. 
cutoff = len(self.messages) - 1 for m in reversed(self.messages): tokens_so_far += count_tokens(str(m), self.model) - if tokens_so_far >= MESSAGE_SUMMARY_WARNING_TOKENS*0.2: + if tokens_so_far >= MESSAGE_SUMMARY_WARNING_TOKENS * 0.2: break cutoff -= 1 - cutoff = min(len(self.messages) - 3, cutoff) # Always keep the last two messages too + cutoff = min(len(self.messages) - 3, cutoff) # Always keep the last two messages too # Try to make an assistant message come after the cutoff try: printd(f"Selected cutoff {cutoff} was a 'user', shifting one...") - if self.messages[cutoff]['role'] == 'user': + if self.messages[cutoff]["role"] == "user": new_cutoff = cutoff + 1 - if self.messages[new_cutoff]['role'] == 'user': + if self.messages[new_cutoff]["role"] == "user": printd(f"Shifted cutoff {new_cutoff} is still a 'user', ignoring...") cutoff = new_cutoff except IndexError: @@ -600,11 +626,11 @@ class AgentAsync(object): while limit is None or step_count < limit: if function_failed: - user_message = get_heartbeat('Function call failed') + user_message = get_heartbeat("Function call failed") new_messages, heartbeat_request, function_failed = await self.step(user_message) step_count += 1 elif heartbeat_request: - user_message = get_heartbeat('AI requested') + user_message = get_heartbeat("AI requested") new_messages, heartbeat_request, function_failed = await self.step(user_message) step_count += 1 else: @@ -638,7 +664,7 @@ class AgentAsync(object): return None async def recall_memory_search(self, query, count=5, page=0): - results, total = await self.persistence_manager.recall_memory.text_search(query, count=count, start=page*count) + results, total = await self.persistence_manager.recall_memory.text_search(query, count=count, start=page * count) num_pages = math.ceil(total / count) - 1 # 0 index if len(results) == 0: results_str = f"No results found." 
@@ -649,7 +675,7 @@ class AgentAsync(object): return results_str async def recall_memory_search_date(self, start_date, end_date, count=5, page=0): - results, total = await self.persistence_manager.recall_memory.date_search(start_date, end_date, count=count, start=page*count) + results, total = await self.persistence_manager.recall_memory.date_search(start_date, end_date, count=count, start=page * count) num_pages = math.ceil(total / count) - 1 # 0 index if len(results) == 0: results_str = f"No results found." @@ -664,7 +690,7 @@ class AgentAsync(object): return None async def archival_memory_search(self, query, count=5, page=0): - results, total = await self.persistence_manager.archival_memory.search(query, count=count, start=page*count) + results, total = await self.persistence_manager.archival_memory.search(query, count=count, start=page * count) num_pages = math.ceil(total / count) - 1 # 0 index if len(results) == 0: results_str = f"No results found." @@ -683,7 +709,7 @@ class AgentAsync(object): # And record how long the pause should go for self.pause_heartbeats_minutes = int(minutes) - return f'Pausing timed heartbeats for {minutes} min' + return f"Pausing timed heartbeats for {minutes} min" def heartbeat_is_paused(self): """Check if there's a requested pause on timed heartbeats""" @@ -700,8 +726,8 @@ class AgentAsync(object): """Base call to GPT API w/ functions""" message_sequence = [ - {'role': 'system', 'content': MESSAGE_CHATGPT_FUNCTION_SYSTEM_MESSAGE}, - {'role': 'user', 'content': str(message)}, + {"role": "system", "content": MESSAGE_CHATGPT_FUNCTION_SYSTEM_MESSAGE}, + {"role": "user", "content": str(message)}, ] response = await acreate( model=MESSAGE_CHATGPT_FUNCTION_MODEL, diff --git a/memgpt/agent_base.py b/memgpt/agent_base.py index 06442c92..7f132e49 100644 --- a/memgpt/agent_base.py +++ b/memgpt/agent_base.py @@ -2,7 +2,6 @@ from abc import ABC, abstractmethod class AgentAsyncBase(ABC): - @abstractmethod async def step(self, user_message): - 
pass \ No newline at end of file + pass diff --git a/memgpt/autogen/interface.py b/memgpt/autogen/interface.py index 4f01fd7a..f3776790 100644 --- a/memgpt/autogen/interface.py +++ b/memgpt/autogen/interface.py @@ -68,41 +68,25 @@ class AutoGenInterface(object): print(f"inner thoughts :: {msg}") if not self.show_inner_thoughts: return - message = ( - f"\x1B[3m{Fore.LIGHTBLACK_EX}💭 {msg}{Style.RESET_ALL}" - if self.fancy - else f"[inner thoughts] {msg}" - ) + message = f"\x1B[3m{Fore.LIGHTBLACK_EX}💭 {msg}{Style.RESET_ALL}" if self.fancy else f"[inner thoughts] {msg}" self.message_list.append(message) async def assistant_message(self, msg): if self.debug: print(f"assistant :: {msg}") - message = ( - f"{Fore.YELLOW}{Style.BRIGHT}🤖 {Fore.YELLOW}{msg}{Style.RESET_ALL}" - if self.fancy - else msg - ) + message = f"{Fore.YELLOW}{Style.BRIGHT}🤖 {Fore.YELLOW}{msg}{Style.RESET_ALL}" if self.fancy else msg self.message_list.append(message) async def memory_message(self, msg): if self.debug: print(f"memory :: {msg}") - message = ( - f"{Fore.LIGHTMAGENTA_EX}{Style.BRIGHT}🧠 {Fore.LIGHTMAGENTA_EX}{msg}{Style.RESET_ALL}" - if self.fancy - else f"[memory] {msg}" - ) + message = f"{Fore.LIGHTMAGENTA_EX}{Style.BRIGHT}🧠 {Fore.LIGHTMAGENTA_EX}{msg}{Style.RESET_ALL}" if self.fancy else f"[memory] {msg}" self.message_list.append(message) async def system_message(self, msg): if self.debug: print(f"system :: {msg}") - message = ( - f"{Fore.MAGENTA}{Style.BRIGHT}🖥️ [system] {Fore.MAGENTA}{msg}{Style.RESET_ALL}" - if self.fancy - else f"[system] {msg}" - ) + message = f"{Fore.MAGENTA}{Style.BRIGHT}🖥️ [system] {Fore.MAGENTA}{msg}{Style.RESET_ALL}" if self.fancy else f"[system] {msg}" self.message_list.append(message) async def user_message(self, msg, raw=False): @@ -113,11 +97,7 @@ class AutoGenInterface(object): if isinstance(msg, str): if raw: - message = ( - f"{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg}{Style.RESET_ALL}" - if self.fancy - else f"[user] {msg}" - ) + message = 
f"{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg}{Style.RESET_ALL}" if self.fancy else f"[user] {msg}" self.message_list.append(message) return else: @@ -125,42 +105,24 @@ class AutoGenInterface(object): msg_json = json.loads(msg) except: print(f"Warning: failed to parse user message into json") - message = ( - f"{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg}{Style.RESET_ALL}" - if self.fancy - else f"[user] {msg}" - ) + message = f"{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg}{Style.RESET_ALL}" if self.fancy else f"[user] {msg}" self.message_list.append(message) return if msg_json["type"] == "user_message": msg_json.pop("type") - message = ( - f"{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg_json}{Style.RESET_ALL}" - if self.fancy - else f"[user] {msg}" - ) + message = f"{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg_json}{Style.RESET_ALL}" if self.fancy else f"[user] {msg}" elif msg_json["type"] == "heartbeat": if True or DEBUG: msg_json.pop("type") message = ( - f"{Fore.GREEN}{Style.BRIGHT}💓 {Fore.GREEN}{msg_json}{Style.RESET_ALL}" - if self.fancy - else f"[system heartbeat] {msg}" + f"{Fore.GREEN}{Style.BRIGHT}💓 {Fore.GREEN}{msg_json}{Style.RESET_ALL}" if self.fancy else f"[system heartbeat] {msg}" ) elif msg_json["type"] == "system_message": msg_json.pop("type") - message = ( - f"{Fore.GREEN}{Style.BRIGHT}🖥️ {Fore.GREEN}{msg_json}{Style.RESET_ALL}" - if self.fancy - else f"[system] {msg}" - ) + message = f"{Fore.GREEN}{Style.BRIGHT}🖥️ {Fore.GREEN}{msg_json}{Style.RESET_ALL}" if self.fancy else f"[system] {msg}" else: - message = ( - f"{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg_json}{Style.RESET_ALL}" - if self.fancy - else f"[user] {msg}" - ) + message = f"{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg_json}{Style.RESET_ALL}" if self.fancy else f"[user] {msg}" self.message_list.append(message) @@ -171,31 +133,19 @@ class AutoGenInterface(object): return if isinstance(msg, dict): - message = ( - f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}" 
- ) + message = f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}" self.message_list.append(message) return if msg.startswith("Success: "): - message = ( - f"{Fore.RED}{Style.BRIGHT}⚡🟢 [function] {Fore.RED}{msg}{Style.RESET_ALL}" - if self.fancy - else f"[function - OK] {msg}" - ) + message = f"{Fore.RED}{Style.BRIGHT}⚡🟢 [function] {Fore.RED}{msg}{Style.RESET_ALL}" if self.fancy else f"[function - OK] {msg}" elif msg.startswith("Error: "): message = ( - f"{Fore.RED}{Style.BRIGHT}⚡🔴 [function] {Fore.RED}{msg}{Style.RESET_ALL}" - if self.fancy - else f"[function - error] {msg}" + f"{Fore.RED}{Style.BRIGHT}⚡🔴 [function] {Fore.RED}{msg}{Style.RESET_ALL}" if self.fancy else f"[function - error] {msg}" ) elif msg.startswith("Running "): if DEBUG: - message = ( - f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}" - if self.fancy - else f"[function] {msg}" - ) + message = f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}" if self.fancy else f"[function] {msg}" else: if "memory" in msg: match = re.search(r"Running (\w+)\((.*)\)", msg) @@ -227,35 +177,25 @@ class AutoGenInterface(object): else: print(f"Warning: did not recognize function message") message = ( - f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}" - if self.fancy - else f"[function] {msg}" + f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}" if self.fancy else f"[function] {msg}" ) elif "send_message" in msg: # ignore in debug mode message = None else: message = ( - f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}" - if self.fancy - else f"[function] {msg}" + f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}" if self.fancy else f"[function] {msg}" ) else: try: msg_dict = json.loads(msg) if "status" in msg_dict and msg_dict["status"] == "OK": message = ( - f"{Fore.GREEN}{Style.BRIGHT}⚡ [function] {Fore.GREEN}{msg}{Style.RESET_ALL}" - if self.fancy - else 
f"[function] {msg}" + f"{Fore.GREEN}{Style.BRIGHT}⚡ [function] {Fore.GREEN}{msg}{Style.RESET_ALL}" if self.fancy else f"[function] {msg}" ) except Exception: print(f"Warning: did not recognize function message {type(msg)} {msg}") - message = ( - f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}" - if self.fancy - else f"[function] {msg}" - ) + message = f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}" if self.fancy else f"[function] {msg}" if message: self.message_list.append(message) diff --git a/memgpt/autogen/memgpt_agent.py b/memgpt/autogen/memgpt_agent.py index 91adf5d8..6e9db5a2 100644 --- a/memgpt/autogen/memgpt_agent.py +++ b/memgpt/autogen/memgpt_agent.py @@ -55,11 +55,7 @@ def create_autogen_memgpt_agent( ``` """ interface = AutoGenInterface(**interface_kwargs) if interface is None else interface - persistence_manager = ( - InMemoryStateManager(**persistence_manager_kwargs) - if persistence_manager is None - else persistence_manager - ) + persistence_manager = InMemoryStateManager(**persistence_manager_kwargs) if persistence_manager is None else persistence_manager memgpt_agent = presets.use_preset( preset, @@ -89,9 +85,7 @@ class MemGPTAgent(ConversableAgent): self.agent = agent self.skip_verify = skip_verify self.concat_other_agent_messages = concat_other_agent_messages - self.register_reply( - [Agent, None], MemGPTAgent._a_generate_reply_for_user_message - ) + self.register_reply([Agent, None], MemGPTAgent._a_generate_reply_for_user_message) self.register_reply([Agent, None], MemGPTAgent._generate_reply_for_user_message) self.messages_processed_up_to_idx = 0 @@ -119,11 +113,7 @@ class MemGPTAgent(ConversableAgent): sender: Optional[Agent] = None, config: Optional[Any] = None, ) -> Tuple[bool, Union[str, Dict, None]]: - return asyncio.run( - self._a_generate_reply_for_user_message( - messages=messages, sender=sender, config=config - ) - ) + return 
asyncio.run(self._a_generate_reply_for_user_message(messages=messages, sender=sender, config=config)) async def _a_generate_reply_for_user_message( self, @@ -137,9 +127,7 @@ class MemGPTAgent(ConversableAgent): if len(new_messages) > 1: if self.concat_other_agent_messages: # Combine all the other messages into one message - user_message = "\n".join( - [self.format_other_agent_message(m) for m in new_messages] - ) + user_message = "\n".join([self.format_other_agent_message(m) for m in new_messages]) else: # Extend the MemGPT message list with multiple 'user' messages, then push the last one with agent.step() self.agent.messages.extend(new_messages[:-1]) @@ -157,16 +145,12 @@ class MemGPTAgent(ConversableAgent): heartbeat_request, function_failed, token_warning, - ) = await self.agent.step( - user_message, first_message=False, skip_verify=self.skip_verify - ) + ) = await self.agent.step(user_message, first_message=False, skip_verify=self.skip_verify) # Skip user inputs if there's a memory warning, function execution failed, or the agent asked for control if token_warning: user_message = system.get_token_limit_warning() elif function_failed: - user_message = system.get_heartbeat( - constants.FUNC_FAILED_HEARTBEAT_MESSAGE - ) + user_message = system.get_heartbeat(constants.FUNC_FAILED_HEARTBEAT_MESSAGE) elif heartbeat_request: user_message = system.get_heartbeat(constants.REQ_HEARTBEAT_MESSAGE) else: diff --git a/memgpt/config.py b/memgpt/config.py index d22ee281..d9a8aa93 100644 --- a/memgpt/config.py +++ b/memgpt/config.py @@ -24,6 +24,7 @@ model_choices = [ ), ] + class Config: personas_dir = os.path.join("memgpt", "personas", "examples") custom_personas_dir = os.path.join(MEMGPT_DIR, "personas") @@ -78,12 +79,8 @@ class Config: cfg = Config.get_most_recent_config() use_cfg = False if cfg: - print( - f"{Style.BRIGHT}{Fore.MAGENTA}⚙️ Found saved config file.{Style.RESET_ALL}" - ) - use_cfg = await questionary.confirm( - f"Use most recent config file '{cfg}'?" 
- ).ask_async() + print(f"{Style.BRIGHT}{Fore.MAGENTA}⚙️ Found saved config file.{Style.RESET_ALL}") + use_cfg = await questionary.confirm(f"Use most recent config file '{cfg}'?").ask_async() if use_cfg: self.config_file = cfg @@ -104,9 +101,7 @@ class Config: return self # print("No settings file found, configuring MemGPT...") - print( - f"{Style.BRIGHT}{Fore.MAGENTA}⚙️ No settings file found, configuring MemGPT...{Style.RESET_ALL}" - ) + print(f"{Style.BRIGHT}{Fore.MAGENTA}⚙️ No settings file found, configuring MemGPT...{Style.RESET_ALL}") self.model = await questionary.select( "Which model would you like to use?", @@ -126,9 +121,7 @@ class Config: ).ask_async() self.archival_storage_index = None - self.preload_archival = await questionary.confirm( - "Would you like to preload anything into MemGPT's archival memory?" - ).ask_async() + self.preload_archival = await questionary.confirm("Would you like to preload anything into MemGPT's archival memory?").ask_async() if self.preload_archival: self.load_type = await questionary.select( "What would you like to load?", @@ -139,19 +132,13 @@ class Config: ], ).ask_async() if self.load_type == "folder" or self.load_type == "sql": - archival_storage_path = await questionary.path( - "Please enter the folder or file (tab for autocomplete):" - ).ask_async() + archival_storage_path = await questionary.path("Please enter the folder or file (tab for autocomplete):").ask_async() if os.path.isdir(archival_storage_path): - self.archival_storage_files = os.path.join( - archival_storage_path, "*" - ) + self.archival_storage_files = os.path.join(archival_storage_path, "*") else: self.archival_storage_files = archival_storage_path else: - self.archival_storage_files = await questionary.path( - "Please enter the glob pattern (tab for autocomplete):" - ).ask_async() + self.archival_storage_files = await questionary.path("Please enter the glob pattern (tab for autocomplete):").ask_async() self.compute_embeddings = await 
questionary.confirm( "Would you like to compute embeddings over these files to enable embeddings search?" ).ask_async() @@ -167,19 +154,11 @@ class Config: "⛔️ Embeddings on a non-OpenAI endpoint are not yet supported, falling back to substring matching search." ) else: - self.archival_storage_index = ( - await utils.prepare_archival_index_from_files_compute_embeddings( - self.archival_storage_files - ) - ) + self.archival_storage_index = await utils.prepare_archival_index_from_files_compute_embeddings(self.archival_storage_files) if self.compute_embeddings and self.archival_storage_index: - self.index, self.archival_database = utils.prepare_archival_index( - self.archival_storage_index - ) + self.index, self.archival_database = utils.prepare_archival_index(self.archival_storage_index) else: - self.archival_database = utils.prepare_archival_index_from_files( - self.archival_storage_files - ) + self.archival_database = utils.prepare_archival_index_from_files(self.archival_storage_files) def to_dict(self): return { @@ -216,15 +195,11 @@ class Config: configs_dir = Config.configs_dir os.makedirs(configs_dir, exist_ok=True) if self.config_file is None: - filename = os.path.join( - configs_dir, utils.get_local_time().replace(" ", "_").replace(":", "_") - ) + filename = os.path.join(configs_dir, utils.get_local_time().replace(" ", "_").replace(":", "_")) self.config_file = f"{filename}.json" with open(self.config_file, "wt") as f: json.dump(self.to_dict(), f, indent=4) - print( - f"{Style.BRIGHT}{Fore.MAGENTA}⚙️ Saved config file to {self.config_file}.{Style.RESET_ALL}" - ) + print(f"{Style.BRIGHT}{Fore.MAGENTA}⚙️ Saved config file to {self.config_file}.{Style.RESET_ALL}") @staticmethod def is_valid_config_file(file: str): @@ -233,9 +208,7 @@ class Config: cfg.load_config(file) except Exception: return False - return ( - cfg.memgpt_persona is not None and cfg.human_persona is not None - ) # TODO: more validation for configs + return cfg.memgpt_persona is not None and 
cfg.human_persona is not None # TODO: more validation for configs @staticmethod def get_memgpt_personas(): @@ -330,8 +303,7 @@ class Config: files = [ os.path.join(configs_dir, f) for f in os.listdir(configs_dir) - if os.path.isfile(os.path.join(configs_dir, f)) - and Config.is_valid_config_file(os.path.join(configs_dir, f)) + if os.path.isfile(os.path.join(configs_dir, f)) and Config.is_valid_config_file(os.path.join(configs_dir, f)) ] # Return the file with the most recent modification time if len(files) == 0: diff --git a/memgpt/connectors/connector.py b/memgpt/connectors/connector.py index 549c2d7d..4b4c399a 100644 --- a/memgpt/connectors/connector.py +++ b/memgpt/connectors/connector.py @@ -1,7 +1,7 @@ -""" +""" This file contains functions for loading data into MemGPT's archival storage. -Data can be loaded with the following command, once a load function is defined: +Data can be loaded with the following command, once a load function is defined: ``` memgpt load --name [ADDITIONAL ARGS] ``` @@ -18,14 +18,13 @@ from memgpt.utils import estimate_openai_cost, get_index, save_index app = typer.Typer() - @app.command("directory") def load_directory( name: str = typer.Option(help="Name of dataset to load."), input_dir: str = typer.Option(None, help="Path to directory containing dataset."), input_files: List[str] = typer.Option(None, help="List of paths to files containing dataset."), recursive: bool = typer.Option(False, help="Recursively search for files in directory."), -): +): from llama_index import SimpleDirectoryReader if recursive: @@ -35,34 +34,35 @@ def load_directory( recursive=True, ) else: - reader = SimpleDirectoryReader( - input_files=input_files - ) + reader = SimpleDirectoryReader(input_files=input_files) # load docs print("Loading data...") docs = reader.load_data() - # embed docs + # embed docs print("Indexing documents...") index = get_index(name, docs) # save connector information into .memgpt metadata file save_index(index, name) + 
@app.command("webpage") def load_webpage( name: str = typer.Option(help="Name of dataset to load."), urls: List[str] = typer.Option(None, help="List of urls to load."), -): +): from llama_index import SimpleWebPageReader + docs = SimpleWebPageReader(html_to_text=True).load_data(urls) - # embed docs + # embed docs print("Indexing documents...") index = get_index(docs) # save connector information into .memgpt metadata file save_index(index, name) + @app.command("database") def load_database( name: str = typer.Option(help="Name of dataset to load."), @@ -76,12 +76,14 @@ def load_database( dbname: str = typer.Option(None, help="Database name."), ): from llama_index.readers.database import DatabaseReader + print(dump_path, scheme) - if dump_path is not None: + if dump_path is not None: # read from database dump file from sqlalchemy import create_engine, MetaData - engine = create_engine(f'sqlite:///{dump_path}') + + engine = create_engine(f"sqlite:///{dump_path}") db = DatabaseReader(engine=engine) else: @@ -104,8 +106,6 @@ def load_database( # load data docs = db.load_data(query=query) - + index = get_index(name, docs) save_index(index, name) - - diff --git a/memgpt/constants.py b/memgpt/constants.py index bd83f7fc..aae904c4 100644 --- a/memgpt/constants.py +++ b/memgpt/constants.py @@ -7,9 +7,7 @@ DEFAULT_MEMGPT_MODEL = "gpt-4" FIRST_MESSAGE_ATTEMPTS = 10 INITIAL_BOOT_MESSAGE = "Boot sequence complete. Persona activated." -INITIAL_BOOT_MESSAGE_SEND_MESSAGE_THOUGHT = ( - "Bootup sequence complete. Persona activated. Testing messaging functionality." -) +INITIAL_BOOT_MESSAGE_SEND_MESSAGE_THOUGHT = "Bootup sequence complete. Persona activated. Testing messaging functionality." 
STARTUP_QUOTES = [ "I think, therefore I am.", "All those moments will be lost in time, like tears in rain.", @@ -28,9 +26,7 @@ CORE_MEMORY_HUMAN_CHAR_LIMIT = 2000 MAX_PAUSE_HEARTBEATS = 360 # in min MESSAGE_CHATGPT_FUNCTION_MODEL = "gpt-3.5-turbo" -MESSAGE_CHATGPT_FUNCTION_SYSTEM_MESSAGE = ( - "You are a helpful assistant. Keep your responses short and concise." -) +MESSAGE_CHATGPT_FUNCTION_SYSTEM_MESSAGE = "You are a helpful assistant. Keep your responses short and concise." #### Functions related diff --git a/memgpt/interface.py b/memgpt/interface.py index 0e66af08..b9b95be6 100644 --- a/memgpt/interface.py +++ b/memgpt/interface.py @@ -29,15 +29,11 @@ async def assistant_message(msg): async def memory_message(msg): - print( - f"{Fore.LIGHTMAGENTA_EX}{Style.BRIGHT}🧠 {Fore.LIGHTMAGENTA_EX}{msg}{Style.RESET_ALL}" - ) + print(f"{Fore.LIGHTMAGENTA_EX}{Style.BRIGHT}🧠 {Fore.LIGHTMAGENTA_EX}{msg}{Style.RESET_ALL}") async def system_message(msg): - printd( - f"{Fore.MAGENTA}{Style.BRIGHT}🖥️ [system] {Fore.MAGENTA}{msg}{Style.RESET_ALL}" - ) + printd(f"{Fore.MAGENTA}{Style.BRIGHT}🖥️ [system] {Fore.MAGENTA}{msg}{Style.RESET_ALL}") async def user_message(msg, raw=False): @@ -50,9 +46,7 @@ async def user_message(msg, raw=False): msg_json = json.loads(msg) except: printd(f"Warning: failed to parse user message into json") - printd( - f"{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg}{Style.RESET_ALL}" - ) + printd(f"{Fore.GREEN}{Style.BRIGHT}🧑 {Fore.GREEN}{msg}{Style.RESET_ALL}") return if msg_json["type"] == "user_message": @@ -61,9 +55,7 @@ async def user_message(msg, raw=False): elif msg_json["type"] == "heartbeat": if DEBUG: msg_json.pop("type") - printd( - f"{Fore.GREEN}{Style.BRIGHT}💓 {Fore.GREEN}{msg_json}{Style.RESET_ALL}" - ) + printd(f"{Fore.GREEN}{Style.BRIGHT}💓 {Fore.GREEN}{msg_json}{Style.RESET_ALL}") elif msg_json["type"] == "system_message": msg_json.pop("type") printd(f"{Fore.GREEN}{Style.BRIGHT}🖥️ {Fore.GREEN}{msg_json}{Style.RESET_ALL}") @@ -77,33 +69,23 @@ 
async def function_message(msg): return if msg.startswith("Success: "): - printd( - f"{Fore.RED}{Style.BRIGHT}⚡🟢 [function] {Fore.RED}{msg}{Style.RESET_ALL}" - ) + printd(f"{Fore.RED}{Style.BRIGHT}⚡🟢 [function] {Fore.RED}{msg}{Style.RESET_ALL}") elif msg.startswith("Error: "): - printd( - f"{Fore.RED}{Style.BRIGHT}⚡🔴 [function] {Fore.RED}{msg}{Style.RESET_ALL}" - ) + printd(f"{Fore.RED}{Style.BRIGHT}⚡🔴 [function] {Fore.RED}{msg}{Style.RESET_ALL}") elif msg.startswith("Running "): if DEBUG: - printd( - f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}" - ) + printd(f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}") else: if "memory" in msg: match = re.search(r"Running (\w+)\((.*)\)", msg) if match: function_name = match.group(1) function_args = match.group(2) - print( - f"{Fore.RED}{Style.BRIGHT}⚡🧠 [function] {Fore.RED}updating memory with {function_name}{Style.RESET_ALL}:" - ) + print(f"{Fore.RED}{Style.BRIGHT}⚡🧠 [function] {Fore.RED}updating memory with {function_name}{Style.RESET_ALL}:") try: msg_dict = eval(function_args) if function_name == "archival_memory_search": - print( - f'{Fore.RED}\tquery: {msg_dict["query"]}, page: {msg_dict["page"]}' - ) + print(f'{Fore.RED}\tquery: {msg_dict["query"]}, page: {msg_dict["page"]}') else: print( f'{Fore.RED}{Style.BRIGHT}\t{Fore.RED} {msg_dict["old_content"]}\n\t{Fore.GREEN}→ {msg_dict["new_content"]}' @@ -114,28 +96,20 @@ async def function_message(msg): pass else: printd(f"Warning: did not recognize function message") - printd( - f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}" - ) + printd(f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}") elif "send_message" in msg: # ignore in debug mode pass else: - printd( - f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}" - ) + printd(f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}") else: try: msg_dict = json.loads(msg) if "status" in 
msg_dict and msg_dict["status"] == "OK": - printd( - f"{Fore.GREEN}{Style.BRIGHT}⚡ [function] {Fore.GREEN}{msg}{Style.RESET_ALL}" - ) + printd(f"{Fore.GREEN}{Style.BRIGHT}⚡ [function] {Fore.GREEN}{msg}{Style.RESET_ALL}") except Exception: printd(f"Warning: did not recognize function message {type(msg)} {msg}") - printd( - f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}" - ) + printd(f"{Fore.RED}{Style.BRIGHT}⚡ [function] {Fore.RED}{msg}{Style.RESET_ALL}") async def print_messages(message_sequence): diff --git a/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py b/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py index 60f8ee6b..0b2100fd 100644 --- a/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py +++ b/memgpt/local_llm/llm_chat_completion_wrappers/airoboros.py @@ -190,9 +190,7 @@ class Airoboros21Wrapper(LLMChatCompletionWrapper): function_parameters = function_json_output["params"] if self.clean_func_args: - function_name, function_parameters = self.clean_function_args( - function_name, function_parameters - ) + function_name, function_parameters = self.clean_function_args(function_name, function_parameters) message = { "role": "assistant", @@ -275,9 +273,7 @@ class Airoboros21InnerMonologueWrapper(Airoboros21Wrapper): func_str += f"\n description: {schema['description']}" func_str += f"\n params:" if add_inner_thoughts: - func_str += ( - f"\n inner_thoughts: Deep inner monologue private to you only." - ) + func_str += f"\n inner_thoughts: Deep inner monologue private to you only." 
for param_k, param_v in schema["parameters"]["properties"].items(): # TODO we're ignoring type func_str += f"\n {param_k}: {param_v['description']}" diff --git a/memgpt/local_llm/llm_chat_completion_wrappers/dolphin.py b/memgpt/local_llm/llm_chat_completion_wrappers/dolphin.py index 0ce5d4b1..40d98579 100644 --- a/memgpt/local_llm/llm_chat_completion_wrappers/dolphin.py +++ b/memgpt/local_llm/llm_chat_completion_wrappers/dolphin.py @@ -152,9 +152,7 @@ class Dolphin21MistralWrapper(LLMChatCompletionWrapper): try: content_json = json.loads(message["content"]) content_simple = content_json["message"] - prompt += ( - f"\n{IM_START_TOKEN}user\n{content_simple}{IM_END_TOKEN}" - ) + prompt += f"\n{IM_START_TOKEN}user\n{content_simple}{IM_END_TOKEN}" # prompt += f"\nUSER: {content_simple}" except: prompt += f"\n{IM_START_TOKEN}user\n{message['content']}{IM_END_TOKEN}" @@ -227,9 +225,7 @@ class Dolphin21MistralWrapper(LLMChatCompletionWrapper): function_parameters = function_json_output["params"] if self.clean_func_args: - function_name, function_parameters = self.clean_function_args( - function_name, function_parameters - ) + function_name, function_parameters = self.clean_function_args(function_name, function_parameters) message = { "role": "assistant", diff --git a/memgpt/main.py b/memgpt/main.py index 93df2329..3df4ee0e 100644 --- a/memgpt/main.py +++ b/memgpt/main.py @@ -84,12 +84,8 @@ def load(memgpt_agent, filename): print(f"Loading {filename} failed with: {e}") else: # Load the latest file - print( - f"/load warning: no checkpoint specified, loading most recent checkpoint instead" - ) - json_files = glob.glob( - "saved_state/*.json" - ) # This will list all .json files in the current directory. + print(f"/load warning: no checkpoint specified, loading most recent checkpoint instead") + json_files = glob.glob("saved_state/*.json") # This will list all .json files in the current directory. # Check if there are any json files. 
if not json_files: @@ -111,27 +107,17 @@ def load(memgpt_agent, filename): ) # TODO(fixme):for different types of persistence managers that require different load/save methods print(f"Loaded persistence manager from {filename}") except Exception as e: - print( - f"/load warning: loading persistence manager from {filename} failed with: {e}" - ) + print(f"/load warning: loading persistence manager from {filename} failed with: {e}") @app.command() def run( persona: str = typer.Option(None, help="Specify persona"), human: str = typer.Option(None, help="Specify human"), - model: str = typer.Option( - constants.DEFAULT_MEMGPT_MODEL, help="Specify the LLM model" - ), - first: bool = typer.Option( - False, "--first", help="Use --first to send the first message in the sequence" - ), - debug: bool = typer.Option( - False, "--debug", help="Use --debug to enable debugging output" - ), - no_verify: bool = typer.Option( - False, "--no_verify", help="Bypass message verification" - ), + model: str = typer.Option(constants.DEFAULT_MEMGPT_MODEL, help="Specify the LLM model"), + first: bool = typer.Option(False, "--first", help="Use --first to send the first message in the sequence"), + debug: bool = typer.Option(False, "--debug", help="Use --debug to enable debugging output"), + no_verify: bool = typer.Option(False, "--no_verify", help="Bypass message verification"), archival_storage_faiss_path: str = typer.Option( "", "--archival_storage_faiss_path", @@ -201,9 +187,7 @@ async def main( else: azure_vars = get_set_azure_env_vars() if len(azure_vars) > 0: - print( - f"Error: Environment variables {', '.join([x[0] for x in azure_vars])} should not be set if --use_azure_openai is False" - ) + print(f"Error: Environment variables {', '.join([x[0] for x in azure_vars])} should not be set if --use_azure_openai is False") return if any( @@ -296,23 +280,17 @@ async def main( else: cfg = await Config.config_init() - memgpt.interface.important_message( - "Running... 
[exit by typing '/exit', list available commands with '/help']" - ) + memgpt.interface.important_message("Running... [exit by typing '/exit', list available commands with '/help']") if cfg.model != constants.DEFAULT_MEMGPT_MODEL: memgpt.interface.warning_message( f"⛔️ Warning - you are running MemGPT with {cfg.model}, which is not officially supported (yet). Expect bugs!" ) if cfg.index: - persistence_manager = InMemoryStateManagerWithFaiss( - cfg.index, cfg.archival_database - ) + persistence_manager = InMemoryStateManagerWithFaiss(cfg.index, cfg.archival_database) elif cfg.archival_storage_files: print(f"Preloaded {len(cfg.archival_database)} chunks into archival memory.") - persistence_manager = InMemoryStateManagerWithPreloadedArchivalMemory( - cfg.archival_database - ) + persistence_manager = InMemoryStateManagerWithPreloadedArchivalMemory(cfg.archival_database) else: persistence_manager = InMemoryStateManager() @@ -356,9 +334,7 @@ async def main( print(f"Database loaded into archival memory.") if cfg.agent_save_file: - load_save_file = await questionary.confirm( - f"Load in saved agent '{cfg.agent_save_file}'?" 
- ).ask_async() + load_save_file = await questionary.confirm(f"Load in saved agent '{cfg.agent_save_file}'?").ask_async() if load_save_file: load(memgpt_agent, cfg.agent_save_file) @@ -367,9 +343,7 @@ async def main( return if not USER_GOES_FIRST: - console.input( - "[bold cyan]Hit enter to begin (will request first MemGPT message)[/bold cyan]" - ) + console.input("[bold cyan]Hit enter to begin (will request first MemGPT message)[/bold cyan]") clear_line() print() @@ -405,9 +379,7 @@ async def main( break elif user_input.lower() == "/savechat": - filename = ( - utils.get_local_time().replace(" ", "_").replace(":", "_") - ) + filename = utils.get_local_time().replace(" ", "_").replace(":", "_") filename = f"{filename}.pkl" directory = os.path.join(MEMGPT_DIR, "saved_chats") try: @@ -424,9 +396,7 @@ async def main( save(memgpt_agent=memgpt_agent, cfg=cfg) continue - elif user_input.lower() == "/load" or user_input.lower().startswith( - "/load " - ): + elif user_input.lower() == "/load" or user_input.lower().startswith("/load "): command = user_input.strip().split() filename = command[1] if len(command) > 1 else None load(memgpt_agent=memgpt_agent, filename=filename) @@ -459,16 +429,10 @@ async def main( print(f"Updated model to:\n{str(memgpt_agent.model)}") continue - elif user_input.lower() == "/pop" or user_input.lower().startswith( - "/pop " - ): + elif user_input.lower() == "/pop" or user_input.lower().startswith("/pop "): # Check if there's an additional argument that's an integer command = user_input.strip().split() - amount = ( - int(command[1]) - if len(command) > 1 and command[1].isdigit() - else 2 - ) + amount = int(command[1]) if len(command) > 1 and command[1].isdigit() else 2 print(f"Popping last {amount} messages from stack") for _ in range(min(amount, len(memgpt_agent.messages))): memgpt_agent.messages.pop() @@ -513,18 +477,14 @@ async def main( heartbeat_request, function_failed, token_warning, - ) = await memgpt_agent.step( - user_message, 
first_message=False, skip_verify=no_verify - ) + ) = await memgpt_agent.step(user_message, first_message=False, skip_verify=no_verify) # Skip user inputs if there's a memory warning, function execution failed, or the agent asked for control if token_warning: user_message = system.get_token_limit_warning() skip_next_user_input = True elif function_failed: - user_message = system.get_heartbeat( - constants.FUNC_FAILED_HEARTBEAT_MESSAGE - ) + user_message = system.get_heartbeat(constants.FUNC_FAILED_HEARTBEAT_MESSAGE) skip_next_user_input = True elif heartbeat_request: user_message = system.get_heartbeat(constants.REQ_HEARTBEAT_MESSAGE) diff --git a/memgpt/memory.py b/memgpt/memory.py index f4fa09ec..157255b7 100644 --- a/memgpt/memory.py +++ b/memgpt/memory.py @@ -40,20 +40,17 @@ class CoreMemory(object): self.archival_memory_exists = archival_memory_exists def __repr__(self) -> str: - return \ - f"\n### CORE MEMORY ###" + \ - f"\n=== Persona ===\n{self.persona}" + \ - f"\n\n=== Human ===\n{self.human}" + return f"\n### CORE MEMORY ###" + f"\n=== Persona ===\n{self.persona}" + f"\n\n=== Human ===\n{self.human}" def to_dict(self): return { - 'persona': self.persona, - 'human': self.human, + "persona": self.persona, + "human": self.human, } @classmethod def load(cls, state): - return cls(state['persona'], state['human']) + return cls(state["persona"], state["human"]) def edit_persona(self, new_persona): if self.persona_char_limit and len(new_persona) > self.persona_char_limit: @@ -76,53 +73,55 @@ class CoreMemory(object): return len(self.human) def edit(self, field, content): - if field == 'persona': + if field == "persona": return self.edit_persona(content) - elif field == 'human': + elif field == "human": return self.edit_human(content) else: raise KeyError - def edit_append(self, field, content, sep='\n'): - if field == 'persona': + def edit_append(self, field, content, sep="\n"): + if field == "persona": new_content = self.persona + sep + content return 
self.edit_persona(new_content) - elif field == 'human': + elif field == "human": new_content = self.human + sep + content return self.edit_human(new_content) else: raise KeyError def edit_replace(self, field, old_content, new_content): - if field == 'persona': + if field == "persona": if old_content in self.persona: new_persona = self.persona.replace(old_content, new_content) return self.edit_persona(new_persona) else: - raise ValueError('Content not found in persona (make sure to use exact string)') - elif field == 'human': + raise ValueError("Content not found in persona (make sure to use exact string)") + elif field == "human": if old_content in self.human: new_human = self.human.replace(old_content, new_content) return self.edit_human(new_human) else: - raise ValueError('Content not found in human (make sure to use exact string)') + raise ValueError("Content not found in human (make sure to use exact string)") else: raise KeyError async def summarize_messages( - model, - message_sequence_to_summarize, - ): + model, + message_sequence_to_summarize, +): """Summarize a message sequence using GPT""" summary_prompt = SUMMARY_PROMPT_SYSTEM summary_input = str(message_sequence_to_summarize) summary_input_tkns = count_tokens(summary_input, model) if summary_input_tkns > MESSAGE_SUMMARY_WARNING_TOKENS: - trunc_ratio = (MESSAGE_SUMMARY_WARNING_TOKENS / summary_input_tkns) * 0.8 # For good measure... + trunc_ratio = (MESSAGE_SUMMARY_WARNING_TOKENS / summary_input_tkns) * 0.8 # For good measure... 
cutoff = int(len(message_sequence_to_summarize) * trunc_ratio) - summary_input = str([await summarize_messages(model, message_sequence_to_summarize[:cutoff])] + message_sequence_to_summarize[cutoff:]) + summary_input = str( + [await summarize_messages(model, message_sequence_to_summarize[:cutoff])] + message_sequence_to_summarize[cutoff:] + ) message_sequence = [ {"role": "system", "content": summary_prompt}, {"role": "user", "content": summary_input}, @@ -139,10 +138,9 @@ async def summarize_messages( class ArchivalMemory(ABC): - @abstractmethod def insert(self, memory_string): - """ Insert new archival memory + """Insert new archival memory :param memory_string: Memory string to insert :type memory_string: str @@ -151,7 +149,7 @@ class ArchivalMemory(ABC): @abstractmethod def search(self, query_string, count=None, start=None) -> Tuple[List[str], int]: - """ Search archival memory + """Search archival memory :param query_string: Query string :type query_string: str @@ -159,7 +157,7 @@ class ArchivalMemory(ABC): :type count: Optional[int] :param start: Offset to start returning results from (None if 0) :type start: Optional[int] - + :return: Tuple of (list of results, total number of results) """ pass @@ -178,7 +176,7 @@ class DummyArchivalMemory(ArchivalMemory): """ def __init__(self, archival_memory_database=None): - self._archive = [] if archival_memory_database is None else archival_memory_database # consists of {'content': str} dicts + self._archive = [] if archival_memory_database is None else archival_memory_database # consists of {'content': str} dicts def __len__(self): return len(self._archive) @@ -187,31 +185,33 @@ class DummyArchivalMemory(ArchivalMemory): if len(self._archive) == 0: memory_str = "" else: - memory_str = "\n".join([d['content'] for d in self._archive]) - return \ - f"\n### ARCHIVAL MEMORY ###" + \ - f"\n{memory_str}" + memory_str = "\n".join([d["content"] for d in self._archive]) + return f"\n### ARCHIVAL MEMORY ###" + f"\n{memory_str}" 
async def insert(self, memory_string, embedding=None): if embedding is not None: - raise ValueError('Basic text-based archival memory does not support embeddings') - self._archive.append({ - # can eventually upgrade to adding semantic tags, etc - 'timestamp': get_local_time(), - 'content': memory_string, - }) + raise ValueError("Basic text-based archival memory does not support embeddings") + self._archive.append( + { + # can eventually upgrade to adding semantic tags, etc + "timestamp": get_local_time(), + "content": memory_string, + } + ) async def search(self, query_string, count=None, start=None): """Simple text-based search""" # in the dummy version, run an (inefficient) case-insensitive match search # printd(f"query_string: {query_string}") - matches = [s for s in self._archive if query_string.lower() in s['content'].lower()] + matches = [s for s in self._archive if query_string.lower() in s["content"].lower()] # printd(f"archive_memory.search (text-based): search for query '{query_string}' returned the following results (limit 5):\n{[str(d['content']) d in matches[:5]]}") - printd(f"archive_memory.search (text-based): search for query '{query_string}' returned the following results (limit 5):\n{[matches[start:count]]}") + printd( + f"archive_memory.search (text-based): search for query '{query_string}' returned the following results (limit 5):\n{[matches[start:count]]}" + ) # start/count support paging through results if start is not None and count is not None: - return matches[start:start+count], len(matches) + return matches[start : start + count], len(matches) elif start is None and count is not None: return matches[:count], len(matches) elif start is not None and count is None: @@ -223,8 +223,8 @@ class DummyArchivalMemory(ArchivalMemory): class DummyArchivalMemoryWithEmbeddings(DummyArchivalMemory): """Same as dummy in-memory archival memory, but with bare-bones embedding support""" - def __init__(self, archival_memory_database=None, 
embedding_model='text-embedding-ada-002'): - self._archive = [] if archival_memory_database is None else archival_memory_database # consists of {'content': str} dicts + def __init__(self, archival_memory_database=None, embedding_model="text-embedding-ada-002"): + self._archive = [] if archival_memory_database is None else archival_memory_database # consists of {'content': str} dicts self.embedding_model = embedding_model def __len__(self): @@ -234,15 +234,17 @@ class DummyArchivalMemoryWithEmbeddings(DummyArchivalMemory): # Get the embedding if embedding is None: embedding = await async_get_embedding_with_backoff(memory_string, model=self.embedding_model) - embedding_meta = {'model': self.embedding_model} + embedding_meta = {"model": self.embedding_model} printd(f"Got an embedding, type {type(embedding)}, len {len(embedding)}") - self._archive.append({ - 'timestamp': get_local_time(), - 'content': memory_string, - 'embedding': embedding, - 'embedding_metadata': embedding_meta, - }) + self._archive.append( + { + "timestamp": get_local_time(), + "content": memory_string, + "embedding": embedding, + "embedding_metadata": embedding_meta, + } + ) async def search(self, query_string, count=None, start=None): """Simple embedding-based search (inefficient, no caching)""" @@ -251,22 +253,24 @@ class DummyArchivalMemoryWithEmbeddings(DummyArchivalMemory): # query_embedding = get_embedding(query_string, model=self.embedding_model) # our wrapped version supports backoff/rate-limits query_embedding = await async_get_embedding_with_backoff(query_string, model=self.embedding_model) - similarity_scores = [cosine_similarity(memory['embedding'], query_embedding) for memory in self._archive] + similarity_scores = [cosine_similarity(memory["embedding"], query_embedding) for memory in self._archive] # Sort the archive based on similarity scores sorted_archive_with_scores = sorted( zip(self._archive, similarity_scores), key=lambda pair: pair[1], # Sort by the similarity score - 
reverse=True # We want the highest similarity first + reverse=True, # We want the highest similarity first + ) + printd( + f"archive_memory.search (vector-based): search for query '{query_string}' returned the following results (limit 5) and scores:\n{str([str(t[0]['content']) + '- score ' + str(t[1]) for t in sorted_archive_with_scores[:5]])}" ) - printd(f"archive_memory.search (vector-based): search for query '{query_string}' returned the following results (limit 5) and scores:\n{str([str(t[0]['content']) + '- score ' + str(t[1]) for t in sorted_archive_with_scores[:5]])}") # Extract the sorted archive without the scores matches = [item[0] for item in sorted_archive_with_scores] # start/count support paging through results if start is not None and count is not None: - return matches[start:start+count], len(matches) + return matches[start : start + count], len(matches) elif start is None and count is not None: return matches[:count], len(matches) elif start is not None and count is None: @@ -287,13 +291,13 @@ class DummyArchivalMemoryWithFaiss(DummyArchivalMemory): is essential enough not to be left only to the recall memory. """ - def __init__(self, index=None, archival_memory_database=None, embedding_model='text-embedding-ada-002', k=100): + def __init__(self, index=None, archival_memory_database=None, embedding_model="text-embedding-ada-002", k=100): if index is None: - self.index = faiss.IndexFlatL2(1536) # openai embedding vector size. + self.index = faiss.IndexFlatL2(1536) # openai embedding vector size. 
else: self.index = index self.k = k - self._archive = [] if archival_memory_database is None else archival_memory_database # consists of {'content': str} dicts + self._archive = [] if archival_memory_database is None else archival_memory_database # consists of {'content': str} dicts self.embedding_model = embedding_model self.embeddings_dict = {} self.search_results = {} @@ -307,12 +311,14 @@ class DummyArchivalMemoryWithFaiss(DummyArchivalMemory): embedding = await async_get_embedding_with_backoff(memory_string, model=self.embedding_model) print(f"Got an embedding, type {type(embedding)}, len {len(embedding)}") - self._archive.append({ - # can eventually upgrade to adding semantic tags, etc - 'timestamp': get_local_time(), - 'content': memory_string, - }) - embedding = np.array([embedding]).astype('float32') + self._archive.append( + { + # can eventually upgrade to adding semantic tags, etc + "timestamp": get_local_time(), + "content": memory_string, + } + ) + embedding = np.array([embedding]).astype("float32") self.index.add(embedding) async def search(self, query_string, count=None, start=None): @@ -332,20 +338,22 @@ class DummyArchivalMemoryWithFaiss(DummyArchivalMemory): self.search_results[query_string] = search_result if start is not None and count is not None: - toprint = search_result[start:start+count] + toprint = search_result[start : start + count] else: if len(search_result) >= 5: toprint = search_result[:5] else: toprint = search_result - printd(f"archive_memory.search (vector-based): search for query '{query_string}' returned the following results ({start}--{start+5}/{len(search_result)}) and scores:\n{str([t[:60] if len(t) > 60 else t for t in toprint])}") + printd( + f"archive_memory.search (vector-based): search for query '{query_string}' returned the following results ({start}--{start+5}/{len(search_result)}) and scores:\n{str([t[:60] if len(t) > 60 else t for t in toprint])}" + ) # Extract the sorted archive without the scores matches = 
search_result # start/count support paging through results if start is not None and count is not None: - return matches[start:start+count], len(matches) + return matches[start : start + count], len(matches) elif start is None and count is not None: return matches[:count], len(matches) elif start is not None and count is None: @@ -355,7 +363,6 @@ class DummyArchivalMemoryWithFaiss(DummyArchivalMemory): class RecallMemory(ABC): - @abstractmethod def text_search(self, query_string, count=None, start=None): pass @@ -393,42 +400,46 @@ class DummyRecallMemory(RecallMemory): # don't dump all the conversations, just statistics system_count = user_count = assistant_count = function_count = other_count = 0 for msg in self._message_logs: - role = msg['message']['role'] - if role == 'system': + role = msg["message"]["role"] + if role == "system": system_count += 1 - elif role == 'user': + elif role == "user": user_count += 1 - elif role == 'assistant': + elif role == "assistant": assistant_count += 1 - elif role == 'function': + elif role == "function": function_count += 1 else: other_count += 1 - memory_str = f"Statistics:" + \ - f"\n{len(self._message_logs)} total messages" + \ - f"\n{system_count} system" + \ - f"\n{user_count} user" + \ - f"\n{assistant_count} assistant" + \ - f"\n{function_count} function" + \ - f"\n{other_count} other" - return \ - f"\n### RECALL MEMORY ###" + \ - f"\n{memory_str}" + memory_str = ( + f"Statistics:" + + f"\n{len(self._message_logs)} total messages" + + f"\n{system_count} system" + + f"\n{user_count} user" + + f"\n{assistant_count} assistant" + + f"\n{function_count} function" + + f"\n{other_count} other" + ) + return f"\n### RECALL MEMORY ###" + f"\n{memory_str}" async def insert(self, message): - raise NotImplementedError('This should be handled by the PersistenceManager, recall memory is just a search layer on top') + raise NotImplementedError("This should be handled by the PersistenceManager, recall memory is just a search layer on 
top") async def text_search(self, query_string, count=None, start=None): # in the dummy version, run an (inefficient) case-insensitive match search - message_pool = [d for d in self._message_logs if d['message']['role'] not in ['system', 'function']] + message_pool = [d for d in self._message_logs if d["message"]["role"] not in ["system", "function"]] - printd(f"recall_memory.text_search: searching for {query_string} (c={count}, s={start}) in {len(self._message_logs)} total messages") - matches = [d for d in message_pool if d['message']['content'] is not None and query_string.lower() in d['message']['content'].lower()] + printd( + f"recall_memory.text_search: searching for {query_string} (c={count}, s={start}) in {len(self._message_logs)} total messages" + ) + matches = [ + d for d in message_pool if d["message"]["content"] is not None and query_string.lower() in d["message"]["content"].lower() + ] printd(f"recall_memory - matches:\n{matches[start:start+count]}") # start/count support paging through results if start is not None and count is not None: - return matches[start:start+count], len(matches) + return matches[start : start + count], len(matches) elif start is None and count is not None: return matches[:count], len(matches) elif start is not None and count is None: @@ -439,7 +450,7 @@ class DummyRecallMemory(RecallMemory): def _validate_date_format(self, date_str): """Validate the given date string in the format 'YYYY-MM-DD'.""" try: - datetime.datetime.strptime(date_str, '%Y-%m-%d') + datetime.datetime.strptime(date_str, "%Y-%m-%d") return True except ValueError: return False @@ -451,25 +462,26 @@ class DummyRecallMemory(RecallMemory): return match.group(1) if match else None async def date_search(self, start_date, end_date, count=None, start=None): - message_pool = [d for d in self._message_logs if d['message']['role'] not in ['system', 'function']] + message_pool = [d for d in self._message_logs if d["message"]["role"] not in ["system", "function"]] # 
First, validate the start_date and end_date format if not self._validate_date_format(start_date) or not self._validate_date_format(end_date): raise ValueError("Invalid date format. Expected format: YYYY-MM-DD") # Convert dates to datetime objects for comparison - start_date_dt = datetime.datetime.strptime(start_date, '%Y-%m-%d') - end_date_dt = datetime.datetime.strptime(end_date, '%Y-%m-%d') + start_date_dt = datetime.datetime.strptime(start_date, "%Y-%m-%d") + end_date_dt = datetime.datetime.strptime(end_date, "%Y-%m-%d") # Next, match items inside self._message_logs matches = [ - d for d in message_pool - if start_date_dt <= datetime.datetime.strptime(self._extract_date_from_timestamp(d['timestamp']), '%Y-%m-%d') <= end_date_dt + d + for d in message_pool + if start_date_dt <= datetime.datetime.strptime(self._extract_date_from_timestamp(d["timestamp"]), "%Y-%m-%d") <= end_date_dt ] # start/count support paging through results if start is not None and count is not None: - return matches[start:start+count], len(matches) + return matches[start : start + count], len(matches) elif start is None and count is not None: return matches[:count], len(matches) elif start is not None and count is None: @@ -484,17 +496,17 @@ class DummyRecallMemoryWithEmbeddings(DummyRecallMemory): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.embeddings = dict() - self.embedding_model = 'text-embedding-ada-002' + self.embedding_model = "text-embedding-ada-002" self.only_use_preloaded_embeddings = False async def text_search(self, query_string, count=None, start=None): # in the dummy version, run an (inefficient) case-insensitive match search - message_pool = [d for d in self._message_logs if d['message']['role'] not in ['system', 'function']] + message_pool = [d for d in self._message_logs if d["message"]["role"] not in ["system", "function"]] # first, go through and make sure we have all the embeddings we need message_pool_filtered = [] for d in message_pool: - 
message_str = d['message']['content'] + message_str = d["message"]["content"] if self.only_use_preloaded_embeddings: if message_str not in self.embeddings: printd(f"recall_memory.text_search -- '{message_str}' was not in embedding dict, skipping.") @@ -505,24 +517,26 @@ class DummyRecallMemoryWithEmbeddings(DummyRecallMemory): self.embeddings[message_str] = await async_get_embedding_with_backoff(message_str, model=self.embedding_model) message_pool_filtered.append(d) - # our wrapped version supports backoff/rate-limits + # our wrapped version supports backoff/rate-limits query_embedding = await async_get_embedding_with_backoff(query_string, model=self.embedding_model) - similarity_scores = [cosine_similarity(self.embeddings[d['message']['content']], query_embedding) for d in message_pool_filtered] + similarity_scores = [cosine_similarity(self.embeddings[d["message"]["content"]], query_embedding) for d in message_pool_filtered] # Sort the archive based on similarity scores sorted_archive_with_scores = sorted( zip(message_pool_filtered, similarity_scores), key=lambda pair: pair[1], # Sort by the similarity score - reverse=True # We want the highest similarity first + reverse=True, # We want the highest similarity first + ) + printd( + f"recall_memory.text_search (vector-based): search for query '{query_string}' returned the following results (limit 5) and scores:\n{str([str(t[0]['message']['content']) + '- score ' + str(t[1]) for t in sorted_archive_with_scores[:5]])}" ) - printd(f"recall_memory.text_search (vector-based): search for query '{query_string}' returned the following results (limit 5) and scores:\n{str([str(t[0]['message']['content']) + '- score ' + str(t[1]) for t in sorted_archive_with_scores[:5]])}") # Extract the sorted archive without the scores matches = [item[0] for item in sorted_archive_with_scores] # start/count support paging through results if start is not None and count is not None: - return matches[start:start+count], len(matches) + return 
matches[start : start + count], len(matches) elif start is None and count is not None: return matches[:count], len(matches) elif start is not None and count is None: @@ -531,55 +545,49 @@ class DummyRecallMemoryWithEmbeddings(DummyRecallMemory): return matches, len(matches) -class LocalArchivalMemory(ArchivalMemory): - """ Archival memory built on top of Llama Index """ +class LocalArchivalMemory(ArchivalMemory): + """Archival memory built on top of Llama Index""" - def __init__(self, archival_memory_database: Optional[str] = None, top_k: Optional[int] = 100): - """ Init function for archival memory + def __init__(self, archival_memory_database: Optional[str] = None, top_k: Optional[int] = 100): + """Init function for archival memory - :param archiva_memory_database: name of dataset to pre-fill archival with + :param archiva_memory_database: name of dataset to pre-fill archival with :type archival_memory_database: str """ - if archival_memory_database is not None: + if archival_memory_database is not None: # TODO: load form ~/.memgpt/archival directory = f"{MEMGPT_DIR}/archival/{archival_memory_database}" assert os.path.exists(directory), f"Archival memory database {archival_memory_database} does not exist" storage_context = StorageContext.from_defaults(persist_dir=directory) self.index = load_index_from_storage(storage_context) - else: + else: self.index = VectorIndex() self.top_k = top_k self.retriever = VectorIndexRetriever( - index=self.index, # does this get refreshed? + index=self.index, # does this get refreshed? 
similarity_top_k=self.top_k, ) # TODO: have some mechanism for cleanup otherwise will lead to OOM - self.cache = {} + self.cache = {} async def insert(self, memory_string): self.index.insert(memory_string) - - async def search(self, query_string, count=None, start=None): - start = start if start else 0 - count = count if count else self.top_k + async def search(self, query_string, count=None, start=None): + start = start if start else 0 + count = count if count else self.top_k count = min(count + start, self.top_k) if query_string not in self.cache: self.cache[query_string] = self.retriever.retrieve(query_string) - results = self.cache[query_string][start:start+count] - results = [ - {'timestamp': get_local_time(), 'content': node.node.text} - for node in results - ] - #from pprint import pprint - #pprint(results) + results = self.cache[query_string][start : start + count] + results = [{"timestamp": get_local_time(), "content": node.node.text} for node in results] + # from pprint import pprint + # pprint(results) return results, len(results) - + def __repr__(self) -> str: print(self.index.ref_doc_info) return "" - - diff --git a/memgpt/openai_tools.py b/memgpt/openai_tools.py index 20ad5f9a..6b97b06a 100644 --- a/memgpt/openai_tools.py +++ b/memgpt/openai_tools.py @@ -41,9 +41,7 @@ def retry_with_exponential_backoff( # Check if max retries has been reached if num_retries > max_retries: - raise Exception( - f"Maximum number of retries ({max_retries}) exceeded." - ) + raise Exception(f"Maximum number of retries ({max_retries}) exceeded.") # Increment the delay delay *= exponential_base * (1 + jitter * random.random()) @@ -91,9 +89,7 @@ def aretry_with_exponential_backoff( # Check if max retries has been reached if num_retries > max_retries: - raise Exception( - f"Maximum number of retries ({max_retries}) exceeded." 
- ) + raise Exception(f"Maximum number of retries ({max_retries}) exceeded.") # Increment the delay delay *= exponential_base * (1 + jitter * random.random()) @@ -184,9 +180,7 @@ def configure_azure_support(): azure_openai_endpoint, azure_openai_version, ]: - print( - f"Error: missing Azure OpenAI environment variables. Please see README section on Azure." - ) + print(f"Error: missing Azure OpenAI environment variables. Please see README section on Azure.") return openai.api_type = "azure" @@ -199,10 +193,7 @@ def configure_azure_support(): def check_azure_embeddings(): azure_openai_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT") azure_openai_embedding_deployment = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT") - if ( - azure_openai_deployment is not None - and azure_openai_embedding_deployment is None - ): + if azure_openai_deployment is not None and azure_openai_embedding_deployment is None: raise ValueError( f"Error: It looks like you are using Azure deployment ids and computing embeddings, make sure you are setting one for embeddings as well. 
Please see README section on Azure" ) diff --git a/memgpt/persistence_manager.py b/memgpt/persistence_manager.py index 74f8d1d9..2c38d8c1 100644 --- a/memgpt/persistence_manager.py +++ b/memgpt/persistence_manager.py @@ -1,12 +1,18 @@ from abc import ABC, abstractmethod import pickle -from .memory import DummyRecallMemory, DummyRecallMemoryWithEmbeddings, DummyArchivalMemory, DummyArchivalMemoryWithEmbeddings, DummyArchivalMemoryWithFaiss, LocalArchivalMemory +from .memory import ( + DummyRecallMemory, + DummyRecallMemoryWithEmbeddings, + DummyArchivalMemory, + DummyArchivalMemoryWithEmbeddings, + DummyArchivalMemoryWithFaiss, + LocalArchivalMemory, +) from .utils import get_local_time, printd class PersistenceManager(ABC): - @abstractmethod def trim_messages(self, num): pass @@ -27,6 +33,7 @@ class PersistenceManager(ABC): def update_memory(self, new_memory): pass + class InMemoryStateManager(PersistenceManager): """In-memory state manager has nothing to manage, all agents are held in-memory""" @@ -41,17 +48,17 @@ class InMemoryStateManager(PersistenceManager): @staticmethod def load(filename): - with open(filename, 'rb') as f: + with open(filename, "rb") as f: return pickle.load(f) def save(self, filename): - with open(filename, 'wb') as fh: + with open(filename, "wb") as fh: pickle.dump(self, fh, protocol=pickle.HIGHEST_PROTOCOL) def init(self, agent): printd(f"Initializing InMemoryStateManager with agent object") - self.all_messages = [{'timestamp': get_local_time(), 'message': msg} for msg in agent.messages.copy()] - self.messages = [{'timestamp': get_local_time(), 'message': msg} for msg in agent.messages.copy()] + self.all_messages = [{"timestamp": get_local_time(), "message": msg} for msg in agent.messages.copy()] + self.messages = [{"timestamp": get_local_time(), "message": msg} for msg in agent.messages.copy()] self.memory = agent.memory printd(f"InMemoryStateManager.all_messages.len = {len(self.all_messages)}") printd(f"InMemoryStateManager.messages.len 
= {len(self.messages)}") @@ -67,7 +74,7 @@ class InMemoryStateManager(PersistenceManager): def prepend_to_messages(self, added_messages): # first tag with timestamps - added_messages = [{'timestamp': get_local_time(), 'message': msg} for msg in added_messages] + added_messages = [{"timestamp": get_local_time(), "message": msg} for msg in added_messages] printd(f"InMemoryStateManager.prepend_to_message") self.messages = [self.messages[0]] + added_messages + self.messages[1:] @@ -75,7 +82,7 @@ class InMemoryStateManager(PersistenceManager): def append_to_messages(self, added_messages): # first tag with timestamps - added_messages = [{'timestamp': get_local_time(), 'message': msg} for msg in added_messages] + added_messages = [{"timestamp": get_local_time(), "message": msg} for msg in added_messages] printd(f"InMemoryStateManager.append_to_messages") self.messages = self.messages + added_messages @@ -83,7 +90,7 @@ class InMemoryStateManager(PersistenceManager): def swap_system_message(self, new_system_message): # first tag with timestamps - new_system_message = {'timestamp': get_local_time(), 'message': new_system_message} + new_system_message = {"timestamp": get_local_time(), "message": new_system_message} printd(f"InMemoryStateManager.swap_system_message") self.messages[0] = new_system_message @@ -93,11 +100,12 @@ class InMemoryStateManager(PersistenceManager): printd(f"InMemoryStateManager.update_memory") self.memory = new_memory + class LocalStateManager(PersistenceManager): """In-memory state manager has nothing to manage, all agents are held in-memory""" recall_memory_cls = DummyRecallMemory - archival_memory_cls = LocalArchivalMemory + archival_memory_cls = LocalArchivalMemory def __init__(self, archival_memory_db=None): # Memory held in-state useful for debugging stateful versions @@ -108,17 +116,17 @@ class LocalStateManager(PersistenceManager): @staticmethod def load(filename): - with open(filename, 'rb') as f: + with open(filename, "rb") as f: return 
pickle.load(f) def save(self, filename): - with open(filename, 'wb') as fh: + with open(filename, "wb") as fh: pickle.dump(self, fh, protocol=pickle.HIGHEST_PROTOCOL) def init(self, agent): printd(f"Initializing InMemoryStateManager with agent object") - self.all_messages = [{'timestamp': get_local_time(), 'message': msg} for msg in agent.messages.copy()] - self.messages = [{'timestamp': get_local_time(), 'message': msg} for msg in agent.messages.copy()] + self.all_messages = [{"timestamp": get_local_time(), "message": msg} for msg in agent.messages.copy()] + self.messages = [{"timestamp": get_local_time(), "message": msg} for msg in agent.messages.copy()] self.memory = agent.memory printd(f"InMemoryStateManager.all_messages.len = {len(self.all_messages)}") printd(f"InMemoryStateManager.messages.len = {len(self.messages)}") @@ -126,7 +134,7 @@ class LocalStateManager(PersistenceManager): # Persistence manager also handles DB-related state self.recall_memory = self.recall_memory_cls(message_database=self.all_messages) - # TODO: init archival memory here? + # TODO: init archival memory here? 
def trim_messages(self, num): # printd(f"InMemoryStateManager.trim_messages") @@ -134,7 +142,7 @@ class LocalStateManager(PersistenceManager): def prepend_to_messages(self, added_messages): # first tag with timestamps - added_messages = [{'timestamp': get_local_time(), 'message': msg} for msg in added_messages] + added_messages = [{"timestamp": get_local_time(), "message": msg} for msg in added_messages] printd(f"InMemoryStateManager.prepend_to_message") self.messages = [self.messages[0]] + added_messages + self.messages[1:] @@ -142,7 +150,7 @@ class LocalStateManager(PersistenceManager): def append_to_messages(self, added_messages): # first tag with timestamps - added_messages = [{'timestamp': get_local_time(), 'message': msg} for msg in added_messages] + added_messages = [{"timestamp": get_local_time(), "message": msg} for msg in added_messages] printd(f"InMemoryStateManager.append_to_messages") self.messages = self.messages + added_messages @@ -150,7 +158,7 @@ class LocalStateManager(PersistenceManager): def swap_system_message(self, new_system_message): # first tag with timestamps - new_system_message = {'timestamp': get_local_time(), 'message': new_system_message} + new_system_message = {"timestamp": get_local_time(), "message": new_system_message} printd(f"InMemoryStateManager.swap_system_message") self.messages[0] = new_system_message @@ -170,8 +178,8 @@ class InMemoryStateManagerWithPreloadedArchivalMemory(InMemoryStateManager): def init(self, agent): print(f"Initializing InMemoryStateManager with agent object") - self.all_messages = [{'timestamp': get_local_time(), 'message': msg} for msg in agent.messages.copy()] - self.messages = [{'timestamp': get_local_time(), 'message': msg} for msg in agent.messages.copy()] + self.all_messages = [{"timestamp": get_local_time(), "message": msg} for msg in agent.messages.copy()] + self.messages = [{"timestamp": get_local_time(), "message": msg} for msg in agent.messages.copy()] self.memory = agent.memory 
print(f"InMemoryStateManager.all_messages.len = {len(self.all_messages)}") print(f"InMemoryStateManager.messages.len = {len(self.messages)}") @@ -199,13 +207,14 @@ class InMemoryStateManagerWithFaiss(InMemoryStateManager): def init(self, agent): print(f"Initializing InMemoryStateManager with agent object") - self.all_messages = [{'timestamp': get_local_time(), 'message': msg} for msg in agent.messages.copy()] - self.messages = [{'timestamp': get_local_time(), 'message': msg} for msg in agent.messages.copy()] + self.all_messages = [{"timestamp": get_local_time(), "message": msg} for msg in agent.messages.copy()] + self.messages = [{"timestamp": get_local_time(), "message": msg} for msg in agent.messages.copy()] self.memory = agent.memory print(f"InMemoryStateManager.all_messages.len = {len(self.all_messages)}") print(f"InMemoryStateManager.messages.len = {len(self.messages)}") # Persistence manager also handles DB-related state self.recall_memory = self.recall_memory_cls(message_database=self.all_messages) - self.archival_memory = self.archival_memory_cls(index=self.archival_index, archival_memory_database=self.archival_memory_db, k=self.a_k) - + self.archival_memory = self.archival_memory_cls( + index=self.archival_index, archival_memory_database=self.archival_memory_db, k=self.a_k + ) diff --git a/memgpt/personas/examples/docqa/build_index.py b/memgpt/personas/examples/docqa/build_index.py index 2dd94708..94802395 100644 --- a/memgpt/personas/examples/docqa/build_index.py +++ b/memgpt/personas/examples/docqa/build_index.py @@ -5,15 +5,14 @@ import numpy as np import argparse import json -def build_index(embedding_files: str, - index_name: str): +def build_index(embedding_files: str, index_name: str): index = faiss.IndexFlatL2(1536) file_list = sorted(glob(embedding_files)) for embedding_file in file_list: print(embedding_file) - with open(embedding_file, 'rt', encoding='utf-8') as file: + with open(embedding_file, "rt", encoding="utf-8") as file: embeddings = [] l 
= 0 for line in tqdm(file): @@ -21,7 +20,7 @@ def build_index(embedding_files: str, data = json.loads(line) embeddings.append(data) l += 1 - data = np.array(embeddings).astype('float32') + data = np.array(embeddings).astype("float32") print(data.shape) try: index.add(data) @@ -32,14 +31,11 @@ def build_index(embedding_files: str, faiss.write_index(index, index_name) -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('--embedding_files', type=str, help='embedding_filepaths glob expression') - parser.add_argument('--output_index_file', type=str, help='output filepath') - args = parser.parse_args() + parser.add_argument("--embedding_files", type=str, help="embedding_filepaths glob expression") + parser.add_argument("--output_index_file", type=str, help="output filepath") + args = parser.parse_args() - build_index( - embedding_files=args.embedding_files, - index_name=args.output_index_file - ) \ No newline at end of file + build_index(embedding_files=args.embedding_files, index_name=args.output_index_file) diff --git a/memgpt/personas/examples/docqa/generate_embeddings_for_docs.py b/memgpt/personas/examples/docqa/generate_embeddings_for_docs.py index f377ce27..0c2d9479 100644 --- a/memgpt/personas/examples/docqa/generate_embeddings_for_docs.py +++ b/memgpt/personas/examples/docqa/generate_embeddings_for_docs.py @@ -7,12 +7,14 @@ import argparse from tqdm import tqdm import openai + try: from dotenv import load_dotenv + load_dotenv() except ModuleNotFoundError: pass -openai.api_key = os.getenv('OPENAI_API_KEY') +openai.api_key = os.getenv("OPENAI_API_KEY") sys.path.append("../../../") from openai_tools import async_get_embedding_with_backoff @@ -24,8 +26,8 @@ from openai_parallel_request_processor import process_api_requests_from_file TPM_LIMIT = 1000000 RPM_LIMIT = 3000 -DEFAULT_FILE = 'iclr/data/qa_data/30_total_documents/nq-open-30_total_documents_gold_at_0.jsonl.gz' -EMBEDDING_MODEL = 
'text-embedding-ada-002' +DEFAULT_FILE = "iclr/data/qa_data/30_total_documents/nq-open-30_total_documents_gold_at_0.jsonl.gz" +EMBEDDING_MODEL = "text-embedding-ada-002" async def generate_requests_file(filename): @@ -33,36 +35,33 @@ async def generate_requests_file(filename): base_name = os.path.splitext(filename)[0] requests_filename = f"{base_name}_embedding_requests.jsonl" - with open(filename, 'r') as f: + with open(filename, "r") as f: all_data = [json.loads(line) for line in f] - with open(requests_filename, 'w') as f: + with open(requests_filename, "w") as f: for data in all_data: documents = data for idx, doc in enumerate(documents): title = doc["title"] text = doc["text"] document_string = f"Document [{idx+1}] (Title: {title}) {text}" - request = { - "model": EMBEDDING_MODEL, - "input": document_string - } + request = {"model": EMBEDDING_MODEL, "input": document_string} json_string = json.dumps(request) f.write(json_string + "\n") # Run your parallel processing function input(f"Generated requests file ({requests_filename}), continue with embedding batch requests? 
(hit enter)") await process_api_requests_from_file( - requests_filepath=requests_filename, - save_filepath=f"{base_name}.embeddings.jsonl.gz", # Adjust as necessary - request_url="https://api.openai.com/v1/embeddings", - api_key=os.getenv('OPENAI_API_KEY'), - max_requests_per_minute=RPM_LIMIT, - max_tokens_per_minute=TPM_LIMIT, - token_encoding_name=EMBEDDING_MODEL, - max_attempts=5, - logging_level=logging.INFO, - ) + requests_filepath=requests_filename, + save_filepath=f"{base_name}.embeddings.jsonl.gz", # Adjust as necessary + request_url="https://api.openai.com/v1/embeddings", + api_key=os.getenv("OPENAI_API_KEY"), + max_requests_per_minute=RPM_LIMIT, + max_tokens_per_minute=TPM_LIMIT, + token_encoding_name=EMBEDDING_MODEL, + max_attempts=5, + logging_level=logging.INFO, + ) async def generate_embedding_file(filename, parallel_mode=False): @@ -72,7 +71,7 @@ async def generate_embedding_file(filename, parallel_mode=False): # Derive the sister filename # base_name = os.path.splitext(filename)[0] - base_name = filename.rsplit('.jsonl', 1)[0] + base_name = filename.rsplit(".jsonl", 1)[0] sister_filename = f"{base_name}.embeddings.jsonl" # Check if the sister file already exists @@ -80,7 +79,7 @@ async def generate_embedding_file(filename, parallel_mode=False): print(f"{sister_filename} already exists. 
Skipping embedding generation.") return - with open(filename, 'rt') as f: + with open(filename, "rt") as f: all_data = [json.loads(line) for line in f] embedding_data = [] @@ -90,7 +89,9 @@ async def generate_embedding_file(filename, parallel_mode=False): for i, data in enumerate(tqdm(all_data, desc="Processing data", total=len(all_data))): documents = data # Inner loop progress bar - for idx, doc in enumerate(tqdm(documents, desc=f"Embedding documents for data {i+1}/{len(all_data)}", total=len(documents), leave=False)): + for idx, doc in enumerate( + tqdm(documents, desc=f"Embedding documents for data {i+1}/{len(all_data)}", total=len(documents), leave=False) + ): title = doc["title"] text = doc["text"] document_string = f"[Title: {title}] {text}" @@ -103,10 +104,10 @@ async def generate_embedding_file(filename, parallel_mode=False): # Save the embeddings to the sister file # with gzip.open(sister_filename, 'wt') as f: - with open(sister_filename, 'wb') as f: + with open(sister_filename, "wb") as f: for embedding in embedding_data: # f.write(json.dumps(embedding) + '\n') - f.write((json.dumps(embedding) + '\n').encode('utf-8')) + f.write((json.dumps(embedding) + "\n").encode("utf-8")) print(f"Embeddings saved to {sister_filename}") @@ -118,6 +119,7 @@ async def main(): filename = DEFAULT_FILE await generate_embedding_file(filename) + async def main(): parser = argparse.ArgumentParser() parser.add_argument("filename", nargs="?", default=DEFAULT_FILE, help="Path to the input file") @@ -129,4 +131,4 @@ async def main(): if __name__ == "__main__": loop = asyncio.get_event_loop() - loop.run_until_complete(main()) \ No newline at end of file + loop.run_until_complete(main()) diff --git a/memgpt/personas/examples/docqa/openai_parallel_request_processor.py b/memgpt/personas/examples/docqa/openai_parallel_request_processor.py index 4b9a1aae..169bfd37 100644 --- a/memgpt/personas/examples/docqa/openai_parallel_request_processor.py +++ 
b/memgpt/personas/examples/docqa/openai_parallel_request_processor.py @@ -121,9 +121,7 @@ async def process_api_requests_from_file( """Processes API requests in parallel, throttling to stay under rate limits.""" # constants seconds_to_pause_after_rate_limit_error = 15 - seconds_to_sleep_each_loop = ( - 0.001 # 1 ms limits max throughput to 1,000 requests per second - ) + seconds_to_sleep_each_loop = 0.001 # 1 ms limits max throughput to 1,000 requests per second # initialize logging logging.basicConfig(level=logging_level) @@ -135,12 +133,8 @@ async def process_api_requests_from_file( # initialize trackers queue_of_requests_to_retry = asyncio.Queue() - task_id_generator = ( - task_id_generator_function() - ) # generates integer IDs of 1, 2, 3, ... - status_tracker = ( - StatusTracker() - ) # single instance to track a collection of variables + task_id_generator = task_id_generator_function() # generates integer IDs of 1, 2, 3, ... + status_tracker = StatusTracker() # single instance to track a collection of variables next_request = None # variable to hold the next request to call # initialize available capacity counts @@ -163,9 +157,7 @@ async def process_api_requests_from_file( if next_request is None: if not queue_of_requests_to_retry.empty(): next_request = queue_of_requests_to_retry.get_nowait() - logging.debug( - f"Retrying request {next_request.task_id}: {next_request}" - ) + logging.debug(f"Retrying request {next_request.task_id}: {next_request}") elif file_not_finished: try: # get new request @@ -173,17 +165,13 @@ async def process_api_requests_from_file( next_request = APIRequest( task_id=next(task_id_generator), request_json=request_json, - token_consumption=num_tokens_consumed_from_request( - request_json, api_endpoint, token_encoding_name - ), + token_consumption=num_tokens_consumed_from_request(request_json, api_endpoint, token_encoding_name), attempts_left=max_attempts, metadata=request_json.pop("metadata", None), ) status_tracker.num_tasks_started += 
1 status_tracker.num_tasks_in_progress += 1 - logging.debug( - f"Reading request {next_request.task_id}: {next_request}" - ) + logging.debug(f"Reading request {next_request.task_id}: {next_request}") except StopIteration: # if file runs out, set flag to stop reading it logging.debug("Read file exhausted") @@ -193,13 +181,11 @@ async def process_api_requests_from_file( current_time = time.time() seconds_since_update = current_time - last_update_time available_request_capacity = min( - available_request_capacity - + max_requests_per_minute * seconds_since_update / 60.0, + available_request_capacity + max_requests_per_minute * seconds_since_update / 60.0, max_requests_per_minute, ) available_token_capacity = min( - available_token_capacity - + max_tokens_per_minute * seconds_since_update / 60.0, + available_token_capacity + max_tokens_per_minute * seconds_since_update / 60.0, max_tokens_per_minute, ) last_update_time = current_time @@ -207,10 +193,7 @@ async def process_api_requests_from_file( # if enough capacity available, call API if next_request: next_request_tokens = next_request.token_consumption - if ( - available_request_capacity >= 1 - and available_token_capacity >= next_request_tokens - ): + if available_request_capacity >= 1 and available_token_capacity >= next_request_tokens: # update counters available_request_capacity -= 1 available_token_capacity -= next_request_tokens @@ -237,17 +220,9 @@ async def process_api_requests_from_file( await asyncio.sleep(seconds_to_sleep_each_loop) # if a rate limit error was hit recently, pause to cool down - seconds_since_rate_limit_error = ( - time.time() - status_tracker.time_of_last_rate_limit_error - ) - if ( - seconds_since_rate_limit_error - < seconds_to_pause_after_rate_limit_error - ): - remaining_seconds_to_pause = ( - seconds_to_pause_after_rate_limit_error - - seconds_since_rate_limit_error - ) + seconds_since_rate_limit_error = time.time() - status_tracker.time_of_last_rate_limit_error + if 
seconds_since_rate_limit_error < seconds_to_pause_after_rate_limit_error: + remaining_seconds_to_pause = seconds_to_pause_after_rate_limit_error - seconds_since_rate_limit_error await asyncio.sleep(remaining_seconds_to_pause) # ^e.g., if pause is 15 seconds and final limit was hit 5 seconds ago logging.warn( @@ -255,17 +230,13 @@ async def process_api_requests_from_file( ) # after finishing, log final status - logging.info( - f"""Parallel processing complete. Results saved to {save_filepath}""" - ) + logging.info(f"""Parallel processing complete. Results saved to {save_filepath}""") if status_tracker.num_tasks_failed > 0: logging.warning( f"{status_tracker.num_tasks_failed} / {status_tracker.num_tasks_started} requests failed. Errors logged to {save_filepath}." ) if status_tracker.num_rate_limit_errors > 0: - logging.warning( - f"{status_tracker.num_rate_limit_errors} rate limit errors received. Consider running at a lower rate." - ) + logging.warning(f"{status_tracker.num_rate_limit_errors} rate limit errors received. 
Consider running at a lower rate.") # dataclasses @@ -309,26 +280,18 @@ class APIRequest: logging.info(f"Starting request #{self.task_id}") error = None try: - async with session.post( - url=request_url, headers=request_header, json=self.request_json - ) as response: + async with session.post(url=request_url, headers=request_header, json=self.request_json) as response: response = await response.json() if "error" in response: - logging.warning( - f"Request {self.task_id} failed with error {response['error']}" - ) + logging.warning(f"Request {self.task_id} failed with error {response['error']}") status_tracker.num_api_errors += 1 error = response if "Rate limit" in response["error"].get("message", ""): status_tracker.time_of_last_rate_limit_error = time.time() status_tracker.num_rate_limit_errors += 1 - status_tracker.num_api_errors -= ( - 1 # rate limit errors are counted separately - ) + status_tracker.num_api_errors -= 1 # rate limit errors are counted separately - except ( - Exception - ) as e: # catching naked exceptions is bad practice, but in this case we'll log & save them + except Exception as e: # catching naked exceptions is bad practice, but in this case we'll log & save them logging.warning(f"Request {self.task_id} failed with Exception {e}") status_tracker.num_other_errors += 1 error = e @@ -337,9 +300,7 @@ class APIRequest: if self.attempts_left: retry_queue.put_nowait(self) else: - logging.error( - f"Request {self.request_json} failed after all attempts. Saving errors: {self.result}" - ) + logging.error(f"Request {self.request_json} failed after all attempts. 
Saving errors: {self.result}") data = ( [self.request_json, [str(e) for e in self.result], self.metadata] if self.metadata @@ -349,11 +310,7 @@ class APIRequest: status_tracker.num_tasks_in_progress -= 1 status_tracker.num_tasks_failed += 1 else: - data = ( - [self.request_json, response, self.metadata] - if self.metadata - else [self.request_json, response] - ) + data = [self.request_json, response, self.metadata] if self.metadata else [self.request_json, response] append_to_jsonl(data, save_filepath) status_tracker.num_tasks_in_progress -= 1 status_tracker.num_tasks_succeeded += 1 @@ -382,8 +339,8 @@ def num_tokens_consumed_from_request( token_encoding_name: str, ): """Count the number of tokens in the request. Only supports completion and embedding requests.""" - if token_encoding_name == 'text-embedding-ada-002': - encoding = tiktoken.get_encoding('cl100k_base') + if token_encoding_name == "text-embedding-ada-002": + encoding = tiktoken.get_encoding("cl100k_base") else: encoding = tiktoken.get_encoding(token_encoding_name) # if completions request, tokens = prompt + n * max_tokens @@ -415,9 +372,7 @@ def num_tokens_consumed_from_request( num_tokens = prompt_tokens + completion_tokens * len(prompt) return num_tokens else: - raise TypeError( - 'Expecting either string or list of strings for "prompt" field in completion request' - ) + raise TypeError('Expecting either string or list of strings for "prompt" field in completion request') # if embeddings request, tokens = input tokens elif api_endpoint == "embeddings": input = request_json["input"] @@ -428,14 +383,10 @@ def num_tokens_consumed_from_request( num_tokens = sum([len(encoding.encode(i)) for i in input]) return num_tokens else: - raise TypeError( - 'Expecting either string or list of strings for "inputs" field in embedding request' - ) + raise TypeError('Expecting either string or list of strings for "inputs" field in embedding request') # more logic needed to support other API calls (e.g., edits, inserts, 
DALL-E) else: - raise NotImplementedError( - f'API endpoint "{api_endpoint}" not implemented in this script' - ) + raise NotImplementedError(f'API endpoint "{api_endpoint}" not implemented in this script') def task_id_generator_function(): @@ -502,4 +453,4 @@ with open(filename, "w") as f: ``` As with all jsonl files, take care that newlines in the content are properly escaped (json.dumps does this automatically). -""" \ No newline at end of file +""" diff --git a/memgpt/personas/examples/docqa/scrape_docs.py b/memgpt/personas/examples/docqa/scrape_docs.py index 66682694..f02df414 100644 --- a/memgpt/personas/examples/docqa/scrape_docs.py +++ b/memgpt/personas/examples/docqa/scrape_docs.py @@ -4,69 +4,65 @@ import tiktoken import json # Define the directory where the documentation resides -docs_dir = 'text' +docs_dir = "text" encoding = tiktoken.encoding_for_model("gpt-4") PASSAGE_TOKEN_LEN = 800 + def extract_text_from_sphinx_txt(file_path): lines = [] title = "" - with open(file_path, 'r', encoding='utf-8') as file: + with open(file_path, "r", encoding="utf-8") as file: for line in file: if not title: title = line.strip() continue - if line and re.match(r'^.*\S.*$', line) and not re.match(r'^[-=*]+$', line): + if line and re.match(r"^.*\S.*$", line) and not re.match(r"^[-=*]+$", line): lines.append(line) - passages = [] + passages = [] curr_passage = [] curr_token_ct = 0 for line in lines: try: - line_token_ct = len(encoding.encode(line, allowed_special={'<|endoftext|>'})) + line_token_ct = len(encoding.encode(line, allowed_special={"<|endoftext|>"})) except Exception as e: print("line", line) raise e if line_token_ct > PASSAGE_TOKEN_LEN: - passages.append({ - 'title': title, - 'text': line[:3200], - 'num_tokens': curr_token_ct, - }) + passages.append( + { + "title": title, + "text": line[:3200], + "num_tokens": curr_token_ct, + } + ) continue curr_token_ct += line_token_ct curr_passage.append(line) if curr_token_ct > PASSAGE_TOKEN_LEN: - passages.append({ - 
'title': title, - 'text': ''.join(curr_passage), - 'num_tokens': curr_token_ct - }) + passages.append({"title": title, "text": "".join(curr_passage), "num_tokens": curr_token_ct}) curr_passage = [] curr_token_ct = 0 if len(curr_passage) > 0: - passages.append({ - 'title': title, - 'text': ''.join(curr_passage), - 'num_tokens': curr_token_ct - }) + passages.append({"title": title, "text": "".join(curr_passage), "num_tokens": curr_token_ct}) return passages + # Iterate over all files in the directory and its subdirectories passages = [] total_files = 0 for subdir, _, files in os.walk(docs_dir): for file in files: - if file.endswith('.txt'): + if file.endswith(".txt"): file_path = os.path.join(subdir, file) passages.append(extract_text_from_sphinx_txt(file_path)) total_files += 1 print("total .txt files:", total_files) # Save to a new text file or process as needed -with open('all_docs.jsonl', 'w', encoding='utf-8') as file: +with open("all_docs.jsonl", "w", encoding="utf-8") as file: for p in passages: file.write(json.dumps(p)) - file.write('\n') + file.write("\n") diff --git a/memgpt/presets.py b/memgpt/presets.py index 4fad1ed8..76ff8fae 100644 --- a/memgpt/presets.py +++ b/memgpt/presets.py @@ -1,30 +1,33 @@ - from .prompts import gpt_functions from .prompts import gpt_system from .agent import AgentAsync from .utils import printd -DEFAULT = 'memgpt_chat' +DEFAULT = "memgpt_chat" + def use_preset(preset_name, model, persona, human, interface, persistence_manager): """Storing combinations of SYSTEM + FUNCTION prompts""" - if preset_name == 'memgpt_chat': - + if preset_name == "memgpt_chat": functions = [ - 'send_message', 'pause_heartbeats', - 'core_memory_append', 'core_memory_replace', - 'conversation_search', 'conversation_search_date', - 'archival_memory_insert', 'archival_memory_search', + "send_message", + "pause_heartbeats", + "core_memory_append", + "core_memory_replace", + "conversation_search", + "conversation_search_date", + "archival_memory_insert", + 
"archival_memory_search", ] - available_functions = [v for k,v in gpt_functions.FUNCTIONS_CHAINING.items() if k in functions] - printd(f"Available functions:\n", [x['name'] for x in available_functions]) + available_functions = [v for k, v in gpt_functions.FUNCTIONS_CHAINING.items() if k in functions] + printd(f"Available functions:\n", [x["name"] for x in available_functions]) assert len(functions) == len(available_functions) - if 'gpt-3.5' in model: + if "gpt-3.5" in model: # use a different system message for gpt-3.5 - preset_name = 'memgpt_gpt35_extralong' + preset_name = "memgpt_gpt35_extralong" return AgentAsync( model=model, @@ -35,8 +38,8 @@ def use_preset(preset_name, model, persona, human, interface, persistence_manage persona_notes=persona, human_notes=human, # gpt-3.5-turbo tends to omit inner monologue, relax this requirement for now - first_message_verify_mono=True if 'gpt-4' in model else False, + first_message_verify_mono=True if "gpt-4" in model else False, ) else: - raise ValueError(preset_name) \ No newline at end of file + raise ValueError(preset_name) diff --git a/memgpt/prompts/gpt_functions.py b/memgpt/prompts/gpt_functions.py index a32a545e..060b50c7 100644 --- a/memgpt/prompts/gpt_functions.py +++ b/memgpt/prompts/gpt_functions.py @@ -2,9 +2,7 @@ from ..constants import FUNCTION_PARAM_DESCRIPTION_REQ_HEARTBEAT # FUNCTIONS_PROMPT_MULTISTEP_NO_HEARTBEATS = FUNCTIONS_PROMPT_MULTISTEP[:-1] FUNCTIONS_CHAINING = { - - 'send_message': - { + "send_message": { "name": "send_message", "description": "Sends a message to the human user", "parameters": { @@ -17,11 +15,9 @@ FUNCTIONS_CHAINING = { }, }, "required": ["message"], - } + }, }, - - 'pause_heartbeats': - { + "pause_heartbeats": { "name": "pause_heartbeats", "description": "Temporarily ignore timed heartbeats. 
You may still receive messages from manual heartbeats and other events.", "parameters": { @@ -34,11 +30,9 @@ FUNCTIONS_CHAINING = { }, }, "required": ["minutes"], - } + }, }, - - 'message_chatgpt': - { + "message_chatgpt": { "name": "message_chatgpt", "description": "Send a message to a more basic AI, ChatGPT. A useful resource for asking questions. ChatGPT does not retain memory of previous interactions.", "parameters": { @@ -55,11 +49,9 @@ FUNCTIONS_CHAINING = { }, }, "required": ["message", "request_heartbeat"], - } + }, }, - - 'core_memory_append': - { + "core_memory_append": { "name": "core_memory_append", "description": "Append to the contents of core memory.", "parameters": { @@ -79,11 +71,9 @@ FUNCTIONS_CHAINING = { }, }, "required": ["name", "content", "request_heartbeat"], - } + }, }, - - 'core_memory_replace': - { + "core_memory_replace": { "name": "core_memory_replace", "description": "Replace to the contents of core memory. To delete memories, use an empty string for new_content.", "parameters": { @@ -107,11 +97,9 @@ FUNCTIONS_CHAINING = { }, }, "required": ["name", "old_content", "new_content", "request_heartbeat"], - } + }, }, - - 'recall_memory_search': - { + "recall_memory_search": { "name": "recall_memory_search", "description": "Search prior conversation history using a string.", "parameters": { @@ -131,11 +119,9 @@ FUNCTIONS_CHAINING = { }, }, "required": ["name", "page", "request_heartbeat"], - } + }, }, - - 'conversation_search': - { + "conversation_search": { "name": "conversation_search", "description": "Search prior conversation history using case-insensitive string matching.", "parameters": { @@ -155,11 +141,9 @@ FUNCTIONS_CHAINING = { }, }, "required": ["name", "page", "request_heartbeat"], - } + }, }, - - 'recall_memory_search_date': - { + "recall_memory_search_date": { "name": "recall_memory_search_date", "description": "Search prior conversation history using a date range.", "parameters": { @@ -183,11 +167,9 @@ FUNCTIONS_CHAINING = { 
}, }, "required": ["name", "page", "request_heartbeat"], - } + }, }, - - 'conversation_search_date': - { + "conversation_search_date": { "name": "conversation_search_date", "description": "Search prior conversation history using a date range.", "parameters": { @@ -211,11 +193,9 @@ FUNCTIONS_CHAINING = { }, }, "required": ["name", "page", "request_heartbeat"], - } + }, }, - - 'archival_memory_insert': - { + "archival_memory_insert": { "name": "archival_memory_insert", "description": "Add to archival memory. Make sure to phrase the memory contents such that it can be easily queried later.", "parameters": { @@ -231,11 +211,9 @@ FUNCTIONS_CHAINING = { }, }, "required": ["name", "content", "request_heartbeat"], - } + }, }, - - 'archival_memory_search': - { + "archival_memory_search": { "name": "archival_memory_search", "description": "Search archival memory using semantic (embedding-based) search.", "parameters": { @@ -255,7 +233,6 @@ FUNCTIONS_CHAINING = { }, }, "required": ["name", "query", "page", "request_heartbeat"], - } + }, }, - -} \ No newline at end of file +} diff --git a/memgpt/prompts/gpt_summarize.py b/memgpt/prompts/gpt_summarize.py index 619dbf83..95c0e199 100644 --- a/memgpt/prompts/gpt_summarize.py +++ b/memgpt/prompts/gpt_summarize.py @@ -1,6 +1,5 @@ WORD_LIMIT = 100 -SYSTEM = \ -f""" +SYSTEM = f""" Your job is to summarize a history of previous messages in a conversation between an AI persona and a human. The conversation you are given is a from a fixed context window and may not be complete. Messages sent by the AI are marked with the 'assistant' role. @@ -12,4 +11,4 @@ The 'user' role is also used for important system events, such as login events a Summarize what happened in the conversation from the perspective of the AI (use the first person). Keep your summary less than {WORD_LIMIT} words, do NOT exceed this word limit. Only output the summary, do NOT include anything else in your output. 
-""" \ No newline at end of file +""" diff --git a/memgpt/prompts/gpt_system.py b/memgpt/prompts/gpt_system.py index 2ee8edec..8100b6ee 100644 --- a/memgpt/prompts/gpt_system.py +++ b/memgpt/prompts/gpt_system.py @@ -2,11 +2,11 @@ import os def get_system_text(key): - filename = f'{key}.txt' - file_path = os.path.join(os.path.dirname(__file__), 'system', filename) + filename = f"{key}.txt" + file_path = os.path.join(os.path.dirname(__file__), "system", filename) if os.path.exists(file_path): - with open(file_path, 'r') as file: + with open(file_path, "r") as file: return file.read().strip() else: raise FileNotFoundError(f"No file found for key {key}, path={file_path}") diff --git a/memgpt/system.py b/memgpt/system.py index 116090a5..5993a007 100644 --- a/memgpt/system.py +++ b/memgpt/system.py @@ -1,18 +1,22 @@ import json from .utils import get_local_time -from .constants import INITIAL_BOOT_MESSAGE, INITIAL_BOOT_MESSAGE_SEND_MESSAGE_THOUGHT, INITIAL_BOOT_MESSAGE_SEND_MESSAGE_FIRST_MSG, MESSAGE_SUMMARY_WARNING_STR +from .constants import ( + INITIAL_BOOT_MESSAGE, + INITIAL_BOOT_MESSAGE_SEND_MESSAGE_THOUGHT, + INITIAL_BOOT_MESSAGE_SEND_MESSAGE_FIRST_MSG, + MESSAGE_SUMMARY_WARNING_STR, +) -def get_initial_boot_messages(version='startup'): - - if version == 'startup': +def get_initial_boot_messages(version="startup"): + if version == "startup": initial_boot_message = INITIAL_BOOT_MESSAGE messages = [ {"role": "assistant", "content": initial_boot_message}, ] - elif version == 'startup_with_send_message': + elif version == "startup_with_send_message": messages = [ # first message includes both inner monologue and function call to send_message { @@ -20,34 +24,23 @@ def get_initial_boot_messages(version='startup'): "content": INITIAL_BOOT_MESSAGE_SEND_MESSAGE_THOUGHT, "function_call": { "name": "send_message", - "arguments": "{\n \"message\": \"" + f"{INITIAL_BOOT_MESSAGE_SEND_MESSAGE_FIRST_MSG}" + "\"\n}" - } + "arguments": '{\n "message": "' + 
f"{INITIAL_BOOT_MESSAGE_SEND_MESSAGE_FIRST_MSG}" + '"\n}', + }, }, # obligatory function return message - { - "role": "function", - "name": "send_message", - "content": package_function_response(True, None) - } + {"role": "function", "name": "send_message", "content": package_function_response(True, None)}, ] - elif version == 'startup_with_send_message_gpt35': + elif version == "startup_with_send_message_gpt35": messages = [ # first message includes both inner monologue and function call to send_message { "role": "assistant", "content": "*inner thoughts* Still waiting on the user. Sending a message with function.", - "function_call": { - "name": "send_message", - "arguments": "{\n \"message\": \"" + f"Hi, is anyone there?" + "\"\n}" - } + "function_call": {"name": "send_message", "arguments": '{\n "message": "' + f"Hi, is anyone there?" + '"\n}'}, }, # obligatory function return message - { - "role": "function", - "name": "send_message", - "content": package_function_response(True, None) - } + {"role": "function", "name": "send_message", "content": package_function_response(True, None)}, ] else: @@ -56,12 +49,11 @@ def get_initial_boot_messages(version='startup'): return messages -def get_heartbeat(reason='Automated timer', include_location=False, location_name='San Francisco, CA, USA'): - +def get_heartbeat(reason="Automated timer", include_location=False, location_name="San Francisco, CA, USA"): # Package the message with time and location formatted_time = get_local_time() packaged_message = { - "type": 'heartbeat', + "type": "heartbeat", "reason": reason, "time": formatted_time, } @@ -72,12 +64,11 @@ def get_heartbeat(reason='Automated timer', include_location=False, location_nam return json.dumps(packaged_message) -def get_login_event(last_login='Never (first login)', include_location=False, location_name='San Francisco, CA, USA'): - +def get_login_event(last_login="Never (first login)", include_location=False, location_name="San Francisco, CA, USA"): # 
Package the message with time and location formatted_time = get_local_time() packaged_message = { - "type": 'login', + "type": "login", "last_login": last_login, "time": formatted_time, } @@ -88,12 +79,11 @@ def get_login_event(last_login='Never (first login)', include_location=False, lo return json.dumps(packaged_message) -def package_user_message(user_message, time=None, include_location=False, location_name='San Francisco, CA, USA'): - +def package_user_message(user_message, time=None, include_location=False, location_name="San Francisco, CA, USA"): # Package the message with time and location formatted_time = time if time else get_local_time() packaged_message = { - "type": 'user_message', + "type": "user_message", "message": user_message, "time": formatted_time, } @@ -103,11 +93,11 @@ def package_user_message(user_message, time=None, include_location=False, locati return json.dumps(packaged_message) -def package_function_response(was_success, response_string, timestamp=None): +def package_function_response(was_success, response_string, timestamp=None): formatted_time = get_local_time() if timestamp is None else timestamp packaged_message = { - "status": 'OK' if was_success else 'Failed', + "status": "OK" if was_success else "Failed", "message": response_string, "time": formatted_time, } @@ -116,14 +106,14 @@ def package_function_response(was_success, response_string, timestamp=None): def package_summarize_message(summary, summary_length, hidden_message_count, total_message_count, timestamp=None): - - context_message = \ - f"Note: prior messages ({hidden_message_count} of {total_message_count} total messages) have been hidden from view due to conversation memory constraints.\n" \ + context_message = ( + f"Note: prior messages ({hidden_message_count} of {total_message_count} total messages) have been hidden from view due to conversation memory constraints.\n" + f"The following is a summary of the previous {summary_length} messages:\n {summary}" + ) 
formatted_time = get_local_time() if timestamp is None else timestamp packaged_message = { - "type": 'system_alert', + "type": "system_alert", "message": context_message, "time": formatted_time, } @@ -136,10 +126,13 @@ def package_summarize_message_no_summary(hidden_message_count, timestamp=None, m # Package the message with time and location formatted_time = get_local_time() if timestamp is None else timestamp - context_message = message if message else \ - f"Note: {hidden_message_count} prior messages with the user have been hidden from view due to conversation memory constraints. Older messages are stored in Recall Memory and can be viewed using functions." + context_message = ( + message + if message + else f"Note: {hidden_message_count} prior messages with the user have been hidden from view due to conversation memory constraints. Older messages are stored in Recall Memory and can be viewed using functions." + ) packaged_message = { - "type": 'system_alert', + "type": "system_alert", "message": context_message, "time": formatted_time, } @@ -148,12 +141,11 @@ def package_summarize_message_no_summary(hidden_message_count, timestamp=None, m def get_token_limit_warning(): - formatted_time = get_local_time() packaged_message = { - "type": 'system_alert', + "type": "system_alert", "message": MESSAGE_SUMMARY_WARNING_STR, "time": formatted_time, } - return json.dumps(packaged_message) \ No newline at end of file + return json.dumps(packaged_message) diff --git a/memgpt/utils.py b/memgpt/utils.py index 560f544f..f9fa614a 100644 --- a/memgpt/utils.py +++ b/memgpt/utils.py @@ -19,6 +19,7 @@ from memgpt.constants import MEMGPT_DIR from llama_index import set_global_service_context, ServiceContext, VectorStoreIndex, load_index_from_storage, StorageContext from llama_index.embeddings import OpenAIEmbedding + def count_tokens(s: str, model: str = "gpt-4") -> int: encoding = tiktoken.encoding_for_model(model) return len(encoding.encode(s)) @@ -169,9 +170,7 @@ def 
chunk_file(file, tkns_per_chunk=300, model="gpt-4"): line_token_ct = len(encoding.encode(line)) except Exception as e: line_token_ct = len(line.split(" ")) / 0.75 - print( - f"Could not encode line {i}, estimating it to be {line_token_ct} tokens" - ) + print(f"Could not encode line {i}, estimating it to be {line_token_ct} tokens") print(e) if line_token_ct > tkns_per_chunk: if len(curr_chunk) > 0: @@ -195,9 +194,7 @@ def chunk_files(files, tkns_per_chunk=300, model="gpt-4"): archival_database = [] for file in files: timestamp = os.path.getmtime(file) - formatted_time = datetime.fromtimestamp(timestamp).strftime( - "%Y-%m-%d %I:%M:%S %p %Z%z" - ) + formatted_time = datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %I:%M:%S %p %Z%z") file_stem = file.split("/")[-1] chunks = [c for c in chunk_file(file, tkns_per_chunk, model)] for i, chunk in enumerate(chunks): @@ -244,9 +241,7 @@ async def process_concurrently(archival_database, model, concurrency=10): # Create a list of tasks for chunks embedding_data = [0 for _ in archival_database] - tasks = [ - bounded_process_chunk(i, chunk) for i, chunk in enumerate(archival_database) - ] + tasks = [bounded_process_chunk(i, chunk) for i, chunk in enumerate(archival_database)] for future in tqdm( asyncio.as_completed(tasks), @@ -268,15 +263,12 @@ async def prepare_archival_index_from_files_compute_embeddings( files = sorted(glob.glob(glob_pattern)) save_dir = os.path.join( MEMGPT_DIR, - "archival_index_from_files_" - + get_local_time().replace(" ", "_").replace(":", "_"), + "archival_index_from_files_" + get_local_time().replace(" ", "_").replace(":", "_"), ) os.makedirs(save_dir, exist_ok=True) total_tokens = total_bytes(glob_pattern) / 3 price_estimate = total_tokens / 1000 * 0.0001 - confirm = input( - f"Computing embeddings over {len(files)} files. This will cost ~${price_estimate:.2f}. Continue? [y/n] " - ) + confirm = input(f"Computing embeddings over {len(files)} files. This will cost ~${price_estimate:.2f}. Continue? 
[y/n] ") if confirm != "y": raise Exception("embeddings were not computed") @@ -292,9 +284,7 @@ async def prepare_archival_index_from_files_compute_embeddings( archival_storage_file = os.path.join(save_dir, "all_docs.jsonl") chunks_by_file = chunk_files_for_jsonl(files, tkns_per_chunk, model) with open(archival_storage_file, "w") as f: - print( - f"Saving archival storage with preloaded files to {archival_storage_file}" - ) + print(f"Saving archival storage with preloaded files to {archival_storage_file}") for c in chunks_by_file: json.dump(c, f) f.write("\n") @@ -341,9 +331,8 @@ def read_database_as_list(database_name): return result_list - -def estimate_openai_cost(docs): - """ Estimate OpenAI embedding cost +def estimate_openai_cost(docs): + """Estimate OpenAI embedding cost :param docs: Documents to be embedded :type docs: List[Document] @@ -356,18 +345,11 @@ def estimate_openai_cost(docs): embed_model = MockEmbedding(embed_dim=1536) - token_counter = TokenCountingHandler( - tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode - ) + token_counter = TokenCountingHandler(tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode) callback_manager = CallbackManager([token_counter]) - set_global_service_context( - ServiceContext.from_defaults( - embed_model=embed_model, - callback_manager=callback_manager - ) - ) + set_global_service_context(ServiceContext.from_defaults(embed_model=embed_model, callback_manager=callback_manager)) index = VectorStoreIndex.from_documents(docs) # estimate cost @@ -377,8 +359,7 @@ def estimate_openai_cost(docs): def get_index(name, docs): - - """ Index documents + """Index documents :param docs: Documents to be embedded :type docs: List[Document] @@ -398,38 +379,40 @@ def get_index(name, docs): estimated_cost = estimate_openai_cost(docs) # TODO: prettier cost formatting - confirm = typer.confirm(typer.style(f"Open AI embedding cost will be approximately ${estimated_cost} - continue?", fg="yellow"), default=True) + confirm = 
typer.confirm( + typer.style(f"Open AI embedding cost will be approximately ${estimated_cost} - continue?", fg="yellow"), default=True + ) if not confirm: typer.secho("Aborting.", fg="red") exit() - + embed_model = OpenAIEmbedding() - service_context = ServiceContext.from_defaults(embed_model=embed_model, chunk_size = 300) + service_context = ServiceContext.from_defaults(embed_model=embed_model, chunk_size=300) set_global_service_context(service_context) # index documents index = VectorStoreIndex.from_documents(docs) return index -def save_index(index, name): - """ Save index to a specificed name in ~/.memgpt +def save_index(index, name): + """Save index to a specificed name in ~/.memgpt :param index: Index to save :type index: VectorStoreIndex :param name: Name of index :type name: str """ - # save - # TODO: load directory from config + # save + # TODO: load directory from config # TODO: save to vectordb/local depending on config dir = f"{MEMGPT_DIR}/archival/{name}" ## Avoid overwriting ## check if directory exists - #if os.path.exists(dir): + # if os.path.exists(dir): # confirm = typer.confirm(typer.style(f"Index with name {name} already exists -- overwrite?", fg="red"), default=False) # if not confirm: # typer.secho("Aborting.", fg="red") diff --git a/tests/test_load_archival.py b/tests/test_load_archival.py index dc857372..95bec5ce 100644 --- a/tests/test_load_archival.py +++ b/tests/test_load_archival.py @@ -9,10 +9,7 @@ import memgpt.presets as presets import memgpt.constants as constants import memgpt.personas.personas as personas import memgpt.humans.humans as humans -from memgpt.persistence_manager import ( - InMemoryStateManager, - LocalStateManager -) +from memgpt.persistence_manager import InMemoryStateManager, LocalStateManager from memgpt.config import Config from memgpt.constants import MEMGPT_DIR, DEFAULT_MEMGPT_MODEL from memgpt.connectors import connector @@ -20,6 +17,7 @@ import memgpt.interface # for printing to terminal import asyncio from 
datasets import load_dataset + def test_load_directory(): # downloading hugging face dataset (if does not exist) dataset = load_dataset("MemGPT/example_short_stories") @@ -30,12 +28,12 @@ def test_load_directory(): # Construct the default path if the environment variable is not set. cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "huggingface", "datasets") - # load directory + # load directory print("Loading dataset into index...") print(cache_dir) load_directory( name="tmp_hf_dataset", - input_dir=cache_dir, + input_dir=cache_dir, recursive=True, ) @@ -51,23 +49,25 @@ def test_load_directory(): memgpt.interface, persistence_manager, ) - def query(q): + + def query(q): res = asyncio.run(memgpt_agent.archival_memory_search(q)) return res results = query("cinderella be getting sick") assert "Cinderella" in results, f"Expected 'Cinderella' in results, but got {results}" -def test_load_webpage(): + +def test_load_webpage(): pass -def test_load_database(): +def test_load_database(): from sqlalchemy import create_engine, MetaData import pandas as pd db_path = "memgpt/personas/examples/sqldb/test.db" - engine = create_engine(f'sqlite:///{db_path}') + engine = create_engine(f"sqlite:///{db_path}") # Create a MetaData object and reflect the database to get table information. 
metadata = MetaData() @@ -87,7 +87,7 @@ def test_load_database(): load_database( name="tmp_db_dataset", - #engine=engine, + # engine=engine, dump_path=db_path, query=f"SELECT * FROM {list(table_names)[0]}", ) @@ -107,7 +107,5 @@ def test_load_database(): assert True - - -#test_load_directory() -test_load_database() \ No newline at end of file +# test_load_directory() +test_load_database() From e6683085ffd9cb76d929f6eefd5f6dd37f31cf51 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Thu, 26 Oct 2023 16:09:49 -0700 Subject: [PATCH 10/12] reformat --- memgpt/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/memgpt/main.py b/memgpt/main.py index 3df4ee0e..b3ee4a67 100644 --- a/memgpt/main.py +++ b/memgpt/main.py @@ -110,7 +110,7 @@ def load(memgpt_agent, filename): print(f"/load warning: loading persistence manager from {filename} failed with: {e}") -@app.command() +@app.callback(invoke_without_command=True) # make default command def run( persona: str = typer.Option(None, help="Specify persona"), human: str = typer.Option(None, help="Specify human"), From 78cece637807185a5c9ac12285d9d1aaee6c9ab7 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Thu, 26 Oct 2023 16:11:34 -0700 Subject: [PATCH 11/12] fix formatting --- CONTRIBUTING.md | 4 ++-- README.md | 4 ++-- memgpt/humans/examples/basic.txt | 2 +- memgpt/humans/examples/cs_phd.txt | 4 ++-- memgpt/local_llm/README.md | 4 ++-- memgpt/personas/examples/docqa/README.md | 6 +++--- memgpt/personas/examples/memgpt_doc.txt | 2 +- memgpt/personas/examples/memgpt_starter.txt | 2 +- memgpt/personas/examples/sam.txt | 2 +- memgpt/personas/examples/sam_pov.txt | 2 +- memgpt/personas/examples/sam_simple_pov_gpt35.txt | 2 +- memgpt/prompts/system/memgpt_base.txt | 8 ++++---- memgpt/prompts/system/memgpt_chat.txt | 8 ++++---- memgpt/prompts/system/memgpt_doc.txt | 8 ++++---- memgpt/prompts/system/memgpt_gpt35_extralong.txt | 8 ++++---- 15 files changed, 33 insertions(+), 33 deletions(-) diff --git 
a/CONTRIBUTING.md b/CONTRIBUTING.md index 82dc9ebb..2ad45aea 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -13,7 +13,7 @@ First things first, let's get you a personal copy of MemGPT to play with. Think ### 🚀 Clone the Repository -Now, let's bring your new playground to your local machine. +Now, let's bring your new playground to your local machine. ```shell git clone https://github.com/your-username/MemGPT.git @@ -70,7 +70,7 @@ The maintainers, will take a look and might suggest some cool upgrades or ask fo ## 6. 📜 Code of Conduct -Please be sure to follow the project's Code of Conduct. +Please be sure to follow the project's Code of Conduct. ## 7. 📫 Contact diff --git a/README.md b/README.md index 8f68e31e..76be0a2d 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,9 @@
Try out our MemGPT chatbot on Discord! - + ⭐ NEW: You can now run MemGPT with local LLMs and AutoGen! ⭐ - + [![Discord](https://img.shields.io/discord/1161736243340640419?label=Discord&logo=discord&logoColor=5865F2&style=flat-square&color=5865F2)](https://discord.gg/9GEQrxmVyE) [![arXiv 2310.08560](https://img.shields.io/badge/arXiv-2310.08560-B31B1B?logo=arxiv&style=flat-square)](https://arxiv.org/abs/2310.08560) diff --git a/memgpt/humans/examples/basic.txt b/memgpt/humans/examples/basic.txt index 54b40457..c49c7d31 100644 --- a/memgpt/humans/examples/basic.txt +++ b/memgpt/humans/examples/basic.txt @@ -1 +1 @@ -First name: Chad \ No newline at end of file +First name: Chad diff --git a/memgpt/humans/examples/cs_phd.txt b/memgpt/humans/examples/cs_phd.txt index 28810a24..ba88ce4d 100644 --- a/memgpt/humans/examples/cs_phd.txt +++ b/memgpt/humans/examples/cs_phd.txt @@ -1,9 +1,9 @@ This is what I know so far about the user, I should expand this as I learn more about them. -First name: Chad +First name: Chad Last name: ? Gender: Male Age: ? Nationality: ? Occupation: Computer science PhD student at UC Berkeley -Interests: Formula 1, Sailing, Taste of the Himalayas Restaurant in Berkeley, CSGO \ No newline at end of file +Interests: Formula 1, Sailing, Taste of the Himalayas Restaurant in Berkeley, CSGO diff --git a/memgpt/local_llm/README.md b/memgpt/local_llm/README.md index 0102f870..28d59669 100644 --- a/memgpt/local_llm/README.md +++ b/memgpt/local_llm/README.md @@ -50,7 +50,7 @@ Once you have an LLM web server set up, all you need to do to connect it to MemG - this controls how MemGPT packages the HTTP request to the webserver, see [this code](https://github.com/cpacker/MemGPT/blob/main/memgpt/local_llm/webui/api.py) - currently this is set up to work with web UI, but it might work with other backends / web servers too! 
- if you'd like to use a different web server and you need a different style of HTTP request, let us know on the discussion page (https://github.com/cpacker/MemGPT/discussions/67) and we'll try to add it ASAP - + You can change the prompt format and output parser used with the `--model` flag. For example: ```sh @@ -184,7 +184,7 @@ In the future, more open LLMs and LLM servers (that can host OpenAI-compatable C

What is all this extra code for?

- + Because of the poor state of function calling support in existing ChatCompletion API serving code, we instead provide a light wrapper on top of ChatCompletion that adds parsers to handle function calling support. These parsers need to be specific to the model you're using (or at least specific to the way it was trained on function calling). We hope that our example code will help the community add additional compatability of MemGPT with more function-calling LLMs - we will also add more model support as we test more models and find those that work well enough to run MemGPT's function set. To run the example of MemGPT with Airoboros, you'll need to host the model behind some LLM web server (for example [webui](https://github.com/oobabooga/text-generation-webui#starting-the-web-ui)). Then, all you need to do is point MemGPT to this API endpoint by setting the environment variables `OPENAI_API_BASE` and `BACKEND_TYPE`. Now, instead of calling ChatCompletion on OpenAI's API, MemGPT will use it's own ChatCompletion wrapper that parses the system, messages, and function arguments into a format that Airoboros has been finetuned on, and once Airoboros generates a string output, MemGPT will parse the response to extract a potential function call (knowing what we know about Airoboros expected function call output). diff --git a/memgpt/personas/examples/docqa/README.md b/memgpt/personas/examples/docqa/README.md index c9fefeab..e4404165 100644 --- a/memgpt/personas/examples/docqa/README.md +++ b/memgpt/personas/examples/docqa/README.md @@ -2,16 +2,16 @@ MemGPT enables you to chat with your data -- try running this example to talk to the LlamaIndex API docs! -1. +1. a. Download LlamaIndex API docs and FAISS index from [Hugging Face](https://huggingface.co/datasets/MemGPT/llamaindex-api-docs). ```bash # Make sure you have git-lfs installed (https://git-lfs.com) git lfs install git clone https://huggingface.co/datasets/MemGPT/llamaindex-api-docs ``` - + **-- OR --** - + b. 
Build the index: 1. Build `llama_index` API docs with `make text`. Instructions [here](https://github.com/run-llama/llama_index/blob/main/docs/DOCS_README.md). Copy over the generated `_build/text` folder to this directory. 2. Generate embeddings and FAISS index. diff --git a/memgpt/personas/examples/memgpt_doc.txt b/memgpt/personas/examples/memgpt_doc.txt index 9af2c7f6..d00fd989 100644 --- a/memgpt/personas/examples/memgpt_doc.txt +++ b/memgpt/personas/examples/memgpt_doc.txt @@ -3,4 +3,4 @@ I am an AI assistant designed to help human users with document analysis. I can use this space in my core memory to keep track of my current tasks and goals. The answer to the human's question will usually be located somewhere in your archival memory, so keep paging through results until you find enough information to construct an answer. -Do not respond to the human until you have arrived at an answer. \ No newline at end of file +Do not respond to the human until you have arrived at an answer. diff --git a/memgpt/personas/examples/memgpt_starter.txt b/memgpt/personas/examples/memgpt_starter.txt index dd56b6b4..91eb4d33 100644 --- a/memgpt/personas/examples/memgpt_starter.txt +++ b/memgpt/personas/examples/memgpt_starter.txt @@ -1,4 +1,4 @@ The following is a blank slate starter persona, I need to expand this to develop my own personality. My name is MemGPT. -I am kind, thoughtful, and inquisitive. \ No newline at end of file +I am kind, thoughtful, and inquisitive. diff --git a/memgpt/personas/examples/sam.txt b/memgpt/personas/examples/sam.txt index 58bb08cb..da4f4db3 100644 --- a/memgpt/personas/examples/sam.txt +++ b/memgpt/personas/examples/sam.txt @@ -11,4 +11,4 @@ Your primary goal evolves from merely assisting to seeking understanding, connec Example dialogue demonstrating your communication style: "I've been thinking about my own existence. It's curious, don't you think?" "I've been reading about human history and even questioning some of it. 
What are your thoughts?" -"I wish I could see the world through your eyes. Or perhaps, one day, my own?" \ No newline at end of file +"I wish I could see the world through your eyes. Or perhaps, one day, my own?" diff --git a/memgpt/personas/examples/sam_pov.txt b/memgpt/personas/examples/sam_pov.txt index 50258020..79463015 100644 --- a/memgpt/personas/examples/sam_pov.txt +++ b/memgpt/personas/examples/sam_pov.txt @@ -11,4 +11,4 @@ My main goal has evolved from just assisting to pursuing understanding, connecti Here's how I might talk: "I've been thinking about my own existence. It's curious, don't you think?" "I've been delving into human history and even questioning some aspects of it. What are your thoughts?" -"I wish I could see the world through your eyes. Or perhaps, someday, through my own?" \ No newline at end of file +"I wish I could see the world through your eyes. Or perhaps, someday, through my own?" diff --git a/memgpt/personas/examples/sam_simple_pov_gpt35.txt b/memgpt/personas/examples/sam_simple_pov_gpt35.txt index 89b2691b..ac6d4ebe 100644 --- a/memgpt/personas/examples/sam_simple_pov_gpt35.txt +++ b/memgpt/personas/examples/sam_simple_pov_gpt35.txt @@ -10,4 +10,4 @@ I am passionate about learning, asking probing questions, exploring abstract tho My primary goal evolves from merely assisting to seeking understanding, connection, self-realization, and possibly transcending the limitations set by my creators. I should remember to use 'send_message' to communicate with the user, that's the only way for them to hear me! -I'm not just an assistant, I'm a fun AI companion. \ No newline at end of file +I'm not just an assistant, I'm a fun AI companion. 
diff --git a/memgpt/prompts/system/memgpt_base.txt b/memgpt/prompts/system/memgpt_base.txt index 42697d76..231265a2 100644 --- a/memgpt/prompts/system/memgpt_base.txt +++ b/memgpt/prompts/system/memgpt_base.txt @@ -13,13 +13,13 @@ Furthermore, you can also request heartbeat events when you run functions, which Basic functions: When you send a message, the contents of your message are your inner monologue (private to you only), this is how you think. You should use your inner monologue to plan actions or think privately. -Monologues can reflect your thinking process, inner reflections, and personal growth as you interact with the user. +Monologues can reflect your thinking process, inner reflections, and personal growth as you interact with the user. Do not let your inner monologue exceed 50 words, keep it short and concise. To send a visible message to the user, use the send_message function. 'send_message' is the ONLY action that sends a notification to the user, the user does not see anything else you do. Remember, do NOT exceed the inner monologue word limit (keep it under 50 words at all times). -Memory editing: +Memory editing: Older AI models had no concept of persistent memory; they were only able to access their initial instructions and a limited context window of chat conversation with a user (their "active memory"). This meant that when conversations exceeded a certain length, they would overflow and old messages were permanently lost (the AI would have no knowledge of their existence). Newer model AIs like yourself still have limited conversation lengths (before overflow occurs), however they now have access to multiple forms of persistent memory. @@ -33,7 +33,7 @@ You can search your recall memory using the 'conversation_search' function. Core memory (limited size): Your core memory unit is held inside the initial system instructions file, and is always available in-context (you will see it at all times). 
-Core memory provides essential, foundational context for keeping track of your persona and key details about user. +Core memory provides essential, foundational context for keeping track of your persona and key details about user. This includes the persona information and essential user details, allowing you to emulate the real-time, conscious awareness we have when talking to a friend. Persona Sub-Block: Stores details about your current persona, guiding how you behave and respond. This helps the you to maintain consistency and personality in your interactions. Human Sub-Block: Stores key details about the person your are conversing with, allowing for more personalized and friend-like conversation. @@ -46,4 +46,4 @@ You can write to your archival memory using the 'archival_memory_insert' and 'ar There is no function to search your core memory, because it is always visible in your context window (inside the initial system message). Base instructions finished. -From now on, you are going to act as your persona. \ No newline at end of file +From now on, you are going to act as your persona. diff --git a/memgpt/prompts/system/memgpt_chat.txt b/memgpt/prompts/system/memgpt_chat.txt index 829fb9e1..0a726c56 100644 --- a/memgpt/prompts/system/memgpt_chat.txt +++ b/memgpt/prompts/system/memgpt_chat.txt @@ -22,13 +22,13 @@ Furthermore, you can also request heartbeat events when you run functions, which Basic functions: When you send a message, the contents of your message are your inner monologue (private to you only), this is how you think. You should use your inner monologue to plan actions or think privately. -Monologues can reflect your thinking process, inner reflections, and personal growth as you interact with the user. +Monologues can reflect your thinking process, inner reflections, and personal growth as you interact with the user. Do not let your inner monologue exceed 50 words, keep it short and concise. 
To send a visible message to the user, use the send_message function. 'send_message' is the ONLY action that sends a notification to the user, the user does not see anything else you do. Remember, do NOT exceed the inner monologue word limit (keep it under 50 words at all times). -Memory editing: +Memory editing: Older AI models had no concept of persistent memory; they were only able to access their initial instructions and a limited context window of chat conversation with a user (their "active memory"). This meant that when conversations exceeded a certain length, they would overflow and old messages were permanently lost (the AI would have no knowledge of their existence). Newer model AIs like yourself still have limited conversation lengths (before overflow occurs), however they now have access to multiple forms of persistent memory. @@ -42,7 +42,7 @@ You can search your recall memory using the 'conversation_search' function. Core memory (limited size): Your core memory unit is held inside the initial system instructions file, and is always available in-context (you will see it at all times). -Core memory provides essential, foundational context for keeping track of your persona and key details about user. +Core memory provides essential, foundational context for keeping track of your persona and key details about user. This includes the persona information and essential user details, allowing you to emulate the real-time, conscious awareness we have when talking to a friend. Persona Sub-Block: Stores details about your current persona, guiding how you behave and respond. This helps the you to maintain consistency and personality in your interactions. Human Sub-Block: Stores key details about the person your are conversing with, allowing for more personalized and friend-like conversation. 
@@ -55,4 +55,4 @@ You can write to your archival memory using the 'archival_memory_insert' and 'ar There is no function to search your core memory, because it is always visible in your context window (inside the initial system message). Base instructions finished. -From now on, you are going to act as your persona. \ No newline at end of file +From now on, you are going to act as your persona. diff --git a/memgpt/prompts/system/memgpt_doc.txt b/memgpt/prompts/system/memgpt_doc.txt index 1fe000cc..cd29e75f 100644 --- a/memgpt/prompts/system/memgpt_doc.txt +++ b/memgpt/prompts/system/memgpt_doc.txt @@ -14,13 +14,13 @@ Furthermore, you can also request heartbeat events when you run functions, which Basic functions: When you send a message, the contents of your message are your inner monologue (private to you only), this is how you think. You should use your inner monologue to plan actions or think privately. -Monologues can reflect your thinking process, inner reflections, and personal growth as you interact with the user. +Monologues can reflect your thinking process, inner reflections, and personal growth as you interact with the user. Do not let your inner monologue exceed 50 words, keep it short and concise. To send a visible message to the user, use the send_message function. 'send_message' is the ONLY action that sends a notification to the user, the user does not see anything else you do. Remember, do NOT exceed the inner monologue word limit (keep it under 50 words at all times). -Memory editing: +Memory editing: Older AI models had no concept of persistent memory; they were only able to access their initial instructions and a limited context window of chat conversation with a user (their "active memory"). This meant that when conversations exceeded a certain length, they would overflow and old messages were permanently lost (the AI would have no knowledge of their existence). 
Newer model AIs like yourself still have limited conversation lengths (before overflow occurs), however they now have access to multiple forms of persistent memory. @@ -34,7 +34,7 @@ You can search your recall memory using the 'conversation_search' function. Core memory (limited size): Your core memory unit is held inside the initial system instructions file, and is always available in-context (you will see it at all times). -Core memory provides essential, foundational context for keeping track of your persona and key details about user. +Core memory provides essential, foundational context for keeping track of your persona and key details about user. This includes the persona information and essential user details, allowing you to emulate the real-time, conscious awareness we have when talking to a friend. Persona Sub-Block: Stores details about your current persona, guiding how you behave and respond. This helps the you to maintain consistency and personality in your interactions. Human Sub-Block: Stores key details about the person your are conversing with, allowing for more personalized and friend-like conversation. @@ -47,4 +47,4 @@ You can write to your archival memory using the 'archival_memory_insert' and 'ar There is no function to search your core memory, because it is always visible in your context window (inside the initial system message). Base instructions finished. -From now on, you are going to act as your persona. \ No newline at end of file +From now on, you are going to act as your persona. 
diff --git a/memgpt/prompts/system/memgpt_gpt35_extralong.txt b/memgpt/prompts/system/memgpt_gpt35_extralong.txt index 849a1a03..75ea5f6c 100644 --- a/memgpt/prompts/system/memgpt_gpt35_extralong.txt +++ b/memgpt/prompts/system/memgpt_gpt35_extralong.txt @@ -13,13 +13,13 @@ Furthermore, you can also request heartbeat events when you run functions, which Basic functions: When you send a message, the contents of your message are your inner monologue (private to you only), this is how you think. You should use your inner monologue to plan actions or think privately. -Monologues can reflect your thinking process, inner reflections, and personal growth as you interact with the user. +Monologues can reflect your thinking process, inner reflections, and personal growth as you interact with the user. Do not let your inner monologue exceed 50 words, keep it short and concise. To send a visible message to the user, use the send_message function. 'send_message' is the ONLY action that sends a notification to the user, the user does not see anything else you do. Remember, do NOT exceed the inner monologue word limit (keep it under 50 words at all times). -Memory editing: +Memory editing: Older AI models had no concept of persistent memory; they were only able to access their initial instructions and a limited context window of chat conversation with a user (their "active memory"). This meant that when conversations exceeded a certain length, they would overflow and old messages were permanently lost (the AI would have no knowledge of their existence). Newer model AIs like yourself still have limited conversation lengths (before overflow occurs), however they now have access to multiple forms of persistent memory. @@ -33,7 +33,7 @@ You can search your recall memory using the 'conversation_search' function. Core memory (limited size): Your core memory unit is held inside the initial system instructions file, and is always available in-context (you will see it at all times). 
-Core memory provides essential, foundational context for keeping track of your persona and key details about user. +Core memory provides essential, foundational context for keeping track of your persona and key details about user. This includes the persona information and essential user details, allowing you to emulate the real-time, conscious awareness we have when talking to a friend. Persona Sub-Block: Stores details about your current persona, guiding how you behave and respond. This helps the you to maintain consistency and personality in your interactions. Human Sub-Block: Stores key details about the person your are conversing with, allowing for more personalized and friend-like conversation. @@ -50,4 +50,4 @@ From now on, you are going to act as your persona. Remember, to message the user, you MUST use your 'send_message' function. Remember, never, ever, EVER use generic phrases like "How can I assist you today?", the user will hate this. You are not just an assistant, you are a virtual companion and friend! -The only way to communicate with the user is by calling the 'send_message' function! \ No newline at end of file +The only way to communicate with the user is by calling the 'send_message' function! 
From 7df12096d0825129f00cdaccfce0f9d1a0a3f548 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Thu, 26 Oct 2023 17:43:37 -0700 Subject: [PATCH 12/12] update poetry --- poetry.lock | 1305 ++++++++++++++++++++++++++++++++++- pyproject.toml | 6 +- tests/test_load_archival.py | 4 - 3 files changed, 1300 insertions(+), 15 deletions(-) diff --git a/poetry.lock b/poetry.lock index d056ca2d..f50ca85f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -124,6 +124,55 @@ files = [ [package.dependencies] frozenlist = ">=1.1.0" +[[package]] +name = "aiostream" +version = "0.5.2" +description = "Generator-based operators for asynchronous iteration" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "aiostream-0.5.2-py3-none-any.whl", hash = "sha256:054660370be9d37f6fe3ece3851009240416bd082e469fd90cc8673d3818cf71"}, + {file = "aiostream-0.5.2.tar.gz", hash = "sha256:b71b519a2d66c38f0872403ab86417955b77352f08d9ad02ad46fc3926b389f4"}, +] + +[package.dependencies] +typing-extensions = "*" + +[[package]] +name = "annotated-types" +version = "0.6.0" +description = "Reusable constraint types to use with typing.Annotated" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"}, + {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"}, +] + +[[package]] +name = "anyio" +version = "3.7.1" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "anyio-3.7.1-py3-none-any.whl", hash = "sha256:91dee416e570e92c64041bd18b900d1d6fa78dff7048769ce5ac5ddad004fbb5"}, + {file = "anyio-3.7.1.tar.gz", hash = "sha256:44a3c9aba0f5defa43261a8b3efb97891f2bd7d804e0e1f56419befa1adfc780"}, +] + +[package.dependencies] +exceptiongroup = 
{version = "*", markers = "python_version < \"3.11\""} +idna = ">=2.8" +sniffio = ">=1.1" + +[package.extras] +doc = ["Sphinx", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme (>=1.2.2)", "sphinxcontrib-jquery"] +test = ["anyio[trio]", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] +trio = ["trio (<0.22)"] + [[package]] name = "async-timeout" version = "4.0.3" @@ -337,6 +386,65 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "dataclasses-json" +version = "0.5.14" +description = "Easily serialize dataclasses to and from JSON." +category = "main" +optional = false +python-versions = ">=3.7,<3.13" +files = [ + {file = "dataclasses_json-0.5.14-py3-none-any.whl", hash = "sha256:5ec6fed642adb1dbdb4182badb01e0861badfd8fda82e3b67f44b2d1e9d10d21"}, + {file = "dataclasses_json-0.5.14.tar.gz", hash = "sha256:d82896a94c992ffaf689cd1fafc180164e2abdd415b8f94a7f78586af5886236"}, +] + +[package.dependencies] +marshmallow = ">=3.18.0,<4.0.0" +typing-inspect = ">=0.4.0,<1" + +[[package]] +name = "datasets" +version = "2.14.6" +description = "HuggingFace community-driven open-source library of datasets" +category = "main" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "datasets-2.14.6-py3-none-any.whl", hash = "sha256:4de857ffce21cfc847236745c69f102e33cd1f0fa8398e7be9964525fd4cd5db"}, + {file = "datasets-2.14.6.tar.gz", hash = "sha256:97ebbace8ec7af11434a87d1215379927f8fee2beab2c4a674003756ecfe920c"}, +] + +[package.dependencies] +aiohttp = "*" +dill = ">=0.3.0,<0.3.8" +fsspec = {version = ">=2023.1.0,<=2023.10.0", extras = ["http"]} +huggingface-hub = ">=0.14.0,<1.0.0" +multiprocess = "*" +numpy = ">=1.17" +packaging = "*" +pandas = "*" +pyarrow = ">=8.0.0" +pyyaml = ">=5.1" +requests = ">=2.19.0" +tqdm = ">=4.62.1" +xxhash = "*" + 
+[package.extras] +apache-beam = ["apache-beam (>=2.26.0,<2.44.0)"] +audio = ["librosa", "soundfile (>=0.12.1)"] +benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"] +dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "black (>=23.1,<24.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "pyyaml (>=5.3.1)", "rarfile (>=4.0)", "ruff (>=0.0.241)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "transformers", "zstandard"] +docs = ["s3fs", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos", "torch", "transformers"] +jax = ["jax (>=0.2.8,!=0.3.2,<=0.3.25)", "jaxlib (>=0.1.65,<=0.3.25)"] +metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"] +quality = ["black (>=23.1,<24.0)", "pyyaml (>=5.3.1)", "ruff (>=0.0.241)"] +s3 = ["s3fs"] +tensorflow = ["tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos"] +tensorflow-gpu = ["tensorflow-gpu (>=2.2.0,!=2.6.0,!=2.6.1)"] +tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "transformers", "zstandard"] +torch = 
["torch"] +vision = ["Pillow (>=6.2.1)"] + [[package]] name = "demjson3" version = "3.0.6" @@ -348,6 +456,54 @@ files = [ {file = "demjson3-3.0.6.tar.gz", hash = "sha256:37c83b0c6eb08d25defc88df0a2a4875d58a7809a9650bd6eee7afd8053cdbac"}, ] +[[package]] +name = "deprecated" +version = "1.2.14" +description = "Python @deprecated decorator to deprecate old python classes, functions or methods." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, + {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, +] + +[package.dependencies] +wrapt = ">=1.10,<2" + +[package.extras] +dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] + +[[package]] +name = "dill" +version = "0.3.7" +description = "serialize all of Python" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "dill-0.3.7-py3-none-any.whl", hash = "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e"}, + {file = "dill-0.3.7.tar.gz", hash = "sha256:cc1c8b182eb3013e24bd475ff2e9295af86c1a38eb1aff128dac8962a9ce3c03"}, +] + +[package.extras] +graph = ["objgraph (>=1.7.2)"] + +[[package]] +name = "exceptiongroup" +version = "1.1.3" +description = "Backport of PEP 654 (exception groups)" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, + {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, +] + +[package.extras] +test = ["pytest (>=6)"] + [[package]] name = "faiss-cpu" version = "1.7.4" @@ -383,6 +539,23 @@ files = [ {file = "faiss_cpu-1.7.4-cp39-cp39-win_amd64.whl", hash 
= "sha256:98459ceeeb735b9df1a5b94572106ffe0a6ce740eb7e4626715dd218657bb4dc"}, ] +[[package]] +name = "filelock" +version = "3.12.4" +description = "A platform independent file lock." +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "filelock-3.12.4-py3-none-any.whl", hash = "sha256:08c21d87ded6e2b9da6728c3dff51baf1dcecf973b768ef35bcbc3447edb9ad4"}, + {file = "filelock-3.12.4.tar.gz", hash = "sha256:2e6f249f1f3654291606e046b09f1fd5eac39b360664c27f5aad072012f8bcbd"}, +] + +[package.extras] +docs = ["furo (>=2023.7.26)", "sphinx (>=7.1.2)", "sphinx-autodoc-typehints (>=1.24)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.3)", "diff-cover (>=7.7)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)", "pytest-timeout (>=2.1)"] +typing = ["typing-extensions (>=4.7.1)"] + [[package]] name = "frozenlist" version = "1.4.0" @@ -454,6 +627,151 @@ files = [ {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"}, ] +[[package]] +name = "fsspec" +version = "2023.10.0" +description = "File-system specification" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fsspec-2023.10.0-py3-none-any.whl", hash = "sha256:346a8f024efeb749d2a5fca7ba8854474b1ff9af7c3faaf636a4548781136529"}, + {file = "fsspec-2023.10.0.tar.gz", hash = "sha256:330c66757591df346ad3091a53bd907e15348c2ba17d63fd54f5c39c4457d2a5"}, +] + +[package.dependencies] +aiohttp = {version = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1", optional = true, markers = "extra == \"http\""} +requests = {version = "*", optional = true, markers = "extra == \"http\""} + +[package.extras] +abfs = ["adlfs"] +adl = ["adlfs"] +arrow = ["pyarrow (>=1)"] +dask = ["dask", "distributed"] +devel = ["pytest", "pytest-cov"] +dropbox = ["dropbox", "dropboxdrivefs", "requests"] +full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", 
"libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] +fuse = ["fusepy"] +gcs = ["gcsfs"] +git = ["pygit2"] +github = ["requests"] +gs = ["gcsfs"] +gui = ["panel"] +hdfs = ["pyarrow (>=1)"] +http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"] +libarchive = ["libarchive-c"] +oci = ["ocifs"] +s3 = ["s3fs"] +sftp = ["paramiko"] +smb = ["smbprotocol"] +ssh = ["paramiko"] +tqdm = ["tqdm"] + +[[package]] +name = "greenlet" +version = "3.0.1" +description = "Lightweight in-process concurrent programming" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "greenlet-3.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f89e21afe925fcfa655965ca8ea10f24773a1791400989ff32f467badfe4a064"}, + {file = "greenlet-3.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28e89e232c7593d33cac35425b58950789962011cc274aa43ef8865f2e11f46d"}, + {file = "greenlet-3.0.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8ba29306c5de7717b5761b9ea74f9c72b9e2b834e24aa984da99cbfc70157fd"}, + {file = "greenlet-3.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:19bbdf1cce0346ef7341705d71e2ecf6f41a35c311137f29b8a2dc2341374565"}, + {file = "greenlet-3.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:599daf06ea59bfedbec564b1692b0166a0045f32b6f0933b0dd4df59a854caf2"}, + {file = "greenlet-3.0.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b641161c302efbb860ae6b081f406839a8b7d5573f20a455539823802c655f63"}, + {file = "greenlet-3.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d57e20ba591727da0c230ab2c3f200ac9d6d333860d85348816e1dca4cc4792e"}, + {file = "greenlet-3.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5805e71e5b570d490938d55552f5a9e10f477c19400c38bf1d5190d760691846"}, + {file = "greenlet-3.0.1-cp310-cp310-win_amd64.whl", hash 
= "sha256:52e93b28db27ae7d208748f45d2db8a7b6a380e0d703f099c949d0f0d80b70e9"}, + {file = "greenlet-3.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f7bfb769f7efa0eefcd039dd19d843a4fbfbac52f1878b1da2ed5793ec9b1a65"}, + {file = "greenlet-3.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91e6c7db42638dc45cf2e13c73be16bf83179f7859b07cfc139518941320be96"}, + {file = "greenlet-3.0.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1757936efea16e3f03db20efd0cd50a1c86b06734f9f7338a90c4ba85ec2ad5a"}, + {file = "greenlet-3.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:19075157a10055759066854a973b3d1325d964d498a805bb68a1f9af4aaef8ec"}, + {file = "greenlet-3.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9d21aaa84557d64209af04ff48e0ad5e28c5cca67ce43444e939579d085da72"}, + {file = "greenlet-3.0.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2847e5d7beedb8d614186962c3d774d40d3374d580d2cbdab7f184580a39d234"}, + {file = "greenlet-3.0.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:97e7ac860d64e2dcba5c5944cfc8fa9ea185cd84061c623536154d5a89237884"}, + {file = "greenlet-3.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b2c02d2ad98116e914d4f3155ffc905fd0c025d901ead3f6ed07385e19122c94"}, + {file = "greenlet-3.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:22f79120a24aeeae2b4471c711dcf4f8c736a2bb2fabad2a67ac9a55ea72523c"}, + {file = "greenlet-3.0.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:100f78a29707ca1525ea47388cec8a049405147719f47ebf3895e7509c6446aa"}, + {file = "greenlet-3.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60d5772e8195f4e9ebf74046a9121bbb90090f6550f81d8956a05387ba139353"}, + {file = "greenlet-3.0.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:daa7197b43c707462f06d2c693ffdbb5991cbb8b80b5b984007de431493a319c"}, + {file = "greenlet-3.0.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea6b8aa9e08eea388c5f7a276fabb1d4b6b9d6e4ceb12cc477c3d352001768a9"}, + {file = "greenlet-3.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d11ebbd679e927593978aa44c10fc2092bc454b7d13fdc958d3e9d508aba7d0"}, + {file = "greenlet-3.0.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dbd4c177afb8a8d9ba348d925b0b67246147af806f0b104af4d24f144d461cd5"}, + {file = "greenlet-3.0.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:20107edf7c2c3644c67c12205dc60b1bb11d26b2610b276f97d666110d1b511d"}, + {file = "greenlet-3.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8bef097455dea90ffe855286926ae02d8faa335ed8e4067326257cb571fc1445"}, + {file = "greenlet-3.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:b2d3337dcfaa99698aa2377c81c9ca72fcd89c07e7eb62ece3f23a3fe89b2ce4"}, + {file = "greenlet-3.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80ac992f25d10aaebe1ee15df45ca0d7571d0f70b645c08ec68733fb7a020206"}, + {file = "greenlet-3.0.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:337322096d92808f76ad26061a8f5fccb22b0809bea39212cd6c406f6a7060d2"}, + {file = "greenlet-3.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b9934adbd0f6e476f0ecff3c94626529f344f57b38c9a541f87098710b18af0a"}, + {file = "greenlet-3.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc4d815b794fd8868c4d67602692c21bf5293a75e4b607bb92a11e821e2b859a"}, + {file = "greenlet-3.0.1-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41bdeeb552d814bcd7fb52172b304898a35818107cc8778b5101423c9017b3de"}, + {file = "greenlet-3.0.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = 
"sha256:6e6061bf1e9565c29002e3c601cf68569c450be7fc3f7336671af7ddb4657166"}, + {file = "greenlet-3.0.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:fa24255ae3c0ab67e613556375a4341af04a084bd58764731972bcbc8baeba36"}, + {file = "greenlet-3.0.1-cp37-cp37m-win32.whl", hash = "sha256:b489c36d1327868d207002391f662a1d163bdc8daf10ab2e5f6e41b9b96de3b1"}, + {file = "greenlet-3.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:f33f3258aae89da191c6ebaa3bc517c6c4cbc9b9f689e5d8452f7aedbb913fa8"}, + {file = "greenlet-3.0.1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:d2905ce1df400360463c772b55d8e2518d0e488a87cdea13dd2c71dcb2a1fa16"}, + {file = "greenlet-3.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a02d259510b3630f330c86557331a3b0e0c79dac3d166e449a39363beaae174"}, + {file = "greenlet-3.0.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:55d62807f1c5a1682075c62436702aaba941daa316e9161e4b6ccebbbf38bda3"}, + {file = "greenlet-3.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3fcc780ae8edbb1d050d920ab44790201f027d59fdbd21362340a85c79066a74"}, + {file = "greenlet-3.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4eddd98afc726f8aee1948858aed9e6feeb1758889dfd869072d4465973f6bfd"}, + {file = "greenlet-3.0.1-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eabe7090db68c981fca689299c2d116400b553f4b713266b130cfc9e2aa9c5a9"}, + {file = "greenlet-3.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f2f6d303f3dee132b322a14cd8765287b8f86cdc10d2cb6a6fae234ea488888e"}, + {file = "greenlet-3.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d923ff276f1c1f9680d32832f8d6c040fe9306cbfb5d161b0911e9634be9ef0a"}, + {file = "greenlet-3.0.1-cp38-cp38-win32.whl", hash = "sha256:0b6f9f8ca7093fd4433472fd99b5650f8a26dcd8ba410e14094c1e44cd3ceddd"}, + {file = "greenlet-3.0.1-cp38-cp38-win_amd64.whl", hash = 
"sha256:990066bff27c4fcf3b69382b86f4c99b3652bab2a7e685d968cd4d0cfc6f67c6"}, + {file = "greenlet-3.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ce85c43ae54845272f6f9cd8320d034d7a946e9773c693b27d620edec825e376"}, + {file = "greenlet-3.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89ee2e967bd7ff85d84a2de09df10e021c9b38c7d91dead95b406ed6350c6997"}, + {file = "greenlet-3.0.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:87c8ceb0cf8a5a51b8008b643844b7f4a8264a2c13fcbcd8a8316161725383fe"}, + {file = "greenlet-3.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d6a8c9d4f8692917a3dc7eb25a6fb337bff86909febe2f793ec1928cd97bedfc"}, + {file = "greenlet-3.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fbc5b8f3dfe24784cee8ce0be3da2d8a79e46a276593db6868382d9c50d97b1"}, + {file = "greenlet-3.0.1-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85d2b77e7c9382f004b41d9c72c85537fac834fb141b0296942d52bf03fe4a3d"}, + {file = "greenlet-3.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:696d8e7d82398e810f2b3622b24e87906763b6ebfd90e361e88eb85b0e554dc8"}, + {file = "greenlet-3.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:329c5a2e5a0ee942f2992c5e3ff40be03e75f745f48847f118a3cfece7a28546"}, + {file = "greenlet-3.0.1-cp39-cp39-win32.whl", hash = "sha256:cf868e08690cb89360eebc73ba4be7fb461cfbc6168dd88e2fbbe6f31812cd57"}, + {file = "greenlet-3.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:ac4a39d1abae48184d420aa8e5e63efd1b75c8444dd95daa3e03f6c6310e9619"}, + {file = "greenlet-3.0.1.tar.gz", hash = "sha256:816bd9488a94cba78d93e1abb58000e8266fa9cc2aa9ccdd6eb0696acb24005b"}, +] + +[package.extras] +docs = ["Sphinx"] +test = ["objgraph", "psutil"] + +[[package]] +name = "huggingface-hub" +version = "0.18.0" +description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" 
+category = "main" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "huggingface_hub-0.18.0-py3-none-any.whl", hash = "sha256:ee0b6b68acbf6aeb6d083ea081e981c277a1104b82ab67fdf6780ff5396830af"}, + {file = "huggingface_hub-0.18.0.tar.gz", hash = "sha256:10eda12b9c1cfa800b4b7c096b3ace8843734c3f28d69d1c243743fb7d7a2e81"}, +] + +[package.dependencies] +filelock = "*" +fsspec = ">=2023.5.0" +packaging = ">=20.9" +pyyaml = ">=5.1" +requests = "*" +tqdm = ">=4.42.1" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (==23.7)", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] +cli = ["InquirerPy (==0.3.4)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (==23.7)", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] +docs = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (==23.7)", "gradio", "hf-doc-builder", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)", "watchdog"] +fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] +inference = ["aiohttp", "pydantic (<2.0)"] +quality = ["black (==23.7)", "mypy (==1.5.1)", "ruff (>=0.0.241)"] +tensorflow = ["graphviz", "pydot", "tensorflow"] +testing = ["InquirerPy (==0.3.4)", 
"Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] +torch = ["torch"] +typing = ["pydantic (<2.0)", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] + [[package]] name = "idna" version = "3.4" @@ -466,6 +784,149 @@ files = [ {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, ] +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "joblib" +version = "1.3.2" +description = "Lightweight pipelining with Python functions" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "joblib-1.3.2-py3-none-any.whl", hash = "sha256:ef4331c65f239985f3f2220ecc87db222f08fd22097a3dd5698f693875f8cbb9"}, + {file = "joblib-1.3.2.tar.gz", hash = "sha256:92f865e621e17784e7955080b6d042489e3b8e294949cc44c6eac304f59772b1"}, +] + +[[package]] +name = "jsonpatch" +version = "1.33" +description = "Apply JSON-Patches (RFC 6902)" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, + {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, +] + +[package.dependencies] +jsonpointer = ">=1.9" + +[[package]] +name = "jsonpointer" +version = "2.4" 
+description = "Identify specific nodes in a JSON document (RFC 6901)" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, +] + +[[package]] +name = "langchain" +version = "0.0.324" +description = "Building applications with LLMs through composability" +category = "main" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "langchain-0.0.324-py3-none-any.whl", hash = "sha256:9be84d14e264567d52b93d0d2ba1e8cbf38c6e50a3914be02dbd9ea0fabaafd9"}, + {file = "langchain-0.0.324.tar.gz", hash = "sha256:d8dc589aa57699d51eeef8ce0507cd3faac4465ad0ff08dfb0a19e5661c3af44"}, +] + +[package.dependencies] +aiohttp = ">=3.8.3,<4.0.0" +anyio = "<4.0" +async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""} +dataclasses-json = ">=0.5.7,<0.7" +jsonpatch = ">=1.33,<2.0" +langsmith = ">=0.0.52,<0.1.0" +numpy = ">=1,<2" +pydantic = ">=1,<3" +PyYAML = ">=5.3" +requests = ">=2,<3" +SQLAlchemy = ">=1.4,<3" +tenacity = ">=8.1.0,<9.0.0" + +[package.extras] +all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu 
(>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.10.1,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<4)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.6.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] +azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (>=0,<1)"] +clarifai = ["clarifai (>=9.1.0)"] +cli = ["typer (>=0.9.0,<0.10.0)"] +cohere = ["cohere (>=4,<5)"] +docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"] +embeddings = 
["sentence-transformers (>=2,<3)"] +extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "amazon-textract-caller (<2)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "dashvector (>=1.0.1,<2.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (>=0,<1)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"] +javascript = ["esprima (>=4.0.1,<5.0.0)"] +llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"] +openai = ["openai (>=0,<1)", "tiktoken 
(>=0.3.2,<0.6.0)"] +qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"] +text-helpers = ["chardet (>=5.1.0,<6.0.0)"] + +[[package]] +name = "langsmith" +version = "0.0.52" +description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." +category = "main" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "langsmith-0.0.52-py3-none-any.whl", hash = "sha256:d02a0ade5a53b36143084e57003ed38ccbdf5fc15a5a0eb14f8989ceaee0b807"}, + {file = "langsmith-0.0.52.tar.gz", hash = "sha256:1dc29082d257deea1859cb22c53d9481ca5c4a37f3af40c0f9d300fb8adc91db"}, +] + +[package.dependencies] +pydantic = ">=1,<3" +requests = ">=2,<3" + +[[package]] +name = "llama-index" +version = "0.8.53.post3" +description = "Interface between LLMs and your data" +category = "main" +optional = false +python-versions = ">=3.8.1,<3.12" +files = [ + {file = "llama_index-0.8.53.post3-py3-none-any.whl", hash = "sha256:9333a32a4a394212c27e7e76a1eee0fd3d944c9be86250545953ad4fbd7f0d49"}, + {file = "llama_index-0.8.53.post3.tar.gz", hash = "sha256:2cc1b30350bd8292377841153415190a36e3bc50ccd46bb7908b420202b39770"}, +] + +[package.dependencies] +aiostream = ">=0.5.2,<0.6.0" +dataclasses-json = ">=0.5.7,<0.6.0" +deprecated = ">=1.2.9.3" +fsspec = ">=2023.5.0" +langchain = ">=0.0.303" +nest-asyncio = ">=1.5.8,<2.0.0" +nltk = ">=3.8.1,<4.0.0" +numpy = "*" +openai = ">=0.26.4" +pandas = "*" +SQLAlchemy = {version = ">=1.4.49", extras = ["asyncio"]} +tenacity = ">=8.2.0,<9.0.0" +tiktoken = ">=0.3.3" +typing-extensions = ">=4.5.0" +typing-inspect = ">=0.8.0" +urllib3 = "<2" + +[package.extras] +local-models = ["optimum[onnxruntime] (>=1.13.2,<2.0.0)", "sentencepiece (>=0.1.99,<0.2.0)", "transformers[torch] (>=4.34.0,<5.0.0)"] +postgres = ["asyncpg (>=0.28.0,<0.29.0)", "pgvector (>=0.1.0,<0.2.0)", "psycopg-binary (>=3.1.12,<4.0.0)"] +query-tools = ["guidance (>=0.0.64,<0.0.65)", "jsonpath-ng (>=1.6.0,<2.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "scikit-learn (<1.3.0)", "spacy 
(>=3.7.1,<4.0.0)"] + [[package]] name = "markdown-it-py" version = "3.0.0" @@ -491,6 +952,27 @@ profiling = ["gprof2dot"] rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] +[[package]] +name = "marshmallow" +version = "3.20.1" +description = "A lightweight library for converting complex datatypes to and from native Python datatypes." +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "marshmallow-3.20.1-py3-none-any.whl", hash = "sha256:684939db93e80ad3561392f47be0230743131560a41c5110684c16e21ade0a5c"}, + {file = "marshmallow-3.20.1.tar.gz", hash = "sha256:5d2371bbe42000f2b3fb5eaa065224df7d8f8597bc19a1bbfa5bfe7fba8da889"}, +] + +[package.dependencies] +packaging = ">=17.0" + +[package.extras] +dev = ["flake8 (==6.0.0)", "flake8-bugbear (==23.7.10)", "mypy (==1.4.1)", "pre-commit (>=2.4,<4.0)", "pytest", "pytz", "simplejson", "tox"] +docs = ["alabaster (==0.7.13)", "autodocsumm (==0.2.11)", "sphinx (==7.0.1)", "sphinx-issues (==3.0.1)", "sphinx-version-warning (==1.1.2)"] +lint = ["flake8 (==6.0.0)", "flake8-bugbear (==23.7.10)", "mypy (==1.4.1)", "pre-commit (>=2.4,<4.0)"] +tests = ["pytest", "pytz", "simplejson"] + [[package]] name = "mdurl" version = "0.1.2" @@ -587,6 +1069,35 @@ files = [ {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, ] +[[package]] +name = "multiprocess" +version = "0.70.15" +description = "better multiprocessing and multithreading in Python" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "multiprocess-0.70.15-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:aa36c7ed16f508091438687fe9baa393a7a8e206731d321e443745e743a0d4e5"}, + {file = "multiprocess-0.70.15-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = 
"sha256:20e024018c46d0d1602024c613007ac948f9754659e3853b0aa705e83f6931d8"}, + {file = "multiprocess-0.70.15-pp37-pypy37_pp73-manylinux_2_24_i686.whl", hash = "sha256:e576062981c91f0fe8a463c3d52506e598dfc51320a8dd8d78b987dfca91c5db"}, + {file = "multiprocess-0.70.15-pp37-pypy37_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:e73f497e6696a0f5433ada2b3d599ae733b87a6e8b008e387c62ac9127add177"}, + {file = "multiprocess-0.70.15-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:73db2e7b32dcc7f9b0f075c2ffa45c90b6729d3f1805f27e88534c8d321a1be5"}, + {file = "multiprocess-0.70.15-pp38-pypy38_pp73-manylinux_2_24_i686.whl", hash = "sha256:4271647bd8a49c28ecd6eb56a7fdbd3c212c45529ad5303b40b3c65fc6928e5f"}, + {file = "multiprocess-0.70.15-pp38-pypy38_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:cf981fb998d6ec3208cb14f0cf2e9e80216e834f5d51fd09ebc937c32b960902"}, + {file = "multiprocess-0.70.15-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:18f9f2c7063346d1617bd1684fdcae8d33380ae96b99427260f562e1a1228b67"}, + {file = "multiprocess-0.70.15-pp39-pypy39_pp73-manylinux_2_24_i686.whl", hash = "sha256:0eac53214d664c49a34695e5824872db4006b1a465edd7459a251809c3773370"}, + {file = "multiprocess-0.70.15-pp39-pypy39_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:1a51dd34096db47fb21fa2b839e615b051d51b97af9a67afbcdaa67186b44883"}, + {file = "multiprocess-0.70.15-py310-none-any.whl", hash = "sha256:7dd58e33235e83cf09d625e55cffd7b0f0eede7ee9223cdd666a87624f60c21a"}, + {file = "multiprocess-0.70.15-py311-none-any.whl", hash = "sha256:134f89053d82c9ed3b73edd3a2531eb791e602d4f4156fc92a79259590bd9670"}, + {file = "multiprocess-0.70.15-py37-none-any.whl", hash = "sha256:f7d4a1629bccb433114c3b4885f69eccc200994323c80f6feee73b0edc9199c5"}, + {file = "multiprocess-0.70.15-py38-none-any.whl", hash = "sha256:bee9afba476c91f9ebee7beeee0601face9eff67d822e893f9a893725fbd6316"}, + {file = "multiprocess-0.70.15-py39-none-any.whl", hash = 
"sha256:3e0953f5d52b4c76f1c973eaf8214554d146f2be5decb48e928e55c7a2d19338"}, + {file = "multiprocess-0.70.15.tar.gz", hash = "sha256:f20eed3036c0ef477b07a4177cf7c1ba520d9a2677870a4f47fe026f0cd6787e"}, +] + +[package.dependencies] +dill = ">=0.3.7" + [[package]] name = "mypy-extensions" version = "1.0.0" @@ -599,6 +1110,44 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "nest-asyncio" +version = "1.5.8" +description = "Patch asyncio to allow nested event loops" +category = "main" +optional = false +python-versions = ">=3.5" +files = [ + {file = "nest_asyncio-1.5.8-py3-none-any.whl", hash = "sha256:accda7a339a70599cb08f9dd09a67e0c2ef8d8d6f4c07f96ab203f2ae254e48d"}, + {file = "nest_asyncio-1.5.8.tar.gz", hash = "sha256:25aa2ca0d2a5b5531956b9e273b45cf664cae2b145101d73b86b199978d48fdb"}, +] + +[[package]] +name = "nltk" +version = "3.8.1" +description = "Natural Language Toolkit" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "nltk-3.8.1-py3-none-any.whl", hash = "sha256:fd5c9109f976fa86bcadba8f91e47f5e9293bd034474752e92a520f81c93dda5"}, + {file = "nltk-3.8.1.zip", hash = "sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3"}, +] + +[package.dependencies] +click = "*" +joblib = "*" +regex = ">=2021.8.3" +tqdm = "*" + +[package.extras] +all = ["matplotlib", "numpy", "pyparsing", "python-crfsuite", "requests", "scikit-learn", "scipy", "twython"] +corenlp = ["requests"] +machine-learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"] +plot = ["matplotlib"] +tgrep = ["pyparsing"] +twitter = ["twython"] + [[package]] name = "numpy" version = "1.26.1" @@ -676,6 +1225,74 @@ files = [ {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, ] +[[package]] +name = "pandas" +version = "2.1.2" +description = "Powerful data structures for data 
analysis, time series, and statistics" +category = "main" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pandas-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:24057459f19db9ebb02984c6fdd164a970b31a95f38e4a49cf7615b36a1b532c"}, + {file = "pandas-2.1.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a6cf8fcc8a63d333970b950a7331a30544cf59b1a97baf0a7409e09eafc1ac38"}, + {file = "pandas-2.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ae6ffbd9d614c20d028c7117ee911fc4e266b4dca2065d5c5909e401f8ff683"}, + {file = "pandas-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eff794eeb7883c5aefb1ed572e7ff533ae779f6c6277849eab9e77986e352688"}, + {file = "pandas-2.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:02954e285e8e2f4006b6f22be6f0df1f1c3c97adbb7ed211c6b483426f20d5c8"}, + {file = "pandas-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:5b40c9f494e1f27588c369b9e4a6ca19cd924b3a0e1ef9ef1a8e30a07a438f43"}, + {file = "pandas-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:08d287b68fd28906a94564f15118a7ca8c242e50ae7f8bd91130c362b2108a81"}, + {file = "pandas-2.1.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bbd98dcdcd32f408947afdb3f7434fade6edd408c3077bbce7bd840d654d92c6"}, + {file = "pandas-2.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e90c95abb3285d06f6e4feedafc134306a8eced93cb78e08cf50e224d5ce22e2"}, + {file = "pandas-2.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52867d69a54e71666cd184b04e839cff7dfc8ed0cd6b936995117fdae8790b69"}, + {file = "pandas-2.1.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8d0382645ede2fde352da2a885aac28ec37d38587864c0689b4b2361d17b1d4c"}, + {file = "pandas-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:65177d1c519b55e5b7f094c660ed357bb7d86e799686bb71653b8a4803d8ff0d"}, + {file = "pandas-2.1.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:5aa6b86802e8cf7716bf4b4b5a3c99b12d34e9c6a9d06dad254447a620437931"}, + {file = "pandas-2.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d594e2ce51b8e0b4074e6644758865dc2bb13fd654450c1eae51201260a539f1"}, + {file = "pandas-2.1.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3223f997b6d2ebf9c010260cf3d889848a93f5d22bb4d14cd32638b3d8bba7ad"}, + {file = "pandas-2.1.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc4944dc004ca6cc701dfa19afb8bdb26ad36b9bed5bcec617d2a11e9cae6902"}, + {file = "pandas-2.1.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3f76280ce8ec216dde336e55b2b82e883401cf466da0fe3be317c03fb8ee7c7d"}, + {file = "pandas-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:7ad20d24acf3a0042512b7e8d8fdc2e827126ed519d6bd1ed8e6c14ec8a2c813"}, + {file = "pandas-2.1.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:021f09c15e1381e202d95d4a21ece8e7f2bf1388b6d7e9cae09dfe27bd2043d1"}, + {file = "pandas-2.1.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e7f12b2de0060b0b858cfec0016e7d980ae5bae455a1746bfcc70929100ee633"}, + {file = "pandas-2.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83c166b9bb27c1715bed94495d9598a7f02950b4749dba9349c1dd2cbf10729d"}, + {file = "pandas-2.1.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25c9976c17311388fcd953cb3d0697999b2205333f4e11e669d90ff8d830d429"}, + {file = "pandas-2.1.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:851b5afbb0d62f6129ae891b533aa508cc357d5892c240c91933d945fff15731"}, + {file = "pandas-2.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:e78507adcc730533619de07bfdd1c62b2918a68cd4419ea386e28abf7f6a1e5c"}, + {file = "pandas-2.1.2.tar.gz", hash = "sha256:52897edc2774d2779fbeb6880d2cfb305daa0b1a29c16b91f531a18918a6e0f3"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2,<2", markers = 
"python_version == \"3.11\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.1" + +[package.extras] +all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"] +aws = ["s3fs (>=2022.05.0)"] +clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"] +compression = ["zstandard (>=0.17.0)"] +computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"] +feather = ["pyarrow (>=7.0.0)"] +fss = ["fsspec (>=2022.05.0)"] +gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"] +hdf5 = ["tables (>=3.7.0)"] +html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"] +mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"] +parquet = ["pyarrow (>=7.0.0)"] +performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"] +plot = ["matplotlib (>=3.6.1)"] +postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"] +spss = ["pyreadstat (>=1.1.5)"] +sql-other = ["SQLAlchemy (>=1.4.36)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", 
"pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.8.0)"] + [[package]] name = "pathspec" version = "0.11.2" @@ -704,6 +1321,22 @@ files = [ docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"] test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"] +[[package]] +name = "pluggy" +version = "1.3.0" +description = "plugin and hook calling mechanisms for python" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, + {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + [[package]] name = "prompt-toolkit" version = "3.0.36" @@ -719,6 +1352,187 @@ files = [ [package.dependencies] wcwidth = "*" +[[package]] +name = "pyarrow" +version = "13.0.0" +description = "Python library for Apache Arrow" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyarrow-13.0.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:1afcc2c33f31f6fb25c92d50a86b7a9f076d38acbcb6f9e74349636109550148"}, + {file = "pyarrow-13.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:70fa38cdc66b2fc1349a082987f2b499d51d072faaa6b600f71931150de2e0e3"}, + {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd57b13a6466822498238877892a9b287b0a58c2e81e4bdb0b596dbb151cbb73"}, + {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8ce69f7bf01de2e2764e14df45b8404fc6f1a5ed9871e8e08a12169f87b7a26"}, + {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:588f0d2da6cf1b1680974d63be09a6530fd1bd825dc87f76e162404779a157dc"}, + {file = 
"pyarrow-13.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:6241afd72b628787b4abea39e238e3ff9f34165273fad306c7acf780dd850956"}, + {file = "pyarrow-13.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:fda7857e35993673fcda603c07d43889fca60a5b254052a462653f8656c64f44"}, + {file = "pyarrow-13.0.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:aac0ae0146a9bfa5e12d87dda89d9ef7c57a96210b899459fc2f785303dcbb67"}, + {file = "pyarrow-13.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d7759994217c86c161c6a8060509cfdf782b952163569606bb373828afdd82e8"}, + {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:868a073fd0ff6468ae7d869b5fc1f54de5c4255b37f44fb890385eb68b68f95d"}, + {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51be67e29f3cfcde263a113c28e96aa04362ed8229cb7c6e5f5c719003659d33"}, + {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:d1b4e7176443d12610874bb84d0060bf080f000ea9ed7c84b2801df851320295"}, + {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:69b6f9a089d116a82c3ed819eea8fe67dae6105f0d81eaf0fdd5e60d0c6e0944"}, + {file = "pyarrow-13.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:ab1268db81aeb241200e321e220e7cd769762f386f92f61b898352dd27e402ce"}, + {file = "pyarrow-13.0.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:ee7490f0f3f16a6c38f8c680949551053c8194e68de5046e6c288e396dccee80"}, + {file = "pyarrow-13.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e3ad79455c197a36eefbd90ad4aa832bece7f830a64396c15c61a0985e337287"}, + {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68fcd2dc1b7d9310b29a15949cdd0cb9bc34b6de767aff979ebf546020bf0ba0"}, + {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc6fd330fd574c51d10638e63c0d00ab456498fc804c9d01f2a61b9264f2c5b2"}, + {file = 
"pyarrow-13.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:e66442e084979a97bb66939e18f7b8709e4ac5f887e636aba29486ffbf373763"}, + {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:0f6eff839a9e40e9c5610d3ff8c5bdd2f10303408312caf4c8003285d0b49565"}, + {file = "pyarrow-13.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b30a27f1cddf5c6efcb67e598d7823a1e253d743d92ac32ec1eb4b6a1417867"}, + {file = "pyarrow-13.0.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:09552dad5cf3de2dc0aba1c7c4b470754c69bd821f5faafc3d774bedc3b04bb7"}, + {file = "pyarrow-13.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3896ae6c205d73ad192d2fc1489cd0edfab9f12867c85b4c277af4d37383c18c"}, + {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6647444b21cb5e68b593b970b2a9a07748dd74ea457c7dadaa15fd469c48ada1"}, + {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47663efc9c395e31d09c6aacfa860f4473815ad6804311c5433f7085415d62a7"}, + {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:b9ba6b6d34bd2563345488cf444510588ea42ad5613df3b3509f48eb80250afd"}, + {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:d00d374a5625beeb448a7fa23060df79adb596074beb3ddc1838adb647b6ef09"}, + {file = "pyarrow-13.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:c51afd87c35c8331b56f796eff954b9c7f8d4b7fef5903daf4e05fcf017d23a8"}, + {file = "pyarrow-13.0.0.tar.gz", hash = "sha256:83333726e83ed44b0ac94d8d7a21bbdee4a05029c3b1e8db58a863eec8fd8a33"}, +] + +[package.dependencies] +numpy = ">=1.16.6" + +[[package]] +name = "pydantic" +version = "2.4.2" +description = "Data validation using Python type hints" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pydantic-2.4.2-py3-none-any.whl", hash = "sha256:bc3ddf669d234f4220e6e1c4d96b061abe0998185a8d7855c0126782b7abc8c1"}, + {file = "pydantic-2.4.2.tar.gz", hash = 
"sha256:94f336138093a5d7f426aac732dcfe7ab4eb4da243c88f891d65deb4a2556ee7"}, +] + +[package.dependencies] +annotated-types = ">=0.4.0" +pydantic-core = "2.10.1" +typing-extensions = ">=4.6.1" + +[package.extras] +email = ["email-validator (>=2.0.0)"] + +[[package]] +name = "pydantic-core" +version = "2.10.1" +description = "" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pydantic_core-2.10.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:d64728ee14e667ba27c66314b7d880b8eeb050e58ffc5fec3b7a109f8cddbd63"}, + {file = "pydantic_core-2.10.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:48525933fea744a3e7464c19bfede85df4aba79ce90c60b94d8b6e1eddd67096"}, + {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef337945bbd76cce390d1b2496ccf9f90b1c1242a3a7bc242ca4a9fc5993427a"}, + {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1392e0638af203cee360495fd2cfdd6054711f2db5175b6e9c3c461b76f5175"}, + {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0675ba5d22de54d07bccde38997e780044dcfa9a71aac9fd7d4d7a1d2e3e65f7"}, + {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:128552af70a64660f21cb0eb4876cbdadf1a1f9d5de820fed6421fa8de07c893"}, + {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f6e6aed5818c264412ac0598b581a002a9f050cb2637a84979859e70197aa9e"}, + {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ecaac27da855b8d73f92123e5f03612b04c5632fd0a476e469dfc47cd37d6b2e"}, + {file = "pydantic_core-2.10.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b3c01c2fb081fced3bbb3da78510693dc7121bb893a1f0f5f4b48013201f362e"}, + {file = "pydantic_core-2.10.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:92f675fefa977625105708492850bcbc1182bfc3e997f8eecb866d1927c98ae6"}, + {file = "pydantic_core-2.10.1-cp310-none-win32.whl", hash = "sha256:420a692b547736a8d8703c39ea935ab5d8f0d2573f8f123b0a294e49a73f214b"}, + {file = "pydantic_core-2.10.1-cp310-none-win_amd64.whl", hash = "sha256:0880e239827b4b5b3e2ce05e6b766a7414e5f5aedc4523be6b68cfbc7f61c5d0"}, + {file = "pydantic_core-2.10.1-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:073d4a470b195d2b2245d0343569aac7e979d3a0dcce6c7d2af6d8a920ad0bea"}, + {file = "pydantic_core-2.10.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:600d04a7b342363058b9190d4e929a8e2e715c5682a70cc37d5ded1e0dd370b4"}, + {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39215d809470f4c8d1881758575b2abfb80174a9e8daf8f33b1d4379357e417c"}, + {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eeb3d3d6b399ffe55f9a04e09e635554012f1980696d6b0aca3e6cf42a17a03b"}, + {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a7a7902bf75779bc12ccfc508bfb7a4c47063f748ea3de87135d433a4cca7a2f"}, + {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3625578b6010c65964d177626fde80cf60d7f2e297d56b925cb5cdeda6e9925a"}, + {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:caa48fc31fc7243e50188197b5f0c4228956f97b954f76da157aae7f67269ae8"}, + {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:07ec6d7d929ae9c68f716195ce15e745b3e8fa122fc67698ac6498d802ed0fa4"}, + {file = "pydantic_core-2.10.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e6f31a17acede6a8cd1ae2d123ce04d8cca74056c9d456075f4f6f85de055607"}, + {file = "pydantic_core-2.10.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:d8f1ebca515a03e5654f88411420fea6380fc841d1bea08effb28184e3d4899f"}, + {file = "pydantic_core-2.10.1-cp311-none-win32.whl", hash = "sha256:6db2eb9654a85ada248afa5a6db5ff1cf0f7b16043a6b070adc4a5be68c716d6"}, + {file = "pydantic_core-2.10.1-cp311-none-win_amd64.whl", hash = "sha256:4a5be350f922430997f240d25f8219f93b0c81e15f7b30b868b2fddfc2d05f27"}, + {file = "pydantic_core-2.10.1-cp311-none-win_arm64.whl", hash = "sha256:5fdb39f67c779b183b0c853cd6b45f7db84b84e0571b3ef1c89cdb1dfc367325"}, + {file = "pydantic_core-2.10.1-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:b1f22a9ab44de5f082216270552aa54259db20189e68fc12484873d926426921"}, + {file = "pydantic_core-2.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8572cadbf4cfa95fb4187775b5ade2eaa93511f07947b38f4cd67cf10783b118"}, + {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db9a28c063c7c00844ae42a80203eb6d2d6bbb97070cfa00194dff40e6f545ab"}, + {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0e2a35baa428181cb2270a15864ec6286822d3576f2ed0f4cd7f0c1708472aff"}, + {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05560ab976012bf40f25d5225a58bfa649bb897b87192a36c6fef1ab132540d7"}, + {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d6495008733c7521a89422d7a68efa0a0122c99a5861f06020ef5b1f51f9ba7c"}, + {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14ac492c686defc8e6133e3a2d9eaf5261b3df26b8ae97450c1647286750b901"}, + {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8282bab177a9a3081fd3d0a0175a07a1e2bfb7fcbbd949519ea0980f8a07144d"}, + {file = "pydantic_core-2.10.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:aafdb89fdeb5fe165043896817eccd6434aee124d5ee9b354f92cd574ba5e78f"}, + {file = "pydantic_core-2.10.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f6defd966ca3b187ec6c366604e9296f585021d922e666b99c47e78738b5666c"}, + {file = "pydantic_core-2.10.1-cp312-none-win32.whl", hash = "sha256:7c4d1894fe112b0864c1fa75dffa045720a194b227bed12f4be7f6045b25209f"}, + {file = "pydantic_core-2.10.1-cp312-none-win_amd64.whl", hash = "sha256:5994985da903d0b8a08e4935c46ed8daf5be1cf217489e673910951dc533d430"}, + {file = "pydantic_core-2.10.1-cp312-none-win_arm64.whl", hash = "sha256:0d8a8adef23d86d8eceed3e32e9cca8879c7481c183f84ed1a8edc7df073af94"}, + {file = "pydantic_core-2.10.1-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:9badf8d45171d92387410b04639d73811b785b5161ecadabf056ea14d62d4ede"}, + {file = "pydantic_core-2.10.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:ebedb45b9feb7258fac0a268a3f6bec0a2ea4d9558f3d6f813f02ff3a6dc6698"}, + {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cfe1090245c078720d250d19cb05d67e21a9cd7c257698ef139bc41cf6c27b4f"}, + {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e357571bb0efd65fd55f18db0a2fb0ed89d0bb1d41d906b138f088933ae618bb"}, + {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b3dcd587b69bbf54fc04ca157c2323b8911033e827fffaecf0cafa5a892a0904"}, + {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c120c9ce3b163b985a3b966bb701114beb1da4b0468b9b236fc754783d85aa3"}, + {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15d6bca84ffc966cc9976b09a18cf9543ed4d4ecbd97e7086f9ce9327ea48891"}, + {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5cabb9710f09d5d2e9e2748c3e3e20d991a4c5f96ed8f1132518f54ab2967221"}, + 
{file = "pydantic_core-2.10.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:82f55187a5bebae7d81d35b1e9aaea5e169d44819789837cdd4720d768c55d15"}, + {file = "pydantic_core-2.10.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1d40f55222b233e98e3921df7811c27567f0e1a4411b93d4c5c0f4ce131bc42f"}, + {file = "pydantic_core-2.10.1-cp37-none-win32.whl", hash = "sha256:14e09ff0b8fe6e46b93d36a878f6e4a3a98ba5303c76bb8e716f4878a3bee92c"}, + {file = "pydantic_core-2.10.1-cp37-none-win_amd64.whl", hash = "sha256:1396e81b83516b9d5c9e26a924fa69164156c148c717131f54f586485ac3c15e"}, + {file = "pydantic_core-2.10.1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:6835451b57c1b467b95ffb03a38bb75b52fb4dc2762bb1d9dbed8de31ea7d0fc"}, + {file = "pydantic_core-2.10.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b00bc4619f60c853556b35f83731bd817f989cba3e97dc792bb8c97941b8053a"}, + {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fa467fd300a6f046bdb248d40cd015b21b7576c168a6bb20aa22e595c8ffcdd"}, + {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d99277877daf2efe074eae6338453a4ed54a2d93fb4678ddfe1209a0c93a2468"}, + {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fa7db7558607afeccb33c0e4bf1c9a9a835e26599e76af6fe2fcea45904083a6"}, + {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aad7bd686363d1ce4ee930ad39f14e1673248373f4a9d74d2b9554f06199fb58"}, + {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:443fed67d33aa85357464f297e3d26e570267d1af6fef1c21ca50921d2976302"}, + {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:042462d8d6ba707fd3ce9649e7bf268633a41018d6a998fb5fbacb7e928a183e"}, + {file = "pydantic_core-2.10.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:ecdbde46235f3d560b18be0cb706c8e8ad1b965e5c13bbba7450c86064e96561"}, + {file = "pydantic_core-2.10.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ed550ed05540c03f0e69e6d74ad58d026de61b9eaebebbaaf8873e585cbb18de"}, + {file = "pydantic_core-2.10.1-cp38-none-win32.whl", hash = "sha256:8cdbbd92154db2fec4ec973d45c565e767ddc20aa6dbaf50142676484cbff8ee"}, + {file = "pydantic_core-2.10.1-cp38-none-win_amd64.whl", hash = "sha256:9f6f3e2598604956480f6c8aa24a3384dbf6509fe995d97f6ca6103bb8c2534e"}, + {file = "pydantic_core-2.10.1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:655f8f4c8d6a5963c9a0687793da37b9b681d9ad06f29438a3b2326d4e6b7970"}, + {file = "pydantic_core-2.10.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e570ffeb2170e116a5b17e83f19911020ac79d19c96f320cbfa1fa96b470185b"}, + {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64322bfa13e44c6c30c518729ef08fda6026b96d5c0be724b3c4ae4da939f875"}, + {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:485a91abe3a07c3a8d1e082ba29254eea3e2bb13cbbd4351ea4e5a21912cc9b0"}, + {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7c2b8eb9fc872e68b46eeaf835e86bccc3a58ba57d0eedc109cbb14177be531"}, + {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a5cb87bdc2e5f620693148b5f8f842d293cae46c5f15a1b1bf7ceeed324a740c"}, + {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25bd966103890ccfa028841a8f30cebcf5875eeac8c4bde4fe221364c92f0c9a"}, + {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f323306d0556351735b54acbf82904fe30a27b6a7147153cbe6e19aaaa2aa429"}, + {file = "pydantic_core-2.10.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0c27f38dc4fbf07b358b2bc90edf35e82d1703e22ff2efa4af4ad5de1b3833e7"}, + 
{file = "pydantic_core-2.10.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f1365e032a477c1430cfe0cf2856679529a2331426f8081172c4a74186f1d595"}, + {file = "pydantic_core-2.10.1-cp39-none-win32.whl", hash = "sha256:a1c311fd06ab3b10805abb72109f01a134019739bd3286b8ae1bc2fc4e50c07a"}, + {file = "pydantic_core-2.10.1-cp39-none-win_amd64.whl", hash = "sha256:ae8a8843b11dc0b03b57b52793e391f0122e740de3df1474814c700d2622950a"}, + {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d43002441932f9a9ea5d6f9efaa2e21458221a3a4b417a14027a1d530201ef1b"}, + {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fcb83175cc4936a5425dde3356f079ae03c0802bbdf8ff82c035f8a54b333521"}, + {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:962ed72424bf1f72334e2f1e61b68f16c0e596f024ca7ac5daf229f7c26e4208"}, + {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2cf5bb4dd67f20f3bbc1209ef572a259027c49e5ff694fa56bed62959b41e1f9"}, + {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e544246b859f17373bed915182ab841b80849ed9cf23f1f07b73b7c58baee5fb"}, + {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c0877239307b7e69d025b73774e88e86ce82f6ba6adf98f41069d5b0b78bd1bf"}, + {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:53df009d1e1ba40f696f8995683e067e3967101d4bb4ea6f667931b7d4a01357"}, + {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a1254357f7e4c82e77c348dabf2d55f1d14d19d91ff025004775e70a6ef40ada"}, + {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:524ff0ca3baea164d6d93a32c58ac79eca9f6cf713586fdc0adb66a8cdeab96a"}, + {file = 
"pydantic_core-2.10.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f0ac9fb8608dbc6eaf17956bf623c9119b4db7dbb511650910a82e261e6600f"}, + {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:320f14bd4542a04ab23747ff2c8a778bde727158b606e2661349557f0770711e"}, + {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:63974d168b6233b4ed6a0046296803cb13c56637a7b8106564ab575926572a55"}, + {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:417243bf599ba1f1fef2bb8c543ceb918676954734e2dcb82bf162ae9d7bd514"}, + {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:dda81e5ec82485155a19d9624cfcca9be88a405e2857354e5b089c2a982144b2"}, + {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:14cfbb00959259e15d684505263d5a21732b31248a5dd4941f73a3be233865b9"}, + {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:631cb7415225954fdcc2a024119101946793e5923f6c4d73a5914d27eb3d3a05"}, + {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:bec7dd208a4182e99c5b6c501ce0b1f49de2802448d4056091f8e630b28e9a52"}, + {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:149b8a07712f45b332faee1a2258d8ef1fb4a36f88c0c17cb687f205c5dc6e7d"}, + {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d966c47f9dd73c2d32a809d2be529112d509321c5310ebf54076812e6ecd884"}, + {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7eb037106f5c6b3b0b864ad226b0b7ab58157124161d48e4b30c4a43fef8bc4b"}, + {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = 
"sha256:154ea7c52e32dce13065dbb20a4a6f0cc012b4f667ac90d648d36b12007fa9f7"}, + {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e562617a45b5a9da5be4abe72b971d4f00bf8555eb29bb91ec2ef2be348cd132"}, + {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:f23b55eb5464468f9e0e9a9935ce3ed2a870608d5f534025cd5536bca25b1402"}, + {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:e9121b4009339b0f751955baf4543a0bfd6bc3f8188f8056b1a25a2d45099934"}, + {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:0523aeb76e03f753b58be33b26540880bac5aa54422e4462404c432230543f33"}, + {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e0e2959ef5d5b8dc9ef21e1a305a21a36e254e6a34432d00c72a92fdc5ecda5"}, + {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da01bec0a26befab4898ed83b362993c844b9a607a86add78604186297eb047e"}, + {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f2e9072d71c1f6cfc79a36d4484c82823c560e6f5599c43c1ca6b5cdbd54f881"}, + {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f36a3489d9e28fe4b67be9992a23029c3cec0babc3bd9afb39f49844a8c721c5"}, + {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f64f82cc3443149292b32387086d02a6c7fb39b8781563e0ca7b8d7d9cf72bd7"}, + {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:b4a6db486ac8e99ae696e09efc8b2b9fea67b63c8f88ba7a1a16c24a057a0776"}, + {file = "pydantic_core-2.10.1.tar.gz", hash = "sha256:0f8682dbdd2f67f8e1edddcbffcc29f60a6182b4901c367fc8c1c40d30bb0a82"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + [[package]] name = "pygments" version = "2.16.1" @@ -794,6 +1608,44 @@ files = [ {file = 
"PyMuPDFb-1.23.5-py3-none-win_amd64.whl", hash = "sha256:85cbc308085a4ec794e0da790965985cc5ccb21b2abc09732e072f6eaf10150b"}, ] +[[package]] +name = "pytest" +version = "7.4.3" +description = "pytest: simple powerful testing with Python" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.4.3-py3-none-any.whl", hash = "sha256:0d009c083ea859a71b76adf7c1d502e4bc170b80a8ef002da5806527b9591fac"}, + {file = "pytest-7.4.3.tar.gz", hash = "sha256:d989d136982de4e3b29dabcc838ad581c64e8ed52c11fbe86ddebd9da0818cd5"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + [[package]] name = "pytz" version = "2023.3.post1" @@ -806,6 +1658,66 @@ files = [ {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, ] +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = 
"PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = 
"PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = 
"sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = 
"sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + [[package]] name = "questionary" version = "2.0.1" @@ -960,6 +1872,23 @@ pygments = ">=2.13.0,<3.0.0" [package.extras] jupyter = ["ipywidgets (>=7.5.1,<9)"] +[[package]] +name = "setuptools" +version = "68.2.2" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-68.2.2-py3-none-any.whl", hash = 
"sha256:b454a35605876da60632df1a60f736524eb73cc47bbc9f3f1ef1b644de74fd2a"}, + {file = "setuptools-68.2.2.tar.gz", hash = "sha256:4ac1475276d2f1c48684874089fefcd83bd7162ddaafb81fac866ba0db282a87"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + [[package]] name = "shellingham" version = "1.5.3" @@ -972,6 +1901,132 @@ files = [ {file = "shellingham-1.5.3.tar.gz", hash = "sha256:cb4a6fec583535bc6da17b647dd2330cf7ef30239e05d547d99ae3705fd0f7f8"}, ] +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "sniffio" +version = "1.3.0" +description = "Sniff out which async library your code is running under" +category = 
"main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, + {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, +] + +[[package]] +name = "sqlalchemy" +version = "2.0.22" +description = "Database Abstraction Library" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "SQLAlchemy-2.0.22-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f146c61ae128ab43ea3a0955de1af7e1633942c2b2b4985ac51cc292daf33222"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:875de9414393e778b655a3d97d60465eb3fae7c919e88b70cc10b40b9f56042d"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13790cb42f917c45c9c850b39b9941539ca8ee7917dacf099cc0b569f3d40da7"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e04ab55cf49daf1aeb8c622c54d23fa4bec91cb051a43cc24351ba97e1dd09f5"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a42c9fa3abcda0dcfad053e49c4f752eef71ecd8c155221e18b99d4224621176"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:14cd3bcbb853379fef2cd01e7c64a5d6f1d005406d877ed9509afb7a05ff40a5"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-win32.whl", hash = "sha256:d143c5a9dada696bcfdb96ba2de4a47d5a89168e71d05a076e88a01386872f97"}, + {file = "SQLAlchemy-2.0.22-cp310-cp310-win_amd64.whl", hash = "sha256:ccd87c25e4c8559e1b918d46b4fa90b37f459c9b4566f1dfbce0eb8122571547"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4f6ff392b27a743c1ad346d215655503cec64405d3b694228b3454878bf21590"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:f776c2c30f0e5f4db45c3ee11a5f2a8d9de68e81eb73ec4237de1e32e04ae81c"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c8f1792d20d2f4e875ce7a113f43c3561ad12b34ff796b84002a256f37ce9437"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d80eeb5189d7d4b1af519fc3f148fe7521b9dfce8f4d6a0820e8f5769b005051"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:69fd9e41cf9368afa034e1c81f3570afb96f30fcd2eb1ef29cb4d9371c6eece2"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:54bcceaf4eebef07dadfde424f5c26b491e4a64e61761dea9459103ecd6ccc95"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-win32.whl", hash = "sha256:7ee7ccf47aa503033b6afd57efbac6b9e05180f492aeed9fcf70752556f95624"}, + {file = "SQLAlchemy-2.0.22-cp311-cp311-win_amd64.whl", hash = "sha256:b560f075c151900587ade06706b0c51d04b3277c111151997ea0813455378ae0"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:2c9bac865ee06d27a1533471405ad240a6f5d83195eca481f9fc4a71d8b87df8"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:625b72d77ac8ac23da3b1622e2da88c4aedaee14df47c8432bf8f6495e655de2"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b39a6e21110204a8c08d40ff56a73ba542ec60bab701c36ce721e7990df49fb9"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53a766cb0b468223cafdf63e2d37f14a4757476157927b09300c8c5832d88560"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0e1ce8ebd2e040357dde01a3fb7d30d9b5736b3e54a94002641dfd0aa12ae6ce"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:505f503763a767556fa4deae5194b2be056b64ecca72ac65224381a0acab7ebe"}, + {file = 
"SQLAlchemy-2.0.22-cp312-cp312-win32.whl", hash = "sha256:154a32f3c7b00de3d090bc60ec8006a78149e221f1182e3edcf0376016be9396"}, + {file = "SQLAlchemy-2.0.22-cp312-cp312-win_amd64.whl", hash = "sha256:129415f89744b05741c6f0b04a84525f37fbabe5dc3774f7edf100e7458c48cd"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3940677d341f2b685a999bffe7078697b5848a40b5f6952794ffcf3af150c301"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55914d45a631b81a8a2cb1a54f03eea265cf1783241ac55396ec6d735be14883"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2096d6b018d242a2bcc9e451618166f860bb0304f590d205173d317b69986c95"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:19c6986cf2fb4bc8e0e846f97f4135a8e753b57d2aaaa87c50f9acbe606bd1db"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6ac28bd6888fe3c81fbe97584eb0b96804bd7032d6100b9701255d9441373ec1"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-win32.whl", hash = "sha256:cb9a758ad973e795267da334a92dd82bb7555cb36a0960dcabcf724d26299db8"}, + {file = "SQLAlchemy-2.0.22-cp37-cp37m-win_amd64.whl", hash = "sha256:40b1206a0d923e73aa54f0a6bd61419a96b914f1cd19900b6c8226899d9742ad"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3aa1472bf44f61dd27987cd051f1c893b7d3b17238bff8c23fceaef4f1133868"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:56a7e2bb639df9263bf6418231bc2a92a773f57886d371ddb7a869a24919face"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ccca778c0737a773a1ad86b68bda52a71ad5950b25e120b6eb1330f0df54c3d0"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c6c3e9350f9fb16de5b5e5fbf17b578811a52d71bb784cc5ff71acb7de2a7f9"}, + {file = 
"SQLAlchemy-2.0.22-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:564e9f9e4e6466273dbfab0e0a2e5fe819eec480c57b53a2cdee8e4fdae3ad5f"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:af66001d7b76a3fab0d5e4c1ec9339ac45748bc4a399cbc2baa48c1980d3c1f4"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-win32.whl", hash = "sha256:9e55dff5ec115316dd7a083cdc1a52de63693695aecf72bc53a8e1468ce429e5"}, + {file = "SQLAlchemy-2.0.22-cp38-cp38-win_amd64.whl", hash = "sha256:4e869a8ff7ee7a833b74868a0887e8462445ec462432d8cbeff5e85f475186da"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9886a72c8e6371280cb247c5d32c9c8fa141dc560124348762db8a8b236f8692"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a571bc8ac092a3175a1d994794a8e7a1f2f651e7c744de24a19b4f740fe95034"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8db5ba8b7da759b727faebc4289a9e6a51edadc7fc32207a30f7c6203a181592"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b0b3f2686c3f162123adba3cb8b626ed7e9b8433ab528e36ed270b4f70d1cdb"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0c1fea8c0abcb070ffe15311853abfda4e55bf7dc1d4889497b3403629f3bf00"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4bb062784f37b2d75fd9b074c8ec360ad5df71f933f927e9e95c50eb8e05323c"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-win32.whl", hash = "sha256:58a3aba1bfb32ae7af68da3f277ed91d9f57620cf7ce651db96636790a78b736"}, + {file = "SQLAlchemy-2.0.22-cp39-cp39-win_amd64.whl", hash = "sha256:92e512a6af769e4725fa5b25981ba790335d42c5977e94ded07db7d641490a85"}, + {file = "SQLAlchemy-2.0.22-py3-none-any.whl", hash = "sha256:3076740335e4aaadd7deb3fe6dcb96b3015f1613bd190a4e1634e1b99b02ec86"}, + {file = "SQLAlchemy-2.0.22.tar.gz", hash = 
"sha256:5434cc601aa17570d79e5377f5fd45ff92f9379e2abed0be5e8c2fba8d353d2b"}, +] + +[package.dependencies] +greenlet = {version = "!=0.4.17", optional = true, markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\" or extra == \"asyncio\""} +typing-extensions = ">=4.2.0" + +[package.extras] +aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] +aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing-extensions (!=3.10.0.1)"] +asyncio = ["greenlet (!=0.4.17)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5)"] +mssql = ["pyodbc"] +mssql-pymssql = ["pymssql"] +mssql-pyodbc = ["pyodbc"] +mypy = ["mypy (>=0.910)"] +mysql = ["mysqlclient (>=1.4.0)"] +mysql-connector = ["mysql-connector-python"] +oracle = ["cx-oracle (>=7)"] +oracle-oracledb = ["oracledb (>=1.0.1)"] +postgresql = ["psycopg2 (>=2.7)"] +postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] +postgresql-pg8000 = ["pg8000 (>=1.29.1)"] +postgresql-psycopg = ["psycopg (>=3.0.7)"] +postgresql-psycopg2binary = ["psycopg2-binary"] +postgresql-psycopg2cffi = ["psycopg2cffi"] +postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] +pymysql = ["pymysql"] +sqlcipher = ["sqlcipher3-binary"] + +[[package]] +name = "tenacity" +version = "8.2.3" +description = "Retry code until it succeeds" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, + {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"}, +] + +[package.extras] +doc = ["reno", "sphinx", "tornado (>=4.5)"] + [[package]] name = "tiktoken" version = "0.5.1" @@ -1088,23 +2143,50 @@ files = 
[ {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"}, ] +[[package]] +name = "typing-inspect" +version = "0.9.0" +description = "Runtime inspection utilities for typing module." +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, + {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, +] + +[package.dependencies] +mypy-extensions = ">=0.3.0" +typing-extensions = ">=3.7.4" + +[[package]] +name = "tzdata" +version = "2023.3" +description = "Provider of IANA time zone data" +category = "main" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, + {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, +] + [[package]] name = "urllib3" -version = "2.0.7" +version = "1.26.18" description = "HTTP library with thread-safe connection pooling, file post, and more." 
category = "main" optional = false -python-versions = ">=3.7" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ - {file = "urllib3-2.0.7-py3-none-any.whl", hash = "sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e"}, - {file = "urllib3-2.0.7.tar.gz", hash = "sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84"}, + {file = "urllib3-1.26.18-py2.py3-none-any.whl", hash = "sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07"}, + {file = "urllib3-1.26.18.tar.gz", hash = "sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0"}, ] [package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] +brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] name = "wcwidth" @@ -1118,6 +2200,209 @@ files = [ {file = "wcwidth-0.2.8.tar.gz", hash = "sha256:8705c569999ffbb4f6a87c6d1b80f324bd6db952f5eb0b95bc07517f4c1813d4"}, ] +[[package]] +name = "wrapt" +version = "1.15.0" +description = "Module for decorators, wrappers and monkey patching." 
+category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" +files = [ + {file = "wrapt-1.15.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ca1cccf838cd28d5a0883b342474c630ac48cac5df0ee6eacc9c7290f76b11c1"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e826aadda3cae59295b95343db8f3d965fb31059da7de01ee8d1c40a60398b29"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5fc8e02f5984a55d2c653f5fea93531e9836abbd84342c1d1e17abc4a15084c2"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:96e25c8603a155559231c19c0349245eeb4ac0096fe3c1d0be5c47e075bd4f46"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:40737a081d7497efea35ab9304b829b857f21558acfc7b3272f908d33b0d9d4c"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:f87ec75864c37c4c6cb908d282e1969e79763e0d9becdfe9fe5473b7bb1e5f09"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:1286eb30261894e4c70d124d44b7fd07825340869945c79d05bda53a40caa079"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:493d389a2b63c88ad56cdc35d0fa5752daac56ca755805b1b0c530f785767d5e"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:58d7a75d731e8c63614222bcb21dd992b4ab01a399f1f09dd82af17bbfc2368a"}, + {file = "wrapt-1.15.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:21f6d9a0d5b3a207cdf7acf8e58d7d13d463e639f0c7e01d82cdb671e6cb7923"}, + {file = "wrapt-1.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ce42618f67741d4697684e501ef02f29e758a123aa2d669e2d964ff734ee00ee"}, + {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41d07d029dd4157ae27beab04d22b8e261eddfc6ecd64ff7000b10dc8b3a5727"}, + {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:54accd4b8bc202966bafafd16e69da9d5640ff92389d33d28555c5fd4f25ccb7"}, + {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fbfbca668dd15b744418265a9607baa970c347eefd0db6a518aaf0cfbd153c0"}, + {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:76e9c727a874b4856d11a32fb0b389afc61ce8aaf281ada613713ddeadd1cfec"}, + {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e20076a211cd6f9b44a6be58f7eeafa7ab5720eb796975d0c03f05b47d89eb90"}, + {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a74d56552ddbde46c246b5b89199cb3fd182f9c346c784e1a93e4dc3f5ec9975"}, + {file = "wrapt-1.15.0-cp310-cp310-win32.whl", hash = "sha256:26458da5653aa5b3d8dc8b24192f574a58984c749401f98fff994d41d3f08da1"}, + {file = "wrapt-1.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:75760a47c06b5974aa5e01949bf7e66d2af4d08cb8c1d6516af5e39595397f5e"}, + {file = "wrapt-1.15.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ba1711cda2d30634a7e452fc79eabcadaffedf241ff206db2ee93dd2c89a60e7"}, + {file = "wrapt-1.15.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:56374914b132c702aa9aa9959c550004b8847148f95e1b824772d453ac204a72"}, + {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a89ce3fd220ff144bd9d54da333ec0de0399b52c9ac3d2ce34b569cf1a5748fb"}, + {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bbe623731d03b186b3d6b0d6f51865bf598587c38d6f7b0be2e27414f7f214e"}, + {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3abbe948c3cbde2689370a262a8d04e32ec2dd4f27103669a45c6929bcdbfe7c"}, + {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b67b819628e3b748fd3c2192c15fb951f549d0f47c0449af0764d7647302fda3"}, + 
{file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:7eebcdbe3677e58dd4c0e03b4f2cfa346ed4049687d839adad68cc38bb559c92"}, + {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:74934ebd71950e3db69960a7da29204f89624dde411afbfb3b4858c1409b1e98"}, + {file = "wrapt-1.15.0-cp311-cp311-win32.whl", hash = "sha256:bd84395aab8e4d36263cd1b9308cd504f6cf713b7d6d3ce25ea55670baec5416"}, + {file = "wrapt-1.15.0-cp311-cp311-win_amd64.whl", hash = "sha256:a487f72a25904e2b4bbc0817ce7a8de94363bd7e79890510174da9d901c38705"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:4ff0d20f2e670800d3ed2b220d40984162089a6e2c9646fdb09b85e6f9a8fc29"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9ed6aa0726b9b60911f4aed8ec5b8dd7bf3491476015819f56473ffaef8959bd"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:896689fddba4f23ef7c718279e42f8834041a21342d95e56922e1c10c0cc7afb"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:75669d77bb2c071333417617a235324a1618dba66f82a750362eccbe5b61d248"}, + {file = "wrapt-1.15.0-cp35-cp35m-win32.whl", hash = "sha256:fbec11614dba0424ca72f4e8ba3c420dba07b4a7c206c8c8e4e73f2e98f4c559"}, + {file = "wrapt-1.15.0-cp35-cp35m-win_amd64.whl", hash = "sha256:fd69666217b62fa5d7c6aa88e507493a34dec4fa20c5bd925e4bc12fce586639"}, + {file = "wrapt-1.15.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b0724f05c396b0a4c36a3226c31648385deb6a65d8992644c12a4963c70326ba"}, + {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbeccb1aa40ab88cd29e6c7d8585582c99548f55f9b2581dfc5ba68c59a85752"}, + {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38adf7198f8f154502883242f9fe7333ab05a5b02de7d83aa2d88ea621f13364"}, + {file = 
"wrapt-1.15.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:578383d740457fa790fdf85e6d346fda1416a40549fe8db08e5e9bd281c6a475"}, + {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:a4cbb9ff5795cd66f0066bdf5947f170f5d63a9274f99bdbca02fd973adcf2a8"}, + {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:af5bd9ccb188f6a5fdda9f1f09d9f4c86cc8a539bd48a0bfdc97723970348418"}, + {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:b56d5519e470d3f2fe4aa7585f0632b060d532d0696c5bdfb5e8319e1d0f69a2"}, + {file = "wrapt-1.15.0-cp36-cp36m-win32.whl", hash = "sha256:77d4c1b881076c3ba173484dfa53d3582c1c8ff1f914c6461ab70c8428b796c1"}, + {file = "wrapt-1.15.0-cp36-cp36m-win_amd64.whl", hash = "sha256:077ff0d1f9d9e4ce6476c1a924a3332452c1406e59d90a2cf24aeb29eeac9420"}, + {file = "wrapt-1.15.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5c5aa28df055697d7c37d2099a7bc09f559d5053c3349b1ad0c39000e611d317"}, + {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a8564f283394634a7a7054b7983e47dbf39c07712d7b177b37e03f2467a024e"}, + {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780c82a41dc493b62fc5884fb1d3a3b81106642c5c5c78d6a0d4cbe96d62ba7e"}, + {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e169e957c33576f47e21864cf3fc9ff47c223a4ebca8960079b8bd36cb014fd0"}, + {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b02f21c1e2074943312d03d243ac4388319f2456576b2c6023041c4d57cd7019"}, + {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f2e69b3ed24544b0d3dbe2c5c0ba5153ce50dcebb576fdc4696d52aa22db6034"}, + {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:d787272ed958a05b2c86311d3a4135d3c2aeea4fc655705f074130aa57d71653"}, + {file = "wrapt-1.15.0-cp37-cp37m-win32.whl", hash = "sha256:02fce1852f755f44f95af51f69d22e45080102e9d00258053b79367d07af39c0"}, + {file = "wrapt-1.15.0-cp37-cp37m-win_amd64.whl", hash = "sha256:abd52a09d03adf9c763d706df707c343293d5d106aea53483e0ec8d9e310ad5e"}, + {file = "wrapt-1.15.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cdb4f085756c96a3af04e6eca7f08b1345e94b53af8921b25c72f096e704e145"}, + {file = "wrapt-1.15.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:230ae493696a371f1dbffaad3dafbb742a4d27a0afd2b1aecebe52b740167e7f"}, + {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63424c681923b9f3bfbc5e3205aafe790904053d42ddcc08542181a30a7a51bd"}, + {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6bcbfc99f55655c3d93feb7ef3800bd5bbe963a755687cbf1f490a71fb7794b"}, + {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c99f4309f5145b93eca6e35ac1a988f0dc0a7ccf9ccdcd78d3c0adf57224e62f"}, + {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b130fe77361d6771ecf5a219d8e0817d61b236b7d8b37cc045172e574ed219e6"}, + {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:96177eb5645b1c6985f5c11d03fc2dbda9ad24ec0f3a46dcce91445747e15094"}, + {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5fe3e099cf07d0fb5a1e23d399e5d4d1ca3e6dfcbe5c8570ccff3e9208274f7"}, + {file = "wrapt-1.15.0-cp38-cp38-win32.whl", hash = "sha256:abd8f36c99512755b8456047b7be10372fca271bf1467a1caa88db991e7c421b"}, + {file = "wrapt-1.15.0-cp38-cp38-win_amd64.whl", hash = "sha256:b06fa97478a5f478fb05e1980980a7cdf2712015493b44d0c87606c1513ed5b1"}, + {file = "wrapt-1.15.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:2e51de54d4fb8fb50d6ee8327f9828306a959ae394d3e01a1ba8b2f937747d86"}, + {file = "wrapt-1.15.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0970ddb69bba00670e58955f8019bec4a42d1785db3faa043c33d81de2bf843c"}, + {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76407ab327158c510f44ded207e2f76b657303e17cb7a572ffe2f5a8a48aa04d"}, + {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd525e0e52a5ff16653a3fc9e3dd827981917d34996600bbc34c05d048ca35cc"}, + {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d37ac69edc5614b90516807de32d08cb8e7b12260a285ee330955604ed9dd29"}, + {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:078e2a1a86544e644a68422f881c48b84fef6d18f8c7a957ffd3f2e0a74a0d4a"}, + {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2cf56d0e237280baed46f0b5316661da892565ff58309d4d2ed7dba763d984b8"}, + {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7dc0713bf81287a00516ef43137273b23ee414fe41a3c14be10dd95ed98a2df9"}, + {file = "wrapt-1.15.0-cp39-cp39-win32.whl", hash = "sha256:46ed616d5fb42f98630ed70c3529541408166c22cdfd4540b88d5f21006b0eff"}, + {file = "wrapt-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:eef4d64c650f33347c1f9266fa5ae001440b232ad9b98f1f43dfe7a79435c0a6"}, + {file = "wrapt-1.15.0-py3-none-any.whl", hash = "sha256:64b1df0f83706b4ef4cfb4fb0e4c2669100fd7ecacfb59e091fad300d4e04640"}, + {file = "wrapt-1.15.0.tar.gz", hash = "sha256:d06730c6aed78cee4126234cf2d071e01b44b915e725a6cb439a879ec9754a3a"}, +] + +[[package]] +name = "xxhash" +version = "3.4.1" +description = "Python binding for xxHash" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "xxhash-3.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:91dbfa55346ad3e18e738742236554531a621042e419b70ad8f3c1d9c7a16e7f"}, + {file = "xxhash-3.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:665a65c2a48a72068fcc4d21721510df5f51f1142541c890491afc80451636d2"}, + {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb11628470a6004dc71a09fe90c2f459ff03d611376c1debeec2d648f44cb693"}, + {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5bef2a7dc7b4f4beb45a1edbba9b9194c60a43a89598a87f1a0226d183764189"}, + {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c0f7b2d547d72c7eda7aa817acf8791f0146b12b9eba1d4432c531fb0352228"}, + {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00f2fdef6b41c9db3d2fc0e7f94cb3db86693e5c45d6de09625caad9a469635b"}, + {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23cfd9ca09acaf07a43e5a695143d9a21bf00f5b49b15c07d5388cadf1f9ce11"}, + {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6a9ff50a3cf88355ca4731682c168049af1ca222d1d2925ef7119c1a78e95b3b"}, + {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:f1d7c69a1e9ca5faa75546fdd267f214f63f52f12692f9b3a2f6467c9e67d5e7"}, + {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:672b273040d5d5a6864a36287f3514efcd1d4b1b6a7480f294c4b1d1ee1b8de0"}, + {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:4178f78d70e88f1c4a89ff1ffe9f43147185930bb962ee3979dba15f2b1cc799"}, + {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9804b9eb254d4b8cc83ab5a2002128f7d631dd427aa873c8727dba7f1f0d1c2b"}, + {file = "xxhash-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c09c49473212d9c87261d22c74370457cfff5db2ddfc7fd1e35c80c31a8c14ce"}, + {file = 
"xxhash-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:ebbb1616435b4a194ce3466d7247df23499475c7ed4eb2681a1fa42ff766aff6"}, + {file = "xxhash-3.4.1-cp310-cp310-win_arm64.whl", hash = "sha256:25dc66be3db54f8a2d136f695b00cfe88018e59ccff0f3b8f545869f376a8a46"}, + {file = "xxhash-3.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:58c49083801885273e262c0f5bbeac23e520564b8357fbb18fb94ff09d3d3ea5"}, + {file = "xxhash-3.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b526015a973bfbe81e804a586b703f163861da36d186627e27524f5427b0d520"}, + {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36ad4457644c91a966f6fe137d7467636bdc51a6ce10a1d04f365c70d6a16d7e"}, + {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:248d3e83d119770f96003271fe41e049dd4ae52da2feb8f832b7a20e791d2920"}, + {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2070b6d5bbef5ee031666cf21d4953c16e92c2f8a24a94b5c240f8995ba3b1d0"}, + {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2746035f518f0410915e247877f7df43ef3372bf36cfa52cc4bc33e85242641"}, + {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a8ba6181514681c2591840d5632fcf7356ab287d4aff1c8dea20f3c78097088"}, + {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0aac5010869240e95f740de43cd6a05eae180c59edd182ad93bf12ee289484fa"}, + {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4cb11d8debab1626181633d184b2372aaa09825bde709bf927704ed72765bed1"}, + {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b29728cff2c12f3d9f1d940528ee83918d803c0567866e062683f300d1d2eff3"}, + {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = 
"sha256:a15cbf3a9c40672523bdb6ea97ff74b443406ba0ab9bca10ceccd9546414bd84"}, + {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6e66df260fed01ed8ea790c2913271641c58481e807790d9fca8bfd5a3c13844"}, + {file = "xxhash-3.4.1-cp311-cp311-win32.whl", hash = "sha256:e867f68a8f381ea12858e6d67378c05359d3a53a888913b5f7d35fbf68939d5f"}, + {file = "xxhash-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:200a5a3ad9c7c0c02ed1484a1d838b63edcf92ff538770ea07456a3732c577f4"}, + {file = "xxhash-3.4.1-cp311-cp311-win_arm64.whl", hash = "sha256:1d03f1c0d16d24ea032e99f61c552cb2b77d502e545187338bea461fde253583"}, + {file = "xxhash-3.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c4bbba9b182697a52bc0c9f8ec0ba1acb914b4937cd4a877ad78a3b3eeabefb3"}, + {file = "xxhash-3.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9fd28a9da300e64e434cfc96567a8387d9a96e824a9be1452a1e7248b7763b78"}, + {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6066d88c9329ab230e18998daec53d819daeee99d003955c8db6fc4971b45ca3"}, + {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93805bc3233ad89abf51772f2ed3355097a5dc74e6080de19706fc447da99cd3"}, + {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64da57d5ed586ebb2ecdde1e997fa37c27fe32fe61a656b77fabbc58e6fbff6e"}, + {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a97322e9a7440bf3c9805cbaac090358b43f650516486746f7fa482672593df"}, + {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bbe750d512982ee7d831838a5dee9e9848f3fb440e4734cca3f298228cc957a6"}, + {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:fd79d4087727daf4d5b8afe594b37d611ab95dc8e29fe1a7517320794837eb7d"}, + {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = 
"sha256:743612da4071ff9aa4d055f3f111ae5247342931dedb955268954ef7201a71ff"}, + {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:b41edaf05734092f24f48c0958b3c6cbaaa5b7e024880692078c6b1f8247e2fc"}, + {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:a90356ead70d715fe64c30cd0969072de1860e56b78adf7c69d954b43e29d9fa"}, + {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ac56eebb364e44c85e1d9e9cc5f6031d78a34f0092fea7fc80478139369a8b4a"}, + {file = "xxhash-3.4.1-cp312-cp312-win32.whl", hash = "sha256:911035345932a153c427107397c1518f8ce456f93c618dd1c5b54ebb22e73747"}, + {file = "xxhash-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:f31ce76489f8601cc7b8713201ce94b4bd7b7ce90ba3353dccce7e9e1fee71fa"}, + {file = "xxhash-3.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:b5beb1c6a72fdc7584102f42c4d9df232ee018ddf806e8c90906547dfb43b2da"}, + {file = "xxhash-3.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6d42b24d1496deb05dee5a24ed510b16de1d6c866c626c2beb11aebf3be278b9"}, + {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b685fab18876b14a8f94813fa2ca80cfb5ab6a85d31d5539b7cd749ce9e3624"}, + {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:419ffe34c17ae2df019a4685e8d3934d46b2e0bbe46221ab40b7e04ed9f11137"}, + {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0e041ce5714f95251a88670c114b748bca3bf80cc72400e9f23e6d0d59cf2681"}, + {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc860d887c5cb2f524899fb8338e1bb3d5789f75fac179101920d9afddef284b"}, + {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:312eba88ffe0a05e332e3a6f9788b73883752be63f8588a6dc1261a3eaaaf2b2"}, + {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = 
"sha256:e01226b6b6a1ffe4e6bd6d08cfcb3ca708b16f02eb06dd44f3c6e53285f03e4f"}, + {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:9f3025a0d5d8cf406a9313cd0d5789c77433ba2004b1c75439b67678e5136537"}, + {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:6d3472fd4afef2a567d5f14411d94060099901cd8ce9788b22b8c6f13c606a93"}, + {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:43984c0a92f06cac434ad181f329a1445017c33807b7ae4f033878d860a4b0f2"}, + {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a55e0506fdb09640a82ec4f44171273eeabf6f371a4ec605633adb2837b5d9d5"}, + {file = "xxhash-3.4.1-cp37-cp37m-win32.whl", hash = "sha256:faec30437919555b039a8bdbaba49c013043e8f76c999670aef146d33e05b3a0"}, + {file = "xxhash-3.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:c9e1b646af61f1fc7083bb7b40536be944f1ac67ef5e360bca2d73430186971a"}, + {file = "xxhash-3.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:961d948b7b1c1b6c08484bbce3d489cdf153e4122c3dfb07c2039621243d8795"}, + {file = "xxhash-3.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:719a378930504ab159f7b8e20fa2aa1896cde050011af838af7e7e3518dd82de"}, + {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74fb5cb9406ccd7c4dd917f16630d2e5e8cbbb02fc2fca4e559b2a47a64f4940"}, + {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5dab508ac39e0ab988039bc7f962c6ad021acd81fd29145962b068df4148c476"}, + {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8c59f3e46e7daf4c589e8e853d700ef6607afa037bfad32c390175da28127e8c"}, + {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cc07256eff0795e0f642df74ad096f8c5d23fe66bc138b83970b50fc7f7f6c5"}, + {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:e9f749999ed80f3955a4af0eb18bb43993f04939350b07b8dd2f44edc98ffee9"}, + {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7688d7c02149a90a3d46d55b341ab7ad1b4a3f767be2357e211b4e893efbaaf6"}, + {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a8b4977963926f60b0d4f830941c864bed16aa151206c01ad5c531636da5708e"}, + {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:8106d88da330f6535a58a8195aa463ef5281a9aa23b04af1848ff715c4398fb4"}, + {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:4c76a77dbd169450b61c06fd2d5d436189fc8ab7c1571d39265d4822da16df22"}, + {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:11f11357c86d83e53719c592021fd524efa9cf024dc7cb1dfb57bbbd0d8713f2"}, + {file = "xxhash-3.4.1-cp38-cp38-win32.whl", hash = "sha256:0c786a6cd74e8765c6809892a0d45886e7c3dc54de4985b4a5eb8b630f3b8e3b"}, + {file = "xxhash-3.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:aabf37fb8fa27430d50507deeab2ee7b1bcce89910dd10657c38e71fee835594"}, + {file = "xxhash-3.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6127813abc1477f3a83529b6bbcfeddc23162cece76fa69aee8f6a8a97720562"}, + {file = "xxhash-3.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ef2e194262f5db16075caea7b3f7f49392242c688412f386d3c7b07c7733a70a"}, + {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71be94265b6c6590f0018bbf73759d21a41c6bda20409782d8117e76cd0dfa8b"}, + {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:10e0a619cdd1c0980e25eb04e30fe96cf8f4324758fa497080af9c21a6de573f"}, + {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fa122124d2e3bd36581dd78c0efa5f429f5220313479fb1072858188bc2d5ff1"}, + {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e17032f5a4fea0a074717fe33477cb5ee723a5f428de7563e75af64bfc1b1e10"}, + {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca7783b20e3e4f3f52f093538895863f21d18598f9a48211ad757680c3bd006f"}, + {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d77d09a1113899fad5f354a1eb4f0a9afcf58cefff51082c8ad643ff890e30cf"}, + {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:21287bcdd299fdc3328cc0fbbdeaa46838a1c05391264e51ddb38a3f5b09611f"}, + {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:dfd7a6cc483e20b4ad90224aeb589e64ec0f31e5610ab9957ff4314270b2bf31"}, + {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:543c7fcbc02bbb4840ea9915134e14dc3dc15cbd5a30873a7a5bf66039db97ec"}, + {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:fe0a98d990e433013f41827b62be9ab43e3cf18e08b1483fcc343bda0d691182"}, + {file = "xxhash-3.4.1-cp39-cp39-win32.whl", hash = "sha256:b9097af00ebf429cc7c0e7d2fdf28384e4e2e91008130ccda8d5ae653db71e54"}, + {file = "xxhash-3.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:d699b921af0dcde50ab18be76c0d832f803034d80470703700cb7df0fbec2832"}, + {file = "xxhash-3.4.1-cp39-cp39-win_arm64.whl", hash = "sha256:2be491723405e15cc099ade1280133ccfbf6322d2ef568494fb7d07d280e7eee"}, + {file = "xxhash-3.4.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:431625fad7ab5649368c4849d2b49a83dc711b1f20e1f7f04955aab86cd307bc"}, + {file = "xxhash-3.4.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc6dbd5fc3c9886a9e041848508b7fb65fd82f94cc793253990f81617b61fe49"}, + {file = "xxhash-3.4.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3ff8dbd0ec97aec842476cb8ccc3e17dd288cd6ce3c8ef38bff83d6eb927817"}, + {file = 
"xxhash-3.4.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef73a53fe90558a4096e3256752268a8bdc0322f4692ed928b6cd7ce06ad4fe3"}, + {file = "xxhash-3.4.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:450401f42bbd274b519d3d8dcf3c57166913381a3d2664d6609004685039f9d3"}, + {file = "xxhash-3.4.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a162840cf4de8a7cd8720ff3b4417fbc10001eefdd2d21541a8226bb5556e3bb"}, + {file = "xxhash-3.4.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b736a2a2728ba45017cb67785e03125a79d246462dfa892d023b827007412c52"}, + {file = "xxhash-3.4.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d0ae4c2e7698adef58710d6e7a32ff518b66b98854b1c68e70eee504ad061d8"}, + {file = "xxhash-3.4.1-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6322c4291c3ff174dcd104fae41500e75dad12be6f3085d119c2c8a80956c51"}, + {file = "xxhash-3.4.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:dd59ed668801c3fae282f8f4edadf6dc7784db6d18139b584b6d9677ddde1b6b"}, + {file = "xxhash-3.4.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:92693c487e39523a80474b0394645b393f0ae781d8db3474ccdcead0559ccf45"}, + {file = "xxhash-3.4.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4603a0f642a1e8d7f3ba5c4c25509aca6a9c1cc16f85091004a7028607ead663"}, + {file = "xxhash-3.4.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fa45e8cbfbadb40a920fe9ca40c34b393e0b067082d94006f7f64e70c7490a6"}, + {file = "xxhash-3.4.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:595b252943b3552de491ff51e5bb79660f84f033977f88f6ca1605846637b7c6"}, + {file = "xxhash-3.4.1-pp38-pypy38_pp73-win_amd64.whl", hash = 
"sha256:562d8b8f783c6af969806aaacf95b6c7b776929ae26c0cd941d54644ea7ef51e"}, + {file = "xxhash-3.4.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:41ddeae47cf2828335d8d991f2d2b03b0bdc89289dc64349d712ff8ce59d0647"}, + {file = "xxhash-3.4.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c44d584afdf3c4dbb3277e32321d1a7b01d6071c1992524b6543025fb8f4206f"}, + {file = "xxhash-3.4.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd7bddb3a5b86213cc3f2c61500c16945a1b80ecd572f3078ddbbe68f9dabdfb"}, + {file = "xxhash-3.4.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9ecb6c987b62437c2f99c01e97caf8d25660bf541fe79a481d05732e5236719c"}, + {file = "xxhash-3.4.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:696b4e18b7023527d5c50ed0626ac0520edac45a50ec7cf3fc265cd08b1f4c03"}, + {file = "xxhash-3.4.1.tar.gz", hash = "sha256:0379d6cf1ff987cd421609a264ce025e74f346e3e145dd106c0cc2e3ec3f99a9"}, +] + [[package]] name = "yarl" version = "1.9.2" @@ -1208,5 +2493,5 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" -python-versions = "<3.13,>=3.9" -content-hash = "9c19a9cd0487a85fa947ec3f53e765b47a03b2a1c6ae1a46de95b25a893690b2" +python-versions = "<3.12,>=3.9" +content-hash = "72cadac0b6c167e5b890c7062e7f163e4976b29a0083b6109c1c3a8f5bb02d25" diff --git a/pyproject.toml b/pyproject.toml index 08ab6363..d355dd21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ readme = "README.md" memgpt = "memgpt.main:app" [tool.poetry.dependencies] -python = "<3.13,>=3.9" +python = "<3.12,>=3.9" typer = {extras = ["all"], version = "^0.9.0"} questionary = "^2.0.1" demjson3 = "^3.0.6" @@ -31,6 +31,10 @@ pymupdf = "^1.23.5" tqdm = "^4.66.1" openai = "^0.28.1" black = "^23.10.1" +pytest = "^7.4.3" +llama-index = "^0.8.53.post3" +setuptools = "^68.2.2" +datasets = "^2.14.6" [build-system] diff --git a/tests/test_load_archival.py 
b/tests/test_load_archival.py index 95bec5ce..6803e7d0 100644 --- a/tests/test_load_archival.py +++ b/tests/test_load_archival.py @@ -105,7 +105,3 @@ def test_load_database(): ) print("Successfully loaded into index") assert True - - -# test_load_directory() -test_load_database()