# Files
# letta-server/memgpt/migrate.py
#
# 462 lines
# 19 KiB
# Python

import configparser
import datetime
import os
import pickle
import glob
import sys
import traceback
import uuid
import json
import shutil
import typer
from tqdm import tqdm
import questionary
from llama_index import (
StorageContext,
load_index_from_storage,
)
from memgpt.agent import Agent
from memgpt.data_types import AgentState, User, Passage, Source
from memgpt.metadata import MetadataStore
from memgpt.utils import MEMGPT_DIR, version_less_than, OpenAIBackcompatUnpickler, annotate_message_json_list_with_tool_calls
from memgpt.config import MemGPTConfig
from memgpt.cli.cli_config import configure
from memgpt.agent_store.storage import StorageConnector, TableType
# This is the version where the breaking change was made.
# Configs whose recorded version is older than this are reported as incompatible
# by config_is_compatible().
VERSION_CUTOFF = "0.2.12"
# Migration backup dir (where we'll dump old agents that we successfully migrated).
# Joined onto MEMGPT_DIR; the old config file is also moved here as a timestamped backup.
MIGRATION_BACKUP_FOLDER = "migration_backups"
def wipe_config_and_reconfigure(run_configure=True):
    """Back up the current config file, then produce a fresh one.

    The existing config at ``MEMGPT_DIR/config`` is not destroyed: it is moved
    into the migration backup folder under a timestamped name.

    Args:
        run_configure: When true, launch ``configure()`` (i.e. `memgpt configure`).
            Otherwise call ``MemGPTConfig.load()``, which the original code
            relies on to create a new config with defaults.
    """
    backup_root = os.path.join(MEMGPT_DIR, MIGRATION_BACKUP_FOLDER)
    # exist_ok=True makes this idempotent and also guarantees the "agents"
    # subfolder exists even when the backup root was created earlier without it
    # (previously the subfolder was only created when the root was missing).
    os.makedirs(os.path.join(backup_root, "agents"), exist_ok=True)

    # Timestamp in a readable, sortable format (YYYYMMDD_HHMMSS) so repeated
    # runs never overwrite an earlier backup.
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_filename = os.path.join(backup_root, f"config_backup_{timestamp}")
    existing_filename = os.path.join(MEMGPT_DIR, "config")

    # Only move the config if there is one to move
    if os.path.exists(existing_filename):
        # shutil should work cross-platform
        shutil.move(existing_filename, backup_filename)
        typer.secho(f"Deleted config file ({existing_filename}) and saved as backup ({backup_filename})", fg=typer.colors.GREEN)
    else:
        typer.secho("Couldn't find an existing config file to delete", fg=typer.colors.RED)

    if run_configure:
        # Either run configure
        configure()
    else:
        # Or create a new config with defaults
        MemGPTConfig.load()
def config_is_compatible(allow_empty=False, echo=False) -> bool:
    """Report whether the on-disk config can be used as-is or must be regenerated.

    The config file is parsed directly with ``configparser`` instead of
    ``MemGPTConfig.load()``, because the built-in loader applies defaults and
    would hide a missing version.

    Args:
        allow_empty: Value to return (as a bool) when no config file exists.
        echo: When true, print the outcome of the version check via ``typer.secho``.

    Returns:
        True when the recorded ``memgpt_version`` is not older than
        ``VERSION_CUTOFF``; False when the version is missing or older.
    """
    config_path = os.path.join(MEMGPT_DIR, "config")
    if not os.path.exists(config_path):
        return bool(allow_empty)

    ini = configparser.ConfigParser()
    ini.read(config_path)

    # Version lives at [version] memgpt_version; treat any missing piece as "no version"
    recorded_version = None
    if "version" in ini and "memgpt_version" in ini["version"]:
        recorded_version = ini["version"]["memgpt_version"]

    # Decide the verdict and the matching message in one place
    if recorded_version is None:
        compatible = False
        note = "Current config version is missing"
    elif version_less_than(recorded_version, VERSION_CUTOFF):
        compatible = False
        note = f"Current config version ({recorded_version}) is older than migration cutoff ({VERSION_CUTOFF})"
    else:
        compatible = True
        note = f"Current config version {recorded_version} is compatible!"

    if echo:
        typer.secho(note, fg=typer.colors.GREEN if compatible else typer.colors.RED)
    return compatible
def agent_is_migrateable(agent_name: str) -> bool:
    """Determine whether or not the agent folder is a migration target.

    An agent is a migration target when its ``config.json`` has no
    ``memgpt_version`` entry, or records a version older than ``VERSION_CUTOFF``.

    Args:
        agent_name: Name of the folder under ``MEMGPT_DIR/agents``.

    Returns:
        True if the agent should be migrated, False otherwise.

    Raises:
        ValueError: If the agent folder or its ``config.json`` is missing, or
            the config file cannot be read/parsed.
    """
    agent_folder = os.path.join(MEMGPT_DIR, "agents", agent_name)
    if not os.path.exists(agent_folder):
        raise ValueError(f"Folder {agent_folder} does not exist")

    agent_config_file = os.path.join(agent_folder, "config.json")
    if not os.path.exists(agent_config_file):
        raise ValueError(f"Agent folder {agent_folder} does not have a config file")

    try:
        with open(agent_config_file, "r") as fh:
            agent_config = json.load(fh)
    except Exception as e:
        raise ValueError(f"Failed to load agent config file ({agent_config_file}), error = {e}") from e

    # json.load returns plain containers, so the version must be read by key.
    # (The previous hasattr()/attribute access never matched on a dict, which
    # made every agent look like a migration target.)
    agent_version = agent_config.get("memgpt_version") if isinstance(agent_config, dict) else None
    if agent_version is None:
        return True
    return bool(version_less_than(agent_version, VERSION_CUTOFF))
def migrate_source(source_name: str):
    """Migrate an old data source (`MEMGPT_DIR/archival/{source_name}/nodes.pkl`).

    Registers a ``Source`` for the default user in the metadata store, then
    converts every pickled node into a ``Passage`` and bulk-inserts them through
    the passages storage connector. If loading or inserting fails, the source
    record created here is deleted again before the error is raised.

    Args:
        source_name: Name of the folder under ``MEMGPT_DIR/archival``.

    Raises:
        AssertionError: If ``nodes.pkl`` does not exist, or the source cannot be
            read back from the metadata store afterwards.
        ValueError: If the default user is not in the database, or if loading /
            inserting the passages fails (including an empty source).
    """
    # 1. Locate the pickled nodes of the old source
    source_path = os.path.join(MEMGPT_DIR, "archival", source_name, "nodes.pkl")
    assert os.path.exists(source_path), f"Source {source_name} does not exist at {source_path}"

    # NOTE(review): load_index is not used below; the import is retained in case
    # callers rely on its import-time effects - confirm and remove if not needed.
    from memgpt.cli.cli_load import load_index  # noqa: F401

    # 2. Resolve the default user that will own the migrated source
    config = MemGPTConfig.load()
    ms = MetadataStore(config)
    user_id = uuid.UUID(config.anon_clientid)
    user = ms.get_user(user_id=user_id)
    if user is None:
        raise ValueError(
            f"Failed to load user {str(user_id)} from database. Please make sure to migrate your config before migrating agents."
        )

    # insert source into metadata store
    source = Source(user_id=user.id, name=source_name)
    ms.create_source(source)

    try:
        # SECURITY: pickle.load can execute arbitrary code; this must only ever
        # be pointed at the user's own local MemGPT data.
        with open(source_path, "rb") as fh:
            nodes = pickle.load(fh)

        # TODO: make sure embedding config matches embedding size?
        passages = [
            Passage(user_id=user.id, data_source=source_name, text=node.text, embedding=node.embedding) for node in nodes
        ]
        if len(passages) == 0:
            # Explicit raise (not assert) so the check survives `python -O`
            raise ValueError(f"Source {source_name} has no passages")

        conn = StorageConnector.get_storage_connector(TableType.PASSAGES, config=config, user_id=user_id)
        conn.insert_many(passages)
        print(f"Inserted {len(passages)} to {source_name}")
    except Exception as e:
        # Roll back the metadata entry so a failed migration leaves no orphan source
        ms.delete_source(source.id)
        raise ValueError(f"Failed to migrate {source_name}: {str(e)}") from e

    # basic checks
    source = ms.get_source(user_id=user.id, source_name=source_name)
    assert source is not None, f"Failed to load source {source_name} from database after migration"
def migrate_agent(agent_name: str):
    """Migrate an old agent folder (`MEMGPT_DIR/agents/{agent_name}`).

    Steps:
        1. Load the agent state JSON (most recently modified checkpoint) and the
           matching persistence pickle from the old folder
        2. Create a new AgentState using the agent config + agent internal state
        3. Instantiate a new Agent by passing AgentState to Agent.__init__
           (per the original note, this triggers a save that writes to the DB)
        4. Insert the full message history into recall memory
        5. Insert pickled archival nodes (if present) into archival memory
        6. Move the old agent folder into the migration backup folder

    If step 4 or 5 fails, the agent created in step 3 is deleted from the
    metadata store before the error is re-raised.

    Args:
        agent_name: Name of the folder under ``MEMGPT_DIR/agents``.

    Raises:
        ValueError: If no checkpoint JSON or persistence pickle is found, or the
            default user is not in the database.
        Exception: Anything raised while building the Agent, inserting memories,
            or moving the folder is propagated unchanged.
    """
    # 1. Load the agent state JSON from the old folder
    agent_folder = os.path.join(MEMGPT_DIR, "agents", agent_name)

    # load state from old checkpoint file
    agent_ckpt_directory = os.path.join(agent_folder, "agent_state")
    json_files = glob.glob(os.path.join(agent_ckpt_directory, "*.json"))
    if not json_files:
        raise ValueError(f"Cannot load {agent_name} - no saved checkpoints found in {agent_ckpt_directory}")

    # Pick the checkpoint with the latest modification time
    state_filename = max(json_files, key=os.path.getmtime)
    with open(state_filename, "r") as fh:
        state_dict = json.load(fh)
    # state_dict keys used below: 'system', 'functions', 'messages', 'messages_total', 'memory'

    # load old data from the persistence manager; the pickle shares the
    # checkpoint's base name with a ".persistence.pickle" suffix
    persistence_filename = os.path.basename(state_filename).replace(".json", ".persistence.pickle")
    persistence_filename = os.path.join(agent_folder, "persistence_manager", persistence_filename)
    archival_filename = os.path.join(agent_folder, "persistence_manager", "index", "nodes.pkl")
    if not os.path.exists(persistence_filename):
        raise ValueError(f"Cannot load {agent_name} - no saved persistence pickle found at {persistence_filename}")

    # SECURITY: pickle can execute arbitrary code; only the user's own local
    # MemGPT data should ever be loaded here.
    try:
        with open(persistence_filename, "rb") as f:
            data = pickle.load(f)
    except ModuleNotFoundError:
        # Patch for stripped openai package
        # ModuleNotFoundError: No module named 'openai.openai_object'
        with open(persistence_filename, "rb") as f:
            unpickler = OpenAIBackcompatUnpickler(f)
            data = unpickler.load()

        from memgpt.openai_backcompat.openai_object import OpenAIObject

        def convert_openai_objects_to_dict(obj):
            """Recursively replace OpenAIObject instances with plain dicts."""
            if isinstance(obj, OpenAIObject):
                return obj.to_dict_recursive()
            elif isinstance(obj, dict):
                return {k: convert_openai_objects_to_dict(v) for k, v in obj.items()}
            elif isinstance(obj, list):
                return [convert_openai_objects_to_dict(v) for v in obj]
            else:
                return obj

        data = convert_openai_objects_to_dict(data)

    # data is expected to hold "all_messages" (used below); the original notes
    # also list "messages" and "recall_memory"

    agent_config_filename = os.path.join(agent_folder, "config.json")
    with open(agent_config_filename, "r") as fh:
        agent_config = json.load(fh)

    # 2. Create a new AgentState using the agent config + agent internal state
    config = MemGPTConfig.load()

    # gets default user
    ms = MetadataStore(config)
    user_id = uuid.UUID(config.anon_clientid)
    user = ms.get_user(user_id=user_id)
    if user is None:
        raise ValueError(
            f"Failed to load user {str(user_id)} from database. Please make sure to migrate your config before migrating agents."
        )

    agent_state = AgentState(
        name=agent_config["name"],
        user_id=user.id,
        persona=agent_config["persona"],  # eg 'sam_pov'
        human=agent_config["human"],  # eg 'basic'
        preset=agent_config["preset"],  # eg 'memgpt_chat'
        state=dict(
            human=state_dict["memory"]["human"],
            persona=state_dict["memory"]["persona"],
            system=state_dict["system"],
            functions=state_dict["functions"],  # this shouldn't matter, since Agent.__init__ will re-link
            messages=annotate_message_json_list_with_tool_calls(state_dict["messages"]),
        ),
        llm_config=user.default_llm_config,
        embedding_config=user.default_embedding_config,
    )

    # 3. Instantiate a new Agent by passing AgentState to Agent.__init__
    # NOTE: the Agent.__init__ will trigger a save, which will write to the DB.
    # Any failure here propagates to the caller unchanged.
    agent = Agent(
        agent_state=agent_state,
        messages_total=state_dict["messages_total"],  # TODO: do we need this?
        interface=None,
    )

    # Wrap the rest in a try-except so that we can cleanup by deleting the agent if we fail
    try:
        # 4. Insert into recall
        # TODO should this be 'messages', or 'all_messages'?
        # all_messages in recall will have fields "timestamp" and "message"
        full_message_history_buffer = annotate_message_json_list_with_tool_calls([d["message"] for d in data["all_messages"]])
        # We want to keep the timestamp, so only the "message" field is swapped
        # for its annotated counterpart
        for i in range(len(data["all_messages"])):
            data["all_messages"][i]["message"] = full_message_history_buffer[i]
        messages_to_insert = [agent.persistence_manager.json_to_message(msg) for msg in data["all_messages"]]
        agent.persistence_manager.recall_memory.insert_many(messages_to_insert)
        # TODO should we also assign data["messages"] to RecallMemory.messages?

        # 5. Insert into archival
        if os.path.exists(archival_filename):
            with open(archival_filename, "rb") as f:
                nodes = pickle.load(f)
            # TODO: make sure embedding size matches embedding config?
            passages = [
                Passage(user_id=user.id, agent_id=agent_state.id, text=node.text, embedding=node.embedding) for node in nodes
            ]
            if len(passages) > 0:
                agent.persistence_manager.archival_memory.storage.insert_many(passages)
                print(f"Inserted {len(passages)} passages into archival memory")
        else:
            print("No archival memory found at", archival_filename)
    except BaseException:
        # BaseException (not just Exception) so that a Ctrl-C mid-insert also
        # removes the half-migrated agent before propagating
        ms.delete_agent(agent_state.id)
        raise

    # 6. Archive the old folder so it is not picked up by a later migration run
    new_agent_folder = os.path.join(MEMGPT_DIR, MIGRATION_BACKUP_FOLDER, "agents", agent_name)
    try:
        shutil.move(agent_folder, new_agent_folder)
    except Exception:
        print(f"Failed to move agent folder from {agent_folder} to {new_agent_folder}")
        raise
# def migrate_all_agents(stop_on_fail=True):
def migrate_all_agents(stop_on_fail: bool = False) -> dict:
    """Scan over all agent folders in MEMGPT_DIR and migrate each agent.

    If the current config is not compatible, the user is asked to confirm a
    config wipe + reconfigure first; declining aborts the migration.

    Args:
        stop_on_fail: When true, re-raise the first migration error instead of
            recording it and continuing with the next agent.

    Returns:
        A dict with the counts ``agent_folders``, ``migration_candidates``,
        ``successful_migrations`` and ``failed_migrations``.

    Raises:
        KeyboardInterrupt: If the user declines the config wipe.
        ValueError: If ``MEMGPT_DIR/agents`` does not exist.
        Exception: Config regeneration errors, and (with ``stop_on_fail``) the
            first per-agent migration error.
    """
    # Ensure the backup tree exists. exist_ok=True also creates the "agents"
    # subfolder when the backup root already exists without it (previously the
    # subfolder was only created together with the root).
    os.makedirs(os.path.join(MEMGPT_DIR, MIGRATION_BACKUP_FOLDER, "agents"), exist_ok=True)

    if not config_is_compatible(echo=True):
        typer.secho(f"Your current config file is incompatible with MemGPT versions >= {VERSION_CUTOFF}", fg=typer.colors.RED)
        if questionary.confirm(
            "To migrate old MemGPT agents, you must delete your config file and run `memgpt configure`. Would you like to proceed?"
        ).ask():
            try:
                wipe_config_and_reconfigure()
            except Exception as e:
                typer.secho(f"Fresh config generation failed - error:\n{e}", fg=typer.colors.RED)
                raise
        else:
            typer.secho("Migration cancelled (to migrate old agents, run `memgpt migrate`)", fg=typer.colors.RED)
            # Raised outside the try below, so it propagates to the caller
            raise KeyboardInterrupt()

    agents_dir = os.path.join(MEMGPT_DIR, "agents")

    # Ensure the directory exists
    if not os.path.exists(agents_dir):
        raise ValueError(f"Directory {agents_dir} does not exist.")

    # Get a list of all folders in agents_dir
    agent_folders = [f for f in os.listdir(agents_dir) if os.path.isdir(os.path.join(agents_dir, f))]

    # Iterate over each folder with a tqdm progress bar
    count = 0
    failures = []
    candidates = []
    try:
        for agent_name in tqdm(agent_folders, desc="Migrating agents"):
            try:
                if agent_is_migrateable(agent_name=agent_name):
                    candidates.append(agent_name)
                    migrate_agent(agent_name)
                    count += 1
            except Exception as e:
                # Record the failure and keep going unless asked to stop
                failures.append({"name": agent_name, "reason": str(e)})
                traceback.print_exc()
                if stop_on_fail:
                    raise
    except KeyboardInterrupt:
        # A Ctrl-C during the loop ends it early; the summary below still prints
        typer.secho("User cancelled operation", fg=typer.colors.RED)

    if not candidates:
        typer.secho(f"No migration candidates found ({len(agent_folders)} agent folders total)", fg=typer.colors.GREEN)
    else:
        typer.secho(f"Inspected {len(agent_folders)} agent folders")
        if failures:
            typer.secho("Failed migrations:", fg=typer.colors.RED)
            for fail in failures:
                typer.secho(f"{fail['name']}: {fail['reason']}", fg=typer.colors.RED)
            typer.secho(f"{len(failures)}/{len(candidates)} migration targets failed (see reasons above)", fg=typer.colors.RED)
        if count > 0:
            typer.secho(f"{count}/{len(candidates)} agents were successfully migrated to the new database format", fg=typer.colors.GREEN)

    return {
        "agent_folders": len(agent_folders),
        "migration_candidates": len(candidates),
        "successful_migrations": count,
        "failed_migrations": len(failures),
    }
def migrate_all_sources(stop_on_fail: bool = False) -> dict:
    """Scan over all source folders in MEMGPT_DIR/archival and migrate each one.

    Every sub-folder is treated as a migration candidate and handed to
    ``migrate_source``.

    Args:
        stop_on_fail: When true, re-raise the first migration error instead of
            recording it and moving on to the next source.

    Returns:
        A dict with the counts ``source_folders``, ``migration_candidates``,
        ``successful_migrations`` and ``failed_migrations``.

    Raises:
        ValueError: If ``MEMGPT_DIR/archival`` does not exist.
    """
    sources_dir = os.path.join(MEMGPT_DIR, "archival")

    # Nothing to scan without the archival directory
    if not os.path.exists(sources_dir):
        raise ValueError(f"Directory {sources_dir} does not exist.")

    # Collect the names of all sub-folders of sources_dir
    source_folders = []
    for entry in os.listdir(sources_dir):
        if os.path.isdir(os.path.join(sources_dir, entry)):
            source_folders.append(entry)

    succeeded = 0
    failures = []
    candidates = []
    try:
        # Walk the folders behind a tqdm progress bar
        for source_name in tqdm(source_folders, desc="Migrating data sources"):
            candidates.append(source_name)
            try:
                migrate_source(source_name)
            except Exception as e:
                failures.append({"name": source_name, "reason": str(e)})
                traceback.print_exc()
                if stop_on_fail:
                    raise
            else:
                succeeded += 1
    except KeyboardInterrupt:
        # Interrupting the loop still falls through to the summary
        typer.secho("User cancelled operation", fg=typer.colors.RED)

    folder_total = len(source_folders)
    candidate_total = len(candidates)
    failure_total = len(failures)

    if candidate_total == 0:
        typer.secho(f"No migration candidates found ({folder_total} source folders total)", fg=typer.colors.GREEN)
    else:
        typer.secho(f"Inspected {folder_total} source folders")
        if failure_total > 0:
            typer.secho("Failed migrations:", fg=typer.colors.RED)
            for failure in failures:
                typer.secho(f"{failure['name']}: {failure['reason']}", fg=typer.colors.RED)
            typer.secho(f"{failure_total}/{candidate_total} migration targets failed (see reasons above)", fg=typer.colors.RED)
        if succeeded > 0:
            typer.secho(f"{succeeded}/{candidate_total} sources were successfully migrated to the new database format", fg=typer.colors.GREEN)

    return {
        "source_folders": folder_total,
        "migration_candidates": candidate_total,
        "successful_migrations": succeeded,
        "failed_migrations": failure_total,
    }