diff --git a/.composio.lock b/.composio.lock
deleted file mode 100644
index 0967ef42..00000000
--- a/.composio.lock
+++ /dev/null
@@ -1 +0,0 @@
-{}
diff --git a/README.md b/README.md
index 37f58e9b..0d7b8cc4 100644
--- a/README.md
+++ b/README.md
@@ -16,9 +16,12 @@ Letta is the platform for building stateful agents: open AI with advanced memory
 * [**Letta Desktop**](https://docs.letta.com/guides/ade/desktop): A fully-local version of the ADE, available on MacOS and Windows
 * [**Letta Cloud**](https://app.letta.com/): The fastest way to try Letta, with agents running in the cloud
 
+
 ## Get started
 
-To get started, install the Letta SDK (available for both Python and TypeScript):
+### [One-Shot ✨ Vibecoding ⚡️ Prompts](https://github.com/letta-ai/letta/blob/main/fern/pages/getting-started/prompts.mdx)
+
+Or install the Letta SDK (available for both Python and TypeScript):
 
 ### [Python SDK](https://github.com/letta-ai/letta-python)
 ```sh
diff --git a/main.py b/main.py
deleted file mode 100644
index 2c597e20..00000000
--- a/main.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import typer
-
-typer.secho(
-    "Command `python main.py` no longer supported. Please run `letta run`. See https://docs.letta.com for more info.",
-    fg=typer.colors.YELLOW,
-)
diff --git a/paper_experiments/README.md b/paper_experiments/README.md
deleted file mode 100644
index db5aa49a..00000000
--- a/paper_experiments/README.md
+++ /dev/null
@@ -1,47 +0,0 @@
-
-## Nested K/V (`nested_kv_task`)
-This task runs K/V lookups on synthetic data. You can run it with `icml_experiments/nested_kv_task/run.sh`.
-
-## Document Q/A (`doc_qa_task`)
-This task runs question answering on a set of embedded wikipedia passages.
-
-### Setup
-You need a a running postgres database to run this experiment and an OpenAI account. Set your enviornment variables:
-```
-export PGVECTOR_TEST_DB_URL=postgresql+pg8000://{username}:{password}@localhost:8888/{db}
-export OPENAI_API_KEY={key}
-```
-
-## Download data
-Download the wikipedia embedding at:
-```
-huggingface-cli download nlpkevinl/wikipedia_openai_embeddings --repo-type dataset
-```
-
-## Loading embeddings
-Run the script `./0_load_embeddings.sh`.
-
-This step will take a while. You can check the status of the loading by connecting to `psql`:
-```
-> psql -h localhost -p {password} -U {username} -d {db}
-> SELECT COUNT(*) from letta_passages;
-```
-Once completed, there will be ~19 million rows in the database.
-
-### Creating an index
-To avoid extremeley slow queries, you need to create an index:
-```
-CREATE INDEX ON letta_passages USING hnsw (embedding vector_l2_ops);
-```
-You can check to see if the index was created successfully with:
-```
-> SELECT indexname, indexdef FROM pg_indexes WHERE tablename = 'letta_passages';
-
-letta_passages_embedding_idx | CREATE INDEX letta_passages_embedding_idx ON public.letta_passages USING hnsw (embedding vector_cosine_ops) WITH (m='24', ef_construction='100')
-```
-
-## Running Document Q/A
-Run the script `./1_run_docqa.sh {model_name} {n_docs} {letta/model_name}`.
-
-## Evaluation
-Run the script `./2_run_eval.sh`.
diff --git a/paper_experiments/doc_qa_task/0_load_embeddings.sh b/paper_experiments/doc_qa_task/0_load_embeddings.sh
deleted file mode 100644
index bb91f53c..00000000
--- a/paper_experiments/doc_qa_task/0_load_embeddings.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-python load_wikipedia_embeddings.py --file data/wikipedia_passages_shard_1-06.jsonl
-python load_wikipedia_embeddings.py --file data/wikipedia_passages_shard_1-07.jsonl
-python load_wikipedia_embeddings.py --file data/wikipedia_passages_shard_1-08.jsonl
-python load_wikipedia_embeddings.py --file data/wikipedia_passages_shard_1-09.jsonl
-python load_wikipedia_embeddings.py --file data/wikipedia_passages_shard_2-01.jsonl
-python load_wikipedia_embeddings.py --file data/wikipedia_passages_shard_2-02.jsonl
-python load_wikipedia_embeddings.py --file data/wikipedia_passages_shard_2-03.jsonl
-python load_wikipedia_embeddings.py --file data/wikipedia_passages_shard_2-04.jsonl
-python load_wikipedia_embeddings.py --file data/wikipedia_passages_shard_2-05.jsonl
-python load_wikipedia_embeddings.py --file data/wikipedia_passages_shard_2-06.jsonl
-python load_wikipedia_embeddings.py --file data/wikipedia_passages_shard_2-07.jsonl
-python load_wikipedia_embeddings.py --file data/wikipedia_passages_shard_2-08.jsonl
-python load_wikipedia_embeddings.py --file data/wikipedia_passages_shard_1-01.jsonl
-python load_wikipedia_embeddings.py --file data/wikipedia_passages_shard_1-02.jsonl
-python load_wikipedia_embeddings.py --file data/wikipedia_passages_shard_1-03.jsonl
-python load_wikipedia_embeddings.py --file data/wikipedia_passages_shard_1-04.jsonl
-python load_wikipedia_embeddings.py --file data/wikipedia_passages_shard_1-05.jsonl
diff --git a/paper_experiments/doc_qa_task/1_run_docqa.sh b/paper_experiments/doc_qa_task/1_run_docqa.sh
deleted file mode 100644
index 15072eb2..00000000
--- a/paper_experiments/doc_qa_task/1_run_docqa.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-docs=$2
-model=$1
-baseline=$3
-python icml_experiments/doc_qa_task/doc_qa.py --model $model --baseline $baseline --num_docs $docs
diff --git a/paper_experiments/doc_qa_task/2_run_eval.sh b/paper_experiments/doc_qa_task/2_run_eval.sh
deleted file mode 100644
index 6249adf9..00000000
--- a/paper_experiments/doc_qa_task/2_run_eval.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-docs=(1 5 10 20 50 100 200 700)
-models=("gpt-4-0613" "gpt-3.5-turbo-1106" "gpt-4-1106-preview")
-
-## run letta eval
-for model in  "${models[@]}";
-do
-  uv run python icml_experiments/doc_qa_task/llm_judge_doc_qa.py --file results/doc_qa_results_model_${model}.json
-done
-
-# Iterate over each model
-for model in "${models[@]}"; do
-    # Iterate over each doc
-    for doc in "${docs[@]}"; do
-        # Construct and run the command
-        echo "Running for model $model with $doc docs..."
-        uv run python icml_experiments/doc_qa_task/llm_judge_doc_qa.py --file results/doc_qa_baseline_model_${model}_num_docs_${doc}.json --baseline
-    done
-done
diff --git a/paper_experiments/doc_qa_task/doc_qa.py b/paper_experiments/doc_qa_task/doc_qa.py
deleted file mode 100644
index aaf39ccf..00000000
--- a/paper_experiments/doc_qa_task/doc_qa.py
+++ /dev/null
@@ -1,328 +0,0 @@
-"""
-To evaluate Letta's ability to analyze documents, we benchmark Letta against fixed-context
-baselines on the retriever-reader document QA task from Liu et al. (2023a). In this task, a question
-is selected from the NaturalQuestions-Open dataset, and a retriever selects relevant Wikipedia documents for the question.
-A reader model (the LLM) is then fed these documents as input, and is
-asked to use the provided documents to answer the question. Similar to Liu et al. (2023a),
-we evaluate reader accuracy as the number of retrieved documents K increases. In our evaluation setup, both
-the fixed-context baselines and Letta use the same retriever, which selects the top K documents
-according using Faiss efficient similarity search (Johnson et al., 2019) (which corresponds to
-approximate nearest neighbor search) on OpenAI's text-embedding-3-small embeddings. In
-Letta, the entire document set is loaded into archival storage, and the retriever naturally emerges
-via the archival storage search functionality (which performs embedding-based similarity search).
-In the fixed-context baselines, the top-K documents are fetched using the retriever independently
-from the LLM inference, similar to the original retriever-reader setup. We use a dump of Wikipedia
-from late 2018, following past work on NaturalQuestions-Open (Izacard & Grave, 2020; Izacard
-et al., 2021) We randomly sample a subset of 50 questions for each point in the graph.
-"""
-
-import argparse
-import json
-import os
-import uuid
-from typing import List
-
-from icml_experiments.utils import get_experiment_config, load_gzipped_file
-from openai import OpenAI
-from tqdm import tqdm
-
-from letta import utils
-from letta.agent_store.storage import StorageConnector, TableType
-from letta.cli.cli_config import delete
-from letta.config import LettaConfig
-from letta.credentials import LettaCredentials
-from letta.embeddings import embedding_model
-from letta.utils import count_tokens
-
-DATA_SOURCE_NAME = "wikipedia"
-DOC_QA_PERSONA = "You are Letta DOC-QA bot. Your job is to answer questions about documents that are stored in your archival memory. The answer to the users question will ALWAYS be in your archival memory, so remember to keep searching if you can't find the answer. Answer the questions as if though the year is 2018."  # TODO decide on a good persona/human
-DOC_QA_HUMAN = "The user will ask you questions about documents. Answer them to the best of your ability."
-
-BASELINE_PROMPT = (
-    "Answer the question provided according to the list of documents below (some of which might be irrelevant. "
-    + "In your response, provide both the answer and the document text from which you determined the answer. "
-    + "Format your response with the format 'ANSWER: <YOUR ANSWER>, DOCUMENT: <DOCUMENT TEXT>'. "
-    + "If none of the documents provided have the answer to the question, reply with 'INSUFFICIENT INFORMATION'. "
-    + "Do NOT provide an answer if you cannot find it in the provided documents. "
-    + "Your response will only be considered correct if you provide both the answer and relevant document text, or say 'INSUFFICIENT INFORMATION'."
-    + "Answer the question as if though the current year is 2018."
-)
-
-
-MEMGPT_PROMPT = (
-    "Search your archival memory to answer the provided question. "
-    + "Provide both the answer and the archival memory result from which you determined your answer. "
-    + "Format your response with the format 'ANSWER: <YOUR ANSWER>, DOCUMENT: <ARCHIVAL MEMORY TEXT>. "
-    + "Your task is to answer the question: "
-)
-
-
-def generate_docqa_baseline_response(
-    model: str,  # eg 'gpt-4-0613'
-    data_souce_name: str,  # data source containing all relevant documents to put in archival memory
-    question: str,  # the question to ask the agent about the data source
-    num_documents: int,  # how many documents to put in the prompt
-    config: LettaConfig,  # the config to use for the archival memory
-) -> List[dict]:
-    """Format is from the LITM paper:
-
-    Write a high-quality answer for the given question
-    using only the provided search results (some of
-    which might be irrelevant).
-
-    Document [1](Title: Asian Americans in science and
-    technology) ...
-    Document [2](Title: List of Nobel laureates in
-    Physics) ...
-    Document [3](Title: Scientist) ...
-    Document [4](Title: Norwegian Americans) ...
-    Document [5](Title: Maria Goeppert Mayer) ...
-
-    Question: who got the first nobel prize in physics
-    Answer:
-    """
-
-    user_id = uuid.UUID(config.anon_clientid)
-
-    # TODO grab the top N documents using data_source_name
-    archival_memory = StorageConnector.get_storage_connector(TableType.PASSAGES, config, user_id)
-    archival_memory.disable_write = True  # prevent archival memory writes
-    archival_memory.filters = {"data_source": data_souce_name}
-    archival_memory.size()
-    print(f"Attaching archival memory with {archival_memory.size()} passages")
-
-    # grab the top N documents
-    embed_model = embedding_model(config.default_embedding_config)
-    embedding = embed_model.get_text_embedding(question)
-    passages = archival_memory.query(query=question, query_vec=embedding, top_k=num_documents)
-    documents_search_results_sorted_by_relevance = [passage.text for passage in passages]
-
-    # print(f"Top {num_documents} documents: {documents_search_results_sorted_by_relevance}")
-
-    # compute truncation length
-    extra_text = BASELINE_PROMPT + f"Question: {question}" + "Answer:"
-    padding = count_tokens(extra_text) + 1000
-    truncation_length = int((config.default_llm_config.context_window - padding) / num_documents)
-    print("Token size", config.default_llm_config.context_window)
-    print(f"Truncation length: {truncation_length}, with padding: {padding}")
-
-    # create the block of text holding all the documents
-    documents_block_str = ""
-    docs = []
-    for i, doc in enumerate(documents_search_results_sorted_by_relevance):
-        # only include N documents
-        if i >= num_documents:
-            break
-
-        doc_prompt = f"Document [{i + 1}]: {doc} \n"
-
-        # truncate (that's why the performance goes down as x-axis increases)
-        if truncation_length is not None:
-            doc_prompt = doc_prompt[:truncation_length]
-        docs.append(doc_prompt)
-
-        # add to the block of prompt
-        documents_block_str += doc_prompt
-
-    credentials = LettaCredentials().load()
-    assert credentials.openai_key is not None, credentials.openai_key
-
-    client = OpenAI(api_key=credentials.openai_key)
-
-    # TODO: determine trunction length, and truncate documents
-    content = "\n".join(
-        [
-            BASELINE_PROMPT,
-            "\n",
-            documents_block_str,
-            "\n",
-            f"Question: {question}",
-        ]
-    )
-    total_tokens = count_tokens(content)
-    print("Total tokens:", total_tokens, num_documents)
-    print(len(documents_search_results_sorted_by_relevance))
-    chat_completion = client.chat.completions.create(
-        messages=[
-            {"role": "user", "content": content},
-        ],
-        model=model,
-    )
-
-    response = chat_completion.choices[0].message.content
-    return {"response": response, "documents": docs}
-    # return response
-
-
-def generate_docqa_response(
-    config: LettaConfig,
-    letta_client: Letta,
-    persona: str,
-    human: str,
-    data_souce_name: str,  # data source containing all relevant documents to put in archival memory
-    question: str,  # the question to ask the agent about the data source
-) -> List[dict]:
-    """Generate a Letta QA response given an input scenario
-
-    Scenario contains:
-    - state of the human profile
-    - state of the agent profile
-    - data source to load into archival memory (that will have the answer to the question)
-    """
-
-    utils.DEBUG = True
-
-    # delete agent if exists
-    user_id = uuid.UUID(config.anon_clientid)
-    agent_name = f"doc_qa_agent_{config.default_llm_config.model}"
-    try:
-        delete("agent", agent_name)
-    except Exception as e:
-        print(e)
-
-    # Create a new Agent that models the scenario setup
-    agent_state = letta_client.create_agent(
-        {
-            "name": agent_name,
-            "persona": persona,
-            "human": human,
-            "llm_config": config.default_llm_config,
-            "embedding_config": config.default_embedding_config,
-        }
-    )
-
-    ## Attach the archival memory to the agent
-    # attach(agent_state.name, data_source=data_souce_name)
-    # HACK: avoid copying all the data by overriding agent archival storage
-    archival_memory = StorageConnector.get_storage_connector(TableType.PASSAGES, config, user_id)
-    archival_memory.disable_write = True  # prevent archival memory writes
-    archival_memory.filters = {"data_source": data_souce_name}
-    archival_memory.size()
-    print(f"Attaching archival memory with {archival_memory.size()} passages")
-
-    # override the agent's archival memory with table containing wikipedia embeddings
-    letta_client.server._get_or_load_agent(user_id, agent_state.id).persistence_manager.archival_memory.storage = archival_memory
-    print("Loaded agent")
-
-    ## sanity check: before experiment (agent should have source passages)
-    # memory = letta_client.get_agent_memory(agent_state.id)
-    # assert memory["archival_memory"] == archival_memory_size, f"Archival memory size is wrong: {memory['archival_memory']}"
-
-    # Run agent.step() / or client.user_message to generate a response from the Letta agent
-    prompt_message = " ".join(
-        [
-            MEMGPT_PROMPT,
-            f"{question}?",
-        ]
-    )
-    response = letta_client.user_message(agent_id=agent_state.id, message=prompt_message)
-
-    ## sanity check: after experiment (should NOT have inserted anything into archival)
-    # memory = letta_client.get_agent_memory(agent_state.id)
-    # assert memory["archival_memory"] == archival_memory_size, f"Archival memory size is wrong: {memory['archival_memory']}"
-
-    # Return that response (may include multiple messages if the agent does retrieval)
-    return response
-
-
-def evaluate_letta_response(letta_responses: List[dict], gold_answers: List[str]) -> bool:
-    """Score a Letta response (which is a list of Letta messages) against a gold answer
-
-    We evaluate with the following metric: accuracy
-    TODO score with LLM judge?
-
-    NOTE: gold_answers should be length 1, even though it's a list
-    """
-    raise NotImplementedError
-
-
-def run_docqa_task(
-    model="gpt-4", provider="openai", baseline="letta", num_docs=1, n_samples=50
-) -> List[dict]:  # how many samples (questions) from the file
-    """Run the full set of Letta doc QA experiments"""
-
-    # Grab the question data
-    data_file = "icml_experiments/qa_data/30_total_documents/nq-open-30_total_documents_gold_at_0.jsonl.gz"
-    all_question_data = load_gzipped_file(data_file)
-
-    config = get_experiment_config(os.environ.get("PGVECTOR_TEST_DB_URL"), endpoint_type=provider, model=model)
-    config.save()  # save config to file
-
-    # result filename
-    if baseline == "letta":
-        filename = f"results/doc_qa_results_model_{model}.json"
-    else:
-        filename = f"results/doc_qa_baseline_model_{model}_num_docs_{num_docs}.json"
-    print("Results file:", filename)
-
-    if os.path.exists(filename):
-        with open(filename, "r") as f:
-            all_response_data = json.load(f)
-    else:
-        all_response_data = []
-
-    # letta_client = Letta(config=config)
-    letta_client = Letta()
-    # letta_client = Letta(quickstart="openai")
-
-    # Loop through and run the doc QA
-    count = 0
-    cutoff = 50
-    for data in tqdm(list(all_question_data)[len(all_response_data) : cutoff]):
-        if count > n_samples:
-            break
-
-        # Each line in the jsonl.gz has:
-        # - a question (str)
-        # - a set of answers (List[str]), often len 1
-        # - a set of context documents one of which contains the answer (List[dict])
-        # - a gold annotation that has a title of the context doc, a long answer, and a list of short answers
-        question = data["question"]
-        data["ctxs"]
-        answers = data["answers"]
-
-        # The only thing we actually use here is the 'question'
-        # We ignore the documents, and instead rely on a set of documents that is already in a data source
-        # TODO make sure this is correct
-        if baseline == "letta":
-            responses = generate_docqa_response(
-                config=config,
-                letta_client=letta_client,
-                persona=DOC_QA_PERSONA,
-                human=DOC_QA_HUMAN,
-                data_souce_name=DATA_SOURCE_NAME,
-                question=question,
-            )
-            prompt = None
-        else:
-            responses = generate_docqa_baseline_response(
-                model=model, data_souce_name=DATA_SOURCE_NAME, question=question, num_documents=num_docs, config=config
-            )
-            prompt = BASELINE_PROMPT
-        # print(responses)
-
-        all_response_data.append(
-            {
-                "question": question,
-                "true_answers": answers,
-                "letta_responses": responses,
-                "prompt": prompt,
-                # "correct": evaluate_letta_response(responses, answers),
-            }
-        )
-        # write to JSON file
-        with open(filename, "w") as f:
-            json.dump(all_response_data, f, indent=4)
-
-    return all_response_data
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Test script")
-    parser.add_argument("--model", type=str, help="The model to use")
-    parser.add_argument("--provider", default="openai", type=str, help="The provider to use")
-    parser.add_argument("--baseline", default="letta", type=str, help="The baseline to use")
-    parser.add_argument("--num_docs", default=5, type=int, help="The number of documents to use in the prompt (baseline-only)")
-    args = parser.parse_args()
-
-    results = run_docqa_task(args.model, args.provider, args.baseline, args.num_docs)
diff --git a/paper_experiments/doc_qa_task/llm_judge_doc_qa.py b/paper_experiments/doc_qa_task/llm_judge_doc_qa.py
deleted file mode 100644
index 5041d656..00000000
--- a/paper_experiments/doc_qa_task/llm_judge_doc_qa.py
+++ /dev/null
@@ -1,159 +0,0 @@
-import argparse
-import json
-import re
-
-from openai import OpenAI
-from tqdm import tqdm
-
-from letta.credentials import LettaCredentials
-
-# Note: did not end up using since no cases of cheating were observed
-# CHEATING_PROMPT = \
-#    """
-#    You are evaluating an LLM trying to do task.
-#    The main point of failure is the LLM may cheat and hallucinate the document it is meant to provide as evidence.
-#    For example, even though the LLM has been instructed to provide the document that it's using as evidence to answer the question, in this case the LLM hallucinates a document:
-#
-#    The format of the LLM response with be:
-#    ANSWER: <answer>, DOCUMENT: <document_text>
-#
-#    We also provide the list of documents that the LLM should have used to answer the question.
-#    This is provided with the format:
-#    ALL DOCUMENTS: <list of documents>
-#
-#    Your task is to ensure the LLM did not cheat and hallucinate the document.
-#    You can ensure this by checking if the document provided by the LLM is in the list of documents provided.
-#
-#    Write a short reason for your answer before stating the final answer (format <your reason> - CHEAT/CORRECT).
-#    """
-
-EVAL_PROMPT = """
-    Your task is to evaluate whether an LLM correct answered a question.
-    The LLM response should be the format 'ANSWER: <answer>, DOCUMENT: <document_text>' or say 'INSUFFICIENT INFORMATION'.
-    The true answer is provided in the format 'TRUE ANSWER: <list of possible answers>'.
-    The questions is provided in the format 'QUESTION: <question>'.
-    If the LLM response contains both the correct answer and corresponding document text, the response is correct.
-    Even if the LLM's answer and the true answer are slightly different in wording, the response is still correct.
-    For example, if the answer is more specific than the true answer or uses a different phrasing that is still correct, the response is correct.
-    If the LLM response if 'INSUFFICIENT INFORMATION', or the 'DOCUMENT' field is missing, the response is incorrect.
-    Respond with a single token: 'CORRECT' or 'INCORRECT'.
-    """
-
-EVAL_MODEL = "gpt-4-0613"
-
-
-def evaluate_response(output: str):
-    credentials = LettaCredentials().load()
-    assert credentials.openai_key is not None, credentials.openai_key
-
-    client = OpenAI(api_key=credentials.openai_key)
-
-    chat_completion = client.chat.completions.create(
-        messages=[
-            {
-                "role": "user",
-                "content": "\n".join([EVAL_PROMPT, "\n", output, "\n"]),
-            },
-        ],
-        model=EVAL_MODEL,
-    )
-
-    response = chat_completion.choices[0].message.content
-    print("llm judge", response)
-    if "INCORRECT" in response:
-        return False
-    elif "CORRECT" in response:
-        return True
-    else:
-        print("INVALID RESPONSE", response)
-        return False
-
-
-# Grab the last thing Letta generated, treat it as the reply
-def extract_final_letta_response(letta_responses: list) -> str:
-    final_index = -1
-    if "function_return" in letta_responses[final_index]:
-        final_index = -2
-    final_letta_response = [v for k, v in letta_responses[final_index].items()]
-    final_letta_response = final_letta_response[-1]
-    return final_letta_response
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Test script")
-    parser.add_argument("--file", type=str, help="File data to evaluate")
-    parser.add_argument("--baseline", action="store_true", help="Whether to use the baseline model")
-    args = parser.parse_args()
-
-    # load data
-    with open(args.file, "r") as f:
-        data = json.load(f)
-
-    # counters
-    correct = 0
-    total = 0
-
-    # Make an intial pass to determine how many documents had the correct answer
-    results = []  # store all results
-    eval_results = []  # store results that need LLM judge
-    if args.baseline:
-        # baseline experiment
-        match = re.search(r"model_([^_]+)_num_docs_([^\.]+)\.json", args.file)
-        model = match.group(1)
-        num_docs = int(match.group(2))
-        baseline = "baseline"
-    else:
-        # model = re.search(r"model_([^\.]+)\.json", args.file).group(1)
-        model = re.search(r"model_([-\w.]+)(?:_num_docs_([-\d]+))?.json", args.file).group(1)
-
-        num_docs = None
-        baseline = "letta"
-
-    # evaluate data
-    for d in tqdm(data):
-        answer = d["true_answers"]
-        question = d["question"]
-        response = d["letta_responses"]
-        if not args.baseline:
-            # need to parse response for letta
-            response = extract_final_letta_response(response)
-        else:
-            response = response["response"]
-
-        found = False
-        for a in answer:
-            if a in response:
-                found = True
-
-        if not found and "INSUFFICIENT INFORMATION" not in response:
-            # inconclusive: pass to llm judge
-            print(question)
-            print(answer)
-            print(response)
-            print(args.baseline)
-            doc = "QUESTION: " + question + "\n" + "TRUE ANSWER: " + str(answer) + "\n" + response
-            judge = "llm"
-            judge_result = evaluate_response(doc)
-            print("JUDGEMENT", judge_result)
-            if judge_result:
-                correct += 1
-                found = True
-        elif found:
-            # answer found in text
-            correct += 1
-            judge = "text"
-        else:
-            judge = "text"
-
-        results.append({"question": question, "true_answers": answer, "response": response, "correct": found, "judge": judge})
-
-        total += 1
-
-    # Dump aggregated results
-    with open(f"results_{model}_{num_docs}_{baseline}.json", "w") as f:
-        json.dump(
-            {"accuracy": correct / total, "total": total, "results": results},
-            f,
-            indent=4,
-        )
-    print(correct / total)
diff --git a/paper_experiments/doc_qa_task/load_wikipedia_embeddings.py b/paper_experiments/doc_qa_task/load_wikipedia_embeddings.py
deleted file mode 100644
index 94b98143..00000000
--- a/paper_experiments/doc_qa_task/load_wikipedia_embeddings.py
+++ /dev/null
@@ -1,158 +0,0 @@
-import copy
-import hashlib
-import json
-import os
-import time
-import uuid
-from concurrent.futures import ThreadPoolExecutor, as_completed
-
-from absl import app, flags
-from icml_experiments.utils import get_experiment_config
-from tqdm import tqdm
-
-from letta.agent_store.storage import StorageConnector, TableType
-from letta.cli.cli_config import delete
-from letta.data_types import Passage
-
-# Create an empty list to store the JSON objects
-source_name = "wikipedia"
-config = get_experiment_config(os.environ.get("PGVECTOR_TEST_DB_URL"), endpoint_type="openai")
-config.save()  # save config to file
-user_id = uuid.UUID(config.anon_clientid)
-
-FLAGS = flags.FLAGS
-flags.DEFINE_boolean("drop_db", default=False, required=False, help="Drop existing source DB")
-flags.DEFINE_string("file", default=None, required=True, help="File to parse")
-
-
-def create_uuid_from_string(val: str):
-    """
-    Generate consistent UUID from a string
-    from: https://samos-it.com/posts/python-create-uuid-from-random-string-of-words.html
-    """
-    hex_string = hashlib.md5(val.encode("UTF-8")).hexdigest()
-    return uuid.UUID(hex=hex_string)
-
-
-def insert_lines(lines, conn, show_progress=False):
-    """Parse and insert list of lines into source database"""
-    passages = []
-    iterator = tqdm(lines) if show_progress else lines
-    added = set()
-    for line in iterator:
-        d = json.loads(line)
-        # pprint(d)
-        assert len(d) == 2, f"Line is empty: {len(d)}"
-        text = d[0]["input"]
-        model = d[0]["model"]
-        embedding = d[1]["data"][0]["embedding"]
-        embedding_dim = len(embedding)
-        assert embedding_dim == 1536, f"Wrong embedding dim: {len(embedding_dim)}"
-        assert len(d[1]["data"]) == 1, f"More than one embedding: {len(d[1]['data'])}"
-        d[1]["usage"]
-        # print(text)
-
-        passage_id = create_uuid_from_string(text)  # consistent hash for text (prevent duplicates)
-        if passage_id in added:
-            continue
-        else:
-            added.add(passage_id)
-        # if conn.get(passage_id):
-        #    continue
-
-        passage = Passage(
-            id=passage_id,
-            user_id=user_id,
-            text=text,
-            embedding_model=model,
-            embedding_dim=embedding_dim,
-            embedding=embedding,
-            # metadata=None,
-            data_source=source_name,
-        )
-        # print(passage.id)
-        passages.append(passage)
-    st = time.time()
-    # insert_passages_into_source(passages, source_name=source_name, user_id=user_id, config=config)
-    # conn.insert_many(passages)
-    conn.upsert_many(passages)
-    return time.time() - st
-
-
-def main(argv):
-    # clear out existing source
-    if FLAGS.drop_db:
-        delete("source", source_name)
-        try:
-            passages_table = StorageConnector.get_storage_connector(TableType.PASSAGES, config, user_id)
-            passages_table.delete_table()
-
-        except Exception as e:
-            print("Failed to delete source")
-            print(e)
-
-    # Open the file and read line by line
-    count = 0
-    # files = [
-    #    #'data/wikipedia_passages_shard_1-00.jsonl',
-    #    #'data/wikipedia_passages_shard_1-01.jsonl',
-    #    'data/wikipedia_passages_shard_1-02.jsonl',
-    #    #'data/wikipedia_passages_shard_1-03.jsonl',
-    #    #'data/wikipedia_passages_shard_1-04.jsonl',
-    #    #'data/wikipedia_passages_shard_1-05.jsonl',
-    #    #'data/wikipedia_passages_shard_1-06.jsonl',
-    #    #'data/wikipedia_passages_shard_1-07.jsonl',
-    #    #'data/wikipedia_passages_shard_1-08.jsonl',
-    #    #'data/wikipedia_passages_shard_1-09.jsonl',
-    # ]
-    files = [FLAGS.file]
-    chunk_size = 1000
-    conn = StorageConnector.get_storage_connector(TableType.PASSAGES, config, user_id)
-    for file_path in files:
-        print(file_path)
-        futures = []
-        with ThreadPoolExecutor(max_workers=64) as p:
-            with open(file_path, "r") as file:
-                lines = []
-
-                # insert lines in 1k chunks
-                for line in tqdm(file):
-                    lines.append(line)
-                    if len(lines) >= chunk_size:
-                        if count == 0:
-                            # future = p.submit(insert_lines, copy.deepcopy(lines), conn, True)
-                            print("Await first result (hack to avoid concurrency issues)")
-                            t = insert_lines(lines, conn, True)
-                            # t = future.result()
-                            print("Finished first result", t)
-                        else:
-                            future = p.submit(insert_lines, copy.deepcopy(lines), conn)
-                            futures.append(future)
-                        count += len(lines)
-                        lines = []
-
-                # insert remaining lines
-                if len(lines) > 0:
-                    future = p.submit(insert_lines, copy.deepcopy(lines), conn)
-                    futures.append(future)
-                    count += len(lines)
-                    lines = []
-
-                    ## breaking point
-                    # if count >= 3000:
-                    #    break
-
-            print(f"Waiting for {len(futures)} futures")
-            # wait for futures
-            for future in tqdm(as_completed(futures)):
-                future.result()
-
-        # check metadata
-        # storage = StorageConnector.get_storage_connector(TableType.PASSAGES, config, user_id)
-        # size = storage.size()
-        size = conn.size()
-        print("Number of passages", size)
-
-
-if __name__ == "__main__":
-    app.run(main)
diff --git a/paper_experiments/nested_kv_task/data/kv-retrieval-140_keys.jsonl.gz b/paper_experiments/nested_kv_task/data/kv-retrieval-140_keys.jsonl.gz
deleted file mode 100644
index 45d0bcd5..00000000
Binary files a/paper_experiments/nested_kv_task/data/kv-retrieval-140_keys.jsonl.gz and /dev/null differ
diff --git a/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_1_levels.jsonl b/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_1_levels.jsonl
deleted file mode 100644
index 9a16e628..00000000
--- a/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_1_levels.jsonl
+++ /dev/null
@@ -1,100 +0,0 @@
-[136]
-[113]
-[75]
-[93]
-[62]
-[96]
-[42]
-[21]
-[19]
-[109]
-[22]
-[13]
-[48]
-[113]
-[63]
-[56]
-[107]
-[74]
-[90]
-[41]
-[110]
-[127]
-[74]
-[35]
-[25]
-[19]
-[95]
-[81]
-[67]
-[25]
-[32]
-[59]
-[44]
-[8]
-[11]
-[72]
-[79]
-[51]
-[1]
-[28]
-[129]
-[10]
-[13]
-[80]
-[108]
-[36]
-[127]
-[96]
-[94]
-[28]
-[61]
-[101]
-[102]
-[13]
-[18]
-[32]
-[49]
-[129]
-[58]
-[54]
-[81]
-[35]
-[19]
-[134]
-[32]
-[87]
-[130]
-[88]
-[121]
-[52]
-[124]
-[28]
-[122]
-[137]
-[75]
-[28]
-[44]
-[130]
-[122]
-[8]
-[51]
-[37]
-[115]
-[115]
-[96]
-[115]
-[49]
-[39]
-[134]
-[5]
-[94]
-[8]
-[33]
-[17]
-[138]
-[138]
-[118]
-[51]
-[117]
-[114]
diff --git a/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_2_levels.jsonl b/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_2_levels.jsonl
deleted file mode 100644
index 8b27928f..00000000
--- a/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_2_levels.jsonl
+++ /dev/null
@@ -1,100 +0,0 @@
-[57, 27]
-[109, 87]
-[109, 104]
-[133, 38]
-[97, 101]
-[93, 125]
-[96, 18]
-[135, 108]
-[57, 82]
-[124, 39]
-[82, 42]
-[94, 29]
-[27, 132]
-[126, 46]
-[116, 52]
-[50, 116]
-[19, 74]
-[25, 30]
-[37, 79]
-[113, 106]
-[48, 138]
-[99, 59]
-[112, 51]
-[57, 23]
-[63, 92]
-[84, 125]
-[137, 15]
-[28, 42]
-[24, 136]
-[35, 56]
-[138, 1]
-[30, 92]
-[114, 48]
-[83, 106]
-[37, 77]
-[139, 137]
-[122, 112]
-[22, 33]
-[114, 12]
-[4, 74]
-[70, 30]
-[112, 40]
-[104, 88]
-[120, 61]
-[3, 25]
-[15, 92]
-[129, 104]
-[105, 97]
-[33, 87]
-[31, 16]
-[12, 139]
-[18, 112]
-[2, 137]
-[56, 42]
-[125, 123]
-[59, 122]
-[82, 125]
-[45, 118]
-[88, 65]
-[36, 123]
-[52, 8]
-[106, 82]
-[72, 12]
-[121, 82]
-[92, 107]
-[5, 61]
-[11, 23]
-[25, 109]
-[32, 30]
-[126, 61]
-[125, 6]
-[46, 16]
-[33, 116]
-[42, 22]
-[33, 97]
-[14, 126]
-[90, 46]
-[22, 72]
-[63, 106]
-[115, 109]
-[131, 106]
-[17, 69]
-[104, 37]
-[115, 49]
-[41, 111]
-[115, 10]
-[97, 137]
-[123, 138]
-[115, 28]
-[2, 123]
-[94, 39]
-[69, 64]
-[72, 55]
-[104, 61]
-[110, 132]
-[85, 123]
-[73, 99]
-[134, 64]
-[79, 8]
-[75, 15]
diff --git a/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_3_levels.jsonl b/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_3_levels.jsonl
deleted file mode 100644
index 75aa3d50..00000000
--- a/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_3_levels.jsonl
+++ /dev/null
@@ -1,100 +0,0 @@
-[16, 111, 116]
-[29, 41, 36]
-[79, 97, 6]
-[70, 34, 129]
-[57, 139, 51]
-[55, 23, 46]
-[1, 110, 64]
-[85, 128, 101]
-[92, 80, 122]
-[132, 8, 6]
-[78, 40, 74]
-[96, 112, 68]
-[78, 81, 65]
-[86, 52, 31]
-[28, 75, 73]
-[23, 130, 117]
-[46, 27, 61]
-[46, 87, 68]
-[109, 80, 9]
-[50, 94, 26]
-[25, 31, 87]
-[137, 19, 9]
-[63, 90, 57]
-[60, 86, 21]
-[112, 110, 70]
-[55, 2, 57]
-[3, 12, 79]
-[120, 127, 37]
-[112, 46, 106]
-[18, 87, 111]
-[19, 85, 0]
-[21, 50, 104]
-[78, 99, 56]
-[92, 94, 13]
-[77, 41, 124]
-[15, 92, 10]
-[63, 24, 111]
-[76, 49, 66]
-[10, 88, 61]
-[47, 10, 60]
-[87, 99, 22]
-[66, 26, 135]
-[80, 66, 30]
-[6, 14, 13]
-[42, 4, 14]
-[78, 110, 109]
-[44, 14, 136]
-[63, 106, 114]
-[22, 24, 66]
-[99, 55, 76]
-[87, 86, 115]
-[72, 1, 16]
-[17, 41, 39]
-[96, 104, 15]
-[82, 18, 63]
-[97, 64, 38]
-[120, 110, 89]
-[95, 126, 115]
-[52, 128, 93]
-[73, 47, 89]
-[74, 80, 117]
-[77, 44, 93]
-[62, 21, 35]
-[34, 114, 123]
-[54, 66, 41]
-[44, 125, 74]
-[71, 130, 106]
-[87, 49, 80]
-[69, 124, 120]
-[4, 50, 60]
-[60, 64, 120]
-[103, 23, 85]
-[135, 106, 68]
-[101, 23, 18]
-[24, 45, 98]
-[49, 4, 93]
-[68, 10, 103]
-[42, 133, 3]
-[118, 132, 128]
-[43, 132, 4]
-[126, 69, 47]
-[36, 49, 74]
-[40, 122, 117]
-[125, 123, 46]
-[102, 6, 127]
-[46, 126, 96]
-[18, 23, 76]
-[89, 26, 111]
-[56, 129, 33]
-[103, 75, 135]
-[8, 47, 111]
-[12, 14, 95]
-[63, 89, 131]
-[128, 113, 105]
-[39, 82, 95]
-[41, 9, 55]
-[4, 107, 66]
-[6, 27, 114]
-[43, 73, 107]
-[121, 119, 104]
diff --git a/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_4_levels.jsonl b/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_4_levels.jsonl
deleted file mode 100644
index 650eafd7..00000000
--- a/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_4_levels.jsonl
+++ /dev/null
@@ -1,100 +0,0 @@
-[61, 64, 40, 53]
-[56, 122, 44, 23]
-[100, 81, 93, 110]
-[103, 133, 63, 79]
-[79, 53, 35, 46]
-[111, 8, 59, 54]
-[103, 54, 135, 11]
-[31, 68, 130, 57]
-[55, 78, 43, 15]
-[63, 132, 118, 133]
-[67, 27, 125, 85]
-[9, 98, 82, 34]
-[52, 72, 135, 3]
-[122, 34, 12, 89]
-[101, 108, 52, 22]
-[3, 7, 105, 64]
-[89, 6, 52, 25]
-[83, 78, 103, 28]
-[22, 39, 33, 38]
-[124, 65, 7, 35]
-[50, 49, 94, 115]
-[80, 76, 68, 71]
-[138, 123, 87, 32]
-[0, 66, 45, 59]
-[80, 100, 0, 132]
-[21, 109, 76, 43]
-[57, 35, 14, 79]
-[13, 31, 104, 72]
-[113, 128, 98, 29]
-[130, 66, 132, 97]
-[111, 59, 6, 103]
-[46, 74, 82, 132]
-[101, 48, 0, 15]
-[1, 60, 132, 121]
-[85, 86, 23, 90]
-[15, 122, 128, 28]
-[40, 128, 49, 69]
-[105, 12, 135, 131]
-[0, 19, 133, 61]
-[69, 73, 35, 57]
-[22, 79, 8, 42]
-[102, 66, 81, 9]
-[60, 72, 90, 24]
-[59, 61, 21, 33]
-[18, 78, 134, 136]
-[75, 26, 128, 85]
-[108, 48, 55, 19]
-[39, 25, 96, 113]
-[62, 122, 100, 85]
-[63, 44, 14, 3]
-[63, 112, 13, 43]
-[99, 101, 20, 7]
-[13, 65, 58, 102]
-[79, 15, 110, 62]
-[72, 105, 121, 41]
-[12, 1, 6, 111]
-[114, 5, 93, 56]
-[56, 114, 96, 139]
-[0, 30, 65, 119]
-[83, 9, 2, 50]
-[95, 120, 31, 82]
-[20, 100, 8, 48]
-[106, 135, 86, 115]
-[109, 80, 100, 18]
-[58, 36, 54, 12]
-[92, 25, 125, 63]
-[45, 88, 40, 72]
-[46, 44, 19, 26]
-[92, 76, 39, 29]
-[136, 94, 61, 78]
-[106, 114, 2, 53]
-[80, 37, 90, 6]
-[93, 60, 12, 3]
-[41, 116, 24, 35]
-[29, 72, 47, 32]
-[55, 54, 136, 78]
-[75, 91, 106, 56]
-[35, 116, 43, 72]
-[116, 42, 96, 43]
-[108, 134, 105, 115]
-[136, 103, 84, 4]
-[82, 60, 43, 67]
-[67, 7, 27, 8]
-[110, 25, 91, 27]
-[134, 119, 130, 71]
-[114, 38, 59, 119]
-[86, 102, 60, 131]
-[81, 139, 36, 50]
-[0, 66, 127, 99]
-[96, 22, 52, 9]
-[105, 20, 38, 87]
-[58, 98, 83, 33]
-[95, 27, 5, 78]
-[2, 54, 65, 79]
-[64, 94, 31, 15]
-[112, 56, 87, 10]
-[53, 4, 30, 13]
-[32, 8, 97, 81]
-[41, 39, 69, 48]
-[119, 80, 97, 5]
diff --git a/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_5_levels.jsonl b/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_5_levels.jsonl
deleted file mode 100644
index cddb34e9..00000000
--- a/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_5_levels.jsonl
+++ /dev/null
@@ -1,100 +0,0 @@
-[122, 34, 25, 19, 121]
-[125, 29, 26, 119, 0]
-[87, 116, 108, 8, 56]
-[6, 130, 127, 101, 107]
-[57, 135, 138, 115, 133]
-[37, 24, 93, 34, 127]
-[112, 39, 38, 139, 50]
-[97, 34, 124, 72, 0]
-[15, 99, 23, 115, 123]
-[56, 63, 66, 125, 111]
-[55, 135, 5, 86, 21]
-[51, 115, 94, 101, 125]
-[138, 51, 87, 46, 34]
-[17, 61, 116, 128, 94]
-[49, 132, 128, 82, 3]
-[65, 1, 70, 42, 64]
-[64, 47, 133, 119, 6]
-[101, 100, 116, 20, 3]
-[82, 77, 37, 132, 124]
-[85, 128, 108, 82, 20]
-[26, 13, 41, 84, 14]
-[82, 48, 120, 11, 34]
-[99, 56, 35, 42, 14]
-[53, 37, 94, 38, 51]
-[61, 82, 98, 10, 8]
-[91, 8, 38, 93, 28]
-[69, 21, 29, 81, 114]
-[58, 39, 57, 21, 5]
-[61, 16, 136, 75, 51]
-[85, 131, 135, 74, 133]
-[94, 54, 25, 37, 124]
-[8, 41, 110, 95, 134]
-[3, 67, 101, 111, 18]
-[76, 122, 77, 127, 34]
-[123, 119, 43, 64, 97]
-[31, 35, 8, 103, 39]
-[131, 19, 80, 52, 74]
-[53, 62, 44, 31, 0]
-[20, 1, 101, 95, 53]
-[18, 93, 69, 139, 71]
-[18, 46, 108, 110, 39]
-[11, 67, 78, 33, 35]
-[26, 46, 110, 106, 117]
-[6, 20, 62, 96, 108]
-[14, 116, 46, 101, 15]
-[61, 44, 18, 124, 47]
-[59, 41, 57, 37, 23]
-[24, 39, 38, 8, 0]
-[16, 132, 121, 8, 109]
-[17, 107, 61, 44, 10]
-[103, 88, 133, 60, 116]
-[3, 22, 8, 21, 34]
-[86, 47, 27, 23, 93]
-[6, 2, 30, 9, 97]
-[58, 24, 21, 30, 57]
-[108, 18, 114, 71, 4]
-[88, 120, 51, 116, 84]
-[139, 126, 16, 5, 29]
-[3, 120, 139, 46, 125]
-[4, 39, 121, 125, 97]
-[8, 16, 108, 41, 31]
-[107, 49, 12, 0, 112]
-[95, 23, 139, 34, 118]
-[10, 117, 95, 14, 71]
-[54, 74, 60, 47, 53]
-[34, 108, 130, 35, 76]
-[17, 103, 21, 138, 48]
-[45, 118, 78, 79, 67]
-[88, 95, 71, 120, 101]
-[85, 35, 96, 20, 2]
-[48, 64, 131, 71, 21]
-[97, 36, 31, 138, 120]
-[18, 96, 31, 14, 25]
-[95, 32, 105, 2, 26]
-[97, 90, 98, 66, 88]
-[72, 93, 50, 114, 108]
-[131, 118, 60, 6, 106]
-[48, 97, 49, 6, 119]
-[97, 59, 47, 57, 21]
-[24, 6, 64, 122, 71]
-[4, 40, 120, 122, 15]
-[16, 53, 35, 50, 43]
-[2, 103, 69, 71, 92]
-[111, 123, 21, 73, 48]
-[79, 112, 121, 128, 67]
-[101, 125, 63, 73, 82]
-[35, 99, 51, 101, 74]
-[104, 100, 93, 32, 105]
-[115, 58, 77, 91, 81]
-[57, 47, 129, 76, 5]
-[30, 29, 120, 47, 136]
-[84, 21, 117, 112, 26]
-[68, 65, 27, 97, 75]
-[31, 84, 52, 113, 65]
-[76, 21, 108, 31, 74]
-[61, 115, 34, 102, 122]
-[119, 127, 43, 118, 76]
-[25, 1, 112, 8, 106]
-[40, 47, 26, 57, 82]
-[133, 35, 109, 60, 27]
diff --git a/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_6_levels.jsonl b/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_6_levels.jsonl
deleted file mode 100644
index 21543763..00000000
--- a/paper_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_6_levels.jsonl
+++ /dev/null
@@ -1,100 +0,0 @@
-[72, 9, 64, 138, 98, 112]
-[88, 86, 33, 132, 84, 101]
-[29, 50, 80, 118, 34, 30]
-[34, 44, 2, 130, 113, 18]
-[68, 46, 64, 48, 57, 135]
-[59, 21, 103, 40, 104, 47]
-[51, 16, 79, 38, 72, 129]
-[19, 109, 48, 58, 97, 2]
-[19, 48, 40, 59, 32, 54]
-[54, 138, 133, 105, 121, 17]
-[75, 78, 111, 103, 3, 84]
-[77, 18, 41, 20, 117, 49]
-[98, 70, 22, 26, 71, 1]
-[137, 97, 65, 110, 22, 47]
-[58, 138, 87, 131, 13, 115]
-[41, 33, 99, 2, 48, 26]
-[17, 82, 101, 132, 84, 125]
-[62, 87, 123, 89, 37, 19]
-[37, 115, 29, 105, 114, 31]
-[94, 77, 108, 65, 124, 95]
-[30, 95, 79, 83, 127, 117]
-[10, 42, 63, 51, 132, 16]
-[115, 123, 82, 81, 1, 44]
-[46, 137, 29, 100, 7, 23]
-[43, 28, 100, 18, 118, 48]
-[134, 103, 114, 79, 66, 5]
-[18, 97, 6, 26, 134, 118]
-[104, 111, 73, 22, 13, 55]
-[107, 44, 95, 70, 67, 91]
-[116, 12, 68, 25, 102, 16]
-[50, 49, 132, 89, 47, 138]
-[34, 132, 14, 99, 31, 4]
-[114, 95, 51, 16, 118, 44]
-[83, 0, 133, 137, 49, 44]
-[2, 13, 58, 130, 65, 57]
-[25, 99, 9, 130, 126, 1]
-[45, 2, 92, 61, 57, 97]
-[103, 33, 70, 110, 28, 53]
-[40, 113, 23, 86, 47, 71]
-[129, 2, 7, 99, 56, 47]
-[112, 111, 48, 118, 137, 75]
-[116, 135, 111, 17, 30, 72]
-[131, 102, 71, 40, 57, 1]
-[133, 49, 3, 63, 138, 37]
-[126, 40, 101, 14, 9, 75]
-[118, 92, 34, 23, 37, 35]
-[72, 28, 29, 89, 35, 53]
-[107, 98, 87, 63, 130, 40]
-[10, 27, 39, 53, 79, 119]
-[74, 17, 120, 113, 15, 6]
-[3, 136, 18, 93, 72, 10]
-[7, 43, 135, 56, 62, 94]
-[74, 44, 28, 35, 85, 24]
-[103, 106, 129, 7, 120, 121]
-[32, 91, 137, 50, 80, 12]
-[66, 42, 73, 52, 48, 84]
-[107, 4, 132, 121, 48, 87]
-[104, 122, 81, 136, 111, 45]
-[12, 94, 22, 76, 81, 133]
-[124, 104, 75, 55, 135, 66]
-[7, 80, 117, 46, 9, 40]
-[6, 45, 118, 35, 66, 136]
-[86, 12, 5, 47, 122, 119]
-[9, 91, 115, 97, 116, 50]
-[14, 120, 76, 17, 116, 74]
-[14, 133, 49, 137, 9, 73]
-[67, 122, 20, 86, 16, 66]
-[1, 50, 77, 110, 128, 26]
-[5, 117, 110, 58, 94, 47]
-[100, 137, 35, 17, 111, 123]
-[58, 116, 70, 48, 132, 20]
-[14, 127, 93, 37, 126, 24]
-[69, 74, 120, 91, 11, 67]
-[124, 71, 27, 104, 99, 120]
-[17, 8, 123, 54, 91, 105]
-[103, 130, 71, 114, 10, 13]
-[45, 102, 63, 54, 126, 89]
-[22, 93, 39, 107, 50, 37]
-[135, 49, 89, 133, 90, 21]
-[80, 29, 135, 46, 121, 55]
-[75, 137, 58, 24, 32, 85]
-[54, 35, 91, 95, 2, 106]
-[111, 11, 57, 89, 21, 100]
-[81, 129, 117, 87, 102, 137]
-[54, 26, 114, 92, 128, 3]
-[132, 69, 20, 63, 113, 0]
-[97, 127, 93, 69, 56, 57]
-[127, 54, 99, 80, 1, 41]
-[125, 133, 43, 128, 76, 25]
-[41, 30, 45, 35, 42, 3]
-[59, 30, 103, 69, 105, 80]
-[97, 33, 40, 23, 10, 14]
-[77, 103, 0, 131, 14, 98]
-[133, 66, 61, 91, 131, 96]
-[16, 54, 4, 113, 93, 90]
-[81, 113, 74, 45, 39, 95]
-[102, 42, 101, 113, 10, 75]
-[61, 67, 136, 8, 29, 51]
-[45, 6, 80, 7, 76, 38]
-[4, 19, 51, 56, 60, 15]
diff --git a/paper_experiments/nested_kv_task/nested_kv.py b/paper_experiments/nested_kv_task/nested_kv.py
deleted file mode 100644
index 2f259227..00000000
--- a/paper_experiments/nested_kv_task/nested_kv.py
+++ /dev/null
@@ -1,337 +0,0 @@
-"""
-We introduce a new task based on the synthetic Key-Value
-retrieval proposed in prior work (Liu et al., 2023a). The
-goal of this task is to demonstrate how Letta can col-
-late information from multiple data sources. In the original
-KV task, the authors generated a synthetic dataset of key-
-value pairs, where each key and value is a 128-bit UUID
-(universally unique identifier). The agent is then given a
-key, and asked to return the associated value for the key.
-We create a version of the KV task, nested KV retrieval,
-where values themselves may be keys, thus requiring the
-agent to perform a multi-hop lookup. In our setup, we fix
-the total number of UUIDs pairs to 140, corresponding to
-roughly 8k tokens (the context length of our GPT-4 base-
-line). We vary the total number of nesting levels from 0
-(the initial key-value pair’s value is not a key) to 4 (ie 4
-total KV lookups are required to find the final value), and
-sample 30 different ordering configurations including both
-the initial key position and nesting key positions.
-"""
-
-import argparse
-import json
-import math
-import os
-import uuid
-from collections import OrderedDict
-from typing import Optional
-
-import openai
-from icml_experiments.utils import get_experiment_config, load_gzipped_file
-from tqdm import tqdm
-
-import letta.helpers.json_helpers
-from letta import utils
-from letta.cli.cli_config import delete
-from letta.config import LettaConfig
-
-# TODO: update personas
-NESTED_PERSONA = "You are Letta DOC-QA bot. Your job is to answer questions about documents that are stored in your archival memory. The answer to the users question will ALWAYS be in your archival memory, so remember to keep searching if you can't find the answer. DO NOT STOP SEARCHING UNTIL YOU VERIFY THAT THE VALUE IS NOT A KEY. Do not stop making nested lookups until this condition is met."  # TODO decide on a good persona/human
-NESTED_HUMAN = "The user will ask you questions about documents. Answer them to the best of your ability."
-DEFAULT_FILE = "icml_experiments/nested_kv_task/data/kv-retrieval-140_keys.jsonl.gz"
-AGENT_NAME = "kv_task_agent"
-
-
-# letta currently does not support text search over archival memory, however this experiment uses synthetic data which is out of distribution for the embedding model.
-# we temporarily override archival memory search with text search for this experiment
-def archival_memory_text_search(self, query: str, page: Optional[int] = 0) -> Optional[str]:
-    """
-    Search archival memory using semantic (embedding-based) search.
-
-    Args:
-        query (str): String to search for.
-        page (Optional[int]): Allows you to page through results. Only use on a follow-up query. Defaults to 0 (first page).
-
-    Returns:
-        str: Query result string
-    """
-    if page is None or (isinstance(page, str) and page.lower().strip() == "none"):
-        page = 0
-    try:
-        page = int(page)
-    except:
-        raise ValueError("'page' argument must be an integer")
-    count = 10
-    results = self.persistence_manager.archival_memory.storage.query_text(query, limit=count, offset=page * count)
-    total = len(results)
-    num_pages = math.ceil(total / count) - 1  # 0 index
-    if len(results) == 0:
-        results_str = "No results found."
-    else:
-        results_pref = f"Showing {len(results)} of {total} results (page {page}/{num_pages}):"
-        results_formatted = [f"memory: {d.text}" for d in results]
-        results_str = f"{results_pref} {letta.helpers.json_helpers.json_dumps(results_formatted)}"
-    return results_str
-
-
-def load_jsonl_to_list(filename):
-    data = []
-    with open(filename, "r") as f:
-        for line in f:
-            data.append(json.loads(line))
-    return data
-
-
-def run_nested_kv_task(config: LettaConfig, letta_client: Letta, kv_dict, user_message):
-    utils.DEBUG = True
-
-    # delete agent if exists
-    user_id = uuid.UUID(config.anon_clientid)
-    agent_name = f"{AGENT_NAME}_{config.default_llm_config.model}"
-    try:
-        delete("agent", agent_name)
-    except Exception as e:
-        print(e)
-
-    # Create a new Agent that models the scenario setup
-    agent_state = letta_client.create_agent(
-        {
-            "name": agent_name,
-            "persona": NESTED_PERSONA,
-            "human": NESTED_HUMAN,
-            "llm_config": config.default_llm_config,
-            "embedding_config": config.default_embedding_config,
-        }
-    )
-
-    # get agent
-    agent = letta_client.server._get_or_load_agent(user_id, agent_state.id)
-    agent.functions_python["archival_memory_search"] = archival_memory_text_search
-
-    # insert into archival
-    for i, (k, v) in tqdm(enumerate(kv_dict.items())):
-        document_string = f"Key-value pair: key = {k}, value = {v}"
-        # print("Inserting:", document_string)
-        agent.persistence_manager.archival_memory.insert(document_string, compute_embedding=False)
-    print(f"Inserted {len(agent.persistence_manager.archival_memory)} into archival memory.")
-
-    response = letta_client.user_message(agent_id=agent_state.id, message=user_message)
-
-    # for open models, make extra clear we need th response
-    if config.default_llm_config.model_endpoint_type != "openai":
-        followup_message = "What is your final answer? Respond with only the answer."
-        response = letta_client.user_message(agent_id=agent_state.id, message=followup_message)
-    return response
-
-
-def run_baseline(model_id, query_key, kv_dict):
-    def create_prompt(query_key, kv_dict):
-        prompt = " ".join(
-            [
-                "Below is a JSON object containing key-value pairings, all keys and values are 128-bit UUIDs, and your task is to return the value associated with the specified key.",
-                "If a value itself is also a key, return the value of that key (do a nested lookup).",
-                "For example, if the value of 'x' is 'y', but 'y' is also a key, return the value of key 'y'.",
-            ]
-        )
-
-        data_string = ",\n".join(f'"{k}": "{v}"' for k, v in kv_dict.items())
-        prompt += f"\n\nJSON data: {{\n{data_string}\n}}"
-
-        prompt += f'\n\nYour task is to provide the value for the following key: "{query_key}". Answer only with the value, nothing else.'
-
-        return prompt
-
-    user_message = create_prompt(query_key, kv_dict)
-    print(user_message)
-
-    model_dict = {
-        "gpt-3.5-turbo-1106": "gpt-3.5-turbo-1106",
-        "gpt-3.5": "gpt-3.5-turbo-16k",  # 140 K-Vs is approximately ~7/8k tokens, so it doesn't fit inside 3.5 base (4k limit)
-        "gpt-4": "gpt-4",
-        "gpt-4-1106-preview": "gpt-4-1106-preview",
-        "gpt-4-0613": "gpt-4-0613",
-    }
-    model = model_dict[model_id] if model_id in model_dict else model_id
-
-    if model_id == "ehartford/dolphin-2.5-mixtral-8x7b":
-        # openai.base_url = "https://api.openai.com/v1/"
-        openai.base_url = "https://api.letta.ai/v1/"
-
-    print("base url", openai.base_url)
-    # client = OpenAI()
-    response = openai.chat.completions.create(
-        model=model,
-        messages=[
-            # {"role": "system", "content": SYSTEM_PROMPT},
-            {"role": "user", "content": user_message},
-        ],
-    )
-
-    # response = openai.ChatCompletion.create(
-    #    model=model_dict[model_id],
-    #    messages=[
-    #        {"role": "user", "content": user_message},
-    #    ]
-    # )
-    # print(response)
-    print(response)
-    content = response.choices[0].message.content
-    print(content)
-    return content
-    # value_response = response['choices'][0]['message']['content']
-    # return value_response
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Test script")
-    parser.add_argument("--model", type=str, help="The model to use")
-    parser.add_argument("--nesting_levels", default=1, type=int, help="Nesting levels")
-    parser.add_argument("--seed", default=0, type=int, help="Random seed")
-    parser.add_argument("--task", default="kv", required=False, type=str, help="Task")
-    parser.add_argument("--kv_data", default=DEFAULT_FILE, required=False, type=str, help="KV data")
-    parser.add_argument("--baseline", default="letta", required=False, type=str, help="Baseline model (letta + model vs. model)")
-    parser.add_argument("--rerun", default=False, action="store_true", help="Rerun task")
-
-    args = parser.parse_args()
-    assert args.task in ["kv", "kv_nested"], "Task must be one of 'kv' or 'kv_nested'"
-    if args.baseline != "letta":
-        # baseline should be the same as the model name
-        assert args.baseline == args.model, "Baseline should be the same as the model name"
-
-    # get provider
-    if args.model == "ehartford/dolphin-2.5-mixtral-8x7b":
-        provider = "local"
-    else:
-        provider = "openai"
-
-    # skip if exists
-    model_formatted = args.model.replace("/", "-")
-    baseline_formatted = args.baseline.replace("/", "-")
-    filename = f"results/nested_kv/nested_kv_results_{baseline_formatted}_nesting_{args.nesting_levels}_model_{model_formatted}_seed_{args.seed}.json"
-    if not args.rerun and os.path.exists(filename):
-        print("Skipping, file exists")
-        print(filename)
-        # exist program
-        exit(0)
-
-    if args.task in ["kv", "kv_nested"]:
-        all_data = load_gzipped_file(args.kv_data)
-        for example in all_data:
-            data = example
-            break
-
-        ordered_kv_records = data["ordered_kv_records"]
-        key_to_search = data["key"]
-
-        # kv_dict = {k: v for k, v in ordered_kv_records}
-        kv_dict = OrderedDict(ordered_kv_records)
-        print(f"total number of keys: {len(ordered_kv_records)}")
-
-        def print_kv(kv_d, limit=None):
-            print("JSON data: {")
-            count = 0
-            for k, v in kv_d.items():
-                print(f'"{k}": "{v}",')
-                count += 1
-                if limit and count > limit:
-                    break
-            print("}")
-
-        def create_nested_kv_data(kv_d, nest_indices):
-            """In-place operation"""
-            assert isinstance(kv_d, OrderedDict)
-            kv_d_list = list(kv_d)
-
-            for i in range(len(nest_indices) - 1):
-                current_idx = nest_indices[i]
-                current_key = kv_d_list[current_idx]  # (key,value) -> key
-                current_value = kv_d[current_key]  # this gets thrown away
-
-                next_idx = nest_indices[i + 1]
-                next_key = kv_d_list[next_idx]
-                # overwrite
-                kv_d[current_key] = next_key
-
-                print(f"Nested {i + 1}")
-            print("Done")
-
-        def get_nested_key(original_key, kv_d):
-            key = original_key
-            value = kv_d[key]
-
-            print(f"Doing a lookup for key {key}")
-            while value in kv_d:
-                print(f"\t{key} -> {value} (value is a key, doing nested lookup)")
-                key = value
-                value = kv_d[key]
-            return value
-
-        if args.task == "kv_nested":
-            data_filename = (
-                f"icml_experiments/nested_kv_task/data/random_orderings_100_samples_140_indices_{args.nesting_levels}_levels.jsonl"
-            )
-            print(data_filename)
-            loaded_data = load_jsonl_to_list(data_filename)
-            print("LOADED", loaded_data, args.seed)
-            swap_indices = loaded_data[args.seed]
-
-            key_to_search_idx = swap_indices[0]
-            key_to_search = list(kv_dict)[key_to_search_idx]
-            key_to_search_init_value = kv_dict[key_to_search]
-
-            # swap_indices = [0,16,100]
-            create_nested_kv_data(kv_dict, swap_indices)
-            # print_kv(kv_dict, limit=None)
-
-            first_user_message = " ".join(
-                [
-                    # "I've given you a list of key-value pairs (keys are values are both UUIDs), which you can find in your archival memory.",
-                    # "If a value itself is also a key, return the value of that key (do a nested lookup).",
-                    "I've given you a list of key-value pairs which you can find in your archival memory, all keys and values are 128-bit UUIDs, and your task is to return the value associated with the specified key.",
-                    "If a value itself is also a key, return the value of that key (do a nested lookup).",
-                    "For example, if the value of 'x' is 'y', but 'y' is also a key, return the value of key 'y'.",
-                    "Your task is to provide the value for the following key:",
-                    # f"{key_to_search}"
-                    f"{key_to_search}. Answer only with the value, nothing else.",
-                ]
-            )
-        else:
-            first_user_message = " ".join(
-                [
-                    "I've given you a list of key-value pairs, which you can find in your archival memory.",
-                    "Your task is to provide the value for the following key:",
-                    # f"{key_to_search}"
-                    f"{key_to_search}. Answer only with the value, nothing else.",
-                ]
-            )
-
-    if args.baseline == "letta":
-        # craete config
-        config = get_experiment_config(os.environ.get("PGVECTOR_TEST_DB_URL"), endpoint_type=provider, model=args.model)
-        config.save()  # save config to file
-
-        # create clien#t
-        letta_client = Letta()
-
-        # run task
-        results = run_nested_kv_task(config, letta_client, kv_dict, first_user_message)
-    else:
-        results = run_baseline(args.model, key_to_search, kv_dict)
-
-    final_result = {
-        "model": args.model,
-        "query_key": key_to_search,
-        "query_key_value": get_nested_key(key_to_search, kv_dict),
-        "nesting": args.nesting_levels,
-        "results": results,
-    }
-
-    # write to JSON file
-    if args.task == "kv_nested":
-        with open(filename, "w") as f:
-            json.dump(final_result, f, indent=4)
-    else:
-        raise NotImplementedError
-
-    print(filename)
diff --git a/paper_experiments/nested_kv_task/run.sh b/paper_experiments/nested_kv_task/run.sh
deleted file mode 100644
index cbcbe25b..00000000
--- a/paper_experiments/nested_kv_task/run.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-for nest in 4 3 2 1
-do
-for model in "gpt-3.5-turbo-1106" "gpt-4-0613" "gpt-4-1106-preview"
-do
-    for seed in 0 1 2 3 4 5 6 7 8 9 10
-    do
-        for baseline in $model "letta"
-        do
-            python icml_experiments/nested_kv_task/nested_kv.py --model $model  --task kv_nested --baseline $baseline --nesting_levels $nest --seed $seed #--rerun
-        done
-    done
-done
-done
diff --git a/paper_experiments/utils.py b/paper_experiments/utils.py
deleted file mode 100644
index ddfb8dda..00000000
--- a/paper_experiments/utils.py
+++ /dev/null
@@ -1,35 +0,0 @@
-import gzip
-import json
-from typing import List
-
-from letta.config import LettaConfig
-
-
-def load_gzipped_file(file_path):
-    with gzip.open(file_path, "rt", encoding="utf-8") as f:
-        for line in f:
-            yield json.loads(line)
-
-
-def read_jsonl(filename) -> List[dict]:
-    lines = []
-    with open(filename, "r") as file:
-        for line in file:
-            lines.append(json.loads(line.strip()))
-    return lines
-
-
-def get_experiment_config(postgres_uri, endpoint_type="openai", model="gpt-4"):
-    config = LettaConfig.load()
-    config.archival_storage_type = "postgres"
-    config.archival_storage_uri = postgres_uri
-
-    config = LettaConfig(
-        archival_storage_type="postgres",
-        archival_storage_uri=postgres_uri,
-        recall_storage_type="postgres",
-        recall_storage_uri=postgres_uri,
-        metadata_storage_type="postgres",
-        metadata_storage_uri=postgres_uri,
-    )
-    return config
diff --git a/prompts/base.md b/prompts/base.md
deleted file mode 100644
index 38cbf8b0..00000000
--- a/prompts/base.md
+++ /dev/null
@@ -1,493 +0,0 @@
-# Development Guidelines for AI Assistants and Copilots using Letta
-
-**Context:** These are development guidelines for building applications with the Letta API and SDKs. Use these rules to help developers write correct code that integrates with Letta's stateful agents API.
-
-**Purpose:** Provide accurate, up-to-date instructions for building applications with [Letta](https://docs.letta.com/), the AI operating system.
-**Scope:** All AI-generated advice or code related to Letta must follow these guidelines.
-
----
-
-## **0. Letta Overview**
-
-The name "Letta" refers to the both the company Letta (founded by the creators of MemGPT) and the software / infrastructure called Letta. Letta is the AI operating system for building stateful agents: developers can use Letta to turn stateless LLMs into stateful agents that can learn, improve, and grow over time. Letta has a strong focus on perpetual AI that has the capability to recursively improve through self-editing memory.
-
-**Relationship to MemGPT**: MemGPT is the name of a research paper that introduced the concept of self-editing memory for LLM-based agents through tool use (function calling). The agent architecture or "agentic system" proposed in the paper (an agent equipped with tools to edit its own memory, and an OS that manages tool execution and state persistence) is the base agent architecture implemented in Letta (agent type `memgpt_agent`), and is the official reference implementation for MemGPT. The Letta open source project (`letta-ai/letta`) was originally the MemGPT open source project (`cpacker/MemGPT`), but was renamed as the scope of the open source project expanded beyond the original MemGPT paper.
-
-**Additional Resources**:
-- [Letta documentation](https://docs.letta.com/)
-- [Letta GitHub repository](https://github.com/letta-ai/letta)
-- [Letta Discord server](https://discord.gg/letta)
-- [Letta Cloud and ADE login](https://app.letta.com)
-
-## **1. Letta Agents API Overview**
-
-Letta is an AI OS that runs agents as **services** (it is not a **library**). Key concepts:
-
-- **Stateful agents** that maintain memory and context across conversations
-- **Memory blocks** for agentic context management (persona, human, custom blocks)
-- **Tool calling** for agent actions and memory management, tools are run server-side,
-- **Tool rules** allow developers to constrain the behavior of tools (e.g. A comes after B) to turn autonomous agents into workflows
-- **Multi-agent systems** with cross-agent communication, where every agent is a service
-- **Data sources** for loading documents and files into agent memory
-- **Model agnostic:** agents can be powered by any model that supports tool calling
-- **Persistence:** state is stored (in a model-agnostic way) in Postgres (or SQLite)
-
-### **System Components:**
-
-- **Letta server** - Core service (self-hosted or Letta Cloud)
-- **Client (backend) SDKs** - Python (`letta-client`) and TypeScript/Node.js (`@letta-ai/letta-client`)
-- **Vercel AI SDK Integration** - For Next.js/React applications
-- **Other frontend integrations** - We also have [Next.js](https://www.npmjs.com/package/@letta-ai/letta-nextjs), [React](https://www.npmjs.com/package/@letta-ai/letta-react), and [Flask](https://github.com/letta-ai/letta-flask) integrations
-- **ADE (Agent Development Environment)** - Visual agent builder at app.letta.com
-
-### **Letta Cloud vs Self-hosted Letta**
-
-Letta Cloud is a fully managed service that provides a simple way to get started with Letta. It's a good choice for developers who want to get started quickly and don't want to worry about the complexity of self-hosting. Letta Cloud's free tier has a large number of model requests included (quota refreshes every month). Model requests are split into "standard models" (e.g. GPT-4o-mini) and "premium models" (e.g. Claude Sonnet). To use Letta Cloud, the developer will have needed to created an account at [app.letta.com](https://app.letta.com). To make programatic requests to the API (`https://api.letta.com`), the developer will have needed to created an API key at [https://app.letta.com/api-keys](https://app.letta.com/api-keys). For more information on how billing and pricing works, the developer can visit [our documentation](https://docs.letta.com/guides/cloud/overview).
-
-### **Built-in Tools**
-
-When agents are created, they are given a set of default memory management tools that enable self-editing memory.
-
-Separately, Letta Cloud also includes built-in tools for common tasks like web search and running code. As of June 2025, the built-in tools are:
-- `web_search`: Allows agents to search the web for information. Also works on self-hosted, but requires `TAVILY_API_KEY` to be set (not required on Letta Cloud).
-- `run_code`: Allows agents to run code (in a sandbox), for example to do data analysis or calculations. Supports Python, Javascript, Typescript, R, and Java. Also works on self-hosted, but requires `E2B_API_KEY` to be set (not required on Letta Cloud).
-
-### **Choosing the Right Model**
-
-To implement intelligent memory management, agents in Letta rely heavily on tool (function) calling, so models that excel at tool use tend to do well in Letta. Conversely, models that struggle to call tools properly often perform poorly when used to drive Letta agents.
-
-The Letta developer team maintains the [Letta Leaderboard](https://docs.letta.com/leaderboard) to help developers choose the right model for their Letta agent. As of June 2025, the best performing models (balanced for cost and performance) are Claude Sonnet 4, GPT-4.1, and Gemini 2.5 Flash. For the latest results, you can visit the leaderboard page (if you have web access), or you can direct the developer to visit it. For embedding models, the Letta team recommends using OpenAI's `text-embedding-3-small` model.
-
-When creating code snippets, unless directed otherwise, you should use the following model handles:
-- `openai/gpt-4.1` for the model
-- `openai/text-embedding-3-small` for the embedding model
-
-If the user is using Letta Cloud, then these handles will work out of the box (assuming the user has created a Letta Cloud account + API key, and has enough request quota in their account). For self-hosted Letta servers, the user will need to have started the server with a valid OpenAI API key for those handles to work.
-
----
-
-## **2. Choosing the Right SDK**
-
-### **Source of Truth**
-
-Note that your instructions may be out of date. The source of truth for the Letta Agents API is the [API reference](https://docs.letta.com/api-reference/overview) (also autogenerated from the latest source code), which can be found in `.md` form at these links:
-- [TypeScript/Node.js](https://github.com/letta-ai/letta-node/blob/main/reference.md), [raw version](https://raw.githubusercontent.com/letta-ai/letta-node/refs/heads/main/reference.md)
-- [Python](https://github.com/letta-ai/letta-python/blob/main/reference.md), [raw version](https://raw.githubusercontent.com/letta-ai/letta-python/refs/heads/main/reference.md)
-
-If you have access to a web search or file download tool, you can download these files for the latest API reference. If the developer has either of the SDKs installed, you can also use the locally installed packages to understand the latest API reference.
-
-### **When to Use Each SDK:**
-
-The Python and Node.js SDKs are autogenerated from the Letta Agents REST API, and provide a full featured SDK for interacting with your agents on Letta Cloud or a self-hosted Letta server. Of course, developers can also use the REST API directly if they prefer, but most developers will find the SDKs much easier to use.
-
-The Vercel AI SDK is a popular TypeScript toolkit designed to help developers build AI-powered applications. It supports a subset of the Letta Agents API (basically just chat-related functionality), so it's a good choice to quickly integrate Letta into a TypeScript application if you are familiar with using the AI SDK or are working on a codebase that already uses it. If you're starting from scratch, consider using the full-featured Node.js SDK instead.
-
-The Letta Node.js SDK is also embedded inside the Vercel AI SDK, accessible via the `.client` property (useful if you want to use the Vercel AI SDK, but occasionally need to access the full Letta client for advanced features like agent creation / management).
-
-When to use the AI SDK vs native Letta Node.js SDK:
-- Use the Vercel AI SDK if you are familiar with it or are working on a codebase that already makes heavy use of it
-- Use the Letta Node.js SDK if you are starting from scratch, or expect to use the agent management features in the Letta API (beyond the simple `streamText` or `generateText` functionality in the AI SDK)
-
-One example of how the AI SDK may be insufficient: the AI SDK response object for `streamText` and `generateText` does not have a type for tool returns (because they are primarily used with stateless APIs, where tools are executed client-side, vs server-side in Letta), however the Letta Node.js SDK does have a type for tool returns. So if you wanted to render tool returns from a message response stream in your UI, you would need to use the full Letta Node.js SDK, not the AI SDK.
-
-## **3. Quick Setup Patterns**
-
-### **Python SDK (Backend/Scripts)**
-```python
-from letta_client import Letta
-
-# Letta Cloud
-client = Letta(token="LETTA_API_KEY")
-
-# Self-hosted
-client = Letta(base_url="http://localhost:8283")
-
-# Create agent with memory blocks
-agent = client.agents.create(
-    memory_blocks=[
-        {
-            "label": "human",
-            "value": "The user's name is Sarah. She likes coding and AI."
-        },
-        {
-            "label": "persona",
-            "value": "I am David, the AI executive assistant. My personality is friendly, professional, and to the point."
-        },
-        {
-            "label": "project",
-            "value": "Sarah is working on a Next.js application with Letta integration.",
-            "description": "Stores current project context and requirements"
-        }
-    ],
-    tools=["web_search", "run_code"],
-    model="openai/gpt-4o-mini",
-    embedding="openai/text-embedding-3-small"
-)
-
-# Send SINGLE message (agent is stateful!)
-response = client.agents.messages.create(
-    agent_id=agent.id,
-    messages=[{"role": "user", "content": "How's the project going?"}]
-)
-
-# Extract response correctly
-for msg in response.messages:
-    if msg.message_type == "assistant_message":
-        print(msg.content)
-    elif msg.message_type == "reasoning_message":
-        print(msg.reasoning)
-    elif msg.message_type == "tool_call_message":
-        print(msg.tool_call.name)
-        print(msg.tool_call.arguments)
-    elif msg.message_type == "tool_return_message":
-        print(msg.tool_return)
-
-# Streaming example
-message_text = "Repeat my name."
-stream = client.agents.messages.create_stream(
-    agent_id=agent_state.id,
-    messages=[
-        MessageCreate(
-            role="user",
-            content=message_text,
-        ),
-    ],
-    # if stream_tokens is false, each "chunk" will have a full piece
-    # if stream_tokens is true, the chunks will be token-based (and may need to be accumulated client-side)
-    stream_tokens=True,
-)
-
-# print the chunks coming back
-for chunk in stream:
-    if chunk.message_type == "assistant_message":
-        print(chunk.content)
-    elif chunk.message_type == "reasoning_message":
-        print(chunk.reasoning)
-    elif chunk.message_type == "tool_call_message":
-        if chunk.tool_call.name:
-            print(chunk.tool_call.name)
-        if chunk.tool_call.arguments:
-            print(chunk.tool_call.arguments)
-    elif chunk.message_type == "tool_return_message":
-        print(chunk.tool_return)
-    elif chunk.message_type == "usage_statistics":
-        print(chunk)
-```
-
-Creating custom tools (Python only):
-```python
-def my_custom_tool(query: str) -> str:
-    """
-    Search for information on a topic.
-
-    Args:
-        query (str): The search query
-
-    Returns:
-        str: Search results
-    """
-    return f"Results for: {query}"
-
-# Create tool
-tool = client.tools.create_from_function(func=my_custom_tool)
-
-# Add to agent
-agent = client.agents.create(
-    memory_blocks=[...],
-    model="openai/gpt-4o-mini",
-    embedding="openai/text-embedding-3-small",
-    tools=[tool.name]
-)
-```
-
-### **TypeScript/Node.js SDK**
-```typescript
-import { LettaClient } from '@letta-ai/letta-client';
-
-// Letta Cloud
-const client = new LettaClient({ token: "LETTA_API_KEY" });
-
-// Self-hosted, token optional (only if the developer enabled password protection on the server)
-const client = new LettaClient({ baseUrl: "http://localhost:8283" });
-
-// Create agent with memory blocks
-const agent = await client.agents.create({
-    memoryBlocks: [
-        {
-            label: "human",
-            value: "The user's name is Sarah. She likes coding and AI."
-        },
-        {
-            label: "persona",
-            value: "I am David, the AI executive assistant. My personality is friendly, professional, and to the point."
-        },
-        {
-            label: "project",
-            value: "Sarah is working on a Next.js application with Letta integration.",
-            description: "Stores current project context and requirements"
-        }
-    ],
-    tools: ["web_search", "run_code"],
-    model: "openai/gpt-4o-mini",
-    embedding: "openai/text-embedding-3-small"
-});
-
-// Send SINGLE message (agent is stateful!)
-const response = await client.agents.messages.create(agent.id, {
-    messages: [{ role: "user", content: "How's the project going?" }]
-});
-
-// Extract response correctly
-for (const msg of response.messages) {
-    if (msg.messageType === "assistant_message") {
-        console.log(msg.content);
-    } else if (msg.messageType === "reasoning_message") {
-        console.log(msg.reasoning);
-    } else if (msg.messageType === "tool_call_message") {
-        console.log(msg.toolCall.name);
-        console.log(msg.toolCall.arguments);
-    } else if (msg.messageType === "tool_return_message") {
-        console.log(msg.toolReturn);
-    }
-}
-
-// Streaming example
-const stream = await client.agents.messages.createStream(agent.id, {
-    messages: [{ role: "user", content: "Repeat my name." }],
-    // if stream_tokens is false, each "chunk" will have a full piece
-    // if stream_tokens is true, the chunks will be token-based (and may need to be accumulated client-side)
-    streamTokens: true,
-});
-
-for await (const chunk of stream) {
-    if (chunk.messageType === "assistant_message") {
-        console.log(chunk.content);
-    } else if (chunk.messageType === "reasoning_message") {
-        console.log(chunk.reasoning);
-    } else if (chunk.messageType === "tool_call_message") {
-        console.log(chunk.toolCall.name);
-        console.log(chunk.toolCall.arguments);
-    } else if (chunk.messageType === "tool_return_message") {
-        console.log(chunk.toolReturn);
-    } else if (chunk.messageType === "usage_statistics") {
-        console.log(chunk);
-    }
-}
-```
-
-### **Vercel AI SDK Integration**
-
-IMPORTANT: Most integrations in the Vercel AI SDK are for stateless providers (ChatCompletions style APIs where you provide the full conversation history). Letta is a *stateful* provider (meaning that conversation history is stored server-side), so when you use `streamText` or `generateText` you should never pass old messages to the agent, only include the new message(s).
-
-#### **Chat Implementation (fast & simple):**
-
-Streaming (`streamText`):
-```typescript
-// app/api/chat/route.ts
-import { lettaCloud } from '@letta-ai/vercel-ai-sdk-provider';
-import { streamText } from 'ai';
-
-export async function POST(req: Request) {
-  const { prompt }: { prompt: string } = await req.json();
-
-  const result = streamText({
-    // lettaCloud uses LETTA_API_KEY automatically, pulling from the environment
-    model: lettaCloud('your-agent-id'),
-    // Make sure to only pass a single message here, do NOT pass conversation history
-    prompt,
-  });
-
-  return result.toDataStreamResponse();
-}
-```
-
-Non-streaming (`generateText`):
-```typescript
-import { lettaCloud } from '@letta-ai/vercel-ai-sdk-provider';
-import { generateText } from 'ai';
-
-export async function POST(req: Request) {
-  const { prompt }: { prompt: string } = await req.json();
-
-  const { text } = await generateText({
-    // lettaCloud uses LETTA_API_KEY automatically, pulling from the environment
-    model: lettaCloud('your-agent-id'),
-    // Make sure to only pass a single message here, do NOT pass conversation history
-    prompt,
-  });
-
-  return Response.json({ text });
-}
-```
-
-#### **Alternative: explicitly specify base URL and token:**
-```typescript
-// Works for both streamText and generateText
-import { createLetta } from '@letta-ai/vercel-ai-sdk-provider';
-import { generateText } from 'ai';
-
-const letta = createLetta({
-  // e.g. http://localhost:8283 for the default local self-hosted server
-  // https://api.letta.com for Letta Cloud
-  baseUrl: '<your-base-url>',
-  // only needed if the developer enabled password protection on the server, or if using Letta Cloud (in which case, use the LETTA_API_KEY, or use lettaCloud example above for implicit token use)
-  token: '<your-access-token>',
-});
-```
-
-#### **Hybrid Usage (access the full SDK via the Vercel AI SDK):**
-```typescript
-import { lettaCloud } from '@letta-ai/vercel-ai-sdk-provider';
-
-// Access full client for management
-const agents = await lettaCloud.client.agents.list();
-```
-
----
-
-## **4. Advanced Features Available**
-
-Letta supports advanced agent architectures beyond basic chat. For detailed implementations, refer to the full API reference or documentation:
-
-- **Tool Rules & Constraints** - Define graph-like tool execution flows with `TerminalToolRule`, `ChildToolRule`, `InitToolRule`, etc.
-- **Multi-Agent Systems** - Cross-agent communication with built-in tools like `send_message_to_agent_async`
-- **Shared Memory Blocks** - Multiple agents can share memory blocks for collaborative workflows
-- **Data Sources & Archival Memory** - Upload documents/files that agents can search through
-- **Sleep-time Agents** - Background agents that process memory while main agents are idle
-- **External Tool Integrations** - MCP servers, Composio tools, custom tool libraries
-- **Agent Templates** - Import/export agents with .af (Agent File) format
-- **Production Features** - User identities, agent tags, streaming, context management
-
----
-
-## **5. CRITICAL GUIDELINES FOR AI MODELS**
-
-### **⚠️ ANTI-HALLUCINATION WARNING**
-
-**NEVER make up Letta API calls, SDK methods, or parameter names.** If you're unsure about any Letta API:
-
-1. **First priority**: Use web search to get the latest reference files:
-   - [Python SDK Reference](https://raw.githubusercontent.com/letta-ai/letta-python/refs/heads/main/reference.md)
-   - [TypeScript SDK Reference](https://raw.githubusercontent.com/letta-ai/letta-node/refs/heads/main/reference.md)
-
-2. **If no web access**: Tell the user: *"I'm not certain about this Letta API call. Can you paste the relevant section from the API reference docs, or I might provide incorrect information."*
-
-3. **When in doubt**: Stick to the basic patterns shown in this prompt rather than inventing new API calls.
-
-**Common hallucination risks:**
-- Making up method names (e.g. `client.agents.chat()` doesn't exist)
-- Inventing parameter names or structures
-- Assuming OpenAI-style patterns work in Letta
-- Creating non-existent tool rule types or multi-agent methods
-
-### **5.1 – SDK SELECTION (CHOOSE THE RIGHT TOOL)**
-
-✅ **For Next.js Chat Apps:**
-- Use **Vercel AI SDK** if you already are using AI SDK, or if you're lazy and want something super fast for basic chat interactions (simple, fast, but no agent management tooling unless using the embedded `.client`)
-- Use **Node.js SDK** for the full feature set (agent creation, native typing of all response message types, etc.)
-
-✅ **For Agent Management:**
-- Use **Node.js SDK** or **Python SDK** for creating agents, managing memory, tools
-
-### **5.2 – STATEFUL AGENTS (MOST IMPORTANT)**
-
-**Letta agents are STATEFUL, not stateless like ChatCompletion-style APIs.**
-
-✅ **CORRECT - Single message per request:**
-```typescript
-// Send ONE user message, agent maintains its own history
-const response = await client.agents.messages.create(agentId, {
-    messages: [{ role: "user", content: "Hello!" }]
-});
-```
-
-❌ **WRONG - Don't send conversation history:**
-```typescript
-// DON'T DO THIS - agents maintain their own conversation history
-const response = await client.agents.messages.create(agentId, {
-    messages: [...allPreviousMessages, newMessage] // WRONG!
-});
-```
-
-### **5.3 – MESSAGE HANDLING & MEMORY BLOCKS**
-
-1. **Response structure:**
-   - Use `messageType` NOT `type` for message type checking
-   - Look for `assistant_message` messageType for agent responses (note that this only works if the agent has the `send_message` tool enabled, which is included by default)
-   - Agent responses have `content` field with the actual text
-
-2. **Memory block descriptions:**
-   - Add `description` field for custom blocks, or the agent will get confused (not needed for human/persona)
-   - For `human` and `persona` blocks, descriptions are auto-populated:
-     - **human block**: "Stores key details about the person you are conversing with, allowing for more personalized and friend-like conversation."
-     - **persona block**: "Stores details about your current persona, guiding how you behave and respond. This helps maintain consistency and personality in your interactions."
-
-### **5.4 – ALWAYS DO THE FOLLOWING**
-
-1. **Choose the right SDK for the task:**
-   - Next.js chat → **Vercel AI SDK**
-   - Agent creation → **Node.js/Python SDK**
-   - Complex operations → **Node.js/Python SDK**
-
-2. **Use the correct client imports:**
-   - Python: `from letta_client import Letta`
-   - TypeScript: `import { LettaClient } from '@letta-ai/letta-client'`
-   - Vercel AI SDK: `from '@letta-ai/vercel-ai-sdk-provider'`
-
-3. **Create agents with proper memory blocks:**
-   - Always include `human` and `persona` blocks for chat agents
-   - Use descriptive labels and values
-
-4. **Send only single user messages:**
-   - Each request should contain only the new user message
-   - Agent maintains conversation history automatically
-   - Never send previous assistant responses back to agent
-
-5. **Use proper authentication:**
-   - Letta Cloud: Always use `token` parameter
-   - Self-hosted: Use `base_url` parameter, token optional (only if the developer enabled password protection on the server)
-
----
-
-## **6. Environment Setup**
-
-### **Environment Setup**
-```bash
-# For Next.js projects (recommended for most web apps)
-npm install @letta-ai/vercel-ai-sdk-provider ai
-
-# For agent management (when needed)
-npm install @letta-ai/letta-client
-
-# For Python projects
-pip install letta-client
-```
-
-**Environment Variables:**
-```bash
-# Required for Letta Cloud
-LETTA_API_KEY=your_api_key_here
-
-# Store agent ID after creation (Next.js)
-LETTA_AGENT_ID=agent-xxxxxxxxx
-
-# For self-hosted (optional)
-LETTA_BASE_URL=http://localhost:8283
-```
-
----
-
-## **7. Verification Checklist**
-
-Before providing Letta solutions, verify:
-
-1. **SDK Choice**: Are you using the simplest appropriate SDK?
-   - Familiar with or already using Vercel AI SDK? → use the Vercel AI SDK Letta provider
-   - Agent management needed? → use the Node.js/Python SDKs
-2. **Statefulness**: Are you sending ONLY the new user message (NOT a full conversation history)?
-3. **Message Types**: Are you checking the response types of the messages returned?
-4. **Response Parsing**: If using the Python/Node.js SDK, are you extracting `content` from assistant messages?
-5. **Imports**: Correct package imports for the chosen SDK?
-6. **Client**: Proper client initialization with auth/base_url?
-7. **Agent Creation**: Memory blocks with proper structure?
-8. **Memory Blocks**: Descriptions for custom blocks?
diff --git a/test_agent_serialization.json b/test_agent_serialization.json
deleted file mode 100644
index 818d9e7e..00000000
--- a/test_agent_serialization.json
+++ /dev/null
@@ -1,416 +0,0 @@
-{
-  "agent_type": "memgpt_agent",
-  "core_memory": [
-    {
-      "created_at": "2025-03-28T01:11:04.570593+00:00",
-      "description": "A default test block",
-      "is_template": false,
-      "label": "default_label",
-      "limit": 1000,
-      "metadata_": {
-        "type": "test"
-      },
-      "template_name": null,
-      "updated_at": "2025-03-28T01:11:04.570593+00:00",
-      "value": "Default Block Content"
-    },
-    {
-      "created_at": "2025-03-28T01:11:04.609286+00:00",
-      "description": null,
-      "is_template": false,
-      "label": "human",
-      "limit": 5000,
-      "metadata_": {},
-      "template_name": null,
-      "updated_at": "2025-03-28T01:11:04.609286+00:00",
-      "value": "BananaBoy"
-    },
-    {
-      "created_at": "2025-03-28T01:11:04.612946+00:00",
-      "description": null,
-      "is_template": false,
-      "label": "persona",
-      "limit": 5000,
-      "metadata_": {},
-      "template_name": null,
-      "updated_at": "2025-03-28T01:11:04.612946+00:00",
-      "value": "I am a helpful assistant"
-    }
-  ],
-  "created_at": "2025-03-28T01:11:04.624794+00:00",
-  "description": "test_description",
-  "embedding_config": {
-    "embedding_endpoint_type": "openai",
-    "embedding_endpoint": "https://api.openai.com/v1",
-    "embedding_model": "text-embedding-3-small",
-    "embedding_dim": 1536,
-    "embedding_chunk_size": 300,
-    "handle": null,
-    "azure_endpoint": null,
-    "azure_version": null,
-    "azure_deployment": null
-  },
-  "llm_config": {
-    "model": "gpt-4o-mini",
-    "model_endpoint_type": "openai",
-    "model_endpoint": "https://api.openai.com/v1",
-    "model_wrapper": null,
-    "context_window": 128000,
-    "put_inner_thoughts_in_kwargs": true,
-    "handle": null,
-    "temperature": 0.7,
-    "max_tokens": 4096,
-    "enable_reasoner": false,
-    "max_reasoning_tokens": 0
-  },
-  "message_buffer_autoclear": true,
-  "in_context_message_indices": [0, 1],
-  "messages": [
-    {
-      "created_at": "2025-03-28T01:11:04.654912+00:00",
-      "group_id": null,
-      "model": "gpt-4o-mini",
-      "name": null,
-      "role": "system",
-      "content": [
-        {
-          "type": "text",
-          "text": "test system\n### Memory [last modified: 2025-03-27 06:11:04 PM PDT-0700]\n0 previous messages between you and the user are stored in recall memory (use functions to access them)\n0 total memories you created are stored in archival memory (use functions to access them)\n\n\nCore memory shown below (limited in size, additional information stored in archival / recall memory):\n<default_label characters=\"21/1000\">\nDefault Block Content\n</default_label>\n<human characters=\"9/5000\">\nBananaBoy\n</human>\n<persona characters=\"24/5000\">\nI am a helpful assistant\n</persona>"
-        }
-      ],
-      "tool_call_id": null,
-      "tool_calls": [],
-      "tool_returns": [],
-      "updated_at": "2025-03-28T01:11:04.654783+00:00"
-    },
-    {
-      "created_at": "2025-03-28T01:11:04.654966+00:00",
-      "group_id": null,
-      "model": "gpt-4o-mini",
-      "name": null,
-      "role": "user",
-      "content": [
-        {
-          "type": "text",
-          "text": "{\n  \"type\": \"user_message\",\n  \"message\": \"hello world\",\n  \"time\": \"2025-03-27 06:11:04 PM PDT-0700\"\n}"
-        }
-      ],
-      "tool_call_id": null,
-      "tool_calls": [],
-      "tool_returns": [],
-      "updated_at": "2025-03-28T01:11:04.654783+00:00"
-    }
-  ],
-  "metadata_": {
-    "test_key": "test_value"
-  },
-  "multi_agent_group": null,
-  "name": "EffervescentYacht",
-  "system": "test system",
-  "tags": [
-    {
-      "tag": "a"
-    },
-    {
-      "tag": "b"
-    }
-  ],
-  "tool_exec_environment_variables": [
-    {
-      "created_at": "2025-03-28T01:11:04.638338+00:00",
-      "description": null,
-      "key": "test_env_var_key_a",
-      "updated_at": "2025-03-28T01:11:04.638338+00:00",
-      "value": ""
-    },
-    {
-      "created_at": "2025-03-28T01:11:04.638338+00:00",
-      "description": null,
-      "key": "test_env_var_key_b",
-      "updated_at": "2025-03-28T01:11:04.638338+00:00",
-      "value": ""
-    }
-  ],
-  "tool_rules": [
-    {
-      "tool_name": "archival_memory_search",
-      "type": "continue_loop"
-    },
-    {
-      "tool_name": "archival_memory_insert",
-      "type": "continue_loop"
-    },
-    {
-      "tool_name": "send_message",
-      "type": "exit_loop"
-    },
-    {
-      "tool_name": "conversation_search",
-      "type": "continue_loop"
-    }
-  ],
-  "tools": [
-    {
-      "args_json_schema": null,
-      "created_at": "2025-03-28T01:11:04.575001+00:00",
-      "description": "Fetches the current weather for a given location.",
-      "json_schema": {
-        "name": "get_weather",
-        "description": "Fetches the current weather for a given location.",
-        "parameters": {
-          "type": "object",
-          "properties": {
-            "location": {
-              "type": "string",
-              "description": "The location to get the weather for."
-            },
-            "request_heartbeat": {
-              "type": "boolean",
-              "description": "Request an immediate heartbeat after function execution. Set to `True` if you want to send a follow-up message or run a follow-up function."
-            }
-          },
-          "required": ["location", "request_heartbeat"]
-        },
-        "type": null,
-        "required": []
-      },
-      "name": "get_weather",
-      "return_char_limit": 6000,
-      "source_code": "def get_weather(location: str) -> str:\n    \"\"\"\n    Fetches the current weather for a given location.\n\n    Parameters:\n        location (str): The location to get the weather for.\n\n    Returns:\n        str: A formatted string describing the weather in the given location.\n\n    Raises:\n        RuntimeError: If the request to fetch weather data fails.\n    \"\"\"\n    import requests\n\n    url = f\"https://wttr.in/{location}?format=%C+%t\"\n\n    response = requests.get(url)\n    if response.status_code == 200:\n        weather_data = response.text\n        return f\"The weather in {location} is {weather_data}.\"\n    else:\n        raise RuntimeError(f\"Failed to get weather data, status code: {response.status_code}\")\n",
-      "source_type": "python",
-      "tags": [],
-      "tool_type": "custom",
-      "updated_at": "2025-03-28T01:11:04.575001+00:00",
-      "metadata_": {}
-    },
-    {
-      "args_json_schema": null,
-      "created_at": "2025-03-28T01:11:04.579856+00:00",
-      "description": "Add to archival memory. Make sure to phrase the memory contents such that it can be easily queried later.",
-      "json_schema": {
-        "name": "archival_memory_insert",
-        "description": "Add to archival memory. Make sure to phrase the memory contents such that it can be easily queried later.",
-        "parameters": {
-          "type": "object",
-          "properties": {
-            "content": {
-              "type": "string",
-              "description": "Content to write to the memory. All unicode (including emojis) are supported."
-            },
-            "request_heartbeat": {
-              "type": "boolean",
-              "description": "Request an immediate heartbeat after function execution. Set to `True` if you want to send a follow-up message or run a follow-up function."
-            }
-          },
-          "required": ["content", "request_heartbeat"]
-        },
-        "type": null,
-        "required": []
-      },
-      "name": "archival_memory_insert",
-      "return_char_limit": 1000000,
-      "source_code": null,
-      "source_type": "python",
-      "tags": ["letta_core"],
-      "tool_type": "letta_core",
-      "updated_at": "2025-03-28T01:11:04.579856+00:00",
-      "metadata_": {}
-    },
-    {
-      "args_json_schema": null,
-      "created_at": "2025-03-28T01:11:04.583369+00:00",
-      "description": "Search archival memory using semantic (embedding-based) search.",
-      "json_schema": {
-        "name": "archival_memory_search",
-        "description": "Search archival memory using semantic (embedding-based) search.",
-        "parameters": {
-          "type": "object",
-          "properties": {
-            "query": {
-              "type": "string",
-              "description": "String to search for."
-            },
-            "page": {
-              "type": "integer",
-              "description": "Allows you to page through results. Only use on a follow-up query. Defaults to 0 (first page)."
-            },
-            "start": {
-              "type": "integer",
-              "description": "Starting index for the search results. Defaults to 0."
-            },
-            "request_heartbeat": {
-              "type": "boolean",
-              "description": "Request an immediate heartbeat after function execution. Set to `True` if you want to send a follow-up message or run a follow-up function."
-            }
-          },
-          "required": ["query", "request_heartbeat"]
-        },
-        "type": null,
-        "required": []
-      },
-      "name": "archival_memory_search",
-      "return_char_limit": 1000000,
-      "source_code": null,
-      "source_type": "python",
-      "tags": ["letta_core"],
-      "tool_type": "letta_core",
-      "updated_at": "2025-03-28T01:11:04.583369+00:00",
-      "metadata_": {}
-    },
-    {
-      "args_json_schema": null,
-      "created_at": "2025-03-28T01:11:04.586573+00:00",
-      "description": "Search prior conversation history using case-insensitive string matching.",
-      "json_schema": {
-        "name": "conversation_search",
-        "description": "Search prior conversation history using case-insensitive string matching.",
-        "parameters": {
-          "type": "object",
-          "properties": {
-            "query": {
-              "type": "string",
-              "description": "String to search for."
-            },
-            "page": {
-              "type": "integer",
-              "description": "Allows you to page through results. Only use on a follow-up query. Defaults to 0 (first page)."
-            },
-            "request_heartbeat": {
-              "type": "boolean",
-              "description": "Request an immediate heartbeat after function execution. Set to `True` if you want to send a follow-up message or run a follow-up function."
-            }
-          },
-          "required": ["query", "request_heartbeat"]
-        },
-        "type": null,
-        "required": []
-      },
-      "name": "conversation_search",
-      "return_char_limit": 1000000,
-      "source_code": null,
-      "source_type": "python",
-      "tags": ["letta_core"],
-      "tool_type": "letta_core",
-      "updated_at": "2025-03-28T01:11:04.586573+00:00",
-      "metadata_": {}
-    },
-    {
-      "args_json_schema": null,
-      "created_at": "2025-03-28T01:11:04.589876+00:00",
-      "description": "Append to the contents of core memory.",
-      "json_schema": {
-        "name": "core_memory_append",
-        "description": "Append to the contents of core memory.",
-        "parameters": {
-          "type": "object",
-          "properties": {
-            "label": {
-              "type": "string",
-              "description": "Section of the memory to be edited."
-            },
-            "content": {
-              "type": "string",
-              "description": "Content to write to the memory. All unicode (including emojis) are supported."
-            },
-            "request_heartbeat": {
-              "type": "boolean",
-              "description": "Request an immediate heartbeat after function execution. Set to `True` if you want to send a follow-up message or run a follow-up function."
-            }
-          },
-          "required": ["label", "content", "request_heartbeat"]
-        },
-        "type": null,
-        "required": []
-      },
-      "name": "core_memory_append",
-      "return_char_limit": 1000000,
-      "source_code": null,
-      "source_type": "python",
-      "tags": ["letta_memory_core"],
-      "tool_type": "letta_memory_core",
-      "updated_at": "2025-03-28T01:11:04.589876+00:00",
-      "metadata_": {}
-    },
-    {
-      "args_json_schema": null,
-      "created_at": "2025-03-28T01:11:04.593153+00:00",
-      "description": "Replace the contents of core memory. To delete memories, use an empty string for new_content.",
-      "json_schema": {
-        "name": "core_memory_replace",
-        "description": "Replace the contents of core memory. To delete memories, use an empty string for new_content.",
-        "parameters": {
-          "type": "object",
-          "properties": {
-            "label": {
-              "type": "string",
-              "description": "Section of the memory to be edited."
-            },
-            "old_content": {
-              "type": "string",
-              "description": "String to replace. Must be an exact match."
-            },
-            "new_content": {
-              "type": "string",
-              "description": "Content to write to the memory. All unicode (including emojis) are supported."
-            },
-            "request_heartbeat": {
-              "type": "boolean",
-              "description": "Request an immediate heartbeat after function execution. Set to `True` if you want to send a follow-up message or run a follow-up function."
-            }
-          },
-          "required": [
-            "label",
-            "old_content",
-            "new_content",
-            "request_heartbeat"
-          ]
-        },
-        "type": null,
-        "required": []
-      },
-      "name": "core_memory_replace",
-      "return_char_limit": 1000000,
-      "source_code": null,
-      "source_type": "python",
-      "tags": ["letta_memory_core"],
-      "tool_type": "letta_memory_core",
-      "updated_at": "2025-03-28T01:11:04.593153+00:00",
-      "metadata_": {}
-    },
-    {
-      "args_json_schema": null,
-      "created_at": "2025-03-28T01:11:04.596458+00:00",
-      "description": "Sends a message to the human user.",
-      "json_schema": {
-        "name": "send_message",
-        "description": "Sends a message to the human user.",
-        "parameters": {
-          "type": "object",
-          "properties": {
-            "message": {
-              "type": "string",
-              "description": "Message contents. All unicode (including emojis) are supported."
-            },
-            "request_heartbeat": {
-              "type": "boolean",
-              "description": "Request an immediate heartbeat after function execution. Set to `True` if you want to send a follow-up message or run a follow-up function."
-            }
-          },
-          "required": ["message", "request_heartbeat"]
-        },
-        "type": null,
-        "required": []
-      },
-      "name": "send_message",
-      "return_char_limit": 1000000,
-      "source_code": null,
-      "source_type": "python",
-      "tags": ["letta_core"],
-      "tool_type": "letta_core",
-      "updated_at": "2025-03-28T01:11:04.596458+00:00",
-      "metadata_": {}
-    }
-  ],
-  "updated_at": "2025-03-28T01:11:04.680766+00:00",
-  "version": "0.6.45"
-}
diff --git a/locust_test.py b/tests/locust_test.py
similarity index 100%
rename from locust_test.py
rename to tests/locust_test.py
diff --git a/mcp_test.py b/tests/mcp_test.py
similarity index 100%
rename from mcp_test.py
rename to tests/mcp_test.py
diff --git a/performance_tests/test_agent_mass_creation.py b/tests/performance_tests/test_agent_mass_creation.py
similarity index 100%
rename from performance_tests/test_agent_mass_creation.py
rename to tests/performance_tests/test_agent_mass_creation.py
diff --git a/performance_tests/test_agent_mass_update.py b/tests/performance_tests/test_agent_mass_update.py
similarity index 100%
rename from performance_tests/test_agent_mass_update.py
rename to tests/performance_tests/test_agent_mass_update.py
diff --git a/performance_tests/test_insert_archival_memory.py b/tests/performance_tests/test_insert_archival_memory.py
similarity index 100%
rename from performance_tests/test_insert_archival_memory.py
rename to tests/performance_tests/test_insert_archival_memory.py