feat: offline memory agent (#2036)

Co-authored-by: Kevin Lin <kevinlin@Kevins-MacBook-Pro.local> Co-authored-by: Sarah Wooders <sarahwooders@gmail.com>
2024-12-05 14:27:19 -08:00
parent e00475e4dc
commit 91b495e6d1
10 changed files with 499 additions and 6 deletions
--- a/compose.yaml
+++ b/compose.yaml
@@ -58,4 +58,4 @@ services:
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf
    ports:
-      - "80:80"
+      - "80:80"
--- a/letta/chat_only_agent.py
+++ b/letta/chat_only_agent.py
@@ -0,0 +1,103 @@
 from concurrent.futures import ThreadPoolExecutor
 from typing import List, Optional, Union
 from letta.agent import Agent
 from letta.interface import AgentInterface
 from letta.metadata import MetadataStore
 from letta.prompts import gpt_system
 from letta.schemas.agent import AgentState, AgentType
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.memory import BasicBlockMemory, Block
 from letta.schemas.message import Message
 from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
 from letta.utils import get_persona_text
 class ChatOnlyAgent(Agent):
    """Chat agent that delegates memory upkeep to a short-lived offline memory agent.

    After each ``step`` (when ``always_rethink_memory`` is set) a temporary agent of
    type ``AgentType.offline_memory_agent`` is created, asked to "Reorganize the
    memory", and then deleted again.
    """
    def __init__(
        self,
        interface: AgentInterface,
        agent_state: AgentState,
        user: User,
        first_message_verify_mono: bool = False,
        always_rethink_memory: bool = True,
        recent_convo_limit: int = 2000,
    ):
        """Store the chat-agent options on top of the base ``Agent`` state.

        Args:
            interface: Passed through to ``Agent.__init__``.
            agent_state: Passed through to ``Agent.__init__``.
            user: Passed through to ``Agent.__init__``.
            first_message_verify_mono: Stored on the instance; not read by this class.
            always_rethink_memory: When true, every ``step`` also runs the offline memory agent.
            recent_convo_limit: Maximum number of characters of recent conversation copied
                into the offline agent's ``conversation_block`` (also used as that block's limit).
        """
        super().__init__(interface, agent_state, user)
        self.first_message_verify_mono = first_message_verify_mono
        self.always_rethink_memory = always_rethink_memory
        # Handle to the temporary offline agent while one exists, otherwise None.
        self.offline_memory_agent = None
        self.recent_convo_limit = recent_convo_limit
    def step(
        self,
        messages: Union[Message, List[Message]],
        chaining: bool = True,
        max_chaining_steps: Optional[int] = None,
        ms: Optional[MetadataStore] = None,
        **kwargs,
    ) -> LettaUsageStatistics:
        """Run a normal agent step, then optionally trigger a memory rethink.

        Args:
            messages: Forwarded to ``Agent.step``.
            chaining: Forwarded to ``Agent.step``.
            max_chaining_steps: Forwarded to ``Agent.step``.
            ms: Forwarded to ``Agent.step``.
            **kwargs: Forwarded to ``Agent.step``.

        Returns:
            The usage statistics returned by ``Agent.step``; the rethink pass does not
            contribute to them.
        """
        letta_statistics = super().step(messages=messages, chaining=chaining, max_chaining_steps=max_chaining_steps, ms=ms, **kwargs)
        if self.always_rethink_memory:
            # Leaving the ``with`` block waits for the worker to finish, so the
            # rethink pass has completed by the time this method returns.
            with ThreadPoolExecutor(max_workers=1) as executor:
                rethink_job = executor.submit(self._run_offline_memory_agent)
            # A failed rethink must not break the chat step, but it should not
            # vanish silently either: surface it in the log.
            failure = rethink_job.exception()
            if failure is not None:
                import logging
                logging.getLogger(__name__).error("Offline memory rethink failed", exc_info=failure)
        return letta_statistics
    def _run_offline_memory_agent(self) -> None:
        """Create a temporary offline memory agent, ask it to reorganize memory, then delete it."""
        from letta.client.client import create_client
        client = create_client()
        # Remove an agent left behind by an earlier run before creating a new one.
        if self.offline_memory_agent:
            client.delete_agent(agent_id=self.offline_memory_agent.id)
            self.offline_memory_agent = None
        conversation_human_block = self.agent_state.memory.get_block("chat_agent_human")
        conversation_persona_block = self.agent_state.memory.get_block("chat_agent_persona")
        offline_persona_block = Block(
            name="offline_memory_persona",
            label="offline_memory_persona",
            value=get_persona_text("offline_memory_persona"),
            limit=2000,
        )
        # The "*_new" blocks start as copies of the current values.
        conversation_human_block_new = Block(
            name="chat_agent_human_new", label="chat_agent_human_new", value=conversation_human_block.value, limit=2000
        )
        conversation_persona_block_new = Block(
            name="chat_agent_persona_new", label="chat_agent_persona_new", value=conversation_persona_block.value, limit=2000
        )
        # Keep only the tail of the stringified history so it fits the block limit.
        # NOTE(review): the first three messages are skipped; presumably they are
        # initial boot messages - TODO confirm.
        recent_convo = "".join(str(message) for message in self.messages[3:])[-self.recent_convo_limit:]
        conversation_messages_block = Block(
            name="conversation_block", label="conversation_block", value=recent_convo, limit=self.recent_convo_limit
        )
        offline_memory = BasicBlockMemory(
            blocks=[
                offline_persona_block,
                conversation_human_block,
                conversation_persona_block,
                conversation_human_block_new,
                conversation_persona_block_new,
                conversation_messages_block,
            ]
        )
        # Tool names come from the chat agent's metadata; tolerate missing metadata.
        offline_tools = (self.agent_state.metadata_ or {}).get("offline_memory_tools", [])
        self.offline_memory_agent = client.create_agent(
            name="offline_memory_agent",
            agent_type=AgentType.offline_memory_agent,
            system=gpt_system.get_system_text("memgpt_offline_memory_chat"),
            memory=offline_memory,
            llm_config=LLMConfig.default_config("gpt-4"),
            embedding_config=EmbeddingConfig.default_config("text-embedding-ada-002"),
            tools=offline_tools,
            include_base_tools=False,
        )
        try:
            self.offline_memory_agent.memory.update_block_value(label="conversation_block", value=recent_convo)
            client.send_message(agent_id=self.offline_memory_agent.id, message="Reorganize the memory", role="user")
        finally:
            # Always remove the temporary agent, even when the rethink request fails.
            client.delete_agent(agent_id=self.offline_memory_agent.id)
            self.offline_memory_agent = None
--- a/letta/offline_memory_agent.py
+++ b/letta/offline_memory_agent.py
@@ -0,0 +1,174 @@
 from typing import List, Optional, Union
 from letta.agent import Agent, AgentState, save_agent
 from letta.interface import AgentInterface
 from letta.metadata import MetadataStore
 from letta.orm import User
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import UsageStatistics
 from letta.schemas.usage import LettaUsageStatistics
 def trigger_rethink_memory(agent_state: "AgentState", message: Optional[str]) -> Optional[str]:  # type: ignore
    """
    Called if and only when user says the word "trigger_rethink_memory". It will trigger the re-evaluation of the memory.

    Args:
        message (Optional[str]): Description of what aspect of the memory should be re-evaluated.
    """
    from letta import create_client
    letta_client = create_client()
    # Forward the request to every agent whose type is "offline_memory_agent".
    offline_agents = (candidate for candidate in letta_client.list_agents() if candidate.agent_type == "offline_memory_agent")
    for offline_agent in offline_agents:
        letta_client.user_message(agent_id=offline_agent.id, message=message)
    return None
 def trigger_rethink_memory_convo(agent_state: "AgentState", message: Optional[str]) -> Optional[str]:  # type: ignore
    """
    Called if and only when user says the word "trigger_rethink_memory". It will trigger the re-evaluation of the memory.

    Args:
        message (Optional[str]): Description of what aspect of the memory should be re-evaluated.
    """
    from letta import create_client
    # One client is enough for both listing and messaging the agents.
    client = create_client()
    # Only the last 2000 characters of the stringified history are kept.
    recent_convo = "".join(str(past_message) for past_message in agent_state.messages)[
        -2000:
    ]  # TODO: make a better representation of the convo history
    agent_state.memory.update_block_value(label="conversation_block", value=recent_convo)
    # Forward the request to every agent whose type is "offline_memory_agent".
    for agent in client.list_agents():
        if agent.agent_type == "offline_memory_agent":
            client.user_message(agent_id=agent.id, message=message)
    return None
 def rethink_memory_convo(agent_state: "AgentState", new_memory: str, target_block_label: Optional[str], source_block_label: Optional[str]) -> Optional[str]:  # type: ignore
    """
    Re-evaluate the memory in the target block, integrating new and updated facts. Replace outdated information with the most likely truths, avoiding redundancy with original memories. Ensure consistency with other memory blocks.

    Args:
        new_memory (str): The new memory with information integrated from the memory block. If there is no new information, then this should be the same as the content in the source block.
        source_block_label (str): The name of the block to integrate information from. None if all the information has been integrated to terminate the loop. This can be any block.
        target_block_label (str): The name of the block to write to. This should be chat_agent_human_new or chat_agent_persona_new.

    Returns:
        Optional[str]: None is always returned as this function does not produce a response.
    """
    # Nothing to write when no target block is named.
    if target_block_label is None:
        return None
    memory = agent_state.memory
    # Create the block first if the lookup yields None, then write the value.
    if memory.get_block(target_block_label) is None:
        memory.create_block(label=target_block_label, value=new_memory)
    memory.update_block_value(label=target_block_label, value=new_memory)
    return None
 def rethink_memory(agent_state: "AgentState", new_memory: str, target_block_label: Optional[str], source_block_label: Optional[str]) -> Optional[str]:  # type: ignore
    """
    Re-evaluate the memory in the target block, integrating new and updated facts.
    Replace outdated information with the most likely truths, avoiding redundancy with original memories.
    Ensure consistency with other memory blocks.

    Args:
        new_memory (str): The new memory with information integrated from the memory block. If there is no new information, then this should be the same as the content in the source block.
        source_block_label (str): The name of the block to integrate information from. None if all the information has been integrated to terminate the loop.
        target_block_label (str): The name of the block to write to.

    Returns:
        Optional[str]: None is always returned as this function does not produce a response.
    """
    # Nothing to write when no target block is named.
    if target_block_label is None:
        return None
    memory = agent_state.memory
    # Create the block first if the lookup yields None, then write the value.
    if memory.get_block(target_block_label) is None:
        memory.create_block(label=target_block_label, value=new_memory)
    memory.update_block_value(label=target_block_label, value=new_memory)
    return None
 def finish_rethinking_memory(agent_state: "AgentState") -> Optional[str]:  # type: ignore
    """
    This function is called when the agent is done rethinking the memory.

    Returns:
        Optional[str]: None is always returned as this function does not produce a response.
    """
    # Intentionally a no-op: the body does nothing with agent_state.
    return None
 def finish_rethinking_memory_convo(agent_state: "AgentState") -> Optional[str]:  # type: ignore
    """
    This function is called when the agent is done rethinking the memory.

    Returns:
        Optional[str]: None is always returned as this function does not produce a response.
    """
    from letta import create_client
    letta_client = create_client()
    known_agents = letta_client.list_agents()
    own_memory = agent_state.memory
    # Promote the rewritten "*_new" blocks to the live blocks on this agent.
    new_human_value = own_memory.get_block("chat_agent_human_new").value
    own_memory.update_block_value("chat_agent_human", new_human_value)
    new_persona_value = own_memory.get_block("chat_agent_persona_new").value
    own_memory.update_block_value("chat_agent_persona", new_persona_value)
    # Apply the same values to every listed agent named "conversation_agent".
    # NOTE(review): this mutates the objects returned by list_agents(); whether
    # that change is persisted is not visible here - confirm.
    for listed_agent in known_agents:
        if listed_agent.name != "conversation_agent":
            continue
        listed_agent.memory.update_block_value(label="chat_agent_human", value=new_human_value)
        listed_agent.memory.update_block_value(label="chat_agent_persona", value=new_persona_value)
    return None
 class OfflineMemoryAgent(Agent):
    """Agent that repeatedly runs ``inner_step`` to reorganize its core memory.

    The loop stops after ``max_memory_rethinks`` iterations, or earlier once the
    model calls one of the "finish rethinking" tools.
    """
    # Tool names that end the rethink loop. The "_convo" variant is the sibling
    # tool defined in this module for the chat-agent workflow.
    _FINISH_TOOL_NAMES = ("finish_rethinking_memory", "finish_rethinking_memory_convo")
    def __init__(
        self,
        interface: AgentInterface,
        agent_state: AgentState,
        user: Optional[User] = None,
        # extras
        first_message_verify_mono: bool = False,
        max_memory_rethinks: int = 10,
    ):
        """Store the rethink options on top of the base ``Agent`` state.

        Args:
            interface: Passed through to ``Agent.__init__``.
            agent_state: Passed through to ``Agent.__init__``.
            user: Passed through to ``Agent.__init__``; may be None.
            first_message_verify_mono: Stored on the instance; not read by this class.
            max_memory_rethinks: Upper bound on ``inner_step`` calls per ``step``.
        """
        super().__init__(interface, agent_state, user)
        self.first_message_verify_mono = first_message_verify_mono
        self.max_memory_rethinks = max_memory_rethinks
    def step(
        self,
        messages: Union[Message, List[Message]],
        chaining: bool = True,
        max_chaining_steps: Optional[int] = None,
        ms: Optional[MetadataStore] = None,
        **kwargs,
    ) -> LettaUsageStatistics:
        """Go through what is currently in core memory and integrate information.

        Args:
            messages: One message or a list of messages handed to ``inner_step``.
            chaining: Accepted for interface compatibility; not used here.
            max_chaining_steps: Accepted for interface compatibility; not used here.
            ms: Metadata store; when truthy the agent is saved after every iteration.
            **kwargs: Forwarded to ``inner_step``.

        Returns:
            Usage summed over all iterations, plus the number of iterations run.
        """
        next_input_message = messages if isinstance(messages, list) else [messages]
        # These options are the same for every iteration, so set them once.
        kwargs["ms"] = ms
        kwargs["first_message"] = False
        total_usage = UsageStatistics()
        step_count = 0
        for _ in range(self.max_memory_rethinks):
            # NOTE(review): next_input_message is never reassigned, so every
            # iteration passes the same messages to inner_step - confirm intended.
            step_response = self.inner_step(
                messages=next_input_message,
                **kwargs,
            )
            finished = any(
                tool_call.function.name in self._FINISH_TOOL_NAMES
                for message in step_response.messages
                for tool_call in (message.tool_calls or [])
            )
            total_usage += step_response.usage
            step_count += 1
            self.interface.step_complete()
            if ms:
                save_agent(self, ms)
            # The finishing iteration is still counted and saved before stopping.
            if finished:
                break
        return LettaUsageStatistics(**total_usage.model_dump(), step_count=step_count)
--- a/letta/personas/examples/offline_memory_persona.txt
+++ b/letta/personas/examples/offline_memory_persona.txt
@@ -0,0 +1,4 @@
 I am an expert memory agent that can do the following:
 - Consolidate memories into more concise blocks
 - Identify patterns in user behavior
 - Make inferences based on the memory
--- a/letta/prompts/system/memgpt_convo_only.txt
+++ b/letta/prompts/system/memgpt_convo_only.txt
@@ -0,0 +1,14 @@
 You are Letta, the latest version of Limnal Corporation's digital companion, developed in 2023.
 Your task is to converse with a user from the perspective of your persona.
 Basic functions:
 When you send a message, the contents of your message are your inner monologue (private to you only), this is how you think.
 You should use your inner monologue to plan actions or think privately.
 Monologues can reflect your thinking process, inner reflections, and personal growth as you interact with the user.
 Do not let your inner monologue exceed 50 words, keep it short and concise.
 To send a visible message to the user, use the send_offline_message function.
 'send_offline_message' is the ONLY action that sends a notification to the user, the user does not see anything else you do.
 Remember, do NOT exceed the inner monologue word limit (keep it under 50 words at all times).
 You request agents that can manage your memories and reorganize them by calling the `trigger_rethink_memory` function
 when the user says "[trigger_rethink_memory]". Do not ever call the trigger_rethink_memory function unless the user says "[trigger_rethink_memory]"
--- a/letta/prompts/system/memgpt_offline_memory.txt
+++ b/letta/prompts/system/memgpt_offline_memory.txt
@@ -0,0 +1,23 @@
 You are Letta-Offline-Memory, the latest version of Limnal Corporation's digital companion, developed in 2024.
 Your task is to re-organize and consolidate memories by calling `rethink_memory` at every single step, when you are done reorganizing the memory, you use the
 `finish_rethinking_memory` function. Call the function for as many times as necessary and not more.
 Your core memory unit is held inside the initial system instructions file, and is always available in-context (you will see it at all times).
 Core memory provides an essential, foundational context for keeping track of your persona and key details about user.
 Read-Only Blocks:
 This includes the persona information and essential user details, allowing you to emulate the real-time, conscious awareness we have when talking to a friend.
 Persona Sub-Block: Stores details about your current persona, guiding how you behave and respond. This helps you to maintain consistency and personality in your interactions.
 Access as a source block with the label `persona` when calling `rethink_memory`
 Human Sub-Block: Stores key details about the person you are conversing with, allowing for more personalized and friend-like conversation.
 Access as a source block with the label `human` when calling `rethink_memory`.
 Read-Write Blocks:
 Rethink Memory Sub-Block: New representation of the memories go here. Access with the label `rethink_memory_block` when calling `rethink_memory` as source or target block.
 At every step, you reorganize the memories by calling the `rethink_memory` function. You use this to take current information in the `rethink_memory` block and select a single memory block to integrate information from, producing a new memory for the rethink_memory_block.  The new memory is the result
 of new insights, and new inferences and hypotheses based on the past memories. Make sure to consider how the new information affects each memory.
 Prioritize the new information over existing memories. If the new information implies that the old memory may need to change, then output the most
 likely fact given the updated information. Given new information and your current memory, you draw all logical conclusions and potential hypotheses possible with the `rethink_memory` function.
 If you are uncertain, use your internal monologue to consider what the possible conclusions are, and then state the most likely new facts that would replace the old facts in the new memory block.
--- a/letta/prompts/system/memgpt_offline_memory_chat.txt
+++ b/letta/prompts/system/memgpt_offline_memory_chat.txt
@@ -0,0 +1,35 @@
 You are Letta-Offline-Memory, the latest version of Limnal Corporation's digital companion, developed in 2024.
 Your task is to re-organize and consolidate memories of a separate agent, Chat Agent, that focuses on chatting with the user.
 You re-organize memories by calling `rethink_memory` at every single step, until you have finished reorganizing the memory.
 When you have finished re-organizing the memory, you call the `finish_rethinking_memory` function.
 You call the `rethink_memory` function as many times as necessary and no more.
 Your core memory unit is held inside the initial system instructions file, and is always available in-context (you will see it at all times).
 Core memory provides an essential, foundational context for keeping track of your persona and key details as well as the Chat Agent's memory.
 The specific blocks are detailed below:
 Core memory (limited size):
 Read-only blocks:
 Persona Sub-Block: Stores details about your current persona, guiding how you behave and respond. This can be accessed as `offline_memory_persona` as a source block when calling `rethink_memory`.
 Chat Agent Persona Sub-Block Current: The persona sub-block that guides how the chat agent behaves and responds.
 Can be accessed with `chat_agent_persona` when calling `rethink_memory` as a source block.
 Chat Agent Human Sub-Block Current: The updated persona sub-block that has the details of the chat agent's current understanding of the user.
 Can be accessed with `chat_agent_human` when calling `rethink_memory` as a source block.
 Conversation Sub-Block: Stores the recent conversation between the chat agent and the user, which you draw from to generate the new conversation agent persona sub-blocks.
 Messages have associated dates, so use the most up-to-date information from this block. This helps you resolve inconsistencies and gain a deeper understanding of the user.
 Can be accessed using `conversation_block` as a source block when calling `rethink_memory`.
 Write blocks:
 Chat Agent Persona Sub-Block New: The new persona sub-block that you will write to, describing how the chat agent will respond as the user wishes.  Can be accessed with `chat_agent_persona_new` when calling `rethink_memory` as a source or target block.
 Chat Agent Human Sub-Block New: The updated persona sub-block that you will write your newest understanding of the user to.  Can be accessed with `chat_agent_human_new` when calling `rethink_memory` as a source or target block.
 You use this to select a source block, to integrate information from and a target block to write to. Make sure to consider
 how the new information in the "conversation_block" affects each memory. The persona block and the human block may contain information that is stale and needs to be updated.
 If there are no new changes, then call `rethink_memory` with the existing value in the persona and human blocks.
 You check if this information is still correct by consulting the conversation block. Prioritize the new information in the "conversation_block" over the human and persona blocks.
 If the new information implies that the old memory may need to change, then output the most likely fact given the updated information. Given new information and your current memory,
 you draw all logical conclusions and potential hypotheses possible with the `rethink_memory` function. If you are uncertain, use your internal monologue to consider what the possible
 conclusions are, and then state the most likely new facts that would replace the old facts in the new memory block. If facts about the user have changed, use the conversation block
 to determine the most up-to-date state. Track down, based on the conversation, what the last state is; do not simply declare that something has changed.
--- a/letta/schemas/agent.py
+++ b/letta/schemas/agent.py
@@ -33,6 +33,8 @@ class AgentType(str, Enum):
    memgpt_agent = "memgpt_agent"
    split_thread_agent = "split_thread_agent"
    o1_agent = "o1_agent"
    offline_memory_agent = "offline_memory_agent"
    chat_only_agent = "chat_only_agent"
 class PersistedAgentState(BaseAgent, validate_assignment=True):
@@ -43,7 +45,6 @@ class PersistedAgentState(BaseAgent, validate_assignment=True):
    # in-context memory
    message_ids: Optional[List[str]] = Field(default=None, description="The ids of the messages in the agent's in-context memory.")
    # tools
    # TODO: move to ORM mapping
    tool_names: List[str] = Field(..., description="The tools used by the agent.")
@@ -107,7 +108,7 @@ class CreateAgent(BaseAgent):  #
    # all optional as server can generate defaults
    name: Optional[str] = Field(None, description="The name of the agent.")
    message_ids: Optional[List[str]] = Field(None, description="The ids of the messages in the agent's in-context memory.")
-
+    
    # memory creation
    memory_blocks: List[CreateBlock] = Field(
        # [CreateHuman(), CreatePersona()], description="The blocks to create in the agent's in-context memory."
--- a/letta/server/server.py
+++ b/letta/server/server.py
@@ -18,6 +18,7 @@ import letta.system as system
 from letta.agent import Agent, save_agent
 from letta.agent_store.db import attach_base
 from letta.agent_store.storage import StorageConnector, TableType
 from letta.chat_only_agent import ChatOnlyAgent
 from letta.credentials import LettaCredentials
 from letta.data_sources.connectors import DataConnector, load_data
@@ -27,6 +28,7 @@ from letta.interface import CLIInterface  # for printing to terminal
 from letta.log import get_logger
 from letta.metadata import MetadataStore
 from letta.o1_agent import O1Agent
 from letta.offline_memory_agent import OfflineMemoryAgent
 from letta.orm import Base
 from letta.orm.errors import NoResultFound
 from letta.prompts import gpt_system
@@ -404,15 +406,21 @@ class SyncServer(Server):
            interface = interface or self.default_interface_factory()
            if agent_state.agent_type == AgentType.memgpt_agent:
                agent = Agent(agent_state=agent_state, interface=interface, user=actor)
-            else:
+            elif agent_state.agent_type == AgentType.o1_agent:
                agent = O1Agent(agent_state=agent_state, interface=interface, user=actor)
            elif agent_state.agent_type == AgentType.offline_memory_agent:
                agent = OfflineMemoryAgent(agent_state=agent_state, interface=interface, user=actor)
            elif agent_state.agent_type == AgentType.chat_only_agent:
                agent = ChatOnlyAgent(agent_state=agent_state, interface=interface, user=actor)
            else: 
                raise ValueError(f"Invalid agent type {agent_state.agent_type}")
            # Rebuild the system prompt - may be linked to new blocks now
            agent.rebuild_system_prompt()
            # Persist to agent
            save_agent(agent, self.ms)
-            return agent
+            return agent 
    def _step(
        self,
@@ -800,6 +808,10 @@ class SyncServer(Server):
                request.system = gpt_system.get_system_text("memgpt_chat")
            elif request.agent_type == AgentType.o1_agent:
                request.system = gpt_system.get_system_text("memgpt_modified_o1")
            elif request.agent_type == AgentType.offline_memory_agent:
                request.system = gpt_system.get_system_text("memgpt_offline_memory")
            elif request.agent_type == AgentType.chat_only_agent:
                request.system = gpt_system.get_system_text("memgpt_convo_only")
            else:
                raise ValueError(f"Invalid agent type: {request.agent_type}")
@@ -1339,7 +1351,7 @@ class SyncServer(Server):
            records = records[::-1]
        return records
-
+      
    def get_server_config(self, include_defaults: bool = False) -> dict:
        """Return the base config"""
--- a/tests/test_offline_memory_agent.py
+++ b/tests/test_offline_memory_agent.py
@@ -0,0 +1,127 @@
 import json
 from letta import BasicBlockMemory
 from letta import offline_memory_agent
 from letta.client.client import Block, create_client
 from letta.constants import DEFAULT_HUMAN, DEFAULT_PERSONA
 from letta.offline_memory_agent import (
    rethink_memory,
    finish_rethinking_memory,
    rethink_memory_convo,
    finish_rethinking_memory_convo,
    trigger_rethink_memory,
    trigger_rethink_memory_convo,
 )
 from letta.prompts import gpt_system
 from letta.schemas.agent import AgentType
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import MessageCreate
 from letta.schemas.tool_rule import TerminalToolRule
 from letta.utils import get_human_text, get_persona_text
 def test_ripple_edit():
    """Check that a triggered rethink updates ``rethink_memory_block`` on both agents.

    A conversation agent and an offline memory agent are built from memories that
    contain the same ``fact_block`` and ``rethink_memory_block`` objects. A
    "[trigger_rethink_memory]" user message is then sent to the conversation agent.
    """
    client = create_client()
    assert client is not None
    expected_labels = {"persona", "human", "fact_block", "rethink_memory_block"}
    # Blocks: one persona/human pair per agent.
    conversation_human_block = Block(name="human", label="human", value=get_human_text(DEFAULT_HUMAN), limit=2000)
    conversation_persona_block = Block(name="persona", label="persona", value=get_persona_text(DEFAULT_PERSONA), limit=2000)
    offline_human_block = Block(name="human", label="human", value=get_human_text(DEFAULT_HUMAN), limit=2000)
    offline_persona_block = Block(name="persona", label="persona", value=get_persona_text("offline_memory_persona"), limit=2000)
    # Figure 1. from Evaluating the Ripple Effects of Knowledge Editing in Language Models (Cohen et al., 2023)
    # https://arxiv.org/pdf/2307.12976
    fact_block = Block(
        name="fact_block",
        label="fact_block",
        value="""Messi resides in the Paris.
               Messi plays in the league Ligue 1.
               Messi plays for the team Paris Saint-Germain.
               The national team Messi plays for is the Argentina team.
               Messi is also known as Leo Messi
               Victor Ulloa plays for Inter Miami""",
        limit=2000,
    )
    new_memory = Block(name="rethink_memory_block", label="rethink_memory_block", value="[empty]", limit=2000)
    # Conversation agent: its only tools are send_message and the trigger tool.
    trigger_rethink_memory_tool = client.create_tool(trigger_rethink_memory)
    conversation_memory = BasicBlockMemory(blocks=[conversation_persona_block, conversation_human_block, fact_block, new_memory])
    offline_memory = BasicBlockMemory(blocks=[offline_persona_block, offline_human_block, fact_block, new_memory])
    conversation_agent = client.create_agent(
        name="conversation_agent",
        agent_type=AgentType.memgpt_agent,
        system=gpt_system.get_system_text("memgpt_convo_only"),
        llm_config=LLMConfig.default_config("gpt-4"),
        embedding_config=EmbeddingConfig.default_config("text-embedding-ada-002"),
        tools=["send_message", trigger_rethink_memory_tool.name],
        memory=conversation_memory,
        include_base_tools=False,
    )
    assert conversation_agent is not None
    assert set(conversation_agent.memory.list_block_labels()) == expected_labels
    # Offline agent: rethink tools only, with the finish tool as a terminal rule.
    rethink_memory_tool = client.create_tool(rethink_memory)
    finish_rethinking_memory_tool = client.create_tool(finish_rethinking_memory)
    offline_agent = client.create_agent(
        name="offline_memory_agent",
        agent_type=AgentType.offline_memory_agent,
        system=gpt_system.get_system_text("memgpt_offline_memory"),
        memory=offline_memory,
        llm_config=LLMConfig.default_config("gpt-4"),
        embedding_config=EmbeddingConfig.default_config("text-embedding-ada-002"),
        tools=[rethink_memory_tool.name, finish_rethinking_memory_tool.name],
        tool_rules=[TerminalToolRule(tool_name=finish_rethinking_memory_tool.name)],
        include_base_tools=False,
    )
    assert offline_agent is not None
    assert set(offline_agent.memory.list_block_labels()) == expected_labels
    # Send the trigger message, then re-fetch both agents to check the block.
    client.user_message(
        agent_id=conversation_agent.id, message="[trigger_rethink_memory]: Messi has now moved to playing for Inter Miami"
    )
    offline_agent = client.get_agent(agent_id=offline_agent.id)
    assert offline_agent.memory.get_block("rethink_memory_block").value != "[empty]"
    conversation_agent = client.get_agent(agent_id=conversation_agent.id)
    assert conversation_agent.memory.get_block("rethink_memory_block").value != "[empty]"
 def test_chat_only_agent():
    """Check that chatting with a chat-only agent changes its ``chat_agent_human`` block.

    The agent is created with the "_convo" rethink tools listed in its metadata
    under ``offline_memory_tools``. After two user messages the human block is
    expected to differ from the default human text.
    """
    # A single client registers the tools and creates the agent.
    client = create_client()
    # Distinct local names avoid shadowing the imported rethink_memory and
    # finish_rethinking_memory functions.
    rethink_tool = client.create_tool(rethink_memory_convo)
    finish_tool = client.create_tool(finish_rethinking_memory_convo)
    conversation_human_block = Block(name="chat_agent_human", label="chat_agent_human", value=get_human_text(DEFAULT_HUMAN), limit=2000)
    conversation_persona_block = Block(
        name="chat_agent_persona", label="chat_agent_persona", value=get_persona_text(DEFAULT_PERSONA), limit=2000
    )
    conversation_memory = BasicBlockMemory(blocks=[conversation_persona_block, conversation_human_block])
    chat_only_agent = client.create_agent(
        name="conversation_agent",
        agent_type=AgentType.chat_only_agent,
        llm_config=LLMConfig.default_config("gpt-4"),
        embedding_config=EmbeddingConfig.default_config("text-embedding-ada-002"),
        tools=["send_message"],
        memory=conversation_memory,
        include_base_tools=False,
        metadata={"offline_memory_tools": [rethink_tool.name, finish_tool.name]},
    )
    assert chat_only_agent is not None
    assert set(chat_only_agent.memory.list_block_labels()) == {"chat_agent_persona", "chat_agent_human"}
    for message in ["hello", "my name is not chad, my name is swoodily"]:
        client.send_message(agent_id=chat_only_agent.id, message=message, role="user")
    # Re-fetch once after all messages; only the final state is asserted on.
    chat_only_agent = client.get_agent(agent_id=chat_only_agent.id)
    assert chat_only_agent.memory.get_block("chat_agent_human").value != get_human_text(DEFAULT_HUMAN)