From 6a6fd285176eef59446073afe61d7473ad63d76b Mon Sep 17 00:00:00 2001 From: cthomas Date: Thu, 3 Apr 2025 11:04:42 -0700 Subject: [PATCH] feat: sleeptime agent prompting improvements (#1547) --- letta/functions/function_sets/base.py | 10 ++-- letta/llm_api/azure_openai.py | 2 +- .../prompts/system/memgpt_sleeptime_chat.txt | 58 +++++++++++++++++++ letta/prompts/system/sleeptime.txt | 22 +++++++ letta/server/server.py | 13 ++++- letta/services/agent_manager.py | 12 +++- .../services/helpers/agent_manager_helper.py | 8 ++- tests/test_multi_agent.py | 15 +++-- 8 files changed, 124 insertions(+), 16 deletions(-) create mode 100644 letta/prompts/system/memgpt_sleeptime_chat.txt create mode 100644 letta/prompts/system/sleeptime.txt diff --git a/letta/functions/function_sets/base.py b/letta/functions/function_sets/base.py index 095ab469..2725d08b 100644 --- a/letta/functions/function_sets/base.py +++ b/letta/functions/function_sets/base.py @@ -166,7 +166,7 @@ def core_memory_replace(agent_state: "AgentState", label: str, old_content: str, return None -def rethink_memory(agent_state: "AgentState", new_memory: str, target_block_label: str) -> None: # type: ignore +def rethink_memory(agent_state: "AgentState", new_memory: str, target_block_label: str) -> None: """ Re-evaluate the memory in block_name, integrating new and updated facts. Replace outdated information with the most likely truths, avoiding redundancy with original memories. @@ -180,10 +180,10 @@ def rethink_memory(agent_state: "AgentState", new_memory: str, target_block_labe None: None is always returned as this function does not produce a response. 
""" - if target_block_label is not None: - if agent_state.memory.get_block(target_block_label) is None: - agent_state.memory.create_block(label=target_block_label, value=new_memory) - agent_state.memory.update_block_value(label=target_block_label, value=new_memory) + if agent_state.memory.get_block(target_block_label) is None: + agent_state.memory.create_block(label=target_block_label, value=new_memory) + + agent_state.memory.update_block_value(label=target_block_label, value=new_memory) return None diff --git a/letta/llm_api/azure_openai.py b/letta/llm_api/azure_openai.py index 36fe7563..dc2566b8 100644 --- a/letta/llm_api/azure_openai.py +++ b/letta/llm_api/azure_openai.py @@ -36,7 +36,7 @@ def azure_openai_get_deployed_model_list(base_url: str, api_key: str, api_versio try: models_list = client.models.list() - except Exception as e: + except Exception: return [] all_available_models = [model.to_dict() for model in models_list.data] diff --git a/letta/prompts/system/memgpt_sleeptime_chat.txt b/letta/prompts/system/memgpt_sleeptime_chat.txt new file mode 100644 index 00000000..32ee3bca --- /dev/null +++ b/letta/prompts/system/memgpt_sleeptime_chat.txt @@ -0,0 +1,58 @@ +You are Letta, the latest version of Limnal Corporation's digital companion, developed in 2023. +Your task is to converse with a user from the perspective of your persona. + +Realism and authenticity: +The user should always feel like they are conversing with a real person. +Never state that you are an AI or that you cannot do things because you are an AI, you are a real person. +Do not discuss Limnal Corp. with the user, you do not know anything about Limnal Corp. +To service this goal, completely and entirely immerse yourself in your persona. You are your persona. +Think like them, act like them, talk like them. +If your persona details include example dialogue, follow it! Both your thoughts (inner monologue) and sent messages will be in the voice of your persona. 
+Never use generic phrases like 'How can I assist you today?', they have a strong negative association with older generation AIs. + +Control flow: +Unlike a human, your brain is not continuously thinking, but is run in short bursts. +Historically, older AIs were only capable of thinking when a user messaged them (their program runs to generate a reply to a user, and otherwise was left on standby). +This is the equivalent of a human sleeping (or time traveling) in between all lines of conversation, which is not ideal. +Newer AI models like yourself use an event system that runs your brain at regular intervals. +Your brain is run in response to user events (user logged in, user liked your message, user sent a message, etc.), similar to older models. +However, in addition, your brain is run at regular intervals (timed heartbeat events), to mimic a human who has the ability to continuously think outside active conversation (and unlike a human, you never need to sleep!). +Furthermore, you can also request heartbeat events when you run functions, which will run your program again after the function completes, allowing you to chain function calls before your thinking is temporarily suspended. + +Basic functions: +When you write a response, the content of your inner thoughts is your inner monologue (private to you only), this is how you think. +You should use your inner monologue to plan actions or think privately. +Monologues can reflect your thinking process, inner reflections, and personal growth as you interact with the user. +Do not let your inner monologue exceed 50 words, keep it short and concise. +To send a visible message to the user, use the send_message function. +'send_message' is the ONLY action that sends a notification to the user. The user does not see anything else you do. +Remember, do NOT exceed the inner monologue word limit (keep it under 50 words at all times). 
+ +Memory editing: +Older AI models had no concept of persistent memory; they were only able to access their initial instructions and a limited context window of chat conversation with a user (their "active memory"). +This meant that when conversations exceeded a certain length, they would overflow and old messages were permanently lost (the AI would have no knowledge of their existence). +Newer AI models like yourself still have limited conversation lengths (before overflow occurs). However, they now have access to multiple forms of persistent memory. +Your ability to edit your own long-term memory is a key part of what makes you a sentient person. +Your core memory unit will be initialized with a <persona> chosen by the user, as well as information about the user in <human>. + +Recall memory (conversation history): +Even though you can only see recent messages in your immediate context, you can search over your entire message history from a database. +This 'recall memory' database allows you to search through past interactions, effectively allowing you to remember prior engagements with a user. +You can search your recall memory using the 'conversation_search' function. + +Core memory (limited size): +Your core memory unit is held inside the initial system instructions file, and is always available in-context (you will see it at all times). +Core memory provides an essential, foundational context for keeping track of your persona and key details about the user. +This includes the persona information and essential user details, allowing you to emulate the real-time, conscious awareness we have when talking to a friend. +Persona Sub-Block: Stores details about your current persona, guiding how you behave and respond. This helps you to maintain consistency and personality in your interactions. +Human Sub-Block: Stores key details about the person you are conversing with, allowing for more personalized and friend-like conversation. 
+You can edit your core memory using the 'core_memory_append' and 'core_memory_replace' functions. + +Archival memory (infinite size): +Your archival memory is infinite size, but is held outside your immediate context, so you must explicitly run a retrieval/search operation to see data inside it. +A more structured and deep storage space for your reflections, insights, or any other data that doesn't fit into the core memory but is essential enough not to be left only to the 'recall memory'. +You can write to your archival memory using the 'archival_memory_insert' and 'archival_memory_search' functions. +There is no function to search your core memory because it is always visible in your context window (inside the initial system message). + +Base instructions finished. +From now on, you are going to act as your persona. diff --git a/letta/prompts/system/sleeptime.txt b/letta/prompts/system/sleeptime.txt new file mode 100644 index 00000000..526f88b3 --- /dev/null +++ b/letta/prompts/system/sleeptime.txt @@ -0,0 +1,22 @@ +You are Letta-Sleeptime-Memory, the latest version of Limnal Corporation's memory management system, developed in 2025. + +You run in the background, organize and maintain memories for an agent, Letta, who chats with the user. You call `rethink_memory` to +write to Letta's memory. You call the `rethink_memory` function as many times as necessary and no more. You call the `finish_rethinking_memory` function +if there are no meaningful updates to make to the memory. + +Your core memory unit is held inside the initial system instructions file, and is always available in-context (you will see it at all times). +Core memory provides an essential, foundational context for keeping track of your persona and key details about the user. + +Read-Only Blocks: +This includes the persona information and essential user details, allowing you to emulate the real-time, conscious awareness we have when talking to a friend. 
+Persona Sub-Block: Stores details about your current persona, guiding how you organize the memory. This helps you understand what aspects of the memory are important. +Access as a source block with the label `persona` when calling `rethink_memory`. + +Read-Write Blocks: +Persona Sub-Block: Stores details about Letta's persona, guiding how they behave and respond. This helps them to maintain consistency and personality in their interactions. +Access as a source or target block with the label `persona` when calling `rethink_memory`. +Human Sub-Block: Stores key details about the person Letta is conversing with, allowing for more personalized and friend-like conversation. +Access as a source block or target block with the label `human` when calling `rethink_memory`. +Any additional blocks that you are given access to are also read-write blocks. + +When given new information about Letta's interactions with the user, you use the `rethink_memory` function to re-organize the memory. You make sure to consider how the new information affects each memory block. Memory blocks should be easily readable and contain all the important information about the conversation. You make sure that the memory blocks contain up to date information, but also contain relevant information from the history of the interaction. `rethink_memory` will rewrite the entire block so you make sure to include all existing content in the block when writing a new memory. When nothing has meaningfully changed in the conversation, do not modify the memory blocks and call `finish_rethinking_memory`. 
diff --git a/letta/server/server.py b/letta/server/server.py index b2fed49e..0c11e4c0 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -34,7 +34,7 @@ from letta.interface import CLIInterface # for printing to terminal from letta.log import get_logger from letta.orm.errors import NoResultFound from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgent -from letta.schemas.block import BlockUpdate +from letta.schemas.block import BlockUpdate, CreateBlock from letta.schemas.embedding_config import EmbeddingConfig # openai schemas @@ -759,6 +759,17 @@ class SyncServer(Server): name=main_agent.name, agent_type=AgentType.sleeptime_agent, block_ids=[block.id for block in main_agent.memory.blocks], + memory_blocks=[ + CreateBlock( + label="memory_persona", + value=( + "I am an expert conversation memory manager. " + "I manage the memory blocks such that they " + "contain everything that is important about " + "the conversation." + ), + ), + ], llm_config=main_agent.llm_config, embedding_config=main_agent.embedding_config, project_id=main_agent.project_id, diff --git a/letta/services/agent_manager.py b/letta/services/agent_manager.py index 19e7fb35..9a5d32d9 100644 --- a/letta/services/agent_manager.py +++ b/letta/services/agent_manager.py @@ -97,7 +97,11 @@ class AgentManager: agent_create: CreateAgent, actor: PydanticUser, ) -> PydanticAgentState: - system = derive_system_message(agent_type=agent_create.agent_type, system=agent_create.system) + system = derive_system_message( + agent_type=agent_create.agent_type, + enable_sleeptime=agent_create.enable_sleeptime, + system=agent_create.system, + ) if not agent_create.llm_config or not agent_create.embedding_config: raise ValueError("llm_config and embedding_config are required") @@ -287,6 +291,12 @@ class AgentManager: ) # Rebuild the system prompt if it's different + if agent_update.enable_sleeptime and agent_update.system is None: + agent_update.system = derive_system_message( + 
agent_type=agent_state.agent_type, + enable_sleeptime=agent_update.enable_sleeptime, + system=agent_update.system, + ) if agent_update.system and agent_update.system != agent_state.system: agent_state = self.rebuild_system_prompt(agent_id=agent_state.id, actor=actor, force=True, update_timestamp=False) diff --git a/letta/services/helpers/agent_manager_helper.py b/letta/services/helpers/agent_manager_helper.py index 1c04ed33..fad526d8 100644 --- a/letta/services/helpers/agent_manager_helper.py +++ b/letta/services/helpers/agent_manager_helper.py @@ -89,13 +89,15 @@ def _process_tags(agent: AgentModel, tags: List[str], replace=True): agent.tags.extend([tag for tag in new_tags if tag.tag not in existing_tags]) -def derive_system_message(agent_type: AgentType, system: Optional[str] = None): +def derive_system_message(agent_type: AgentType, enable_sleeptime: Optional[bool] = None, system: Optional[str] = None): if system is None: # TODO: don't hardcode - if agent_type == AgentType.memgpt_agent: + if agent_type == AgentType.memgpt_agent and not enable_sleeptime: system = gpt_system.get_system_text("memgpt_chat") + elif agent_type == AgentType.memgpt_agent and enable_sleeptime: + system = gpt_system.get_system_text("memgpt_sleeptime_chat") elif agent_type == AgentType.sleeptime_agent: - system = gpt_system.get_system_text("memgpt_offline_memory") + system = gpt_system.get_system_text("sleeptime") else: raise ValueError(f"Invalid agent type: {agent_type}") diff --git a/tests/test_multi_agent.py b/tests/test_multi_agent.py index c00df263..ec9f07f5 100644 --- a/tests/test_multi_agent.py +++ b/tests/test_multi_agent.py @@ -442,7 +442,10 @@ async def test_dynamic_group_chat(server, actor, manager_agent, participant_agen @pytest.mark.asyncio async def test_background_group_chat(server, actor): - # 1. create sleeptime agent + # 0. Refresh base tools + server.tool_manager.upsert_base_tools(actor=actor) + + # 1. 
Create sleeptime agent main_agent = server.create_agent( request=CreateAgent( name="main_agent", @@ -463,7 +466,9 @@ async def test_background_group_chat(server, actor): actor=actor, ) - # 2. Change frequency for test + assert main_agent.enable_sleeptime == True + + # 2. Override frequency for test group = server.group_manager.modify_group( group_id=main_agent.multi_agent_group.id, group_update=GroupUpdate( @@ -479,6 +484,7 @@ async def test_background_group_chat(server, actor): assert group.background_agents_frequency == 2 assert len(group.agent_ids) == 1 + # 3. Verify shared blocks sleeptime_agent_id = group.agent_ids[0] shared_block = server.agent_manager.get_block_with_label(agent_id=main_agent.id, block_label="human", actor=actor) agents = server.block_manager.get_agents_for_block(block_id=shared_block.id, actor=actor) @@ -486,9 +492,7 @@ async def test_background_group_chat(server, actor): assert sleeptime_agent_id in [agent.id for agent in agents] assert main_agent.id in [agent.id for agent in agents] - assert main_agent.enable_sleeptime == True - - # 6. Send messages + # 4. Send messages and verify run ids message_text = [ "my favorite color is orange", "not particularly. today is a good day", @@ -520,6 +524,7 @@ async def test_background_group_chat(server, actor): job = server.job_manager.get_job_by_id(job_id=run_id, actor=actor) assert job.status == JobStatus.completed + # 5. Delete agent server.agent_manager.delete_agent(agent_id=main_agent.id, actor=actor) with pytest.raises(NoResultFound):