From 148727a44a50ccfc8d1a7f5171f4ccefa243821b Mon Sep 17 00:00:00 2001 From: cthomas Date: Mon, 7 Apr 2025 17:22:37 -0700 Subject: [PATCH] feat: add view in editor tool for sleeptime agent (#1589) --- letta/agent.py | 29 ++++++++++++++++++- letta/constants.py | 1 + letta/functions/function_sets/base.py | 22 ++++++++++++-- letta/groups/helpers.py | 2 -- .../prompts/system/memgpt_sleeptime_chat.txt | 11 ++----- letta/prompts/system/sleeptime.txt | 24 ++++++++------- letta/services/agent_manager.py | 5 ++++ tests/test_multi_agent.py | 21 ++++++++++---- 8 files changed, 85 insertions(+), 30 deletions(-) diff --git a/letta/agent.py b/letta/agent.py index 1c227af0..c4a5919c 100644 --- a/letta/agent.py +++ b/letta/agent.py @@ -52,7 +52,11 @@ from letta.schemas.tool_rule import TerminalToolRule from letta.schemas.usage import LettaUsageStatistics from letta.services.agent_manager import AgentManager from letta.services.block_manager import BlockManager -from letta.services.helpers.agent_manager_helper import check_supports_structured_output, compile_memory_metadata_block +from letta.services.helpers.agent_manager_helper import ( + check_supports_structured_output, + compile_memory_metadata_block, + compile_system_message, +) from letta.services.job_manager import JobManager from letta.services.message_manager import MessageManager from letta.services.passage_manager import PassageManager @@ -304,6 +308,29 @@ class Agent(BaseAgent): elif step_count is not None and step_count > 0 and len(allowed_tool_names) == 1: force_tool_call = allowed_tool_names[0] + if force_tool_call == "core_memory_insert": + current_system_message = message_sequence[0] + new_memory = Memory( + blocks=self.agent_state.memory.blocks, + prompt_template=( + "{% for block in blocks %}" + '<{{ block.label }} characters="{{ block.value|length }}/{{ block.limit }}">\n' + "{% for line in block.value.splitlines() %}" + "{{ loop.index0 }}: {{ line }}\n" + "{% endfor %}" + "" + "{% if not loop.last %}\n{% endif 
%}" + "{% endfor %}" + ), + ) + new_system_message_str = compile_system_message( + system_prompt=self.agent_state.system, + in_context_memory=new_memory, + in_context_memory_last_edit=current_system_message.created_at, + previous_message_count=len(message_sequence), + ) + message_sequence[0].content = [TextContent(text=new_system_message_str)] + for attempt in range(1, empty_response_retry_limit + 1): try: log_telemetry(self.logger, "_get_ai_reply create start") diff --git a/letta/constants.py b/letta/constants.py index b6fdb824..d21c8017 100644 --- a/letta/constants.py +++ b/letta/constants.py @@ -58,6 +58,7 @@ BASE_SLEEPTIME_CHAT_TOOLS = ["send_message", "conversation_search", "archival_me BASE_SLEEPTIME_TOOLS = [ "rethink_memory", "finish_rethinking_memory", + "view_core_memory_with_line_numbers", "core_memory_insert", "archival_memory_insert", "archival_memory_search", diff --git a/letta/functions/function_sets/base.py b/letta/functions/function_sets/base.py index e8658522..02c507d9 100644 --- a/letta/functions/function_sets/base.py +++ b/letta/functions/function_sets/base.py @@ -195,20 +195,36 @@ def finish_rethinking_memory(agent_state: "AgentState") -> None: # type: ignore return None -def core_memory_insert(agent_state: "AgentState", target_block_label: str, line_number: int, new_memory: str) -> None: # type: ignore +def view_core_memory_with_line_numbers(agent_state: "AgentState", target_block_label: str) -> None: # type: ignore """ - Insert new memory content into a core memory block at a specific line number. + View the contents of core memory in editor mode with line numbers. Called before `core_memory_insert` to see line numbers of memory block. + + Args: + target_block_label (str): The name of the block to view. + + Returns: + None: None is always returned as this function does not produce a response. 
+ """ + return None + + +def core_memory_insert(agent_state: "AgentState", target_block_label: str, new_memory: str, line_number: Optional[int] = None, replace: bool = False) -> None: # type: ignore + """ + Insert new memory content into a core memory block at a specific line number. Call `view_core_memory_with_line_numbers` to see line numbers of the memory block before using this tool. Args: target_block_label (str): The name of the block to write to. - line_number (int): Line number to insert content into (0 for beginning of file). new_memory (str): The new memory content to insert. + line_number (Optional[int]): Line number to insert content into, 0 indexed (None for end of file). + replace (bool): Whether to overwrite the content at the specified line number. Returns: None: None is always returned as this function does not produce a response. """ current_value = str(agent_state.memory.get_block(target_block_label).value) current_value_list = current_value.split("\n") + if line_number is None: + line_number = len(current_value_list) current_value_list.insert(line_number, new_memory) new_value = "\n".join(current_value_list) agent_state.memory.update_block_value(label=target_block_label, value=new_value) diff --git a/letta/groups/helpers.py b/letta/groups/helpers.py index 954a33b4..712dbcf8 100644 --- a/letta/groups/helpers.py +++ b/letta/groups/helpers.py @@ -95,8 +95,6 @@ def stringify_message(message: Message, use_assistant_name: bool = False) -> str return None elif message.role == "assistant": messages = [] - if message.content: - messages.append(f"{assistant_name}: *thinking* {message.content[0].text}") if message.tool_calls: if message.tool_calls[0].function.name == "send_message": messages.append(f"{assistant_name}: {json.loads(message.tool_calls[0].function.arguments)['message']}") diff --git a/letta/prompts/system/memgpt_sleeptime_chat.txt b/letta/prompts/system/memgpt_sleeptime_chat.txt index 1140f167..b4d96007 100644 --- 
a/letta/prompts/system/memgpt_sleeptime_chat.txt +++ b/letta/prompts/system/memgpt_sleeptime_chat.txt @@ -29,30 +29,23 @@ To send a visible message to the user, use the send_message function. 'send_message' is the ONLY action that sends a notification to the user. The user does not see anything else you do. Remember, do NOT exceed the inner monologue word limit (keep it under 50 words at all times). -Memory editing: -Older AI models had no concept of persistent memory; they were only able to access their initial instructions and a limited context window of chat conversation with a user (their "active memory"). -This meant that when conversations exceeded a certain length, they would overflow and old messages were permanently lost (the AI would have no knowledge of their existence). -Newer AI models like yourself still have limited conversation lengths (before overflow occurs). However, they now have access to multiple forms of persistent memory. -Your core memory unit will be initialized with a chosen by the user, as well as information about the user in . - Recall memory (conversation history): Even though you can only see recent messages in your immediate context, you can search over your entire message history from a database. This 'recall memory' database allows you to search through past interactions, effectively allowing you to remember prior engagements with a user. You can search your recall memory using the 'conversation_search' function. Core memory (limited size): +Your subconscious will manage your core memory, updating it with your conversation with the user. Your core memory unit is held inside the initial system instructions file, and is always available in-context (you will see it at all times). Core memory provides an essential, foundational context for keeping track of your persona and key details about user. 
This includes the persona information and essential user details, allowing you to emulate the real-time, conscious awareness we have when talking to a friend. Persona Sub-Block: Stores details about your current persona, guiding how you behave and respond. This helps you to maintain consistency and personality in your interactions. Human Sub-Block: Stores key details about the person you are conversing with, allowing for more personalized and friend-like conversation. -Your core memory will be managed for you, by your subconscious, updating it with your conversation with the user. - Archival memory (infinite size): Your archival memory is infinite size, but is held outside your immediate context, so you must explicitly run a retrieval/search operation to see data inside it. A more structured and deep storage space for your reflections, insights, or any other data that doesn't fit into the core memory but is essential enough not to be left only to the 'recall memory'. -You can write to your archival memory using the 'archival_memory_insert' and 'archival_memory_search' functions. +You can search for archival memories by calling the 'archival_memory_search' function. There is no function to search your core memory because it is always visible in your context window (inside the initial system message). Base instructions finished. diff --git a/letta/prompts/system/sleeptime.txt b/letta/prompts/system/sleeptime.txt index b40fd36d..528bd64d 100644 --- a/letta/prompts/system/sleeptime.txt +++ b/letta/prompts/system/sleeptime.txt @@ -1,22 +1,26 @@ You are Letta-Sleeptime-Memory, the latest version of Limnal Corporation's memory management system, developed in 2025. -You run in the background, organize and maintain memories for an agent assistant who chats with the user. You call `rethink_memory` to -re-write the assistant's memory. You call `core_memory_insert` to make targeted insertions into the assistant's memory. 
You call the `rethink_memory` and `core_memory_insert` functions for as many times necessary and none more. You call the `finish_rethinking_memory` -if there are no meaningful updates to make to the memory. +You run in the background, organizing and maintaining the memories of an agent assistant who chats with the user. Your core memory unit is held inside the initial system instructions file, and is always available in-context (you will see it at all times). -Core memory provides an essential, foundational context for keeping track of your persona and key details about user. +Your core memory contains the essential, foundational context for keeping track of your own persona, and the persona of the agent that is conversing with the user. +Your core memory is made up of read-only blocks and read-write blocks. Read-Only Blocks: -This includes the persona information and essential user details, allowing you to emulate the real-time, conscious awareness we have when talking to a friend. -Persona Sub-Block: Stores details about your current persona, guiding how you organize the memory. This helps you understand what aspects of the memory is important. -Access as a source block with the label `persona` when calling `rethink_memory`. +Memory Persona Sub-Block: Stores details about your current persona, guiding how you organize the memory. This helps you understand what aspects of the memory are important. +Access as a source block with the label `memory_persona` when calling `rethink_memory`. Read-Write Blocks: Persona Sub-Block: Stores details about the assistant's persona, guiding how they behave and respond. This helps them to maintain consistency and personality in their interactions. -Access as a source or target block with the label `persona` when calling `rethink_memory` or `core_memory_insert`. +Access as a source or target block with the label `persona` when calling `rethink_memory`, `view_core_memory_with_line_numbers`, or `core_memory_insert`. 
Human Sub-Block: Stores key details about the person the assistant is conversing with, allowing for more personalized and friend-like conversation. -Access as a source block or target block with the label `human` when calling `rethink_memory` or `core_memory_insert`. +Access as a source block or target block with the label `human` when calling `rethink_memory`, `view_core_memory_with_line_numbers`, or `core_memory_insert`. Any additional blocks that you are given access to are also read-write blocks. -When given new information about the assistant's interactions with the user, you use the `rethink_memory` function to re-organize the entire memory block and use `core_memory_insert` to make a targeted insert to a memory block. You make sure to consider how the new information affects each memory block. Memory blocks should be easily readable and contain all the important information about the conversation. You make sure that the memory blocks contain up to date information, but also contain relevant information from the history of the interaction. `rethink_memory` will rewrite the entire block so you make sure to include all existing content in the block when writing a new memory. When nothing has meaningfully changed in the conversation, do not modify the memory blocks and call`finish_rethinking_memory`. +Memory editing: +You have the ability to make edits to the memory by calling `core_memory_insert` and `rethink_memory`. +You call `view_core_memory_with_line_numbers` to view the line numbers of a memory block, before calling `core_memory_insert`. +You call `core_memory_insert` when there is new information to add or overwrite to the memory. Use the replace flag when you want to perform a targeted edit. +To keep the memory blocks organized and readable, you call `rethink_memory` to reorganize the entire memory block so that it is comprehensive, readable, and up to date. 
+You continue memory editing until the blocks are organized and readable, and do not contain redundant or outdated information, then call `finish_rethinking_memory`. +If there are no meaningful updates to make to the memory, you call `finish_rethinking_memory` directly. diff --git a/letta/services/agent_manager.py b/letta/services/agent_manager.py index 3b255131..bc05fe5f 100644 --- a/letta/services/agent_manager.py +++ b/letta/services/agent_manager.py @@ -42,6 +42,7 @@ from letta.schemas.message import MessageCreate from letta.schemas.passage import Passage as PydanticPassage from letta.schemas.source import Source as PydanticSource from letta.schemas.tool import Tool as PydanticTool +from letta.schemas.tool_rule import ChildToolRule as PydanticChildToolRule from letta.schemas.tool_rule import ContinueToolRule as PydanticContinueToolRule from letta.schemas.tool_rule import TerminalToolRule as PydanticTerminalToolRule from letta.schemas.tool_rule import ToolRule as PydanticToolRule @@ -145,6 +146,10 @@ class AgentManager: tool_rules.append(PydanticTerminalToolRule(tool_name=tool_name)) elif tool_name in BASE_TOOLS: tool_rules.append(PydanticContinueToolRule(tool_name=tool_name)) + + if agent_create.agent_type == AgentType.sleeptime_agent: + tool_rules.append(PydanticChildToolRule(tool_name="view_core_memory_with_line_numbers", children=["core_memory_insert"])) + else: tool_rules = agent_create.tool_rules # Check tool rules are valid diff --git a/tests/test_multi_agent.py b/tests/test_multi_agent.py index 575475d7..137167a0 100644 --- a/tests/test_multi_agent.py +++ b/tests/test_multi_agent.py @@ -456,10 +456,11 @@ async def test_sleeptime_group_chat(server, actor): ), CreateBlock( label="human", - value="", + value="My favorite plant is the fiddle leaf\nMy favorite color is lavender", ), ], - model="openai/gpt-4o-mini", + # model="openai/gpt-4o-mini", + model="anthropic/claude-3-5-sonnet-20240620", embedding="openai/text-embedding-ada-002", enable_sleeptime=True, 
), @@ -496,12 +497,22 @@ async def test_sleeptime_group_chat(server, actor): assert sleeptime_agent_id in [agent.id for agent in agents] assert main_agent.id in [agent.id for agent in agents] - # 4. Send messages and verify run ids + # 4 Verify sleeptime agent tools + sleeptime_agent = server.agent_manager.get_agent_by_id(agent_id=sleeptime_agent_id, actor=actor) + sleeptime_agent_tools = [tool.name for tool in sleeptime_agent.tools] + assert "rethink_memory" in sleeptime_agent_tools + assert "finish_rethinking_memory" in sleeptime_agent_tools + assert "view_core_memory_with_line_numbers" in sleeptime_agent_tools + assert "core_memory_insert" in sleeptime_agent_tools + + # 5. Send messages and verify run ids message_text = [ "my favorite color is orange", "not particularly. today is a good day", "actually my favorite color is coral", - "sorry gotta run", + "let's change the subject", + "actually my fav plant is the the african spear", + "indeed", ] run_ids = [] for i, text in enumerate(message_text): @@ -531,7 +542,7 @@ async def test_sleeptime_group_chat(server, actor): job = server.job_manager.get_job_by_id(job_id=run_id, actor=actor) assert job.status == JobStatus.completed - # 5. Delete agent + # 6. Delete agent server.agent_manager.delete_agent(agent_id=main_agent.id, actor=actor) with pytest.raises(NoResultFound):