feat: match line number rendering to Anthropic / OAI defaults (#5492)

2025-10-21 12:56:03 -07:00
parent bd918fcffa
commit 35b5383724
11 changed files with 73 additions and 25 deletions
--- a/letta/agents/base_agent.py
+++ b/letta/agents/base_agent.py
@@ -140,7 +140,10 @@ class BaseAgent(ABC):

            # generate just the memory string with current state for comparison
            curr_memory_str = agent_state.memory.compile(
-                tool_usage_rules=tool_constraint_block, sources=agent_state.sources, max_files_open=agent_state.max_files_open
+                tool_usage_rules=tool_constraint_block,
+                sources=agent_state.sources,
+                max_files_open=agent_state.max_files_open,
+                llm_config=agent_state.llm_config,
            )
            new_dynamic_section = extract_dynamic_section(curr_memory_str)

--- a/letta/agents/letta_agent_v2.py
+++ b/letta/agents/letta_agent_v2.py
@@ -700,7 +700,10 @@ class LettaAgentV2(BaseAgentV2):

        # generate just the memory string with current state for comparison
        curr_memory_str = agent_state.memory.compile(
-            tool_usage_rules=tool_constraint_block, sources=agent_state.sources, max_files_open=agent_state.max_files_open
+            tool_usage_rules=tool_constraint_block,
+            sources=agent_state.sources,
+            max_files_open=agent_state.max_files_open,
+            llm_config=agent_state.llm_config,
        )
        new_dynamic_section = extract_dynamic_section(curr_memory_str)

--- a/letta/agents/voice_agent.py
+++ b/letta/agents/voice_agent.py
@@ -153,6 +153,7 @@ class VoiceAgent(BaseAgent):
            archival_memory_size=self.num_archival_memories,
            sources=agent_state.sources,
            max_files_open=agent_state.max_files_open,
+            llm_config=agent_state.llm_config,
        )
        letta_message_db_queue = create_input_messages(
            input_messages=input_messages, agent_id=agent_state.id, timezone=agent_state.timezone, actor=self.actor
--- a/letta/constants.py
+++ b/letta/constants.py
@@ -125,10 +125,10 @@ LOCAL_ONLY_MULTI_AGENT_TOOLS = ["send_message_to_agent_async"]

 # Used to catch if line numbers are pushed in
 # MEMORY_TOOLS_LINE_NUMBER_PREFIX_REGEX = re.compile(r"^Line \d+: ", re.MULTILINE)
-# More "robust" version that handles different kinds of whitespace
+# Updated to match new arrow format: "1→ content"
 # shared constant for both memory_insert and memory_replace
 MEMORY_TOOLS_LINE_NUMBER_PREFIX_REGEX = re.compile(
-    r"^[ \t]*Line[ \t]+\d+[ \t]*:",  # allow any leading whitespace and flexible spacing
+    r"^[ \t]*\d+→[ \t]*",  # match number followed by arrow, with optional whitespace
    re.MULTILINE,
 )

@@ -210,9 +210,7 @@ ERROR_MESSAGE_PREFIX = "Error"

 NON_USER_MSG_PREFIX = "[This is an automated system message hidden from the user] "

-CORE_MEMORY_LINE_NUMBER_WARNING = (
-    "# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls."
-)
+CORE_MEMORY_LINE_NUMBER_WARNING = "# NOTE: Line numbers shown below (with arrows like '1→') are to help during editing. Do NOT include line number prefixes in your memory edit tool calls."


 # Constants to do with summarization / conversation length window
--- a/letta/functions/function_sets/base.py
+++ b/letta/functions/function_sets/base.py
@@ -294,6 +294,7 @@ SNIPPET_LINES: int = 4
 def memory_replace(agent_state: "AgentState", label: str, old_str: str, new_str: str) -> str:  # type: ignore
    """
    The memory_replace command allows you to replace a specific string in a memory block with a new string. This is used for making precise edits.
+    Do NOT attempt to replace long strings, e.g. do not attempt to replace the entire contents of a memory block with a new string.

    Args:
        label (str): Section of the memory to be edited, identified by its label.
@@ -311,10 +312,10 @@ def memory_replace(agent_state: "AgentState", label: str, old_str: str, new_str:
        memory_replace(label="human", old_str="Their name is Alice", new_str="")

        # Bad example - do NOT add (view-only) line numbers to the args
-        memory_replace(label="human", old_str="Line 1: Their name is Alice", new_str="Line 1: Their name is Bob")
+        memory_replace(label="human", old_str="1→ Their name is Alice", new_str="1→ Their name is Bob")

-        # Bad example - do NOT include the number number warning either
-        memory_replace(label="human", old_str="# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\\nLine 1: Their name is Alice", new_str="Line 1: Their name is Bob")
+        # Bad example - do NOT include the line number warning either
+        memory_replace(label="human", old_str="# NOTE: Line numbers shown below (with arrows like '1→') are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\\n1→ Their name is Alice", new_str="1→ Their name is Bob")

        # Good example - no line numbers or line number warning (they are view-only), just the text
        memory_replace(label="human", old_str="Their name is Alice", new_str="Their name is Bob")
--- a/letta/prompts/prompt_generator.py
+++ b/letta/prompts/prompt_generator.py
@@ -170,6 +170,7 @@ class PromptGenerator:
        tool_rules_solver: Optional[ToolRulesSolver] = None,
        sources: Optional[List] = None,
        max_files_open: Optional[int] = None,
+        llm_config: Optional[object] = None,
    ) -> str:
        tool_constraint_block = None
        if tool_rules_solver is not None:
@@ -182,7 +183,7 @@ class PromptGenerator:
            pass

        memory_with_sources = in_context_memory.compile(
-            tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open
+            tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open, llm_config=llm_config
        )

        return PromptGenerator.get_system_message_from_compiled_memory(
--- a/letta/schemas/memory.py
+++ b/letta/schemas/memory.py
@@ -153,11 +153,11 @@ class Memory(BaseModel, validate_assignment=True):
            s.write(f"\n- chars_current={len(value)}")
            s.write(f"\n- chars_limit={limit}\n")
            s.write("</metadata>\n")
+            s.write(f"<warning>\n{CORE_MEMORY_LINE_NUMBER_WARNING}\n</warning>\n")
            s.write("<value>\n")
-            s.write(f"{CORE_MEMORY_LINE_NUMBER_WARNING}\n")
            if value:
                for i, line in enumerate(value.split("\n"), start=1):
-                    s.write(f"Line {i}: {line}\n")
+                    s.write(f"{i}→ {line}\n")
            s.write("</value>\n")
            s.write(f"</{label}>\n")
            if idx != len(self.blocks) - 1:
@@ -264,14 +264,21 @@ class Memory(BaseModel, validate_assignment=True):
            s.write("</directory>\n")
        s.write("</directories>")

-    def compile(self, tool_usage_rules=None, sources=None, max_files_open=None) -> str:
+    def compile(self, tool_usage_rules=None, sources=None, max_files_open=None, llm_config=None) -> str:
        """Efficiently render memory, tool rules, and sources into a prompt string."""
        s = StringIO()

        raw_type = self.agent_type.value if hasattr(self.agent_type, "value") else (self.agent_type or "")
        norm_type = raw_type.lower()
        is_react = norm_type in ("react_agent", "workflow_agent")
-        is_line_numbered = norm_type in ("sleeptime_agent", "memgpt_v2_agent", "letta_v1_agent")
+
+        # Check if we should use line numbers based on both agent type and model provider
+        is_line_numbered = False  # Default to no line numbers
+        if llm_config and hasattr(llm_config, "model_endpoint_type"):
+            is_anthropic = llm_config.model_endpoint_type == "anthropic"
+            is_line_numbered_agent_type = norm_type in ("sleeptime_agent", "memgpt_v2_agent", "letta_v1_agent")
+            # Only use line numbers for specific agent types AND Anthropic models
+            is_line_numbered = is_line_numbered_agent_type and is_anthropic

        # Memory blocks (not for react/workflow). Always include wrapper for preview/tests.
        if not is_react:
@@ -297,22 +304,23 @@ class Memory(BaseModel, validate_assignment=True):
        return s.getvalue()

    @trace_method
-    async def compile_async(self, tool_usage_rules=None, sources=None, max_files_open=None) -> str:
+    async def compile_async(self, tool_usage_rules=None, sources=None, max_files_open=None, llm_config=None) -> str:
        """Async version that offloads to a thread for CPU-bound string building."""
        return await asyncio.to_thread(
            self.compile,
            tool_usage_rules=tool_usage_rules,
            sources=sources,
            max_files_open=max_files_open,
+            llm_config=llm_config,
        )

    @trace_method
-    async def compile_in_thread_async(self, tool_usage_rules=None, sources=None, max_files_open=None) -> str:
+    async def compile_in_thread_async(self, tool_usage_rules=None, sources=None, max_files_open=None, llm_config=None) -> str:
        """Deprecated: use compile() instead."""
        import warnings

        warnings.warn("compile_in_thread_async is deprecated; use compile()", DeprecationWarning, stacklevel=2)
-        return self.compile(tool_usage_rules=tool_usage_rules, sources=sources, max_files_open=max_files_open)
+        return self.compile(tool_usage_rules=tool_usage_rules, sources=sources, max_files_open=max_files_open, llm_config=llm_config)

    def list_block_labels(self) -> List[str]:
        """Return a list of the block names held inside the memory object"""
--- a/letta/services/agent_manager.py
+++ b/letta/services/agent_manager.py
@@ -1173,7 +1173,7 @@ class AgentManager:

        # note: we only update the system prompt if the core memory is changed
        # this means that the archival/recall memory statistics may be someout out of date
-        curr_memory_str = agent_state.memory.compile(sources=agent_state.sources)
+        curr_memory_str = agent_state.memory.compile(sources=agent_state.sources, llm_config=agent_state.llm_config)
        if curr_memory_str in curr_system_message_openai["content"] and not force:
            # NOTE: could this cause issues if a block is removed? (substring match would still work)
            logger.debug(
@@ -1202,6 +1202,7 @@ class AgentManager:
            archival_memory_size=num_archival_memories,
            sources=agent_state.sources,
            max_files_open=agent_state.max_files_open,
+            llm_config=agent_state.llm_config,
        )

        diff = united_diff(curr_system_message_openai["content"], new_system_message_str)
@@ -1264,6 +1265,7 @@ class AgentManager:
            sources=agent_state.sources,
            tool_usage_rules=tool_rules_solver.compile_tool_rule_prompts(),
            max_files_open=agent_state.max_files_open,
+            llm_config=agent_state.llm_config,
        )
        if curr_memory_str in curr_system_message_openai["content"] and not force:
            # NOTE: could this cause issues if a block is removed? (substring match would still work)
@@ -1451,6 +1453,7 @@ class AgentManager:
            sources=agent_state.sources,
            tool_usage_rules=temp_tool_rules_solver.compile_tool_rule_prompts(),
            max_files_open=agent_state.max_files_open,
+            llm_config=agent_state.llm_config,
        )
        if new_memory_str not in system_message.content[0].text:
            # update the blocks (LRW) in the DB
--- a/letta/services/helpers/agent_manager_helper.py
+++ b/letta/services/helpers/agent_manager_helper.py
@@ -256,6 +256,7 @@ def compile_system_message(
    tool_rules_solver: Optional[ToolRulesSolver] = None,
    sources: Optional[List] = None,
    max_files_open: Optional[int] = None,
+    llm_config: Optional[object] = None,
 ) -> str:
    """Prepare the final/full system message that will be fed into the LLM API

@@ -289,7 +290,7 @@ def compile_system_message(
        )

        memory_with_sources = in_context_memory.compile(
-            tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open
+            tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open, llm_config=llm_config
        )
        full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string

--- a/letta/services/tool_executor/sandbox_tool_executor.py
+++ b/letta/services/tool_executor/sandbox_tool_executor.py
@@ -36,7 +36,7 @@ class SandboxToolExecutor(ToolExecutor):
    ) -> ToolExecutionResult:
        # Store original memory state
        if agent_state:
-            orig_memory_str = agent_state.memory.compile()
+            orig_memory_str = agent_state.memory.compile(llm_config=agent_state.llm_config)
        else:
            orig_memory_str = None

@@ -89,7 +89,7 @@ class SandboxToolExecutor(ToolExecutor):

            # Verify memory integrity
            if agent_state:
-                new_memory_str = agent_state.memory.compile()
+                new_memory_str = agent_state.memory.compile(llm_config=agent_state.llm_config)
                assert orig_memory_str == new_memory_str, "Memory should not be modified in a sandbox tool"

            # Update agent memory if needed
--- a/tests/test_memory.py
+++ b/tests/test_memory.py
@@ -65,14 +65,43 @@ def test_compile_line_numbered_blocks_sleeptime():
    m = Memory(agent_type=AgentType.sleeptime_agent, blocks=[Block(label="notes", value="line1\nline2", limit=100)])
    out = m.compile()
    assert "<memory_blocks>" in out
-    assert CORE_MEMORY_LINE_NUMBER_WARNING in out
-    assert "Line 1: line1" in out and "Line 2: line2" in out
+    # Without llm_config, line numbers are off by default (they are only rendered for Anthropic model endpoints)
+    assert CORE_MEMORY_LINE_NUMBER_WARNING not in out
+    assert "1→ line1" not in out and "2→ line2" not in out
+    assert "line1" in out and "line2" in out  # Content should still be there


 def test_compile_line_numbered_blocks_memgpt_v2():
    m = Memory(agent_type=AgentType.memgpt_v2_agent, blocks=[Block(label="notes", value="a\nb", limit=100)])
    out = m.compile()
-    assert "Line 1: a" in out and "Line 2: b" in out
+    # Without llm_config, line numbers are off by default (they are only rendered for Anthropic model endpoints)
+    assert "1→ a" not in out and "2→ b" not in out
+    assert "a" in out and "b" in out  # Content should still be there
+
+
+def test_compile_line_numbered_blocks_with_anthropic():
+    """Test that line numbers appear when using Anthropic models."""
+    from letta.schemas.llm_config import LLMConfig
+
+    m = Memory(agent_type=AgentType.letta_v1_agent, blocks=[Block(label="notes", value="line1\nline2", limit=100)])
+    anthropic_config = LLMConfig(model="claude-3-sonnet-20240229", model_endpoint_type="anthropic", context_window=200000)
+    out = m.compile(llm_config=anthropic_config)
+    assert "<memory_blocks>" in out
+    assert CORE_MEMORY_LINE_NUMBER_WARNING in out
+    assert "1→ line1" in out and "2→ line2" in out
+
+
+def test_compile_line_numbered_blocks_with_openai():
+    """Test that line numbers do NOT appear when using OpenAI models."""
+    from letta.schemas.llm_config import LLMConfig
+
+    m = Memory(agent_type=AgentType.letta_v1_agent, blocks=[Block(label="notes", value="line1\nline2", limit=100)])
+    openai_config = LLMConfig(model="gpt-4", model_endpoint_type="openai", context_window=128000)
+    out = m.compile(llm_config=openai_config)
+    assert "<memory_blocks>" in out
+    assert CORE_MEMORY_LINE_NUMBER_WARNING not in out
+    assert "1→ line1" not in out and "2→ line2" not in out
+    assert "line1" in out and "line2" in out  # Content should still be there


 def test_compile_empty_returns_empty_string():