From 35b53837247244d7af938491422221c3561d6488 Mon Sep 17 00:00:00 2001
From: Kevin Lin <klin5061@gmail.com>
Date: Tue, 21 Oct 2025 12:56:03 -0700
Subject: [PATCH] feat: match line number rendering to Anthropic / OAI defaults
 (#5492)

---
 letta/agents/base_agent.py                    |  5 ++-
 letta/agents/letta_agent_v2.py                |  5 ++-
 letta/agents/voice_agent.py                   |  1 +
 letta/constants.py                            |  8 ++---
 letta/functions/function_sets/base.py         |  7 ++--
 letta/prompts/prompt_generator.py             |  3 +-
 letta/schemas/memory.py                       | 22 ++++++++----
 letta/services/agent_manager.py               |  5 ++-
 .../services/helpers/agent_manager_helper.py  |  3 +-
 .../tool_executor/sandbox_tool_executor.py    |  4 +--
 tests/test_memory.py                          | 35 +++++++++++++++++--
 11 files changed, 73 insertions(+), 25 deletions(-)

diff --git a/letta/agents/base_agent.py b/letta/agents/base_agent.py
index 271e4502..e072146d 100644
--- a/letta/agents/base_agent.py
+++ b/letta/agents/base_agent.py
@@ -140,7 +140,10 @@ class BaseAgent(ABC):
 
             # generate just the memory string with current state for comparison
             curr_memory_str = agent_state.memory.compile(
-                tool_usage_rules=tool_constraint_block, sources=agent_state.sources, max_files_open=agent_state.max_files_open
+                tool_usage_rules=tool_constraint_block,
+                sources=agent_state.sources,
+                max_files_open=agent_state.max_files_open,
+                llm_config=agent_state.llm_config,
             )
             new_dynamic_section = extract_dynamic_section(curr_memory_str)
 
diff --git a/letta/agents/letta_agent_v2.py b/letta/agents/letta_agent_v2.py
index 3b9281d1..e42438db 100644
--- a/letta/agents/letta_agent_v2.py
+++ b/letta/agents/letta_agent_v2.py
@@ -700,7 +700,10 @@ class LettaAgentV2(BaseAgentV2):
 
         # generate just the memory string with current state for comparison
         curr_memory_str = agent_state.memory.compile(
-            tool_usage_rules=tool_constraint_block, sources=agent_state.sources, max_files_open=agent_state.max_files_open
+            tool_usage_rules=tool_constraint_block,
+            sources=agent_state.sources,
+            max_files_open=agent_state.max_files_open,
+            llm_config=agent_state.llm_config,
         )
         new_dynamic_section = extract_dynamic_section(curr_memory_str)
 
diff --git a/letta/agents/voice_agent.py b/letta/agents/voice_agent.py
index 2c7dc5f5..3002bc61 100644
--- a/letta/agents/voice_agent.py
+++ b/letta/agents/voice_agent.py
@@ -153,6 +153,7 @@ class VoiceAgent(BaseAgent):
             archival_memory_size=self.num_archival_memories,
             sources=agent_state.sources,
             max_files_open=agent_state.max_files_open,
+            llm_config=agent_state.llm_config,
         )
         letta_message_db_queue = create_input_messages(
             input_messages=input_messages, agent_id=agent_state.id, timezone=agent_state.timezone, actor=self.actor
diff --git a/letta/constants.py b/letta/constants.py
index 60220d17..673eed0d 100644
--- a/letta/constants.py
+++ b/letta/constants.py
@@ -125,10 +125,10 @@ LOCAL_ONLY_MULTI_AGENT_TOOLS = ["send_message_to_agent_async"]
 
 # Used to catch if line numbers are pushed in
 # MEMORY_TOOLS_LINE_NUMBER_PREFIX_REGEX = re.compile(r"^Line \d+: ", re.MULTILINE)
-# More "robust" version that handles different kinds of whitespace
+# Matches the arrow-style line-number prefix rendered in memory blocks, e.g. "1→ content"
 # shared constant for both memory_insert and memory_replace
 MEMORY_TOOLS_LINE_NUMBER_PREFIX_REGEX = re.compile(
-    r"^[ \t]*Line[ \t]+\d+[ \t]*:",  # allow any leading whitespace and flexible spacing
+    r"^[ \t]*\d+→[ \t]*",  # match number followed by arrow, with optional whitespace
     re.MULTILINE,
 )
 
@@ -210,9 +210,7 @@ ERROR_MESSAGE_PREFIX = "Error"
 
 NON_USER_MSG_PREFIX = "[This is an automated system message hidden from the user] "
 
-CORE_MEMORY_LINE_NUMBER_WARNING = (
-    "# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls."
-)
+CORE_MEMORY_LINE_NUMBER_WARNING = "# NOTE: Line numbers shown below (with arrows like '1→') are to help during editing. Do NOT include line number prefixes in your memory edit tool calls."
 
 
 # Constants to do with summarization / conversation length window
diff --git a/letta/functions/function_sets/base.py b/letta/functions/function_sets/base.py
index 6cb4c795..181edd86 100644
--- a/letta/functions/function_sets/base.py
+++ b/letta/functions/function_sets/base.py
@@ -294,6 +294,7 @@ SNIPPET_LINES: int = 4
 def memory_replace(agent_state: "AgentState", label: str, old_str: str, new_str: str) -> str:  # type: ignore
     """
     The memory_replace command allows you to replace a specific string in a memory block with a new string. This is used for making precise edits.
+    Do NOT attempt to replace long strings, e.g. do not attempt to replace the entire contents of a memory block with a new string.
 
     Args:
         label (str): Section of the memory to be edited, identified by its label.
@@ -311,10 +312,10 @@ def memory_replace(agent_state: "AgentState", label: str, old_str: str, new_str:
         memory_replace(label="human", old_str="Their name is Alice", new_str="")
 
         # Bad example - do NOT add (view-only) line numbers to the args
-        memory_replace(label="human", old_str="Line 1: Their name is Alice", new_str="Line 1: Their name is Bob")
+        memory_replace(label="human", old_str="1→ Their name is Alice", new_str="1→ Their name is Bob")
 
-        # Bad example - do NOT include the number number warning either
-        memory_replace(label="human", old_str="# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\\nLine 1: Their name is Alice", new_str="Line 1: Their name is Bob")
+        # Bad example - do NOT include the line number warning either
+        memory_replace(label="human", old_str="# NOTE: Line numbers shown below (with arrows like '1→') are to help during editing. Do NOT include line number prefixes in your memory edit tool calls.\\n1→ Their name is Alice", new_str="1→ Their name is Bob")
 
         # Good example - no line numbers or line number warning (they are view-only), just the text
         memory_replace(label="human", old_str="Their name is Alice", new_str="Their name is Bob")
diff --git a/letta/prompts/prompt_generator.py b/letta/prompts/prompt_generator.py
index 8c2623ae..f4e1c737 100644
--- a/letta/prompts/prompt_generator.py
+++ b/letta/prompts/prompt_generator.py
@@ -170,6 +170,7 @@ class PromptGenerator:
         tool_rules_solver: Optional[ToolRulesSolver] = None,
         sources: Optional[List] = None,
         max_files_open: Optional[int] = None,
+        llm_config: Optional[object] = None,
     ) -> str:
         tool_constraint_block = None
         if tool_rules_solver is not None:
@@ -182,7 +183,7 @@ class PromptGenerator:
             pass
 
         memory_with_sources = in_context_memory.compile(
-            tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open
+            tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open, llm_config=llm_config
         )
 
         return PromptGenerator.get_system_message_from_compiled_memory(
diff --git a/letta/schemas/memory.py b/letta/schemas/memory.py
index 78f5404c..6cb7a2ce 100644
--- a/letta/schemas/memory.py
+++ b/letta/schemas/memory.py
@@ -153,11 +153,11 @@ class Memory(BaseModel, validate_assignment=True):
             s.write(f"\n- chars_current={len(value)}")
             s.write(f"\n- chars_limit={limit}\n")
             s.write("</metadata>\n")
+            s.write(f"<warning>\n{CORE_MEMORY_LINE_NUMBER_WARNING}\n</warning>\n")
             s.write("<value>\n")
-            s.write(f"{CORE_MEMORY_LINE_NUMBER_WARNING}\n")
             if value:
                 for i, line in enumerate(value.split("\n"), start=1):
-                    s.write(f"Line {i}: {line}\n")
+                    s.write(f"{i}→ {line}\n")
             s.write("</value>\n")
             s.write(f"</{label}>\n")
             if idx != len(self.blocks) - 1:
@@ -264,14 +264,21 @@ class Memory(BaseModel, validate_assignment=True):
             s.write("</directory>\n")
         s.write("</directories>")
 
-    def compile(self, tool_usage_rules=None, sources=None, max_files_open=None) -> str:
+    def compile(self, tool_usage_rules=None, sources=None, max_files_open=None, llm_config=None) -> str:
         """Efficiently render memory, tool rules, and sources into a prompt string."""
         s = StringIO()
 
         raw_type = self.agent_type.value if hasattr(self.agent_type, "value") else (self.agent_type or "")
         norm_type = raw_type.lower()
         is_react = norm_type in ("react_agent", "workflow_agent")
-        is_line_numbered = norm_type in ("sleeptime_agent", "memgpt_v2_agent", "letta_v1_agent")
+
+        # Check if we should use line numbers based on both agent type and model provider
+        is_line_numbered = False  # Default to no line numbers
+        if llm_config and hasattr(llm_config, "model_endpoint_type"):
+            is_anthropic = llm_config.model_endpoint_type == "anthropic"
+            is_line_numbered_agent_type = norm_type in ("sleeptime_agent", "memgpt_v2_agent", "letta_v1_agent")
+            # Only use line numbers for specific agent types AND Anthropic models
+            is_line_numbered = is_line_numbered_agent_type and is_anthropic
 
         # Memory blocks (not for react/workflow). Always include wrapper for preview/tests.
         if not is_react:
@@ -297,22 +304,23 @@ class Memory(BaseModel, validate_assignment=True):
         return s.getvalue()
 
     @trace_method
-    async def compile_async(self, tool_usage_rules=None, sources=None, max_files_open=None) -> str:
+    async def compile_async(self, tool_usage_rules=None, sources=None, max_files_open=None, llm_config=None) -> str:
         """Async version that offloads to a thread for CPU-bound string building."""
         return await asyncio.to_thread(
             self.compile,
             tool_usage_rules=tool_usage_rules,
             sources=sources,
             max_files_open=max_files_open,
+            llm_config=llm_config,
         )
 
     @trace_method
-    async def compile_in_thread_async(self, tool_usage_rules=None, sources=None, max_files_open=None) -> str:
+    async def compile_in_thread_async(self, tool_usage_rules=None, sources=None, max_files_open=None, llm_config=None) -> str:
         """Deprecated: use compile() instead."""
         import warnings
 
         warnings.warn("compile_in_thread_async is deprecated; use compile()", DeprecationWarning, stacklevel=2)
-        return self.compile(tool_usage_rules=tool_usage_rules, sources=sources, max_files_open=max_files_open)
+        return self.compile(tool_usage_rules=tool_usage_rules, sources=sources, max_files_open=max_files_open, llm_config=llm_config)
 
     def list_block_labels(self) -> List[str]:
         """Return a list of the block names held inside the memory object"""
diff --git a/letta/services/agent_manager.py b/letta/services/agent_manager.py
index 8a4bb1c0..d57d736c 100644
--- a/letta/services/agent_manager.py
+++ b/letta/services/agent_manager.py
@@ -1173,7 +1173,7 @@ class AgentManager:
 
         # note: we only update the system prompt if the core memory is changed
         # this means that the archival/recall memory statistics may be someout out of date
-        curr_memory_str = agent_state.memory.compile(sources=agent_state.sources)
+        curr_memory_str = agent_state.memory.compile(sources=agent_state.sources, llm_config=agent_state.llm_config)
         if curr_memory_str in curr_system_message_openai["content"] and not force:
             # NOTE: could this cause issues if a block is removed? (substring match would still work)
             logger.debug(
@@ -1202,6 +1202,7 @@ class AgentManager:
             archival_memory_size=num_archival_memories,
             sources=agent_state.sources,
             max_files_open=agent_state.max_files_open,
+            llm_config=agent_state.llm_config,
         )
 
         diff = united_diff(curr_system_message_openai["content"], new_system_message_str)
@@ -1264,6 +1265,7 @@ class AgentManager:
             sources=agent_state.sources,
             tool_usage_rules=tool_rules_solver.compile_tool_rule_prompts(),
             max_files_open=agent_state.max_files_open,
+            llm_config=agent_state.llm_config,
         )
         if curr_memory_str in curr_system_message_openai["content"] and not force:
             # NOTE: could this cause issues if a block is removed? (substring match would still work)
@@ -1451,6 +1453,7 @@ class AgentManager:
             sources=agent_state.sources,
             tool_usage_rules=temp_tool_rules_solver.compile_tool_rule_prompts(),
             max_files_open=agent_state.max_files_open,
+            llm_config=agent_state.llm_config,
         )
         if new_memory_str not in system_message.content[0].text:
             # update the blocks (LRW) in the DB
diff --git a/letta/services/helpers/agent_manager_helper.py b/letta/services/helpers/agent_manager_helper.py
index e9c74e40..3d49e38e 100644
--- a/letta/services/helpers/agent_manager_helper.py
+++ b/letta/services/helpers/agent_manager_helper.py
@@ -256,6 +256,7 @@ def compile_system_message(
     tool_rules_solver: Optional[ToolRulesSolver] = None,
     sources: Optional[List] = None,
     max_files_open: Optional[int] = None,
+    llm_config: Optional[object] = None,
 ) -> str:
     """Prepare the final/full system message that will be fed into the LLM API
 
@@ -289,7 +290,7 @@ def compile_system_message(
         )
 
         memory_with_sources = in_context_memory.compile(
-            tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open
+            tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open, llm_config=llm_config
         )
         full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string
 
diff --git a/letta/services/tool_executor/sandbox_tool_executor.py b/letta/services/tool_executor/sandbox_tool_executor.py
index 525aabc5..4415bba3 100644
--- a/letta/services/tool_executor/sandbox_tool_executor.py
+++ b/letta/services/tool_executor/sandbox_tool_executor.py
@@ -36,7 +36,7 @@ class SandboxToolExecutor(ToolExecutor):
     ) -> ToolExecutionResult:
         # Store original memory state
         if agent_state:
-            orig_memory_str = agent_state.memory.compile()
+            orig_memory_str = agent_state.memory.compile(llm_config=agent_state.llm_config)
         else:
             orig_memory_str = None
 
@@ -89,7 +89,7 @@ class SandboxToolExecutor(ToolExecutor):
 
             # Verify memory integrity
             if agent_state:
-                new_memory_str = agent_state.memory.compile()
+                new_memory_str = agent_state.memory.compile(llm_config=agent_state.llm_config)
                 assert orig_memory_str == new_memory_str, "Memory should not be modified in a sandbox tool"
 
             # Update agent memory if needed
diff --git a/tests/test_memory.py b/tests/test_memory.py
index 0da6e7f4..26df7e41 100644
--- a/tests/test_memory.py
+++ b/tests/test_memory.py
@@ -65,14 +65,43 @@ def test_compile_line_numbered_blocks_sleeptime():
     m = Memory(agent_type=AgentType.sleeptime_agent, blocks=[Block(label="notes", value="line1\nline2", limit=100)])
     out = m.compile()
     assert "<memory_blocks>" in out
-    assert CORE_MEMORY_LINE_NUMBER_WARNING in out
-    assert "Line 1: line1" in out and "Line 2: line2" in out
+    # Without llm_config, line numbers are off by default (only enabled for Anthropic endpoints)
+    assert CORE_MEMORY_LINE_NUMBER_WARNING not in out
+    assert "1→ line1" not in out and "2→ line2" not in out
+    assert "line1" in out and "line2" in out  # Content should still be there
 
 
 def test_compile_line_numbered_blocks_memgpt_v2():
     m = Memory(agent_type=AgentType.memgpt_v2_agent, blocks=[Block(label="notes", value="a\nb", limit=100)])
     out = m.compile()
-    assert "Line 1: a" in out and "Line 2: b" in out
+    # Without llm_config, line numbers are off by default (only enabled for Anthropic endpoints)
+    assert "1→ a" not in out and "2→ b" not in out
+    assert "a" in out and "b" in out  # Content should still be there
+
+
+def test_compile_line_numbered_blocks_with_anthropic():
+    """Test that line numbers appear when using Anthropic models."""
+    from letta.schemas.llm_config import LLMConfig
+
+    m = Memory(agent_type=AgentType.letta_v1_agent, blocks=[Block(label="notes", value="line1\nline2", limit=100)])
+    anthropic_config = LLMConfig(model="claude-3-sonnet-20240229", model_endpoint_type="anthropic", context_window=200000)
+    out = m.compile(llm_config=anthropic_config)
+    assert "<memory_blocks>" in out
+    assert CORE_MEMORY_LINE_NUMBER_WARNING in out
+    assert "1→ line1" in out and "2→ line2" in out
+
+
+def test_compile_line_numbered_blocks_with_openai():
+    """Test that line numbers do NOT appear when using OpenAI models."""
+    from letta.schemas.llm_config import LLMConfig
+
+    m = Memory(agent_type=AgentType.letta_v1_agent, blocks=[Block(label="notes", value="line1\nline2", limit=100)])
+    openai_config = LLMConfig(model="gpt-4", model_endpoint_type="openai", context_window=128000)
+    out = m.compile(llm_config=openai_config)
+    assert "<memory_blocks>" in out
+    assert CORE_MEMORY_LINE_NUMBER_WARNING not in out
+    assert "1→ line1" not in out and "2→ line2" not in out
+    assert "line1" in out and "line2" in out  # Content should still be there
 
 
 def test_compile_empty_returns_empty_string():