Fix git-memory context preview parsing (#9414)

* fix(core): handle git memory label prefix collisions in filesystem view

  Prevent context window preview crashes when a block label is both a leaf and a prefix (e.g. system/human and system/human/context) by rendering a node as both file and directory. Add regression test.

  👾 Generated with [Letta Code](https://letta.com)

  Co-Authored-By: Letta <noreply@letta.com>

* fix(core): parse git-backed core memory in context window preview

  ContextWindowCalculator.extract_system_components now detects git-backed memory rendering (<memory_filesystem> and <system/...> tags) when the <memory_blocks> wrapper is absent, so core_memory is populated in the context preview. Add regression tests.

  👾 Generated with [Letta Code](https://letta.com)

  Co-Authored-By: Letta <noreply@letta.com>

---------

Co-authored-by: Letta <noreply@letta.com>
2026-02-10 15:17:00 -08:00
parent ca32311b9a
commit 2ffef0fb31
4 changed files with 176 additions and 33 deletions
--- a/letta/schemas/memory.py
+++ b/letta/schemas/memory.py
@@ -237,24 +237,65 @@ class Memory(BaseModel, validate_assignment=True):
        if not self.blocks:
            return

-        # Build tree structure from block labels
-        # e.g. "system/human" -> {"system": {"human": block}}
-        #      "organization" -> {"organization": block}
+        # Build tree structure from block labels.
+        #
+        # IMPORTANT: labels are path-like (e.g. "system/human"). In real filesystems a
+        # path component cannot be both a directory and a file, but our block namespace
+        # can contain collisions like:
+        #   - "system" (a block)
+        #   - "system/human" (a block under a virtual "system/" directory)
+        #
+        # When we detect a collision, the entry at that name becomes (or stays) a dict
+        # for the directory, and the colliding leaf block is stored in it under LEAF_KEY.
+        LEAF_KEY = "__block__"
+
        tree: dict = {}
        for block in self.blocks:
            label = block.label or "block"
-            parts = label.split("/")
-            node = tree
+            parts = [p for p in label.split("/") if p]
+            if not parts:
+                parts = ["block"]
+
+            node: dict = tree
            for part in parts[:-1]:
-                node = node.setdefault(part, {})
-            node[parts[-1]] = block
+                existing = node.get(part)
+                if existing is None:
+                    node[part] = {}
+                elif not isinstance(existing, dict):
+                    # Collision: leaf at `part` and now we need it to be a directory.
+                    node[part] = {LEAF_KEY: existing}
+                node = node[part]  # type: ignore[assignment]
+
+            leaf = parts[-1]
+            existing_leaf = node.get(leaf)
+            if existing_leaf is None:
+                node[leaf] = block
+            elif isinstance(existing_leaf, dict):
+                # Collision: directory at `leaf` already exists; attach the leaf block.
+                existing_leaf[LEAF_KEY] = block
+            else:
+                # Duplicate leaf label; last writer wins.
+                node[leaf] = block

        s.write("\n\n<memory_filesystem>\n")

        def _render_tree(node: dict, prefix: str = ""):
-            # Sort: directories first, then files
-            dirs = sorted(k for k, v in node.items() if isinstance(v, dict))
-            files = sorted(k for k, v in node.items() if not isinstance(v, dict))
+            # Sort: directories first, then files. If a node is both a directory and a
+            # leaf (LEAF_KEY present), show both <name>/ and <name>.md.
+            dirs = []
+            files = []
+            for name, val in node.items():
+                if name == LEAF_KEY:
+                    continue
+                if isinstance(val, dict):
+                    dirs.append(name)
+                    if LEAF_KEY in val:
+                        files.append(name)
+                else:
+                    files.append(name)
+
+            dirs = sorted(dirs)
+            files = sorted(files)
            entries = [(d, True) for d in dirs] + [(f, False) for f in files]

            for i, (name, is_dir) in enumerate(entries):
--- a/letta/services/context_window_calculator/context_window_calculator.py
+++ b/letta/services/context_window_calculator/context_window_calculator.py
@@ -21,28 +21,21 @@ class ContextWindowCalculator:

    @staticmethod
    def extract_system_components(system_message: str) -> Tuple[str, str, str]:
+        """Extract system prompt + core memory + metadata from a system message.
+
+        Historically, Letta system messages were formatted with:
+        - <base_instructions> ...
+        - <memory_blocks> ...
+        - <memory_metadata> ...
+
+        Git-backed memory agents do NOT wrap their rendered memory in <memory_blocks>.
+        Instead, the memory content typically begins with <memory_filesystem> followed
+        by file-like tags such as <system/human.md>...</system/human.md>.
+        
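+        Both formats are supported so the context window preview can display core memory for git-enabled agents.
+        Returns (system_prompt, core_memory, external_memory_summary); a section that is not found is an empty string.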
        """
-        Extract structured components from a formatted system message.

-        Parses the system message to extract three distinct sections marked by XML-style tags:
-        - base_instructions: The core system prompt and agent instructions
-        - memory_blocks: The agent's core memory (persistent context)
-        - memory_metadata: Metadata about external memory systems
-
-        Args:
-            system_message: A formatted system message containing XML-style section markers
-
-        Returns:
-            A tuple of (system_prompt, core_memory, external_memory_summary)
-            Each component will be an empty string if its section is not found
-
-        Note:
-            This method assumes a specific format with sections delimited by:
-            <base_instructions>, <memory_blocks>, and <memory_metadata> tags.
-            For git-memory-enabled agents, <memory_filesystem> is used instead
-            of <memory_blocks> as the core memory delimiter.
-            The extraction is position-based and expects sections in this order.
-        """
        base_start = system_message.find("<base_instructions>")
        memory_blocks_start = system_message.find("<memory_blocks>")
        if memory_blocks_start == -1:
@@ -54,14 +47,39 @@ class ContextWindowCalculator:
        core_memory = ""
        external_memory_summary = ""

+        # Always extract metadata if present
+        if metadata_start != -1:
+            external_memory_summary = system_message[metadata_start:].strip()
+
+        # Preferred (legacy) parsing when tags are present
        if base_start != -1 and memory_blocks_start != -1:
            system_prompt = system_message[base_start:memory_blocks_start].strip()
-
        if memory_blocks_start != -1 and metadata_start != -1:
            core_memory = system_message[memory_blocks_start:metadata_start].strip()

-        if metadata_start != -1:
-            external_memory_summary = system_message[metadata_start:].strip()
+        # Fallback parsing for git-backed memory rendering (no <memory_blocks> wrapper)
+        if not core_memory and metadata_start != -1:
+            # Identify where the "memory" section begins.
+            candidates = []
+            for marker in (
+                "<memory_filesystem>",
+                "<system/",  # e.g. <system/human.md>
+                "<organization/",  # future-proofing
+            ):
+                pos = system_message.find(marker)
+                if pos != -1:
+                    candidates.append(pos)
+
+            # If a memory start tag was found but the slice above came out empty (i.e. the tag
+            # appears after <memory_metadata>), still consider its position as a candidate.
+            if memory_blocks_start != -1:
+                candidates.append(memory_blocks_start)
+
+            if candidates:
+                mem_start = min(candidates)
+                core_memory = system_message[mem_start:metadata_start].strip()
+                if not system_prompt:
+                    system_prompt = system_message[:mem_start].strip()

        return system_prompt, core_memory, external_memory_summary

--- a/tests/test_context_window_calculator.py
+++ b/tests/test_context_window_calculator.py
@@ -0,0 +1,54 @@
+import pytest
+
+from letta.services.context_window_calculator.context_window_calculator import ContextWindowCalculator
+
+
+def test_extract_system_components_git_backed_memory_without_memory_blocks_wrapper():
+    system_message = """You are some system prompt.
+
+<memory_filesystem>
+Memory Directory: ~/.letta/agents/agent-123/memory
+
+/memory/
+└── system/
+    └── human.md
+</memory_filesystem>
+
+<system/human.md>
+---
+description: test
+limit: 10
+---
+hello
+</system/human.md>
+
+<memory_metadata>
+- foo=bar
+</memory_metadata>
+"""
+
+    system_prompt, core_memory, external_memory_summary = ContextWindowCalculator.extract_system_components(system_message)
+
+    assert "You are some system prompt" in system_prompt
+    assert "<memory_filesystem>" in core_memory
+    assert "<system/human.md>" in core_memory
+    assert external_memory_summary.startswith("<memory_metadata>")
+
+
+def test_extract_system_components_legacy_memory_blocks_wrapper():
+    system_message = """<base_instructions>SYS</base_instructions>
+
+<memory_blocks>
+<persona>p</persona>
+</memory_blocks>
+
+<memory_metadata>
+- x=y
+</memory_metadata>
+"""
+
+    system_prompt, core_memory, external_memory_summary = ContextWindowCalculator.extract_system_components(system_message)
+
+    assert system_prompt.startswith("<base_instructions>")
+    assert core_memory.startswith("<memory_blocks>")
+    assert external_memory_summary.startswith("<memory_metadata>")
--- a/tests/test_memory.py
+++ b/tests/test_memory.py
@@ -223,3 +223,33 @@ def test_current_files_open_counts_truthy_only():
    m = Memory(agent_type=AgentType.react_agent, blocks=[], file_blocks=[fb1, fb2, fb3])
    out = m.compile(sources=[src], max_files_open=10)
    assert "- current_files_open=1" in out
+
+
+def test_compile_git_memory_filesystem_handles_leaf_directory_collisions():
+    """Git memory filesystem rendering should tolerate label prefix collisions.
+
+    Example collisions:
+    - leaf at "system" and children under "system/..."
+    - leaf at "system/human" and children under "system/human/..."
+
+    These occur naturally in git-backed memory where both index-like blocks and
+    nested blocks can exist.
+    """
+
+    m = Memory(
+        agent_type=AgentType.letta_v1_agent,
+        git_enabled=True,
+        blocks=[
+            Block(label="system", value="root", limit=100),
+            Block(label="system/human", value="human index", limit=100),
+            Block(label="system/human/context", value="context", limit=100),
+        ],
+    )
+
+    out = m.compile()
+
+    # Should include the filesystem view and not raise.
+    assert "<memory_filesystem>" in out
+    assert "system/" in out
+    assert "system.md" in out
+    assert "human.md" in out