From 2ffef0fb31de1e69dadb5da050beacbe267bd836 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Tue, 10 Feb 2026 15:17:00 -0800 Subject: [PATCH] Fix git-memory context preview parsing (#9414) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(core): handle git memory label prefix collisions in filesystem view Prevent context window preview crashes when a block label is both a leaf and a prefix (e.g. system/human and system/human/context) by rendering a node as both file and directory. Add regression test. 👾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta * fix(core): parse git-backed core memory in context window preview ContextWindowCalculator.extract_system_components now detects git-backed memory rendering (<memory_filesystem> and <system/...> tags) when the <memory_blocks> wrapper is absent, so core_memory is populated in the context preview. Add regression tests. 👾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta --------- Co-authored-by: Letta --- letta/schemas/memory.py | 61 +++++++++++++++--- .../context_window_calculator.py | 64 ++++++++++++------- tests/test_context_window_calculator.py | 54 ++++++++++++++++ tests/test_memory.py | 30 +++++++++ 4 files changed, 176 insertions(+), 33 deletions(-) create mode 100644 tests/test_context_window_calculator.py diff --git a/letta/schemas/memory.py b/letta/schemas/memory.py index 79c9cb5f..496eb308 100644 --- a/letta/schemas/memory.py +++ b/letta/schemas/memory.py @@ -237,24 +237,65 @@ class Memory(BaseModel, validate_assignment=True): if not self.blocks: return - # Build tree structure from block labels - # e.g. "system/human" -> {"system": {"human": block}} - # "organization" -> {"organization": block} + # Build tree structure from block labels. + # + # IMPORTANT: labels are path-like (e.g. "system/human"). 
In real filesystems a + # path component cannot be both a directory and a file, but our block namespace + # can contain collisions like: + # - "system" (a block) + # - "system/human" (a block under a virtual "system/" directory) + # + # When we detect a collision, we convert the would-be directory node into a + # dict and store the colliding leaf block under LEAF_KEY. + LEAF_KEY = "__block__" + tree: dict = {} for block in self.blocks: label = block.label or "block" - parts = label.split("/") - node = tree + parts = [p for p in label.split("/") if p] + if not parts: + parts = ["block"] + + node: dict = tree for part in parts[:-1]: - node = node.setdefault(part, {}) - node[parts[-1]] = block + existing = node.get(part) + if existing is None: + node[part] = {} + elif not isinstance(existing, dict): + # Collision: leaf at `part` and now we need it to be a directory. + node[part] = {LEAF_KEY: existing} + node = node[part] # type: ignore[assignment] + + leaf = parts[-1] + existing_leaf = node.get(leaf) + if existing_leaf is None: + node[leaf] = block + elif isinstance(existing_leaf, dict): + # Collision: directory at `leaf` already exists; attach the leaf block. + existing_leaf[LEAF_KEY] = block + else: + # Duplicate leaf label; last writer wins. + node[leaf] = block s.write("\n\n\n") def _render_tree(node: dict, prefix: str = ""): - # Sort: directories first, then files - dirs = sorted(k for k, v in node.items() if isinstance(v, dict)) - files = sorted(k for k, v in node.items() if not isinstance(v, dict)) + # Sort: directories first, then files. If a node is both a directory and a + # leaf (LEAF_KEY present), show both / and .md. 
+ dirs = [] + files = [] + for name, val in node.items(): + if name == LEAF_KEY: + continue + if isinstance(val, dict): + dirs.append(name) + if LEAF_KEY in val: + files.append(name) + else: + files.append(name) + + dirs = sorted(dirs) + files = sorted(files) entries = [(d, True) for d in dirs] + [(f, False) for f in files] for i, (name, is_dir) in enumerate(entries): diff --git a/letta/services/context_window_calculator/context_window_calculator.py b/letta/services/context_window_calculator/context_window_calculator.py index f8bb9ae3..b228b532 100644 --- a/letta/services/context_window_calculator/context_window_calculator.py +++ b/letta/services/context_window_calculator/context_window_calculator.py @@ -21,28 +21,21 @@ class ContextWindowCalculator: @staticmethod def extract_system_components(system_message: str) -> Tuple[str, str, str]: + """Extract system prompt + core memory + metadata from a system message. + + Historically, Letta system messages were formatted with: + - ... + - ... + - ... + + Git-backed memory agents do NOT wrap their rendered memory in . + Instead, the memory content typically begins with followed + by file-like tags such as .... + + This helper supports both formats so the context window preview can display + core memory for git-enabled agents. """ - Extract structured components from a formatted system message. - Parses the system message to extract three distinct sections marked by XML-style tags: - - base_instructions: The core system prompt and agent instructions - - memory_blocks: The agent's core memory (persistent context) - - memory_metadata: Metadata about external memory systems - - Args: - system_message: A formatted system message containing XML-style section markers - - Returns: - A tuple of (system_prompt, core_memory, external_memory_summary) - Each component will be an empty string if its section is not found - - Note: - This method assumes a specific format with sections delimited by: - , , and tags. 
- For git-memory-enabled agents, is used instead - of as the core memory delimiter. - The extraction is position-based and expects sections in this order. - """ base_start = system_message.find("") memory_blocks_start = system_message.find("") if memory_blocks_start == -1: @@ -54,14 +47,39 @@ class ContextWindowCalculator: core_memory = "" external_memory_summary = "" + # Always extract metadata if present + if metadata_start != -1: + external_memory_summary = system_message[metadata_start:].strip() + + # Preferred (legacy) parsing when tags are present if base_start != -1 and memory_blocks_start != -1: system_prompt = system_message[base_start:memory_blocks_start].strip() - if memory_blocks_start != -1 and metadata_start != -1: core_memory = system_message[memory_blocks_start:metadata_start].strip() - if metadata_start != -1: - external_memory_summary = system_message[metadata_start:].strip() + # Fallback parsing for git-backed memory rendering (no wrapper) + if not core_memory and metadata_start != -1: + # Identify where the "memory" section begins. + candidates = [] + for marker in ( + "", + " + " is present but core_memory wasn't extracted (e.g. missing base tags), + # allow it as a candidate as well. 
+ if memory_blocks_start != -1: + candidates.append(memory_blocks_start) + + if candidates: + mem_start = min(candidates) + core_memory = system_message[mem_start:metadata_start].strip() + if not system_prompt: + system_prompt = system_message[:mem_start].strip() return system_prompt, core_memory, external_memory_summary diff --git a/tests/test_context_window_calculator.py b/tests/test_context_window_calculator.py new file mode 100644 index 00000000..de17a685 --- /dev/null +++ b/tests/test_context_window_calculator.py @@ -0,0 +1,54 @@ +import pytest + +from letta.services.context_window_calculator.context_window_calculator import ContextWindowCalculator + + +def test_extract_system_components_git_backed_memory_without_memory_blocks_wrapper(): + system_message = """You are some system prompt. + + +Memory Directory: ~/.letta/agents/agent-123/memory + +/memory/ +└── system/ + └── human.md + + + +--- +description: test +limit: 10 +--- +hello + + + +- foo=bar + +""" + + system_prompt, core_memory, external_memory_summary = ContextWindowCalculator.extract_system_components(system_message) + + assert "You are some system prompt" in system_prompt + assert "" in core_memory + assert "" in core_memory + assert external_memory_summary.startswith("") + + +def test_extract_system_components_legacy_memory_blocks_wrapper(): + system_message = """SYS + + +p + + + +- x=y + +""" + + system_prompt, core_memory, external_memory_summary = ContextWindowCalculator.extract_system_components(system_message) + + assert system_prompt.startswith("") + assert core_memory.startswith("") + assert external_memory_summary.startswith("") diff --git a/tests/test_memory.py b/tests/test_memory.py index 26df7e41..7c83bebe 100644 --- a/tests/test_memory.py +++ b/tests/test_memory.py @@ -223,3 +223,33 @@ def test_current_files_open_counts_truthy_only(): m = Memory(agent_type=AgentType.react_agent, blocks=[], file_blocks=[fb1, fb2, fb3]) out = m.compile(sources=[src], max_files_open=10) assert "- 
current_files_open=1" in out + + +def test_compile_git_memory_filesystem_handles_leaf_directory_collisions(): + """Git memory filesystem rendering should tolerate label prefix collisions. + + Example collisions: + - leaf at "system" and children under "system/..." + - leaf at "system/human" and children under "system/human/..." + + These occur naturally in git-backed memory where both index-like blocks and + nested blocks can exist. + """ + + m = Memory( + agent_type=AgentType.letta_v1_agent, + git_enabled=True, + blocks=[ + Block(label="system", value="root", limit=100), + Block(label="system/human", value="human index", limit=100), + Block(label="system/human/context", value="context", limit=100), + ], + ) + + out = m.compile() + + # Should include the filesystem view and not raise. + assert "" in out + assert "system/" in out + assert "system.md" in out + assert "human.md" in out