Fix git-memory context preview parsing (#9414)

* fix(core): handle git memory label prefix collisions in filesystem view

Prevent context window preview crashes when a block label is both a leaf and a prefix (e.g. system/human and system/human/context) by rendering a node as both file and directory. Add regression test.

👾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix(core): parse git-backed core memory in context window preview

ContextWindowCalculator.extract_system_components now detects git-backed memory rendering (<memory_filesystem> and <system/...> tags) when <memory_blocks> wrapper is absent, so core_memory is populated in the context preview. Add regression tests.

👾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

---------

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Sarah Wooders
2026-02-10 15:17:00 -08:00
committed by Caren Thomas
parent ca32311b9a
commit 2ffef0fb31
4 changed files with 176 additions and 33 deletions

View File

@@ -237,24 +237,65 @@ class Memory(BaseModel, validate_assignment=True):
if not self.blocks:
return
# Build tree structure from block labels
# e.g. "system/human" -> {"system": {"human": block}}
# "organization" -> {"organization": block}
# Build tree structure from block labels.
#
# IMPORTANT: labels are path-like (e.g. "system/human"). In real filesystems a
# path component cannot be both a directory and a file, but our block namespace
# can contain collisions like:
# - "system" (a block)
# - "system/human" (a block under a virtual "system/" directory)
#
# When we detect a collision, the entry that must act as a directory is stored
# as a dict, and the block that shares its name is kept inside that dict under
# LEAF_KEY, so the name can be rendered as both <name>/ and <name>.md.
LEAF_KEY = "__block__"
tree: dict = {}
for block in self.blocks:
label = block.label or "block"
parts = label.split("/")
node = tree
parts = [p for p in label.split("/") if p]
if not parts:
parts = ["block"]
node: dict = tree
for part in parts[:-1]:
node = node.setdefault(part, {})
node[parts[-1]] = block
existing = node.get(part)
if existing is None:
node[part] = {}
elif not isinstance(existing, dict):
# Collision: leaf at `part` and now we need it to be a directory.
node[part] = {LEAF_KEY: existing}
node = node[part] # type: ignore[assignment]
leaf = parts[-1]
existing_leaf = node.get(leaf)
if existing_leaf is None:
node[leaf] = block
elif isinstance(existing_leaf, dict):
# Collision: directory at `leaf` already exists; attach the leaf block.
existing_leaf[LEAF_KEY] = block
else:
# Duplicate leaf label; last writer wins.
node[leaf] = block
s.write("\n\n<memory_filesystem>\n")
def _render_tree(node: dict, prefix: str = ""):
# Sort: directories first, then files
dirs = sorted(k for k, v in node.items() if isinstance(v, dict))
files = sorted(k for k, v in node.items() if not isinstance(v, dict))
# Sort: directories first, then files. If a node is both a directory and a
# leaf (LEAF_KEY present), show both <name>/ and <name>.md.
dirs = []
files = []
for name, val in node.items():
if name == LEAF_KEY:
continue
if isinstance(val, dict):
dirs.append(name)
if LEAF_KEY in val:
files.append(name)
else:
files.append(name)
dirs = sorted(dirs)
files = sorted(files)
entries = [(d, True) for d in dirs] + [(f, False) for f in files]
for i, (name, is_dir) in enumerate(entries):

View File

@@ -21,28 +21,21 @@ class ContextWindowCalculator:
@staticmethod
def extract_system_components(system_message: str) -> Tuple[str, str, str]:
"""Extract system prompt + core memory + metadata from a system message.
Historically, Letta system messages were formatted with:
- <base_instructions> ...
- <memory_blocks> ...
- <memory_metadata> ...
Git-backed memory agents do NOT wrap their rendered memory in <memory_blocks>.
Instead, the memory content typically begins with <memory_filesystem> followed
by file-like tags such as <system/human.md>...</system/human.md>.
This helper supports both formats so the context window preview can display
core memory for git-enabled agents.
"""
Extract structured components from a formatted system message.
Parses the system message to extract three distinct sections marked by XML-style tags:
- base_instructions: The core system prompt and agent instructions
- memory_blocks: The agent's core memory (persistent context)
- memory_metadata: Metadata about external memory systems
Args:
system_message: A formatted system message containing XML-style section markers
Returns:
A tuple of (system_prompt, core_memory, external_memory_summary)
Each component will be an empty string if its section is not found
Note:
This method assumes a specific format with sections delimited by:
<base_instructions>, <memory_blocks>, and <memory_metadata> tags.
For git-memory-enabled agents, <memory_filesystem> is used instead
of <memory_blocks> as the core memory delimiter.
The extraction is position-based and expects sections in this order.
"""
base_start = system_message.find("<base_instructions>")
memory_blocks_start = system_message.find("<memory_blocks>")
if memory_blocks_start == -1:
@@ -54,14 +47,39 @@ class ContextWindowCalculator:
core_memory = ""
external_memory_summary = ""
# Always extract metadata if present
if metadata_start != -1:
external_memory_summary = system_message[metadata_start:].strip()
# Preferred (legacy) parsing when tags are present
if base_start != -1 and memory_blocks_start != -1:
system_prompt = system_message[base_start:memory_blocks_start].strip()
if memory_blocks_start != -1 and metadata_start != -1:
core_memory = system_message[memory_blocks_start:metadata_start].strip()
if metadata_start != -1:
external_memory_summary = system_message[metadata_start:].strip()
# Fallback parsing for git-backed memory rendering (no <memory_blocks> wrapper)
if not core_memory and metadata_start != -1:
# Identify where the "memory" section begins.
candidates = []
for marker in (
"<memory_filesystem>",
"<system/", # e.g. <system/human.md>
"<organization/", # future-proofing
):
pos = system_message.find(marker)
if pos != -1:
candidates.append(pos)
# If <memory_blocks> was found but the legacy slice above still produced no core_memory
# (e.g. the sections appear in an unexpected order), allow it as a candidate as well.
if memory_blocks_start != -1:
candidates.append(memory_blocks_start)
if candidates:
mem_start = min(candidates)
core_memory = system_message[mem_start:metadata_start].strip()
if not system_prompt:
system_prompt = system_message[:mem_start].strip()
return system_prompt, core_memory, external_memory_summary

View File

@@ -0,0 +1,54 @@
import pytest
from letta.services.context_window_calculator.context_window_calculator import ContextWindowCalculator
def test_extract_system_components_git_backed_memory_without_memory_blocks_wrapper():
    """Git-backed rendering (no <memory_blocks> wrapper) is still split into three parts.

    The message below has plain prompt text, then a <memory_filesystem> listing and a
    <system/human.md> file tag, then <memory_metadata>. The filesystem listing and the
    file tag are expected in the core-memory component.
    """
    git_rendered_message = """You are some system prompt.
<memory_filesystem>
Memory Directory: ~/.letta/agents/agent-123/memory
/memory/
└── system/
└── human.md
</memory_filesystem>
<system/human.md>
---
description: test
limit: 10
---
hello
</system/human.md>
<memory_metadata>
- foo=bar
</memory_metadata>
"""
    prompt_part, memory_part, metadata_part = ContextWindowCalculator.extract_system_components(git_rendered_message)

    # Text preceding the first memory marker is treated as the system prompt.
    assert "You are some system prompt" in prompt_part

    # Both the directory listing and the rendered file tag belong to core memory.
    for memory_tag in ("<memory_filesystem>", "<system/human.md>"):
        assert memory_tag in memory_part

    # The metadata component begins at its opening tag.
    assert metadata_part.startswith("<memory_metadata>")
def test_extract_system_components_legacy_memory_blocks_wrapper():
    """The legacy <base_instructions>/<memory_blocks>/<memory_metadata> layout still parses.

    Each returned component is expected to start with its own opening tag.
    """
    legacy_message = """<base_instructions>SYS</base_instructions>
<memory_blocks>
<persona>p</persona>
</memory_blocks>
<memory_metadata>
- x=y
</memory_metadata>
"""
    prompt_part, memory_part, metadata_part = ContextWindowCalculator.extract_system_components(legacy_message)

    # Pair every component with the tag it must begin with, in the original assertion order.
    expectations = (
        (prompt_part, "<base_instructions>"),
        (memory_part, "<memory_blocks>"),
        (metadata_part, "<memory_metadata>"),
    )
    for component, opening_tag in expectations:
        assert component.startswith(opening_tag)

View File

@@ -223,3 +223,33 @@ def test_current_files_open_counts_truthy_only():
m = Memory(agent_type=AgentType.react_agent, blocks=[], file_blocks=[fb1, fb2, fb3])
out = m.compile(sources=[src], max_files_open=10)
assert "- current_files_open=1" in out
def test_compile_git_memory_filesystem_handles_leaf_directory_collisions():
    """Compiling git-enabled memory must cope with labels that are also prefixes.

    The blocks used here collide on purpose:
    - "system" is a leaf, yet "system/human" lives beneath it
    - "system/human" is a leaf, yet "system/human/context" lives beneath it

    Such label sets arise in git-backed memory, where index-like blocks and
    nested blocks can coexist.
    """
    colliding_blocks = [
        Block(label="system", value="root", limit=100),
        Block(label="system/human", value="human index", limit=100),
        Block(label="system/human/context", value="context", limit=100),
    ]
    memory = Memory(
        agent_type=AgentType.letta_v1_agent,
        git_enabled=True,
        blocks=colliding_blocks,
    )

    # compile() must finish without raising, and the output must contain the filesystem view.
    rendered = memory.compile()
    for fragment in ("<memory_filesystem>", "system/", "system.md", "human.md"):
        assert fragment in rendered