Fix git-memory context preview parsing (#9414)
* fix(core): handle git memory label prefix collisions in filesystem view

  Prevent context window preview crashes when a block label is both a leaf and a prefix (e.g. system/human and system/human/context) by rendering a node as both file and directory.

  Add regression test.

  👾 Generated with [Letta Code](https://letta.com)

  Co-Authored-By: Letta <noreply@letta.com>

* fix(core): parse git-backed core memory in context window preview

  ContextWindowCalculator.extract_system_components now detects git-backed memory rendering (<memory_filesystem> and <system/...> tags) when <memory_blocks> wrapper is absent, so core_memory is populated in the context preview.

  Add regression tests.

  👾 Generated with [Letta Code](https://letta.com)

  Co-Authored-By: Letta <noreply@letta.com>

---------

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
committed by
Caren Thomas
parent
ca32311b9a
commit
2ffef0fb31
@@ -237,24 +237,65 @@ class Memory(BaseModel, validate_assignment=True):
|
||||
if not self.blocks:
|
||||
return
|
||||
|
||||
# Build tree structure from block labels
|
||||
# e.g. "system/human" -> {"system": {"human": block}}
|
||||
# "organization" -> {"organization": block}
|
||||
# Build tree structure from block labels.
|
||||
#
|
||||
# IMPORTANT: labels are path-like (e.g. "system/human"). In real filesystems a
|
||||
# path component cannot be both a directory and a file, but our block namespace
|
||||
# can contain collisions like:
|
||||
# - "system" (a block)
|
||||
# - "system/human" (a block under a virtual "system/" directory)
|
||||
#
|
||||
# When we detect a collision, we convert the would-be directory node into a
|
||||
# dict and store the colliding leaf block under LEAF_KEY.
|
||||
LEAF_KEY = "__block__"
|
||||
|
||||
tree: dict = {}
|
||||
for block in self.blocks:
|
||||
label = block.label or "block"
|
||||
parts = label.split("/")
|
||||
node = tree
|
||||
parts = [p for p in label.split("/") if p]
|
||||
if not parts:
|
||||
parts = ["block"]
|
||||
|
||||
node: dict = tree
|
||||
for part in parts[:-1]:
|
||||
node = node.setdefault(part, {})
|
||||
node[parts[-1]] = block
|
||||
existing = node.get(part)
|
||||
if existing is None:
|
||||
node[part] = {}
|
||||
elif not isinstance(existing, dict):
|
||||
# Collision: leaf at `part` and now we need it to be a directory.
|
||||
node[part] = {LEAF_KEY: existing}
|
||||
node = node[part] # type: ignore[assignment]
|
||||
|
||||
leaf = parts[-1]
|
||||
existing_leaf = node.get(leaf)
|
||||
if existing_leaf is None:
|
||||
node[leaf] = block
|
||||
elif isinstance(existing_leaf, dict):
|
||||
# Collision: directory at `leaf` already exists; attach the leaf block.
|
||||
existing_leaf[LEAF_KEY] = block
|
||||
else:
|
||||
# Duplicate leaf label; last writer wins.
|
||||
node[leaf] = block
|
||||
|
||||
s.write("\n\n<memory_filesystem>\n")
|
||||
|
||||
def _render_tree(node: dict, prefix: str = ""):
|
||||
# Sort: directories first, then files
|
||||
dirs = sorted(k for k, v in node.items() if isinstance(v, dict))
|
||||
files = sorted(k for k, v in node.items() if not isinstance(v, dict))
|
||||
# Sort: directories first, then files. If a node is both a directory and a
|
||||
# leaf (LEAF_KEY present), show both <name>/ and <name>.md.
|
||||
dirs = []
|
||||
files = []
|
||||
for name, val in node.items():
|
||||
if name == LEAF_KEY:
|
||||
continue
|
||||
if isinstance(val, dict):
|
||||
dirs.append(name)
|
||||
if LEAF_KEY in val:
|
||||
files.append(name)
|
||||
else:
|
||||
files.append(name)
|
||||
|
||||
dirs = sorted(dirs)
|
||||
files = sorted(files)
|
||||
entries = [(d, True) for d in dirs] + [(f, False) for f in files]
|
||||
|
||||
for i, (name, is_dir) in enumerate(entries):
|
||||
|
||||
@@ -21,28 +21,21 @@ class ContextWindowCalculator:
|
||||
|
||||
@staticmethod
|
||||
def extract_system_components(system_message: str) -> Tuple[str, str, str]:
|
||||
"""Extract system prompt + core memory + metadata from a system message.
|
||||
|
||||
Historically, Letta system messages were formatted with:
|
||||
- <base_instructions> ...
|
||||
- <memory_blocks> ...
|
||||
- <memory_metadata> ...
|
||||
|
||||
Git-backed memory agents do NOT wrap their rendered memory in <memory_blocks>.
|
||||
Instead, the memory content typically begins with <memory_filesystem> followed
|
||||
by file-like tags such as <system/human.md>...</system/human.md>.
|
||||
|
||||
This helper supports both formats so the context window preview can display
|
||||
core memory for git-enabled agents.
|
||||
"""
|
||||
Extract structured components from a formatted system message.
|
||||
|
||||
Parses the system message to extract three distinct sections marked by XML-style tags:
|
||||
- base_instructions: The core system prompt and agent instructions
|
||||
- memory_blocks: The agent's core memory (persistent context)
|
||||
- memory_metadata: Metadata about external memory systems
|
||||
|
||||
Args:
|
||||
system_message: A formatted system message containing XML-style section markers
|
||||
|
||||
Returns:
|
||||
A tuple of (system_prompt, core_memory, external_memory_summary)
|
||||
Each component will be an empty string if its section is not found
|
||||
|
||||
Note:
|
||||
This method assumes a specific format with sections delimited by:
|
||||
<base_instructions>, <memory_blocks>, and <memory_metadata> tags.
|
||||
For git-memory-enabled agents, <memory_filesystem> is used instead
|
||||
of <memory_blocks> as the core memory delimiter.
|
||||
The extraction is position-based and expects sections in this order.
|
||||
"""
|
||||
base_start = system_message.find("<base_instructions>")
|
||||
memory_blocks_start = system_message.find("<memory_blocks>")
|
||||
if memory_blocks_start == -1:
|
||||
@@ -54,14 +47,39 @@ class ContextWindowCalculator:
|
||||
core_memory = ""
|
||||
external_memory_summary = ""
|
||||
|
||||
# Always extract metadata if present
|
||||
if metadata_start != -1:
|
||||
external_memory_summary = system_message[metadata_start:].strip()
|
||||
|
||||
# Preferred (legacy) parsing when tags are present
|
||||
if base_start != -1 and memory_blocks_start != -1:
|
||||
system_prompt = system_message[base_start:memory_blocks_start].strip()
|
||||
|
||||
if memory_blocks_start != -1 and metadata_start != -1:
|
||||
core_memory = system_message[memory_blocks_start:metadata_start].strip()
|
||||
|
||||
if metadata_start != -1:
|
||||
external_memory_summary = system_message[metadata_start:].strip()
|
||||
# Fallback parsing for git-backed memory rendering (no <memory_blocks> wrapper)
|
||||
if not core_memory and metadata_start != -1:
|
||||
# Identify where the "memory" section begins.
|
||||
candidates = []
|
||||
for marker in (
|
||||
"<memory_filesystem>",
|
||||
"<system/", # e.g. <system/human.md>
|
||||
"<organization/", # future-proofing
|
||||
):
|
||||
pos = system_message.find(marker)
|
||||
if pos != -1:
|
||||
candidates.append(pos)
|
||||
|
||||
# If <memory_blocks> is present but core_memory wasn't extracted (e.g. missing base tags),
|
||||
# allow it as a candidate as well.
|
||||
if memory_blocks_start != -1:
|
||||
candidates.append(memory_blocks_start)
|
||||
|
||||
if candidates:
|
||||
mem_start = min(candidates)
|
||||
core_memory = system_message[mem_start:metadata_start].strip()
|
||||
if not system_prompt:
|
||||
system_prompt = system_message[:mem_start].strip()
|
||||
|
||||
return system_prompt, core_memory, external_memory_summary
|
||||
|
||||
|
||||
54
tests/test_context_window_calculator.py
Normal file
54
tests/test_context_window_calculator.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import pytest
|
||||
|
||||
from letta.services.context_window_calculator.context_window_calculator import ContextWindowCalculator
|
||||
|
||||
|
||||
def test_extract_system_components_git_backed_memory_without_memory_blocks_wrapper():
|
||||
system_message = """You are some system prompt.
|
||||
|
||||
<memory_filesystem>
|
||||
Memory Directory: ~/.letta/agents/agent-123/memory
|
||||
|
||||
/memory/
|
||||
└── system/
|
||||
└── human.md
|
||||
</memory_filesystem>
|
||||
|
||||
<system/human.md>
|
||||
---
|
||||
description: test
|
||||
limit: 10
|
||||
---
|
||||
hello
|
||||
</system/human.md>
|
||||
|
||||
<memory_metadata>
|
||||
- foo=bar
|
||||
</memory_metadata>
|
||||
"""
|
||||
|
||||
system_prompt, core_memory, external_memory_summary = ContextWindowCalculator.extract_system_components(system_message)
|
||||
|
||||
assert "You are some system prompt" in system_prompt
|
||||
assert "<memory_filesystem>" in core_memory
|
||||
assert "<system/human.md>" in core_memory
|
||||
assert external_memory_summary.startswith("<memory_metadata>")
|
||||
|
||||
|
||||
def test_extract_system_components_legacy_memory_blocks_wrapper():
|
||||
system_message = """<base_instructions>SYS</base_instructions>
|
||||
|
||||
<memory_blocks>
|
||||
<persona>p</persona>
|
||||
</memory_blocks>
|
||||
|
||||
<memory_metadata>
|
||||
- x=y
|
||||
</memory_metadata>
|
||||
"""
|
||||
|
||||
system_prompt, core_memory, external_memory_summary = ContextWindowCalculator.extract_system_components(system_message)
|
||||
|
||||
assert system_prompt.startswith("<base_instructions>")
|
||||
assert core_memory.startswith("<memory_blocks>")
|
||||
assert external_memory_summary.startswith("<memory_metadata>")
|
||||
@@ -223,3 +223,33 @@ def test_current_files_open_counts_truthy_only():
|
||||
m = Memory(agent_type=AgentType.react_agent, blocks=[], file_blocks=[fb1, fb2, fb3])
|
||||
out = m.compile(sources=[src], max_files_open=10)
|
||||
assert "- current_files_open=1" in out
|
||||
|
||||
|
||||
def test_compile_git_memory_filesystem_handles_leaf_directory_collisions():
|
||||
"""Git memory filesystem rendering should tolerate label prefix collisions.
|
||||
|
||||
Example collisions:
|
||||
- leaf at "system" and children under "system/..."
|
||||
- leaf at "system/human" and children under "system/human/..."
|
||||
|
||||
These occur naturally in git-backed memory where both index-like blocks and
|
||||
nested blocks can exist.
|
||||
"""
|
||||
|
||||
m = Memory(
|
||||
agent_type=AgentType.letta_v1_agent,
|
||||
git_enabled=True,
|
||||
blocks=[
|
||||
Block(label="system", value="root", limit=100),
|
||||
Block(label="system/human", value="human index", limit=100),
|
||||
Block(label="system/human/context", value="context", limit=100),
|
||||
],
|
||||
)
|
||||
|
||||
out = m.compile()
|
||||
|
||||
# Should include the filesystem view and not raise.
|
||||
assert "<memory_filesystem>" in out
|
||||
assert "system/" in out
|
||||
assert "system.md" in out
|
||||
assert "human.md" in out
|
||||
|
||||
Reference in New Issue
Block a user