feat(core): structure memory directory and block labels [LET-7336] (#9309)

This commit is contained in:
Sarah Wooders
2026-02-05 18:21:56 -08:00
committed by Caren Thomas
parent 3709be28dd
commit 21e880907f
11 changed files with 172 additions and 34 deletions

View File

@@ -38202,6 +38202,12 @@
"title": "Agent Type",
"description": "Agent type controlling prompt rendering."
},
"git_enabled": {
"type": "boolean",
"title": "Git Enabled",
"description": "Whether this agent uses git-backed memory with structured labels.",
"default": false
},
"blocks": {
"items": {
"$ref": "#/components/schemas/Block"

View File

@@ -286,6 +286,7 @@ class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateEntityMixin
is not None
],
agent_type=self.agent_type,
git_enabled=any(t.tag == "git-memory-enabled" for t in self.tags),
),
"blocks": lambda: [b.to_pydantic() for b in self.core_memory],
"identity_ids": lambda: [i.id for i in self.identities],
@@ -418,7 +419,15 @@ class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateEntityMixin
return None
# Only load requested relationships
tags = self.awaitable_attrs.tags if "tags" in include_relationships or "agent.tags" in include_set else empty_list_async()
# Always load tags when memory is requested, since git_enabled depends on them
tags = (
self.awaitable_attrs.tags
if "tags" in include_relationships
or "memory" in include_relationships
or "agent.tags" in include_set
or "agent.blocks" in include_set
else empty_list_async()
)
tools = self.awaitable_attrs.tools if "tools" in include_relationships or "agent.tools" in include_set else empty_list_async()
sources = (
self.awaitable_attrs.sources if "sources" in include_relationships or "agent.sources" in include_set else empty_list_async()
@@ -473,6 +482,7 @@ class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateEntityMixin
if (block := b.to_pydantic_block(per_file_view_window_char_limit=self._get_per_file_view_window_char_limit())) is not None
],
agent_type=self.agent_type,
git_enabled="git-memory-enabled" in state["tags"],
)
state["blocks"] = [m.to_pydantic() for m in memory]
state["identity_ids"] = [i.id for i in identities]

View File

@@ -92,9 +92,9 @@ class Block(OrganizationMixin, SqlalchemyBase, ProjectMixin, TemplateEntityMixin
def to_pydantic(self) -> Type:
match self.label:
case "human":
case "human" | "system/human":
Schema = Human
case "persona":
case "persona" | "system/persona":
Schema = Persona
case _:
Schema = PydanticBlock

View File

@@ -61,6 +61,7 @@ class Memory(BaseModel, validate_assignment=True):
"""
agent_type: Optional[Union["AgentType", str]] = Field(None, description="Agent type controlling prompt rendering.")
git_enabled: bool = Field(False, description="Whether this agent uses git-backed memory with structured labels.")
blocks: List[Block] = Field(..., description="Memory blocks contained in the agent's in-context memory")
file_blocks: List[FileBlock] = Field(
default_factory=list, description="Special blocks representing the agent's in-context memory of an attached file"
@@ -106,16 +107,36 @@ class Memory(BaseModel, validate_assignment=True):
"""Deprecated. Async setter that stores the string but does not validate or use it."""
self.prompt_template = prompt_template
def _get_renderable_blocks(self) -> list:
"""Return blocks that should be rendered into <memory_blocks>.
For git-memory-enabled agents, only system/ blocks are rendered.
For standard agents, all blocks are rendered.
"""
if self.git_enabled:
return [b for b in self.blocks if b.label and b.label.startswith("system/")]
return list(self.blocks)
def _display_label(self, label: str) -> str:
"""Return the XML tag name for a block label.
For git-memory-enabled agents, strip the 'system/' prefix so
system/human renders as <human>.
"""
if self.git_enabled and label.startswith("system/"):
return label.removeprefix("system/")
return label
@trace_method
def _render_memory_blocks_standard(self, s: StringIO):
if len(self.blocks) == 0:
# s.write("<memory_blocks></memory_blocks>") # TODO: consider empty tags
renderable = self._get_renderable_blocks()
if len(renderable) == 0:
s.write("")
return
s.write("<memory_blocks>\nThe following memory blocks are currently engaged in your core memory unit:\n\n")
for idx, block in enumerate(self.blocks):
label = block.label or "block"
for idx, block in enumerate(renderable):
label = self._display_label(block.label or "block")
value = block.value or ""
desc = block.description or ""
chars_current = len(value)
@@ -135,14 +156,15 @@ class Memory(BaseModel, validate_assignment=True):
s.write(f"{value}\n")
s.write("</value>\n")
s.write(f"</{label}>\n")
if idx != len(self.blocks) - 1:
if idx != len(renderable) - 1:
s.write("\n")
s.write("\n</memory_blocks>")
def _render_memory_blocks_line_numbered(self, s: StringIO):
renderable = self._get_renderable_blocks()
s.write("<memory_blocks>\nThe following memory blocks are currently engaged in your core memory unit:\n\n")
for idx, block in enumerate(self.blocks):
label = block.label or "block"
for idx, block in enumerate(renderable):
label = self._display_label(block.label or "block")
value = block.value or ""
desc = block.description or ""
limit = block.limit if block.limit is not None else 0
@@ -164,10 +186,55 @@ class Memory(BaseModel, validate_assignment=True):
s.write(f"{i}{line}\n")
s.write("</value>\n")
s.write(f"</{label}>\n")
if idx != len(self.blocks) - 1:
if idx != len(renderable) - 1:
s.write("\n")
s.write("\n</memory_blocks>")
def _render_memory_filesystem(self, s: StringIO):
"""Render a filesystem tree view of all memory blocks.
Only rendered for git-memory-enabled agents. Shows all blocks
(system and non-system) as a tree with char counts and descriptions.
"""
if not self.blocks:
return
# Build tree structure from block labels
# e.g. "system/human" -> {"system": {"human": block}}
# "organization" -> {"organization": block}
tree: dict = {}
for block in self.blocks:
label = block.label or "block"
parts = label.split("/")
node = tree
for part in parts[:-1]:
node = node.setdefault(part, {})
node[parts[-1]] = block
s.write("\n\n<memory_filesystem>\nmemory/\n")
def _render_tree(node: dict, indent: int = 1):
prefix = " " * indent
# Sort: directories first, then files
dirs = sorted(k for k, v in node.items() if isinstance(v, dict))
files = sorted(k for k, v in node.items() if not isinstance(v, dict))
for d in dirs:
s.write(f"{prefix}{d}/\n")
_render_tree(node[d], indent + 1)
for f in files:
block = node[f]
chars = len(block.value or "")
desc = block.description or ""
line = f"{prefix}{f}.md ({chars} chars)"
if desc:
line += f" - {desc}"
s.write(f"{line}\n")
_render_tree(tree)
s.write("</memory_filesystem>")
def _render_directories_common(self, s: StringIO, sources, max_files_open):
s.write("\n\n<directories>\n")
if max_files_open is not None:
@@ -291,6 +358,10 @@ class Memory(BaseModel, validate_assignment=True):
else:
self._render_memory_blocks_standard(s)
# For git-memory-enabled agents, render a filesystem tree of all blocks
if self.git_enabled:
self._render_memory_filesystem(s)
if tool_usage_rules is not None:
desc = getattr(tool_usage_rules, "description", None) or ""
val = getattr(tool_usage_rules, "value", None) or ""

View File

@@ -468,22 +468,25 @@ async def _sync_after_push(actor_id: str, agent_id: str) -> None:
expected_labels = set()
synced = 0
for file_path, content in files.items():
if not file_path.startswith("blocks/") or not file_path.endswith(".md"):
if not file_path.startswith("memory/") or not file_path.endswith(".md"):
continue
label = file_path[len("blocks/") : -3]
label = file_path[len("memory/") : -3]
expected_labels.add(label)
await _server_instance.block_manager._sync_block_to_postgres(
agent_id=agent_id,
label=label,
value=content,
actor=actor,
)
synced += 1
logger.info("Synced block %s to PostgreSQL", label)
try:
await _server_instance.block_manager._sync_block_to_postgres(
agent_id=agent_id,
label=label,
value=content,
actor=actor,
)
synced += 1
logger.info("Synced block %s to PostgreSQL", label)
except Exception:
logger.exception("Failed to sync block %s to PostgreSQL (agent=%s)", label, agent_id)
if synced == 0:
logger.warning("No blocks/*.md files found in repo HEAD during post-push sync (agent=%s)", agent_id)
logger.warning("No memory/*.md files found in repo HEAD during post-push sync (agent=%s)", agent_id)
else:
# Detach blocks that were removed in git.
#

View File

@@ -641,7 +641,21 @@ class SyncServer(object):
create_request = request
if wants_git_memory:
filtered_tags = [t for t in (request.tags or []) if t != GIT_MEMORY_ENABLED_TAG]
create_request = request.model_copy(update={"tags": filtered_tags})
updates: dict = {"tags": filtered_tags}
# Transform block labels to path-based for git-memory agents.
# Blocks without a "/" prefix go under system/ (rendered in system prompt).
# e.g. "human" -> "system/human", "persona" -> "system/persona"
# Blocks with an explicit path (e.g. "notes/project") keep their label.
if request.memory_blocks:
transformed_blocks = []
for block in request.memory_blocks:
if "/" not in block.label:
block = block.model_copy(update={"label": f"system/{block.label}"})
transformed_blocks.append(block)
updates["memory_blocks"] = transformed_blocks
create_request = request.model_copy(update=updates)
log_event(name="start create_agent db")
main_agent = await self.agent_manager.create_agent_async(
@@ -653,9 +667,10 @@ class SyncServer(object):
# Enable git-backed memory (creates repo + commits initial blocks + adds tag)
if wants_git_memory and isinstance(self.block_manager, GitEnabledBlockManager):
await self.block_manager.enable_git_memory_for_agent(agent_id=main_agent.id, actor=actor)
# Preserve the user's requested tags in the response model.
# Preserve the user's requested tags and git_enabled flag in the response model.
try:
main_agent.tags = list(request.tags or [])
main_agent.memory.git_enabled = True
except Exception:
pass

View File

@@ -1674,6 +1674,7 @@ class AgentManager:
blocks=blocks,
file_blocks=agent_state.memory.file_blocks,
agent_type=agent_state.agent_type,
git_enabled=agent_state.memory.git_enabled,
)
# NOTE: don't do this since re-building the memory is handled at the start of the step

View File

@@ -115,15 +115,18 @@ class GitEnabledBlockManager(BlockManager):
block.limit = limit
await block.update_async(db_session=session, actor=actor)
else:
# Create new block
# Create new block and link to agent in a single transaction
from letta.schemas.block import BaseBlock
block = BlockModel(
id=BaseBlock.generate_id(),
label=label,
value=value,
description=description or f"{label} block",
limit=limit or 5000,
organization_id=actor.organization_id,
)
await block.create_async(db_session=session, actor=actor)
await block.create_async(db_session=session, actor=actor, no_commit=True)
# Link to agent
from letta.orm.blocks_agents import BlocksAgents
@@ -131,8 +134,10 @@ class GitEnabledBlockManager(BlockManager):
blocks_agents = BlocksAgents(
agent_id=agent_id,
block_id=block.id,
block_label=label,
)
session.add(blocks_agents)
await session.commit()
return block.to_pydantic()
@@ -364,6 +369,17 @@ class GitEnabledBlockManager(BlockManager):
agent_id,
)
blocks = await self.get_blocks_by_agent_async(agent_id, actor)
# Ensure blocks have path-based labels before creating repo
for block in blocks:
if "/" not in block.label:
old_label = block.label
new_label = f"system/{block.label}"
async with db_registry.async_session() as session:
block_orm = await BlockModel.read_async(db_session=session, identifier=block.id, actor=actor)
block_orm.label = new_label
await session.commit()
block.label = new_label
logger.info(f"Transformed block label '{old_label}' -> '{new_label}' during backfill for agent {agent_id}")
await self.memory_repo_manager.create_repo_async(
agent_id=agent_id,
actor=actor,
@@ -372,10 +388,24 @@ class GitEnabledBlockManager(BlockManager):
logger.info(f"Backfilled git repo for agent {agent_id} with {len(blocks)} blocks")
return
# Get current blocks for this agent
# Get current blocks for this agent and transform labels to path-based.
# Flat labels (e.g. "human") become "system/human" for the git directory structure.
blocks = await self.get_blocks_by_agent_async(agent_id, actor)
for block in blocks:
if "/" not in block.label:
old_label = block.label
new_label = f"system/{block.label}"
logger.info(f"Transforming block label '{old_label}' -> '{new_label}' for agent {agent_id}")
# Create git repo with current blocks
# Rename in PostgreSQL directly
async with db_registry.async_session() as session:
block_orm = await BlockModel.read_async(db_session=session, identifier=block.id, actor=actor)
block_orm.label = new_label
await session.commit()
block.label = new_label
# Create git repo with path-based blocks
await self.memory_repo_manager.create_repo_async(
agent_id=agent_id,
actor=actor,
@@ -466,7 +496,7 @@ class GitEnabledBlockManager(BlockManager):
if self.memory_repo_manager is None:
raise ValueError("Memory repo manager not configured")
path = f"blocks/{label}.md" if label else None
path = f"memory/{label}.md" if label else None
return await self.memory_repo_manager.get_history_async(
agent_id=agent_id,
actor=actor,

View File

@@ -614,6 +614,7 @@ class ConversationManager:
blocks=memory_blocks,
file_blocks=agent_state.memory.file_blocks,
agent_type=agent_state.memory.agent_type,
git_enabled=agent_state.memory.git_enabled,
)
return agent_state

View File

@@ -822,7 +822,7 @@ def get_column_names_from_includes_params(
include_relationships: Optional[List[str]] = None, includes: Optional[List[str]] = None
) -> Set[str]:
include_mapping = {
"agent.blocks": ["core_memory", "file_agents"],
"agent.blocks": ["core_memory", "file_agents", "tags"],
"agent.identities": ["identities"],
"agent.managed_group": ["multi_agent_group"],
"agent.secrets": ["tool_exec_environment_variables"],
@@ -830,7 +830,7 @@ def get_column_names_from_includes_params(
"agent.tags": ["tags"],
"agent.tools": ["tools"],
# legacy
"memory": ["core_memory", "file_agents"],
"memory": ["core_memory", "file_agents", "tags"],
"identity_ids": ["identities"],
"multi_agent_group": ["multi_agent_group"],
"tool_exec_environment_variables": ["tool_exec_environment_variables"],

View File

@@ -1738,7 +1738,7 @@ async def test_agent_state_schema_unchanged(server: SyncServer):
# Validate nested object schemas
# Memory schema
memory_fields = Memory.model_fields
expected_memory_fields = {"agent_type", "blocks", "file_blocks", "prompt_template"}
expected_memory_fields = {"agent_type", "git_enabled", "blocks", "file_blocks", "prompt_template"}
actual_memory_fields = set(memory_fields.keys())
if actual_memory_fields != expected_memory_fields:
pytest.fail(
@@ -1980,6 +1980,7 @@ async def test_agent_state_relationship_loads(server: SyncServer, default_user,
assert not agent_state.tools
# Test include_relationships override with specific relationships
# Note: tags are always loaded alongside memory (needed for git_enabled)
agent_state = await server.agent_manager.get_agent_by_id_async(
agent_id=created_agent.id,
actor=default_user,
@@ -1987,7 +1988,7 @@ async def test_agent_state_relationship_loads(server: SyncServer, default_user,
)
assert agent_state.blocks
assert agent_state.sources
assert not agent_state.tags
assert agent_state.tags # tags loaded with memory for git_enabled
assert not agent_state.tools
# Test include override with specific relationships
@@ -1999,7 +2000,7 @@ async def test_agent_state_relationship_loads(server: SyncServer, default_user,
)
assert agent_state.blocks
assert agent_state.sources
assert not agent_state.tags
assert agent_state.tags # tags loaded with blocks for git_enabled
assert not agent_state.tools