feat(core): structure memory directory and block labels [LET-7336] (#9309)

This commit is contained in:
Sarah Wooders
2026-02-05 18:21:56 -08:00
committed by Caren Thomas
parent 3709be28dd
commit 21e880907f
11 changed files with 172 additions and 34 deletions

View File

@@ -38202,6 +38202,12 @@
"title": "Agent Type", "title": "Agent Type",
"description": "Agent type controlling prompt rendering." "description": "Agent type controlling prompt rendering."
}, },
"git_enabled": {
"type": "boolean",
"title": "Git Enabled",
"description": "Whether this agent uses git-backed memory with structured labels.",
"default": false
},
"blocks": { "blocks": {
"items": { "items": {
"$ref": "#/components/schemas/Block" "$ref": "#/components/schemas/Block"

View File

@@ -286,6 +286,7 @@ class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateEntityMixin
is not None is not None
], ],
agent_type=self.agent_type, agent_type=self.agent_type,
git_enabled=any(t.tag == "git-memory-enabled" for t in self.tags),
), ),
"blocks": lambda: [b.to_pydantic() for b in self.core_memory], "blocks": lambda: [b.to_pydantic() for b in self.core_memory],
"identity_ids": lambda: [i.id for i in self.identities], "identity_ids": lambda: [i.id for i in self.identities],
@@ -418,7 +419,15 @@ class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateEntityMixin
return None return None
# Only load requested relationships # Only load requested relationships
tags = self.awaitable_attrs.tags if "tags" in include_relationships or "agent.tags" in include_set else empty_list_async() # Always load tags when memory is requested, since git_enabled depends on them
tags = (
self.awaitable_attrs.tags
if "tags" in include_relationships
or "memory" in include_relationships
or "agent.tags" in include_set
or "agent.blocks" in include_set
else empty_list_async()
)
tools = self.awaitable_attrs.tools if "tools" in include_relationships or "agent.tools" in include_set else empty_list_async() tools = self.awaitable_attrs.tools if "tools" in include_relationships or "agent.tools" in include_set else empty_list_async()
sources = ( sources = (
self.awaitable_attrs.sources if "sources" in include_relationships or "agent.sources" in include_set else empty_list_async() self.awaitable_attrs.sources if "sources" in include_relationships or "agent.sources" in include_set else empty_list_async()
@@ -473,6 +482,7 @@ class Agent(SqlalchemyBase, OrganizationMixin, ProjectMixin, TemplateEntityMixin
if (block := b.to_pydantic_block(per_file_view_window_char_limit=self._get_per_file_view_window_char_limit())) is not None if (block := b.to_pydantic_block(per_file_view_window_char_limit=self._get_per_file_view_window_char_limit())) is not None
], ],
agent_type=self.agent_type, agent_type=self.agent_type,
git_enabled="git-memory-enabled" in state["tags"],
) )
state["blocks"] = [m.to_pydantic() for m in memory] state["blocks"] = [m.to_pydantic() for m in memory]
state["identity_ids"] = [i.id for i in identities] state["identity_ids"] = [i.id for i in identities]

View File

@@ -92,9 +92,9 @@ class Block(OrganizationMixin, SqlalchemyBase, ProjectMixin, TemplateEntityMixin
def to_pydantic(self) -> Type: def to_pydantic(self) -> Type:
match self.label: match self.label:
case "human": case "human" | "system/human":
Schema = Human Schema = Human
case "persona": case "persona" | "system/persona":
Schema = Persona Schema = Persona
case _: case _:
Schema = PydanticBlock Schema = PydanticBlock

View File

@@ -61,6 +61,7 @@ class Memory(BaseModel, validate_assignment=True):
""" """
agent_type: Optional[Union["AgentType", str]] = Field(None, description="Agent type controlling prompt rendering.") agent_type: Optional[Union["AgentType", str]] = Field(None, description="Agent type controlling prompt rendering.")
git_enabled: bool = Field(False, description="Whether this agent uses git-backed memory with structured labels.")
blocks: List[Block] = Field(..., description="Memory blocks contained in the agent's in-context memory") blocks: List[Block] = Field(..., description="Memory blocks contained in the agent's in-context memory")
file_blocks: List[FileBlock] = Field( file_blocks: List[FileBlock] = Field(
default_factory=list, description="Special blocks representing the agent's in-context memory of an attached file" default_factory=list, description="Special blocks representing the agent's in-context memory of an attached file"
@@ -106,16 +107,36 @@ class Memory(BaseModel, validate_assignment=True):
"""Deprecated. Async setter that stores the string but does not validate or use it.""" """Deprecated. Async setter that stores the string but does not validate or use it."""
self.prompt_template = prompt_template self.prompt_template = prompt_template
def _get_renderable_blocks(self) -> list:
    """Select the blocks that belong inside <memory_blocks>.

    When ``git_enabled`` is set, only blocks whose label starts with
    ``system/`` are kept (blocks with an empty or missing label are
    dropped). Otherwise every block is returned. In both cases the result
    is a new list, not ``self.blocks`` itself.
    """
    if not self.git_enabled:
        return list(self.blocks)

    selected = []
    for candidate in self.blocks:
        candidate_label = candidate.label
        if candidate_label and candidate_label.startswith("system/"):
            selected.append(candidate)
    return selected
def _display_label(self, label: str) -> str:
    """Map a block label to the tag name used when rendering it.

    With ``git_enabled`` set, one leading ``system/`` is removed, so
    ``system/human`` is rendered as ``<human>``. Labels without that
    prefix, and all labels when ``git_enabled`` is off, are returned
    unchanged.
    """
    if not self.git_enabled:
        return label
    # removeprefix is a no-op when the prefix is absent, so no separate
    # startswith check is needed.
    return label.removeprefix("system/")
@trace_method @trace_method
def _render_memory_blocks_standard(self, s: StringIO): def _render_memory_blocks_standard(self, s: StringIO):
if len(self.blocks) == 0: renderable = self._get_renderable_blocks()
# s.write("<memory_blocks></memory_blocks>") # TODO: consider empty tags if len(renderable) == 0:
s.write("") s.write("")
return return
s.write("<memory_blocks>\nThe following memory blocks are currently engaged in your core memory unit:\n\n") s.write("<memory_blocks>\nThe following memory blocks are currently engaged in your core memory unit:\n\n")
for idx, block in enumerate(self.blocks): for idx, block in enumerate(renderable):
label = block.label or "block" label = self._display_label(block.label or "block")
value = block.value or "" value = block.value or ""
desc = block.description or "" desc = block.description or ""
chars_current = len(value) chars_current = len(value)
@@ -135,14 +156,15 @@ class Memory(BaseModel, validate_assignment=True):
s.write(f"{value}\n") s.write(f"{value}\n")
s.write("</value>\n") s.write("</value>\n")
s.write(f"</{label}>\n") s.write(f"</{label}>\n")
if idx != len(self.blocks) - 1: if idx != len(renderable) - 1:
s.write("\n") s.write("\n")
s.write("\n</memory_blocks>") s.write("\n</memory_blocks>")
def _render_memory_blocks_line_numbered(self, s: StringIO): def _render_memory_blocks_line_numbered(self, s: StringIO):
renderable = self._get_renderable_blocks()
s.write("<memory_blocks>\nThe following memory blocks are currently engaged in your core memory unit:\n\n") s.write("<memory_blocks>\nThe following memory blocks are currently engaged in your core memory unit:\n\n")
for idx, block in enumerate(self.blocks): for idx, block in enumerate(renderable):
label = block.label or "block" label = self._display_label(block.label or "block")
value = block.value or "" value = block.value or ""
desc = block.description or "" desc = block.description or ""
limit = block.limit if block.limit is not None else 0 limit = block.limit if block.limit is not None else 0
@@ -164,10 +186,55 @@ class Memory(BaseModel, validate_assignment=True):
s.write(f"{i}{line}\n") s.write(f"{i}{line}\n")
s.write("</value>\n") s.write("</value>\n")
s.write(f"</{label}>\n") s.write(f"</{label}>\n")
if idx != len(self.blocks) - 1: if idx != len(renderable) - 1:
s.write("\n") s.write("\n")
s.write("\n</memory_blocks>") s.write("\n</memory_blocks>")
def _render_memory_filesystem(self, s: StringIO):
    """Write a filesystem-style tree of all memory blocks to ``s``.

    Every block in ``self.blocks`` is listed (system and non-system alike).
    A label is split on ``/``: the leading parts become nested directories
    and the last part becomes a ``<name>.md`` entry annotated with the
    character count of the block value and, when present, its description.
    Within each directory, sub-directories are listed first and files
    second, both in sorted order. Nothing is written when there are no
    blocks.

    Args:
        s: Text buffer the ``<memory_filesystem>`` section is appended to.
    """
    if not self.blocks:
        return

    def _new_dir() -> dict:
        # Directories and files live in separate mappings so that a block
        # labelled "system" (rendered as system.md) can coexist with the
        # directory implied by "system/human". Sharing one mapping would
        # either raise on item assignment or drop the whole subtree,
        # depending on block order.
        return {"dirs": {}, "files": {}}

    # Build the tree from block labels, e.g.
    #   "system/human" -> root/dirs["system"]/files["human"]
    #   "organization" -> root/files["organization"]
    root = _new_dir()
    for block in self.blocks:
        label = block.label or "block"
        *dir_parts, file_name = label.split("/")
        node = root
        for part in dir_parts:
            node = node["dirs"].setdefault(part, _new_dir())
        node["files"][file_name] = block

    s.write("\n\n<memory_filesystem>\nmemory/\n")

    def _render_tree(node: dict, indent: int = 1):
        prefix = " " * indent
        # Directories first, then files, each group sorted by name.
        for dir_name in sorted(node["dirs"]):
            s.write(f"{prefix}{dir_name}/\n")
            _render_tree(node["dirs"][dir_name], indent + 1)
        for file_name in sorted(node["files"]):
            entry = node["files"][file_name]
            chars = len(entry.value or "")
            desc = entry.description or ""
            line = f"{prefix}{file_name}.md ({chars} chars)"
            if desc:
                line += f" - {desc}"
            s.write(f"{line}\n")

    _render_tree(root)
    s.write("</memory_filesystem>")
def _render_directories_common(self, s: StringIO, sources, max_files_open): def _render_directories_common(self, s: StringIO, sources, max_files_open):
s.write("\n\n<directories>\n") s.write("\n\n<directories>\n")
if max_files_open is not None: if max_files_open is not None:
@@ -291,6 +358,10 @@ class Memory(BaseModel, validate_assignment=True):
else: else:
self._render_memory_blocks_standard(s) self._render_memory_blocks_standard(s)
# For git-memory-enabled agents, render a filesystem tree of all blocks
if self.git_enabled:
self._render_memory_filesystem(s)
if tool_usage_rules is not None: if tool_usage_rules is not None:
desc = getattr(tool_usage_rules, "description", None) or "" desc = getattr(tool_usage_rules, "description", None) or ""
val = getattr(tool_usage_rules, "value", None) or "" val = getattr(tool_usage_rules, "value", None) or ""

View File

@@ -468,22 +468,25 @@ async def _sync_after_push(actor_id: str, agent_id: str) -> None:
expected_labels = set() expected_labels = set()
synced = 0 synced = 0
for file_path, content in files.items(): for file_path, content in files.items():
if not file_path.startswith("blocks/") or not file_path.endswith(".md"): if not file_path.startswith("memory/") or not file_path.endswith(".md"):
continue continue
label = file_path[len("blocks/") : -3] label = file_path[len("memory/") : -3]
expected_labels.add(label) expected_labels.add(label)
await _server_instance.block_manager._sync_block_to_postgres( try:
agent_id=agent_id, await _server_instance.block_manager._sync_block_to_postgres(
label=label, agent_id=agent_id,
value=content, label=label,
actor=actor, value=content,
) actor=actor,
synced += 1 )
logger.info("Synced block %s to PostgreSQL", label) synced += 1
logger.info("Synced block %s to PostgreSQL", label)
except Exception:
logger.exception("Failed to sync block %s to PostgreSQL (agent=%s)", label, agent_id)
if synced == 0: if synced == 0:
logger.warning("No blocks/*.md files found in repo HEAD during post-push sync (agent=%s)", agent_id) logger.warning("No memory/*.md files found in repo HEAD during post-push sync (agent=%s)", agent_id)
else: else:
# Detach blocks that were removed in git. # Detach blocks that were removed in git.
# #

View File

@@ -641,7 +641,21 @@ class SyncServer(object):
create_request = request create_request = request
if wants_git_memory: if wants_git_memory:
filtered_tags = [t for t in (request.tags or []) if t != GIT_MEMORY_ENABLED_TAG] filtered_tags = [t for t in (request.tags or []) if t != GIT_MEMORY_ENABLED_TAG]
create_request = request.model_copy(update={"tags": filtered_tags}) updates: dict = {"tags": filtered_tags}
# Transform block labels to path-based for git-memory agents.
# Blocks without a "/" prefix go under system/ (rendered in system prompt).
# e.g. "human" -> "system/human", "persona" -> "system/persona"
# Blocks with an explicit path (e.g. "notes/project") keep their label.
if request.memory_blocks:
transformed_blocks = []
for block in request.memory_blocks:
if "/" not in block.label:
block = block.model_copy(update={"label": f"system/{block.label}"})
transformed_blocks.append(block)
updates["memory_blocks"] = transformed_blocks
create_request = request.model_copy(update=updates)
log_event(name="start create_agent db") log_event(name="start create_agent db")
main_agent = await self.agent_manager.create_agent_async( main_agent = await self.agent_manager.create_agent_async(
@@ -653,9 +667,10 @@ class SyncServer(object):
# Enable git-backed memory (creates repo + commits initial blocks + adds tag) # Enable git-backed memory (creates repo + commits initial blocks + adds tag)
if wants_git_memory and isinstance(self.block_manager, GitEnabledBlockManager): if wants_git_memory and isinstance(self.block_manager, GitEnabledBlockManager):
await self.block_manager.enable_git_memory_for_agent(agent_id=main_agent.id, actor=actor) await self.block_manager.enable_git_memory_for_agent(agent_id=main_agent.id, actor=actor)
# Preserve the user's requested tags in the response model. # Preserve the user's requested tags and git_enabled flag in the response model.
try: try:
main_agent.tags = list(request.tags or []) main_agent.tags = list(request.tags or [])
main_agent.memory.git_enabled = True
except Exception: except Exception:
pass pass

View File

@@ -1674,6 +1674,7 @@ class AgentManager:
blocks=blocks, blocks=blocks,
file_blocks=agent_state.memory.file_blocks, file_blocks=agent_state.memory.file_blocks,
agent_type=agent_state.agent_type, agent_type=agent_state.agent_type,
git_enabled=agent_state.memory.git_enabled,
) )
# NOTE: don't do this since rebuilding the memory is handled at the start of the step # NOTE: don't do this since rebuilding the memory is handled at the start of the step

View File

@@ -115,15 +115,18 @@ class GitEnabledBlockManager(BlockManager):
block.limit = limit block.limit = limit
await block.update_async(db_session=session, actor=actor) await block.update_async(db_session=session, actor=actor)
else: else:
# Create new block # Create new block and link to agent in a single transaction
from letta.schemas.block import BaseBlock
block = BlockModel( block = BlockModel(
id=BaseBlock.generate_id(),
label=label, label=label,
value=value, value=value,
description=description or f"{label} block", description=description or f"{label} block",
limit=limit or 5000, limit=limit or 5000,
organization_id=actor.organization_id, organization_id=actor.organization_id,
) )
await block.create_async(db_session=session, actor=actor) await block.create_async(db_session=session, actor=actor, no_commit=True)
# Link to agent # Link to agent
from letta.orm.blocks_agents import BlocksAgents from letta.orm.blocks_agents import BlocksAgents
@@ -131,8 +134,10 @@ class GitEnabledBlockManager(BlockManager):
blocks_agents = BlocksAgents( blocks_agents = BlocksAgents(
agent_id=agent_id, agent_id=agent_id,
block_id=block.id, block_id=block.id,
block_label=label,
) )
session.add(blocks_agents) session.add(blocks_agents)
await session.commit()
return block.to_pydantic() return block.to_pydantic()
@@ -364,6 +369,17 @@ class GitEnabledBlockManager(BlockManager):
agent_id, agent_id,
) )
blocks = await self.get_blocks_by_agent_async(agent_id, actor) blocks = await self.get_blocks_by_agent_async(agent_id, actor)
# Ensure blocks have path-based labels before creating repo
for block in blocks:
if "/" not in block.label:
old_label = block.label
new_label = f"system/{block.label}"
async with db_registry.async_session() as session:
block_orm = await BlockModel.read_async(db_session=session, identifier=block.id, actor=actor)
block_orm.label = new_label
await session.commit()
block.label = new_label
logger.info(f"Transformed block label '{old_label}' -> '{new_label}' during backfill for agent {agent_id}")
await self.memory_repo_manager.create_repo_async( await self.memory_repo_manager.create_repo_async(
agent_id=agent_id, agent_id=agent_id,
actor=actor, actor=actor,
@@ -372,10 +388,24 @@ class GitEnabledBlockManager(BlockManager):
logger.info(f"Backfilled git repo for agent {agent_id} with {len(blocks)} blocks") logger.info(f"Backfilled git repo for agent {agent_id} with {len(blocks)} blocks")
return return
# Get current blocks for this agent # Get current blocks for this agent and transform labels to path-based.
# Flat labels (e.g. "human") become "system/human" for the git directory structure.
blocks = await self.get_blocks_by_agent_async(agent_id, actor) blocks = await self.get_blocks_by_agent_async(agent_id, actor)
for block in blocks:
if "/" not in block.label:
old_label = block.label
new_label = f"system/{block.label}"
logger.info(f"Transforming block label '{old_label}' -> '{new_label}' for agent {agent_id}")
# Create git repo with current blocks # Rename in PostgreSQL directly
async with db_registry.async_session() as session:
block_orm = await BlockModel.read_async(db_session=session, identifier=block.id, actor=actor)
block_orm.label = new_label
await session.commit()
block.label = new_label
# Create git repo with path-based blocks
await self.memory_repo_manager.create_repo_async( await self.memory_repo_manager.create_repo_async(
agent_id=agent_id, agent_id=agent_id,
actor=actor, actor=actor,
@@ -466,7 +496,7 @@ class GitEnabledBlockManager(BlockManager):
if self.memory_repo_manager is None: if self.memory_repo_manager is None:
raise ValueError("Memory repo manager not configured") raise ValueError("Memory repo manager not configured")
path = f"blocks/{label}.md" if label else None path = f"memory/{label}.md" if label else None
return await self.memory_repo_manager.get_history_async( return await self.memory_repo_manager.get_history_async(
agent_id=agent_id, agent_id=agent_id,
actor=actor, actor=actor,

View File

@@ -614,6 +614,7 @@ class ConversationManager:
blocks=memory_blocks, blocks=memory_blocks,
file_blocks=agent_state.memory.file_blocks, file_blocks=agent_state.memory.file_blocks,
agent_type=agent_state.memory.agent_type, agent_type=agent_state.memory.agent_type,
git_enabled=agent_state.memory.git_enabled,
) )
return agent_state return agent_state

View File

@@ -822,7 +822,7 @@ def get_column_names_from_includes_params(
include_relationships: Optional[List[str]] = None, includes: Optional[List[str]] = None include_relationships: Optional[List[str]] = None, includes: Optional[List[str]] = None
) -> Set[str]: ) -> Set[str]:
include_mapping = { include_mapping = {
"agent.blocks": ["core_memory", "file_agents"], "agent.blocks": ["core_memory", "file_agents", "tags"],
"agent.identities": ["identities"], "agent.identities": ["identities"],
"agent.managed_group": ["multi_agent_group"], "agent.managed_group": ["multi_agent_group"],
"agent.secrets": ["tool_exec_environment_variables"], "agent.secrets": ["tool_exec_environment_variables"],
@@ -830,7 +830,7 @@ def get_column_names_from_includes_params(
"agent.tags": ["tags"], "agent.tags": ["tags"],
"agent.tools": ["tools"], "agent.tools": ["tools"],
# legacy # legacy
"memory": ["core_memory", "file_agents"], "memory": ["core_memory", "file_agents", "tags"],
"identity_ids": ["identities"], "identity_ids": ["identities"],
"multi_agent_group": ["multi_agent_group"], "multi_agent_group": ["multi_agent_group"],
"tool_exec_environment_variables": ["tool_exec_environment_variables"], "tool_exec_environment_variables": ["tool_exec_environment_variables"],

View File

@@ -1738,7 +1738,7 @@ async def test_agent_state_schema_unchanged(server: SyncServer):
# Validate nested object schemas # Validate nested object schemas
# Memory schema # Memory schema
memory_fields = Memory.model_fields memory_fields = Memory.model_fields
expected_memory_fields = {"agent_type", "blocks", "file_blocks", "prompt_template"} expected_memory_fields = {"agent_type", "git_enabled", "blocks", "file_blocks", "prompt_template"}
actual_memory_fields = set(memory_fields.keys()) actual_memory_fields = set(memory_fields.keys())
if actual_memory_fields != expected_memory_fields: if actual_memory_fields != expected_memory_fields:
pytest.fail( pytest.fail(
@@ -1980,6 +1980,7 @@ async def test_agent_state_relationship_loads(server: SyncServer, default_user,
assert not agent_state.tools assert not agent_state.tools
# Test include_relationships override with specific relationships # Test include_relationships override with specific relationships
# Note: tags are always loaded alongside memory (needed for git_enabled)
agent_state = await server.agent_manager.get_agent_by_id_async( agent_state = await server.agent_manager.get_agent_by_id_async(
agent_id=created_agent.id, agent_id=created_agent.id,
actor=default_user, actor=default_user,
@@ -1987,7 +1988,7 @@ async def test_agent_state_relationship_loads(server: SyncServer, default_user,
) )
assert agent_state.blocks assert agent_state.blocks
assert agent_state.sources assert agent_state.sources
assert not agent_state.tags assert agent_state.tags # tags loaded with memory for git_enabled
assert not agent_state.tools assert not agent_state.tools
# Test include override with specific relationships # Test include override with specific relationships
@@ -1999,7 +2000,7 @@ async def test_agent_state_relationship_loads(server: SyncServer, default_user,
) )
assert agent_state.blocks assert agent_state.blocks
assert agent_state.sources assert agent_state.sources
assert not agent_state.tags assert agent_state.tags # tags loaded with blocks for git_enabled
assert not agent_state.tools assert not agent_state.tools