feat(core): store block metadata as YAML frontmatter in .md files (#9365)

* feat(core): store block metadata as YAML frontmatter in .md files

Block .md files in git repos now embed metadata (description, limit,
read_only, metadata dict) as YAML frontmatter instead of a separate
metadata/blocks.json file. Only non-default values are rendered.

Format:
  ---
  description: "Who I am"
  limit: 5000
  ---
  Block value content here...

Changes:
- New block_markdown.py utility (serialize_block / parse_block_markdown)
- Updated all three write/read paths: manager.py, memfs_client.py,
  memfs_client_base.py
- block_manager_git.py now passes description/limit/read_only/metadata
  through to git commits
- Post-push sync (git_http.py) parses frontmatter and syncs metadata
  fields to Postgres
- Removed metadata/blocks.json reads/writes entirely
- Backward compat: files without frontmatter treated as raw value
- Integration test verifies frontmatter in cloned files and metadata
  sync via git push

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix: derive frontmatter defaults from BaseBlock schema, not hardcoded dict

Remove _DEFAULTS dict from block_markdown.py. The core version now
imports BaseBlock and reads field defaults via model_fields. This
fixes the limit default (was 5000, should be CORE_MEMORY_BLOCK_CHAR_LIMIT=20000).

Also:
- memfs-py copy simplified to parse-only (no serialize, no letta imports)
- All hardcoded limit=5000 fallbacks replaced with CORE_MEMORY_BLOCK_CHAR_LIMIT
- Test updated: blocks with all-default metadata correctly have no frontmatter;
  frontmatter verified after setting non-default description via API

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix: always include description and limit in frontmatter

description and limit are always rendered in the YAML frontmatter,
even when at their default values. Only read_only and metadata are
conditional (omitted when at defaults).

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix: resolve read_only from block_update before git commit

read_only was using the old Postgres value instead of the update value
when committing to git. Also adds integration test coverage for
read_only: true appearing in frontmatter after API PATCH, and
verifying it's omitted when false (default).

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* test: add API→git round-trip coverage for description and limit

Verifies that PATCH description/limit via API is reflected in
frontmatter after git pull. Combined with the existing push→API
test (step 6), this gives full bidirectional coverage:
- API edit description/limit → pull → frontmatter updated
- Push frontmatter with description/limit → API reflects changes

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

---------

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Sarah Wooders
2026-02-06 11:59:16 -08:00
committed by Caren Thomas
parent 9dee331e6c
commit 369cdf72c7
4 changed files with 186 additions and 97 deletions

View File

@@ -494,6 +494,8 @@ async def _sync_after_push(actor_id: str, agent_id: str) -> None:
logger.exception("Failed to read repo files after %d retries (agent=%s)", max_retries, agent_id)
expected_labels = set()
from letta.services.memory_repo.block_markdown import parse_block_markdown
synced = 0
for file_path, content in files.items():
if not file_path.startswith("memory/") or not file_path.endswith(".md"):
@@ -501,12 +503,20 @@ async def _sync_after_push(actor_id: str, agent_id: str) -> None:
label = file_path[len("memory/") : -3]
expected_labels.add(label)
# Parse frontmatter to extract metadata alongside value
parsed = parse_block_markdown(content)
try:
await _server_instance.block_manager._sync_block_to_postgres(
agent_id=agent_id,
label=label,
value=content,
value=parsed["value"],
actor=actor,
description=parsed.get("description"),
limit=parsed.get("limit"),
read_only=parsed.get("read_only"),
metadata=parsed.get("metadata"),
)
synced += 1
logger.info("Synced block %s to PostgreSQL", label)

View File

@@ -11,6 +11,7 @@ import json
import time
from typing import List, Optional
from letta.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT
from letta.log import get_logger
from letta.orm.block import Block as BlockModel
from letta.otel.tracing import trace_method
@@ -87,6 +88,8 @@ class GitEnabledBlockManager(BlockManager):
actor: PydanticUser,
description: Optional[str] = None,
limit: Optional[int] = None,
read_only: Optional[bool] = None,
metadata: Optional[dict] = None,
) -> PydanticBlock:
"""Sync a block from git to PostgreSQL cache."""
async with db_registry.async_session() as session:
@@ -113,6 +116,10 @@ class GitEnabledBlockManager(BlockManager):
block.description = description
if limit is not None:
block.limit = limit
if read_only is not None:
block.read_only = read_only
if metadata is not None:
block.metadata_ = metadata
await block.update_async(db_session=session, actor=actor)
else:
# Create new block and link to agent in a single transaction
@@ -123,7 +130,9 @@ class GitEnabledBlockManager(BlockManager):
label=label,
value=value,
description=description or f"{label} block",
limit=limit or 5000,
limit=limit or CORE_MEMORY_BLOCK_CHAR_LIMIT,
read_only=read_only or False,
metadata_=metadata or {},
organization_id=actor.organization_id,
)
await block.create_async(db_session=session, actor=actor, no_commit=True)
@@ -206,17 +215,28 @@ class GitEnabledBlockManager(BlockManager):
logger.info(f"[GIT_PERF] BlockModel.read_async took {(time.perf_counter() - t0) * 1000:.2f}ms label={label}")
# 1. Commit to git (source of truth)
if block_update.value is not None:
t0 = time.perf_counter()
commit = await self.memory_repo_manager.update_block_async(
agent_id=agent_id,
label=label,
value=block_update.value,
actor=actor,
message=f"Update {label} block",
)
git_time = (time.perf_counter() - t0) * 1000
logger.info(f"[GIT_PERF] memory_repo_manager.update_block_async took {git_time:.2f}ms commit={commit.sha[:8]}")
# Resolve each field: use the update value if provided, else fall back
# to the current block value from Postgres.
resolved_value = block_update.value if block_update.value is not None else block.value
resolved_description = block_update.description if block_update.description is not None else block.description
resolved_limit = block_update.limit if block_update.limit is not None else block.limit
resolved_read_only = block_update.read_only if block_update.read_only is not None else block.read_only
resolved_metadata = block_update.metadata if block_update.metadata is not None else (block.metadata_ or {})
t0 = time.perf_counter()
commit = await self.memory_repo_manager.update_block_async(
agent_id=agent_id,
label=label,
value=resolved_value,
actor=actor,
message=f"Update {label} block",
description=resolved_description,
limit=resolved_limit,
read_only=resolved_read_only,
metadata=resolved_metadata,
)
git_time = (time.perf_counter() - t0) * 1000
logger.info(f"[GIT_PERF] memory_repo_manager.update_block_async took {git_time:.2f}ms commit={commit.sha[:8]}")
# 2. Sync to PostgreSQL cache
t0 = time.perf_counter()
@@ -283,7 +303,7 @@ class GitEnabledBlockManager(BlockManager):
label=block.label,
value=block.value,
description=block.description,
limit=block.limit or 5000,
limit=block.limit or CORE_MEMORY_BLOCK_CHAR_LIMIT,
),
actor=actor,
message=f"Create {block.label} block",

View File

@@ -0,0 +1,99 @@
"""Serialize and parse block data as Markdown with YAML frontmatter.
File format:
---
description: "Who I am and how I approach work"
limit: 20000
---
My name is Memo. I'm a stateful coding assistant...
- description and limit are always rendered in the frontmatter; read_only and metadata are only rendered when they differ from their defaults.
- Files without frontmatter are treated as value-only (backward compat).
"""
from typing import Any, Dict, Optional
import yaml
from letta.schemas.block import BaseBlock
def _get_field_default(field_name: str) -> Any:
    """Return the schema-declared default for a BaseBlock field."""
    return BaseBlock.model_fields[field_name].default
def serialize_block(
    value: str,
    *,
    description: Optional[str] = None,
    limit: Optional[int] = None,
    read_only: bool = False,
    metadata: Optional[dict] = None,
) -> str:
    """Serialize a block to Markdown with YAML frontmatter.

    ``description`` and ``limit`` are always rendered in the frontmatter,
    even when at their defaults (``limit`` falls back to the BaseBlock
    schema default when ``None``). ``read_only`` and ``metadata`` are only
    rendered when they differ from their BaseBlock defaults.

    Args:
        value: Block body content, written after the closing ``---``.
        description: Block description; always rendered (``null`` when None).
        limit: Character limit; schema default substituted when None.
        read_only: Read-only flag; omitted when equal to the schema default.
        metadata: Metadata dict; omitted when falsy or equal to the default.

    Returns:
        A Markdown document of the form ``---\\n<yaml>\\n---\\n<value>``.
    """
    front: Dict[str, Any] = {}
    # description and limit are always included in frontmatter.
    front["description"] = description
    front["limit"] = limit if limit is not None else _get_field_default("limit")
    # read_only and metadata are only included when non-default.
    if read_only != _get_field_default("read_only"):
        front["read_only"] = read_only
    if metadata and metadata != _get_field_default("metadata"):
        front["metadata"] = metadata
    # Block style (default_flow_style=False) keeps the YAML line-oriented
    # and diff-friendly in git history.
    yaml_str = yaml.dump(front, default_flow_style=False, sort_keys=False, allow_unicode=True).rstrip("\n")
    return f"---\n{yaml_str}\n---\n{value}"
def parse_block_markdown(content: str) -> Dict[str, Any]:
    """Parse a block .md file into its component fields.

    The returned dict always contains "value" (the body after the
    frontmatter) and, for each of "description", "limit", "read_only",
    and "metadata", includes the key only when it appears in the
    frontmatter mapping.

    Content without valid leading frontmatter — no opening ``---``, an
    unterminated frontmatter section, malformed YAML, or YAML that is not
    a mapping — is treated entirely as the raw value, preserving backward
    compatibility with old repos that stored bare values.
    """
    if not content.startswith("---\n"):
        return {"value": content}

    close = content.find("\n---\n", 4)
    if close < 0:
        # Opening marker but no closing delimiter: not real frontmatter.
        return {"value": content}

    raw_yaml = content[4:close]
    body = content[close + 5 :]  # everything past "\n---\n"

    try:
        front = yaml.safe_load(raw_yaml)
    except yaml.YAMLError:
        front = None
    if not isinstance(front, dict):
        # Malformed or non-mapping YAML — fall back to raw value.
        return {"value": content}

    parsed: Dict[str, Any] = {"value": body}
    for key in ("description", "limit", "read_only", "metadata"):
        if key in front:
            parsed[key] = front[key]
    return parsed

View File

@@ -9,16 +9,17 @@ HTTP service instead.
"""
import hashlib
import json
import os
import uuid
from typing import Dict, List, Optional
from letta.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT
from letta.log import get_logger
from letta.otel.tracing import trace_method
from letta.schemas.block import Block as PydanticBlock
from letta.schemas.memory_repo import MemoryCommit
from letta.schemas.user import User as PydanticUser
from letta.services.memory_repo.block_markdown import parse_block_markdown, serialize_block
from letta.services.memory_repo.git_operations import GitOperations
from letta.services.memory_repo.storage.local import LocalStorageBackend
from letta.utils import enforce_types
@@ -27,7 +28,6 @@ logger = get_logger(__name__)
# File paths within the memory repository
MEMORY_DIR = "memory"
METADATA_FILE = "metadata/blocks.json"
# Default local storage path
DEFAULT_LOCAL_PATH = os.path.expanduser("~/.letta/memfs")
@@ -84,20 +84,18 @@ class MemfsClient:
initial_blocks = initial_blocks or []
org_id = actor.organization_id
# Build initial files from blocks
# Build initial files from blocks (frontmatter embeds metadata)
initial_files = {}
metadata = {"blocks": {}}
for block in initial_blocks:
file_path = f"{MEMORY_DIR}/{block.label}.md"
initial_files[file_path] = block.value or ""
metadata["blocks"][block.label] = {
"description": block.description,
"limit": block.limit,
}
if metadata["blocks"]:
initial_files[METADATA_FILE] = json.dumps(metadata, indent=2)
initial_files[file_path] = serialize_block(
value=block.value or "",
description=block.description,
limit=block.limit,
read_only=block.read_only,
metadata=block.metadata,
)
return await self.git.create_repo(
agent_id=agent_id,
@@ -136,33 +134,24 @@ class MemfsClient:
except FileNotFoundError:
return []
# Parse metadata
metadata: dict = {}
if METADATA_FILE in files:
try:
metadata_json = json.loads(files[METADATA_FILE])
if isinstance(metadata_json, dict):
metadata = metadata_json.get("blocks", {}) or {}
except json.JSONDecodeError:
logger.warning(f"Failed to parse metadata for agent {agent_id}")
# Convert block files to PydanticBlock
# Convert block files to PydanticBlock (metadata is in frontmatter)
blocks = []
for file_path, content in files.items():
if file_path.startswith(f"{MEMORY_DIR}/") and file_path.endswith(".md"):
label = file_path[len(f"{MEMORY_DIR}/") : -3]
block_meta = metadata.get(label, {})
# Generate deterministic UUID-style ID from agent_id + label
parsed = parse_block_markdown(content)
synthetic_uuid = uuid.UUID(hashlib.md5(f"{agent_id}:{label}".encode()).hexdigest())
blocks.append(
PydanticBlock(
id=f"block-{synthetic_uuid}",
label=label,
value=content,
description=block_meta.get("description"),
limit=block_meta.get("limit", 5000),
metadata=block_meta.get("metadata", {}),
value=parsed["value"],
description=parsed.get("description"),
limit=parsed.get("limit", CORE_MEMORY_BLOCK_CHAR_LIMIT),
read_only=parsed.get("read_only", False),
metadata=parsed.get("metadata", {}),
)
)
@@ -220,6 +209,11 @@ class MemfsClient:
value: str,
actor: PydanticUser,
message: Optional[str] = None,
*,
description: Optional[str] = None,
limit: Optional[int] = None,
read_only: bool = False,
metadata: Optional[dict] = None,
) -> MemoryCommit:
"""Update a memory block.
@@ -229,6 +223,10 @@ class MemfsClient:
value: New block value
actor: User performing the operation
message: Optional commit message
description: Block description (for frontmatter)
limit: Block character limit (for frontmatter)
read_only: Block read-only flag (for frontmatter)
metadata: Block metadata dict (for frontmatter)
Returns:
Commit details
@@ -238,12 +236,19 @@ class MemfsClient:
await self._ensure_repo_exists(agent_id, actor)
file_path = f"{MEMORY_DIR}/{label}.md"
file_content = serialize_block(
value=value,
description=description,
limit=limit,
read_only=read_only,
metadata=metadata,
)
commit_message = message or f"Update {label}"
return await self.git.commit(
agent_id=agent_id,
org_id=actor.organization_id,
changes=[FileChange(path=file_path, content=value, change_type="modify")],
changes=[FileChange(path=file_path, content=file_content, change_type="modify")],
message=commit_message,
author_name=f"User {actor.id}",
author_email=f"{actor.id}@letta.ai",
@@ -274,40 +279,20 @@ class MemfsClient:
await self._ensure_repo_exists(agent_id, actor)
org_id = actor.organization_id
# Get current metadata
try:
files = await self.git.get_files(agent_id, org_id)
except FileNotFoundError:
files = {}
file_content = serialize_block(
value=block.value or "",
description=block.description,
limit=block.limit,
read_only=block.read_only,
metadata=block.metadata,
)
metadata = {"blocks": {}}
if METADATA_FILE in files:
try:
raw_metadata = json.loads(files[METADATA_FILE])
if isinstance(raw_metadata, dict) and isinstance(raw_metadata.get("blocks"), dict):
metadata = raw_metadata
except json.JSONDecodeError:
pass
# Add new block metadata
metadata["blocks"][block.label] = {
"description": block.description,
"limit": block.limit,
"metadata": block.metadata or {},
}
# Prepare changes
changes = [
FileChange(
path=f"{MEMORY_DIR}/{block.label}.md",
content=block.value,
content=file_content,
change_type="add",
),
FileChange(
path=METADATA_FILE,
content=json.dumps(metadata, indent=2),
change_type="modify",
),
]
commit_message = message or f"Create block {block.label}"
@@ -346,37 +331,12 @@ class MemfsClient:
await self._ensure_repo_exists(agent_id, actor)
org_id = actor.organization_id
# Get current metadata
try:
files = await self.git.get_files(agent_id, org_id)
except FileNotFoundError:
files = {}
metadata = {"blocks": {}}
if METADATA_FILE in files:
try:
raw_metadata = json.loads(files[METADATA_FILE])
if isinstance(raw_metadata, dict) and isinstance(raw_metadata.get("blocks"), dict):
metadata = raw_metadata
except json.JSONDecodeError:
pass
# Remove block from metadata
if label in metadata["blocks"]:
del metadata["blocks"][label]
# Prepare changes
changes = [
FileChange(
path=f"{MEMORY_DIR}/{label}.md",
content=None,
change_type="delete",
),
FileChange(
path=METADATA_FILE,
content=json.dumps(metadata, indent=2),
change_type="modify",
),
]
commit_message = message or f"Delete block {label}"