feat(core): store block metadata as YAML frontmatter in .md files (#9365)
* feat(core): store block metadata as YAML frontmatter in .md files Block .md files in git repos now embed metadata (description, limit, read_only, metadata dict) as YAML frontmatter instead of a separate metadata/blocks.json file. Only non-default values are rendered. Format: --- description: "Who I am" limit: 5000 --- Block value content here... Changes: - New block_markdown.py utility (serialize_block / parse_block_markdown) - Updated all three write/read paths: manager.py, memfs_client.py, memfs_client_base.py - block_manager_git.py now passes description/limit/read_only/metadata through to git commits - Post-push sync (git_http.py) parses frontmatter and syncs metadata fields to Postgres - Removed metadata/blocks.json reads/writes entirely - Backward compat: files without frontmatter treated as raw value - Integration test verifies frontmatter in cloned files and metadata sync via git push 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * fix: derive frontmatter defaults from BaseBlock schema, not hardcoded dict Remove _DEFAULTS dict from block_markdown.py. The core version now imports BaseBlock and reads field defaults via model_fields. This fixes the limit default (was 5000, should be CORE_MEMORY_BLOCK_CHAR_LIMIT=20000). Also: - memfs-py copy simplified to parse-only (no serialize, no letta imports) - All hardcoded limit=5000 fallbacks replaced with CORE_MEMORY_BLOCK_CHAR_LIMIT - Test updated: blocks with all-default metadata correctly have no frontmatter; frontmatter verified after setting non-default description via API 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * fix: always include description and limit in frontmatter description and limit are always rendered in the YAML frontmatter, even when at their default values. Only read_only and metadata are conditional (omitted when at defaults). 
🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * fix: resolve read_only from block_update before git commit read_only was using the old Postgres value instead of the update value when committing to git. Also adds integration test coverage for read_only: true appearing in frontmatter after API PATCH, and verifying it's omitted when false (default). 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * test: add API→git round-trip coverage for description and limit Verifies that PATCH description/limit via API is reflected in frontmatter after git pull. Combined with the existing push→API test (step 6), this gives full bidirectional coverage: - API edit description/limit → pull → frontmatter updated - Push frontmatter with description/limit → API reflects changes 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> --------- Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
committed by
Caren Thomas
parent
9dee331e6c
commit
369cdf72c7
@@ -494,6 +494,8 @@ async def _sync_after_push(actor_id: str, agent_id: str) -> None:
|
||||
logger.exception("Failed to read repo files after %d retries (agent=%s)", max_retries, agent_id)
|
||||
|
||||
expected_labels = set()
|
||||
from letta.services.memory_repo.block_markdown import parse_block_markdown
|
||||
|
||||
synced = 0
|
||||
for file_path, content in files.items():
|
||||
if not file_path.startswith("memory/") or not file_path.endswith(".md"):
|
||||
@@ -501,12 +503,20 @@ async def _sync_after_push(actor_id: str, agent_id: str) -> None:
|
||||
|
||||
label = file_path[len("memory/") : -3]
|
||||
expected_labels.add(label)
|
||||
|
||||
# Parse frontmatter to extract metadata alongside value
|
||||
parsed = parse_block_markdown(content)
|
||||
|
||||
try:
|
||||
await _server_instance.block_manager._sync_block_to_postgres(
|
||||
agent_id=agent_id,
|
||||
label=label,
|
||||
value=content,
|
||||
value=parsed["value"],
|
||||
actor=actor,
|
||||
description=parsed.get("description"),
|
||||
limit=parsed.get("limit"),
|
||||
read_only=parsed.get("read_only"),
|
||||
metadata=parsed.get("metadata"),
|
||||
)
|
||||
synced += 1
|
||||
logger.info("Synced block %s to PostgreSQL", label)
|
||||
|
||||
@@ -11,6 +11,7 @@ import json
|
||||
import time
|
||||
from typing import List, Optional
|
||||
|
||||
from letta.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT
|
||||
from letta.log import get_logger
|
||||
from letta.orm.block import Block as BlockModel
|
||||
from letta.otel.tracing import trace_method
|
||||
@@ -87,6 +88,8 @@ class GitEnabledBlockManager(BlockManager):
|
||||
actor: PydanticUser,
|
||||
description: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
read_only: Optional[bool] = None,
|
||||
metadata: Optional[dict] = None,
|
||||
) -> PydanticBlock:
|
||||
"""Sync a block from git to PostgreSQL cache."""
|
||||
async with db_registry.async_session() as session:
|
||||
@@ -113,6 +116,10 @@ class GitEnabledBlockManager(BlockManager):
|
||||
block.description = description
|
||||
if limit is not None:
|
||||
block.limit = limit
|
||||
if read_only is not None:
|
||||
block.read_only = read_only
|
||||
if metadata is not None:
|
||||
block.metadata_ = metadata
|
||||
await block.update_async(db_session=session, actor=actor)
|
||||
else:
|
||||
# Create new block and link to agent in a single transaction
|
||||
@@ -123,7 +130,9 @@ class GitEnabledBlockManager(BlockManager):
|
||||
label=label,
|
||||
value=value,
|
||||
description=description or f"{label} block",
|
||||
limit=limit or 5000,
|
||||
limit=limit or CORE_MEMORY_BLOCK_CHAR_LIMIT,
|
||||
read_only=read_only or False,
|
||||
metadata_=metadata or {},
|
||||
organization_id=actor.organization_id,
|
||||
)
|
||||
await block.create_async(db_session=session, actor=actor, no_commit=True)
|
||||
@@ -206,17 +215,28 @@ class GitEnabledBlockManager(BlockManager):
|
||||
logger.info(f"[GIT_PERF] BlockModel.read_async took {(time.perf_counter() - t0) * 1000:.2f}ms label={label}")
|
||||
|
||||
# 1. Commit to git (source of truth)
|
||||
if block_update.value is not None:
|
||||
t0 = time.perf_counter()
|
||||
commit = await self.memory_repo_manager.update_block_async(
|
||||
agent_id=agent_id,
|
||||
label=label,
|
||||
value=block_update.value,
|
||||
actor=actor,
|
||||
message=f"Update {label} block",
|
||||
)
|
||||
git_time = (time.perf_counter() - t0) * 1000
|
||||
logger.info(f"[GIT_PERF] memory_repo_manager.update_block_async took {git_time:.2f}ms commit={commit.sha[:8]}")
|
||||
# Resolve each field: use the update value if provided, else fall back
|
||||
# to the current block value from Postgres.
|
||||
resolved_value = block_update.value if block_update.value is not None else block.value
|
||||
resolved_description = block_update.description if block_update.description is not None else block.description
|
||||
resolved_limit = block_update.limit if block_update.limit is not None else block.limit
|
||||
resolved_read_only = block_update.read_only if block_update.read_only is not None else block.read_only
|
||||
resolved_metadata = block_update.metadata if block_update.metadata is not None else (block.metadata_ or {})
|
||||
|
||||
t0 = time.perf_counter()
|
||||
commit = await self.memory_repo_manager.update_block_async(
|
||||
agent_id=agent_id,
|
||||
label=label,
|
||||
value=resolved_value,
|
||||
actor=actor,
|
||||
message=f"Update {label} block",
|
||||
description=resolved_description,
|
||||
limit=resolved_limit,
|
||||
read_only=resolved_read_only,
|
||||
metadata=resolved_metadata,
|
||||
)
|
||||
git_time = (time.perf_counter() - t0) * 1000
|
||||
logger.info(f"[GIT_PERF] memory_repo_manager.update_block_async took {git_time:.2f}ms commit={commit.sha[:8]}")
|
||||
|
||||
# 2. Sync to PostgreSQL cache
|
||||
t0 = time.perf_counter()
|
||||
@@ -283,7 +303,7 @@ class GitEnabledBlockManager(BlockManager):
|
||||
label=block.label,
|
||||
value=block.value,
|
||||
description=block.description,
|
||||
limit=block.limit or 5000,
|
||||
limit=block.limit or CORE_MEMORY_BLOCK_CHAR_LIMIT,
|
||||
),
|
||||
actor=actor,
|
||||
message=f"Create {block.label} block",
|
||||
|
||||
99
letta/services/memory_repo/block_markdown.py
Normal file
99
letta/services/memory_repo/block_markdown.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""Serialize and parse block data as Markdown with YAML frontmatter.
|
||||
|
||||
File format:
|
||||
---
|
||||
description: "Who I am and how I approach work"
|
||||
limit: 20000
|
||||
---
|
||||
My name is Memo. I'm a stateful coding assistant...
|
||||
|
||||
- Frontmatter fields are only rendered when they differ from defaults.
|
||||
- Files without frontmatter are treated as value-only (backward compat).
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import yaml
|
||||
|
||||
from letta.schemas.block import BaseBlock
|
||||
|
||||
|
||||
def _get_field_default(field_name: str) -> Any:
    """Return the default value declared for *field_name* on BaseBlock.

    Uses ``FieldInfo.get_default(call_default_factory=True)`` rather than
    ``FieldInfo.default``: in pydantic v2, ``.default`` is ``PydanticUndefined``
    for fields declared with ``default_factory`` (e.g. a dict-valued field),
    which would make "is this value at its default?" comparisons in
    ``serialize_block`` silently meaningless for such fields.
    """
    # NOTE(review): assumes pydantic v2 (`model_fields` / `FieldInfo.get_default`)
    # — consistent with the `BaseBlock.model_fields` usage this module already had.
    return BaseBlock.model_fields[field_name].get_default(call_default_factory=True)
|
||||
|
||||
|
||||
def serialize_block(
    value: str,
    *,
    description: Optional[str] = None,
    limit: Optional[int] = None,
    read_only: bool = False,
    metadata: Optional[dict] = None,
) -> str:
    """Render a block as Markdown prefixed with a YAML frontmatter header.

    ``description`` and ``limit`` are always written to the frontmatter
    (``limit`` falls back to the BaseBlock schema default when not given).
    ``read_only`` and ``metadata`` are written only when they differ from
    their schema defaults, keeping the header minimal for typical blocks.
    """
    resolved_limit = _get_field_default("limit") if limit is None else limit
    frontmatter: Dict[str, Any] = {
        "description": description,
        "limit": resolved_limit,
    }

    if read_only != _get_field_default("read_only"):
        frontmatter["read_only"] = read_only
    if metadata and metadata != _get_field_default("metadata"):
        frontmatter["metadata"] = metadata

    # Block-style YAML (default_flow_style=False) keeps the header readable;
    # sort_keys=False preserves the insertion order established above.
    rendered = yaml.dump(
        frontmatter,
        default_flow_style=False,
        sort_keys=False,
        allow_unicode=True,
    ).rstrip("\n")
    return f"---\n{rendered}\n---\n{value}"
|
||||
|
||||
|
||||
def parse_block_markdown(content: str) -> Dict[str, Any]:
    """Split a block .md file into its value and frontmatter fields.

    Returns a dict that always contains ``"value"`` (the body text) and,
    when a well-formed YAML frontmatter header is present, whichever of
    ``"description"``, ``"limit"``, ``"read_only"``, and ``"metadata"`` the
    header defines.

    Backward compatibility: content without a leading ``---`` header, with
    an unterminated header, or with malformed / non-mapping YAML is returned
    whole as ``{"value": content}`` (old repos stored raw values).
    """
    value_only = {"value": content}

    if not content.startswith("---\n"):
        return value_only

    # Locate the closing "---" line; without one the header is unterminated.
    close_at = content.find("\n---\n", 4)
    if close_at == -1:
        return value_only

    header_text = content[4:close_at]
    body = content[close_at + 5 :]  # everything after the "\n---\n" delimiter

    try:
        header = yaml.safe_load(header_text)
    except yaml.YAMLError:
        # Malformed YAML — fall back to treating the whole file as the value.
        return value_only

    if not isinstance(header, dict):
        return value_only

    parsed: Dict[str, Any] = {"value": body}
    # Copy through only the known block fields; unrecognized keys are ignored.
    for key in ("description", "limit", "read_only", "metadata"):
        if key in header:
            parsed[key] = header[key]
    return parsed
|
||||
@@ -9,16 +9,17 @@ HTTP service instead.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import uuid
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from letta.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT
|
||||
from letta.log import get_logger
|
||||
from letta.otel.tracing import trace_method
|
||||
from letta.schemas.block import Block as PydanticBlock
|
||||
from letta.schemas.memory_repo import MemoryCommit
|
||||
from letta.schemas.user import User as PydanticUser
|
||||
from letta.services.memory_repo.block_markdown import parse_block_markdown, serialize_block
|
||||
from letta.services.memory_repo.git_operations import GitOperations
|
||||
from letta.services.memory_repo.storage.local import LocalStorageBackend
|
||||
from letta.utils import enforce_types
|
||||
@@ -27,7 +28,6 @@ logger = get_logger(__name__)
|
||||
|
||||
# File paths within the memory repository
|
||||
MEMORY_DIR = "memory"
|
||||
METADATA_FILE = "metadata/blocks.json"
|
||||
|
||||
# Default local storage path
|
||||
DEFAULT_LOCAL_PATH = os.path.expanduser("~/.letta/memfs")
|
||||
@@ -84,20 +84,18 @@ class MemfsClient:
|
||||
initial_blocks = initial_blocks or []
|
||||
org_id = actor.organization_id
|
||||
|
||||
# Build initial files from blocks
|
||||
# Build initial files from blocks (frontmatter embeds metadata)
|
||||
initial_files = {}
|
||||
metadata = {"blocks": {}}
|
||||
|
||||
for block in initial_blocks:
|
||||
file_path = f"{MEMORY_DIR}/{block.label}.md"
|
||||
initial_files[file_path] = block.value or ""
|
||||
metadata["blocks"][block.label] = {
|
||||
"description": block.description,
|
||||
"limit": block.limit,
|
||||
}
|
||||
|
||||
if metadata["blocks"]:
|
||||
initial_files[METADATA_FILE] = json.dumps(metadata, indent=2)
|
||||
initial_files[file_path] = serialize_block(
|
||||
value=block.value or "",
|
||||
description=block.description,
|
||||
limit=block.limit,
|
||||
read_only=block.read_only,
|
||||
metadata=block.metadata,
|
||||
)
|
||||
|
||||
return await self.git.create_repo(
|
||||
agent_id=agent_id,
|
||||
@@ -136,33 +134,24 @@ class MemfsClient:
|
||||
except FileNotFoundError:
|
||||
return []
|
||||
|
||||
# Parse metadata
|
||||
metadata: dict = {}
|
||||
if METADATA_FILE in files:
|
||||
try:
|
||||
metadata_json = json.loads(files[METADATA_FILE])
|
||||
if isinstance(metadata_json, dict):
|
||||
metadata = metadata_json.get("blocks", {}) or {}
|
||||
except json.JSONDecodeError:
|
||||
logger.warning(f"Failed to parse metadata for agent {agent_id}")
|
||||
|
||||
# Convert block files to PydanticBlock
|
||||
# Convert block files to PydanticBlock (metadata is in frontmatter)
|
||||
blocks = []
|
||||
for file_path, content in files.items():
|
||||
if file_path.startswith(f"{MEMORY_DIR}/") and file_path.endswith(".md"):
|
||||
label = file_path[len(f"{MEMORY_DIR}/") : -3]
|
||||
block_meta = metadata.get(label, {})
|
||||
|
||||
# Generate deterministic UUID-style ID from agent_id + label
|
||||
parsed = parse_block_markdown(content)
|
||||
|
||||
synthetic_uuid = uuid.UUID(hashlib.md5(f"{agent_id}:{label}".encode()).hexdigest())
|
||||
blocks.append(
|
||||
PydanticBlock(
|
||||
id=f"block-{synthetic_uuid}",
|
||||
label=label,
|
||||
value=content,
|
||||
description=block_meta.get("description"),
|
||||
limit=block_meta.get("limit", 5000),
|
||||
metadata=block_meta.get("metadata", {}),
|
||||
value=parsed["value"],
|
||||
description=parsed.get("description"),
|
||||
limit=parsed.get("limit", CORE_MEMORY_BLOCK_CHAR_LIMIT),
|
||||
read_only=parsed.get("read_only", False),
|
||||
metadata=parsed.get("metadata", {}),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -220,6 +209,11 @@ class MemfsClient:
|
||||
value: str,
|
||||
actor: PydanticUser,
|
||||
message: Optional[str] = None,
|
||||
*,
|
||||
description: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
read_only: bool = False,
|
||||
metadata: Optional[dict] = None,
|
||||
) -> MemoryCommit:
|
||||
"""Update a memory block.
|
||||
|
||||
@@ -229,6 +223,10 @@ class MemfsClient:
|
||||
value: New block value
|
||||
actor: User performing the operation
|
||||
message: Optional commit message
|
||||
description: Block description (for frontmatter)
|
||||
limit: Block character limit (for frontmatter)
|
||||
read_only: Block read-only flag (for frontmatter)
|
||||
metadata: Block metadata dict (for frontmatter)
|
||||
|
||||
Returns:
|
||||
Commit details
|
||||
@@ -238,12 +236,19 @@ class MemfsClient:
|
||||
await self._ensure_repo_exists(agent_id, actor)
|
||||
|
||||
file_path = f"{MEMORY_DIR}/{label}.md"
|
||||
file_content = serialize_block(
|
||||
value=value,
|
||||
description=description,
|
||||
limit=limit,
|
||||
read_only=read_only,
|
||||
metadata=metadata,
|
||||
)
|
||||
commit_message = message or f"Update {label}"
|
||||
|
||||
return await self.git.commit(
|
||||
agent_id=agent_id,
|
||||
org_id=actor.organization_id,
|
||||
changes=[FileChange(path=file_path, content=value, change_type="modify")],
|
||||
changes=[FileChange(path=file_path, content=file_content, change_type="modify")],
|
||||
message=commit_message,
|
||||
author_name=f"User {actor.id}",
|
||||
author_email=f"{actor.id}@letta.ai",
|
||||
@@ -274,40 +279,20 @@ class MemfsClient:
|
||||
await self._ensure_repo_exists(agent_id, actor)
|
||||
org_id = actor.organization_id
|
||||
|
||||
# Get current metadata
|
||||
try:
|
||||
files = await self.git.get_files(agent_id, org_id)
|
||||
except FileNotFoundError:
|
||||
files = {}
|
||||
file_content = serialize_block(
|
||||
value=block.value or "",
|
||||
description=block.description,
|
||||
limit=block.limit,
|
||||
read_only=block.read_only,
|
||||
metadata=block.metadata,
|
||||
)
|
||||
|
||||
metadata = {"blocks": {}}
|
||||
if METADATA_FILE in files:
|
||||
try:
|
||||
raw_metadata = json.loads(files[METADATA_FILE])
|
||||
if isinstance(raw_metadata, dict) and isinstance(raw_metadata.get("blocks"), dict):
|
||||
metadata = raw_metadata
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Add new block metadata
|
||||
metadata["blocks"][block.label] = {
|
||||
"description": block.description,
|
||||
"limit": block.limit,
|
||||
"metadata": block.metadata or {},
|
||||
}
|
||||
|
||||
# Prepare changes
|
||||
changes = [
|
||||
FileChange(
|
||||
path=f"{MEMORY_DIR}/{block.label}.md",
|
||||
content=block.value,
|
||||
content=file_content,
|
||||
change_type="add",
|
||||
),
|
||||
FileChange(
|
||||
path=METADATA_FILE,
|
||||
content=json.dumps(metadata, indent=2),
|
||||
change_type="modify",
|
||||
),
|
||||
]
|
||||
|
||||
commit_message = message or f"Create block {block.label}"
|
||||
@@ -346,37 +331,12 @@ class MemfsClient:
|
||||
await self._ensure_repo_exists(agent_id, actor)
|
||||
org_id = actor.organization_id
|
||||
|
||||
# Get current metadata
|
||||
try:
|
||||
files = await self.git.get_files(agent_id, org_id)
|
||||
except FileNotFoundError:
|
||||
files = {}
|
||||
|
||||
metadata = {"blocks": {}}
|
||||
if METADATA_FILE in files:
|
||||
try:
|
||||
raw_metadata = json.loads(files[METADATA_FILE])
|
||||
if isinstance(raw_metadata, dict) and isinstance(raw_metadata.get("blocks"), dict):
|
||||
metadata = raw_metadata
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Remove block from metadata
|
||||
if label in metadata["blocks"]:
|
||||
del metadata["blocks"][label]
|
||||
|
||||
# Prepare changes
|
||||
changes = [
|
||||
FileChange(
|
||||
path=f"{MEMORY_DIR}/{label}.md",
|
||||
content=None,
|
||||
change_type="delete",
|
||||
),
|
||||
FileChange(
|
||||
path=METADATA_FILE,
|
||||
content=json.dumps(metadata, indent=2),
|
||||
change_type="modify",
|
||||
),
|
||||
]
|
||||
|
||||
commit_message = message or f"Delete block {label}"
|
||||
|
||||
Reference in New Issue
Block a user