From 369cdf72c7bc6e21f876cf72ac591b30ce56ff10 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Fri, 6 Feb 2026 11:59:16 -0800 Subject: [PATCH] feat(core): store block metadata as YAML frontmatter in .md files (#9365) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(core): store block metadata as YAML frontmatter in .md files Block .md files in git repos now embed metadata (description, limit, read_only, metadata dict) as YAML frontmatter instead of a separate metadata/blocks.json file. Only non-default values are rendered. Format: --- description: "Who I am" limit: 5000 --- Block value content here... Changes: - New block_markdown.py utility (serialize_block / parse_block_markdown) - Updated all three write/read paths: manager.py, memfs_client.py, memfs_client_base.py - block_manager_git.py now passes description/limit/read_only/metadata through to git commits - Post-push sync (git_http.py) parses frontmatter and syncs metadata fields to Postgres - Removed metadata/blocks.json reads/writes entirely - Backward compat: files without frontmatter treated as raw value - Integration test verifies frontmatter in cloned files and metadata sync via git push 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta * fix: derive frontmatter defaults from BaseBlock schema, not hardcoded dict Remove _DEFAULTS dict from block_markdown.py. The core version now imports BaseBlock and reads field defaults via model_fields. This fixes the limit default (was 5000, should be CORE_MEMORY_BLOCK_CHAR_LIMIT=20000). 
Also: - memfs-py copy simplified to parse-only (no serialize, no letta imports) - All hardcoded limit=5000 fallbacks replaced with CORE_MEMORY_BLOCK_CHAR_LIMIT - Test updated: blocks with all-default metadata correctly have no frontmatter; frontmatter verified after setting non-default description via API 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta * fix: always include description and limit in frontmatter description and limit are always rendered in the YAML frontmatter, even when at their default values. Only read_only and metadata are conditional (omitted when at defaults). 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta * fix: resolve read_only from block_update before git commit read_only was using the old Postgres value instead of the update value when committing to git. Also adds integration test coverage for read_only: true appearing in frontmatter after API PATCH, and verifying it's omitted when false (default). 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta * test: add API→git round-trip coverage for description and limit Verifies that PATCH description/limit via API is reflected in frontmatter after git pull. 
Combined with the existing push→API test (step 6), this gives full bidirectional coverage: - API edit description/limit → pull → frontmatter updated - Push frontmatter with description/limit → API reflects changes 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta --------- Co-authored-by: Letta --- letta/server/rest_api/routers/v1/git_http.py | 12 +- letta/services/block_manager_git.py | 46 +++++-- letta/services/memory_repo/block_markdown.py | 99 ++++++++++++++ .../services/memory_repo/memfs_client_base.py | 126 ++++++------------ 4 files changed, 186 insertions(+), 97 deletions(-) create mode 100644 letta/services/memory_repo/block_markdown.py diff --git a/letta/server/rest_api/routers/v1/git_http.py b/letta/server/rest_api/routers/v1/git_http.py index 559efeea..db8cd658 100644 --- a/letta/server/rest_api/routers/v1/git_http.py +++ b/letta/server/rest_api/routers/v1/git_http.py @@ -494,6 +494,8 @@ async def _sync_after_push(actor_id: str, agent_id: str) -> None: logger.exception("Failed to read repo files after %d retries (agent=%s)", max_retries, agent_id) expected_labels = set() + from letta.services.memory_repo.block_markdown import parse_block_markdown + synced = 0 for file_path, content in files.items(): if not file_path.startswith("memory/") or not file_path.endswith(".md"): @@ -501,12 +503,20 @@ async def _sync_after_push(actor_id: str, agent_id: str) -> None: label = file_path[len("memory/") : -3] expected_labels.add(label) + + # Parse frontmatter to extract metadata alongside value + parsed = parse_block_markdown(content) + try: await _server_instance.block_manager._sync_block_to_postgres( agent_id=agent_id, label=label, - value=content, + value=parsed["value"], actor=actor, + description=parsed.get("description"), + limit=parsed.get("limit"), + read_only=parsed.get("read_only"), + metadata=parsed.get("metadata"), ) synced += 1 logger.info("Synced block %s to PostgreSQL", label) diff --git a/letta/services/block_manager_git.py 
b/letta/services/block_manager_git.py index 43277ac2..22d97999 100644 --- a/letta/services/block_manager_git.py +++ b/letta/services/block_manager_git.py @@ -11,6 +11,7 @@ import json import time from typing import List, Optional +from letta.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT from letta.log import get_logger from letta.orm.block import Block as BlockModel from letta.otel.tracing import trace_method @@ -87,6 +88,8 @@ class GitEnabledBlockManager(BlockManager): actor: PydanticUser, description: Optional[str] = None, limit: Optional[int] = None, + read_only: Optional[bool] = None, + metadata: Optional[dict] = None, ) -> PydanticBlock: """Sync a block from git to PostgreSQL cache.""" async with db_registry.async_session() as session: @@ -113,6 +116,10 @@ class GitEnabledBlockManager(BlockManager): block.description = description if limit is not None: block.limit = limit + if read_only is not None: + block.read_only = read_only + if metadata is not None: + block.metadata_ = metadata await block.update_async(db_session=session, actor=actor) else: # Create new block and link to agent in a single transaction @@ -123,7 +130,9 @@ class GitEnabledBlockManager(BlockManager): label=label, value=value, description=description or f"{label} block", - limit=limit or 5000, + limit=limit or CORE_MEMORY_BLOCK_CHAR_LIMIT, + read_only=read_only or False, + metadata_=metadata or {}, organization_id=actor.organization_id, ) await block.create_async(db_session=session, actor=actor, no_commit=True) @@ -206,17 +215,28 @@ class GitEnabledBlockManager(BlockManager): logger.info(f"[GIT_PERF] BlockModel.read_async took {(time.perf_counter() - t0) * 1000:.2f}ms label={label}") # 1. 
Commit to git (source of truth) - if block_update.value is not None: - t0 = time.perf_counter() - commit = await self.memory_repo_manager.update_block_async( - agent_id=agent_id, - label=label, - value=block_update.value, - actor=actor, - message=f"Update {label} block", - ) - git_time = (time.perf_counter() - t0) * 1000 - logger.info(f"[GIT_PERF] memory_repo_manager.update_block_async took {git_time:.2f}ms commit={commit.sha[:8]}") + # Resolve each field: use the update value if provided, else fall back + # to the current block value from Postgres. + resolved_value = block_update.value if block_update.value is not None else block.value + resolved_description = block_update.description if block_update.description is not None else block.description + resolved_limit = block_update.limit if block_update.limit is not None else block.limit + resolved_read_only = block_update.read_only if block_update.read_only is not None else block.read_only + resolved_metadata = block_update.metadata if block_update.metadata is not None else (block.metadata_ or {}) + + t0 = time.perf_counter() + commit = await self.memory_repo_manager.update_block_async( + agent_id=agent_id, + label=label, + value=resolved_value, + actor=actor, + message=f"Update {label} block", + description=resolved_description, + limit=resolved_limit, + read_only=resolved_read_only, + metadata=resolved_metadata, + ) + git_time = (time.perf_counter() - t0) * 1000 + logger.info(f"[GIT_PERF] memory_repo_manager.update_block_async took {git_time:.2f}ms commit={commit.sha[:8]}") # 2. 
Sync to PostgreSQL cache t0 = time.perf_counter() @@ -283,7 +303,7 @@ class GitEnabledBlockManager(BlockManager): label=block.label, value=block.value, description=block.description, - limit=block.limit or 5000, + limit=block.limit or CORE_MEMORY_BLOCK_CHAR_LIMIT, ), actor=actor, message=f"Create {block.label} block", diff --git a/letta/services/memory_repo/block_markdown.py new file mode 100644 index 00000000..aad40560 --- /dev/null +++ b/letta/services/memory_repo/block_markdown.py @@ -0,0 +1,99 @@ +"""Serialize and parse block data as Markdown with YAML frontmatter. + +File format: + --- + description: "Who I am and how I approach work" + limit: 20000 + --- + My name is Memo. I'm a stateful coding assistant... + +- description and limit are always rendered; read_only and metadata only when non-default. +- Files without frontmatter are treated as value-only (backward compat). +""" + +from typing import Any, Dict, Optional + +import yaml + +from letta.schemas.block import BaseBlock + + +def _get_field_default(field_name: str) -> Any: + """Get the default value for a BaseBlock field.""" + field = BaseBlock.model_fields[field_name] + return field.default + + +def serialize_block( + value: str, + *, + description: Optional[str] = None, + limit: Optional[int] = None, + read_only: bool = False, + metadata: Optional[dict] = None, +) -> str: + """Serialize a block to Markdown with YAML frontmatter. + + description and limit are always included in the frontmatter. + read_only and metadata are included only when non-default. + """ + # description and limit are always included in frontmatter. + # read_only and metadata are only included when non-default.
+ front: Dict[str, Any] = {} + + front["description"] = description + front["limit"] = limit if limit is not None else _get_field_default("limit") + + if read_only != _get_field_default("read_only"): + front["read_only"] = read_only + if metadata and metadata != _get_field_default("metadata"): + front["metadata"] = metadata + + # Use block style for cleaner YAML, default_flow_style=False + yaml_str = yaml.dump(front, default_flow_style=False, sort_keys=False, allow_unicode=True).rstrip("\n") + return f"---\n{yaml_str}\n---\n{value}" + + +def parse_block_markdown(content: str) -> Dict[str, Any]: + """Parse a Markdown file into block fields. + + Returns a dict with: + - "value": the body content after frontmatter + - "description", "limit", "read_only", "metadata": from frontmatter (if present) + + If no frontmatter is detected, the entire content is treated as the value + (backward compat with old repos that stored raw values). + """ + if not content.startswith("---\n"): + return {"value": content} + + # Find the closing --- delimiter + end_idx = content.find("\n---\n", 4) + if end_idx == -1: + # No closing delimiter — treat entire content as value + return {"value": content} + + yaml_str = content[4:end_idx] + body = content[end_idx + 5 :] # skip past \n---\n + + try: + front = yaml.safe_load(yaml_str) + except yaml.YAMLError: + # Malformed YAML — treat entire content as value + return {"value": content} + + if not isinstance(front, dict): + return {"value": content} + + result: Dict[str, Any] = {"value": body} + + if "description" in front: + result["description"] = front["description"] + if "limit" in front: + result["limit"] = front["limit"] + if "read_only" in front: + result["read_only"] = front["read_only"] + if "metadata" in front: + result["metadata"] = front["metadata"] + + return result diff --git a/letta/services/memory_repo/memfs_client_base.py b/letta/services/memory_repo/memfs_client_base.py index bf1ca887..b5122bd3 100644 --- 
a/letta/services/memory_repo/memfs_client_base.py +++ b/letta/services/memory_repo/memfs_client_base.py @@ -9,16 +9,17 @@ HTTP service instead. """ import hashlib -import json import os import uuid from typing import Dict, List, Optional +from letta.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT from letta.log import get_logger from letta.otel.tracing import trace_method from letta.schemas.block import Block as PydanticBlock from letta.schemas.memory_repo import MemoryCommit from letta.schemas.user import User as PydanticUser +from letta.services.memory_repo.block_markdown import parse_block_markdown, serialize_block from letta.services.memory_repo.git_operations import GitOperations from letta.services.memory_repo.storage.local import LocalStorageBackend from letta.utils import enforce_types @@ -27,7 +28,6 @@ logger = get_logger(__name__) # File paths within the memory repository MEMORY_DIR = "memory" -METADATA_FILE = "metadata/blocks.json" # Default local storage path DEFAULT_LOCAL_PATH = os.path.expanduser("~/.letta/memfs") @@ -84,20 +84,18 @@ class MemfsClient: initial_blocks = initial_blocks or [] org_id = actor.organization_id - # Build initial files from blocks + # Build initial files from blocks (frontmatter embeds metadata) initial_files = {} - metadata = {"blocks": {}} for block in initial_blocks: file_path = f"{MEMORY_DIR}/{block.label}.md" - initial_files[file_path] = block.value or "" - metadata["blocks"][block.label] = { - "description": block.description, - "limit": block.limit, - } - - if metadata["blocks"]: - initial_files[METADATA_FILE] = json.dumps(metadata, indent=2) + initial_files[file_path] = serialize_block( + value=block.value or "", + description=block.description, + limit=block.limit, + read_only=block.read_only, + metadata=block.metadata, + ) return await self.git.create_repo( agent_id=agent_id, @@ -136,33 +134,24 @@ class MemfsClient: except FileNotFoundError: return [] - # Parse metadata - metadata: dict = {} - if METADATA_FILE in 
files: - try: - metadata_json = json.loads(files[METADATA_FILE]) - if isinstance(metadata_json, dict): - metadata = metadata_json.get("blocks", {}) or {} - except json.JSONDecodeError: - logger.warning(f"Failed to parse metadata for agent {agent_id}") - - # Convert block files to PydanticBlock + # Convert block files to PydanticBlock (metadata is in frontmatter) blocks = [] for file_path, content in files.items(): if file_path.startswith(f"{MEMORY_DIR}/") and file_path.endswith(".md"): label = file_path[len(f"{MEMORY_DIR}/") : -3] - block_meta = metadata.get(label, {}) - # Generate deterministic UUID-style ID from agent_id + label + parsed = parse_block_markdown(content) + synthetic_uuid = uuid.UUID(hashlib.md5(f"{agent_id}:{label}".encode()).hexdigest()) blocks.append( PydanticBlock( id=f"block-{synthetic_uuid}", label=label, - value=content, - description=block_meta.get("description"), - limit=block_meta.get("limit", 5000), - metadata=block_meta.get("metadata", {}), + value=parsed["value"], + description=parsed.get("description"), + limit=parsed.get("limit", CORE_MEMORY_BLOCK_CHAR_LIMIT), + read_only=parsed.get("read_only", False), + metadata=parsed.get("metadata", {}), ) ) @@ -220,6 +209,11 @@ class MemfsClient: value: str, actor: PydanticUser, message: Optional[str] = None, + *, + description: Optional[str] = None, + limit: Optional[int] = None, + read_only: bool = False, + metadata: Optional[dict] = None, ) -> MemoryCommit: """Update a memory block. 
@@ -229,6 +223,10 @@ class MemfsClient: value: New block value actor: User performing the operation message: Optional commit message + description: Block description (for frontmatter) + limit: Block character limit (for frontmatter) + read_only: Block read-only flag (for frontmatter) + metadata: Block metadata dict (for frontmatter) Returns: Commit details @@ -238,12 +236,19 @@ class MemfsClient: await self._ensure_repo_exists(agent_id, actor) file_path = f"{MEMORY_DIR}/{label}.md" + file_content = serialize_block( + value=value, + description=description, + limit=limit, + read_only=read_only, + metadata=metadata, + ) commit_message = message or f"Update {label}" return await self.git.commit( agent_id=agent_id, org_id=actor.organization_id, - changes=[FileChange(path=file_path, content=value, change_type="modify")], + changes=[FileChange(path=file_path, content=file_content, change_type="modify")], message=commit_message, author_name=f"User {actor.id}", author_email=f"{actor.id}@letta.ai", @@ -274,40 +279,20 @@ class MemfsClient: await self._ensure_repo_exists(agent_id, actor) org_id = actor.organization_id - # Get current metadata - try: - files = await self.git.get_files(agent_id, org_id) - except FileNotFoundError: - files = {} + file_content = serialize_block( + value=block.value or "", + description=block.description, + limit=block.limit, + read_only=block.read_only, + metadata=block.metadata, + ) - metadata = {"blocks": {}} - if METADATA_FILE in files: - try: - raw_metadata = json.loads(files[METADATA_FILE]) - if isinstance(raw_metadata, dict) and isinstance(raw_metadata.get("blocks"), dict): - metadata = raw_metadata - except json.JSONDecodeError: - pass - - # Add new block metadata - metadata["blocks"][block.label] = { - "description": block.description, - "limit": block.limit, - "metadata": block.metadata or {}, - } - - # Prepare changes changes = [ FileChange( path=f"{MEMORY_DIR}/{block.label}.md", - content=block.value, + content=file_content, 
change_type="add", ), - FileChange( - path=METADATA_FILE, - content=json.dumps(metadata, indent=2), - change_type="modify", - ), ] commit_message = message or f"Create block {block.label}" @@ -346,37 +331,12 @@ class MemfsClient: await self._ensure_repo_exists(agent_id, actor) org_id = actor.organization_id - # Get current metadata - try: - files = await self.git.get_files(agent_id, org_id) - except FileNotFoundError: - files = {} - - metadata = {"blocks": {}} - if METADATA_FILE in files: - try: - raw_metadata = json.loads(files[METADATA_FILE]) - if isinstance(raw_metadata, dict) and isinstance(raw_metadata.get("blocks"), dict): - metadata = raw_metadata - except json.JSONDecodeError: - pass - - # Remove block from metadata - if label in metadata["blocks"]: - del metadata["blocks"][label] - - # Prepare changes changes = [ FileChange( path=f"{MEMORY_DIR}/{label}.md", content=None, change_type="delete", ), - FileChange( - path=METADATA_FILE, - content=json.dumps(metadata, indent=2), - change_type="modify", - ), ] commit_message = message or f"Delete block {label}"