feat(core): sync skills from SKILL.md into memFS blocks (#9718)

This commit is contained in:
Sarah Wooders
2026-02-27 14:47:14 -08:00
committed by Caren Thomas
parent a11ba9710c
commit a50482e6d3
6 changed files with 111 additions and 60 deletions

View File

@@ -290,27 +290,47 @@ class Memory(BaseModel, validate_assignment=True):
s.write("\n\n<memory_filesystem>\n") s.write("\n\n<memory_filesystem>\n")
def _render_tree(node: dict, prefix: str = "", in_system: bool = False, path_parts: tuple[str, ...] = ()): def _render_tree(node: dict, prefix: str = "", in_system: bool = False, path_parts: tuple[str, ...] = ()):
# Render skills/ as concise top-level entries only, using both
# current (`skills/<name>`) and legacy (`skills/<name>/SKILL`) labels.
if path_parts == ("skills",):
skill_entries: list[tuple[str, str]] = []
for name, val in node.items():
if name == LEAF_KEY:
continue
block = None
if isinstance(val, dict):
legacy_skill_block = val.get("SKILL")
if legacy_skill_block is not None and not isinstance(legacy_skill_block, dict):
block = legacy_skill_block
elif LEAF_KEY in val and not isinstance(val[LEAF_KEY], dict):
block = val[LEAF_KEY]
else:
block = val
if block is None:
continue
desc = getattr(block, "description", None)
desc_line = (desc or "").strip().split("\n")[0].strip()
skill_entries.append((name, desc_line))
skill_entries.sort(key=lambda e: e[0])
for i, (name, desc_line) in enumerate(skill_entries):
is_last = i == len(skill_entries) - 1
connector = "└── " if is_last else "├── "
desc_suffix = f" ({desc_line})" if desc_line else ""
s.write(f"{prefix}{connector}{name}{desc_suffix}\n")
return
# Sort: directories first, then files. If a node is both a directory and a # Sort: directories first, then files. If a node is both a directory and a
# leaf (LEAF_KEY present), show both <name>/ and <name>.md. # leaf (LEAF_KEY present), show both <name>/ and <name>.md.
dirs = [] dirs = []
files = [] files = []
skill_summary_blocks = {}
for name, val in node.items(): for name, val in node.items():
if name == LEAF_KEY: if name == LEAF_KEY:
continue continue
if isinstance(val, dict): if isinstance(val, dict):
# Special-case skills/<skill_name>/SKILL.md so the skills section
# is concise in the system prompt:
# skills/
# skills/<skill_name> (description)
# instead of rendering nested SKILL.md + support docs/scripts.
if path_parts == ("skills",):
skill_block = val.get("SKILL")
if skill_block is not None and not isinstance(skill_block, dict):
files.append(name)
skill_summary_blocks[name] = skill_block
continue
dirs.append(name) dirs.append(name)
if LEAF_KEY in val: if LEAF_KEY in val:
files.append(name) files.append(name)
@@ -334,15 +354,6 @@ class Memory(BaseModel, validate_assignment=True):
path_parts=(*path_parts, name), path_parts=(*path_parts, name),
) )
else: else:
# Render condensed skills top-level summaries.
if path_parts == ("skills",) and name in skill_summary_blocks:
block = skill_summary_blocks[name]
desc = getattr(block, "description", None)
desc_line = (desc or "").strip().split("\n")[0].strip()
desc_suffix = f" ({desc_line})" if desc_line else ""
s.write(f"{prefix}{connector}{name}/{desc_suffix}\n")
continue
# For files outside system/, append the block description # For files outside system/, append the block description
desc_suffix = "" desc_suffix = ""
if not in_system: if not in_system:

View File

@@ -29,6 +29,7 @@ from starlette.background import BackgroundTask
from letta.log import get_logger from letta.log import get_logger
from letta.server.rest_api.dependencies import HeaderParams, get_headers, get_letta_server from letta.server.rest_api.dependencies import HeaderParams, get_headers, get_letta_server
from letta.services.memory_repo.path_mapping import memory_block_label_from_markdown_path
logger = get_logger(__name__) logger = get_logger(__name__)
@@ -38,17 +39,11 @@ _background_tasks: set[asyncio.Task] = set()
def _is_syncable_block_markdown_path(path: str) -> bool: def _is_syncable_block_markdown_path(path: str) -> bool:
"""Return whether a markdown path should be mirrored into block cache. """Return whether a markdown path should be mirrored into block cache.
For skills/, do not mirror any files into block cache. Special-case skills so only skill definitions are mirrored:
Agent-scoped skills are stored in MemFS, but they should not be injected - sync `skills/{skill_name}/SKILL.md` as label `skills/{skill_name}`
into block-backed core memory/system prompt. - ignore all other markdown under `skills/`
""" """
if not path.endswith(".md"): return memory_block_label_from_markdown_path(path) is not None
return False
if path.startswith("skills/"):
return False
return True
router = APIRouter(prefix="/git", tags=["git"], include_in_schema=False) router = APIRouter(prefix="/git", tags=["git"], include_in_schema=False)
@@ -133,7 +128,9 @@ async def _sync_after_push(actor_id: str, agent_id: str) -> None:
if not _is_syncable_block_markdown_path(file_path): if not _is_syncable_block_markdown_path(file_path):
continue continue
label = file_path[:-3] label = memory_block_label_from_markdown_path(file_path)
if label is None:
continue
expected_labels.add(label) expected_labels.add(label)
# Parse frontmatter to extract metadata alongside value # Parse frontmatter to extract metadata alongside value

View File

@@ -21,6 +21,7 @@ from letta.schemas.memory_repo import MemoryCommit
from letta.schemas.user import User as PydanticUser from letta.schemas.user import User as PydanticUser
from letta.services.memory_repo.block_markdown import parse_block_markdown, serialize_block from letta.services.memory_repo.block_markdown import parse_block_markdown, serialize_block
from letta.services.memory_repo.git_operations import GitOperations from letta.services.memory_repo.git_operations import GitOperations
from letta.services.memory_repo.path_mapping import memory_block_label_from_markdown_path
from letta.services.memory_repo.storage.local import LocalStorageBackend from letta.services.memory_repo.storage.local import LocalStorageBackend
from letta.utils import enforce_types from letta.utils import enforce_types
@@ -134,26 +135,28 @@ class MemfsClient:
return [] return []
# Convert block files to PydanticBlock (metadata is in frontmatter). # Convert block files to PydanticBlock (metadata is in frontmatter).
# skills/ is intentionally excluded from block sync/render. # skills/{skill_name}/SKILL.md is mapped to block label skills/{skill_name};
# other files under skills/ are intentionally ignored.
blocks = [] blocks = []
for file_path, content in files.items(): for file_path, content in files.items():
if file_path.endswith(".md"): label = memory_block_label_from_markdown_path(file_path)
label = file_path[:-3] if label is None:
continue
parsed = parse_block_markdown(content) parsed = parse_block_markdown(content)
synthetic_uuid = uuid.UUID(hashlib.md5(f"{agent_id}:{label}".encode()).hexdigest()) synthetic_uuid = uuid.UUID(hashlib.md5(f"{agent_id}:{label}".encode()).hexdigest())
blocks.append( blocks.append(
PydanticBlock( PydanticBlock(
id=f"block-{synthetic_uuid}", id=f"block-{synthetic_uuid}",
label=label, label=label,
value=parsed["value"], value=parsed["value"],
description=parsed.get("description"), description=parsed.get("description"),
limit=parsed.get("limit", CORE_MEMORY_BLOCK_CHAR_LIMIT), limit=parsed.get("limit", CORE_MEMORY_BLOCK_CHAR_LIMIT),
read_only=parsed.get("read_only", False), read_only=parsed.get("read_only", False),
metadata=parsed.get("metadata", {}), metadata=parsed.get("metadata", {}),
)
) )
)
return blocks return blocks

View File

@@ -0,0 +1,29 @@
"""Helpers for mapping memory-repo markdown paths to block labels.
Special handling for skills:
- sync `skills/{skill_name}/SKILL.md` as block label `skills/{skill_name}`
- ignore all other markdown files under `skills/`
"""
from __future__ import annotations
def memory_block_label_from_markdown_path(path: str) -> str | None:
    """Return the block label for a syncable markdown path, else None.

    Rules:
    - Non-`.md` files are ignored.
    - `skills/{skill_name}/SKILL.md` -> `skills/{skill_name}`
    - Other `skills/**` markdown files are ignored.
    - Markdown files whose file name is just `.md` (no stem) are ignored,
      because they would otherwise map to an empty or `/`-terminated label.
    - All other markdown files map to `path[:-3]`.

    Args:
        path: Repository-relative file path using `/` separators.

    Returns:
        The block label for the path, or None when the file should not be
        mirrored into a block.
    """
    if not path.endswith(".md"):
        return None

    if path.startswith("skills/"):
        parts = path.split("/")
        # Exactly `skills/<non-empty name>/SKILL.md`; anything deeper,
        # shallower, or differently named under skills/ is not a skill
        # definition and is skipped.
        if len(parts) == 3 and parts[1] and parts[2] == "SKILL.md":
            return f"skills/{parts[1]}"
        return None

    label = path[:-3]
    # A bare ".md" file name (".md", "system/.md") has no stem, so stripping
    # the suffix would leave "" or a label ending in "/". Neither identifies a
    # block, so treat these paths as non-syncable.
    if not label or label.endswith("/"):
        return None
    return label

View File

@@ -54,7 +54,14 @@ class TestLogContextMiddleware:
return { return {
"system/human.md": "---\ndescription: human\n---\nname: sarah", "system/human.md": "---\ndescription: human\n---\nname: sarah",
"system/persona.md": "---\ndescription: persona\n---\nbe helpful", "system/persona.md": "---\ndescription: persona\n---\nbe helpful",
"skills/research-helper/SKILL.md": "---\ndescription: helper\n---\n# Research Helper", "skills/research-helper/SKILL.md": (
"---\n"
"name: research-helper\n"
"description: Search the web and summarize findings.\n"
"---\n"
"# Research Helper\n\n"
"Use this skill to do deep web research and summarize results.\n"
),
"skills/research-helper/references/details.md": "---\ndescription: nested\n---\nShould not be synced", "skills/research-helper/references/details.md": "---\ndescription: nested\n---\nShould not be synced",
} }
@@ -97,9 +104,13 @@ class TestLogContextMiddleware:
labels = {call["label"] for call in synced_calls} labels = {call["label"] for call in synced_calls}
assert "system/human" in labels assert "system/human" in labels
assert "system/persona" in labels assert "system/persona" in labels
assert "skills/research-helper/SKILL" not in labels assert "skills/research-helper" in labels
assert "skills/research-helper/references/details" not in labels assert "skills/research-helper/references/details" not in labels
by_label = {call["label"]: call for call in synced_calls}
assert by_label["skills/research-helper"]["description"] == "Search the web and summarize findings."
assert by_label["skills/research-helper"]["value"].startswith("# Research Helper")
def test_extracts_actor_id_from_headers(self, client): def test_extracts_actor_id_from_headers(self, client):
response = client.get("/v1/agents/agent-123e4567-e89b-42d3-8456-426614174000", headers={"user_id": "user-abc123"}) response = client.get("/v1/agents/agent-123e4567-e89b-42d3-8456-426614174000", headers={"user_id": "user-abc123"})
assert response.status_code == 200 assert response.status_code == 200

View File

@@ -312,10 +312,10 @@ def test_compile_git_memory_filesystem_no_description_when_empty():
def test_compile_git_memory_filesystem_condenses_skills_to_top_level_entries(): def test_compile_git_memory_filesystem_condenses_skills_to_top_level_entries():
"""skills/ should render as top-level skill folders with description. """skills/ should render as top-level skill entries with description.
We intentionally avoid showing nested files under skills/ in the system prompt We intentionally avoid showing nested files under skills/ in the system
tree to keep context concise. prompt tree to keep context concise.
""" """
m = Memory( m = Memory(
@@ -324,13 +324,13 @@ def test_compile_git_memory_filesystem_condenses_skills_to_top_level_entries():
blocks=[ blocks=[
Block(label="system/human", value="human data", limit=100), Block(label="system/human", value="human data", limit=100),
Block( Block(
label="skills/searching-messages/SKILL", label="skills/searching-messages",
value="# searching messages", value="# searching messages",
limit=100, limit=100,
description="Search past messages to recall context.", description="Search past messages to recall context.",
), ),
Block( Block(
label="skills/creating-skills/SKILL", label="skills/creating-skills",
value="# creating skills", value="# creating skills",
limit=100, limit=100,
description="Guide for creating effective skills.", description="Guide for creating effective skills.",
@@ -347,10 +347,10 @@ def test_compile_git_memory_filesystem_condenses_skills_to_top_level_entries():
out = m.compile() out = m.compile()
# Condensed top-level skill entries with descriptions. # Condensed top-level skill entries with descriptions.
assert "searching-messages/ (Search past messages to recall context.)" in out assert "searching-messages (Search past messages to recall context.)" in out
assert "creating-skills/ (Guide for creating effective skills.)" in out assert "creating-skills (Guide for creating effective skills.)" in out
# Do not show SKILL.md or nested skill docs in tree. # Do not show .md suffixes or nested skill docs in tree.
assert "skills/searching-messages/SKILL.md" not in out assert "searching-messages.md" not in out
assert "skills/creating-skills/SKILL.md" not in out assert "creating-skills.md" not in out
assert "references/workflows" not in out assert "references/workflows" not in out