feat(core): sync skills from SKILL.md into memFS blocks (#9718)

2026-02-27 14:47:14 -08:00
parent a11ba9710c
commit a50482e6d3
6 changed files with 111 additions and 60 deletions
--- a/letta/schemas/memory.py
+++ b/letta/schemas/memory.py
@@ -290,27 +290,47 @@ class Memory(BaseModel, validate_assignment=True):
        s.write("\n\n<memory_filesystem>\n")

        def _render_tree(node: dict, prefix: str = "", in_system: bool = False, path_parts: tuple[str, ...] = ()):
+            # Render skills/ as concise top-level entries only, using both
+            # current (`skills/<name>`) and legacy (`skills/<name>/SKILL`) labels.
+            if path_parts == ("skills",):
+                skill_entries: list[tuple[str, str]] = []
+                for name, val in node.items():
+                    if name == LEAF_KEY:
+                        continue
+
+                    block = None
+                    if isinstance(val, dict):
+                        legacy_skill_block = val.get("SKILL")
+                        if legacy_skill_block is not None and not isinstance(legacy_skill_block, dict):
+                            block = legacy_skill_block
+                        elif LEAF_KEY in val and not isinstance(val[LEAF_KEY], dict):
+                            block = val[LEAF_KEY]
+                    else:
+                        block = val
+
+                    if block is None:
+                        continue
+
+                    desc = getattr(block, "description", None)
+                    desc_line = (desc or "").strip().split("\n")[0].strip()
+                    skill_entries.append((name, desc_line))
+
+                skill_entries.sort(key=lambda e: e[0])
+                for i, (name, desc_line) in enumerate(skill_entries):
+                    is_last = i == len(skill_entries) - 1
+                    connector = "└── " if is_last else "├── "
+                    desc_suffix = f" ({desc_line})" if desc_line else ""
+                    s.write(f"{prefix}{connector}{name}{desc_suffix}\n")
+                return
+
            # Sort: directories first, then files. If a node is both a directory and a
            # leaf (LEAF_KEY present), show both <name>/ and <name>.md.
            dirs = []
            files = []
-            skill_summary_blocks = {}
            for name, val in node.items():
                if name == LEAF_KEY:
                    continue
                if isinstance(val, dict):
-                    # Special-case skills/<skill_name>/SKILL.md so the skills section
-                    # is concise in the system prompt:
-                    #   skills/
-                    #     skills/<skill_name> (description)
-                    # instead of rendering nested SKILL.md + support docs/scripts.
-                    if path_parts == ("skills",):
-                        skill_block = val.get("SKILL")
-                        if skill_block is not None and not isinstance(skill_block, dict):
-                            files.append(name)
-                            skill_summary_blocks[name] = skill_block
-                            continue
-
                    dirs.append(name)
                    if LEAF_KEY in val:
                        files.append(name)
@@ -334,15 +354,6 @@ class Memory(BaseModel, validate_assignment=True):
                        path_parts=(*path_parts, name),
                    )
                else:
-                    # Render condensed skills top-level summaries.
-                    if path_parts == ("skills",) and name in skill_summary_blocks:
-                        block = skill_summary_blocks[name]
-                        desc = getattr(block, "description", None)
-                        desc_line = (desc or "").strip().split("\n")[0].strip()
-                        desc_suffix = f" ({desc_line})" if desc_line else ""
-                        s.write(f"{prefix}{connector}{name}/{desc_suffix}\n")
-                        continue
-
                    # For files outside system/, append the block description
                    desc_suffix = ""
                    if not in_system:
--- a/letta/server/rest_api/routers/v1/git_http.py
+++ b/letta/server/rest_api/routers/v1/git_http.py
@@ -29,6 +29,7 @@ from starlette.background import BackgroundTask

 from letta.log import get_logger
 from letta.server.rest_api.dependencies import HeaderParams, get_headers, get_letta_server
+from letta.services.memory_repo.path_mapping import memory_block_label_from_markdown_path

 logger = get_logger(__name__)

@@ -38,17 +39,11 @@ _background_tasks: set[asyncio.Task] = set()
 def _is_syncable_block_markdown_path(path: str) -> bool:
    """Return whether a markdown path should be mirrored into block cache.

-    For skills/, do not mirror any files into block cache.
-    Agent-scoped skills are stored in MemFS, but they should not be injected
-    into block-backed core memory/system prompt.
+    Special-case skills so only skill definitions are mirrored:
+    - sync `skills/{skill_name}/SKILL.md` as label `skills/{skill_name}`
+    - ignore all other markdown under `skills/`
    """
-    if not path.endswith(".md"):
-        return False
-
-    if path.startswith("skills/"):
-        return False
-
-    return True
+    return memory_block_label_from_markdown_path(path) is not None


 router = APIRouter(prefix="/git", tags=["git"], include_in_schema=False)
@@ -133,7 +128,9 @@ async def _sync_after_push(actor_id: str, agent_id: str) -> None:
        if not _is_syncable_block_markdown_path(file_path):
            continue

-        label = file_path[:-3]
+        label = memory_block_label_from_markdown_path(file_path)
+        if label is None:
+            continue
        expected_labels.add(label)

        # Parse frontmatter to extract metadata alongside value
--- a/letta/services/memory_repo/memfs_client_base.py
+++ b/letta/services/memory_repo/memfs_client_base.py
@@ -21,6 +21,7 @@ from letta.schemas.memory_repo import MemoryCommit
 from letta.schemas.user import User as PydanticUser
 from letta.services.memory_repo.block_markdown import parse_block_markdown, serialize_block
 from letta.services.memory_repo.git_operations import GitOperations
+from letta.services.memory_repo.path_mapping import memory_block_label_from_markdown_path
 from letta.services.memory_repo.storage.local import LocalStorageBackend
 from letta.utils import enforce_types

@@ -134,26 +135,28 @@ class MemfsClient:
            return []

        # Convert block files to PydanticBlock (metadata is in frontmatter).
-        # skills/ is intentionally excluded from block sync/render.
+        # skills/{skill_name}/SKILL.md is mapped to block label skills/{skill_name};
+        # other files under skills/ are intentionally ignored.
        blocks = []
        for file_path, content in files.items():
-            if file_path.endswith(".md"):
-                label = file_path[:-3]
+            label = memory_block_label_from_markdown_path(file_path)
+            if label is None:
+                continue

-                parsed = parse_block_markdown(content)
+            parsed = parse_block_markdown(content)

-                synthetic_uuid = uuid.UUID(hashlib.md5(f"{agent_id}:{label}".encode()).hexdigest())
-                blocks.append(
-                    PydanticBlock(
-                        id=f"block-{synthetic_uuid}",
-                        label=label,
-                        value=parsed["value"],
-                        description=parsed.get("description"),
-                        limit=parsed.get("limit", CORE_MEMORY_BLOCK_CHAR_LIMIT),
-                        read_only=parsed.get("read_only", False),
-                        metadata=parsed.get("metadata", {}),
-                    )
+            synthetic_uuid = uuid.UUID(hashlib.md5(f"{agent_id}:{label}".encode()).hexdigest())
+            blocks.append(
+                PydanticBlock(
+                    id=f"block-{synthetic_uuid}",
+                    label=label,
+                    value=parsed["value"],
+                    description=parsed.get("description"),
+                    limit=parsed.get("limit", CORE_MEMORY_BLOCK_CHAR_LIMIT),
+                    read_only=parsed.get("read_only", False),
+                    metadata=parsed.get("metadata", {}),
                )
+            )

        return blocks

--- a/letta/services/memory_repo/path_mapping.py
+++ b/letta/services/memory_repo/path_mapping.py
@@ -0,0 +1,29 @@
+"""Helpers for mapping memory-repo markdown paths to block labels.
+
+Special handling for skills:
+- sync `skills/{skill_name}/SKILL.md` as block label `skills/{skill_name}`
+- ignore all other markdown files under `skills/`
+"""
+
+from __future__ import annotations
+
+
+def memory_block_label_from_markdown_path(path: str) -> str | None:
+    """Return the block label for a syncable markdown path, else None.
+
+    Rules (string comparisons are case-sensitive):
+    - Paths that do not end in `.md` are ignored.
+    - `skills/{skill_name}/SKILL.md` (non-empty name) -> `skills/{skill_name}`
+    - Every other `.md` path starting with `skills/` is ignored.
+    - All other markdown paths map to the path minus `.md` (`path[:-3]`).
+    """
+    if not path.endswith(".md"):
+        return None
+
+    if path.startswith("skills/"):
+        parts = path.split("/")
+        if len(parts) == 3 and parts[0] == "skills" and parts[1] and parts[2] == "SKILL.md":
+            return f"skills/{parts[1]}"
+        return None
+
+    return path[:-3]
--- a/tests/test_log_context_middleware.py
+++ b/tests/test_log_context_middleware.py
@@ -54,7 +54,14 @@ class TestLogContextMiddleware:
                return {
                    "system/human.md": "---\ndescription: human\n---\nname: sarah",
                    "system/persona.md": "---\ndescription: persona\n---\nbe helpful",
-                    "skills/research-helper/SKILL.md": "---\ndescription: helper\n---\n# Research Helper",
+                    "skills/research-helper/SKILL.md": (
+                        "---\n"
+                        "name: research-helper\n"
+                        "description: Search the web and summarize findings.\n"
+                        "---\n"
+                        "# Research Helper\n\n"
+                        "Use this skill to do deep web research and summarize results.\n"
+                    ),
                    "skills/research-helper/references/details.md": "---\ndescription: nested\n---\nShould not be synced",
                }

@@ -97,9 +104,13 @@ class TestLogContextMiddleware:
        labels = {call["label"] for call in synced_calls}
        assert "system/human" in labels
        assert "system/persona" in labels
-        assert "skills/research-helper/SKILL" not in labels
+        assert "skills/research-helper" in labels
        assert "skills/research-helper/references/details" not in labels

+        by_label = {call["label"]: call for call in synced_calls}
+        assert by_label["skills/research-helper"]["description"] == "Search the web and summarize findings."
+        assert by_label["skills/research-helper"]["value"].startswith("# Research Helper")
+
    def test_extracts_actor_id_from_headers(self, client):
        response = client.get("/v1/agents/agent-123e4567-e89b-42d3-8456-426614174000", headers={"user_id": "user-abc123"})
        assert response.status_code == 200
--- a/tests/test_memory.py
+++ b/tests/test_memory.py
@@ -312,10 +312,10 @@ def test_compile_git_memory_filesystem_no_description_when_empty():


 def test_compile_git_memory_filesystem_condenses_skills_to_top_level_entries():
-    """skills/ should render as top-level skill folders with description.
+    """skills/ should render as top-level skill entries with description.

-    We intentionally avoid showing nested files under skills/ in the system prompt
-    tree to keep context concise.
+    We intentionally avoid showing nested files under skills/ in the system
+    prompt tree to keep context concise.
    """

    m = Memory(
@@ -324,13 +324,13 @@ def test_compile_git_memory_filesystem_condenses_skills_to_top_level_entries():
        blocks=[
            Block(label="system/human", value="human data", limit=100),
            Block(
-                label="skills/searching-messages/SKILL",
+                label="skills/searching-messages",
                value="# searching messages",
                limit=100,
                description="Search past messages to recall context.",
            ),
            Block(
-                label="skills/creating-skills/SKILL",
+                label="skills/creating-skills",
                value="# creating skills",
                limit=100,
                description="Guide for creating effective skills.",
@@ -347,10 +347,10 @@ def test_compile_git_memory_filesystem_condenses_skills_to_top_level_entries():
    out = m.compile()

    # Condensed top-level skill entries with descriptions.
-    assert "searching-messages/ (Search past messages to recall context.)" in out
-    assert "creating-skills/ (Guide for creating effective skills.)" in out
+    assert "searching-messages (Search past messages to recall context.)" in out
+    assert "creating-skills (Guide for creating effective skills.)" in out

-    # Do not show SKILL.md or nested skill docs in tree.
-    assert "skills/searching-messages/SKILL.md" not in out
-    assert "skills/creating-skills/SKILL.md" not in out
+    # Do not show .md suffixes or nested skill docs in tree.
+    assert "searching-messages.md" not in out
+    assert "creating-skills.md" not in out
    assert "references/workflows" not in out