From 8f56527958f01514b3ee7ca1a61f237aa4e6fb74 Mon Sep 17 00:00:00 2001 From: Kian Jones <11655409+kianjones9@users.noreply.github.com> Date: Wed, 18 Feb 2026 21:16:03 -0800 Subject: [PATCH] =?UTF-8?q?perf(memfs):=20delta=20upload=20=E2=80=94=20onl?= =?UTF-8?q?y=20push=20new=20git=20objects=20after=20commit=20(#9548)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit perf(memfs): delta upload — only push new/modified git objects after commit Instead of re-uploading the entire .git/ directory after every commit, snapshot file mtimes before the commit and only upload files that are new or changed. A typical single-block update creates ~5 new objects (blob, trees, commit, ref) vs re-uploading all ~30. Full _upload_repo retained for create_repo and other paths that need it. 🤖 Generated with [Letta Code](https://letta.com) Co-authored-by: Letta --- letta/services/memory_repo/git_operations.py | 60 ++++++++++++++++++-- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/letta/services/memory_repo/git_operations.py b/letta/services/memory_repo/git_operations.py index ba4080b5..04db32a6 100644 --- a/letta/services/memory_repo/git_operations.py +++ b/letta/services/memory_repo/git_operations.py @@ -146,7 +146,7 @@ class GitOperations: shutil.rmtree(os.path.dirname(repo_path), ignore_errors=True) async def _upload_repo(self, local_repo_path: str, agent_id: str, org_id: str) -> None: - """Upload a local repo to storage.""" + """Upload a local repo to storage (full upload).""" t_start = time.perf_counter() storage_prefix = self._repo_path(agent_id, org_id) @@ -182,6 +182,55 @@ class GitOperations: f"upload_time={upload_time:.2f}ms" ) + @staticmethod + def _snapshot_git_files(git_dir: str) -> Dict[str, float]: + """Snapshot mtime of all files under .git/ for delta detection.""" + snapshot = {} + for root, _dirs, files in os.walk(git_dir): + for filename in files: + path = os.path.join(root, filename) + snapshot[path] = os.path.getmtime(path) + return snapshot + + async def _upload_delta( + self, + local_repo_path: str, + agent_id: str, + org_id: str, + before_snapshot: Dict[str, float], + ) -> None: + """Upload only new/modified files since before_snapshot.""" + t_start = time.perf_counter() + storage_prefix = self._repo_path(agent_id, org_id) + git_dir = os.path.join(local_repo_path, ".git") + + upload_tasks = [] + total_bytes = 0 + + for root, _dirs, files in os.walk(git_dir): + for filename in files: + local_path = os.path.join(root, filename) + old_mtime = before_snapshot.get(local_path) + # New file or modified since snapshot + if old_mtime is None or os.path.getmtime(local_path) != old_mtime: + rel_path = os.path.relpath(local_path, git_dir) + storage_path = f"{storage_prefix}/{rel_path}" + with open(local_path, "rb") as f: + content = f.read() + total_bytes += len(content) + upload_tasks.append((storage_path, content)) + + t0 = time.perf_counter() + await asyncio.gather(*[self.storage.upload_bytes(path, content) for path, content in upload_tasks]) + upload_time = (time.perf_counter() - t0) * 1000 + + total_time = (time.perf_counter() - t_start) * 1000 + logger.info( + f"[GIT_PERF] _upload_delta TOTAL {total_time:.2f}ms " + f"files={len(upload_tasks)} bytes={total_bytes} " + f"upload_time={upload_time:.2f}ms" + ) + async def _download_repo(self, agent_id: str, org_id: str) -> str: """Download a repo from storage to a temp directory. @@ -396,6 +445,9 @@ class GitOperations: logger.info(f"[GIT_PERF] _commit_with_lock download phase took {download_time:.2f}ms") try: + # Snapshot git objects before commit for delta upload + git_dir = os.path.join(repo_path, ".git") + before_snapshot = self._snapshot_git_files(git_dir) def _commit(): t_git_start = time.perf_counter() @@ -484,11 +536,11 @@ class GitOperations: git_thread_time = (time.perf_counter() - t0) * 1000 logger.info(f"[GIT_PERF] _commit_with_lock git thread took {git_thread_time:.2f}ms") - # Upload the updated repo + # Upload only new/modified objects (delta) t0 = time.perf_counter() - await self._upload_repo(repo_path, agent_id, org_id) + await self._upload_delta(repo_path, agent_id, org_id, before_snapshot) upload_time = (time.perf_counter() - t0) * 1000 - logger.info(f"[GIT_PERF] _commit_with_lock upload phase took {upload_time:.2f}ms") + logger.info(f"[GIT_PERF] _commit_with_lock upload phase (delta) took {upload_time:.2f}ms") total_time = (time.perf_counter() - t_start) * 1000 logger.info(