From aa4071c721560f8a525b863d9d719b1f8c0b694b Mon Sep 17 00:00:00 2001 From: Charles Packer Date: Fri, 26 Dec 2025 23:50:28 -0800 Subject: [PATCH] fix: use ripgrep for Glob tool to prevent stack overflow (#402) Co-authored-by: Letta --- src/tools/impl/Glob.ts | 135 +++++++++++++++++++++++------------------ 1 file changed, 75 insertions(+), 60 deletions(-) diff --git a/src/tools/impl/Glob.ts b/src/tools/impl/Glob.ts index 0daabf9..c57b55c 100644 --- a/src/tools/impl/Glob.ts +++ b/src/tools/impl/Glob.ts @@ -1,13 +1,31 @@ -import { promises as fs } from "node:fs"; +import { execFile } from "node:child_process"; +import { createRequire } from "node:module"; import * as path from "node:path"; -import picomatch from "picomatch"; +import { fileURLToPath } from "node:url"; +import { promisify } from "node:util"; import { LIMITS } from "./truncation.js"; import { validateRequiredParams } from "./validation.js"; +const execFileAsync = promisify(execFile); + +function getRipgrepPath(): string { + try { + const __filename = fileURLToPath(import.meta.url); + const require = createRequire(__filename); + const rgPackage = require("@vscode/ripgrep"); + return rgPackage.rgPath; + } catch (_error) { + return "rg"; + } +} + +const rgPath = getRipgrepPath(); + interface GlobArgs { pattern: string; path?: string; } + interface GlobResult { files: string[]; truncated?: boolean; @@ -21,7 +39,6 @@ function applyFileLimit(files: string[]): GlobResult { } const truncatedFiles = files.slice(0, LIMITS.GLOB_MAX_FILES); - // Add truncation notice as last entry truncatedFiles.push( `\n[Output truncated: showing ${LIMITS.GLOB_MAX_FILES.toLocaleString()} of ${totalFiles.toLocaleString()} files.]`, ); @@ -33,66 +50,64 @@ function applyFileLimit(files: string[]): GlobResult { }; } -async function walkDirectory(dir: string): Promise { - const files: string[] = []; - try { - const entries = await fs.readdir(dir, { withFileTypes: true }); - for (const entry of entries) { - const fullPath = path.join(dir, entry.name); - if (entry.isDirectory()) { - if (entry.name === "node_modules" || entry.name === ".git") continue; - const subFiles = await walkDirectory(fullPath); - files.push(...subFiles); - } else if (entry.isFile()) { - files.push(fullPath); - } - } - } catch (error) { - const err = error as NodeJS.ErrnoException; - if (err.code !== "EACCES" && err.code !== "EPERM") throw err; - } - return files; -} - export async function glob(args: GlobArgs): Promise { validateRequiredParams(args, ["pattern"], "Glob"); const { pattern, path: searchPath } = args; - const userCwd = process.env.USER_CWD || process.cwd(); - let baseDir: string; - if (searchPath) - baseDir = path.isAbsolute(searchPath) - ? searchPath - : path.resolve(userCwd, searchPath); - else baseDir = userCwd; - try { - const stats = await fs.stat(baseDir); - if (!stats.isDirectory()) - throw new Error(`Path is not a directory: ${baseDir}`); - } catch (error) { - const err = error as NodeJS.ErrnoException; - if (err.code === "ENOENT") - throw new Error(`Directory does not exist: ${baseDir}`); - throw err; + + // Explicit check for undefined/empty pattern (validateRequiredParams only checks key existence) + if (!pattern) { + throw new Error("Glob tool missing required parameter: pattern"); } - const allFiles = await walkDirectory(baseDir); - let matcher: (input: string) => boolean; - if (pattern.startsWith("**/")) { - const subPattern = pattern.slice(3); - matcher = picomatch(subPattern); - const matchedFiles = allFiles.filter((file) => - matcher(path.basename(file)), - ); - return applyFileLimit(matchedFiles.sort()); - } else if (pattern.includes("**")) { - const fullPattern = path.join(baseDir, pattern); - matcher = picomatch(fullPattern, { dot: true }); - const matchedFiles = allFiles.filter((file) => matcher(file)); - return applyFileLimit(matchedFiles.sort()); - } else { - matcher = picomatch(pattern, { dot: true }); - const matchedFiles = allFiles.filter((file) => - matcher(path.relative(baseDir, file)), - ); - return applyFileLimit(matchedFiles.sort()); + const userCwd = process.env.USER_CWD || process.cwd(); + + const baseDir = searchPath + ? path.isAbsolute(searchPath) + ? searchPath + : path.resolve(userCwd, searchPath) + : userCwd; + + // Build ripgrep args for file listing + // --files: list files instead of searching content + // --glob: filter by pattern + // --hidden: include hidden files (dotfiles) + // --follow: follow symlinks + // --no-messages: suppress error messages for unreadable dirs + const rgArgs = [ + "--files", + "--hidden", + "--follow", + "--no-messages", + "--glob", + pattern, + baseDir, + ]; + + try { + const { stdout } = await execFileAsync(rgPath, rgArgs, { + maxBuffer: 50 * 1024 * 1024, // 50MB buffer for large file lists + cwd: userCwd, + }); + + const files = stdout.trim().split("\n").filter(Boolean).sort(); + + return applyFileLimit(files); + } catch (error) { + const err = error as Error & { + stdout?: string; + code?: string | number; + }; + + // ripgrep exits with code 1 when no files match - that's not an error + if (err.code === 1 || err.code === "1") { + return { files: [] }; + } + + // If stdout has content despite error, use it (partial results) + if (err.stdout?.trim()) { + const files = err.stdout.trim().split("\n").filter(Boolean).sort(); + return applyFileLimit(files); + } + + throw new Error(`Glob failed: ${err.message || "Unknown error"}`); } }