fix: use ripgrep for Glob tool to prevent stack overflow (#402)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Charles Packer
2025-12-26 23:50:28 -08:00
committed by GitHub
parent 0722729021
commit aa4071c721

View File

@@ -1,13 +1,31 @@
import { promises as fs } from "node:fs";
import { execFile } from "node:child_process";
import { createRequire } from "node:module";
import * as path from "node:path";
import picomatch from "picomatch";
import { fileURLToPath } from "node:url";
import { promisify } from "node:util";
import { LIMITS } from "./truncation.js";
import { validateRequiredParams } from "./validation.js";
const execFileAsync = promisify(execFile);
function getRipgrepPath(): string {
try {
const __filename = fileURLToPath(import.meta.url);
const require = createRequire(__filename);
const rgPackage = require("@vscode/ripgrep");
return rgPackage.rgPath;
} catch (_error) {
return "rg";
}
}
const rgPath = getRipgrepPath();
interface GlobArgs {
pattern: string;
path?: string;
}
interface GlobResult {
files: string[];
truncated?: boolean;
@@ -21,7 +39,6 @@ function applyFileLimit(files: string[]): GlobResult {
}
const truncatedFiles = files.slice(0, LIMITS.GLOB_MAX_FILES);
// Add truncation notice as last entry
truncatedFiles.push(
`\n[Output truncated: showing ${LIMITS.GLOB_MAX_FILES.toLocaleString()} of ${totalFiles.toLocaleString()} files.]`,
);
@@ -33,66 +50,64 @@ function applyFileLimit(files: string[]): GlobResult {
};
}
async function walkDirectory(dir: string): Promise<string[]> {
const files: string[] = [];
try {
const entries = await fs.readdir(dir, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(dir, entry.name);
if (entry.isDirectory()) {
if (entry.name === "node_modules" || entry.name === ".git") continue;
const subFiles = await walkDirectory(fullPath);
files.push(...subFiles);
} else if (entry.isFile()) {
files.push(fullPath);
}
}
} catch (error) {
const err = error as NodeJS.ErrnoException;
if (err.code !== "EACCES" && err.code !== "EPERM") throw err;
}
return files;
}
export async function glob(args: GlobArgs): Promise<GlobResult> {
validateRequiredParams(args, ["pattern"], "Glob");
const { pattern, path: searchPath } = args;
const userCwd = process.env.USER_CWD || process.cwd();
let baseDir: string;
if (searchPath)
baseDir = path.isAbsolute(searchPath)
? searchPath
: path.resolve(userCwd, searchPath);
else baseDir = userCwd;
try {
const stats = await fs.stat(baseDir);
if (!stats.isDirectory())
throw new Error(`Path is not a directory: ${baseDir}`);
} catch (error) {
const err = error as NodeJS.ErrnoException;
if (err.code === "ENOENT")
throw new Error(`Directory does not exist: ${baseDir}`);
throw err;
// Explicit check for undefined/empty pattern (validateRequiredParams only checks key existence)
if (!pattern) {
throw new Error("Glob tool missing required parameter: pattern");
}
const allFiles = await walkDirectory(baseDir);
let matcher: (input: string) => boolean;
if (pattern.startsWith("**/")) {
const subPattern = pattern.slice(3);
matcher = picomatch(subPattern);
const matchedFiles = allFiles.filter((file) =>
matcher(path.basename(file)),
);
return applyFileLimit(matchedFiles.sort());
} else if (pattern.includes("**")) {
const fullPattern = path.join(baseDir, pattern);
matcher = picomatch(fullPattern, { dot: true });
const matchedFiles = allFiles.filter((file) => matcher(file));
return applyFileLimit(matchedFiles.sort());
} else {
matcher = picomatch(pattern, { dot: true });
const matchedFiles = allFiles.filter((file) =>
matcher(path.relative(baseDir, file)),
);
return applyFileLimit(matchedFiles.sort());
const userCwd = process.env.USER_CWD || process.cwd();
const baseDir = searchPath
? path.isAbsolute(searchPath)
? searchPath
: path.resolve(userCwd, searchPath)
: userCwd;
// Build ripgrep args for file listing
// --files: list files instead of searching content
// --glob: filter by pattern
// --hidden: include hidden files (dotfiles)
// --follow: follow symlinks
// --no-messages: suppress error messages for unreadable dirs
const rgArgs = [
"--files",
"--hidden",
"--follow",
"--no-messages",
"--glob",
pattern,
baseDir,
];
try {
const { stdout } = await execFileAsync(rgPath, rgArgs, {
maxBuffer: 50 * 1024 * 1024, // 50MB buffer for large file lists
cwd: userCwd,
});
const files = stdout.trim().split("\n").filter(Boolean).sort();
return applyFileLimit(files);
} catch (error) {
const err = error as Error & {
stdout?: string;
code?: string | number;
};
// ripgrep exits with code 1 when no files match - that's not an error
if (err.code === 1 || err.code === "1") {
return { files: [] };
}
// If stdout has content despite error, use it (partial results)
if (err.stdout?.trim()) {
const files = err.stdout.trim().split("\n").filter(Boolean).sort();
return applyFileLimit(files);
}
throw new Error(`Glob failed: ${err.message || "Unknown error"}`);
}
}