fix: use ripgrep for Glob tool to prevent stack overflow (#402)
Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
@@ -1,13 +1,31 @@
|
||||
import { promises as fs } from "node:fs";
|
||||
import { execFile } from "node:child_process";
|
||||
import { createRequire } from "node:module";
|
||||
import * as path from "node:path";
|
||||
import picomatch from "picomatch";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { promisify } from "node:util";
|
||||
import { LIMITS } from "./truncation.js";
|
||||
import { validateRequiredParams } from "./validation.js";
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
function getRipgrepPath(): string {
|
||||
try {
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const require = createRequire(__filename);
|
||||
const rgPackage = require("@vscode/ripgrep");
|
||||
return rgPackage.rgPath;
|
||||
} catch (_error) {
|
||||
return "rg";
|
||||
}
|
||||
}
|
||||
|
||||
const rgPath = getRipgrepPath();
|
||||
|
||||
interface GlobArgs {
|
||||
pattern: string;
|
||||
path?: string;
|
||||
}
|
||||
|
||||
interface GlobResult {
|
||||
files: string[];
|
||||
truncated?: boolean;
|
||||
@@ -21,7 +39,6 @@ function applyFileLimit(files: string[]): GlobResult {
|
||||
}
|
||||
|
||||
const truncatedFiles = files.slice(0, LIMITS.GLOB_MAX_FILES);
|
||||
// Add truncation notice as last entry
|
||||
truncatedFiles.push(
|
||||
`\n[Output truncated: showing ${LIMITS.GLOB_MAX_FILES.toLocaleString()} of ${totalFiles.toLocaleString()} files.]`,
|
||||
);
|
||||
@@ -33,66 +50,64 @@ function applyFileLimit(files: string[]): GlobResult {
|
||||
};
|
||||
}
|
||||
|
||||
async function walkDirectory(dir: string): Promise<string[]> {
|
||||
const files: string[] = [];
|
||||
try {
|
||||
const entries = await fs.readdir(dir, { withFileTypes: true });
|
||||
for (const entry of entries) {
|
||||
const fullPath = path.join(dir, entry.name);
|
||||
if (entry.isDirectory()) {
|
||||
if (entry.name === "node_modules" || entry.name === ".git") continue;
|
||||
const subFiles = await walkDirectory(fullPath);
|
||||
files.push(...subFiles);
|
||||
} else if (entry.isFile()) {
|
||||
files.push(fullPath);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
const err = error as NodeJS.ErrnoException;
|
||||
if (err.code !== "EACCES" && err.code !== "EPERM") throw err;
|
||||
}
|
||||
return files;
|
||||
}
|
||||
|
||||
export async function glob(args: GlobArgs): Promise<GlobResult> {
|
||||
validateRequiredParams(args, ["pattern"], "Glob");
|
||||
const { pattern, path: searchPath } = args;
|
||||
const userCwd = process.env.USER_CWD || process.cwd();
|
||||
let baseDir: string;
|
||||
if (searchPath)
|
||||
baseDir = path.isAbsolute(searchPath)
|
||||
? searchPath
|
||||
: path.resolve(userCwd, searchPath);
|
||||
else baseDir = userCwd;
|
||||
try {
|
||||
const stats = await fs.stat(baseDir);
|
||||
if (!stats.isDirectory())
|
||||
throw new Error(`Path is not a directory: ${baseDir}`);
|
||||
} catch (error) {
|
||||
const err = error as NodeJS.ErrnoException;
|
||||
if (err.code === "ENOENT")
|
||||
throw new Error(`Directory does not exist: ${baseDir}`);
|
||||
throw err;
|
||||
|
||||
// Explicit check for undefined/empty pattern (validateRequiredParams only checks key existence)
|
||||
if (!pattern) {
|
||||
throw new Error("Glob tool missing required parameter: pattern");
|
||||
}
|
||||
const allFiles = await walkDirectory(baseDir);
|
||||
let matcher: (input: string) => boolean;
|
||||
if (pattern.startsWith("**/")) {
|
||||
const subPattern = pattern.slice(3);
|
||||
matcher = picomatch(subPattern);
|
||||
const matchedFiles = allFiles.filter((file) =>
|
||||
matcher(path.basename(file)),
|
||||
);
|
||||
return applyFileLimit(matchedFiles.sort());
|
||||
} else if (pattern.includes("**")) {
|
||||
const fullPattern = path.join(baseDir, pattern);
|
||||
matcher = picomatch(fullPattern, { dot: true });
|
||||
const matchedFiles = allFiles.filter((file) => matcher(file));
|
||||
return applyFileLimit(matchedFiles.sort());
|
||||
} else {
|
||||
matcher = picomatch(pattern, { dot: true });
|
||||
const matchedFiles = allFiles.filter((file) =>
|
||||
matcher(path.relative(baseDir, file)),
|
||||
);
|
||||
return applyFileLimit(matchedFiles.sort());
|
||||
const userCwd = process.env.USER_CWD || process.cwd();
|
||||
|
||||
const baseDir = searchPath
|
||||
? path.isAbsolute(searchPath)
|
||||
? searchPath
|
||||
: path.resolve(userCwd, searchPath)
|
||||
: userCwd;
|
||||
|
||||
// Build ripgrep args for file listing
|
||||
// --files: list files instead of searching content
|
||||
// --glob: filter by pattern
|
||||
// --hidden: include hidden files (dotfiles)
|
||||
// --follow: follow symlinks
|
||||
// --no-messages: suppress error messages for unreadable dirs
|
||||
const rgArgs = [
|
||||
"--files",
|
||||
"--hidden",
|
||||
"--follow",
|
||||
"--no-messages",
|
||||
"--glob",
|
||||
pattern,
|
||||
baseDir,
|
||||
];
|
||||
|
||||
try {
|
||||
const { stdout } = await execFileAsync(rgPath, rgArgs, {
|
||||
maxBuffer: 50 * 1024 * 1024, // 50MB buffer for large file lists
|
||||
cwd: userCwd,
|
||||
});
|
||||
|
||||
const files = stdout.trim().split("\n").filter(Boolean).sort();
|
||||
|
||||
return applyFileLimit(files);
|
||||
} catch (error) {
|
||||
const err = error as Error & {
|
||||
stdout?: string;
|
||||
code?: string | number;
|
||||
};
|
||||
|
||||
// ripgrep exits with code 1 when no files match - that's not an error
|
||||
if (err.code === 1 || err.code === "1") {
|
||||
return { files: [] };
|
||||
}
|
||||
|
||||
// If stdout has content despite error, use it (partial results)
|
||||
if (err.stdout?.trim()) {
|
||||
const files = err.stdout.trim().split("\n").filter(Boolean).sort();
|
||||
return applyFileLimit(files);
|
||||
}
|
||||
|
||||
throw new Error(`Glob failed: ${err.message || "Unknown error"}`);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user