feat: file indexing (#1352)

This commit is contained in:
Shelley Pham
2026-03-11 14:58:30 -07:00
committed by GitHub
parent 0a07bf7343
commit 0a5e3d6132
12 changed files with 1849 additions and 114 deletions

View File

@@ -0,0 +1,747 @@
import { createHash } from "node:crypto";
import type { Stats as FsStats } from "node:fs";
import {
existsSync,
mkdirSync,
readdirSync,
readFileSync,
statSync,
writeFileSync,
} from "node:fs";
import { homedir } from "node:os";
import { join, normalize, relative, sep } from "node:path";
import { debugLog } from "../../utils/debug";
import { readIntSetting } from "../../utils/lettaSettings";
import { shouldExcludeEntry } from "./fileSearchConfig";
/**
 * One indexed filesystem entry. `lowerPath` and `parent` are precomputed at
 * build time so searchFileIndex can filter without re-deriving them per query.
 */
interface FileIndexEntry {
path: string;
type: "file" | "dir";
// path.toLowerCase(), cached for case-insensitive substring matching.
lowerPath: string;
// Parent directory portion of path ("" for top-level entries); see normalizeParent.
parent: string;
}
/** Arguments for searchFileIndex. */
interface SearchFileIndexOptions {
searchDir: string;
pattern: string;
// When false, only direct children of searchDir are returned.
deep: boolean;
maxResults: number;
}
/**
 * Persisted per-path stat snapshot used for change detection.
 * mtimeMs/ino come from fs.Stats; `size` is only recorded for files.
 */
interface FileStats {
type: "file" | "dir";
mtimeMs: number;
ino: number;
size?: number;
}
// Keyed by workspace-relative path.
type StatsMap = Record<string, FileStats>;
// Workspace-relative path → merkle hash of that file/directory.
type MerkleMap = Record<string, string>;
/** Public search-result shape returned by searchFileIndex. */
export interface FileMatch {
path: string;
type: "file" | "dir";
}
// Directories nested deeper than this are hashed as "__truncated__" and not
// walked; such files fall back to the on-demand disk scan.
const MAX_INDEX_DEPTH = 12;
const PROJECT_INDEX_FILENAME = "file-index.json";
// Read from ~/.letta/.lettasettings (MAX_ENTRIES), falling back to 50 000.
// The file is auto-created with comments on first run so users can find it.
const MAX_CACHE_ENTRIES = readIntSetting("MAX_ENTRIES", 50_000);
// Module-level session state: the in-memory search cache fed by ensureFileIndex.
let cachedEntries: FileIndexEntry[] = [];
// Kept in sync with cachedEntries for O(1) membership checks in addEntriesToCache.
let cachedEntryPaths = new Set<string>();
// In-flight build promise — deduplicates concurrent ensureFileIndex() calls.
let buildPromise: Promise<void> | null = null;
// Set once a build succeeds; cleared by refreshFileIndex().
let hasCompletedBuild = false;
/** JSON shape persisted to ~/.letta/projects/<workspace>/file-index.json. */
interface FileIndexCache {
metadata: {
// Merkle hash of the workspace root; equal hashes mean "nothing changed".
rootHash: string;
};
entries: FileIndexEntry[];
merkle: MerkleMap;
stats: StatsMap;
}
/**
 * A disk cache pre-processed for incremental rebuilds: sorted key arrays
 * (merkleKeys/statsKeys) enable binary-searched prefix lookups, and
 * entryPaths mirrors entries (which buildIndex keeps path-sorted).
 */
interface PreviousIndexData {
entries: FileIndexEntry[];
entryPaths: string[];
merkle: MerkleMap;
merkleKeys: string[];
stats: StatsMap;
statsKeys: string[];
}
/** Mutable accumulator threaded through the recursive build. */
interface BuildContext {
// Entries discovered this run (drives periodic event-loop yields).
newEntryCount: number;
// True when MAX_INDEX_DEPTH was hit anywhere in the tree.
truncated: boolean;
}
/** Output of buildIndex. */
interface FileIndexBuildResult {
entries: FileIndexEntry[];
merkle: MerkleMap;
stats: StatsMap;
rootHash: string;
truncated: boolean;
}
/**
 * Return the parent portion of a relative path ("" for top-level entries).
 * Splits on the platform separator, matching how entry paths are produced
 * via path.relative().
 */
function normalizeParent(relativePath: string): string {
  if (!relativePath) {
    return "";
  }
  const cut = relativePath.lastIndexOf(sep);
  if (cut < 0) {
    return "";
  }
  return relativePath.slice(0, cut);
}
/** SHA-256 hex digest of an arbitrary string. */
function hashValue(input: string): string {
  const hasher = createHash("sha256");
  hasher.update(input);
  return hasher.digest("hex");
}
/**
 * Binary search: index of the first element in `sorted` that is >= `target`.
 * Returns sorted.length when every element compares below the target.
 */
function lowerBound(sorted: string[], target: string): number {
  let lo = 0;
  let hi = sorted.length;
  while (lo < hi) {
    const mid = Math.floor((lo + hi) / 2);
    const candidate = sorted[mid] ?? "";
    if (candidate < target) {
      lo = mid + 1;
    } else {
      hi = mid;
    }
  }
  return lo;
}
/**
 * Locate the half-open [start, end) range of elements in `sorted` that begin
 * with `prefix`. The lower bound is found by binary search (inlined so the
 * function is self-contained); the end advances linearly while the prefix
 * keeps matching.
 */
function findPrefixRange(sorted: string[], prefix: string): [number, number] {
  let lo = 0;
  let hi = sorted.length;
  while (lo < hi) {
    const mid = (lo + hi) >> 1;
    if ((sorted[mid] ?? "") < prefix) {
      lo = mid + 1;
    } else {
      hi = mid;
    }
  }
  const start = lo;
  let end = start;
  while (end < sorted.length) {
    const value = sorted[end];
    if (value === undefined || !value.startsWith(prefix)) {
      break;
    }
    end++;
  }
  return [start, end];
}
/**
 * Snapshot a disk cache into the lookup-friendly shape used during rebuilds:
 * entry paths in original (path-sorted) order, plus sorted key arrays for the
 * merkle and stats maps so findPrefixRange can binary-search them.
 * The stats map is shallow-copied; entries and merkle are shared by reference.
 */
function preparePreviousIndexData(cache: FileIndexCache): PreviousIndexData {
  const statsCopy: StatsMap = { ...cache.stats };
  return {
    entries: cache.entries,
    entryPaths: cache.entries.map((e) => e.path),
    merkle: cache.merkle,
    merkleKeys: Object.keys(cache.merkle).sort(),
    stats: statsCopy,
    statsKeys: Object.keys(statsCopy).sort(),
  };
}
/**
 * Copy every cached entry underneath `path` into `targetEntries`.
 * An empty path means "the whole workspace", in which case all previous
 * entries are appended wholesale.
 *
 * NOTE(review): the child prefix is built with a literal "/" while entry
 * paths come from path.relative() (OS separator) — confirm this agrees on
 * Windows before relying on subtree reuse there.
 */
function appendSubtreeEntries(
  targetEntries: FileIndexEntry[],
  previous: PreviousIndexData,
  path: string,
): void {
  if (path === "") {
    for (let i = 0; i < previous.entries.length; i++) {
      const entry = previous.entries[i];
      if (entry !== undefined) {
        targetEntries.push(entry);
      }
    }
    return;
  }
  // Do NOT push the directory's own entry here — the parent loop already did
  // that before making the recursive buildDirectory call. Only copy children.
  const [start, end] = findPrefixRange(previous.entryPaths, `${path}/`);
  for (let i = start; i < end; i++) {
    const candidate = previous.entries[i];
    if (candidate !== undefined) {
      targetEntries.push(candidate);
    }
  }
}
/**
 * Copy the merkle hash of `path` (when present) plus the hashes of all of
 * its descendants from the previous index into `target`. An empty path
 * copies the entire previous merkle map.
 */
function copyMerkleSubtree(
  previous: PreviousIndexData,
  path: string,
  target: MerkleMap,
): void {
  const ownHash = path === "" ? undefined : previous.merkle[path];
  if (ownHash) {
    target[path] = ownHash;
  }
  let start: number;
  let end: number;
  if (path === "") {
    start = 0;
    end = previous.merkleKeys.length;
  } else {
    [start, end] = findPrefixRange(previous.merkleKeys, `${path}/`);
  }
  for (let i = start; i < end; i++) {
    const key = previous.merkleKeys[i];
    if (key !== undefined) {
      target[key] = previous.merkle[key] ?? "";
    }
  }
}
/**
 * Copy the stats record of `path` (when present) plus the records of all of
 * its descendants from the previous index into `target`. An empty path
 * copies the entire previous stats map.
 */
function copyStatsSubtree(
  previous: PreviousIndexData,
  path: string,
  target: StatsMap,
): void {
  const ownStats = path === "" ? undefined : previous.stats[path];
  if (ownStats) {
    target[path] = ownStats;
  }
  let start: number;
  let end: number;
  if (path === "") {
    start = 0;
    end = previous.statsKeys.length;
  } else {
    [start, end] = findPrefixRange(previous.statsKeys, `${path}/`);
  }
  for (let i = start; i < end; i++) {
    const key = previous.statsKeys[i];
    if (key === undefined) {
      continue;
    }
    const stats = previous.stats[key];
    if (stats !== undefined) {
      target[key] = stats;
    }
  }
}
/**
 * Derive the set of immediate child names a directory had in the previous
 * index, based on the sorted statsKeys array. Used by shouldReuseDirectory
 * to detect deleted children.
 */
function collectPreviousChildNames(
  previous: PreviousIndexData,
  path: string,
): Set<string> {
  const children = new Set<string>();
  const prefix = path === "" ? "" : `${path}/`;
  // Binary search narrows the scan to keys under this directory; the root
  // prefix ("") matches every key, so the full range is used there.
  let range: [number, number];
  if (prefix === "") {
    range = [0, previous.statsKeys.length];
  } else {
    range = findPrefixRange(previous.statsKeys, prefix);
  }
  for (let i = range[0]; i < range[1]; i++) {
    const key = previous.statsKeys[i];
    if (key === undefined) {
      continue;
    }
    const tail = key.slice(prefix.length);
    const slash = tail.indexOf("/");
    const child = slash >= 0 ? tail.slice(0, slash) : tail;
    if (child !== "") {
      children.add(child);
    }
  }
  return children;
}
/**
 * Compare a persisted FileStats record against a live fs.Stats result.
 * Type, mtime and inode must all agree; for files the size must also match
 * when a size was recorded (records without one are accepted).
 */
function statsMatch(prev: FileStats, current: FsStats): boolean {
  const typeStillMatches =
    prev.type === "dir" ? current.isDirectory() : current.isFile();
  if (!typeStillMatches) {
    return false;
  }
  if (prev.mtimeMs !== current.mtimeMs) {
    return false;
  }
  if (prev.ino !== (current.ino ?? 0)) {
    return false;
  }
  if (prev.type === "dir") {
    return true;
  }
  // Size is optional in older cache records — only compare when present.
  return typeof prev.size === "number" ? prev.size === current.size : true;
}
function shouldReuseDirectory(
previous: PreviousIndexData | undefined,
path: string,
stats: FileStats,
childNames: string[],
childStats: Map<string, FsStats>,
): boolean {
if (!previous) {
return false;
}
const previousStats = previous.stats[path];
if (!previousStats || previousStats.type !== "dir") {
return false;
}
if (
previousStats.mtimeMs !== stats.mtimeMs ||
previousStats.ino !== stats.ino
) {
return false;
}
const previousChildNames = collectPreviousChildNames(previous, path);
const seen = new Set<string>();
for (const childName of childNames) {
const childPath = path === "" ? childName : `${path}/${childName}`;
const prevStats = previous.stats[childPath];
const currentStats = childStats.get(childName);
if (!prevStats || !currentStats) {
return false;
}
if (!statsMatch(prevStats, currentStats)) {
return false;
}
seen.add(childName);
}
if (seen.size !== previousChildNames.size) {
return false;
}
for (const name of previousChildNames) {
if (!seen.has(name)) {
return false;
}
}
return true;
}
/**
 * Recursively index `dir` (at workspace-relative `relativePath`), appending
 * discovered entries to `entries` and recording per-path hashes/stats in
 * `merkle`/`statsMap`. Returns the directory's merkle hash, which folds in
 * every child's name and hash so any change below bubbles up to the root.
 *
 * When `previous` is provided and shouldReuseDirectory() proves the subtree
 * is unchanged, the previous entries/hashes/stats are copied instead of
 * re-walking it. Unreadable directories hash as "__unreadable__"; those past
 * MAX_INDEX_DEPTH hash as "__truncated__" and set context.truncated.
 */
async function buildDirectory(
dir: string,
relativePath: string,
entries: FileIndexEntry[],
merkle: MerkleMap,
statsMap: StatsMap,
previous: PreviousIndexData | undefined,
depth: number,
context: BuildContext,
): Promise<string> {
let dirStats: FsStats;
try {
dirStats = statSync(dir);
} catch {
// Directory vanished or is permission-denied: record a sentinel hash so
// the parent still gets a stable value for this child.
const unreadableHash = hashValue("__unreadable__");
merkle[relativePath] = unreadableHash;
return unreadableHash;
}
const currentStats: FileStats = {
type: "dir",
mtimeMs: dirStats.mtimeMs,
ino: dirStats.ino ?? 0,
};
let dirEntries: string[];
try {
dirEntries = readdirSync(dir);
} catch {
const unreadableHash = hashValue("__unreadable__");
merkle[relativePath] = unreadableHash;
return unreadableHash;
}
// Stat every non-excluded child up front; children that fail to stat are
// skipped entirely (treated as if they do not exist).
const childNames: string[] = [];
const childStatsMap = new Map<string, FsStats>();
for (const entry of dirEntries) {
const entryRelPath =
relativePath === "" ? entry : `${relativePath}/${entry}`;
if (shouldExcludeEntry(entry, entryRelPath)) {
continue;
}
try {
const childStat = statSync(join(dir, entry));
childNames.push(entry);
childStatsMap.set(entry, childStat);
} catch {}
}
// Fast path: the previous index proves nothing under this directory
// changed, so copy its stats/entries/hashes wholesale and stop recursing.
if (
previous !== undefined &&
shouldReuseDirectory(
previous,
relativePath,
currentStats,
childNames,
childStatsMap,
)
) {
copyStatsSubtree(previous, relativePath, statsMap);
appendSubtreeEntries(entries, previous, relativePath);
copyMerkleSubtree(previous, relativePath, merkle);
return previous.merkle[relativePath] ?? hashValue("__reused__");
}
statsMap[relativePath] = currentStats;
if (depth >= MAX_INDEX_DEPTH) {
context.truncated = true;
const truncatedHash = hashValue("__truncated__");
merkle[relativePath] = truncatedHash;
return truncatedHash;
}
const childHashes: string[] = [];
for (const entry of childNames) {
// Yield to the event loop every 500 entries to keep the UI responsive
// during the initial walk of large workspaces.
if (context.newEntryCount > 0 && context.newEntryCount % 500 === 0) {
await new Promise<void>((resolve) => setImmediate(resolve));
}
const entryStat = childStatsMap.get(entry);
if (!entryStat) {
continue;
}
const fullPath = join(dir, entry);
const entryPath = relative(process.cwd(), fullPath);
if (!entryPath) {
continue;
}
if (entryStat.isDirectory()) {
entries.push({
path: entryPath,
type: "dir",
lowerPath: entryPath.toLowerCase(),
parent: normalizeParent(entryPath),
});
context.newEntryCount++;
const childHash = await buildDirectory(
fullPath,
entryPath,
entries,
merkle,
statsMap,
previous,
depth + 1,
context,
);
childHashes.push(`dir:${entry}:${childHash}`);
} else {
// Files hash from metadata only (path/size/mtime/ino) — content is
// never read, keeping the index cheap to build.
const fileHash = hashValue(
`${entryPath}:${entryStat.size}:${entryStat.mtimeMs}:${entryStat.ino ?? 0}`,
);
statsMap[entryPath] = {
type: "file",
mtimeMs: entryStat.mtimeMs,
ino: entryStat.ino ?? 0,
size: entryStat.size,
};
merkle[entryPath] = fileHash;
entries.push({
path: entryPath,
type: "file",
lowerPath: entryPath.toLowerCase(),
parent: normalizeParent(entryPath),
});
context.newEntryCount++;
childHashes.push(`file:${entry}:${fileHash}`);
}
}
// Sorting makes the directory hash independent of readdir ordering.
const dirHash = hashValue(childHashes.sort().join("|"));
merkle[relativePath] = dirHash;
return dirHash;
}
/**
 * Walk the workspace from process.cwd() and produce a fresh index, reusing
 * unchanged subtrees from `previous` when provided. Entries come back
 * path-sorted and de-duplicated.
 */
async function buildIndex(
  previous?: PreviousIndexData,
): Promise<FileIndexBuildResult> {
  const collected: FileIndexEntry[] = [];
  const merkleMap: MerkleMap = {};
  const stats: StatsMap = {};
  const ctx: BuildContext = { newEntryCount: 0, truncated: false };
  const rootHash = await buildDirectory(
    process.cwd(),
    "",
    collected,
    merkleMap,
    stats,
    previous,
    0,
    ctx,
  );
  collected.sort((a, b) => a.path.localeCompare(b.path));
  // Drop duplicate paths. A dirty cache reused through appendSubtreeEntries
  // can contain a directory entry the parent loop also pushed; writing the
  // cleaned list back to disk lets later sessions start from a clean cache.
  const seenPaths = new Set<string>();
  const unique: FileIndexEntry[] = [];
  for (const entry of collected) {
    if (!seenPaths.has(entry.path)) {
      seenPaths.add(entry.path);
      unique.push(entry);
    }
  }
  return {
    entries: unique,
    merkle: merkleMap,
    stats,
    rootHash,
    truncated: ctx.truncated,
  };
}
/**
 * Turn an absolute workspace path into a filesystem-safe directory name:
 * leading separators are removed, remaining separators / colons / whitespace
 * become underscores, and an empty result falls back to "workspace".
 */
function sanitizeWorkspacePath(workspacePath: string): string {
  const cleaned = normalize(workspacePath)
    .replace(/^[/\\]+/, "")
    .replace(/[/\\:]/g, "_")
    .replace(/\s+/g, "_");
  return cleaned === "" ? "workspace" : cleaned;
}
function getProjectStorageDir(): string {
const homeDir = homedir();
const sanitizedWorkspace = sanitizeWorkspacePath(process.cwd());
return join(homeDir, ".letta", "projects", sanitizedWorkspace);
}
/** Create the project storage directory (recursively) if missing; return it. */
function ensureProjectStorageDir(): string {
  const dir = getProjectStorageDir();
  if (!existsSync(dir)) {
    mkdirSync(dir, { recursive: true });
  }
  return dir;
}
/** Full path of the persisted index file for the current workspace. */
function getProjectIndexPath(): string {
  const storageDir = getProjectStorageDir();
  return join(storageDir, PROJECT_INDEX_FILENAME);
}
function loadCachedIndex(): FileIndexCache | null {
const indexPath = getProjectIndexPath();
if (!existsSync(indexPath)) {
return null;
}
try {
const content = readFileSync(indexPath, "utf-8");
const parsed = JSON.parse(content);
if (
parsed &&
parsed.metadata &&
typeof parsed.metadata.rootHash === "string" &&
Array.isArray(parsed.entries) &&
parsed.merkle &&
typeof parsed.merkle === "object"
) {
const merkle: MerkleMap = {};
for (const [key, value] of Object.entries(parsed.merkle)) {
if (typeof value === "string") {
merkle[key] = value;
}
}
const stats: StatsMap = {};
if (parsed.stats && typeof parsed.stats === "object") {
for (const [path, rawStats] of Object.entries(parsed.stats)) {
const sv = rawStats as Record<string, unknown>;
if (
sv &&
typeof sv["mtimeMs"] === "number" &&
typeof sv["ino"] === "number" &&
(sv["type"] === "file" || sv["type"] === "dir")
) {
stats[path] = {
type: sv["type"] as "file" | "dir",
mtimeMs: sv["mtimeMs"],
ino: sv["ino"],
};
}
}
}
return {
metadata: {
rootHash: parsed.metadata.rootHash,
},
entries: parsed.entries,
merkle,
stats,
};
}
} catch {
// Ignore parse errors
}
return null;
}
/**
 * Persist a build result to the per-workspace index file. Failures are
 * swallowed on purpose: the cache is an optimisation, so losing it must
 * never break search itself.
 */
function cacheProjectIndex(result: FileIndexBuildResult): void {
  const payload: FileIndexCache = {
    metadata: { rootHash: result.rootHash },
    entries: result.entries,
    merkle: result.merkle,
    stats: result.stats,
  };
  try {
    const indexPath = join(ensureProjectStorageDir(), PROJECT_INDEX_FILENAME);
    writeFileSync(indexPath, JSON.stringify(payload, null, 2), "utf-8");
  } catch {
    // Silently ignore persistence errors to avoid breaking search.
  }
}
/**
 * Build the in-memory search cache from a full entries list.
 * Sorts dirs first, then by mtime descending (most recently modified files
 * appear first in results), and caps at MAX_CACHE_ENTRIES.
 *
 * NOTE: buildIndex keeps entries sorted by path — that ordering is load-bearing
 * for the binary searches in appendSubtreeEntries/findPrefixRange. This helper
 * produces a separate mtime-sorted copy only for the in-memory search cache.
 */
function buildCachedEntries(
  entries: FileIndexEntry[],
  stats: StatsMap,
): { entries: FileIndexEntry[]; paths: Set<string> } {
  const mtimeOf = (entry: FileIndexEntry): number =>
    stats[entry.path]?.mtimeMs ?? 0;
  const ordered = entries
    .slice()
    .sort((a, b) => {
      const aIsDir = a.type === "dir";
      const bIsDir = b.type === "dir";
      if (aIsDir !== bIsDir) {
        return aIsDir ? -1 : 1;
      }
      return mtimeOf(b) - mtimeOf(a);
    })
    .slice(0, MAX_CACHE_ENTRIES);
  const paths = new Set<string>();
  for (const entry of ordered) {
    paths.add(entry.path);
  }
  return { entries: ordered, paths };
}
/**
 * Ensure the file index is built at least once per session.
 *
 * Concurrency: all concurrent callers share a single in-flight build via
 * buildPromise; once a build succeeds, hasCompletedBuild short-circuits
 * future calls until refreshFileIndex() resets it. On failure the promise
 * slot is cleared (in `finally`) so the next call retries.
 */
export function ensureFileIndex(): Promise<void> {
if (hasCompletedBuild) return Promise.resolve();
if (!buildPromise) {
let currentPromise!: Promise<void>;
currentPromise = (async () => {
let succeeded = false;
try {
const diskIndex = loadCachedIndex();
const previousData = diskIndex
? preparePreviousIndexData(diskIndex)
: undefined;
const buildResult = await buildIndex(previousData);
// Root hash unchanged: the disk cache is already current, so skip the
// rewrite and just (re)build the in-memory search cache.
if (diskIndex && diskIndex.metadata.rootHash === buildResult.rootHash) {
({ entries: cachedEntries, paths: cachedEntryPaths } =
buildCachedEntries(buildResult.entries, buildResult.stats));
succeeded = true;
return;
}
if (buildResult.truncated) {
debugLog(
"file-index",
`Index truncated: workspace exceeds ${MAX_INDEX_DEPTH} directory levels deep. ` +
`Files beyond that depth will fall back to disk search.`,
);
}
cacheProjectIndex(buildResult);
({ entries: cachedEntries, paths: cachedEntryPaths } =
buildCachedEntries(buildResult.entries, buildResult.stats));
succeeded = true;
} finally {
// Only clear buildPromise if it's still ours — refreshFileIndex may
// have already replaced it with a newer promise.
if (buildPromise === currentPromise) buildPromise = null;
if (succeeded) hasCompletedBuild = true;
}
})();
buildPromise = currentPromise;
}
return buildPromise;
}
/**
 * Force the next build: discard any in-flight build and the completed flag,
 * then immediately start a fresh ensureFileIndex() run.
 */
export function refreshFileIndex(): Promise<void> {
  buildPromise = null;
  hasCompletedBuild = false;
  return ensureFileIndex();
}
/**
 * Add newly discovered entries to the in-memory cache without a full rebuild.
 * Called when a disk scan finds files that weren't in the index (e.g. created
 * externally). Skips paths that are already cached.
 *
 * The initial build has priority — it fills the cache up to MAX_CACHE_ENTRIES
 * with the most recently modified files. Disk scan hits fill any remaining
 * space. Once the cap is reached, new entries are not added until the next
 * rebuild; the disk scan will still find them on demand.
 */
export function addEntriesToCache(matches: FileMatch[]): void {
  let remaining = MAX_CACHE_ENTRIES - cachedEntries.length;
  if (remaining <= 0) return;
  for (const match of matches) {
    if (remaining <= 0) break;
    if (cachedEntryPaths.has(match.path)) continue;
    cachedEntryPaths.add(match.path);
    cachedEntries.push({
      path: match.path,
      type: match.type,
      lowerPath: match.path.toLowerCase(),
      parent: normalizeParent(match.path),
    });
    remaining--;
  }
}
/**
 * Query the in-memory index. Matching rules:
 * - searchDir scopes results to a subtree ("." / "" means the whole workspace);
 * - unless `deep`, only direct children of searchDir are returned;
 * - `pattern` is a case-insensitive substring match on the full relative path;
 * - iteration stops once `maxResults` matches are collected. Entries are
 *   visited in cache order (dirs first, then most recently modified).
 */
export function searchFileIndex(options: SearchFileIndexOptions): FileMatch[] {
  const { searchDir, pattern, deep, maxResults } = options;
  const scope = searchDir === "." ? "" : searchDir;
  const scopePrefix = scope === "" ? "" : `${scope}${sep}`;
  const needle = pattern.toLowerCase();
  const matches: FileMatch[] = [];
  for (const entry of cachedEntries) {
    if (scope !== "") {
      const inScope =
        entry.path === scope || entry.path.startsWith(scopePrefix);
      if (!inScope) continue;
    }
    if (!deep && entry.parent !== scope) continue;
    if (needle !== "" && !entry.lowerPath.includes(needle)) continue;
    matches.push({ path: entry.path, type: entry.type });
    if (matches.length >= maxResults) break;
  }
  return matches;
}

View File

@@ -1,52 +1,13 @@
import { readdirSync, statSync } from "node:fs";
import { join, resolve } from "node:path";
import { join, relative, resolve } from "node:path";
import { debugLog } from "../../utils/debug";
interface FileMatch {
path: string;
type: "file" | "dir" | "url";
}
/**
* Directories to exclude from file search autocomplete.
* These are common dependency/build directories that cause lag when searched.
* All values are lowercase for case-insensitive matching (Windows compatibility).
*/
const IGNORED_DIRECTORIES = new Set([
// JavaScript/Node
"node_modules",
"dist",
"build",
".next",
".nuxt",
"bower_components",
// Python
"venv",
".venv",
"__pycache__",
".tox",
"env",
// Build outputs
"target", // Rust/Maven/Java
"out",
"coverage",
".cache",
]);
/**
* Check if a directory entry should be excluded from search results.
* Uses case-insensitive matching for Windows compatibility.
*/
function shouldExcludeEntry(entry: string): boolean {
// Skip hidden files/directories (starts with .)
if (entry.startsWith(".")) {
return true;
}
// Case-insensitive check for Windows compatibility
return IGNORED_DIRECTORIES.has(entry.toLowerCase());
}
import {
addEntriesToCache,
ensureFileIndex,
type FileMatch,
searchFileIndex,
} from "./fileIndex";
import { shouldHardExcludeEntry } from "./fileSearchConfig";
export function debounce<T extends (...args: never[]) => unknown>(
func: T,
@@ -75,6 +36,7 @@ function searchDirectoryRecursive(
results: FileMatch[] = [],
depth: number = 0,
maxDepth: number = 10,
lowerPattern: string = pattern.toLowerCase(),
): FileMatch[] {
if (results.length >= maxResults || depth >= maxDepth) {
return results;
@@ -84,23 +46,20 @@ function searchDirectoryRecursive(
const entries = readdirSync(dir);
for (const entry of entries) {
// Skip hidden files and common dependency/build directories
if (shouldExcludeEntry(entry)) {
continue;
}
try {
const fullPath = join(dir, entry);
const stats = statSync(fullPath);
const relativePath = relative(process.cwd(), fullPath);
const relativePath = fullPath.startsWith(process.cwd())
? fullPath.slice(process.cwd().length + 1)
: fullPath;
if (shouldHardExcludeEntry(entry)) {
continue;
}
const stats = statSync(fullPath);
// Check if entry matches the pattern (match against full relative path for partial path support)
const matches =
pattern.length === 0 ||
relativePath.toLowerCase().includes(pattern.toLowerCase());
relativePath.toLowerCase().includes(lowerPattern);
if (matches) {
results.push({
@@ -122,6 +81,7 @@ function searchDirectoryRecursive(
results,
depth + 1,
maxDepth,
lowerPattern,
);
}
} catch {}
@@ -182,68 +142,109 @@ export async function searchFiles(
// Use shallow search to avoid recursively walking the entire subtree.
const effectiveDeep = deep && searchPattern.length > 0;
if (effectiveDeep) {
// Deep search: recursively search subdirectories
// Use a shallower depth limit when searching outside the project directory
// to avoid walking massive sibling directory trees
const isOutsideCwd = !searchDir.startsWith(process.cwd());
const maxDepth = isOutsideCwd ? 3 : 10;
const deepResults = searchDirectoryRecursive(
searchDir,
searchPattern,
200,
[],
0,
maxDepth,
);
results.push(...deepResults);
} else {
// Shallow search: only current directory
let entries: string[] = [];
const relativeSearchDir = relative(process.cwd(), searchDir);
const normalizedSearchDir =
relativeSearchDir === "." ? "" : relativeSearchDir;
const insideWorkspace =
normalizedSearchDir === "" || !normalizedSearchDir.startsWith("..");
let indexSearchSucceeded = false;
if (insideWorkspace) {
try {
entries = readdirSync(searchDir);
} catch {
// Directory doesn't exist or can't be read
return [];
}
// Filter entries matching the search pattern
// If pattern is empty, show all entries (for when user just types "@")
// Also exclude common dependency/build directories
const matchingEntries = entries
.filter((entry) => !shouldExcludeEntry(entry))
.filter(
(entry) =>
searchPattern.length === 0 ||
entry.toLowerCase().includes(searchPattern.toLowerCase()),
await ensureFileIndex();
results.push(
...searchFileIndex({
searchDir: normalizedSearchDir,
pattern: searchPattern,
deep: effectiveDeep,
maxResults: effectiveDeep ? 200 : 50,
}),
);
indexSearchSucceeded = true;
} catch (error) {
debugLog(
"file-search",
"Indexed search failed, falling back to disk scan: %O",
error,
);
// Get stats for each matching entry
for (const entry of matchingEntries.slice(0, 50)) {
// Limit to 50 results
try {
const fullPath = join(searchDir, entry);
const stats = statSync(fullPath);
// Make path relative to cwd if possible
const relativePath = fullPath.startsWith(process.cwd())
? fullPath.slice(process.cwd().length + 1)
: fullPath;
results.push({
path: relativePath,
type: stats.isDirectory() ? "dir" : "file",
});
} catch {}
}
}
// Sort: directories first, then files, alphabetically within each group
results.sort((a, b) => {
if (a.type === "dir" && b.type !== "dir") return -1;
if (a.type !== "dir" && b.type === "dir") return 1;
return a.path.localeCompare(b.path);
});
if (!indexSearchSucceeded || results.length === 0) {
const diskResultsBefore = results.length;
if (effectiveDeep) {
// Deep search: recursively search subdirectories.
// Use a shallower depth limit when searching outside the project directory
// to avoid walking massive sibling directory trees.
const isOutsideCwd = normalizedSearchDir.startsWith("..");
const maxDepth = isOutsideCwd ? 3 : 10;
const deepResults = searchDirectoryRecursive(
searchDir,
searchPattern,
200,
[],
0,
maxDepth,
);
results.push(...deepResults);
} else {
// Shallow search: only one level, regardless of workspace location.
let entries: string[] = [];
try {
entries = readdirSync(searchDir);
} catch {
// Directory doesn't exist or can't be read
return [];
}
// Filter entries matching the search pattern.
// If pattern is empty, show all entries (for when user just types \"@\").
// Also exclude common dependency/build directories.
const lowerPattern = searchPattern.toLowerCase();
const matchingEntries = entries.filter(
(entry) =>
!shouldHardExcludeEntry(entry) &&
(searchPattern.length === 0 ||
entry.toLowerCase().includes(lowerPattern)),
);
// Get stats for each matching entry
for (const entry of matchingEntries.slice(0, 50)) {
// Limit to 50 results
try {
const fullPath = join(searchDir, entry);
const stats = statSync(fullPath);
const relativePath = relative(process.cwd(), fullPath);
results.push({
path: relativePath,
type: stats.isDirectory() ? "dir" : "file",
});
} catch {}
}
}
// If the index was working but just didn't have these files (created
// externally), add the newly found entries so future searches hit the
// cache instead of falling back to disk again.
if (indexSearchSucceeded && results.length > diskResultsBefore) {
addEntriesToCache(results.slice(diskResultsBefore));
}
}
// Only sort when the disk scan ran — its results come in arbitrary readdir
// order so we normalise to dirs-first alphabetical. When the index search
// succeeded the results already come out in mtime order (most recently
// modified first) from buildCachedEntries, so we leave that order intact.
if (!indexSearchSucceeded) {
results.sort((a, b) => {
if (a.type === "dir" && b.type !== "dir") return -1;
if (a.type !== "dir" && b.type === "dir") return 1;
return a.path.localeCompare(b.path);
});
}
} catch (error) {
// Return empty array on any error
debugLog("file-search", "File search error: %O", error);

View File

@@ -0,0 +1,104 @@
import picomatch from "picomatch";
import {
ensureLettaIgnoreFile,
readLettaIgnorePatterns,
} from "./ignoredDirectories";
/**
 * Hardcoded defaults — always excluded from both the file index and disk scans.
 * These cover the most common build/dependency directories across ecosystems.
 * Matched case-insensitively against the entry's basename (callers lowercase
 * the name before the lookup), so every value here must be lowercase.
 */
const DEFAULT_EXCLUDED = new Set([
// JavaScript / Node
"node_modules",
"bower_components",
// Build outputs
"dist",
"build",
"out",
"coverage",
// Frameworks
".next",
".nuxt",
// Python
"venv",
".venv",
"__pycache__",
".tox",
// Rust / Maven / Java
"target",
// Version control & tooling
".git",
".cache",
]);
/**
 * Pre-compiled matchers from .lettaignore, split by whether the pattern
 * is name-based (no slash → match against entry name) or path-based
 * (contains slash → match against the full relative path).
 * Compiled once at module load for performance.
 *
 * NOTE(review): patterns are read once at import time, so edits to
 * .lettaignore only take effect after a restart — confirm that is intended.
 */
const { nameMatchers, pathMatchers } = (() => {
// Create .lettaignore with defaults if the project doesn't have one yet.
// Must run before readLettaIgnorePatterns() so the file exists when we read it.
ensureLettaIgnoreFile();
const patterns = readLettaIgnorePatterns();
const nameMatchers: picomatch.Matcher[] = [];
const pathMatchers: picomatch.Matcher[] = [];
for (const raw of patterns) {
const normalized = raw.replace(/\/$/, ""); // strip trailing slash (directory hint)
if (normalized.includes("/")) {
pathMatchers.push(picomatch(normalized, { dot: true }));
} else {
nameMatchers.push(picomatch(normalized, { dot: true }));
}
}
return { nameMatchers, pathMatchers };
})();
/**
 * Returns true if the given entry should be excluded from the file index.
 * Applies both the hardcoded defaults and any .lettaignore patterns.
 *
 * Use this when building the index — .lettaignore controls what gets cached,
 * not what the user can ever find. For disk scan fallback paths, use
 * shouldHardExcludeEntry() so .lettaignore-matched files remain discoverable.
 *
 * @param name - The entry's basename (e.g. "node_modules", ".env")
 * @param relativePath - Optional path relative to cwd (e.g. "src/generated/foo.ts").
 *                       Required for path-based .lettaignore patterns to work.
 */
export function shouldExcludeEntry(
  name: string,
  relativePath?: string,
): boolean {
  // Fast path: hardcoded defaults (O(1) Set lookup).
  if (DEFAULT_EXCLUDED.has(name.toLowerCase())) {
    return true;
  }
  // Name-based .lettaignore patterns (e.g. *.log, vendor).
  for (const matchesName of nameMatchers) {
    if (matchesName(name)) {
      return true;
    }
  }
  // Path-based .lettaignore patterns (e.g. src/generated/**).
  if (relativePath) {
    for (const matchesPath of pathMatchers) {
      if (matchesPath(relativePath)) {
        return true;
      }
    }
  }
  return false;
}
/**
 * Returns true if the given entry should be excluded from disk scan fallbacks.
 * Only applies the hardcoded defaults — .lettaignore patterns are intentionally
 * skipped here so users can still find those files with an explicit @ search.
 *
 * @param name - The entry's basename (e.g. "node_modules", "dist")
 */
export function shouldHardExcludeEntry(name: string): boolean {
  const lowered = name.toLowerCase();
  return DEFAULT_EXCLUDED.has(lowered);
}

View File

@@ -0,0 +1,80 @@
import { existsSync, readFileSync, writeFileSync } from "node:fs";
import { join } from "node:path";
/**
 * Default contents written to a project's .lettaignore on first run.
 * The template text is user-facing documentation — keep its wording in sync
 * with the parsing rules implemented in parseLettaIgnore().
 */
const DEFAULT_LETTAIGNORE = `\
# .lettaignore — Letta Code file index exclusions
#
# Files and directories matching these patterns are excluded from the @ file
# search index (cache). They won't appear in autocomplete results by default,
# but can still be found if you type their path explicitly.
#
# Syntax: one pattern per line, supports globs (e.g. *.log, src/generated/**)
# Lines starting with # are comments.
#
# The following are always excluded (even from explicit search) and do not need
# to be listed here:
# node_modules dist build out coverage target bower_components
# .git .cache .next .nuxt venv .venv __pycache__ .tox
# Lock files
package-lock.json
yarn.lock
pnpm-lock.yaml
poetry.lock
Cargo.lock
# Logs
*.log
# OS artifacts
.DS_Store
Thumbs.db
`;
/**
 * Create a .lettaignore file in the project root with sensible defaults
 * if one does not already exist. Safe to call multiple times.
 *
 * @param cwd - Project root to write into; defaults to process.cwd().
 */
export function ensureLettaIgnoreFile(cwd: string = process.cwd()): void {
  const target = join(cwd, ".lettaignore");
  if (existsSync(target)) {
    return;
  }
  try {
    writeFileSync(target, DEFAULT_LETTAIGNORE, "utf-8");
  } catch {
    // Read-only filesystems etc. are fine to skip silently — the hardcoded
    // defaults in fileSearchConfig.ts still apply without the file.
  }
}
/**
 * Read glob patterns from a .lettaignore file in the given directory.
 * Returns an empty array if the file is missing or unreadable.
 *
 * Syntax:
 * - One pattern per line (supports globs: *.log, src/generated/**)
 * - Lines starting with # are comments
 * - Negations (!) are not currently supported and are silently skipped
 * - A trailing / is treated as a directory hint and stripped before matching
 *
 * @param cwd - Directory containing the file; defaults to process.cwd().
 */
export function readLettaIgnorePatterns(cwd: string = process.cwd()): string[] {
  const target = join(cwd, ".lettaignore");
  if (!existsSync(target)) {
    return [];
  }
  try {
    return parseLettaIgnore(readFileSync(target, "utf-8"));
  } catch {
    return [];
  }
}
/**
 * Split .lettaignore content into usable glob patterns: each line is trimmed,
 * then blanks, # comments, and (unsupported) ! negations are dropped.
 */
function parseLettaIgnore(content: string): string[] {
  const patterns: string[] = [];
  for (const rawLine of content.split("\n")) {
    const line = rawLine.trim();
    if (line === "" || line.startsWith("#") || line.startsWith("!")) {
      continue;
    }
    patterns.push(line);
  }
  return patterns;
}