feat: file indexing (#1352)

This commit is contained in:
Shelley Pham
2026-03-11 14:58:30 -07:00
committed by GitHub
parent 0a07bf7343
commit 0a5e3d6132
12 changed files with 1849 additions and 114 deletions

View File

@@ -0,0 +1,747 @@
import { createHash } from "node:crypto";
import type { Stats as FsStats } from "node:fs";
import {
existsSync,
mkdirSync,
readdirSync,
readFileSync,
statSync,
writeFileSync,
} from "node:fs";
import { homedir } from "node:os";
import { join, normalize, relative, sep } from "node:path";
import { debugLog } from "../../utils/debug";
import { readIntSetting } from "../../utils/lettaSettings";
import { shouldExcludeEntry } from "./fileSearchConfig";
/**
 * One indexed filesystem entry. `lowerPath` and `parent` are precomputed at
 * build time so searchFileIndex can filter without re-deriving them per query.
 */
interface FileIndexEntry {
path: string;
type: "file" | "dir";
// path.toLowerCase(), cached for case-insensitive substring matching.
lowerPath: string;
// Parent directory portion of path ("" for top-level entries); see normalizeParent.
parent: string;
}
/** Arguments for searchFileIndex. */
interface SearchFileIndexOptions {
searchDir: string;
pattern: string;
// When false, only direct children of searchDir are returned.
deep: boolean;
maxResults: number;
}
/**
 * Persisted per-path stat snapshot used for change detection.
 * mtimeMs/ino come from fs.Stats; `size` is only recorded for files.
 */
interface FileStats {
type: "file" | "dir";
mtimeMs: number;
ino: number;
size?: number;
}
// Keyed by workspace-relative path.
type StatsMap = Record<string, FileStats>;
// Workspace-relative path → merkle hash of that file/directory.
type MerkleMap = Record<string, string>;
/** Public search-result shape returned by searchFileIndex. */
export interface FileMatch {
path: string;
type: "file" | "dir";
}
// Directories nested deeper than this are hashed as "__truncated__" and not
// walked; such files fall back to the on-demand disk scan.
const MAX_INDEX_DEPTH = 12;
const PROJECT_INDEX_FILENAME = "file-index.json";
// Read from ~/.letta/.lettasettings (MAX_ENTRIES), falling back to 50 000.
// The file is auto-created with comments on first run so users can find it.
const MAX_CACHE_ENTRIES = readIntSetting("MAX_ENTRIES", 50_000);
// Module-level session state: the in-memory search cache fed by ensureFileIndex.
let cachedEntries: FileIndexEntry[] = [];
// Kept in sync with cachedEntries for O(1) membership checks in addEntriesToCache.
let cachedEntryPaths = new Set<string>();
// In-flight build promise — deduplicates concurrent ensureFileIndex() calls.
let buildPromise: Promise<void> | null = null;
// Set once a build succeeds; cleared by refreshFileIndex().
let hasCompletedBuild = false;
/** JSON shape persisted to ~/.letta/projects/<workspace>/file-index.json. */
interface FileIndexCache {
metadata: {
// Merkle hash of the workspace root; equal hashes mean "nothing changed".
rootHash: string;
};
entries: FileIndexEntry[];
merkle: MerkleMap;
stats: StatsMap;
}
/**
 * A disk cache pre-processed for incremental rebuilds: sorted key arrays
 * (merkleKeys/statsKeys) enable binary-searched prefix lookups, and
 * entryPaths mirrors entries (which buildIndex keeps path-sorted).
 */
interface PreviousIndexData {
entries: FileIndexEntry[];
entryPaths: string[];
merkle: MerkleMap;
merkleKeys: string[];
stats: StatsMap;
statsKeys: string[];
}
/** Mutable accumulator threaded through the recursive build. */
interface BuildContext {
// Entries discovered this run (drives periodic event-loop yields).
newEntryCount: number;
// True when MAX_INDEX_DEPTH was hit anywhere in the tree.
truncated: boolean;
}
/** Output of buildIndex. */
interface FileIndexBuildResult {
entries: FileIndexEntry[];
merkle: MerkleMap;
stats: StatsMap;
rootHash: string;
truncated: boolean;
}
/**
 * Return the parent portion of a relative path ("" for top-level entries).
 * Splits on the platform separator, matching how entry paths are produced
 * via path.relative().
 */
function normalizeParent(relativePath: string): string {
  if (!relativePath) {
    return "";
  }
  const cut = relativePath.lastIndexOf(sep);
  if (cut < 0) {
    return "";
  }
  return relativePath.slice(0, cut);
}
/** SHA-256 hex digest of an arbitrary string. */
function hashValue(input: string): string {
  const hasher = createHash("sha256");
  hasher.update(input);
  return hasher.digest("hex");
}
/**
 * Binary search: index of the first element in `sorted` that is >= `target`.
 * Returns sorted.length when every element compares below the target.
 */
function lowerBound(sorted: string[], target: string): number {
  let lo = 0;
  let hi = sorted.length;
  while (lo < hi) {
    const mid = Math.floor((lo + hi) / 2);
    const candidate = sorted[mid] ?? "";
    if (candidate < target) {
      lo = mid + 1;
    } else {
      hi = mid;
    }
  }
  return lo;
}
/**
 * Locate the half-open [start, end) range of elements in `sorted` that begin
 * with `prefix`. The lower bound is found by binary search (inlined so the
 * function is self-contained); the end advances linearly while the prefix
 * keeps matching.
 */
function findPrefixRange(sorted: string[], prefix: string): [number, number] {
  let lo = 0;
  let hi = sorted.length;
  while (lo < hi) {
    const mid = (lo + hi) >> 1;
    if ((sorted[mid] ?? "") < prefix) {
      lo = mid + 1;
    } else {
      hi = mid;
    }
  }
  const start = lo;
  let end = start;
  while (end < sorted.length) {
    const value = sorted[end];
    if (value === undefined || !value.startsWith(prefix)) {
      break;
    }
    end++;
  }
  return [start, end];
}
/**
 * Snapshot a disk cache into the lookup-friendly shape used during rebuilds:
 * entry paths in original (path-sorted) order, plus sorted key arrays for the
 * merkle and stats maps so findPrefixRange can binary-search them.
 * The stats map is shallow-copied; entries and merkle are shared by reference.
 */
function preparePreviousIndexData(cache: FileIndexCache): PreviousIndexData {
  const statsCopy: StatsMap = { ...cache.stats };
  return {
    entries: cache.entries,
    entryPaths: cache.entries.map((e) => e.path),
    merkle: cache.merkle,
    merkleKeys: Object.keys(cache.merkle).sort(),
    stats: statsCopy,
    statsKeys: Object.keys(statsCopy).sort(),
  };
}
/**
 * Copy every cached entry underneath `path` into `targetEntries`.
 * An empty path means "the whole workspace", in which case all previous
 * entries are appended wholesale.
 *
 * NOTE(review): the child prefix is built with a literal "/" while entry
 * paths come from path.relative() (OS separator) — confirm this agrees on
 * Windows before relying on subtree reuse there.
 */
function appendSubtreeEntries(
  targetEntries: FileIndexEntry[],
  previous: PreviousIndexData,
  path: string,
): void {
  if (path === "") {
    for (let i = 0; i < previous.entries.length; i++) {
      const entry = previous.entries[i];
      if (entry !== undefined) {
        targetEntries.push(entry);
      }
    }
    return;
  }
  // Do NOT push the directory's own entry here — the parent loop already did
  // that before making the recursive buildDirectory call. Only copy children.
  const [start, end] = findPrefixRange(previous.entryPaths, `${path}/`);
  for (let i = start; i < end; i++) {
    const candidate = previous.entries[i];
    if (candidate !== undefined) {
      targetEntries.push(candidate);
    }
  }
}
/**
 * Copy the merkle hash of `path` (when present) plus the hashes of all of
 * its descendants from the previous index into `target`. An empty path
 * copies the entire previous merkle map.
 */
function copyMerkleSubtree(
  previous: PreviousIndexData,
  path: string,
  target: MerkleMap,
): void {
  const ownHash = path === "" ? undefined : previous.merkle[path];
  if (ownHash) {
    target[path] = ownHash;
  }
  let start: number;
  let end: number;
  if (path === "") {
    start = 0;
    end = previous.merkleKeys.length;
  } else {
    [start, end] = findPrefixRange(previous.merkleKeys, `${path}/`);
  }
  for (let i = start; i < end; i++) {
    const key = previous.merkleKeys[i];
    if (key !== undefined) {
      target[key] = previous.merkle[key] ?? "";
    }
  }
}
/**
 * Copy the stats record of `path` (when present) plus the records of all of
 * its descendants from the previous index into `target`. An empty path
 * copies the entire previous stats map.
 */
function copyStatsSubtree(
  previous: PreviousIndexData,
  path: string,
  target: StatsMap,
): void {
  const ownStats = path === "" ? undefined : previous.stats[path];
  if (ownStats) {
    target[path] = ownStats;
  }
  let start: number;
  let end: number;
  if (path === "") {
    start = 0;
    end = previous.statsKeys.length;
  } else {
    [start, end] = findPrefixRange(previous.statsKeys, `${path}/`);
  }
  for (let i = start; i < end; i++) {
    const key = previous.statsKeys[i];
    if (key === undefined) {
      continue;
    }
    const stats = previous.stats[key];
    if (stats !== undefined) {
      target[key] = stats;
    }
  }
}
/**
 * Derive the set of immediate child names a directory had in the previous
 * index, based on the sorted statsKeys array. Used by shouldReuseDirectory
 * to detect deleted children.
 */
function collectPreviousChildNames(
  previous: PreviousIndexData,
  path: string,
): Set<string> {
  const children = new Set<string>();
  const prefix = path === "" ? "" : `${path}/`;
  // Binary search narrows the scan to keys under this directory; the root
  // prefix ("") matches every key, so the full range is used there.
  let range: [number, number];
  if (prefix === "") {
    range = [0, previous.statsKeys.length];
  } else {
    range = findPrefixRange(previous.statsKeys, prefix);
  }
  for (let i = range[0]; i < range[1]; i++) {
    const key = previous.statsKeys[i];
    if (key === undefined) {
      continue;
    }
    const tail = key.slice(prefix.length);
    const slash = tail.indexOf("/");
    const child = slash >= 0 ? tail.slice(0, slash) : tail;
    if (child !== "") {
      children.add(child);
    }
  }
  return children;
}
/**
 * Compare a persisted FileStats record against a live fs.Stats result.
 * Type, mtime and inode must all agree; for files the size must also match
 * when a size was recorded (records without one are accepted).
 */
function statsMatch(prev: FileStats, current: FsStats): boolean {
  const typeStillMatches =
    prev.type === "dir" ? current.isDirectory() : current.isFile();
  if (!typeStillMatches) {
    return false;
  }
  if (prev.mtimeMs !== current.mtimeMs) {
    return false;
  }
  if (prev.ino !== (current.ino ?? 0)) {
    return false;
  }
  if (prev.type === "dir") {
    return true;
  }
  // Size is optional in older cache records — only compare when present.
  return typeof prev.size === "number" ? prev.size === current.size : true;
}
function shouldReuseDirectory(
previous: PreviousIndexData | undefined,
path: string,
stats: FileStats,
childNames: string[],
childStats: Map<string, FsStats>,
): boolean {
if (!previous) {
return false;
}
const previousStats = previous.stats[path];
if (!previousStats || previousStats.type !== "dir") {
return false;
}
if (
previousStats.mtimeMs !== stats.mtimeMs ||
previousStats.ino !== stats.ino
) {
return false;
}
const previousChildNames = collectPreviousChildNames(previous, path);
const seen = new Set<string>();
for (const childName of childNames) {
const childPath = path === "" ? childName : `${path}/${childName}`;
const prevStats = previous.stats[childPath];
const currentStats = childStats.get(childName);
if (!prevStats || !currentStats) {
return false;
}
if (!statsMatch(prevStats, currentStats)) {
return false;
}
seen.add(childName);
}
if (seen.size !== previousChildNames.size) {
return false;
}
for (const name of previousChildNames) {
if (!seen.has(name)) {
return false;
}
}
return true;
}
/**
 * Recursively index `dir` (at workspace-relative `relativePath`), appending
 * discovered entries to `entries` and recording per-path hashes/stats in
 * `merkle`/`statsMap`. Returns the directory's merkle hash, which folds in
 * every child's name and hash so any change below bubbles up to the root.
 *
 * When `previous` is provided and shouldReuseDirectory() proves the subtree
 * is unchanged, the previous entries/hashes/stats are copied instead of
 * re-walking it. Unreadable directories hash as "__unreadable__"; those past
 * MAX_INDEX_DEPTH hash as "__truncated__" and set context.truncated.
 */
async function buildDirectory(
dir: string,
relativePath: string,
entries: FileIndexEntry[],
merkle: MerkleMap,
statsMap: StatsMap,
previous: PreviousIndexData | undefined,
depth: number,
context: BuildContext,
): Promise<string> {
let dirStats: FsStats;
try {
dirStats = statSync(dir);
} catch {
// Directory vanished or is permission-denied: record a sentinel hash so
// the parent still gets a stable value for this child.
const unreadableHash = hashValue("__unreadable__");
merkle[relativePath] = unreadableHash;
return unreadableHash;
}
const currentStats: FileStats = {
type: "dir",
mtimeMs: dirStats.mtimeMs,
ino: dirStats.ino ?? 0,
};
let dirEntries: string[];
try {
dirEntries = readdirSync(dir);
} catch {
const unreadableHash = hashValue("__unreadable__");
merkle[relativePath] = unreadableHash;
return unreadableHash;
}
// Stat every non-excluded child up front; children that fail to stat are
// skipped entirely (treated as if they do not exist).
const childNames: string[] = [];
const childStatsMap = new Map<string, FsStats>();
for (const entry of dirEntries) {
const entryRelPath =
relativePath === "" ? entry : `${relativePath}/${entry}`;
if (shouldExcludeEntry(entry, entryRelPath)) {
continue;
}
try {
const childStat = statSync(join(dir, entry));
childNames.push(entry);
childStatsMap.set(entry, childStat);
} catch {}
}
// Fast path: the previous index proves nothing under this directory
// changed, so copy its stats/entries/hashes wholesale and stop recursing.
if (
previous !== undefined &&
shouldReuseDirectory(
previous,
relativePath,
currentStats,
childNames,
childStatsMap,
)
) {
copyStatsSubtree(previous, relativePath, statsMap);
appendSubtreeEntries(entries, previous, relativePath);
copyMerkleSubtree(previous, relativePath, merkle);
return previous.merkle[relativePath] ?? hashValue("__reused__");
}
statsMap[relativePath] = currentStats;
if (depth >= MAX_INDEX_DEPTH) {
context.truncated = true;
const truncatedHash = hashValue("__truncated__");
merkle[relativePath] = truncatedHash;
return truncatedHash;
}
const childHashes: string[] = [];
for (const entry of childNames) {
// Yield to the event loop every 500 entries to keep the UI responsive
// during the initial walk of large workspaces.
if (context.newEntryCount > 0 && context.newEntryCount % 500 === 0) {
await new Promise<void>((resolve) => setImmediate(resolve));
}
const entryStat = childStatsMap.get(entry);
if (!entryStat) {
continue;
}
const fullPath = join(dir, entry);
const entryPath = relative(process.cwd(), fullPath);
if (!entryPath) {
continue;
}
if (entryStat.isDirectory()) {
entries.push({
path: entryPath,
type: "dir",
lowerPath: entryPath.toLowerCase(),
parent: normalizeParent(entryPath),
});
context.newEntryCount++;
const childHash = await buildDirectory(
fullPath,
entryPath,
entries,
merkle,
statsMap,
previous,
depth + 1,
context,
);
childHashes.push(`dir:${entry}:${childHash}`);
} else {
// Files hash from metadata only (path/size/mtime/ino) — content is
// never read, keeping the index cheap to build.
const fileHash = hashValue(
`${entryPath}:${entryStat.size}:${entryStat.mtimeMs}:${entryStat.ino ?? 0}`,
);
statsMap[entryPath] = {
type: "file",
mtimeMs: entryStat.mtimeMs,
ino: entryStat.ino ?? 0,
size: entryStat.size,
};
merkle[entryPath] = fileHash;
entries.push({
path: entryPath,
type: "file",
lowerPath: entryPath.toLowerCase(),
parent: normalizeParent(entryPath),
});
context.newEntryCount++;
childHashes.push(`file:${entry}:${fileHash}`);
}
}
// Sorting makes the directory hash independent of readdir ordering.
const dirHash = hashValue(childHashes.sort().join("|"));
merkle[relativePath] = dirHash;
return dirHash;
}
/**
 * Walk the workspace from process.cwd() and produce a fresh index, reusing
 * unchanged subtrees from `previous` when provided. Entries come back
 * path-sorted and de-duplicated.
 */
async function buildIndex(
  previous?: PreviousIndexData,
): Promise<FileIndexBuildResult> {
  const collected: FileIndexEntry[] = [];
  const merkleMap: MerkleMap = {};
  const stats: StatsMap = {};
  const ctx: BuildContext = { newEntryCount: 0, truncated: false };
  const rootHash = await buildDirectory(
    process.cwd(),
    "",
    collected,
    merkleMap,
    stats,
    previous,
    0,
    ctx,
  );
  collected.sort((a, b) => a.path.localeCompare(b.path));
  // Drop duplicate paths. A dirty cache reused through appendSubtreeEntries
  // can contain a directory entry the parent loop also pushed; writing the
  // cleaned list back to disk lets later sessions start from a clean cache.
  const seenPaths = new Set<string>();
  const unique: FileIndexEntry[] = [];
  for (const entry of collected) {
    if (!seenPaths.has(entry.path)) {
      seenPaths.add(entry.path);
      unique.push(entry);
    }
  }
  return {
    entries: unique,
    merkle: merkleMap,
    stats,
    rootHash,
    truncated: ctx.truncated,
  };
}
/**
 * Turn an absolute workspace path into a filesystem-safe directory name:
 * leading separators are removed, remaining separators / colons / whitespace
 * become underscores, and an empty result falls back to "workspace".
 */
function sanitizeWorkspacePath(workspacePath: string): string {
  const cleaned = normalize(workspacePath)
    .replace(/^[/\\]+/, "")
    .replace(/[/\\:]/g, "_")
    .replace(/\s+/g, "_");
  return cleaned === "" ? "workspace" : cleaned;
}
function getProjectStorageDir(): string {
const homeDir = homedir();
const sanitizedWorkspace = sanitizeWorkspacePath(process.cwd());
return join(homeDir, ".letta", "projects", sanitizedWorkspace);
}
/** Create the project storage directory (recursively) if missing; return it. */
function ensureProjectStorageDir(): string {
  const dir = getProjectStorageDir();
  if (!existsSync(dir)) {
    mkdirSync(dir, { recursive: true });
  }
  return dir;
}
/** Full path of the persisted index file for the current workspace. */
function getProjectIndexPath(): string {
  const storageDir = getProjectStorageDir();
  return join(storageDir, PROJECT_INDEX_FILENAME);
}
function loadCachedIndex(): FileIndexCache | null {
const indexPath = getProjectIndexPath();
if (!existsSync(indexPath)) {
return null;
}
try {
const content = readFileSync(indexPath, "utf-8");
const parsed = JSON.parse(content);
if (
parsed &&
parsed.metadata &&
typeof parsed.metadata.rootHash === "string" &&
Array.isArray(parsed.entries) &&
parsed.merkle &&
typeof parsed.merkle === "object"
) {
const merkle: MerkleMap = {};
for (const [key, value] of Object.entries(parsed.merkle)) {
if (typeof value === "string") {
merkle[key] = value;
}
}
const stats: StatsMap = {};
if (parsed.stats && typeof parsed.stats === "object") {
for (const [path, rawStats] of Object.entries(parsed.stats)) {
const sv = rawStats as Record<string, unknown>;
if (
sv &&
typeof sv["mtimeMs"] === "number" &&
typeof sv["ino"] === "number" &&
(sv["type"] === "file" || sv["type"] === "dir")
) {
stats[path] = {
type: sv["type"] as "file" | "dir",
mtimeMs: sv["mtimeMs"],
ino: sv["ino"],
};
}
}
}
return {
metadata: {
rootHash: parsed.metadata.rootHash,
},
entries: parsed.entries,
merkle,
stats,
};
}
} catch {
// Ignore parse errors
}
return null;
}
/**
 * Persist a build result to the per-workspace index file. Failures are
 * swallowed on purpose: the cache is an optimisation, so losing it must
 * never break search itself.
 */
function cacheProjectIndex(result: FileIndexBuildResult): void {
  const payload: FileIndexCache = {
    metadata: { rootHash: result.rootHash },
    entries: result.entries,
    merkle: result.merkle,
    stats: result.stats,
  };
  try {
    const indexPath = join(ensureProjectStorageDir(), PROJECT_INDEX_FILENAME);
    writeFileSync(indexPath, JSON.stringify(payload, null, 2), "utf-8");
  } catch {
    // Silently ignore persistence errors to avoid breaking search.
  }
}
/**
 * Build the in-memory search cache from a full entries list.
 * Sorts dirs first, then by mtime descending (most recently modified files
 * appear first in results), and caps at MAX_CACHE_ENTRIES.
 *
 * NOTE: buildIndex keeps entries sorted by path — that ordering is load-bearing
 * for the binary searches in appendSubtreeEntries/findPrefixRange. This helper
 * produces a separate mtime-sorted copy only for the in-memory search cache.
 */
function buildCachedEntries(
  entries: FileIndexEntry[],
  stats: StatsMap,
): { entries: FileIndexEntry[]; paths: Set<string> } {
  const mtimeOf = (entry: FileIndexEntry): number =>
    stats[entry.path]?.mtimeMs ?? 0;
  const ordered = entries
    .slice()
    .sort((a, b) => {
      const aIsDir = a.type === "dir";
      const bIsDir = b.type === "dir";
      if (aIsDir !== bIsDir) {
        return aIsDir ? -1 : 1;
      }
      return mtimeOf(b) - mtimeOf(a);
    })
    .slice(0, MAX_CACHE_ENTRIES);
  const paths = new Set<string>();
  for (const entry of ordered) {
    paths.add(entry.path);
  }
  return { entries: ordered, paths };
}
/**
 * Ensure the file index is built at least once per session.
 *
 * Concurrency: all concurrent callers share a single in-flight build via
 * buildPromise; once a build succeeds, hasCompletedBuild short-circuits
 * future calls until refreshFileIndex() resets it. On failure the promise
 * slot is cleared (in `finally`) so the next call retries.
 */
export function ensureFileIndex(): Promise<void> {
if (hasCompletedBuild) return Promise.resolve();
if (!buildPromise) {
let currentPromise!: Promise<void>;
currentPromise = (async () => {
let succeeded = false;
try {
const diskIndex = loadCachedIndex();
const previousData = diskIndex
? preparePreviousIndexData(diskIndex)
: undefined;
const buildResult = await buildIndex(previousData);
// Root hash unchanged: the disk cache is already current, so skip the
// rewrite and just (re)build the in-memory search cache.
if (diskIndex && diskIndex.metadata.rootHash === buildResult.rootHash) {
({ entries: cachedEntries, paths: cachedEntryPaths } =
buildCachedEntries(buildResult.entries, buildResult.stats));
succeeded = true;
return;
}
if (buildResult.truncated) {
debugLog(
"file-index",
`Index truncated: workspace exceeds ${MAX_INDEX_DEPTH} directory levels deep. ` +
`Files beyond that depth will fall back to disk search.`,
);
}
cacheProjectIndex(buildResult);
({ entries: cachedEntries, paths: cachedEntryPaths } =
buildCachedEntries(buildResult.entries, buildResult.stats));
succeeded = true;
} finally {
// Only clear buildPromise if it's still ours — refreshFileIndex may
// have already replaced it with a newer promise.
if (buildPromise === currentPromise) buildPromise = null;
if (succeeded) hasCompletedBuild = true;
}
})();
buildPromise = currentPromise;
}
return buildPromise;
}
/**
 * Force the next build: discard any in-flight build and the completed flag,
 * then immediately start a fresh ensureFileIndex() run.
 */
export function refreshFileIndex(): Promise<void> {
  buildPromise = null;
  hasCompletedBuild = false;
  return ensureFileIndex();
}
/**
 * Add newly discovered entries to the in-memory cache without a full rebuild.
 * Called when a disk scan finds files that weren't in the index (e.g. created
 * externally). Skips paths that are already cached.
 *
 * The initial build has priority — it fills the cache up to MAX_CACHE_ENTRIES
 * with the most recently modified files. Disk scan hits fill any remaining
 * space. Once the cap is reached, new entries are not added until the next
 * rebuild; the disk scan will still find them on demand.
 */
export function addEntriesToCache(matches: FileMatch[]): void {
  let remaining = MAX_CACHE_ENTRIES - cachedEntries.length;
  if (remaining <= 0) return;
  for (const match of matches) {
    if (remaining <= 0) break;
    if (cachedEntryPaths.has(match.path)) continue;
    cachedEntryPaths.add(match.path);
    cachedEntries.push({
      path: match.path,
      type: match.type,
      lowerPath: match.path.toLowerCase(),
      parent: normalizeParent(match.path),
    });
    remaining--;
  }
}
/**
 * Query the in-memory index. Matching rules:
 * - searchDir scopes results to a subtree ("." / "" means the whole workspace);
 * - unless `deep`, only direct children of searchDir are returned;
 * - `pattern` is a case-insensitive substring match on the full relative path;
 * - iteration stops once `maxResults` matches are collected. Entries are
 *   visited in cache order (dirs first, then most recently modified).
 */
export function searchFileIndex(options: SearchFileIndexOptions): FileMatch[] {
  const { searchDir, pattern, deep, maxResults } = options;
  const scope = searchDir === "." ? "" : searchDir;
  const scopePrefix = scope === "" ? "" : `${scope}${sep}`;
  const needle = pattern.toLowerCase();
  const matches: FileMatch[] = [];
  for (const entry of cachedEntries) {
    if (scope !== "") {
      const inScope =
        entry.path === scope || entry.path.startsWith(scopePrefix);
      if (!inScope) continue;
    }
    if (!deep && entry.parent !== scope) continue;
    if (needle !== "" && !entry.lowerPath.includes(needle)) continue;
    matches.push({ path: entry.path, type: entry.type });
    if (matches.length >= maxResults) break;
  }
  return matches;
}

View File

@@ -1,52 +1,13 @@
import { readdirSync, statSync } from "node:fs";
import { join, resolve } from "node:path";
import { join, relative, resolve } from "node:path";
import { debugLog } from "../../utils/debug";
interface FileMatch {
path: string;
type: "file" | "dir" | "url";
}
/**
* Directories to exclude from file search autocomplete.
* These are common dependency/build directories that cause lag when searched.
* All values are lowercase for case-insensitive matching (Windows compatibility).
*/
const IGNORED_DIRECTORIES = new Set([
// JavaScript/Node
"node_modules",
"dist",
"build",
".next",
".nuxt",
"bower_components",
// Python
"venv",
".venv",
"__pycache__",
".tox",
"env",
// Build outputs
"target", // Rust/Maven/Java
"out",
"coverage",
".cache",
]);
/**
* Check if a directory entry should be excluded from search results.
* Uses case-insensitive matching for Windows compatibility.
*/
function shouldExcludeEntry(entry: string): boolean {
// Skip hidden files/directories (starts with .)
if (entry.startsWith(".")) {
return true;
}
// Case-insensitive check for Windows compatibility
return IGNORED_DIRECTORIES.has(entry.toLowerCase());
}
import {
addEntriesToCache,
ensureFileIndex,
type FileMatch,
searchFileIndex,
} from "./fileIndex";
import { shouldHardExcludeEntry } from "./fileSearchConfig";
export function debounce<T extends (...args: never[]) => unknown>(
func: T,
@@ -75,6 +36,7 @@ function searchDirectoryRecursive(
results: FileMatch[] = [],
depth: number = 0,
maxDepth: number = 10,
lowerPattern: string = pattern.toLowerCase(),
): FileMatch[] {
if (results.length >= maxResults || depth >= maxDepth) {
return results;
@@ -84,23 +46,20 @@ function searchDirectoryRecursive(
const entries = readdirSync(dir);
for (const entry of entries) {
// Skip hidden files and common dependency/build directories
if (shouldExcludeEntry(entry)) {
continue;
}
try {
const fullPath = join(dir, entry);
const stats = statSync(fullPath);
const relativePath = relative(process.cwd(), fullPath);
const relativePath = fullPath.startsWith(process.cwd())
? fullPath.slice(process.cwd().length + 1)
: fullPath;
if (shouldHardExcludeEntry(entry)) {
continue;
}
const stats = statSync(fullPath);
// Check if entry matches the pattern (match against full relative path for partial path support)
const matches =
pattern.length === 0 ||
relativePath.toLowerCase().includes(pattern.toLowerCase());
relativePath.toLowerCase().includes(lowerPattern);
if (matches) {
results.push({
@@ -122,6 +81,7 @@ function searchDirectoryRecursive(
results,
depth + 1,
maxDepth,
lowerPattern,
);
}
} catch {}
@@ -182,68 +142,109 @@ export async function searchFiles(
// Use shallow search to avoid recursively walking the entire subtree.
const effectiveDeep = deep && searchPattern.length > 0;
if (effectiveDeep) {
// Deep search: recursively search subdirectories
// Use a shallower depth limit when searching outside the project directory
// to avoid walking massive sibling directory trees
const isOutsideCwd = !searchDir.startsWith(process.cwd());
const maxDepth = isOutsideCwd ? 3 : 10;
const deepResults = searchDirectoryRecursive(
searchDir,
searchPattern,
200,
[],
0,
maxDepth,
);
results.push(...deepResults);
} else {
// Shallow search: only current directory
let entries: string[] = [];
const relativeSearchDir = relative(process.cwd(), searchDir);
const normalizedSearchDir =
relativeSearchDir === "." ? "" : relativeSearchDir;
const insideWorkspace =
normalizedSearchDir === "" || !normalizedSearchDir.startsWith("..");
let indexSearchSucceeded = false;
if (insideWorkspace) {
try {
entries = readdirSync(searchDir);
} catch {
// Directory doesn't exist or can't be read
return [];
}
// Filter entries matching the search pattern
// If pattern is empty, show all entries (for when user just types "@")
// Also exclude common dependency/build directories
const matchingEntries = entries
.filter((entry) => !shouldExcludeEntry(entry))
.filter(
(entry) =>
searchPattern.length === 0 ||
entry.toLowerCase().includes(searchPattern.toLowerCase()),
await ensureFileIndex();
results.push(
...searchFileIndex({
searchDir: normalizedSearchDir,
pattern: searchPattern,
deep: effectiveDeep,
maxResults: effectiveDeep ? 200 : 50,
}),
);
indexSearchSucceeded = true;
} catch (error) {
debugLog(
"file-search",
"Indexed search failed, falling back to disk scan: %O",
error,
);
// Get stats for each matching entry
for (const entry of matchingEntries.slice(0, 50)) {
// Limit to 50 results
try {
const fullPath = join(searchDir, entry);
const stats = statSync(fullPath);
// Make path relative to cwd if possible
const relativePath = fullPath.startsWith(process.cwd())
? fullPath.slice(process.cwd().length + 1)
: fullPath;
results.push({
path: relativePath,
type: stats.isDirectory() ? "dir" : "file",
});
} catch {}
}
}
// Sort: directories first, then files, alphabetically within each group
results.sort((a, b) => {
if (a.type === "dir" && b.type !== "dir") return -1;
if (a.type !== "dir" && b.type === "dir") return 1;
return a.path.localeCompare(b.path);
});
if (!indexSearchSucceeded || results.length === 0) {
const diskResultsBefore = results.length;
if (effectiveDeep) {
// Deep search: recursively search subdirectories.
// Use a shallower depth limit when searching outside the project directory
// to avoid walking massive sibling directory trees.
const isOutsideCwd = normalizedSearchDir.startsWith("..");
const maxDepth = isOutsideCwd ? 3 : 10;
const deepResults = searchDirectoryRecursive(
searchDir,
searchPattern,
200,
[],
0,
maxDepth,
);
results.push(...deepResults);
} else {
// Shallow search: only one level, regardless of workspace location.
let entries: string[] = [];
try {
entries = readdirSync(searchDir);
} catch {
// Directory doesn't exist or can't be read
return [];
}
// Filter entries matching the search pattern.
// If pattern is empty, show all entries (for when user just types \"@\").
// Also exclude common dependency/build directories.
const lowerPattern = searchPattern.toLowerCase();
const matchingEntries = entries.filter(
(entry) =>
!shouldHardExcludeEntry(entry) &&
(searchPattern.length === 0 ||
entry.toLowerCase().includes(lowerPattern)),
);
// Get stats for each matching entry
for (const entry of matchingEntries.slice(0, 50)) {
// Limit to 50 results
try {
const fullPath = join(searchDir, entry);
const stats = statSync(fullPath);
const relativePath = relative(process.cwd(), fullPath);
results.push({
path: relativePath,
type: stats.isDirectory() ? "dir" : "file",
});
} catch {}
}
}
// If the index was working but just didn't have these files (created
// externally), add the newly found entries so future searches hit the
// cache instead of falling back to disk again.
if (indexSearchSucceeded && results.length > diskResultsBefore) {
addEntriesToCache(results.slice(diskResultsBefore));
}
}
// Only sort when the disk scan ran — its results come in arbitrary readdir
// order so we normalise to dirs-first alphabetical. When the index search
// succeeded the results already come out in mtime order (most recently
// modified first) from buildCachedEntries, so we leave that order intact.
if (!indexSearchSucceeded) {
results.sort((a, b) => {
if (a.type === "dir" && b.type !== "dir") return -1;
if (a.type !== "dir" && b.type === "dir") return 1;
return a.path.localeCompare(b.path);
});
}
} catch (error) {
// Return empty array on any error
debugLog("file-search", "File search error: %O", error);

View File

@@ -0,0 +1,104 @@
import picomatch from "picomatch";
import {
ensureLettaIgnoreFile,
readLettaIgnorePatterns,
} from "./ignoredDirectories";
/**
 * Hardcoded defaults — always excluded from both the file index and disk scans.
 * These cover the most common build/dependency directories across ecosystems.
 * Matched case-insensitively against the entry's basename (callers lowercase
 * the name before the lookup), so every value here must be lowercase.
 */
const DEFAULT_EXCLUDED = new Set([
// JavaScript / Node
"node_modules",
"bower_components",
// Build outputs
"dist",
"build",
"out",
"coverage",
// Frameworks
".next",
".nuxt",
// Python
"venv",
".venv",
"__pycache__",
".tox",
// Rust / Maven / Java
"target",
// Version control & tooling
".git",
".cache",
]);
/**
 * Pre-compiled matchers from .lettaignore, split by whether the pattern
 * is name-based (no slash → match against entry name) or path-based
 * (contains slash → match against the full relative path).
 * Compiled once at module load for performance.
 *
 * NOTE(review): patterns are read once at import time, so edits to
 * .lettaignore only take effect after a restart — confirm that is intended.
 */
const { nameMatchers, pathMatchers } = (() => {
// Create .lettaignore with defaults if the project doesn't have one yet.
// Must run before readLettaIgnorePatterns() so the file exists when we read it.
ensureLettaIgnoreFile();
const patterns = readLettaIgnorePatterns();
const nameMatchers: picomatch.Matcher[] = [];
const pathMatchers: picomatch.Matcher[] = [];
for (const raw of patterns) {
const normalized = raw.replace(/\/$/, ""); // strip trailing slash (directory hint)
if (normalized.includes("/")) {
pathMatchers.push(picomatch(normalized, { dot: true }));
} else {
nameMatchers.push(picomatch(normalized, { dot: true }));
}
}
return { nameMatchers, pathMatchers };
})();
/**
 * Returns true if the given entry should be excluded from the file index.
 * Applies both the hardcoded defaults and any .lettaignore patterns.
 *
 * Use this when building the index — .lettaignore controls what gets cached,
 * not what the user can ever find. For disk scan fallback paths, use
 * shouldHardExcludeEntry() so .lettaignore-matched files remain discoverable.
 *
 * @param name - The entry's basename (e.g. "node_modules", ".env")
 * @param relativePath - Optional path relative to cwd (e.g. "src/generated/foo.ts").
 *                       Required for path-based .lettaignore patterns to work.
 */
export function shouldExcludeEntry(
  name: string,
  relativePath?: string,
): boolean {
  // Fast path: hardcoded defaults (O(1) Set lookup).
  if (DEFAULT_EXCLUDED.has(name.toLowerCase())) {
    return true;
  }
  // Name-based .lettaignore patterns (e.g. *.log, vendor).
  for (const matchesName of nameMatchers) {
    if (matchesName(name)) {
      return true;
    }
  }
  // Path-based .lettaignore patterns (e.g. src/generated/**).
  if (relativePath) {
    for (const matchesPath of pathMatchers) {
      if (matchesPath(relativePath)) {
        return true;
      }
    }
  }
  return false;
}
/**
 * Returns true if the given entry should be excluded from disk scan fallbacks.
 * Only applies the hardcoded defaults — .lettaignore patterns are intentionally
 * skipped here so users can still find those files with an explicit @ search.
 *
 * @param name - The entry's basename (e.g. "node_modules", "dist")
 */
export function shouldHardExcludeEntry(name: string): boolean {
  const lowered = name.toLowerCase();
  return DEFAULT_EXCLUDED.has(lowered);
}

View File

@@ -0,0 +1,80 @@
import { existsSync, readFileSync, writeFileSync } from "node:fs";
import { join } from "node:path";
/**
 * Default contents written to a project's .lettaignore on first run.
 * The template text is user-facing documentation — keep its wording in sync
 * with the parsing rules implemented in parseLettaIgnore().
 */
const DEFAULT_LETTAIGNORE = `\
# .lettaignore — Letta Code file index exclusions
#
# Files and directories matching these patterns are excluded from the @ file
# search index (cache). They won't appear in autocomplete results by default,
# but can still be found if you type their path explicitly.
#
# Syntax: one pattern per line, supports globs (e.g. *.log, src/generated/**)
# Lines starting with # are comments.
#
# The following are always excluded (even from explicit search) and do not need
# to be listed here:
# node_modules dist build out coverage target bower_components
# .git .cache .next .nuxt venv .venv __pycache__ .tox
# Lock files
package-lock.json
yarn.lock
pnpm-lock.yaml
poetry.lock
Cargo.lock
# Logs
*.log
# OS artifacts
.DS_Store
Thumbs.db
`;
/**
 * Create a .lettaignore file in the project root with sensible defaults
 * if one does not already exist. Safe to call multiple times.
 *
 * @param cwd - Project root to write into; defaults to process.cwd().
 */
export function ensureLettaIgnoreFile(cwd: string = process.cwd()): void {
  const target = join(cwd, ".lettaignore");
  if (existsSync(target)) {
    return;
  }
  try {
    writeFileSync(target, DEFAULT_LETTAIGNORE, "utf-8");
  } catch {
    // Read-only filesystems etc. are fine to skip silently — the hardcoded
    // defaults in fileSearchConfig.ts still apply without the file.
  }
}
/**
 * Read glob patterns from a .lettaignore file in the given directory.
 * Returns an empty array if the file is missing or unreadable.
 *
 * Syntax:
 * - One pattern per line (supports globs: *.log, src/generated/**)
 * - Lines starting with # are comments
 * - Negations (!) are not currently supported and are silently skipped
 * - A trailing / is treated as a directory hint and stripped before matching
 *
 * @param cwd - Directory containing the file; defaults to process.cwd().
 */
export function readLettaIgnorePatterns(cwd: string = process.cwd()): string[] {
  const target = join(cwd, ".lettaignore");
  if (!existsSync(target)) {
    return [];
  }
  try {
    return parseLettaIgnore(readFileSync(target, "utf-8"));
  } catch {
    return [];
  }
}
/**
 * Split .lettaignore content into usable glob patterns: each line is trimmed,
 * then blanks, # comments, and (unsupported) ! negations are dropped.
 */
function parseLettaIgnore(content: string): string[] {
  const patterns: string[] = [];
  for (const rawLine of content.split("\n")) {
    const line = rawLine.trim();
    if (line === "" || line.startsWith("#") || line.startsWith("!")) {
      continue;
    }
    patterns.push(line);
  }
  return patterns;
}