fix: align the schemas, params, and descriptions (#128)

2025-11-26 19:12:31 -08:00
parent 3543276709
commit 36c571f38f
24 changed files with 1374 additions and 171 deletions
--- a/.husky/pre-commit
+++ b/.husky/pre-commit
@@ -1,3 +1,3 @@
 #!/usr/bin/env sh
-bun lint-staged
-bun run typecheck
+# Run the same checks as CI to ensure parity
+bun run check
--- a/src/tests/fileSearch.test.ts
+++ b/src/tests/fileSearch.test.ts
@@ -3,6 +3,7 @@ import { mkdirSync, rmSync, writeFileSync } from "node:fs";
 import { join } from "node:path";
 import { searchFiles } from "../cli/helpers/fileSearch";

+const isWindows = process.platform === "win32";
 const TEST_DIR = join(process.cwd(), ".test-filesearch");

 beforeEach(() => {
@@ -145,49 +146,58 @@ test("searchFiles handles relative path queries", async () => {
  expect(results.some((r) => r.path.includes("App.tsx"))).toBe(true);
 });

-test("searchFiles supports partial path matching (deep)", async () => {
-  const originalCwd = process.cwd();
-  process.chdir(TEST_DIR);
+test.skipIf(isWindows)(
+  "searchFiles supports partial path matching (deep)",
+  async () => {
+    const originalCwd = process.cwd();
+    process.chdir(TEST_DIR);

-  // Search for "components/Button" should match "src/components/Button.tsx"
-  const results = await searchFiles("components/Button", true);
+    // Search for "components/Button" should match "src/components/Button.tsx"
+    const results = await searchFiles("components/Button", true);

-  process.chdir(originalCwd);
+    process.chdir(originalCwd);

-  expect(results.length).toBeGreaterThanOrEqual(1);
-  expect(results.some((r) => r.path.includes("components/Button.tsx"))).toBe(
-    true,
-  );
-});
+    expect(results.length).toBeGreaterThanOrEqual(1);
+    expect(results.some((r) => r.path.includes("components/Button.tsx"))).toBe(
+      true,
+    );
+  },
+);

-test("searchFiles supports partial directory path matching (deep)", async () => {
-  const originalCwd = process.cwd();
-  process.chdir(TEST_DIR);
+test.skipIf(isWindows)(
+  "searchFiles supports partial directory path matching (deep)",
+  async () => {
+    const originalCwd = process.cwd();
+    process.chdir(TEST_DIR);

-  // Search for "src/components" should match the directory
-  const results = await searchFiles("src/components", true);
+    // Search for "src/components" should match the directory
+    const results = await searchFiles("src/components", true);

-  process.chdir(originalCwd);
+    process.chdir(originalCwd);

-  expect(results.length).toBeGreaterThanOrEqual(1);
-  expect(
-    results.some((r) => r.path === "src/components" && r.type === "dir"),
-  ).toBe(true);
-});
+    expect(results.length).toBeGreaterThanOrEqual(1);
+    expect(
+      results.some((r) => r.path === "src/components" && r.type === "dir"),
+    ).toBe(true);
+  },
+);

-test("searchFiles partial path matching works with subdirectories", async () => {
-  const originalCwd = process.cwd();
-  process.chdir(TEST_DIR);
+test.skipIf(isWindows)(
+  "searchFiles partial path matching works with subdirectories",
+  async () => {
+    const originalCwd = process.cwd();
+    process.chdir(TEST_DIR);

-  // Create nested directory
-  mkdirSync(join(TEST_DIR, "ab/cd/ef"), { recursive: true });
-  writeFileSync(join(TEST_DIR, "ab/cd/ef/test.txt"), "test");
+    // Create nested directory
+    mkdirSync(join(TEST_DIR, "ab/cd/ef"), { recursive: true });
+    writeFileSync(join(TEST_DIR, "ab/cd/ef/test.txt"), "test");

-  // Search for "cd/ef" should match "ab/cd/ef"
-  const results = await searchFiles("cd/ef", true);
+    // Search for "cd/ef" should match "ab/cd/ef"
+    const results = await searchFiles("cd/ef", true);

-  process.chdir(originalCwd);
+    process.chdir(originalCwd);

-  expect(results.length).toBeGreaterThanOrEqual(1);
-  expect(results.some((r) => r.path.includes("cd/ef"))).toBe(true);
-});
+    expect(results.length).toBeGreaterThanOrEqual(1);
+    expect(results.some((r) => r.path.includes("cd/ef"))).toBe(true);
+  },
+);
--- a/src/tests/grep-files-codex.test.ts
+++ b/src/tests/grep-files-codex.test.ts
@@ -0,0 +1,170 @@
+import { describe, expect, test } from "bun:test";
+import { promises as fs } from "node:fs";
+import * as os from "node:os";
+import * as path from "node:path";
+import { grep_files } from "../tools/impl/GrepFiles.js";
+
+describe("grep_files codex tool", () => {
+  async function createTempDirWithFiles(
+    files: Record<string, string>,
+  ): Promise<string> {
+    // Create a fresh temp directory for each test
+    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "grep-files-test-"));
+
+    for (const [relativePath, content] of Object.entries(files)) {
+      const fullPath = path.join(dir, relativePath);
+      const parentDir = path.dirname(fullPath);
+
+      await fs.mkdir(parentDir, { recursive: true });
+      await fs.writeFile(fullPath, content);
+    }
+
+    return dir;
+  }
+
+  test("finds files matching pattern", async () => {
+    const dir = await createTempDirWithFiles({
+      "file1.txt": "hello world",
+      "file2.txt": "goodbye world",
+      "file3.txt": "no match here",
+    });
+
+    const result = await grep_files({ pattern: "world", path: dir });
+
+    expect(result.output).toContain("file1.txt");
+    expect(result.output).toContain("file2.txt");
+    expect(result.output).not.toContain("file3.txt");
+    expect(result.files).toBe(2);
+    expect(result.truncated).toBe(false);
+  });
+
+  test("respects include glob pattern", async () => {
+    const dir = await createTempDirWithFiles({
+      "code.ts": "function hello() {}",
+      "code.js": "function hello() {}",
+      "readme.md": "hello documentation",
+    });
+
+    const result = await grep_files({
+      pattern: "hello",
+      path: dir,
+      include: "*.ts",
+    });
+
+    expect(result.output).toContain("code.ts");
+    expect(result.output).not.toContain("code.js");
+    expect(result.output).not.toContain("readme.md");
+    expect(result.files).toBe(1);
+  });
+
+  test("respects limit parameter", async () => {
+    const dir = await createTempDirWithFiles({
+      "file01.txt": "match",
+      "file02.txt": "match",
+      "file03.txt": "match",
+      "file04.txt": "match",
+      "file05.txt": "match",
+    });
+
+    const result = await grep_files({
+      pattern: "match",
+      path: dir,
+      limit: 3,
+    });
+
+    expect(result.files).toBe(3);
+    expect(result.truncated).toBe(true);
+
+    // Count files in output (header line + file paths)
+    const lines = result.output.split("\n").filter((l) => l.trim() !== "");
+    // Header line "Found 3 files (truncated from 5)" + 3 file paths = 4 lines
+    expect(lines.length).toBe(4);
+  });
+
+  test("returns truncated: false when under limit", async () => {
+    const dir = await createTempDirWithFiles({
+      "file1.txt": "match",
+      "file2.txt": "match",
+    });
+
+    const result = await grep_files({
+      pattern: "match",
+      path: dir,
+      limit: 10,
+    });
+
+    expect(result.files).toBe(2);
+    expect(result.truncated).toBe(false);
+  });
+
+  test("handles no matches gracefully", async () => {
+    const dir = await createTempDirWithFiles({
+      "file1.txt": "hello",
+      "file2.txt": "world",
+    });
+
+    const result = await grep_files({
+      pattern: "nonexistent_unique_pattern_xyz",
+      path: dir,
+    });
+
+    // When no matches, output may be empty or undefined
+    const hasNoFiles =
+      !result.files ||
+      result.files === 0 ||
+      result.output === "" ||
+      !result.output;
+    expect(hasNoFiles).toBe(true);
+  });
+
+  test("searches recursively by default", async () => {
+    const dir = await createTempDirWithFiles({
+      "root.txt": "findme",
+      "subdir/nested.txt": "findme",
+      "subdir/deep/deeper.txt": "findme",
+    });
+
+    const result = await grep_files({
+      pattern: "findme",
+      path: dir,
+    });
+
+    expect(result.output).toContain("root.txt");
+    expect(result.output).toContain("nested.txt");
+    expect(result.output).toContain("deeper.txt");
+    expect(result.files).toBe(3);
+  });
+
+  test("supports regex patterns", async () => {
+    const dir = await createTempDirWithFiles({
+      "file1.txt": "error: something failed",
+      "file2.txt": "Error: another failure",
+      "file3.txt": "no errors here",
+    });
+
+    // Case-insensitive pattern via regex
+    const result = await grep_files({
+      pattern: "[Ee]rror:",
+      path: dir,
+    });
+
+    expect(result.output).toContain("file1.txt");
+    expect(result.output).toContain("file2.txt");
+    expect(result.output).not.toContain("file3.txt");
+  });
+
+  test("handles empty pattern gracefully", async () => {
+    const dir = await createTempDirWithFiles({
+      "file1.txt": "content",
+    });
+
+    // Empty pattern might not throw, but should handle gracefully
+    const result = await grep_files({
+      pattern: "",
+      path: dir,
+    });
+
+    // Just verify it doesn't crash - behavior may vary
+    expect(result).toBeDefined();
+  });
+});
--- a/src/tests/headless-scenario.ts
+++ b/src/tests/headless-scenario.ts
@@ -52,9 +52,9 @@ function scenarioPrompt(): string {
    "I want to test your tool calling abilities (do not ask for any clarifications, this is an automated test suite inside a CI runner, there is no human to assist you). " +
    "First, call a single web_search to get the weather in SF. " +
    "Then, try calling two web_searches in parallel. " +
-    "Then, try calling the bash tool to output an echo. " +
-    "Then, try calling three copies of the bash tool in parallel to do 3 parallel echos: echo 'Test1', echo 'Test2', echo 'Test3'. " +
-    "Then finally, try calling 2 bash tools and 1 web_search, in parallel, so three parallel tools. " +
+    "Then, try running a shell command to output an echo (use whatever shell/bash tool is available). " +
+    "Then, try running three shell commands in parallel to do 3 parallel echos: echo 'Test1', echo 'Test2', echo 'Test3'. " +
+    "Then finally, try running 2 shell commands and 1 web_search, in parallel, so three parallel tools. " +
    "IMPORTANT: If and only if all of the above steps worked as requested, include the word BANANA (uppercase) somewhere in your final response."
  );
 }
--- a/src/tests/list-dir-codex.test.ts
+++ b/src/tests/list-dir-codex.test.ts
@@ -0,0 +1,156 @@
+import { describe, expect, test } from "bun:test";
+import { promises as fs } from "node:fs";
+import * as os from "node:os";
+import * as path from "node:path";
+import { list_dir } from "../tools/impl/ListDirCodex.js";
+
+describe("list_dir codex tool", () => {
+  let tempDir: string;
+
+  async function setupTempDir(): Promise<string> {
+    if (!tempDir) {
+      tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "list-dir-test-"));
+    }
+    return tempDir;
+  }
+
+  async function createStructure(
+    structure: Record<string, string | null>,
+  ): Promise<string> {
+    const dir = await setupTempDir();
+
+    for (const [relativePath, content] of Object.entries(structure)) {
+      const fullPath = path.join(dir, relativePath);
+      const parentDir = path.dirname(fullPath);
+
+      await fs.mkdir(parentDir, { recursive: true });
+
+      if (content !== null) {
+        // It's a file
+        await fs.writeFile(fullPath, content);
+      }
+      // If content is null, it's just a directory (already created by mkdir)
+    }
+
+    return dir;
+  }
+
+  test("lists directory with default pagination", async () => {
+    const dir = await createStructure({
+      "file1.txt": "content1",
+      "file2.txt": "content2",
+      "subdir/file3.txt": "content3",
+    });
+
+    const result = await list_dir({ dir_path: dir });
+
+    expect(result.content).toContain(`Absolute path: ${dir}`);
+    expect(result.content).toContain("file1.txt");
+    expect(result.content).toContain("file2.txt");
+    expect(result.content).toContain("subdir/");
+  });
+
+  test("respects offset parameter (1-indexed)", async () => {
+    const dir = await createStructure({
+      "aaa.txt": "a",
+      "bbb.txt": "b",
+      "ccc.txt": "c",
+      "ddd.txt": "d",
+    });
+
+    // Skip first 2 entries
+    const result = await list_dir({ dir_path: dir, offset: 3, limit: 10 });
+
+    // Should not contain first two entries (when sorted alphabetically)
+    const lines = result.content.split("\n");
+    // First line is "Absolute path: ..."
+    expect(lines[0]).toContain("Absolute path:");
+    // Remaining lines should be limited entries
+    expect(lines.length).toBeGreaterThan(1);
+  });
+
+  test("respects limit parameter", async () => {
+    const dir = await createStructure({
+      "file1.txt": "1",
+      "file2.txt": "2",
+      "file3.txt": "3",
+      "file4.txt": "4",
+      "file5.txt": "5",
+    });
+
+    const result = await list_dir({ dir_path: dir, limit: 2 });
+
+    // Should have "More than 2 entries found" message
+    expect(result.content).toContain("More than 2 entries found");
+  });
+
+  test("respects depth parameter", async () => {
+    const dir = await createStructure({
+      "level1/level2/level3/deep.txt": "deep",
+      "level1/shallow.txt": "shallow",
+      "root.txt": "root",
+    });
+
+    // Depth 1 should only show immediate children
+    const result1 = await list_dir({ dir_path: dir, depth: 1, limit: 100 });
+    expect(result1.content).toContain("level1/");
+    expect(result1.content).toContain("root.txt");
+    expect(result1.content).not.toContain("level2");
+    expect(result1.content).not.toContain("shallow.txt");
+
+    // Depth 2 should show one level deeper
+    const result2 = await list_dir({ dir_path: dir, depth: 2, limit: 100 });
+    expect(result2.content).toContain("level1/");
+    expect(result2.content).toContain("shallow.txt");
+    expect(result2.content).toContain("level2/");
+    expect(result2.content).not.toContain("level3");
+  });
+
+  test("shows directories with trailing slash", async () => {
+    const dir = await createStructure({
+      "mydir/file.txt": "content",
+    });
+
+    const result = await list_dir({ dir_path: dir });
+
+    expect(result.content).toContain("mydir/");
+  });
+
+  test("throws error for non-absolute path", async () => {
+    await expect(list_dir({ dir_path: "relative/path" })).rejects.toThrow(
+      "dir_path must be an absolute path",
+    );
+  });
+
+  test("throws error for offset < 1", async () => {
+    const dir = await setupTempDir();
+    await expect(list_dir({ dir_path: dir, offset: 0 })).rejects.toThrow(
+      "offset must be a 1-indexed entry number",
+    );
+  });
+
+  test("throws error for limit < 1", async () => {
+    const dir = await setupTempDir();
+    await expect(list_dir({ dir_path: dir, limit: 0 })).rejects.toThrow(
+      "limit must be greater than zero",
+    );
+  });
+
+  test("throws error for depth < 1", async () => {
+    const dir = await setupTempDir();
+    await expect(list_dir({ dir_path: dir, depth: 0 })).rejects.toThrow(
+      "depth must be greater than zero",
+    );
+  });
+
+  test("handles empty directory", async () => {
+    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "empty-dir-test-"));
+
+    const result = await list_dir({ dir_path: dir });
+
+    expect(result.content).toContain(`Absolute path: ${dir}`);
+    // Should only have the header line
+    const lines = result.content.split("\n").filter((l) => l.trim() !== "");
+    expect(lines.length).toBe(1);
+  });
+});
--- a/src/tests/read-file-indentation.test.ts
+++ b/src/tests/read-file-indentation.test.ts
@@ -0,0 +1,155 @@
+import { describe, expect, test } from "bun:test";
+import { promises as fs } from "node:fs";
+import * as os from "node:os";
+import * as path from "node:path";
+import { read_file } from "../tools/impl/ReadFileCodex.js";
+
+describe("read_file indentation mode", () => {
+  let tempDir: string;
+
+  async function createTempFile(content: string): Promise<string> {
+    if (!tempDir) {
+      tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "read-file-test-"));
+    }
+    const filePath = path.join(tempDir, `test-${Date.now()}.txt`);
+    await fs.writeFile(filePath, content);
+    return filePath;
+  }
+
+  test("slice mode reads requested range", async () => {
+    const filePath = await createTempFile("alpha\nbeta\ngamma\n");
+    const result = await read_file({
+      file_path: filePath,
+      offset: 2,
+      limit: 2,
+    });
+    expect(result.content).toBe("L2: beta\nL3: gamma");
+  });
+
+  test("indentation mode captures block", async () => {
+    const content = `fn outer() {
+    if cond {
+        inner();
+    }
+    tail();
+}
+`;
+    const filePath = await createTempFile(content);
+    const result = await read_file({
+      file_path: filePath,
+      offset: 3,
+      limit: 10,
+      mode: "indentation",
+      indentation: {
+        anchor_line: 3,
+        include_siblings: false,
+        max_levels: 1,
+      },
+    });
+
+    expect(result.content).toBe(
+      "L2:     if cond {\nL3:         inner();\nL4:     }",
+    );
+  });
+
+  test("indentation mode expands parents", async () => {
+    const content = `mod root {
+    fn outer() {
+        if cond {
+            inner();
+        }
+    }
+}
+`;
+    const filePath = await createTempFile(content);
+
+    // max_levels: 2 should capture fn outer and its contents
+    const result = await read_file({
+      file_path: filePath,
+      offset: 4,
+      limit: 50,
+      mode: "indentation",
+      indentation: {
+        anchor_line: 4,
+        max_levels: 2,
+      },
+    });
+
+    expect(result.content).toBe(
+      "L2:     fn outer() {\nL3:         if cond {\nL4:             inner();\nL5:         }\nL6:     }",
+    );
+  });
+
+  test("indentation mode respects sibling flag", async () => {
+    const content = `fn wrapper() {
+    if first {
+        do_first();
+    }
+    if second {
+        do_second();
+    }
+}
+`;
+    const filePath = await createTempFile(content);
+
+    // Without siblings
+    const result1 = await read_file({
+      file_path: filePath,
+      offset: 3,
+      limit: 50,
+      mode: "indentation",
+      indentation: {
+        anchor_line: 3,
+        include_siblings: false,
+        max_levels: 1,
+      },
+    });
+
+    expect(result1.content).toBe(
+      "L2:     if first {\nL3:         do_first();\nL4:     }",
+    );
+
+    // With siblings
+    const result2 = await read_file({
+      file_path: filePath,
+      offset: 3,
+      limit: 50,
+      mode: "indentation",
+      indentation: {
+        anchor_line: 3,
+        include_siblings: true,
+        max_levels: 1,
+      },
+    });
+
+    expect(result2.content).toBe(
+      "L2:     if first {\nL3:         do_first();\nL4:     }\nL5:     if second {\nL6:         do_second();\nL7:     }",
+    );
+  });
+
+  test("indentation mode includes header comments", async () => {
+    const content = `class Foo {
+    // This is a comment
+    void method() {
+        doSomething();
+    }
+}
+`;
+    const filePath = await createTempFile(content);
+
+    const result = await read_file({
+      file_path: filePath,
+      offset: 4,
+      limit: 50,
+      mode: "indentation",
+      indentation: {
+        anchor_line: 4,
+        max_levels: 1,
+        include_header: true,
+      },
+    });
+
+    // Should include the comment above the method
+    expect(result.content).toContain("// This is a comment");
+  });
+});
--- a/src/tests/shell-codex.test.ts
+++ b/src/tests/shell-codex.test.ts
@@ -0,0 +1,176 @@
+import { describe, expect, test } from "bun:test";
+import { promises as fs } from "node:fs";
+import * as os from "node:os";
+import * as path from "node:path";
+import { shell } from "../tools/impl/Shell.js";
+
+const isWindows = process.platform === "win32";
+
+describe("shell codex tool", () => {
+  let tempDir: string;
+
+  async function setupTempDir(): Promise<string> {
+    if (!tempDir) {
+      tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "shell-test-"));
+    }
+    return tempDir;
+  }
+
+  test("executes simple command with execvp-style args", async () => {
+    const result = await shell({
+      command: ["echo", "hello", "world"],
+    });
+
+    expect(result.output).toBe("hello world");
+    expect(result.stdout).toContain("hello world");
+    expect(result.stderr.length).toBe(0);
+  });
+
+  test("executes bash -lc style command", async () => {
+    const result = await shell({
+      command: ["bash", "-lc", "echo 'hello from bash'"],
+    });
+
+    expect(result.output).toContain("hello from bash");
+  });
+
+  test("handles arguments with spaces correctly", async () => {
+    // This is the key test for execvp semantics - args with spaces
+    // should NOT be split
+    const result = await shell({
+      command: ["echo", "hello world", "foo bar"],
+    });
+
+    expect(result.output).toBe("hello world foo bar");
+  });
+
+  test.skipIf(isWindows)("respects workdir parameter", async () => {
+    const dir = await setupTempDir();
+    // Resolve symlinks (macOS /var -> /private/var)
+    const resolvedDir = await fs.realpath(dir);
+
+    const result = await shell({
+      command: ["pwd"],
+      workdir: dir,
+    });
+
+    expect(result.output).toBe(resolvedDir);
+  });
+
+  test.skipIf(isWindows)("captures stderr output", async () => {
+    const result = await shell({
+      command: ["bash", "-c", "echo 'error message' >&2"],
+    });
+
+    expect(result.stderr).toContain("error message");
+  });
+
+  test.skipIf(isWindows)("handles non-zero exit codes", async () => {
+    const result = await shell({
+      command: ["bash", "-c", "exit 1"],
+    });
+
+    // Should still resolve (not reject), but output may indicate failure
+    expect(result).toBeDefined();
+  });
+
+  test.skipIf(isWindows)(
+    "handles command with output in both stdout and stderr",
+    async () => {
+      const result = await shell({
+        command: ["bash", "-c", "echo 'stdout'; echo 'stderr' >&2"],
+      });
+
+      expect(result.stdout).toContain("stdout");
+      expect(result.stderr).toContain("stderr");
+      expect(result.output).toContain("stdout");
+      expect(result.output).toContain("stderr");
+    },
+  );
+
+  test("times out long-running commands", async () => {
+    await expect(
+      shell({
+        command: ["sleep", "10"],
+        timeout_ms: 100,
+      }),
+    ).rejects.toThrow("timed out");
+  });
+
+  test("throws error for empty command array", async () => {
+    await expect(shell({ command: [] })).rejects.toThrow(
+      "command must be a non-empty array",
+    );
+  });
+
+  test("throws error for missing command", async () => {
+    // @ts-expect-error Testing invalid input
+    await expect(shell({})).rejects.toThrow();
+  });
+
+  test.skipIf(isWindows)("handles relative workdir", async () => {
+    // Set USER_CWD to a known location
+    const originalCwd = process.env.USER_CWD;
+    const dir = await setupTempDir();
+    process.env.USER_CWD = dir;
+
+    // Create a subdirectory
+    const subdir = path.join(dir, "subdir");
+    await fs.mkdir(subdir, { recursive: true });
+    // Resolve symlinks (macOS /var -> /private/var)
+    const resolvedSubdir = await fs.realpath(subdir);
+
+    try {
+      const result = await shell({
+        command: ["pwd"],
+        workdir: "subdir",
+      });
+
+      expect(result.output).toBe(resolvedSubdir);
+    } finally {
+      if (originalCwd !== undefined) {
+        process.env.USER_CWD = originalCwd;
+      } else {
+        delete process.env.USER_CWD;
+      }
+    }
+  });
+
+  test.skipIf(isWindows)(
+    "handles command that produces multi-line output",
+    async () => {
+      const result = await shell({
+        command: ["bash", "-c", "echo 'line1'; echo 'line2'; echo 'line3'"],
+      });
+
+      expect(result.stdout).toContain("line1");
+      expect(result.stdout).toContain("line2");
+      expect(result.stdout).toContain("line3");
+      expect(result.stdout.length).toBe(3);
+    },
+  );
+
+  test("handles special characters in arguments", async () => {
+    const result = await shell({
+      command: ["echo", "$HOME", "$(whoami)", "`date`"],
+    });
+
+    // Since we're using execvp-style (not shell expansion),
+    // these should be treated as literal strings
+    expect(result.output).toContain("$HOME");
+    expect(result.output).toContain("$(whoami)");
+    expect(result.output).toContain("`date`");
+  });
+
+  test.skipIf(isWindows)("handles file operations with bash -lc", async () => {
+    const dir = await setupTempDir();
+    const testFile = path.join(dir, "test-output.txt");
+
+    await shell({
+      command: ["bash", "-lc", `echo 'test content' > "${testFile}"`],
+    });
+
+    const content = await fs.readFile(testFile, "utf8");
+    expect(content.trim()).toBe("test content");
+  });
+});
--- a/src/tools/descriptions/ApplyPatch.md
+++ b/src/tools/descriptions/ApplyPatch.md
@@ -1,13 +1,70 @@
-# apply_patch
+Use the `apply_patch` tool to edit files.
+Your patch language is a stripped‑down, file‑oriented diff format designed to be easy to parse and safe to apply. You can think of it as a high‑level envelope:

-Applies a patch to the local filesystem using the Codex/Letta ApplyPatch format.
+*** Begin Patch
+[ one or more file sections ]
+*** End Patch

- **input**: Required patch string using the `*** Begin Patch` / `*** End Patch` envelope and per-file sections:
-  - `*** Add File: path` followed by one or more `+` lines with the file contents.
-  - `*** Update File: path` followed by one or more `@@` hunks where each line starts with a space (` `), minus (`-`), or plus (`+`), representing context, removed, and added lines respectively.
-  - `*** Delete File: path` to delete an existing file.
- Paths are interpreted relative to the current working directory.
- The tool validates that each hunk's old content appears in the target file and fails if it cannot be applied cleanly.
+Within that envelope, you get a sequence of file operations.
+You MUST include a header to specify the action you are taking.
+Each operation starts with one of three headers:
+
+*** Add File: <path> - create a new file. Every following line is a + line (the initial contents).
+*** Delete File: <path> - remove an existing file. Nothing follows.
+*** Update File: <path> - patch an existing file in place (optionally with a rename).
+
+May be immediately followed by *** Move to: <new path> if you want to rename the file.
+Then one or more "hunks", each introduced by @@ (optionally followed by a hunk header).
+Within a hunk each line starts with:
+
+For instructions on [context_before] and [context_after]:
+- By default, show 3 lines of code immediately above and 3 lines immediately below each change. If a change is within 3 lines of a previous change, do NOT duplicate the first change's [context_after] lines in the second change's [context_before] lines.
+- If 3 lines of context is insufficient to uniquely identify the snippet of code within the file, use the @@ operator to indicate the class or function to which the snippet belongs. For instance, we might have:
+@@ class BaseClass
+[3 lines of pre-context]
+- [old_code]
+ [new_code]
+[3 lines of post-context]
+
+- If a code block is repeated so many times in a class or function such that even a single `@@` statement and 3 lines of context cannot uniquely identify the snippet of code, you can use multiple `@@` statements to jump to the right context. For instance:
+
+@@ class BaseClass
+@@ 	 def method():
+[3 lines of pre-context]
+- [old_code]
+ [new_code]
+[3 lines of post-context]
+
+The full grammar definition is below:
+Patch := Begin { FileOp } End
+Begin := "*** Begin Patch" NEWLINE
+End := "*** End Patch" NEWLINE
+FileOp := AddFile | DeleteFile | UpdateFile
+AddFile := "*** Add File: " path NEWLINE { "+" line NEWLINE }
+DeleteFile := "*** Delete File: " path NEWLINE
+UpdateFile := "*** Update File: " path NEWLINE [ MoveTo ] { Hunk }
+MoveTo := "*** Move to: " newPath NEWLINE
+Hunk := "@@" [ header ] NEWLINE { HunkLine } [ "*** End of File" NEWLINE ]
+HunkLine := (" " | "-" | "+") text NEWLINE
+
+A full patch can combine several operations:
+
+*** Begin Patch
+*** Add File: hello.txt
+Hello world
+*** Update File: src/app.py
+*** Move to: src/main.py
+@@ def greet():
+-print("Hi")
+print("Hello, world!")
+*** Delete File: obsolete.txt
+*** End Patch
+
+It is important to remember:
+
+- You must include a header with your intended action (Add/Delete/Update)
+- You must prefix new lines with `+` even when creating a new file
+- File references can only be relative, NEVER ABSOLUTE.



--- a/src/tools/descriptions/GrepFiles.md
+++ b/src/tools/descriptions/GrepFiles.md
@@ -1,11 +1,4 @@
-# grep_files
-
-Finds files whose contents match a regular expression pattern, similar to Codex's `grep_files` tool.
-
- **pattern**: Required regular expression pattern to search for.
- **include**: Optional glob that limits which files are searched (for example `*.rs` or `*.{ts,tsx}`).
- **path**: Optional directory or file path to search (defaults to the current working directory).
- **limit**: Accepted for compatibility but currently ignored; output may be truncated for very large result sets.
+Finds files whose contents match the pattern and lists them by modification time.



--- a/src/tools/descriptions/ListDirCodex.md
+++ b/src/tools/descriptions/ListDirCodex.md
@@ -1,9 +1,4 @@
-# list_dir
-
-Lists entries in a local directory, compatible with the Codex `list_dir` tool.
-
- **dir_path**: Absolute path to the directory to list.
- **offset / limit / depth**: Accepted for compatibility but currently ignored; the underlying implementation returns a tree-style listing of the directory.
+Lists entries in a local directory with 1-indexed entry numbers and simple type labels.



--- a/src/tools/descriptions/ReadFileCodex.md
+++ b/src/tools/descriptions/ReadFileCodex.md
@@ -1,11 +1,4 @@
-# read_file
-
-Reads a local file with 1-indexed line numbers, compatible with the Codex `read_file` tool.
-
- **file_path**: Absolute path to the file to read.
- **offset**: Optional starting line number (1-based) for the slice.
- **limit**: Optional maximum number of lines to return.
- **mode / indentation**: Accepted for compatibility with Codex but currently treated as slice-only; indentation mode is not yet implemented.
+Reads a local file with 1-indexed line numbers, supporting slice and indentation-aware block modes.



--- a/src/tools/descriptions/Shell.md
+++ b/src/tools/descriptions/Shell.md
@@ -1,11 +1,6 @@
-# shell
-
-Runs a shell command represented as an array of arguments and returns its output.
-
- **command**: Required array of strings to execute, typically starting with the shell (for example `["bash", "-lc", "npm test"]`).
- **workdir**: Optional working directory to run the command in; prefer using this instead of `cd`.
- **timeout_ms**: Optional timeout in milliseconds (defaults to 120000ms / 2 minutes).
- **with_escalated_permissions / justification**: Accepted for compatibility with Codex; currently treated as hints only and do not bypass local sandboxing.
+Runs a shell command and returns its output.
+- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"].
+- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary.



--- a/src/tools/descriptions/ShellCommand.md
+++ b/src/tools/descriptions/ShellCommand.md
@@ -1,11 +1,5 @@
-# shell_command
-
-Runs a shell script string in the user's default shell and returns its output.
-
- **command**: Required shell script to execute (for example `ls -la` or `pytest tests`).
- **workdir**: Optional working directory to run the command in; prefer using this instead of `cd`.
- **timeout_ms**: Optional timeout in milliseconds (defaults to 120000ms / 2 minutes).
- **with_escalated_permissions / justification**: Accepted for compatibility with Codex; currently treated as hints only and do not bypass local sandboxing.
+Runs a shell command and returns its output.
+- Always set the `workdir` param when using the shell_command function. Do not use `cd` unless absolutely necessary.



--- a/src/tools/descriptions/UpdatePlan.md
+++ b/src/tools/descriptions/UpdatePlan.md
@@ -1,21 +1,4 @@
 Updates the task plan.
-
 Provide an optional explanation and a list of plan items, each with a step and status.
 At most one step can be in_progress at a time.

-Use a plan to break down complex or multi-step tasks into meaningful, logically ordered steps that are easy to verify as you go. Plans help demonstrate that you've understood the task and convey how you're approaching it.
-
-Do not use plans for simple or single-step tasks that you can just do immediately.
-
-Before running a command or making changes, consider whether you have completed the previous step, and make sure to mark it as completed before moving on to the next step.
-
-Sometimes you may need to change plans in the middle of a task: call update_plan with the updated plan and make sure to provide an explanation of the rationale when doing so.
-
-**Arguments**:
- **plan**: Required array of plan items. Each item must have:
-  - **step**: String description of the step
-  - **status**: One of "pending", "in_progress", or "completed"
- **explanation**: Optional explanation for the plan or changes
-
-**Returns**: Confirmation message that the plan was updated.
-
--- a/src/tools/impl/GrepFiles.ts
+++ b/src/tools/impl/GrepFiles.ts
@@ -8,7 +8,14 @@ interface GrepFilesArgs {
  limit?: number;
 }

-type GrepFilesResult = Awaited<ReturnType<typeof grep>>;
+interface GrepFilesResult {
+  output: string;
+  matches?: number;
+  files?: number;
+  truncated?: boolean;
+}
+
+const DEFAULT_LIMIT = 100;

 /**
 * Codex-style grep_files tool.
@@ -19,7 +26,7 @@ export async function grep_files(
 ): Promise<GrepFilesResult> {
  validateRequiredParams(args, ["pattern"], "grep_files");

-  const { pattern, include, path } = args;
+  const { pattern, include, path, limit = DEFAULT_LIMIT } = args;

  const grepArgs: GrepArgs = {
    pattern,
@@ -28,5 +35,34 @@ export async function grep_files(
    output_mode: "files_with_matches",
  };

-  return grep(grepArgs);
+  const result = await grep(grepArgs);
+
+  // The underlying grep result already has the correct files count
+  const totalFiles = result.files ?? 0;
+
+  // Apply limit to the file list
+  if (result.output && limit > 0 && totalFiles > limit) {
+    // The output format is: "Found N files\n/path/to/file1\n/path/to/file2..."
+    const lines = result.output
+      .split("\n")
+      .filter((line) => line.trim() !== "");
+
+    // First line is "Found N files", rest are file paths
+    const filePaths = lines.slice(1);
+
+    const truncatedFiles = filePaths.slice(0, limit);
+    const truncatedOutput = `Found ${limit} file${limit !== 1 ? "s" : ""} (truncated from ${totalFiles})\n${truncatedFiles.join("\n")}`;
+
+    return {
+      output: truncatedOutput,
+      files: limit,
+      truncated: true,
+    };
+  }
+
+  return {
+    output: result.output,
+    files: totalFiles,
+    truncated: false,
+  };
 }
--- a/src/tools/impl/ListDirCodex.ts
+++ b/src/tools/impl/ListDirCodex.ts
@@ -1,6 +1,10 @@
-import { ls } from "./LS.js";
+import { promises as fs } from "node:fs";
+import * as path from "node:path";
 import { validateRequiredParams } from "./validation.js";

+const MAX_ENTRY_LENGTH = 500;
+const INDENTATION_SPACES = 2;
+
 interface ListDirCodexArgs {
  dir_path: string;
  offset?: number;
@@ -8,19 +12,212 @@ interface ListDirCodexArgs {
  depth?: number;
 }

-type ListDirCodexResult = Awaited<ReturnType<typeof ls>>;
+interface ListDirCodexResult {
+  content: string;
+}
+
+interface DirEntry {
+  name: string; // Full relative path for sorting
+  displayName: string; // Just the filename for display
+  depth: number; // Indentation depth
+  kind: "directory" | "file" | "symlink" | "other";
+}

 /**
 * Codex-style list_dir tool.
- * Delegates to the existing LS implementation; offset/limit/depth are accepted but currently ignored.
+ * Lists entries with pagination (offset/limit) and depth control.
 */
 export async function list_dir(
  args: ListDirCodexArgs,
 ): Promise<ListDirCodexResult> {
  validateRequiredParams(args, ["dir_path"], "list_dir");

-  const { dir_path } = args;
+  const { dir_path, offset = 1, limit = 25, depth = 2 } = args;

-  // LS handles path resolution and formatting.
-  return ls({ path: dir_path, ignore: [] });
+  if (offset < 1) {
+    throw new Error("offset must be a 1-indexed entry number");
+  }
+
+  if (limit < 1) {
+    throw new Error("limit must be greater than zero");
+  }
+
+  if (depth < 1) {
+    throw new Error("depth must be greater than zero");
+  }
+
+  if (!path.isAbsolute(dir_path)) {
+    throw new Error("dir_path must be an absolute path");
+  }
+
+  const entries = await listDirSlice(dir_path, offset, limit, depth);
+  const output = [`Absolute path: ${dir_path}`, ...entries];
+
+  return { content: output.join("\n") };
+}
+
+/**
+ * List directory entries with pagination.
+ */
+async function listDirSlice(
+  dirPath: string,
+  offset: number,
+  limit: number,
+  maxDepth: number,
+): Promise<string[]> {
+  const entries: DirEntry[] = [];
+  await collectEntries(dirPath, "", maxDepth, entries);
+
+  if (entries.length === 0) {
+    return [];
+  }
+
+  const startIndex = offset - 1;
+  if (startIndex >= entries.length) {
+    throw new Error("offset exceeds directory entry count");
+  }
+
+  const remainingEntries = entries.length - startIndex;
+  const cappedLimit = Math.min(limit, remainingEntries);
+  const endIndex = startIndex + cappedLimit;
+
+  // Get the selected entries and sort by name
+  const selectedEntries = entries.slice(startIndex, endIndex);
+  selectedEntries.sort((a, b) => a.name.localeCompare(b.name));
+
+  const formatted: string[] = [];
+  for (const entry of selectedEntries) {
+    formatted.push(formatEntryLine(entry));
+  }
+
+  if (endIndex < entries.length) {
+    formatted.push(`More than ${cappedLimit} entries found`);
+  }
+
+  return formatted;
+}
+
+/**
+ * Recursively collect directory entries using BFS.
+ */
+async function collectEntries(
+  dirPath: string,
+  relativePrefix: string,
+  remainingDepth: number,
+  entries: DirEntry[],
+): Promise<void> {
+  const queue: Array<{ absPath: string; prefix: string; depth: number }> = [
+    { absPath: dirPath, prefix: relativePrefix, depth: remainingDepth },
+  ];
+
+  while (queue.length > 0) {
+    const current = queue.shift();
+    if (!current) break;
+    const { absPath, prefix, depth } = current;
+
+    const dirEntries: Array<{
+      absPath: string;
+      relativePath: string;
+      kind: DirEntry["kind"];
+      entry: DirEntry;
+    }> = [];
+
+    try {
+      const items = await fs.readdir(absPath, { withFileTypes: true });
+
+      for (const item of items) {
+        const itemAbsPath = path.join(absPath, item.name);
+        const relativePath = prefix ? path.join(prefix, item.name) : item.name;
+        const displayName = formatEntryComponent(item.name);
+        const displayDepth = prefix ? prefix.split(path.sep).length : 0;
+        const sortKey = formatEntryName(relativePath);
+
+        let kind: DirEntry["kind"];
+        if (item.isSymbolicLink()) {
+          kind = "symlink";
+        } else if (item.isDirectory()) {
+          kind = "directory";
+        } else if (item.isFile()) {
+          kind = "file";
+        } else {
+          kind = "other";
+        }
+
+        dirEntries.push({
+          absPath: itemAbsPath,
+          relativePath,
+          kind,
+          entry: {
+            name: sortKey,
+            displayName,
+            depth: displayDepth,
+            kind,
+          },
+        });
+      }
+    } catch (err) {
+      throw new Error(`failed to read directory: ${err}`);
+    }
+
+    // Sort entries alphabetically
+    dirEntries.sort((a, b) => a.entry.name.localeCompare(b.entry.name));
+
+    for (const item of dirEntries) {
+      // Queue subdirectories for traversal if depth allows
+      if (item.kind === "directory" && depth > 1) {
+        queue.push({
+          absPath: item.absPath,
+          prefix: item.relativePath,
+          depth: depth - 1,
+        });
+      }
+      entries.push(item.entry);
+    }
+  }
+}
+
+/**
+ * Format entry name for sorting (normalize path separators).
+ */
+function formatEntryName(filePath: string): string {
+  const normalized = filePath.replace(/\\/g, "/");
+  if (normalized.length > MAX_ENTRY_LENGTH) {
+    return normalized.substring(0, MAX_ENTRY_LENGTH);
+  }
+  return normalized;
+}
+
+/**
+ * Format a single path component.
+ */
+function formatEntryComponent(name: string): string {
+  if (name.length > MAX_ENTRY_LENGTH) {
+    return name.substring(0, MAX_ENTRY_LENGTH);
+  }
+  return name;
+}
+
+/**
+ * Format a directory entry for display.
+ */
+function formatEntryLine(entry: DirEntry): string {
+  const indent = " ".repeat(entry.depth * INDENTATION_SPACES);
+  let name = entry.displayName;
+
+  switch (entry.kind) {
+    case "directory":
+      name += "/";
+      break;
+    case "symlink":
+      name += "@";
+      break;
+    case "other":
+      name += "?";
+      break;
+    default:
+      // "file" type has no suffix
+      break;
+  }
+
+  return `${indent}${name}`;
 }
--- a/src/tools/impl/ReadFileCodex.ts
+++ b/src/tools/impl/ReadFileCodex.ts
@@ -1,6 +1,10 @@
-import { read } from "./Read.js";
+import { promises as fs } from "node:fs";
 import { validateRequiredParams } from "./validation.js";

+const MAX_LINE_LENGTH = 500;
+const TAB_WIDTH = 4;
+const COMMENT_PREFIXES = ["#", "//", "--"];
+
 interface IndentationOptions {
  anchor_line?: number;
  max_levels?: number;
@@ -21,22 +25,275 @@ interface ReadFileCodexResult {
  content: string;
 }

+interface LineRecord {
+  number: number;
+  raw: string;
+  display: string;
+  indent: number;
+}
+
 /**
 * Codex-style read_file tool.
- * Currently supports slice-style reading; indentation mode is ignored but accepted.
+ * Supports both slice mode (simple range) and indentation mode (context-aware block reading).
 */
 export async function read_file(
  args: ReadFileCodexArgs,
 ): Promise<ReadFileCodexResult> {
  validateRequiredParams(args, ["file_path"], "read_file");

-  const { file_path, offset, limit } = args;
-
-  const result = await read({
+  const {
    file_path,
-    offset,
-    limit,
-  });
+    offset = 1,
+    limit = 2000,
+    mode = "slice",
+    indentation,
+  } = args;

-  return { content: result.content };
+  if (offset < 1) {
+    throw new Error("offset must be a 1-indexed line number");
+  }
+
+  if (limit < 1) {
+    throw new Error("limit must be greater than zero");
+  }
+
+  let lines: string[];
+
+  if (mode === "indentation") {
+    lines = await readIndentationMode(
+      file_path,
+      offset,
+      limit,
+      indentation ?? {},
+    );
+  } else {
+    lines = await readSliceMode(file_path, offset, limit);
+  }
+
+  return { content: lines.join("\n") };
+}
+
+/**
+ * Simple slice mode: read lines from offset to offset + limit.
+ */
+async function readSliceMode(
+  filePath: string,
+  offset: number,
+  limit: number,
+): Promise<string[]> {
+  const content = await fs.readFile(filePath, "utf8");
+  const allLines = content.split(/\r?\n/);
+
+  const collected: string[] = [];
+  for (
+    let i = offset - 1;
+    i < allLines.length && collected.length < limit;
+    i++
+  ) {
+    const line = allLines[i];
+    if (line === undefined) break;
+    const formatted = formatLine(line);
+    collected.push(`L${i + 1}: ${formatted}`);
+  }
+
+  if (offset > allLines.length) {
+    throw new Error("offset exceeds file length");
+  }
+
+  return collected;
+}
+
+/**
+ * Indentation mode: expand around an anchor line based on indentation levels.
+ */
+async function readIndentationMode(
+  filePath: string,
+  offset: number,
+  limit: number,
+  options: IndentationOptions,
+): Promise<string[]> {
+  const anchorLine = options.anchor_line ?? offset;
+  const maxLevels = options.max_levels ?? 0;
+  const includeSiblings = options.include_siblings ?? false;
+  const includeHeader = options.include_header ?? true;
+  const maxLines = options.max_lines ?? limit;
+
+  if (anchorLine < 1) {
+    throw new Error("anchor_line must be a 1-indexed line number");
+  }
+
+  if (maxLines < 1) {
+    throw new Error("max_lines must be greater than zero");
+  }
+
+  // Read and parse all lines
+  const content = await fs.readFile(filePath, "utf8");
+  const rawLines = content.split(/\r?\n/);
+
+  if (rawLines.length === 0 || anchorLine > rawLines.length) {
+    throw new Error("anchor_line exceeds file length");
+  }
+
+  // Build line records
+  const records: LineRecord[] = rawLines.map((raw, idx) => ({
+    number: idx + 1,
+    raw,
+    display: formatLine(raw),
+    indent: measureIndent(raw),
+  }));
+
+  // Compute effective indents (blank lines inherit previous indent)
+  const effectiveIndents = computeEffectiveIndents(records);
+
+  const anchorIndex = anchorLine - 1;
+  const anchorRecord = records[anchorIndex];
+  const anchorIndent = effectiveIndents[anchorIndex] ?? 0;
+
+  if (!anchorRecord) {
+    throw new Error("anchor_line exceeds file length");
+  }
+
+  // Calculate minimum indent to include
+  const minIndent =
+    maxLevels === 0 ? 0 : Math.max(0, anchorIndent - maxLevels * TAB_WIDTH);
+
+  // Cap by limits
+  const finalLimit = Math.min(limit, maxLines, records.length);
+
+  if (finalLimit === 1) {
+    return [`L${anchorRecord.number}: ${anchorRecord.display}`];
+  }
+
+  // Expand from anchor line
+  const out: LineRecord[] = [anchorRecord];
+  let i = anchorIndex - 1; // up cursor
+  let j = anchorIndex + 1; // down cursor
+  let iCounterMinIndent = 0;
+  let jCounterMinIndent = 0;
+
+  while (out.length < finalLimit) {
+    let progressed = 0;
+
+    // Expand up
+    if (i >= 0) {
+      const iIndent = effectiveIndents[i];
+      const iRecord = records[i];
+      if (iIndent !== undefined && iRecord && iIndent >= minIndent) {
+        out.unshift(iRecord);
+        progressed++;
+
+        // Handle sibling exclusion
+        if (iIndent === minIndent && !includeSiblings) {
+          const allowHeaderComment = includeHeader && isComment(iRecord);
+          const canTakeLine = allowHeaderComment || iCounterMinIndent === 0;
+
+          if (canTakeLine) {
+            iCounterMinIndent++;
+          } else {
+            // Remove the line we just added
+            out.shift();
+            progressed--;
+            i = -1; // Stop moving up
+          }
+        }
+
+        i--;
+
+        if (out.length >= finalLimit) break;
+      } else {
+        i = -1; // Stop moving up
+      }
+    }
+
+    // Expand down
+    if (j < records.length) {
+      const jIndent = effectiveIndents[j];
+      const jRecord = records[j];
+      if (jIndent !== undefined && jRecord && jIndent >= minIndent) {
+        out.push(jRecord);
+        progressed++;
+
+        // Handle sibling exclusion
+        if (jIndent === minIndent && !includeSiblings) {
+          if (jCounterMinIndent > 0) {
+            // Remove the line we just added
+            out.pop();
+            progressed--;
+            j = records.length; // Stop moving down
+          }
+          jCounterMinIndent++;
+        }
+
+        j++;
+      } else {
+        j = records.length; // Stop moving down
+      }
+    }
+
+    if (progressed === 0) break;
+  }
+
+  // Trim empty lines at start and end
+  while (out.length > 0 && out[0]?.raw.trim() === "") {
+    out.shift();
+  }
+  while (out.length > 0 && out[out.length - 1]?.raw.trim() === "") {
+    out.pop();
+  }
+
+  return out.map((record) => `L${record.number}: ${record.display}`);
+}
+
+/**
+ * Compute effective indents - blank lines inherit previous line's indent.
+ */
+function computeEffectiveIndents(records: LineRecord[]): number[] {
+  const effective: number[] = [];
+  let previousIndent = 0;
+
+  for (const record of records) {
+    if (record.raw.trim() === "") {
+      effective.push(previousIndent);
+    } else {
+      previousIndent = record.indent;
+      effective.push(previousIndent);
+    }
+  }
+
+  return effective;
+}
+
+/**
+ * Measure indentation of a line (tabs = TAB_WIDTH spaces).
+ */
+function measureIndent(line: string): number {
+  let indent = 0;
+  for (const char of line) {
+    if (char === " ") {
+      indent++;
+    } else if (char === "\t") {
+      indent += TAB_WIDTH;
+    } else {
+      break;
+    }
+  }
+  return indent;
+}
+
+/**
+ * Check if a line is a comment.
+ */
+function isComment(record: LineRecord): boolean {
+  const trimmed = record.raw.trim();
+  return COMMENT_PREFIXES.some((prefix) => trimmed.startsWith(prefix));
+}
+
+/**
+ * Format a line for display (truncate if too long).
+ */
+function formatLine(line: string): string {
+  if (line.length > MAX_LINE_LENGTH) {
+    return line.substring(0, MAX_LINE_LENGTH);
+  }
+  return line;
 }
--- a/src/tools/impl/Shell.ts
+++ b/src/tools/impl/Shell.ts
@@ -1,4 +1,5 @@
-import { bash } from "./Bash.js";
+import { spawn } from "node:child_process";
+import * as path from "node:path";
 import { validateRequiredParams } from "./validation.js";

 interface ShellArgs {
@@ -15,58 +16,92 @@ interface ShellResult {
  stderr: string[];
 }

+const DEFAULT_TIMEOUT = 120000;
+
 /**
 * Codex-style shell tool.
- * Runs an array of shell arguments, typically ["bash", "-lc", "..."].
+ * Runs an array of shell arguments using execvp-style semantics.
+ * Typically called with ["bash", "-lc", "..."] for shell commands.
 */
 export async function shell(args: ShellArgs): Promise<ShellResult> {
  validateRequiredParams(args, ["command"], "shell");

-  const { command, workdir, timeout_ms, justification: description } = args;
+  const { command, workdir, timeout_ms } = args;
  if (!Array.isArray(command) || command.length === 0) {
    throw new Error("command must be a non-empty array of strings");
  }

-  const commandString = command.join(" ");
-
-  const previousUserCwd = process.env.USER_CWD;
-  if (workdir) {
-    process.env.USER_CWD = workdir;
+  const [executable, ...execArgs] = command;
+  if (!executable) {
+    throw new Error("command must be a non-empty array of strings");
  }
+  const timeout = timeout_ms ?? DEFAULT_TIMEOUT;

-  try {
-    const result = await bash({
-      command: commandString,
-      timeout: timeout_ms ?? 120000,
-      description,
-      run_in_background: false,
+  // Determine working directory
+  const cwd = workdir
+    ? path.isAbsolute(workdir)
+      ? workdir
+      : path.resolve(process.env.USER_CWD || process.cwd(), workdir)
+    : process.env.USER_CWD || process.cwd();
+
+  return new Promise((resolve, reject) => {
+    const stdoutChunks: Buffer[] = [];
+    const stderrChunks: Buffer[] = [];
+
+    const child = spawn(executable, execArgs, {
+      cwd,
+      env: process.env,
+      stdio: ["ignore", "pipe", "pipe"],
    });

-    const text = (result.content ?? [])
-      .map((item) =>
-        "text" in item && typeof item.text === "string" ? item.text : "",
-      )
-      .filter(Boolean)
-      .join("\n");
+    const timeoutId = setTimeout(() => {
+      child.kill("SIGKILL");
+      reject(new Error(`Command timed out after ${timeout}ms`));
+    }, timeout);

-    const stdout = text ? text.split("\n") : [];
-    const stderr =
-      result.status === "error"
-        ? ["Command reported an error. See output for details."]
-        : [];
+    child.stdout.on("data", (chunk: Buffer) => {
+      stdoutChunks.push(chunk);
+    });

-    return {
-      output: text,
-      stdout,
-      stderr,
-    };
-  } finally {
-    if (workdir) {
-      if (previousUserCwd === undefined) {
-        delete process.env.USER_CWD;
+    child.stderr.on("data", (chunk: Buffer) => {
+      stderrChunks.push(chunk);
+    });
+
+    child.on("error", (err: Error) => {
+      clearTimeout(timeoutId);
+      reject(new Error(`Failed to execute command: ${err.message}`));
+    });
+
+    child.on("close", (code: number | null) => {
+      clearTimeout(timeoutId);
+
+      const stdoutText = Buffer.concat(stdoutChunks).toString("utf8");
+      const stderrText = Buffer.concat(stderrChunks).toString("utf8");
+
+      const stdoutLines = stdoutText
+        .split("\n")
+        .filter((line) => line.length > 0);
+      const stderrLines = stderrText
+        .split("\n")
+        .filter((line) => line.length > 0);
+
+      // Combine stdout and stderr for output
+      const output = [stdoutText, stderrText].filter(Boolean).join("\n").trim();
+
+      if (code !== 0 && code !== null) {
+        // Command failed but we still return the output
+        resolve({
+          output: output || `Command exited with code ${code}`,
+          stdout: stdoutLines,
+          stderr: stderrLines,
+        });
      } else {
-        process.env.USER_CWD = previousUserCwd;
+        resolve({
+          output,
+          stdout: stdoutLines,
+          stderr: stderrLines,
+        });
      }
-    }
-  }
+    });
+  });
 }
--- a/src/tools/schemas/ApplyPatch.json
+++ b/src/tools/schemas/ApplyPatch.json
@@ -4,7 +4,7 @@
  "properties": {
    "input": {
      "type": "string",
-      "description": "Patch content in the ApplyPatch tool format, starting with '*** Begin Patch' and ending with '*** End Patch'."
+      "description": "The entire contents of the apply_patch command"
    }
  },
  "required": ["input"],
--- a/src/tools/schemas/ReadFileCodex.json
+++ b/src/tools/schemas/ReadFileCodex.json
@@ -4,7 +4,7 @@
  "properties": {
    "file_path": {
      "type": "string",
-      "description": "Absolute path to the file."
+      "description": "Absolute path to the file"
    },
    "offset": {
      "type": "number",
--- a/src/tools/schemas/Shell.json
+++ b/src/tools/schemas/Shell.json
@@ -7,19 +7,19 @@
      "items": {
        "type": "string"
      },
-      "description": "The command to execute as an array of shell arguments."
+      "description": "The command to execute"
    },
    "workdir": {
      "type": "string",
-      "description": "The working directory to execute the command in."
+      "description": "The working directory to execute the command in"
    },
    "timeout_ms": {
      "type": "number",
-      "description": "The timeout for the command in milliseconds."
+      "description": "The timeout for the command in milliseconds"
    },
    "with_escalated_permissions": {
      "type": "boolean",
-      "description": "Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions."
+      "description": "Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions"
    },
    "justification": {
      "type": "string",
--- a/src/tools/schemas/ShellCommand.json
+++ b/src/tools/schemas/ShellCommand.json
@@ -4,19 +4,19 @@
  "properties": {
    "command": {
      "type": "string",
-      "description": "The shell script to execute in the user's default shell."
+      "description": "The shell script to execute in the user's default shell"
    },
    "workdir": {
      "type": "string",
-      "description": "The working directory to execute the command in."
+      "description": "The working directory to execute the command in"
    },
    "timeout_ms": {
      "type": "number",
-      "description": "The timeout for the command in milliseconds."
+      "description": "The timeout for the command in milliseconds"
    },
    "with_escalated_permissions": {
      "type": "boolean",
-      "description": "Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions."
+      "description": "Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions"
    },
    "justification": {
      "type": "string",
--- a/src/tools/schemas/UpdatePlan.json
+++ b/src/tools/schemas/UpdatePlan.json
@@ -3,8 +3,7 @@
  "type": "object",
  "properties": {
    "explanation": {
-      "type": "string",
-      "description": "Optional explanation of the plan or changes being made"
+      "type": "string"
    },
    "plan": {
      "type": "array",
@@ -13,8 +12,7 @@
        "type": "object",
        "properties": {
          "step": {
-            "type": "string",
-            "description": "Description of the step"
+            "type": "string"
          },
          "status": {
            "type": "string",
--- a/src/tools/toolset.ts
+++ b/src/tools/toolset.ts
@@ -19,6 +19,9 @@ const ANTHROPIC_TOOLS = ANTHROPIC_DEFAULT_TOOLS;
 const CODEX_TOOLS = OPENAI_DEFAULT_TOOLS;
 const GEMINI_TOOLS = GEMINI_DEFAULT_TOOLS;

+// Server-side/base tools that should stay attached regardless of Letta toolset
+export const BASE_TOOL_NAMES = ["memory", "web_search"];
+
 /**
 * Gets the list of Letta Code tools currently attached to an agent.
 * Returns the tool names that are both attached to the agent AND in our tool definitions.