From 36c571f38fcc0bcf77e3d395e641cb5963062b70 Mon Sep 17 00:00:00 2001 From: Charles Packer Date: Wed, 26 Nov 2025 19:12:31 -0800 Subject: [PATCH] fix: align the schemas, params, and descriptions (#128) --- .husky/pre-commit | 4 +- src/tests/fileSearch.test.ts | 78 ++++--- src/tests/grep-files-codex.test.ts | 170 +++++++++++++++ src/tests/headless-scenario.ts | 6 +- src/tests/list-dir-codex.test.ts | 156 ++++++++++++++ src/tests/read-file-indentation.test.ts | 155 +++++++++++++ src/tests/shell-codex.test.ts | 176 +++++++++++++++ src/tools/descriptions/ApplyPatch.md | 73 ++++++- src/tools/descriptions/GrepFiles.md | 9 +- src/tools/descriptions/ListDirCodex.md | 7 +- src/tools/descriptions/ReadFileCodex.md | 9 +- src/tools/descriptions/Shell.md | 11 +- src/tools/descriptions/ShellCommand.md | 10 +- src/tools/descriptions/UpdatePlan.md | 17 -- src/tools/impl/GrepFiles.ts | 42 +++- src/tools/impl/ListDirCodex.ts | 209 +++++++++++++++++- src/tools/impl/ReadFileCodex.ts | 275 +++++++++++++++++++++++- src/tools/impl/Shell.ts | 109 ++++++---- src/tools/schemas/ApplyPatch.json | 2 +- src/tools/schemas/ReadFileCodex.json | 2 +- src/tools/schemas/Shell.json | 8 +- src/tools/schemas/ShellCommand.json | 8 +- src/tools/schemas/UpdatePlan.json | 6 +- src/tools/toolset.ts | 3 + 24 files changed, 1374 insertions(+), 171 deletions(-) create mode 100644 src/tests/grep-files-codex.test.ts create mode 100644 src/tests/list-dir-codex.test.ts create mode 100644 src/tests/read-file-indentation.test.ts create mode 100644 src/tests/shell-codex.test.ts diff --git a/.husky/pre-commit b/.husky/pre-commit index 32177c4..5bbf70e 100755 --- a/.husky/pre-commit +++ b/.husky/pre-commit @@ -1,3 +1,3 @@ #!/usr/bin/env sh -bun lint-staged -bun run typecheck +# Run the same checks as CI to ensure parity +bun run check diff --git a/src/tests/fileSearch.test.ts b/src/tests/fileSearch.test.ts index 7876bf3..f062aa0 100644 --- a/src/tests/fileSearch.test.ts +++ b/src/tests/fileSearch.test.ts @@ -3,6 +3,7 @@ import { mkdirSync, rmSync, writeFileSync } from "node:fs"; import { join } from "node:path"; import { searchFiles } from "../cli/helpers/fileSearch"; +const isWindows = process.platform === "win32"; const TEST_DIR = join(process.cwd(), ".test-filesearch"); beforeEach(() => { @@ -145,49 +146,58 @@ test("searchFiles handles relative path queries", async () => { expect(results.some((r) => r.path.includes("App.tsx"))).toBe(true); }); -test("searchFiles supports partial path matching (deep)", async () => { - const originalCwd = process.cwd(); - process.chdir(TEST_DIR); +test.skipIf(isWindows)( + "searchFiles supports partial path matching (deep)", + async () => { + const originalCwd = process.cwd(); + process.chdir(TEST_DIR); - // Search for "components/Button" should match "src/components/Button.tsx" - const results = await searchFiles("components/Button", true); + // Search for "components/Button" should match "src/components/Button.tsx" + const results = await searchFiles("components/Button", true); - process.chdir(originalCwd); + process.chdir(originalCwd); - expect(results.length).toBeGreaterThanOrEqual(1); - expect(results.some((r) => r.path.includes("components/Button.tsx"))).toBe( - true, - ); -}); + expect(results.length).toBeGreaterThanOrEqual(1); + expect(results.some((r) => r.path.includes("components/Button.tsx"))).toBe( + true, + ); + }, +); -test("searchFiles supports partial directory path matching (deep)", async () => { - const originalCwd = process.cwd(); - process.chdir(TEST_DIR); +test.skipIf(isWindows)( + "searchFiles supports partial directory path matching (deep)", + async () => { + const originalCwd = process.cwd(); + process.chdir(TEST_DIR); - // Search for "src/components" should match the directory - const results = await searchFiles("src/components", true); + // Search for "src/components" should match the directory + const results = await searchFiles("src/components", true); - process.chdir(originalCwd); + process.chdir(originalCwd); - expect(results.length).toBeGreaterThanOrEqual(1); - expect( - results.some((r) => r.path === "src/components" && r.type === "dir"), - ).toBe(true); -}); + expect(results.length).toBeGreaterThanOrEqual(1); + expect( + results.some((r) => r.path === "src/components" && r.type === "dir"), + ).toBe(true); + }, +); -test("searchFiles partial path matching works with subdirectories", async () => { - const originalCwd = process.cwd(); - process.chdir(TEST_DIR); +test.skipIf(isWindows)( + "searchFiles partial path matching works with subdirectories", + async () => { + const originalCwd = process.cwd(); + process.chdir(TEST_DIR); - // Create nested directory - mkdirSync(join(TEST_DIR, "ab/cd/ef"), { recursive: true }); - writeFileSync(join(TEST_DIR, "ab/cd/ef/test.txt"), "test"); + // Create nested directory + mkdirSync(join(TEST_DIR, "ab/cd/ef"), { recursive: true }); + writeFileSync(join(TEST_DIR, "ab/cd/ef/test.txt"), "test"); - // Search for "cd/ef" should match "ab/cd/ef" - const results = await searchFiles("cd/ef", true); + // Search for "cd/ef" should match "ab/cd/ef" + const results = await searchFiles("cd/ef", true); - process.chdir(originalCwd); + process.chdir(originalCwd); - expect(results.length).toBeGreaterThanOrEqual(1); - expect(results.some((r) => r.path.includes("cd/ef"))).toBe(true); -}); + expect(results.length).toBeGreaterThanOrEqual(1); + expect(results.some((r) => r.path.includes("cd/ef"))).toBe(true); + }, +); diff --git a/src/tests/grep-files-codex.test.ts b/src/tests/grep-files-codex.test.ts new file mode 100644 index 0000000..87dec24 --- /dev/null +++ b/src/tests/grep-files-codex.test.ts @@ -0,0 +1,170 @@ +import { describe, expect, test } from "bun:test"; +import { promises as fs } from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { grep_files } from "../tools/impl/GrepFiles.js"; + +describe("grep_files codex tool", () => { + async function createTempDirWithFiles( + files: Record, + ): Promise { + // Create a fresh temp directory for each test + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "grep-files-test-")); + + for (const [relativePath, content] of Object.entries(files)) { + const fullPath = path.join(dir, relativePath); + const parentDir = path.dirname(fullPath); + + await fs.mkdir(parentDir, { recursive: true }); + await fs.writeFile(fullPath, content); + } + + return dir; + } + + test("finds files matching pattern", async () => { + const dir = await createTempDirWithFiles({ + "file1.txt": "hello world", + "file2.txt": "goodbye world", + "file3.txt": "no match here", + }); + + const result = await grep_files({ pattern: "world", path: dir }); + + expect(result.output).toContain("file1.txt"); + expect(result.output).toContain("file2.txt"); + expect(result.output).not.toContain("file3.txt"); + expect(result.files).toBe(2); + expect(result.truncated).toBe(false); + }); + + test("respects include glob pattern", async () => { + const dir = await createTempDirWithFiles({ + "code.ts": "function hello() {}", + "code.js": "function hello() {}", + "readme.md": "hello documentation", + }); + + const result = await grep_files({ + pattern: "hello", + path: dir, + include: "*.ts", + }); + + expect(result.output).toContain("code.ts"); + expect(result.output).not.toContain("code.js"); + expect(result.output).not.toContain("readme.md"); + expect(result.files).toBe(1); + }); + + test("respects limit parameter", async () => { + const dir = await createTempDirWithFiles({ + "file01.txt": "match", + "file02.txt": "match", + "file03.txt": "match", + "file04.txt": "match", + "file05.txt": "match", + }); + + const result = await grep_files({ + pattern: "match", + path: dir, + limit: 3, + }); + + expect(result.files).toBe(3); + expect(result.truncated).toBe(true); + + // Count files in output (header line + file paths) + const lines = result.output.split("\n").filter((l) => l.trim() !== ""); + // Header line "Found 3 files (truncated from 5)" + 3 file paths = 4 lines + expect(lines.length).toBe(4); + }); + + test("returns truncated: false when under limit", async () => { + const dir = await createTempDirWithFiles({ + "file1.txt": "match", + "file2.txt": "match", + }); + + const result = await grep_files({ + pattern: "match", + path: dir, + limit: 10, + }); + + expect(result.files).toBe(2); + expect(result.truncated).toBe(false); + }); + + test("handles no matches gracefully", async () => { + const dir = await createTempDirWithFiles({ + "file1.txt": "hello", + "file2.txt": "world", + }); + + const result = await grep_files({ + pattern: "nonexistent_unique_pattern_xyz", + path: dir, + }); + + // When no matches, output may be empty or undefined + const hasNoFiles = + !result.files || + result.files === 0 || + result.output === "" || + !result.output; + expect(hasNoFiles).toBe(true); + }); + + test("searches recursively by default", async () => { + const dir = await createTempDirWithFiles({ + "root.txt": "findme", + "subdir/nested.txt": "findme", + "subdir/deep/deeper.txt": "findme", + }); + + const result = await grep_files({ + pattern: "findme", + path: dir, + }); + + expect(result.output).toContain("root.txt"); + expect(result.output).toContain("nested.txt"); + expect(result.output).toContain("deeper.txt"); + expect(result.files).toBe(3); + }); + + test("supports regex patterns", async () => { + const dir = await createTempDirWithFiles({ + "file1.txt": "error: something failed", + "file2.txt": "Error: another failure", + "file3.txt": "no errors here", + }); + + // Case-insensitive pattern via regex + const result = await grep_files({ + pattern: "[Ee]rror:", + path: dir, + }); + + expect(result.output).toContain("file1.txt"); + expect(result.output).toContain("file2.txt"); + expect(result.output).not.toContain("file3.txt"); + }); + + test("handles empty pattern gracefully", async () => { + const dir = await createTempDirWithFiles({ + "file1.txt": "content", + }); + + // Empty pattern might not throw, but should handle gracefully + const result = await grep_files({ + pattern: "", + path: dir, + }); + + // Just verify it doesn't crash - behavior may vary + expect(result).toBeDefined(); + }); +}); diff --git a/src/tests/headless-scenario.ts b/src/tests/headless-scenario.ts index 1810c69..220e698 100644 --- a/src/tests/headless-scenario.ts +++ b/src/tests/headless-scenario.ts @@ -52,9 +52,9 @@ function scenarioPrompt(): string { "I want to test your tool calling abilities (do not ask for any clarifications, this is an automated test suite inside a CI runner, there is no human to assist you). " + "First, call a single web_search to get the weather in SF. " + "Then, try calling two web_searches in parallel. " + - "Then, try calling the bash tool to output an echo. " + - "Then, try calling three copies of the bash tool in parallel to do 3 parallel echos: echo 'Test1', echo 'Test2', echo 'Test3'. " + - "Then finally, try calling 2 bash tools and 1 web_search, in parallel, so three parallel tools. " + + "Then, try running a shell command to output an echo (use whatever shell/bash tool is available). " + + "Then, try running three shell commands in parallel to do 3 parallel echos: echo 'Test1', echo 'Test2', echo 'Test3'. " + + "Then finally, try running 2 shell commands and 1 web_search, in parallel, so three parallel tools. " + "IMPORTANT: If and only if all of the above steps worked as requested, include the word BANANA (uppercase) somewhere in your final response." ); } diff --git a/src/tests/list-dir-codex.test.ts b/src/tests/list-dir-codex.test.ts new file mode 100644 index 0000000..8a5a59b --- /dev/null +++ b/src/tests/list-dir-codex.test.ts @@ -0,0 +1,156 @@ +import { describe, expect, test } from "bun:test"; +import { promises as fs } from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { list_dir } from "../tools/impl/ListDirCodex.js"; + +describe("list_dir codex tool", () => { + let tempDir: string; + + async function setupTempDir(): Promise { + if (!tempDir) { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "list-dir-test-")); + } + return tempDir; + } + + async function createStructure( + structure: Record, + ): Promise { + const dir = await setupTempDir(); + + for (const [relativePath, content] of Object.entries(structure)) { + const fullPath = path.join(dir, relativePath); + const parentDir = path.dirname(fullPath); + + await fs.mkdir(parentDir, { recursive: true }); + + if (content !== null) { + // It's a file + await fs.writeFile(fullPath, content); + } + // If content is null, it's just a directory (already created by mkdir) + } + + return dir; + } + + test("lists directory with default pagination", async () => { + const dir = await createStructure({ + "file1.txt": "content1", + "file2.txt": "content2", + "subdir/file3.txt": "content3", + }); + + const result = await list_dir({ dir_path: dir }); + + expect(result.content).toContain(`Absolute path: ${dir}`); + expect(result.content).toContain("file1.txt"); + expect(result.content).toContain("file2.txt"); + expect(result.content).toContain("subdir/"); + }); + + test("respects offset parameter (1-indexed)", async () => { + const dir = await createStructure({ + "aaa.txt": "a", + "bbb.txt": "b", + "ccc.txt": "c", + "ddd.txt": "d", + }); + + // Skip first 2 entries + const result = await list_dir({ dir_path: dir, offset: 3, limit: 10 }); + + // Should not contain first two entries (when sorted alphabetically) + const lines = result.content.split("\n"); + // First line is "Absolute path: ..." + expect(lines[0]).toContain("Absolute path:"); + // Remaining lines should be limited entries + expect(lines.length).toBeGreaterThan(1); + }); + + test("respects limit parameter", async () => { + const dir = await createStructure({ + "file1.txt": "1", + "file2.txt": "2", + "file3.txt": "3", + "file4.txt": "4", + "file5.txt": "5", + }); + + const result = await list_dir({ dir_path: dir, limit: 2 }); + + // Should have "More than 2 entries found" message + expect(result.content).toContain("More than 2 entries found"); + }); + + test("respects depth parameter", async () => { + const dir = await createStructure({ + "level1/level2/level3/deep.txt": "deep", + "level1/shallow.txt": "shallow", + "root.txt": "root", + }); + + // Depth 1 should only show immediate children + const result1 = await list_dir({ dir_path: dir, depth: 1, limit: 100 }); + expect(result1.content).toContain("level1/"); + expect(result1.content).toContain("root.txt"); + expect(result1.content).not.toContain("level2"); + expect(result1.content).not.toContain("shallow.txt"); + + // Depth 2 should show one level deeper + const result2 = await list_dir({ dir_path: dir, depth: 2, limit: 100 }); + expect(result2.content).toContain("level1/"); + expect(result2.content).toContain("shallow.txt"); + expect(result2.content).toContain("level2/"); + expect(result2.content).not.toContain("level3"); + }); + + test("shows directories with trailing slash", async () => { + const dir = await createStructure({ + "mydir/file.txt": "content", + }); + + const result = await list_dir({ dir_path: dir }); + + expect(result.content).toContain("mydir/"); + }); + + test("throws error for non-absolute path", async () => { + await expect(list_dir({ dir_path: "relative/path" })).rejects.toThrow( + "dir_path must be an absolute path", + ); + }); + + test("throws error for offset < 1", async () => { + const dir = await setupTempDir(); + await expect(list_dir({ dir_path: dir, offset: 0 })).rejects.toThrow( + "offset must be a 1-indexed entry number", + ); + }); + + test("throws error for limit < 1", async () => { + const dir = await setupTempDir(); + await expect(list_dir({ dir_path: dir, limit: 0 })).rejects.toThrow( + "limit must be greater than zero", + ); + }); + + test("throws error for depth < 1", async () => { + const dir = await setupTempDir(); + await expect(list_dir({ dir_path: dir, depth: 0 })).rejects.toThrow( + "depth must be greater than zero", + ); + }); + + test("handles empty directory", async () => { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "empty-dir-test-")); + + const result = await list_dir({ dir_path: dir }); + + expect(result.content).toContain(`Absolute path: ${dir}`); + // Should only have the header line + const lines = result.content.split("\n").filter((l) => l.trim() !== ""); + expect(lines.length).toBe(1); + }); +}); diff --git a/src/tests/read-file-indentation.test.ts b/src/tests/read-file-indentation.test.ts new file mode 100644 index 0000000..37e2e17 --- /dev/null +++ b/src/tests/read-file-indentation.test.ts @@ -0,0 +1,155 @@ +import { describe, expect, test } from "bun:test"; +import { promises as fs } from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { read_file } from "../tools/impl/ReadFileCodex.js"; + +describe("read_file indentation mode", () => { + let tempDir: string; + + async function createTempFile(content: string): Promise { + if (!tempDir) { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "read-file-test-")); + } + const filePath = path.join(tempDir, `test-${Date.now()}.txt`); + await fs.writeFile(filePath, content); + return filePath; + } + + test("slice mode reads requested range", async () => { + const filePath = await createTempFile("alpha\nbeta\ngamma\n"); + const result = await read_file({ + file_path: filePath, + offset: 2, + limit: 2, + }); + expect(result.content).toBe("L2: beta\nL3: gamma"); + }); + + test("indentation mode captures block", async () => { + const content = `fn outer() { + if cond { + inner(); + } + tail(); +} +`; + const filePath = await createTempFile(content); + const result = await read_file({ + file_path: filePath, + offset: 3, + limit: 10, + mode: "indentation", + indentation: { + anchor_line: 3, + include_siblings: false, + max_levels: 1, + }, + }); + + expect(result.content).toBe( + "L2: if cond {\nL3: inner();\nL4: }", + ); + }); + + test("indentation mode expands parents", async () => { + const content = `mod root { + fn outer() { + if cond { + inner(); + } + } +} +`; + const filePath = await createTempFile(content); + + // max_levels: 2 should capture fn outer and its contents + const result = await read_file({ + file_path: filePath, + offset: 4, + limit: 50, + mode: "indentation", + indentation: { + anchor_line: 4, + max_levels: 2, + }, + }); + + expect(result.content).toBe( + "L2: fn outer() {\nL3: if cond {\nL4: inner();\nL5: }\nL6: }", + ); + }); + + test("indentation mode respects sibling flag", async () => { + const content = `fn wrapper() { + if first { + do_first(); + } + if second { + do_second(); + } +} +`; + const filePath = await createTempFile(content); + + // Without siblings + const result1 = await read_file({ + file_path: filePath, + offset: 3, + limit: 50, + mode: "indentation", + indentation: { + anchor_line: 3, + include_siblings: false, + max_levels: 1, + }, + }); + + expect(result1.content).toBe( + "L2: if first {\nL3: do_first();\nL4: }", + ); + + // With siblings + const result2 = await read_file({ + file_path: filePath, + offset: 3, + limit: 50, + mode: "indentation", + indentation: { + anchor_line: 3, + include_siblings: true, + max_levels: 1, + }, + }); + + expect(result2.content).toBe( + "L2: if first {\nL3: do_first();\nL4: }\nL5: if second {\nL6: do_second();\nL7: }", + ); + }); + + test("indentation mode includes header comments", async () => { + const content = `class Foo { + // This is a comment + void method() { + doSomething(); + } +} +`; + const filePath = await createTempFile(content); + + const result = await read_file({ + file_path: filePath, + offset: 4, + limit: 50, + mode: "indentation", + indentation: { + anchor_line: 4, + max_levels: 1, + include_header: true, + }, + }); + + // Should include the comment above the method + expect(result.content).toContain("// This is a comment"); + }); +}); diff --git a/src/tests/shell-codex.test.ts b/src/tests/shell-codex.test.ts new file mode 100644 index 0000000..8e85d30 --- /dev/null +++ b/src/tests/shell-codex.test.ts @@ -0,0 +1,176 @@ +import { describe, expect, test } from "bun:test"; +import { promises as fs } from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { shell } from "../tools/impl/Shell.js"; + +const isWindows = process.platform === "win32"; + +describe("shell codex tool", () => { + let tempDir: string; + + async function setupTempDir(): Promise { + if (!tempDir) { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "shell-test-")); + } + return tempDir; + } + + test("executes simple command with execvp-style args", async () => { + const result = await shell({ + command: ["echo", "hello", "world"], + }); + + expect(result.output).toBe("hello world"); + expect(result.stdout).toContain("hello world"); + expect(result.stderr.length).toBe(0); + }); + + test("executes bash -lc style command", async () => { + const result = await shell({ + command: ["bash", "-lc", "echo 'hello from bash'"], + }); + + expect(result.output).toContain("hello from bash"); + }); + + test("handles arguments with spaces correctly", async () => { + // This is the key test for execvp semantics - args with spaces + // should NOT be split + const result = await shell({ + command: ["echo", "hello world", "foo bar"], + }); + + expect(result.output).toBe("hello world foo bar"); + }); + + test.skipIf(isWindows)("respects workdir parameter", async () => { + const dir = await setupTempDir(); + // Resolve symlinks (macOS /var -> /private/var) + const resolvedDir = await fs.realpath(dir); + + const result = await shell({ + command: ["pwd"], + workdir: dir, + }); + + expect(result.output).toBe(resolvedDir); + }); + + test.skipIf(isWindows)("captures stderr output", async () => { + const result = await shell({ + command: ["bash", "-c", "echo 'error message' >&2"], + }); + + expect(result.stderr).toContain("error message"); + }); + + test.skipIf(isWindows)("handles non-zero exit codes", async () => { + const result = await shell({ + command: ["bash", "-c", "exit 1"], + }); + + // Should still resolve (not reject), but output may indicate failure + expect(result).toBeDefined(); + }); + + test.skipIf(isWindows)( + "handles command with output in both stdout and stderr", + async () => { + const result = await shell({ + command: ["bash", "-c", "echo 'stdout'; echo 'stderr' >&2"], + }); + + expect(result.stdout).toContain("stdout"); + expect(result.stderr).toContain("stderr"); + expect(result.output).toContain("stdout"); + expect(result.output).toContain("stderr"); + }, + ); + + test("times out long-running commands", async () => { + await expect( + shell({ + command: ["sleep", "10"], + timeout_ms: 100, + }), + ).rejects.toThrow("timed out"); + }); + + test("throws error for empty command array", async () => { + await expect(shell({ command: [] })).rejects.toThrow( + "command must be a non-empty array", + ); + }); + + test("throws error for missing command", async () => { + // @ts-expect-error Testing invalid input + await expect(shell({})).rejects.toThrow(); + }); + + test.skipIf(isWindows)("handles relative workdir", async () => { + // Set USER_CWD to a known location + const originalCwd = process.env.USER_CWD; + const dir = await setupTempDir(); + process.env.USER_CWD = dir; + + // Create a subdirectory + const subdir = path.join(dir, "subdir"); + await fs.mkdir(subdir, { recursive: true }); + // Resolve symlinks (macOS /var -> /private/var) + const resolvedSubdir = await fs.realpath(subdir); + + try { + const result = await shell({ + command: ["pwd"], + workdir: "subdir", + }); + + expect(result.output).toBe(resolvedSubdir); + } finally { + if (originalCwd !== undefined) { + process.env.USER_CWD = originalCwd; + } else { + delete process.env.USER_CWD; + } + } + }); + + test.skipIf(isWindows)( + "handles command that produces multi-line output", + async () => { + const result = await shell({ + command: ["bash", "-c", "echo 'line1'; echo 'line2'; echo 'line3'"], + }); + + expect(result.stdout).toContain("line1"); + expect(result.stdout).toContain("line2"); + expect(result.stdout).toContain("line3"); + expect(result.stdout.length).toBe(3); + }, + ); + + test("handles special characters in arguments", async () => { + const result = await shell({ + command: ["echo", "$HOME", "$(whoami)", "`date`"], + }); + + // Since we're using execvp-style (not shell expansion), + // these should be treated as literal strings + expect(result.output).toContain("$HOME"); + expect(result.output).toContain("$(whoami)"); + expect(result.output).toContain("`date`"); + }); + + test.skipIf(isWindows)("handles file operations with bash -lc", async () => { + const dir = await setupTempDir(); + const testFile = path.join(dir, "test-output.txt"); + + await shell({ + command: ["bash", "-lc", `echo 'test content' > "${testFile}"`], + }); + + const content = await fs.readFile(testFile, "utf8"); + expect(content.trim()).toBe("test content"); + }); +}); diff --git a/src/tools/descriptions/ApplyPatch.md b/src/tools/descriptions/ApplyPatch.md index ad4368a..9ff890b 100644 --- a/src/tools/descriptions/ApplyPatch.md +++ b/src/tools/descriptions/ApplyPatch.md @@ -1,13 +1,70 @@ -# apply_patch +Use the `apply_patch` tool to edit files. +Your patch language is a stripped‑down, file‑oriented diff format designed to be easy to parse and safe to apply. You can think of it as a high‑level envelope: -Applies a patch to the local filesystem using the Codex/Letta ApplyPatch format. +*** Begin Patch +[ one or more file sections ] +*** End Patch -- **input**: Required patch string using the `*** Begin Patch` / `*** End Patch` envelope and per-file sections: - - `*** Add File: path` followed by one or more `+` lines with the file contents. - - `*** Update File: path` followed by one or more `@@` hunks where each line starts with a space (` `), minus (`-`), or plus (`+`), representing context, removed, and added lines respectively. - - `*** Delete File: path` to delete an existing file. -- Paths are interpreted relative to the current working directory. -- The tool validates that each hunk's old content appears in the target file and fails if it cannot be applied cleanly. +Within that envelope, you get a sequence of file operations. +You MUST include a header to specify the action you are taking. +Each operation starts with one of three headers: + +*** Add File: - create a new file. Every following line is a + line (the initial contents). +*** Delete File: - remove an existing file. Nothing follows. +*** Update File: - patch an existing file in place (optionally with a rename). + +May be immediately followed by *** Move to: if you want to rename the file. +Then one or more "hunks", each introduced by @@ (optionally followed by a hunk header). +Within a hunk each line starts with: + +For instructions on [context_before] and [context_after]: +- By default, show 3 lines of code immediately above and 3 lines immediately below each change. If a change is within 3 lines of a previous change, do NOT duplicate the first change's [context_after] lines in the second change's [context_before] lines. +- If 3 lines of context is insufficient to uniquely identify the snippet of code within the file, use the @@ operator to indicate the class or function to which the snippet belongs. For instance, we might have: +@@ class BaseClass +[3 lines of pre-context] +- [old_code] ++ [new_code] +[3 lines of post-context] + +- If a code block is repeated so many times in a class or function such that even a single `@@` statement and 3 lines of context cannot uniquely identify the snippet of code, you can use multiple `@@` statements to jump to the right context. For instance: + +@@ class BaseClass +@@ def method(): +[3 lines of pre-context] +- [old_code] ++ [new_code] +[3 lines of post-context] + +The full grammar definition is below: +Patch := Begin { FileOp } End +Begin := "*** Begin Patch" NEWLINE +End := "*** End Patch" NEWLINE +FileOp := AddFile | DeleteFile | UpdateFile +AddFile := "*** Add File: " path NEWLINE { "+" line NEWLINE } +DeleteFile := "*** Delete File: " path NEWLINE +UpdateFile := "*** Update File: " path NEWLINE [ MoveTo ] { Hunk } +MoveTo := "*** Move to: " newPath NEWLINE +Hunk := "@@" [ header ] NEWLINE { HunkLine } [ "*** End of File" NEWLINE ] +HunkLine := (" " | "-" | "+") text NEWLINE + +A full patch can combine several operations: + +*** Begin Patch +*** Add File: hello.txt ++Hello world +*** Update File: src/app.py +*** Move to: src/main.py +@@ def greet(): +-print("Hi") ++print("Hello, world!") +*** Delete File: obsolete.txt +*** End Patch + +It is important to remember: + +- You must include a header with your intended action (Add/Delete/Update) +- You must prefix new lines with `+` even when creating a new file +- File references can only be relative, NEVER ABSOLUTE. diff --git a/src/tools/descriptions/GrepFiles.md b/src/tools/descriptions/GrepFiles.md index 901a332..3a97be8 100644 --- a/src/tools/descriptions/GrepFiles.md +++ b/src/tools/descriptions/GrepFiles.md @@ -1,11 +1,4 @@ -# grep_files - -Finds files whose contents match a regular expression pattern, similar to Codex's `grep_files` tool. - -- **pattern**: Required regular expression pattern to search for. -- **include**: Optional glob that limits which files are searched (for example `*.rs` or `*.{ts,tsx}`). -- **path**: Optional directory or file path to search (defaults to the current working directory). -- **limit**: Accepted for compatibility but currently ignored; output may be truncated for very large result sets. +Finds files whose contents match the pattern and lists them by modification time. diff --git a/src/tools/descriptions/ListDirCodex.md b/src/tools/descriptions/ListDirCodex.md index 2a9c0ff..f4d67cb 100644 --- a/src/tools/descriptions/ListDirCodex.md +++ b/src/tools/descriptions/ListDirCodex.md @@ -1,9 +1,4 @@ -# list_dir - -Lists entries in a local directory, compatible with the Codex `list_dir` tool. - -- **dir_path**: Absolute path to the directory to list. -- **offset / limit / depth**: Accepted for compatibility but currently ignored; the underlying implementation returns a tree-style listing of the directory. +Lists entries in a local directory with 1-indexed entry numbers and simple type labels. diff --git a/src/tools/descriptions/ReadFileCodex.md b/src/tools/descriptions/ReadFileCodex.md index cf45a8b..a308cc4 100644 --- a/src/tools/descriptions/ReadFileCodex.md +++ b/src/tools/descriptions/ReadFileCodex.md @@ -1,11 +1,4 @@ -# read_file - -Reads a local file with 1-indexed line numbers, compatible with the Codex `read_file` tool. - -- **file_path**: Absolute path to the file to read. -- **offset**: Optional starting line number (1-based) for the slice. -- **limit**: Optional maximum number of lines to return. -- **mode / indentation**: Accepted for compatibility with Codex but currently treated as slice-only; indentation mode is not yet implemented. +Reads a local file with 1-indexed line numbers, supporting slice and indentation-aware block modes. diff --git a/src/tools/descriptions/Shell.md b/src/tools/descriptions/Shell.md index 6ca21b2..371e82c 100644 --- a/src/tools/descriptions/Shell.md +++ b/src/tools/descriptions/Shell.md @@ -1,11 +1,6 @@ -# shell - -Runs a shell command represented as an array of arguments and returns its output. - -- **command**: Required array of strings to execute, typically starting with the shell (for example `["bash", "-lc", "npm test"]`). -- **workdir**: Optional working directory to run the command in; prefer using this instead of `cd`. -- **timeout_ms**: Optional timeout in milliseconds (defaults to 120000ms / 2 minutes). -- **with_escalated_permissions / justification**: Accepted for compatibility with Codex; currently treated as hints only and do not bypass local sandboxing. +Runs a shell command and returns its output. +- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"]. +- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary. diff --git a/src/tools/descriptions/ShellCommand.md b/src/tools/descriptions/ShellCommand.md index 230c13e..35f8668 100644 --- a/src/tools/descriptions/ShellCommand.md +++ b/src/tools/descriptions/ShellCommand.md @@ -1,11 +1,5 @@ -# shell_command - -Runs a shell script string in the user's default shell and returns its output. - -- **command**: Required shell script to execute (for example `ls -la` or `pytest tests`). -- **workdir**: Optional working directory to run the command in; prefer using this instead of `cd`. -- **timeout_ms**: Optional timeout in milliseconds (defaults to 120000ms / 2 minutes). -- **with_escalated_permissions / justification**: Accepted for compatibility with Codex; currently treated as hints only and do not bypass local sandboxing. +Runs a shell command and returns its output. +- Always set the `workdir` param when using the shell_command function. Do not use `cd` unless absolutely necessary. diff --git a/src/tools/descriptions/UpdatePlan.md b/src/tools/descriptions/UpdatePlan.md index 84d22e7..13c9c85 100644 --- a/src/tools/descriptions/UpdatePlan.md +++ b/src/tools/descriptions/UpdatePlan.md @@ -1,21 +1,4 @@ Updates the task plan. - Provide an optional explanation and a list of plan items, each with a step and status. At most one step can be in_progress at a time. -Use a plan to break down complex or multi-step tasks into meaningful, logically ordered steps that are easy to verify as you go. Plans help demonstrate that you've understood the task and convey how you're approaching it. - -Do not use plans for simple or single-step tasks that you can just do immediately. - -Before running a command or making changes, consider whether you have completed the previous step, and make sure to mark it as completed before moving on to the next step. - -Sometimes you may need to change plans in the middle of a task: call update_plan with the updated plan and make sure to provide an explanation of the rationale when doing so. - -**Arguments**: -- **plan**: Required array of plan items. Each item must have: - - **step**: String description of the step - - **status**: One of "pending", "in_progress", or "completed" -- **explanation**: Optional explanation for the plan or changes - -**Returns**: Confirmation message that the plan was updated. - diff --git a/src/tools/impl/GrepFiles.ts b/src/tools/impl/GrepFiles.ts index 83098f5..652f863 100644 --- a/src/tools/impl/GrepFiles.ts +++ b/src/tools/impl/GrepFiles.ts @@ -8,7 +8,14 @@ interface GrepFilesArgs { limit?: number; } -type GrepFilesResult = Awaited>; +interface GrepFilesResult { + output: string; + matches?: number; + files?: number; + truncated?: boolean; +} + +const DEFAULT_LIMIT = 100; /** * Codex-style grep_files tool. @@ -19,7 +26,7 @@ export async function grep_files( ): Promise { validateRequiredParams(args, ["pattern"], "grep_files"); - const { pattern, include, path } = args; + const { pattern, include, path, limit = DEFAULT_LIMIT } = args; const grepArgs: GrepArgs = { pattern, @@ -28,5 +35,34 @@ export async function grep_files( output_mode: "files_with_matches", }; - return grep(grepArgs); + const result = await grep(grepArgs); + + // The underlying grep result already has the correct files count + const totalFiles = result.files ?? 0; + + // Apply limit to the file list + if (result.output && limit > 0 && totalFiles > limit) { + // The output format is: "Found N files\n/path/to/file1\n/path/to/file2..." + const lines = result.output + .split("\n") + .filter((line) => line.trim() !== ""); + + // First line is "Found N files", rest are file paths + const filePaths = lines.slice(1); + + const truncatedFiles = filePaths.slice(0, limit); + const truncatedOutput = `Found ${limit} file${limit !== 1 ? "s" : ""} (truncated from ${totalFiles})\n${truncatedFiles.join("\n")}`; + + return { + output: truncatedOutput, + files: limit, + truncated: true, + }; + } + + return { + output: result.output, + files: totalFiles, + truncated: false, + }; } diff --git a/src/tools/impl/ListDirCodex.ts b/src/tools/impl/ListDirCodex.ts index 07468a4..03f1d45 100644 --- a/src/tools/impl/ListDirCodex.ts +++ b/src/tools/impl/ListDirCodex.ts @@ -1,6 +1,10 @@ -import { ls } from "./LS.js"; +import { promises as fs } from "node:fs"; +import * as path from "node:path"; import { validateRequiredParams } from "./validation.js"; +const MAX_ENTRY_LENGTH = 500; +const INDENTATION_SPACES = 2; + interface ListDirCodexArgs { dir_path: string; offset?: number; @@ -8,19 +12,212 @@ interface ListDirCodexArgs { depth?: number; } -type ListDirCodexResult = Awaited>; +interface ListDirCodexResult { + content: string; +} + +interface DirEntry { + name: string; // Full relative path for sorting + displayName: string; // Just the filename for display + depth: number; // Indentation depth + kind: "directory" | "file" | "symlink" | "other"; +} /** * Codex-style list_dir tool. - * Delegates to the existing LS implementation; offset/limit/depth are accepted but currently ignored. + * Lists entries with pagination (offset/limit) and depth control. */ export async function list_dir( args: ListDirCodexArgs, ): Promise { validateRequiredParams(args, ["dir_path"], "list_dir"); - const { dir_path } = args; + const { dir_path, offset = 1, limit = 25, depth = 2 } = args; - // LS handles path resolution and formatting. - return ls({ path: dir_path, ignore: [] }); + if (offset < 1) { + throw new Error("offset must be a 1-indexed entry number"); + } + + if (limit < 1) { + throw new Error("limit must be greater than zero"); + } + + if (depth < 1) { + throw new Error("depth must be greater than zero"); + } + + if (!path.isAbsolute(dir_path)) { + throw new Error("dir_path must be an absolute path"); + } + + const entries = await listDirSlice(dir_path, offset, limit, depth); + const output = [`Absolute path: ${dir_path}`, ...entries]; + + return { content: output.join("\n") }; +} + +/** + * List directory entries with pagination. + */ +async function listDirSlice( + dirPath: string, + offset: number, + limit: number, + maxDepth: number, +): Promise { + const entries: DirEntry[] = []; + await collectEntries(dirPath, "", maxDepth, entries); + + if (entries.length === 0) { + return []; + } + + const startIndex = offset - 1; + if (startIndex >= entries.length) { + throw new Error("offset exceeds directory entry count"); + } + + const remainingEntries = entries.length - startIndex; + const cappedLimit = Math.min(limit, remainingEntries); + const endIndex = startIndex + cappedLimit; + + // Get the selected entries and sort by name + const selectedEntries = entries.slice(startIndex, endIndex); + selectedEntries.sort((a, b) => a.name.localeCompare(b.name)); + + const formatted: string[] = []; + for (const entry of selectedEntries) { + formatted.push(formatEntryLine(entry)); + } + + if (endIndex < entries.length) { + formatted.push(`More than ${cappedLimit} entries found`); + } + + return formatted; +} + +/** + * Recursively collect directory entries using BFS. + */ +async function collectEntries( + dirPath: string, + relativePrefix: string, + remainingDepth: number, + entries: DirEntry[], +): Promise { + const queue: Array<{ absPath: string; prefix: string; depth: number }> = [ + { absPath: dirPath, prefix: relativePrefix, depth: remainingDepth }, + ]; + + while (queue.length > 0) { + const current = queue.shift(); + if (!current) break; + const { absPath, prefix, depth } = current; + + const dirEntries: Array<{ + absPath: string; + relativePath: string; + kind: DirEntry["kind"]; + entry: DirEntry; + }> = []; + + try { + const items = await fs.readdir(absPath, { withFileTypes: true }); + + for (const item of items) { + const itemAbsPath = path.join(absPath, item.name); + const relativePath = prefix ? path.join(prefix, item.name) : item.name; + const displayName = formatEntryComponent(item.name); + const displayDepth = prefix ? prefix.split(path.sep).length : 0; + const sortKey = formatEntryName(relativePath); + + let kind: DirEntry["kind"]; + if (item.isSymbolicLink()) { + kind = "symlink"; + } else if (item.isDirectory()) { + kind = "directory"; + } else if (item.isFile()) { + kind = "file"; + } else { + kind = "other"; + } + + dirEntries.push({ + absPath: itemAbsPath, + relativePath, + kind, + entry: { + name: sortKey, + displayName, + depth: displayDepth, + kind, + }, + }); + } + } catch (err) { + throw new Error(`failed to read directory: ${err}`); + } + + // Sort entries alphabetically + dirEntries.sort((a, b) => a.entry.name.localeCompare(b.entry.name)); + + for (const item of dirEntries) { + // Queue subdirectories for traversal if depth allows + if (item.kind === "directory" && depth > 1) { + queue.push({ + absPath: item.absPath, + prefix: item.relativePath, + depth: depth - 1, + }); + } + entries.push(item.entry); + } + } +} + +/** + * Format entry name for sorting (normalize path separators). + */ +function formatEntryName(filePath: string): string { + const normalized = filePath.replace(/\\/g, "/"); + if (normalized.length > MAX_ENTRY_LENGTH) { + return normalized.substring(0, MAX_ENTRY_LENGTH); + } + return normalized; +} + +/** + * Format a single path component. + */ +function formatEntryComponent(name: string): string { + if (name.length > MAX_ENTRY_LENGTH) { + return name.substring(0, MAX_ENTRY_LENGTH); + } + return name; +} + +/** + * Format a directory entry for display. + */ +function formatEntryLine(entry: DirEntry): string { + const indent = " ".repeat(entry.depth * INDENTATION_SPACES); + let name = entry.displayName; + + switch (entry.kind) { + case "directory": + name += "/"; + break; + case "symlink": + name += "@"; + break; + case "other": + name += "?"; + break; + default: + // "file" type has no suffix + break; + } + + return `${indent}${name}`; } diff --git a/src/tools/impl/ReadFileCodex.ts b/src/tools/impl/ReadFileCodex.ts index 4965a8a..7a05144 100644 --- a/src/tools/impl/ReadFileCodex.ts +++ b/src/tools/impl/ReadFileCodex.ts @@ -1,6 +1,10 @@ -import { read } from "./Read.js"; +import { promises as fs } from "node:fs"; import { validateRequiredParams } from "./validation.js"; +const MAX_LINE_LENGTH = 500; +const TAB_WIDTH = 4; +const COMMENT_PREFIXES = ["#", "//", "--"]; + interface IndentationOptions { anchor_line?: number; max_levels?: number; @@ -21,22 +25,275 @@ interface ReadFileCodexResult { content: string; } +interface LineRecord { + number: number; + raw: string; + display: string; + indent: number; +} + /** * Codex-style read_file tool. - * Currently supports slice-style reading; indentation mode is ignored but accepted. + * Supports both slice mode (simple range) and indentation mode (context-aware block reading). */ export async function read_file( args: ReadFileCodexArgs, ): Promise { validateRequiredParams(args, ["file_path"], "read_file"); - const { file_path, offset, limit } = args; - - const result = await read({ + const { file_path, - offset, - limit, - }); + offset = 1, + limit = 2000, + mode = "slice", + indentation, + } = args; - return { content: result.content }; + if (offset < 1) { + throw new Error("offset must be a 1-indexed line number"); + } + + if (limit < 1) { + throw new Error("limit must be greater than zero"); + } + + let lines: string[]; + + if (mode === "indentation") { + lines = await readIndentationMode( + file_path, + offset, + limit, + indentation ?? {}, + ); + } else { + lines = await readSliceMode(file_path, offset, limit); + } + + return { content: lines.join("\n") }; +} + +/** + * Simple slice mode: read lines from offset to offset + limit. + */ +async function readSliceMode( + filePath: string, + offset: number, + limit: number, +): Promise { + const content = await fs.readFile(filePath, "utf8"); + const allLines = content.split(/\r?\n/); + + const collected: string[] = []; + for ( + let i = offset - 1; + i < allLines.length && collected.length < limit; + i++ + ) { + const line = allLines[i]; + if (line === undefined) break; + const formatted = formatLine(line); + collected.push(`L${i + 1}: ${formatted}`); + } + + if (offset > allLines.length) { + throw new Error("offset exceeds file length"); + } + + return collected; +} + +/** + * Indentation mode: expand around an anchor line based on indentation levels. + */ +async function readIndentationMode( + filePath: string, + offset: number, + limit: number, + options: IndentationOptions, +): Promise { + const anchorLine = options.anchor_line ?? offset; + const maxLevels = options.max_levels ?? 0; + const includeSiblings = options.include_siblings ?? false; + const includeHeader = options.include_header ?? true; + const maxLines = options.max_lines ?? limit; + + if (anchorLine < 1) { + throw new Error("anchor_line must be a 1-indexed line number"); + } + + if (maxLines < 1) { + throw new Error("max_lines must be greater than zero"); + } + + // Read and parse all lines + const content = await fs.readFile(filePath, "utf8"); + const rawLines = content.split(/\r?\n/); + + if (rawLines.length === 0 || anchorLine > rawLines.length) { + throw new Error("anchor_line exceeds file length"); + } + + // Build line records + const records: LineRecord[] = rawLines.map((raw, idx) => ({ + number: idx + 1, + raw, + display: formatLine(raw), + indent: measureIndent(raw), + })); + + // Compute effective indents (blank lines inherit previous indent) + const effectiveIndents = computeEffectiveIndents(records); + + const anchorIndex = anchorLine - 1; + const anchorRecord = records[anchorIndex]; + const anchorIndent = effectiveIndents[anchorIndex] ?? 0; + + if (!anchorRecord) { + throw new Error("anchor_line exceeds file length"); + } + + // Calculate minimum indent to include + const minIndent = + maxLevels === 0 ? 0 : Math.max(0, anchorIndent - maxLevels * TAB_WIDTH); + + // Cap by limits + const finalLimit = Math.min(limit, maxLines, records.length); + + if (finalLimit === 1) { + return [`L${anchorRecord.number}: ${anchorRecord.display}`]; + } + + // Expand from anchor line + const out: LineRecord[] = [anchorRecord]; + let i = anchorIndex - 1; // up cursor + let j = anchorIndex + 1; // down cursor + let iCounterMinIndent = 0; + let jCounterMinIndent = 0; + + while (out.length < finalLimit) { + let progressed = 0; + + // Expand up + if (i >= 0) { + const iIndent = effectiveIndents[i]; + const iRecord = records[i]; + if (iIndent !== undefined && iRecord && iIndent >= minIndent) { + out.unshift(iRecord); + progressed++; + + // Handle sibling exclusion + if (iIndent === minIndent && !includeSiblings) { + const allowHeaderComment = includeHeader && isComment(iRecord); + const canTakeLine = allowHeaderComment || iCounterMinIndent === 0; + + if (canTakeLine) { + iCounterMinIndent++; + } else { + // Remove the line we just added + out.shift(); + progressed--; + i = -1; // Stop moving up + } + } + + i--; + + if (out.length >= finalLimit) break; + } else { + i = -1; // Stop moving up + } + } + + // Expand down + if (j < records.length) { + const jIndent = effectiveIndents[j]; + const jRecord = records[j]; + if (jIndent !== undefined && jRecord && jIndent >= minIndent) { + out.push(jRecord); + progressed++; + + // Handle sibling exclusion + if (jIndent === minIndent && !includeSiblings) { + if (jCounterMinIndent > 0) { + // Remove the line we just added + out.pop(); + progressed--; + j = records.length; // Stop moving down + } + jCounterMinIndent++; + } + + j++; + } else { + j = records.length; // Stop moving down + } + } + + if (progressed === 0) break; + } + + // Trim empty lines at start and end + while (out.length > 0 && out[0]?.raw.trim() === "") { + out.shift(); + } + while (out.length > 0 && out[out.length - 1]?.raw.trim() === "") { + out.pop(); + } + + return out.map((record) => `L${record.number}: ${record.display}`); +} + +/** + * Compute effective indents - blank lines inherit previous line's indent. + */ +function computeEffectiveIndents(records: LineRecord[]): number[] { + const effective: number[] = []; + let previousIndent = 0; + + for (const record of records) { + if (record.raw.trim() === "") { + effective.push(previousIndent); + } else { + previousIndent = record.indent; + effective.push(previousIndent); + } + } + + return effective; +} + +/** + * Measure indentation of a line (tabs = TAB_WIDTH spaces). + */ +function measureIndent(line: string): number { + let indent = 0; + for (const char of line) { + if (char === " ") { + indent++; + } else if (char === "\t") { + indent += TAB_WIDTH; + } else { + break; + } + } + return indent; +} + +/** + * Check if a line is a comment. + */ +function isComment(record: LineRecord): boolean { + const trimmed = record.raw.trim(); + return COMMENT_PREFIXES.some((prefix) => trimmed.startsWith(prefix)); +} + +/** + * Format a line for display (truncate if too long). + */ +function formatLine(line: string): string { + if (line.length > MAX_LINE_LENGTH) { + return line.substring(0, MAX_LINE_LENGTH); + } + return line; } diff --git a/src/tools/impl/Shell.ts b/src/tools/impl/Shell.ts index 61bb20d..51fdef4 100644 --- a/src/tools/impl/Shell.ts +++ b/src/tools/impl/Shell.ts @@ -1,4 +1,5 @@ -import { bash } from "./Bash.js"; +import { spawn } from "node:child_process"; +import * as path from "node:path"; import { validateRequiredParams } from "./validation.js"; interface ShellArgs { @@ -15,58 +16,92 @@ interface ShellResult { stderr: string[]; } +const DEFAULT_TIMEOUT = 120000; + /** * Codex-style shell tool. - * Runs an array of shell arguments, typically ["bash", "-lc", "..."]. + * Runs an array of shell arguments using execvp-style semantics. + * Typically called with ["bash", "-lc", "..."] for shell commands. */ export async function shell(args: ShellArgs): Promise { validateRequiredParams(args, ["command"], "shell"); - const { command, workdir, timeout_ms, justification: description } = args; + const { command, workdir, timeout_ms } = args; if (!Array.isArray(command) || command.length === 0) { throw new Error("command must be a non-empty array of strings"); } - const commandString = command.join(" "); - - const previousUserCwd = process.env.USER_CWD; - if (workdir) { - process.env.USER_CWD = workdir; + const [executable, ...execArgs] = command; + if (!executable) { + throw new Error("command must be a non-empty array of strings"); } + const timeout = timeout_ms ?? DEFAULT_TIMEOUT; - try { - const result = await bash({ - command: commandString, - timeout: timeout_ms ?? 120000, - description, - run_in_background: false, + // Determine working directory + const cwd = workdir + ? path.isAbsolute(workdir) + ? workdir + : path.resolve(process.env.USER_CWD || process.cwd(), workdir) + : process.env.USER_CWD || process.cwd(); + + return new Promise((resolve, reject) => { + const stdoutChunks: Buffer[] = []; + const stderrChunks: Buffer[] = []; + + const child = spawn(executable, execArgs, { + cwd, + env: process.env, + stdio: ["ignore", "pipe", "pipe"], }); - const text = (result.content ?? []) - .map((item) => - "text" in item && typeof item.text === "string" ? item.text : "", - ) - .filter(Boolean) - .join("\n"); + const timeoutId = setTimeout(() => { + child.kill("SIGKILL"); + reject(new Error(`Command timed out after ${timeout}ms`)); + }, timeout); - const stdout = text ? text.split("\n") : []; - const stderr = - result.status === "error" - ? ["Command reported an error. See output for details."] - : []; + child.stdout.on("data", (chunk: Buffer) => { + stdoutChunks.push(chunk); + }); - return { - output: text, - stdout, - stderr, - }; - } finally { - if (workdir) { - if (previousUserCwd === undefined) { - delete process.env.USER_CWD; + child.stderr.on("data", (chunk: Buffer) => { + stderrChunks.push(chunk); + }); + + child.on("error", (err: Error) => { + clearTimeout(timeoutId); + reject(new Error(`Failed to execute command: ${err.message}`)); + }); + + child.on("close", (code: number | null) => { + clearTimeout(timeoutId); + + const stdoutText = Buffer.concat(stdoutChunks).toString("utf8"); + const stderrText = Buffer.concat(stderrChunks).toString("utf8"); + + const stdoutLines = stdoutText + .split("\n") + .filter((line) => line.length > 0); + const stderrLines = stderrText + .split("\n") + .filter((line) => line.length > 0); + + // Combine stdout and stderr for output + const output = [stdoutText, stderrText].filter(Boolean).join("\n").trim(); + + if (code !== 0 && code !== null) { + // Command failed but we still return the output + resolve({ + output: output || `Command exited with code ${code}`, + stdout: stdoutLines, + stderr: stderrLines, + }); } else { - process.env.USER_CWD = previousUserCwd; + resolve({ + output, + stdout: stdoutLines, + stderr: stderrLines, + }); } - } - } + }); + }); } diff --git a/src/tools/schemas/ApplyPatch.json b/src/tools/schemas/ApplyPatch.json index 671acec..48312a7 100644 --- a/src/tools/schemas/ApplyPatch.json +++ b/src/tools/schemas/ApplyPatch.json @@ -4,7 +4,7 @@ "properties": { "input": { "type": "string", - "description": "Patch content in the ApplyPatch tool format, starting with '*** Begin Patch' and ending with '*** End Patch'." + "description": "The entire contents of the apply_patch command" } }, "required": ["input"], diff --git a/src/tools/schemas/ReadFileCodex.json b/src/tools/schemas/ReadFileCodex.json index 87892c3..98b401e 100644 --- a/src/tools/schemas/ReadFileCodex.json +++ b/src/tools/schemas/ReadFileCodex.json @@ -4,7 +4,7 @@ "properties": { "file_path": { "type": "string", - "description": "Absolute path to the file." + "description": "Absolute path to the file" }, "offset": { "type": "number", diff --git a/src/tools/schemas/Shell.json b/src/tools/schemas/Shell.json index b34dddd..46224c5 100644 --- a/src/tools/schemas/Shell.json +++ b/src/tools/schemas/Shell.json @@ -7,19 +7,19 @@ "items": { "type": "string" }, - "description": "The command to execute as an array of shell arguments." + "description": "The command to execute" }, "workdir": { "type": "string", - "description": "The working directory to execute the command in." + "description": "The working directory to execute the command in" }, "timeout_ms": { "type": "number", - "description": "The timeout for the command in milliseconds." + "description": "The timeout for the command in milliseconds" }, "with_escalated_permissions": { "type": "boolean", - "description": "Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions." + "description": "Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions" }, "justification": { "type": "string", diff --git a/src/tools/schemas/ShellCommand.json b/src/tools/schemas/ShellCommand.json index 0afac52..0d3413a 100644 --- a/src/tools/schemas/ShellCommand.json +++ b/src/tools/schemas/ShellCommand.json @@ -4,19 +4,19 @@ "properties": { "command": { "type": "string", - "description": "The shell script to execute in the user's default shell." + "description": "The shell script to execute in the user's default shell" }, "workdir": { "type": "string", - "description": "The working directory to execute the command in." + "description": "The working directory to execute the command in" }, "timeout_ms": { "type": "number", - "description": "The timeout for the command in milliseconds." + "description": "The timeout for the command in milliseconds" }, "with_escalated_permissions": { "type": "boolean", - "description": "Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions." + "description": "Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions" }, "justification": { "type": "string", diff --git a/src/tools/schemas/UpdatePlan.json b/src/tools/schemas/UpdatePlan.json index d20923a..e1d6078 100644 --- a/src/tools/schemas/UpdatePlan.json +++ b/src/tools/schemas/UpdatePlan.json @@ -3,8 +3,7 @@ "type": "object", "properties": { "explanation": { - "type": "string", - "description": "Optional explanation of the plan or changes being made" + "type": "string" }, "plan": { "type": "array", @@ -13,8 +12,7 @@ "type": "object", "properties": { "step": { - "type": "string", - "description": "Description of the step" + "type": "string" }, "status": { "type": "string", diff --git a/src/tools/toolset.ts b/src/tools/toolset.ts index b556787..cb9cc06 100644 --- a/src/tools/toolset.ts +++ b/src/tools/toolset.ts @@ -19,6 +19,9 @@ const ANTHROPIC_TOOLS = ANTHROPIC_DEFAULT_TOOLS; const CODEX_TOOLS = OPENAI_DEFAULT_TOOLS; const GEMINI_TOOLS = GEMINI_DEFAULT_TOOLS; +// Server-side/base tools that should stay attached regardless of Letta toolset +export const BASE_TOOL_NAMES = ["memory", "web_search"]; + /** * Gets the list of Letta Code tools currently attached to an agent. * Returns the tool names that are both attached to the agent AND in our tool definitions.