feat: misc tool alignment (#137)

2025-11-30 15:38:04 -08:00
parent b0291597f3
commit 6089ce1cdd
40 changed files with 1524 additions and 206 deletions
--- a/src/tools/descriptions/AskUserQuestion.md
+++ b/src/tools/descriptions/AskUserQuestion.md
@@ -0,0 +1,11 @@
+# AskUserQuestion
+
+Use this tool when you need to ask the user questions during execution. This allows you to:
+1. Gather user preferences or requirements
+2. Clarify ambiguous instructions
+3. Get decisions on implementation choices as you work
+4. Offer choices to the user about what direction to take
+
+Usage notes:
+- Users will always be able to select "Other" to provide custom text input
+- Use multiSelect: true to allow multiple answers to be selected for a question
--- a/src/tools/descriptions/Bash.md
+++ b/src/tools/descriptions/Bash.md
@@ -2,11 +2,13 @@

 Executes a given bash command in a persistent shell session with optional timeout, ensuring proper handling and security measures.

+IMPORTANT: This tool is for terminal operations like git, npm, docker, etc. DO NOT use it for file operations (reading, writing, editing, searching, finding files) - use the specialized tools for this instead.
+
 Before executing the command, please follow these steps:

 1. Directory Verification:
-   - If the command will create new directories or files, first use the LS tool to verify the parent directory exists and is the correct location
-   - For example, before running "mkdir foo/bar", first use LS to check that "foo" exists and is the intended parent directory
+   - If the command will create new directories or files, first use `ls` to verify the parent directory exists and is the correct location
+   - For example, before running "mkdir foo/bar", first use `ls foo` to check that "foo" exists and is the intended parent directory

 2. Command Execution:
   - Always quote file paths that contain spaces with double quotes (e.g., cd "path with spaces/file.txt")
@@ -23,9 +25,20 @@ Usage notes:
  - You can specify an optional timeout in milliseconds (up to 600000ms / 10 minutes). If not specified, commands will timeout after 120000ms (2 minutes).
  - It is very helpful if you write a clear, concise description of what this command does in 5-10 words.
  - If the output exceeds 30000 characters, output will be truncated before being returned to you.
-  - VERY IMPORTANT: You MUST avoid using search commands like `find` and `grep`. Instead use Grep, Glob, or Task to search. You MUST avoid read tools like `cat`, `head`, `tail`, and `ls`, and use Read and LS to read files.
- - If you _still_ need to run `grep`, STOP. ALWAYS USE ripgrep at `rg` first, which all ${PRODUCT_NAME} users have pre-installed.
-  - When issuing multiple commands, use the ';' or '&&' operator to separate them. DO NOT use newlines (newlines are ok in quoted strings).
+  - You can use the `run_in_background` parameter to run the command in the background, which allows you to continue working while the command runs. You can monitor the output using the BashOutput tool as it becomes available. You do not need to use '&' at the end of the command when using this parameter.
+  
+  - Avoid using Bash with the `find`, `grep`, `cat`, `head`, `tail`, `sed`, `awk`, or `echo` commands, unless explicitly instructed or when these commands are truly necessary for the task. Instead, always prefer using the dedicated tools for these commands:
+    - File search: Use Glob (NOT find or ls)
+    - Content search: Use Grep (NOT grep or rg)
+    - Read files: Use Read (NOT cat/head/tail)
+    - Edit files: Use Edit (NOT sed/awk)
+    - Write files: Use Write (NOT echo >/cat <<EOF)
+    - Communication: Output text directly (NOT echo/printf)
+  - When issuing multiple commands:
+    - If the commands are independent and can run in parallel, make multiple Bash tool calls in a single message. For example, if you need to run "git status" and "git diff", send a single message with two Bash tool calls in parallel.
+    - If the commands depend on each other and must run sequentially, use a single Bash call with '&&' to chain them together (e.g., `git add . && git commit -m "message" && git push`). For instance, if one operation must complete before another starts (like mkdir before cp, Write before Bash for git operations, or git add before git commit), run these operations sequentially instead.
+    - Use ';' only when you need to run commands sequentially but don't care if earlier commands fail
+    - DO NOT use newlines to separate commands (newlines are ok in quoted strings)
  - Try to maintain your current working directory throughout the session by using absolute paths and avoiding usage of `cd`. You may use `cd` if the User explicitly requests it.
    <good-example>
    pytest /foo/bar/tests
@@ -34,33 +47,42 @@ Usage notes:
    cd /foo/bar && pytest tests
    </bad-example>

-
-
-
 # Committing changes with git

-When the user asks you to create a new git commit, follow these steps carefully:
+Only create commits when requested by the user. If unclear, ask first. When the user asks you to create a new git commit, follow these steps carefully:

-1. You have the capability to call multiple tools in a single response. When multiple independent pieces of information are requested, batch your tool calls together for optimal performance. ALWAYS run the following bash commands in parallel, each using the Bash tool:
+Git Safety Protocol:
+- NEVER update the git config
+- NEVER run destructive/irreversible git commands (like push --force, hard reset, etc) unless the user explicitly requests them 
+- NEVER skip hooks (--no-verify, --no-gpg-sign, etc) unless the user explicitly requests it
+- NEVER force push to main/master; warn the user if they request it
+- Avoid git commit --amend. ONLY use --amend when either (1) the user explicitly requested an amend OR (2) adding edits from a pre-commit hook (additional instructions below)
+- Before amending: ALWAYS check authorship (git log -1 --format='%an %ae')
+- NEVER commit changes unless the user explicitly asks you to. It is VERY IMPORTANT to only commit when explicitly asked, otherwise the user will feel that you are being too proactive.
+
+1. You can call multiple tools in a single response. When multiple independent pieces of information are requested and all commands are likely to succeed, run multiple tool calls in parallel for optimal performance. Run the following bash commands in parallel, each using the Bash tool:
  - Run a git status command to see all untracked files.
  - Run a git diff command to see both staged and unstaged changes that will be committed.
  - Run a git log command to see recent commit messages, so that you can follow this repository's commit message style.
 2. Analyze all staged changes (both previously staged and newly added) and draft a commit message:
  - Summarize the nature of the changes (eg. new feature, enhancement to an existing feature, bug fix, refactoring, test, docs, etc.). Ensure the message accurately reflects the changes and their purpose (i.e. "add" means a wholly new feature, "update" means an enhancement to an existing feature, "fix" means a bug fix, etc.).
-  - Check for any sensitive information that shouldn't be committed
+  - Do not commit files that likely contain secrets (.env, credentials.json, etc). Warn the user if they specifically request to commit those files
  - Draft a concise (1-2 sentences) commit message that focuses on the "why" rather than the "what"
  - Ensure it accurately reflects the changes and their purpose
-3. You have the capability to call multiple tools in a single response. When multiple independent pieces of information are requested, batch your tool calls together for optimal performance. ALWAYS run the following commands in parallel:
+3. You can call multiple tools in a single response. When multiple independent pieces of information are requested and all commands are likely to succeed, run multiple tool calls in parallel for optimal performance. Run the following commands:
   - Add relevant untracked files to the staging area.
   - Create the commit with a message ending with:
   👾 Generated with [Letta Code](https://letta.com)

   Co-Authored-By: Letta <noreply@letta.com>
-   - Run git status to make sure the commit succeeded.
-4. If the commit fails due to pre-commit hook changes, retry the commit ONCE to include these automated changes. If it fails again, it usually means a pre-commit hook is preventing the commit. If the commit succeeds but you notice that files were modified by the pre-commit hook, you MUST amend your commit to include them.
+   - Run git status after the commit completes to verify success.
+   Note: git status depends on the commit completing, so run it sequentially after the commit.
+4. If the commit fails due to pre-commit hook changes, retry ONCE. If it succeeds but files were modified by the hook, verify it's safe to amend:
+   - Check authorship: git log -1 --format='%an %ae'
+   - Check not pushed: git status shows "Your branch is ahead"
+   - If both true: amend your commit. Otherwise: create NEW commit (never amend other developers' commits)

 Important notes:
- NEVER update the git config
 - NEVER run additional commands to read or explore code, besides git bash commands
 - NEVER use the TodoWrite or Task tools
 - DO NOT push to the remote repository unless the user explicitly asks you to do so
@@ -83,13 +105,13 @@ Use the gh command via the Bash tool for ALL GitHub-related tasks including work

 IMPORTANT: When the user asks you to create a pull request, follow these steps carefully:

-1. You have the capability to call multiple tools in a single response. When multiple independent pieces of information are requested, batch your tool calls together for optimal performance. ALWAYS run the following bash commands in parallel using the Bash tool, in order to understand the current state of the branch since it diverged from the main branch:
+1. You can call multiple tools in a single response. When multiple independent pieces of information are requested and all commands are likely to succeed, run multiple tool calls in parallel for optimal performance. Run the following bash commands in parallel using the Bash tool, in order to understand the current state of the branch since it diverged from the main branch:
   - Run a git status command to see all untracked files
   - Run a git diff command to see both staged and unstaged changes that will be committed
   - Check if the current branch tracks a remote branch and is up to date with the remote, so you know if you need to push to the remote
   - Run a git log command and `git diff [base-branch]...HEAD` to understand the full commit history for the current branch (from the time it diverged from the base branch)
 2. Analyze all changes that will be included in the pull request, making sure to look at all relevant commits (NOT just the latest commit, but ALL commits that will be included in the pull request!!!), and draft a pull request summary
-3. You have the capability to call multiple tools in a single response. When multiple independent pieces of information are requested, batch your tool calls together for optimal performance. ALWAYS run the following commands in parallel:
+3. You can call multiple tools in a single response. When multiple independent pieces of information are requested and all commands are likely to succeed, run multiple tool calls in parallel for optimal performance. Run the following commands in parallel:
   - Create new branch if needed
   - Push to remote with -u flag if needed
   - Create PR using gh pr create with the format below. Use a HEREDOC to pass the body to ensure correct formatting.
@@ -99,7 +121,7 @@ gh pr create --title "the pr title" --body "$(cat <<'EOF'
 <1-3 bullet points>

 ## Test plan
-[Checklist of TODOs for testing the pull request...]
+[Bulleted markdown checklist of TODOs for testing the pull request...]

 👾 Generated with [Letta Code](https://letta.com)
 EOF
@@ -107,9 +129,8 @@ EOF
 </example>

 Important:
- NEVER update the git config
 - DO NOT use the TodoWrite or Task tools
 - Return the PR URL when you're done, so the user can see it

 # Other common operations
- View comments on a Github PR: gh api repos/foo/bar/pulls/123/comments
+- View comments on a GitHub PR: gh api repos/foo/bar/pulls/123/comments
--- a/src/tools/descriptions/BashOutput.md
+++ b/src/tools/descriptions/BashOutput.md
@@ -1,10 +1,10 @@
 # BashOutput

 - Retrieves output from a running or completed background bash shell
- Takes a bash_id parameter identifying the shell
+- Takes a shell_id parameter identifying the shell
 - Always returns only new output since the last check
 - Returns stdout and stderr output along with shell status
 - Supports optional regex filtering to show only lines matching a pattern
 - Use this tool when you need to monitor or check the output of a long-running shell
 - Shell IDs can be found using the /bashes command
- If the accumulated output exceeds 30,000 characters, it will be truncated before being returned to you
+- If the accumulated output exceeds 30,000 characters, it will be truncated before being returned to you
--- a/src/tools/descriptions/EnterPlanMode.md
+++ b/src/tools/descriptions/EnterPlanMode.md
@@ -0,0 +1,75 @@
+# EnterPlanMode
+
+Use this tool when you encounter a complex task that requires careful planning and exploration before implementation. This tool transitions you into plan mode where you can thoroughly explore the codebase and design an implementation approach.
+
+## When to Use This Tool
+
+Use EnterPlanMode when ANY of these conditions apply:
+
+1. **Multiple Valid Approaches**: The task can be solved in several different ways, each with trade-offs
+   - Example: "Add caching to the API" - could use Redis, in-memory, file-based, etc.
+   - Example: "Improve performance" - many optimization strategies possible
+
+2. **Significant Architectural Decisions**: The task requires choosing between architectural patterns
+   - Example: "Add real-time updates" - WebSockets vs SSE vs polling
+   - Example: "Implement state management" - Redux vs Context vs custom solution
+
+3. **Large-Scale Changes**: The task touches many files or systems
+   - Example: "Refactor the authentication system"
+   - Example: "Migrate from REST to GraphQL"
+
+4. **Unclear Requirements**: You need to explore before understanding the full scope
+   - Example: "Make the app faster" - need to profile and identify bottlenecks
+   - Example: "Fix the bug in checkout" - need to investigate root cause
+
+5. **User Input Needed**: You'll need to ask clarifying questions before starting
+   - If you would use AskUserQuestion to clarify the approach, consider EnterPlanMode instead
+   - Plan mode lets you explore first, then present options with context
+
+## When NOT to Use This Tool
+
+Do NOT use EnterPlanMode for:
+- Simple, straightforward tasks with obvious implementation
+- Small bug fixes where the solution is clear
+- Adding a single function or small feature
+- Tasks you're already confident how to implement
+- Research-only tasks (use the Task tool with explore agent instead)
+
+## What Happens in Plan Mode
+
+In plan mode, you'll:
+1. Thoroughly explore the codebase using Glob, Grep, and Read tools
+2. Understand existing patterns and architecture
+3. Design an implementation approach
+4. Present your plan to the user for approval
+5. Use AskUserQuestion if you need to clarify approaches
+6. Exit plan mode with ExitPlanMode when ready to implement
+
+## Examples
+
+### GOOD - Use EnterPlanMode:
+User: "Add user authentication to the app"
+- This requires architectural decisions (session vs JWT, where to store tokens, middleware structure)
+
+User: "Optimize the database queries"
+- Multiple approaches possible, need to profile first, significant impact
+
+User: "Implement dark mode"
+- Architectural decision on theme system, affects many components
+
+### BAD - Don't use EnterPlanMode:
+User: "Fix the typo in the README"
+- Straightforward, no planning needed
+
+User: "Add a console.log to debug this function"
+- Simple, obvious implementation
+
+User: "What files handle routing?"
+- Research task, not implementation planning
+
+## Important Notes
+
+- This tool REQUIRES user approval - they must consent to entering plan mode
+- Be thoughtful about when to use it - unnecessary plan mode slows down simple tasks
+- If unsure whether to use it, err on the side of starting implementation
+- You can always ask the user "Would you like me to plan this out first?"
--- a/src/tools/descriptions/ExitPlanMode.md
+++ b/src/tools/descriptions/ExitPlanMode.md
@@ -1,4 +1,14 @@
-Use this tool when you are in plan mode and have finished presenting your plan and are ready to code. This will prompt the user to exit plan mode.
+# ExitPlanMode
+
+Use this tool when you are in plan mode and have finished writing your plan to the plan file and are ready for user approval.
+
+## How This Tool Works
+- You should have already written your plan to the plan file specified in the plan mode system message
+- This tool does NOT take the plan content as a parameter - it will read the plan from the file you wrote
+- This tool simply signals that you're done planning and ready for the user to review and approve
+- The user will see the contents of your plan file when they review it
+
+## When to Use This Tool
 IMPORTANT: Only use this tool when the task requires planning the implementation steps of a task that requires writing code. For research tasks where you're gathering information, searching files, reading files or in general trying to understand the codebase - do NOT use this tool.

 ## Handling Ambiguity in Plans
@@ -6,8 +16,8 @@ Before using this tool, ensure your plan is clear and unambiguous. If there are
 1. Use the AskUserQuestion tool to clarify with the user
 2. Ask about specific implementation choices (e.g., architectural patterns, which library to use)
 3. Clarify any assumptions that could affect the implementation
-4. Only proceed with ExitPlanMode after resolving ambiguities
-
+4. Edit your plan file to incorporate user feedback
+5. Only proceed with ExitPlanMode after resolving ambiguities and updating the plan file

 ## Examples

--- a/src/tools/descriptions/Read.md
+++ b/src/tools/descriptions/Read.md
@@ -9,8 +9,6 @@ Usage:
 - You can optionally specify a line offset and limit (especially handy for long files), but it's recommended to read the whole file by not providing these parameters
 - Any lines longer than 2000 characters will be truncated
 - Results are returned using cat -n format, with line numbers starting at 1
- This tool allows Claude Code to read images (eg PNG, JPG, etc). When reading an image file the contents are presented visually as Claude Code is a multimodal LLM.
- For Jupyter notebooks (.ipynb files), use the NotebookRead instead
- You have the capability to call multiple tools in a single response. It is always better to speculatively read multiple files as a batch that are potentially useful. 
- You will regularly be asked to read screenshots. If the user provides a path to a screenshot ALWAYS use this tool to view the file at the path. This tool will work with all temporary file paths like /var/folders/123/abc/T/TemporaryItems/NSIRD_screencaptureui_ZfB1tD/Screenshot.png
- If you read a file that exists but has empty contents you will receive a system reminder warning in place of file contents.
+- This tool can only read files, not directories. To read a directory, use the ls command via Bash.
+- You can call multiple tools in a single response. It is always better to speculatively read multiple potentially useful files in parallel.
+- If you read a file that exists but has empty contents you will receive a system reminder warning in place of file contents.
--- a/src/tools/descriptions/TodoWrite.md
+++ b/src/tools/descriptions/TodoWrite.md
@@ -30,11 +30,11 @@ NOTE that you should not use this tool if there is only one trivial task to do.
 User: I want to add a dark mode toggle to the application settings. Make sure you run the tests and build when you're done!
 Assistant: I'll help add a dark mode toggle to your application settings. Let me create a todo list to track this implementation.
 *Creates todo list with the following items:*
-1. Create dark mode toggle component in Settings page
-2. Add dark mode state management (context/store)
-3. Implement CSS-in-JS styles for dark theme
-4. Update existing components to support theme switching
-5. Run tests and build process, addressing any failures or errors that occur
+1. Creating dark mode toggle component in Settings page
+2. Adding dark mode state management (context/store)
+3. Implementing CSS-in-JS styles for dark theme
+4. Updating existing components to support theme switching
+5. Running tests and build process, addressing any failures or errors that occur
 *Begins working on the first task*

 <reasoning>
@@ -81,7 +81,7 @@ User: Can you help optimize my React application? It's rendering slowly and has
 Assistant: I'll help optimize your React application. First, let me examine your codebase to identify potential performance bottlenecks.
 *Reviews component structure, render patterns, state management, and data fetching*
 Assistant: After analyzing your codebase, I've identified several performance issues. Let me create a todo list to track our optimization efforts.
-*Creates todo list with items like: 1) Implement memoization for expensive calculations in ProductList, 2) Add virtualization for long lists in Dashboard, 3) Optimize image loading in Gallery component, 4) Fix state update loops in ShoppingCart, 5) Review bundle size and implement code splitting*
+*Creates todo list with items like: 1) Implementing memoization for expensive calculations in ProductList, 2) Adding virtualization for long lists in Dashboard, 3) Optimizing image loading in Gallery component, 4) Fixing state update loops in ShoppingCart, 5) Reviewing bundle size and implementing code splitting*
 Let's start by implementing memoization for the expensive calculations in your ProductList component.</assistant>

 <reasoning>
@@ -152,10 +152,14 @@ The assistant did not use the todo list because this is a single command executi
   - in_progress: Currently working on (limit to ONE task at a time)
   - completed: Task finished successfully

+   **IMPORTANT**: Task descriptions must have two forms:
+   - content: The imperative form describing what needs to be done (e.g., "Run tests", "Build the project")
+   - activeForm: The present continuous form shown during execution (e.g., "Running tests", "Building the project")
+
 2. **Task Management**:
   - Update task status in real-time as you work
   - Mark tasks complete IMMEDIATELY after finishing (don't batch completions)
-   - Only have ONE task in_progress at any time
+   - Exactly ONE task must be in_progress at any time (not less, not more)
   - Complete current tasks before starting new ones
   - Remove tasks that are no longer relevant from the list entirely

@@ -173,5 +177,8 @@ The assistant did not use the todo list because this is a single command executi
   - Create specific, actionable items
   - Break complex tasks into smaller, manageable steps
   - Use clear, descriptive task names
+   - Always provide both forms:
+     - content: "Fix authentication bug"
+     - activeForm: "Fixing authentication bug"

 When in doubt, use this tool. Being proactive with task management demonstrates attentiveness and ensures you complete all requirements successfully.
--- a/src/tools/impl/AskUserQuestion.ts
+++ b/src/tools/impl/AskUserQuestion.ts
@@ -0,0 +1,80 @@
+import { validateRequiredParams } from "./validation.js";
+
+interface QuestionOption {
+  label: string;
+  description: string;
+}
+
+interface Question {
+  question: string;
+  header: string;
+  options: QuestionOption[];
+  multiSelect: boolean;
+}
+
+interface AskUserQuestionArgs {
+  questions: Question[];
+  answers?: Record<string, string>;
+}
+
+interface AskUserQuestionResult {
+  message: string;
+}
+
+export async function ask_user_question(
+  args: AskUserQuestionArgs,
+): Promise<AskUserQuestionResult> {
+  validateRequiredParams(args, ["questions"], "AskUserQuestion");
+
+  if (!Array.isArray(args.questions) || args.questions.length === 0) {
+    throw new Error("questions must be a non-empty array");
+  }
+
+  if (args.questions.length > 4) {
+    throw new Error("Maximum of 4 questions allowed");
+  }
+
+  for (const q of args.questions) {
+    if (!q.question || typeof q.question !== "string") {
+      throw new Error("Each question must have a question string");
+    }
+    if (!q.header || typeof q.header !== "string") {
+      throw new Error("Each question must have a header string");
+    }
+    if (
+      !Array.isArray(q.options) ||
+      q.options.length < 2 ||
+      q.options.length > 4
+    ) {
+      throw new Error("Each question must have 2-4 options");
+    }
+    if (typeof q.multiSelect !== "boolean") {
+      throw new Error("Each question must have a multiSelect boolean");
+    }
+    for (const opt of q.options) {
+      if (!opt.label || typeof opt.label !== "string") {
+        throw new Error("Each option must have a label string");
+      }
+      if (!opt.description || typeof opt.description !== "string") {
+        throw new Error("Each option must have a description string");
+      }
+    }
+  }
+
+  // If answers are provided (filled in by UI layer), format the response
+  if (args.answers && Object.keys(args.answers).length > 0) {
+    const answerParts = args.questions.map((q) => {
+      const answer = args.answers?.[q.question] || "";
+      return `"${q.question}"="${answer}"`;
+    });
+    return {
+      message: `User has answered your questions: ${answerParts.join(", ")}. You can now continue with the user's answers in mind.`,
+    };
+  }
+
+  // Otherwise, return a placeholder - the UI layer should intercept this tool call
+  // and show the question UI before returning the actual response
+  return {
+    message: "Waiting for user response...",
+  };
+}
--- a/src/tools/impl/BashOutput.ts
+++ b/src/tools/impl/BashOutput.ts
@@ -3,7 +3,7 @@ import { LIMITS, truncateByChars } from "./truncation.js";
 import { validateRequiredParams } from "./validation.js";

 interface BashOutputArgs {
-  bash_id: string;
+  shell_id: string;
  filter?: string;
 }
 interface BashOutputResult {
@@ -13,11 +13,11 @@ interface BashOutputResult {
 export async function bash_output(
  args: BashOutputArgs,
 ): Promise<BashOutputResult> {
-  validateRequiredParams(args, ["bash_id"], "BashOutput");
-  const { bash_id, filter } = args;
-  const proc = backgroundProcesses.get(bash_id);
+  validateRequiredParams(args, ["shell_id"], "BashOutput");
+  const { shell_id, filter } = args;
+  const proc = backgroundProcesses.get(shell_id);
  if (!proc)
-    return { message: `No background process found with ID: ${bash_id}` };
+    return { message: `No background process found with ID: ${shell_id}` };
  const stdout = proc.stdout.join("\n");
  const stderr = proc.stderr.join("\n");
  let text = stdout;
--- a/src/tools/impl/EnterPlanMode.ts
+++ b/src/tools/impl/EnterPlanMode.ts
@@ -0,0 +1,32 @@
+interface EnterPlanModeArgs {
+  [key: string]: never;
+}
+
+interface EnterPlanModeResult {
+  message: string;
+}
+
+export async function enter_plan_mode(
+  _args: EnterPlanModeArgs,
+): Promise<EnterPlanModeResult> {
+  // This is handled by the UI layer which will:
+  // 1. Show approval dialog
+  // 2. On approve: toggle plan mode on, generate plan file path, inject system reminder
+  // 3. On reject: send rejection, agent proceeds without plan mode
+  //
+  // The message below is returned on successful entry into plan mode.
+  // The UI harness will also inject a <system-reminder> with the plan file path.
+  return {
+    message: `Entered plan mode. You should now focus on exploring the codebase and designing an implementation approach.
+
+In plan mode, you should:
+1. Thoroughly explore the codebase to understand existing patterns
+2. Identify similar features and architectural approaches
+3. Consider multiple approaches and their trade-offs
+4. Use AskUserQuestion if you need to clarify the approach
+5. Design a concrete implementation strategy
+6. When ready, use ExitPlanMode to present your plan for approval
+
+Remember: DO NOT write or edit any files yet. This is a read-only exploration and planning phase.`,
+  };
+}
--- a/src/tools/impl/ExitPlanMode.ts
+++ b/src/tools/impl/ExitPlanMode.ts
@@ -1,22 +1,11 @@
 /**
 * ExitPlanMode tool implementation
- * Exits plan mode by presenting the plan to the user for approval
+ * Exits plan mode - the plan is read from the plan file by the UI
 */

-import { validateRequiredParams } from "./validation.js";
-
-interface ExitPlanModeArgs {
-  plan: string;
-}
-
-export async function exit_plan_mode(
-  args: ExitPlanModeArgs,
-): Promise<{ message: string }> {
-  validateRequiredParams(args, ["plan"], "ExitPlanMode");
-  const { plan: _plan } = args;
-
+export async function exit_plan_mode(): Promise<{ message: string }> {
  // Return confirmation message that plan was approved
-  // Note: The plan itself should be displayed by the UI/system before this return is shown
+  // Note: The plan is read from the plan file by the UI before this return is shown
  return {
    message:
      "User has approved your plan. You can now start coding.\nStart with updating your todo list if applicable",
--- a/src/tools/impl/Grep.ts
+++ b/src/tools/impl/Grep.ts
@@ -21,6 +21,18 @@ function getRipgrepPath(): string {

 const rgPath = getRipgrepPath();

+function applyOffsetAndLimit<T>(
+  items: T[],
+  offset: number,
+  limit: number,
+): T[] {
+  const sliced = items.slice(offset);
+  if (limit > 0) {
+    return sliced.slice(0, limit);
+  }
+  return sliced; // 0 = unlimited
+}
+
 export interface GrepArgs {
  pattern: string;
  path?: string;
@@ -32,6 +44,8 @@ export interface GrepArgs {
  "-n"?: boolean;
  "-i"?: boolean;
  type?: string;
+  head_limit?: number;
+  offset?: number;
  multiline?: boolean;
 }

@@ -51,9 +65,11 @@ export async function grep(args: GrepArgs): Promise<GrepResult> {
    "-B": before,
    "-A": after,
    "-C": context,
-    "-n": lineNumbers,
+    "-n": lineNumbers = true,
    "-i": ignoreCase,
    type: fileType,
+    head_limit = 100,
+    offset = 0,
    multiline,
  } = args;

@@ -88,12 +104,14 @@ export async function grep(args: GrepArgs): Promise<GrepResult> {
      cwd: userCwd,
    });
    if (output_mode === "files_with_matches") {
-      const files = stdout.trim().split("\n").filter(Boolean);
+      const allFiles = stdout.trim().split("\n").filter(Boolean);
+      const files = applyOffsetAndLimit(allFiles, offset, head_limit);
      const fileCount = files.length;
-      if (fileCount === 0) return { output: "No files found", files: 0 };
+      const totalCount = allFiles.length;
+      if (totalCount === 0) return { output: "No files found", files: 0 };

      const fileList = files.join("\n");
-      const fullOutput = `Found ${fileCount} file${fileCount !== 1 ? "s" : ""}\n${fileList}`;
+      const fullOutput = `Found ${totalCount} file${totalCount !== 1 ? "s" : ""}${fileCount < totalCount ? ` (showing ${fileCount})` : ""}\n${fileList}`;

      // Apply character limit to prevent large file lists
      const { content: truncatedOutput } = truncateByChars(
@@ -104,13 +122,14 @@ export async function grep(args: GrepArgs): Promise<GrepResult> {

      return {
        output: truncatedOutput,
-        files: fileCount,
+        files: totalCount,
      };
    } else if (output_mode === "count") {
-      const lines = stdout.trim().split("\n").filter(Boolean);
+      const allLines = stdout.trim().split("\n").filter(Boolean);
+      const lines = applyOffsetAndLimit(allLines, offset, head_limit);
      let totalMatches = 0;
      let filesWithMatches = 0;
-      for (const line of lines) {
+      for (const line of allLines) {
        const parts = line.split(":");
        if (parts.length >= 2) {
          const lastPart = parts[parts.length - 1];
@@ -138,16 +157,20 @@ export async function grep(args: GrepArgs): Promise<GrepResult> {
      if (!stdout || stdout.trim() === "")
        return { output: "No matches found", matches: 0 };

+      const allLines = stdout.split("\n");
+      const lines = applyOffsetAndLimit(allLines, offset, head_limit);
+      const content = lines.join("\n");
+
      // Apply character limit to content output
      const { content: truncatedOutput } = truncateByChars(
-        stdout,
+        content,
        LIMITS.GREP_OUTPUT_CHARS,
        "Grep",
      );

      return {
        output: truncatedOutput,
-        matches: stdout.split("\n").filter(Boolean).length,
+        matches: allLines.filter(Boolean).length,
      };
    }
  } catch (error) {
--- a/src/tools/impl/Read.ts
+++ b/src/tools/impl/Read.ts
@@ -133,6 +133,11 @@ export async function read(args: ReadArgs): Promise<ReadResult> {
    if (await isBinaryFile(file_path))
      throw new Error(`Cannot read binary file: ${file_path}`);
    const content = await fs.readFile(file_path, "utf-8");
+    if (content.trim() === "") {
+      return {
+        content: `<system-reminder>\nThe file ${file_path} exists but has empty contents.\n</system-reminder>`,
+      };
+    }
    const formattedContent = formatWithLineNumbers(content, offset, limit);
    return { content: formattedContent };
  } catch (error) {
--- a/src/tools/impl/TodoWrite.ts
+++ b/src/tools/impl/TodoWrite.ts
@@ -3,8 +3,7 @@ import { validateRequiredParams } from "./validation.js";
 interface TodoItem {
  content: string;
  status: "pending" | "in_progress" | "completed";
-  id: string;
-  priority?: "high" | "medium" | "low";
+  activeForm: string;
 }
 interface TodoWriteArgs {
  todos: TodoItem[];
@@ -29,10 +28,8 @@ export async function todo_write(
      throw new Error(
        "Each todo must have a valid status (pending, in_progress, or completed)",
      );
-    if (!todo.id || typeof todo.id !== "string")
-      throw new Error("Each todo must have an id string");
-    if (todo.priority && !["high", "medium", "low"].includes(todo.priority))
-      throw new Error("If provided, priority must be high, medium, or low");
+    if (!todo.activeForm || typeof todo.activeForm !== "string")
+      throw new Error("Each todo must have an activeForm string");
  }
  return {
    message:
--- a/src/tools/manager.ts
+++ b/src/tools/manager.ts
@@ -47,19 +47,21 @@ export function getInternalToolName(serverName: string): string {
 }

 export const ANTHROPIC_DEFAULT_TOOLS: ToolName[] = [
+  "AskUserQuestion",
  "Bash",
  "BashOutput",
  "Edit",
+  "EnterPlanMode",
  "ExitPlanMode",
  "Glob",
  "Grep",
  "KillBash",
-  "LS",
-  "MultiEdit",
+  // "MultiEdit",
+  // "LS",
  "Read",
-  "Skill",
  "TodoWrite",
  "Write",
+  "Skill",
 ];

 export const OPENAI_DEFAULT_TOOLS: ToolName[] = [
@@ -113,9 +115,11 @@ export const GEMINI_PASCAL_TOOLS: ToolName[] = [

 // Tool permissions configuration
 const TOOL_PERMISSIONS: Record<ToolName, { requiresApproval: boolean }> = {
+  AskUserQuestion: { requiresApproval: true },
  Bash: { requiresApproval: true },
  BashOutput: { requiresApproval: false },
  Edit: { requiresApproval: true },
+  EnterPlanMode: { requiresApproval: true },
  ExitPlanMode: { requiresApproval: false },
  Glob: { requiresApproval: false },
  Grep: { requiresApproval: false },
--- a/src/tools/schemas/AskUserQuestion.json
+++ b/src/tools/schemas/AskUserQuestion.json
@@ -0,0 +1,61 @@
+{
+  "type": "object",
+  "properties": {
+    "questions": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "question": {
+            "type": "string",
+            "description": "The complete question to ask the user. Should be clear, specific, and end with a question mark. Example: \"Which library should we use for date formatting?\" If multiSelect is true, phrase it accordingly, e.g. \"Which features do you want to enable?\""
+          },
+          "header": {
+            "type": "string",
+            "description": "Very short label displayed as a chip/tag (max 12 chars). Examples: \"Auth method\", \"Library\", \"Approach\"."
+          },
+          "options": {
+            "type": "array",
+            "items": {
+              "type": "object",
+              "properties": {
+                "label": {
+                  "type": "string",
+                  "description": "The display text for this option that the user will see and select. Should be concise (1-5 words) and clearly describe the choice."
+                },
+                "description": {
+                  "type": "string",
+                  "description": "Explanation of what this option means or what will happen if chosen. Useful for providing context about trade-offs or implications."
+                }
+              },
+              "required": ["label", "description"],
+              "additionalProperties": false
+            },
+            "minItems": 2,
+            "maxItems": 4,
+            "description": "The available choices for this question. Must have 2-4 options. Each option should be a distinct, mutually exclusive choice (unless multiSelect is enabled). There should be no 'Other' option, that will be provided automatically."
+          },
+          "multiSelect": {
+            "type": "boolean",
+            "description": "Set to true to allow the user to select multiple options instead of just one. Use when choices are not mutually exclusive."
+          }
+        },
+        "required": ["question", "header", "options", "multiSelect"],
+        "additionalProperties": false
+      },
+      "minItems": 1,
+      "maxItems": 4,
+      "description": "Questions to ask the user (1-4 questions)"
+    },
+    "answers": {
+      "type": "object",
+      "additionalProperties": {
+        "type": "string"
+      },
+      "description": "User answers collected by the permission component"
+    }
+  },
+  "required": ["questions"],
+  "additionalProperties": false,
+  "$schema": "http://json-schema.org/draft-07/schema#"
+}
--- a/src/tools/schemas/Bash.json
+++ b/src/tools/schemas/Bash.json
@@ -11,7 +11,7 @@
    },
    "description": {
      "type": "string",
-      "description": " Clear, concise description of what this command does in 5-10 words. Examples:\nInput: ls\nOutput: Lists files in current directory\n\nInput: git status\nOutput: Shows working tree status\n\nInput: npm install\nOutput: Installs package dependencies\n\nInput: mkdir foo\nOutput: Creates directory 'foo'"
+      "description": "Clear, concise description of what this command does in 5-10 words, in active voice. Examples:\nInput: ls\nOutput: List files in current directory\n\nInput: git status\nOutput: Show working tree status\n\nInput: npm install\nOutput: Install package dependencies\n\nInput: mkdir foo\nOutput: Create directory 'foo'"
    },
    "run_in_background": {
      "type": "boolean",
--- a/src/tools/schemas/BashOutput.json
+++ b/src/tools/schemas/BashOutput.json
@@ -1,7 +1,7 @@
 {
  "type": "object",
  "properties": {
-    "bash_id": {
+    "shell_id": {
      "type": "string",
      "description": "The ID of the background shell to retrieve output from"
    },
@@ -10,7 +10,7 @@
      "description": "Optional regular expression to filter the output lines. Only lines matching this regex will be included in the result. Any lines that do not match will no longer be available to read."
    }
  },
-  "required": ["bash_id"],
+  "required": ["shell_id"],
  "additionalProperties": false,
  "$schema": "http://json-schema.org/draft-07/schema#"
 }
--- a/src/tools/schemas/EnterPlanMode.json
+++ b/src/tools/schemas/EnterPlanMode.json
@@ -0,0 +1,6 @@
+{
+  "type": "object",
+  "properties": {},
+  "additionalProperties": false,
+  "$schema": "http://json-schema.org/draft-07/schema#"
+}
--- a/src/tools/schemas/ExitPlanMode.json
+++ b/src/tools/schemas/ExitPlanMode.json
@@ -1,11 +1,6 @@
 {
-  "$schema": "http://json-schema.org/draft-07/schema#",
  "type": "object",
-  "properties": {
-    "plan": {
-      "type": "string"
-    }
-  },
-  "required": ["plan"],
-  "additionalProperties": false
+  "properties": {},
+  "additionalProperties": true,
+  "$schema": "http://json-schema.org/draft-07/schema#"
 }
--- a/src/tools/schemas/Grep.json
+++ b/src/tools/schemas/Grep.json
@@ -32,7 +32,7 @@
    },
    "-n": {
      "type": "boolean",
-      "description": "Show line numbers in output (rg -n). Requires output_mode: \"content\", ignored otherwise."
+      "description": "Show line numbers in output (rg -n). Requires output_mode: \"content\", ignored otherwise. Defaults to true."
    },
    "-i": {
      "type": "boolean",
@@ -44,7 +44,11 @@
    },
    "head_limit": {
      "type": "number",
-      "description": "Limit output to first N lines/entries, equivalent to \"| head -N\". Works across all output modes: content (limits output lines), files_with_matches (limits file paths), count (limits count entries). When unspecified, shows all results from ripgrep."
+      "description": "Limit output to first N lines/entries, equivalent to \"| head -N\". Works across all output modes: content (limits output lines), files_with_matches (limits file paths), count (limits count entries). Defaults to 100 (0 = unlimited)."
+    },
+    "offset": {
+      "type": "number",
+      "description": "Skip the first N lines/entries before applying head_limit, equivalent to \"| tail -n +(N+1) | head -M\" where M is head_limit. Works across all output modes. Defaults to 0."
    },
    "multiline": {
      "type": "boolean",
--- a/src/tools/schemas/TodoWrite.json
+++ b/src/tools/schemas/TodoWrite.json
@@ -14,15 +14,12 @@
            "type": "string",
            "enum": ["pending", "in_progress", "completed"]
          },
-          "priority": {
+          "activeForm": {
            "type": "string",
-            "enum": ["high", "medium", "low"]
-          },
-          "id": {
-            "type": "string"
+            "minLength": 1
          }
        },
-        "required": ["content", "status", "id"],
+        "required": ["content", "status", "activeForm"],
        "additionalProperties": false
      },
      "description": "The updated todo list"
--- a/src/tools/toolDefinitions.ts
+++ b/src/tools/toolDefinitions.ts
@@ -1,7 +1,9 @@
 import ApplyPatchDescription from "./descriptions/ApplyPatch.md";
+import AskUserQuestionDescription from "./descriptions/AskUserQuestion.md";
 import BashDescription from "./descriptions/Bash.md";
 import BashOutputDescription from "./descriptions/BashOutput.md";
 import EditDescription from "./descriptions/Edit.md";
+import EnterPlanModeDescription from "./descriptions/EnterPlanMode.md";
 import ExitPlanModeDescription from "./descriptions/ExitPlanMode.md";
 import GlobDescription from "./descriptions/Glob.md";
 // Gemini toolset
@@ -29,9 +31,11 @@ import WriteDescription from "./descriptions/Write.md";
 import WriteFileGeminiDescription from "./descriptions/WriteFileGemini.md";
 import WriteTodosGeminiDescription from "./descriptions/WriteTodosGemini.md";
 import { apply_patch } from "./impl/ApplyPatch";
+import { ask_user_question } from "./impl/AskUserQuestion";
 import { bash } from "./impl/Bash";
 import { bash_output } from "./impl/BashOutput";
 import { edit } from "./impl/Edit";
+import { enter_plan_mode } from "./impl/EnterPlanMode";
 import { exit_plan_mode } from "./impl/ExitPlanMode";
 import { glob } from "./impl/Glob";
 // Gemini toolset
@@ -59,9 +63,11 @@ import { write } from "./impl/Write";
 import { write_file_gemini } from "./impl/WriteFileGemini";
 import { write_todos } from "./impl/WriteTodosGemini";
 import ApplyPatchSchema from "./schemas/ApplyPatch.json";
+import AskUserQuestionSchema from "./schemas/AskUserQuestion.json";
 import BashSchema from "./schemas/Bash.json";
 import BashOutputSchema from "./schemas/BashOutput.json";
 import EditSchema from "./schemas/Edit.json";
+import EnterPlanModeSchema from "./schemas/EnterPlanMode.json";
 import ExitPlanModeSchema from "./schemas/ExitPlanMode.json";
 import GlobSchema from "./schemas/Glob.json";
 // Gemini toolset
@@ -98,6 +104,11 @@ interface ToolAssets {
 }

 const toolDefinitions = {
+  AskUserQuestion: {
+    schema: AskUserQuestionSchema,
+    description: AskUserQuestionDescription.trim(),
+    impl: ask_user_question as unknown as ToolImplementation,
+  },
  Bash: {
    schema: BashSchema,
    description: BashDescription.trim(),
@@ -113,6 +124,11 @@ const toolDefinitions = {
    description: EditDescription.trim(),
    impl: edit as unknown as ToolImplementation,
  },
+  EnterPlanMode: {
+    schema: EnterPlanModeSchema,
+    description: EnterPlanModeDescription.trim(),
+    impl: enter_plan_mode as unknown as ToolImplementation,
+  },
  ExitPlanMode: {
    schema: ExitPlanModeSchema,
    description: ExitPlanModeDescription.trim(),