feat: misc tool alignment (#137)

2025-11-30 15:38:04 -08:00
parent b0291597f3
commit 6089ce1cdd
40 changed files with 1524 additions and 206 deletions
--- a/src/agent/check-approval.ts
+++ b/src/agent/check-approval.ts
@@ -63,7 +63,9 @@ export async function getResumeData(
      cursorLastMessage.id !== inContextLastMessageId
    ) {
      console.warn(
-        `[check-approval] Desync detected - cursor last: ${cursorLastMessage.id}, in-context last: ${inContextLastMessageId}`,
+        `[check-approval] Desync detected:\n` +
+          `  cursor last: ${cursorLastMessage.id} (type: ${cursorLastMessage.message_type})\n` +
+          `  in-context last: ${inContextLastMessageId} (type: unknown until found)`,
      );

      // Search for the in-context message in the fetched messages
@@ -82,6 +84,12 @@ export async function getResumeData(
        const inContextMessage = approvalMessage ?? lastMessage;

        if (inContextMessage) {
+          console.warn(
+            `[check-approval] Found in-context message (type: ${inContextMessage.message_type})` +
+              (matchingMessages.length > 1
+                ? ` - had ${matchingMessages.length} duplicates`
+                : ""),
+          );
          messageToCheck = inContextMessage;
        }
      } else {
@@ -98,6 +106,18 @@ export async function getResumeData(
    let pendingApproval: ApprovalRequest | null = null;
    let pendingApprovals: ApprovalRequest[] = [];

+    // Log the agent's last_stop_reason for debugging
+    const lastStopReason = (agent as { last_stop_reason?: string })
+      .last_stop_reason;
+    if (lastStopReason === "requires_approval") {
+      console.warn(
+        `[check-approval] Agent last_stop_reason: ${lastStopReason}`,
+      );
+      console.warn(
+        `[check-approval] Message to check: ${messageToCheck.id} (type: ${messageToCheck.message_type})`,
+      );
+    }
+
    if (messageToCheck.message_type === "approval_request_message") {
      // Cast to access tool_calls with proper typing
      const approvalMsg = messageToCheck as Message & {
@@ -142,6 +162,9 @@ export async function getResumeData(
      // Set legacy singular field for backward compatibility (first approval only)
      if (pendingApprovals.length > 0) {
        pendingApproval = pendingApprovals[0] || null;
+        console.warn(
+          `[check-approval] Found ${pendingApprovals.length} pending approval(s): ${pendingApprovals.map((a) => a.toolName).join(", ")}`,
+        );
      }
    }

--- a/src/cli/App.tsx
+++ b/src/cli/App.tsx
@@ -1,5 +1,6 @@
 // src/cli/App.tsx

+import { existsSync, readFileSync } from "node:fs";
 import { APIError } from "@letta-ai/letta-client/core/error";
 import type {
  AgentState,
@@ -34,12 +35,14 @@ import { ApprovalDialog } from "./components/ApprovalDialogRich";
 // import { AssistantMessage } from "./components/AssistantMessage";
 import { AssistantMessage } from "./components/AssistantMessageRich";
 import { CommandMessage } from "./components/CommandMessage";
+import { EnterPlanModeDialog } from "./components/EnterPlanModeDialog";
 // import { ErrorMessage } from "./components/ErrorMessage";
 import { ErrorMessage } from "./components/ErrorMessageRich";
 // import { Input } from "./components/Input";
 import { Input } from "./components/InputRich";
 import { ModelSelector } from "./components/ModelSelector";
 import { PlanModeDialog } from "./components/PlanModeDialog";
+import { QuestionDialog } from "./components/QuestionDialog";
 // import { ReasoningMessage } from "./components/ReasoningMessage";
 import { ReasoningMessage } from "./components/ReasoningMessageRich";
 import { SessionStats as SessionStatsComponent } from "./components/SessionStats";
@@ -63,6 +66,7 @@ import {
  buildMessageContentFromDisplay,
  clearPlaceholdersInText,
 } from "./helpers/pasteRegistry";
+import { generatePlanFilePath } from "./helpers/planName";
 import { safeJsonParseOr } from "./helpers/safeJsonParse";
 import { type ApprovalRequest, drainStreamWithResume } from "./helpers/stream";
 import { getRandomThinkingMessage } from "./helpers/thinkingMessages";
@@ -92,9 +96,72 @@ function getPlanModeReminder(): string {
    return "";
  }

-  // Use bundled reminder text for binary compatibility
-  const { PLAN_MODE_REMINDER } = require("../agent/promptAssets");
-  return PLAN_MODE_REMINDER;
+  const planFilePath = permissionMode.getPlanFilePath();
+
+  // Generate dynamic reminder with plan file path
+  return `<system-reminder>
+Plan mode is active. The user indicated that they do not want you to execute yet -- you MUST NOT make any edits (with the exception of the plan file mentioned below), run any non-readonly tools (including changing configs or making commits), or otherwise make any changes to the system. This supercedes any other instructions you have received.
+
+## Plan File Info:
+${planFilePath ? `No plan file exists yet. You should create your plan at ${planFilePath} using the Write tool.` : "No plan file path assigned."}
+
+You should build your plan incrementally by writing to or editing this file. NOTE that this is the only file you are allowed to edit - other than this you are only allowed to take READ-ONLY actions.
+
+**Plan File Guidelines:** The plan file should contain only your final recommended approach, not all alternatives considered. Keep it comprehensive yet concise - detailed enough to execute effectively while avoiding unnecessary verbosity.
+
+## Enhanced Planning Workflow
+
+### Phase 1: Initial Understanding
+Goal: Gain a comprehensive understanding of the user's request by reading through code and asking them questions.
+
+1. Understand the user's request thoroughly
+2. Explore the codebase to understand existing patterns and relevant code
+3. Use AskUserQuestion tool to clarify ambiguities in the user request up front.
+
+### Phase 2: Planning
+Goal: Come up with an approach to solve the problem identified in phase 1.
+
+- Provide any background context that may help with the task without prescribing the exact design itself
+- Create a detailed plan
+
+### Phase 3: Synthesis
+Goal: Synthesize the perspectives from Phase 2, and ensure that it aligns with the user's intentions by asking them questions.
+
+1. Collect all findings from exploration
+2. Keep track of critical files that should be read before implementing the plan
+3. Use AskUserQuestion to ask the user questions about trade offs.
+
+### Phase 4: Final Plan
+Once you have all the information you need, ensure that the plan file has been updated with your synthesized recommendation including:
+
+- Recommended approach with rationale
+- Key insights from different perspectives
+- Critical files that need modification
+
+### Phase 5: Call ExitPlanMode
+At the very end of your turn, once you have asked the user questions and are happy with your final plan file - you should always call ExitPlanMode to indicate to the user that you are done planning.
+
+This is critical - your turn should only end with either asking the user a question or calling ExitPlanMode. Do not stop unless it's for these 2 reasons.
+
+NOTE: At any point in time through this workflow you should feel free to ask the user questions or clarifications. Don't make large assumptions about user intent. The goal is to present a well researched plan to the user, and tie any loose ends before implementation begins.
+</system-reminder>
+`;
+}
+
+// Load the text of the current plan file.
+// File-system problems are reported as a descriptive string in place of the
+// plan content rather than being thrown to the caller.
+function readPlanFile(): string {
+  const target = permissionMode.getPlanFilePath();
+
+  // No path has been assigned.
+  if (!target) return "No plan file path set.";
+
+  // A path is assigned but nothing exists at that location.
+  if (!existsSync(target)) return `Plan file not found at ${target}`;
+
+  let contents: string;
+  try {
+    contents = readFileSync(target, "utf-8");
+  } catch {
+    // The file exists but could not be read.
+    contents = `Failed to read plan file at ${target}`;
+  }
+  return contents;
 }

 // Get skill unload reminder if skills are loaded (using cached flag)
@@ -214,6 +281,23 @@ export default function App({
    toolArgs: string;
  } | null>(null);

+  // If we have a question approval request, show the question dialog
+  const [questionApprovalPending, setQuestionApprovalPending] = useState<{
+    questions: Array<{
+      question: string;
+      header: string;
+      options: Array<{ label: string; description: string }>;
+      multiSelect: boolean;
+    }>;
+    toolCallId: string;
+  } | null>(null);
+
+  // If we have an EnterPlanMode approval request, show the dialog
+  const [enterPlanModeApprovalPending, setEnterPlanModeApprovalPending] =
+    useState<{
+      toolCallId: string;
+    } | null>(null);
+
  // Model selector state
  const [modelSelectorOpen, setModelSelectorOpen] = useState(false);
  const [toolsetSelectorOpen, setToolsetSelectorOpen] = useState(false);
@@ -365,42 +449,77 @@ export default function App({
      // Check if this is an ExitPlanMode approval - route to plan dialog
      const planApproval = approvals.find((a) => a.toolName === "ExitPlanMode");
      if (planApproval) {
-        const parsedArgs = safeJsonParseOr<Record<string, unknown>>(
-          planApproval.toolArgs,
-          {},
-        );
-        const plan = (parsedArgs.plan as string) || "No plan provided";
+        // Read plan from the plan file (not from toolArgs)
+        const plan = readPlanFile();

        setPlanApprovalPending({
          plan,
          toolCallId: planApproval.toolCallId,
          toolArgs: planApproval.toolArgs,
        });
-      } else {
-        // Regular tool approvals (may be multiple for parallel tools)
-        setPendingApprovals(approvals);
-
-        // Analyze approval contexts for all restored approvals
-        const analyzeStartupApprovals = async () => {
-          try {
-            const contexts = await Promise.all(
-              approvals.map(async (approval) => {
-                const parsedArgs = safeJsonParseOr<Record<string, unknown>>(
-                  approval.toolArgs,
-                  {},
-                );
-                return await analyzeToolApproval(approval.toolName, parsedArgs);
-              }),
-            );
-            setApprovalContexts(contexts);
-          } catch (error) {
-            // If analysis fails, leave context as null (will show basic options)
-            console.error("Failed to analyze startup approvals:", error);
-          }
-        };
-
-        analyzeStartupApprovals();
+        return;
      }
+
+      // Check if this is an AskUserQuestion approval - route to question dialog
+      const questionApproval = approvals.find(
+        (a) => a.toolName === "AskUserQuestion",
+      );
+      if (questionApproval) {
+        const parsedArgs = safeJsonParseOr<Record<string, unknown>>(
+          questionApproval.toolArgs,
+          {},
+        );
+        const questions =
+          (parsedArgs.questions as Array<{
+            question: string;
+            header: string;
+            options: Array<{ label: string; description: string }>;
+            multiSelect: boolean;
+          }>) || [];
+
+        if (questions.length > 0) {
+          setQuestionApprovalPending({
+            questions,
+            toolCallId: questionApproval.toolCallId,
+          });
+          return;
+        }
+      }
+
+      // Check if this is an EnterPlanMode approval - route to enter plan mode dialog
+      const enterPlanModeApproval = approvals.find(
+        (a) => a.toolName === "EnterPlanMode",
+      );
+      if (enterPlanModeApproval) {
+        setEnterPlanModeApprovalPending({
+          toolCallId: enterPlanModeApproval.toolCallId,
+        });
+        return;
+      }
+
+      // Regular tool approvals (may be multiple for parallel tools)
+      setPendingApprovals(approvals);
+
+      // Analyze approval contexts for all restored approvals
+      const analyzeStartupApprovals = async () => {
+        try {
+          const contexts = await Promise.all(
+            approvals.map(async (approval) => {
+              const parsedArgs = safeJsonParseOr<Record<string, unknown>>(
+                approval.toolArgs,
+                {},
+              );
+              return await analyzeToolApproval(approval.toolName, parsedArgs);
+            }),
+          );
+          setApprovalContexts(contexts);
+        } catch (error) {
+          // If analysis fails, leave context as null (will show basic options)
+          console.error("Failed to analyze startup approvals:", error);
+        }
+      };
+
+      analyzeStartupApprovals();
    }
  }, [loadingState, startupApproval, startupApprovals]);

@@ -553,11 +672,8 @@ export default function App({
              (a) => a.toolName === "ExitPlanMode",
            );
            if (planApproval) {
-              const parsedArgs = safeJsonParseOr<Record<string, unknown>>(
-                planApproval.toolArgs,
-                {},
-              );
-              const plan = (parsedArgs.plan as string) || "No plan provided";
+              // Read plan from the plan file (not from toolArgs)
+              const plan = readPlanFile();

              setPlanApprovalPending({
                plan,
@@ -568,6 +684,45 @@ export default function App({
              return;
            }

+            // Check each approval for AskUserQuestion special case
+            const questionApproval = approvalsToProcess.find(
+              (a) => a.toolName === "AskUserQuestion",
+            );
+            if (questionApproval) {
+              const parsedArgs = safeJsonParseOr<Record<string, unknown>>(
+                questionApproval.toolArgs,
+                {},
+              );
+              const questions =
+                (parsedArgs.questions as Array<{
+                  question: string;
+                  header: string;
+                  options: Array<{ label: string; description: string }>;
+                  multiSelect: boolean;
+                }>) || [];
+
+              if (questions.length > 0) {
+                setQuestionApprovalPending({
+                  questions,
+                  toolCallId: questionApproval.toolCallId,
+                });
+                setStreaming(false);
+                return;
+              }
+            }
+
+            // Check each approval for EnterPlanMode special case
+            const enterPlanModeApproval = approvalsToProcess.find(
+              (a) => a.toolName === "EnterPlanMode",
+            );
+            if (enterPlanModeApproval) {
+              setEnterPlanModeApprovalPending({
+                toolCallId: enterPlanModeApproval.toolCallId,
+              });
+              setStreaming(false);
+              return;
+            }
+
            // Check permissions for all approvals
            const approvalResults = await Promise.all(
              approvalsToProcess.map(async (approvalItem) => {
@@ -642,14 +797,30 @@ export default function App({
              }),
            );

-            // Create denial results for auto-denied tools
-            const autoDeniedResults = autoDenied.map((ac) => ({
-              approval: ac.approval,
-              reason:
+            // Create denial results for auto-denied tools and update buffers
+            const autoDeniedResults = autoDenied.map((ac) => {
+              const reason =
                "matchedRule" in ac.permission && ac.permission.matchedRule
                  ? `Permission denied by rule: ${ac.permission.matchedRule}`
-                  : `Permission denied: ${ac.permission.reason || "Unknown reason"}`,
-            }));
+                  : `Permission denied: ${ac.permission.reason || "Unknown reason"}`;
+
+              // Update buffers with tool rejection for UI
+              onChunk(buffersRef.current, {
+                message_type: "tool_return_message",
+                id: "dummy",
+                date: new Date().toISOString(),
+                tool_call_id: ac.approval.toolCallId,
+                tool_return: `Error: request to call tool denied. User reason: ${reason}`,
+                status: "error",
+                stdout: null,
+                stderr: null,
+              });
+
+              return {
+                approval: ac.approval,
+                reason,
+              };
+            });

            // If all are auto-handled, continue immediately without showing dialog
            if (needsUserInput.length === 0) {
@@ -1277,6 +1448,43 @@ export default function App({
          return { submitted: true };
        }

+        // Special handling for /bashes command - show background shell processes
+        if (msg.trim() === "/bashes") {
+          const { backgroundProcesses } = await import(
+            "../tools/impl/process_manager"
+          );
+          const cmdId = uid("cmd");
+
+          let output: string;
+          if (backgroundProcesses.size === 0) {
+            output = "No background processes running";
+          } else {
+            const lines = ["Background processes:"];
+            for (const [id, proc] of backgroundProcesses) {
+              const status =
+                proc.status === "running"
+                  ? "running"
+                  : proc.status === "completed"
+                    ? `completed (exit ${proc.exitCode})`
+                    : `failed (exit ${proc.exitCode})`;
+              lines.push(`  ${id}: ${proc.command} [${status}]`);
+            }
+            output = lines.join("\n");
+          }
+
+          buffersRef.current.byId.set(cmdId, {
+            kind: "command",
+            id: cmdId,
+            input: msg,
+            output,
+            phase: "finished",
+            success: true,
+          });
+          buffersRef.current.order.push(cmdId);
+          refreshDerived();
+          return { submitted: true };
+        }
+
        // Special handling for /download command - download agent file
        if (msg.trim() === "/download") {
          const cmdId = uid("cmd");
@@ -2213,6 +2421,180 @@ export default function App({
    [planApprovalPending, processConversation, appendError],
  );

+  // Submit handler for the question dialog.
+  // Takes the user's answers (keyed by question text), turns them into a
+  // tool return string, records that return in the transcript buffers, and
+  // resumes the conversation with it. Does nothing when no question approval
+  // is pending.
+  const handleQuestionSubmit = useCallback(
+    async (answers: Record<string, string>) => {
+      if (!questionApprovalPending) return;
+
+      // Capture what we need, then clear the pending state
+      const { toolCallId, questions } = questionApprovalPending;
+      setQuestionApprovalPending(null);
+
+      try {
+        // Format the answer string like Claude Code does
+        // (one `"question"="answer"` pair per question; a missing answer becomes "")
+        const answerParts = questions.map((q) => {
+          const answer = answers[q.question] || "";
+          return `"${q.question}"="${answer}"`;
+        });
+        const toolReturn = `User has answered your questions: ${answerParts.join(", ")}. You can now continue with the user's answers in mind.`;
+
+        // Update buffers with tool return
+        // (locally built chunk with a placeholder id)
+        onChunk(buffersRef.current, {
+          message_type: "tool_return_message",
+          id: "dummy",
+          date: new Date().toISOString(),
+          tool_call_id: toolCallId,
+          tool_return: toolReturn,
+          status: "success",
+          stdout: null,
+          stderr: null,
+        });
+
+        // Rotate to a new thinking message
+        setThinkingMessage(getRandomThinkingMessage());
+        refreshDerived();
+
+        // Restart conversation loop with the answer
+        await processConversation([
+          {
+            type: "approval",
+            approvals: [
+              {
+                type: "tool",
+                tool_call_id: toolCallId,
+                tool_return: toolReturn,
+                status: "success",
+                stdout: null,
+                stderr: null,
+              },
+            ],
+          },
+        ]);
+      } catch (e) {
+        // Surface the failure in the UI and clear the streaming flag
+        appendError(String(e));
+        setStreaming(false);
+      }
+    },
+    [questionApprovalPending, processConversation, appendError, refreshDerived],
+  );
+
+  // Approve handler for the enter-plan-mode dialog.
+  // Switches the permission mode to "plan", assigns a freshly generated plan
+  // file path, records a success tool return in the transcript buffers, and
+  // resumes the conversation. Does nothing when no EnterPlanMode approval is
+  // pending.
+  const handleEnterPlanModeApprove = useCallback(async () => {
+    if (!enterPlanModeApprovalPending) return;
+
+    const { toolCallId } = enterPlanModeApprovalPending;
+    setEnterPlanModeApprovalPending(null);
+
+    // Generate plan file path
+    const planFilePath = generatePlanFilePath();
+
+    // Toggle plan mode on and store plan file path
+    // NOTE: this happens before the try block below, so the mode change is
+    // not undone if resuming the conversation fails.
+    permissionMode.setMode("plan");
+    permissionMode.setPlanFilePath(planFilePath);
+    setUiPermissionMode("plan");
+
+    // Build the tool return message inline; it ends with the plan file path
+    const toolReturn = `Entered plan mode. You should now focus on exploring the codebase and designing an implementation approach.
+
+In plan mode, you should:
+1. Thoroughly explore the codebase to understand existing patterns
+2. Identify similar features and architectural approaches
+3. Consider multiple approaches and their trade-offs
+4. Use AskUserQuestion if you need to clarify the approach
+5. Design a concrete implementation strategy
+6. When ready, use ExitPlanMode to present your plan for approval
+
+Remember: DO NOT write or edit any files yet. This is a read-only exploration and planning phase.
+
+Plan file path: ${planFilePath}`;
+
+    try {
+      // Update buffers with tool return
+      // (locally built chunk with a placeholder id)
+      onChunk(buffersRef.current, {
+        message_type: "tool_return_message",
+        id: "dummy",
+        date: new Date().toISOString(),
+        tool_call_id: toolCallId,
+        tool_return: toolReturn,
+        status: "success",
+        stdout: null,
+        stderr: null,
+      });
+
+      // Rotate to a new thinking message
+      setThinkingMessage(getRandomThinkingMessage());
+      refreshDerived();
+
+      // Restart conversation loop with approval
+      await processConversation([
+        {
+          type: "approval",
+          approvals: [
+            {
+              type: "tool",
+              tool_call_id: toolCallId,
+              tool_return: toolReturn,
+              status: "success",
+              stdout: null,
+              stderr: null,
+            },
+          ],
+        },
+      ]);
+    } catch (e) {
+      // Surface the failure in the UI and clear the streaming flag
+      appendError(String(e));
+      setStreaming(false);
+    }
+  }, [
+    enterPlanModeApprovalPending,
+    processConversation,
+    appendError,
+    refreshDerived,
+  ]);
+
+  // Reject handler for the enter-plan-mode dialog.
+  // Leaves the permission mode untouched, records an error tool return in the
+  // transcript buffers, and resumes the conversation with a denial that
+  // carries a fixed reason. Does nothing when no EnterPlanMode approval is
+  // pending.
+  const handleEnterPlanModeReject = useCallback(async () => {
+    if (!enterPlanModeApprovalPending) return;
+
+    const { toolCallId } = enterPlanModeApprovalPending;
+    setEnterPlanModeApprovalPending(null);
+
+    // Fixed reason sent back with the denial
+    const rejectionReason =
+      "User chose to skip plan mode and start implementing directly.";
+
+    try {
+      // Update buffers with tool rejection (format matches what harness sends)
+      onChunk(buffersRef.current, {
+        message_type: "tool_return_message",
+        id: "dummy",
+        date: new Date().toISOString(),
+        tool_call_id: toolCallId,
+        tool_return: `Error: request to call tool denied. User reason: ${rejectionReason}`,
+        status: "error",
+        stdout: null,
+        stderr: null,
+      });
+
+      // Rotate to a new thinking message
+      setThinkingMessage(getRandomThinkingMessage());
+      refreshDerived();
+
+      // Restart conversation loop with rejection
+      // (a denial keyed by the tool call id, not a tool-result payload)
+      await processConversation([
+        {
+          type: "approval",
+          approval_request_id: toolCallId,
+          approve: false,
+          reason: rejectionReason,
+        },
+      ]);
+    } catch (e) {
+      // Surface the failure in the UI and clear the streaming flag
+      appendError(String(e));
+      setStreaming(false);
+    }
+  }, [
+    enterPlanModeApprovalPending,
+    processConversation,
+    appendError,
+    refreshDerived,
+  ]);
+
  // Live area shows only in-progress items
  const liveItems = useMemo(() => {
    return lines.filter((ln) => {
@@ -2343,7 +2725,9 @@ export default function App({
                !toolsetSelectorOpen &&
                !systemPromptSelectorOpen &&
                !agentSelectorOpen &&
-                !planApprovalPending
+                !planApprovalPending &&
+                !questionApprovalPending &&
+                !enterPlanModeApprovalPending
              }
              streaming={streaming}
              commandRunning={commandRunning}
@@ -2412,6 +2796,28 @@ export default function App({
              </>
            )}

+            {/* Question Dialog - for AskUserQuestion tool */}
+            {questionApprovalPending && (
+              <>
+                <Box height={1} />
+                <QuestionDialog
+                  questions={questionApprovalPending.questions}
+                  onSubmit={handleQuestionSubmit}
+                />
+              </>
+            )}
+
+            {/* Enter Plan Mode Dialog - for EnterPlanMode tool */}
+            {enterPlanModeApprovalPending && (
+              <>
+                <Box height={1} />
+                <EnterPlanModeDialog
+                  onApprove={handleEnterPlanModeApprove}
+                  onReject={handleEnterPlanModeReject}
+                />
+              </>
+            )}
+
            {/* Approval Dialog - below live items */}
            {pendingApprovals.length > 0 && (
              <>
--- a/src/cli/commands/registry.ts
+++ b/src/cli/commands/registry.ts
@@ -99,6 +99,13 @@ export const commands: Record<string, Command> = {
      return "Downloading agent file...";
    },
  },
+  "/bashes": {
+    desc: "Show background shell processes",
+    handler: () => {
+      // Handled specially in App.tsx to show background processes
+      return "Showing background processes...";
+    },
+  },
 };

 /**
--- a/src/cli/components/EnterPlanModeDialog.tsx
+++ b/src/cli/components/EnterPlanModeDialog.tsx
@@ -0,0 +1,80 @@
+import { Box, Text, useInput } from "ink";
+import { memo, useState } from "react";
+import { colors } from "./colors";
+
+// Callbacks supplied by the parent of EnterPlanModeDialog.
+type Props = {
+  // Called when the user picks "Yes, enter plan mode" (or presses "1")
+  onApprove: () => void;
+  // Called when the user picks "No, start implementing now" (or presses "2")
+  onReject: () => void;
+};
+
+// Two-choice prompt asking whether plan mode should be entered.
+// Up/down arrows move the highlight, Enter confirms the highlighted choice,
+// and the "1" / "2" keys pick a choice directly.
+export const EnterPlanModeDialog = memo(({ onApprove, onReject }: Props) => {
+  const [activeIndex, setActiveIndex] = useState(0);
+
+  const choices = [
+    { label: "Yes, enter plan mode", action: onApprove },
+    { label: "No, start implementing now", action: onReject },
+  ];
+  const lastIndex = choices.length - 1;
+
+  useInput((input, key) => {
+    // Navigation: the highlight is clamped to the list bounds
+    if (key.upArrow) {
+      setActiveIndex((current) => Math.max(0, current - 1));
+      return;
+    }
+    if (key.downArrow) {
+      setActiveIndex((current) => Math.min(lastIndex, current + 1));
+      return;
+    }
+
+    // Confirm whichever choice is highlighted
+    if (key.return) {
+      choices[activeIndex]?.action();
+      return;
+    }
+
+    // Numeric shortcuts bypass the highlight entirely
+    if (input === "1") {
+      onApprove();
+      return;
+    }
+    if (input === "2") {
+      onReject();
+    }
+  });
+
+  // Render a single choice row; the highlighted row is marked and bold
+  const renderChoice = (choice: (typeof choices)[number], position: number) => {
+    const highlighted = position === activeIndex;
+    const tint = highlighted ? colors.approval.header : undefined;
+    const marker = highlighted ? ">" : " ";
+
+    return (
+      <Box key={choice.label} flexDirection="row">
+        <Box width={2} flexShrink={0}>
+          <Text color={tint}>{marker}</Text>
+        </Box>
+        <Box flexGrow={1}>
+          <Text color={tint} bold={highlighted}>
+            {position + 1}. {choice.label}
+          </Text>
+        </Box>
+      </Box>
+    );
+  };
+
+  return (
+    <Box flexDirection="column" paddingY={1}>
+      <Box marginBottom={1}>
+        <Text color={colors.approval.header} bold>
+          Enter plan mode?
+        </Text>
+      </Box>
+
+      <Box marginBottom={1} flexDirection="column">
+        <Text>
+          Letta wants to enter plan mode to explore and design an implementation
+          approach.
+        </Text>
+        <Text> </Text>
+        <Text>In plan mode, Letta will:</Text>
+        <Text> • Explore the codebase thoroughly</Text>
+        <Text> • Identify existing patterns</Text>
+        <Text> • Design an implementation strategy</Text>
+        <Text> • Present a plan for your approval</Text>
+        <Text> </Text>
+        <Text dimColor>
+          No code changes will be made until you approve the plan.
+        </Text>
+      </Box>
+
+      <Box flexDirection="column">{choices.map(renderChoice)}</Box>
+    </Box>
+  );
+});
+
+EnterPlanModeDialog.displayName = "EnterPlanModeDialog";
--- a/src/cli/components/InputRich.tsx
+++ b/src/cli/components/InputRich.tsx
@@ -110,6 +110,7 @@ export function Input({

  // Handle escape key for interrupt (when streaming) or double-escape-to-clear (when not)
  useInput((_input, key) => {
+    if (!visible) return;
    if (key.escape) {
      // When streaming, use Esc to interrupt
      if (streaming && onInterrupt && !interruptRequested) {
@@ -138,6 +139,7 @@ export function Input({

  // Handle CTRL-C for double-ctrl-c-to-exit
  useInput((input, key) => {
+    if (!visible) return;
    if (input === "c" && key.ctrl) {
      if (ctrlCPressed) {
        // Second CTRL-C - call onExit callback which handles stats and exit
@@ -156,6 +158,7 @@ export function Input({

  // Handle Shift+Tab for permission mode cycling
  useInput((_input, key) => {
+    if (!visible) return;
    if (key.shift && key.tab) {
      // Cycle through permission modes
      const modes: PermissionMode[] = [
@@ -181,6 +184,7 @@ export function Input({

  // Handle up/down arrow keys for wrapped text navigation and command history
  useInput((_input, key) => {
+    if (!visible) return;
    // Don't interfere with autocomplete navigation
    if (isAutocompleteActive) {
      return;
--- a/src/cli/components/QuestionDialog.tsx
+++ b/src/cli/components/QuestionDialog.tsx
@@ -0,0 +1,217 @@
+import { Box, Text, useInput } from "ink";
+import { memo, useState } from "react";
+import { colors } from "./colors";
+import { PasteAwareTextInput } from "./PasteAwareTextInput";
+
+interface QuestionOption {
+  label: string;
+  description: string;
+}
+
+interface Question {
+  question: string;
+  header: string;
+  options: QuestionOption[];
+  multiSelect: boolean;
+}
+
+type Props = {
+  questions: Question[];
+  onSubmit: (answers: Record<string, string>) => void;
+};
+
+export const QuestionDialog = memo(({ questions, onSubmit }: Props) => {
+  const [currentQuestionIndex, setCurrentQuestionIndex] = useState(0);
+  const [answers, setAnswers] = useState<Record<string, string>>({});
+  const [selectedOption, setSelectedOption] = useState(0);
+  const [isOtherMode, setIsOtherMode] = useState(false);
+  const [otherText, setOtherText] = useState("");
+  const [selectedMulti, setSelectedMulti] = useState<Set<number>>(new Set());
+
+  const currentQuestion = questions[currentQuestionIndex];
+  const optionsWithOther = currentQuestion
+    ? [
+        ...currentQuestion.options,
+        { label: "Other", description: "Provide a custom response" },
+      ]
+    : [];
+
+  const handleSubmitAnswer = (answer: string) => {
+    if (!currentQuestion) return;
+    const newAnswers = {
+      ...answers,
+      [currentQuestion.question]: answer,
+    };
+    setAnswers(newAnswers);
+
+    if (currentQuestionIndex < questions.length - 1) {
+      setCurrentQuestionIndex(currentQuestionIndex + 1);
+      setSelectedOption(0);
+      setIsOtherMode(false);
+      setOtherText("");
+      setSelectedMulti(new Set());
+    } else {
+      onSubmit(newAnswers);
+    }
+  };
+
+  useInput((input, key) => {
+    if (!currentQuestion) return;
+
+    if (isOtherMode) {
+      if (key.escape) {
+        setIsOtherMode(false);
+        setOtherText("");
+      }
+      return;
+    }
+
+    if (key.upArrow) {
+      setSelectedOption((prev) => Math.max(0, prev - 1));
+    } else if (key.downArrow) {
+      setSelectedOption((prev) =>
+        Math.min(optionsWithOther.length - 1, prev + 1),
+      );
+    } else if (key.return) {
+      if (currentQuestion.multiSelect) {
+        if (selectedOption === optionsWithOther.length - 1) {
+          setIsOtherMode(true);
+        } else if (selectedMulti.size > 0) {
+          const selectedLabels = Array.from(selectedMulti)
+            .map((i) => optionsWithOther[i]?.label)
+            .filter(Boolean)
+            .join(", ");
+          handleSubmitAnswer(selectedLabels);
+        }
+      } else {
+        if (selectedOption === optionsWithOther.length - 1) {
+          setIsOtherMode(true);
+        } else {
+          handleSubmitAnswer(optionsWithOther[selectedOption]?.label || "");
+        }
+      }
+    } else if (input === " " && currentQuestion.multiSelect) {
+      if (selectedOption < optionsWithOther.length - 1) {
+        setSelectedMulti((prev) => {
+          const newSet = new Set(prev);
+          if (newSet.has(selectedOption)) {
+            newSet.delete(selectedOption);
+          } else {
+            newSet.add(selectedOption);
+          }
+          return newSet;
+        });
+      }
+    } else if (input >= "1" && input <= "9") {
+      const optionIndex = Number.parseInt(input, 10) - 1;
+      if (optionIndex < optionsWithOther.length) {
+        if (currentQuestion.multiSelect) {
+          if (optionIndex < optionsWithOther.length - 1) {
+            setSelectedMulti((prev) => {
+              const newSet = new Set(prev);
+              if (newSet.has(optionIndex)) {
+                newSet.delete(optionIndex);
+              } else {
+                newSet.add(optionIndex);
+              }
+              return newSet;
+            });
+          }
+        } else {
+          if (optionIndex === optionsWithOther.length - 1) {
+            setIsOtherMode(true);
+          } else {
+            handleSubmitAnswer(optionsWithOther[optionIndex]?.label || "");
+          }
+        }
+      }
+    }
+  });
+
+  const handleOtherSubmit = (text: string) => {
+    handleSubmitAnswer(text);
+  };
+
+  if (!currentQuestion) return null;
+
+  return (
+    <Box flexDirection="column" paddingY={1}>
+      <Box marginBottom={1}>
+        <Text color={colors.approval.header}>
+          <Text bold>[{currentQuestion.header}]</Text>{" "}
+          {currentQuestion.question}
+        </Text>
+      </Box>
+
+      {questions.length > 1 && (
+        <Box marginBottom={1}>
+          <Text dimColor>
+            Question {currentQuestionIndex + 1} of {questions.length}
+          </Text>
+        </Box>
+      )}
+
+      {isOtherMode ? (
+        <Box flexDirection="column">
+          <Text dimColor>Type your response (Esc to cancel):</Text>
+          <Box marginTop={1}>
+            <Text color={colors.approval.header}>&gt; </Text>
+            <PasteAwareTextInput
+              value={otherText}
+              onChange={setOtherText}
+              onSubmit={handleOtherSubmit}
+            />
+          </Box>
+        </Box>
+      ) : (
+        <Box flexDirection="column">
+          {optionsWithOther.map((option, index) => {
+            const isSelected = index === selectedOption;
+            const isChecked = selectedMulti.has(index);
+            const color = isSelected ? colors.approval.header : undefined;
+
+            return (
+              <Box
+                key={option.label}
+                flexDirection="column"
+                marginBottom={index < optionsWithOther.length - 1 ? 1 : 0}
+              >
+                <Box flexDirection="row">
+                  <Box width={2} flexShrink={0}>
+                    <Text color={color}>{isSelected ? ">" : " "}</Text>
+                  </Box>
+                  {currentQuestion.multiSelect &&
+                    index < optionsWithOther.length - 1 && (
+                      <Box width={4} flexShrink={0}>
+                        <Text color={color}>[{isChecked ? "x" : " "}]</Text>
+                      </Box>
+                    )}
+                  <Box flexGrow={1}>
+                    <Text color={color} bold={isSelected}>
+                      {index + 1}. {option.label}
+                    </Text>
+                  </Box>
+                </Box>
+                {option.description && (
+                  <Box paddingLeft={currentQuestion.multiSelect ? 6 : 2}>
+                    <Text dimColor>{option.description}</Text>
+                  </Box>
+                )}
+              </Box>
+            );
+          })}
+
+          <Box marginTop={1}>
+            <Text dimColor>
+              {currentQuestion.multiSelect
+                ? "Space to toggle, Enter to confirm selection"
+                : `Enter to select, or type 1-${optionsWithOther.length}`}
+            </Text>
+          </Box>
+        </Box>
+      )}
+    </Box>
+  );
+});
+
+QuestionDialog.displayName = "QuestionDialog";
--- a/src/cli/components/ToolCallMessageRich.tsx
+++ b/src/cli/components/ToolCallMessageRich.tsx
@@ -62,6 +62,7 @@ export const ToolCallMessage = memo(({ line }: { line: ToolCallLine }) => {
  else if (displayName === "ls") displayName = "LS";
  else if (displayName === "todo_write") displayName = "TODO";
  else if (displayName === "TodoWrite") displayName = "TODO";
+  else if (displayName === "EnterPlanMode") displayName = "Planning";
  else if (displayName === "ExitPlanMode") displayName = "Planning";
  // Codex toolset
  else if (displayName === "update_plan") displayName = "Plan";
--- a/src/cli/helpers/backfill.ts
+++ b/src/cli/helpers/backfill.ts
@@ -212,4 +212,29 @@ export function backfillBuffers(buffers: Buffers, history: Message[]): void {
        break; // ignore other message types
    }
  }
+
+  // Mark stray tool calls as closed
+  // Walk backwards: any pending tool_call before the first "transition" (non-pending-tool-call) is stray
+  let foundTransition = false;
+  for (let i = buffers.order.length - 1; i >= 0; i--) {
+    const lineId = buffers.order[i];
+    if (!lineId) continue;
+    const line = buffers.byId.get(lineId);
+
+    if (line?.kind === "tool_call" && line.phase === "ready") {
+      if (foundTransition) {
+        // This is a stray - mark it closed
+        buffers.byId.set(lineId, {
+          ...line,
+          phase: "finished",
+          resultText: "[Tool return not found in history]",
+          resultOk: false,
+        });
+      }
+      // else: legit pending, leave it
+    } else {
+      // Hit something that's not a pending tool_call - transition point
+      foundTransition = true;
+    }
+  }
 }
--- a/src/cli/helpers/planName.ts
+++ b/src/cli/helpers/planName.ts
@@ -0,0 +1,117 @@
+import { homedir } from "node:os";
+
+const adjectives = [
+  "bold",
+  "bright",
+  "calm",
+  "clever",
+  "crisp",
+  "daring",
+  "eager",
+  "fair",
+  "gentle",
+  "happy",
+  "keen",
+  "lively",
+  "merry",
+  "nimble",
+  "playful",
+  "quick",
+  "radiant",
+  "serene",
+  "swift",
+  "vivid",
+  "warm",
+  "witty",
+  "zealous",
+  "agile",
+  "breezy",
+  "charming",
+  "dazzling",
+  "elegant",
+  "fancy",
+  "golden",
+  "humble",
+  "jolly",
+  "kind",
+  "lucky",
+  "mystic",
+  "noble",
+  "peaceful",
+  "quiet",
+  "rolling",
+  "shiny",
+  "tender",
+  "upbeat",
+  "valiant",
+  "whimsy",
+  "youthful",
+  "zesty",
+];
+
+const nouns = [
+  "apple",
+  "brook",
+  "cloud",
+  "dawn",
+  "elm",
+  "fern",
+  "grove",
+  "hill",
+  "iris",
+  "jade",
+  "kite",
+  "lake",
+  "maple",
+  "nest",
+  "oak",
+  "pine",
+  "quartz",
+  "river",
+  "stone",
+  "tide",
+  "umbra",
+  "vine",
+  "wave",
+  "yarn",
+  "zenith",
+  "acorn",
+  "birch",
+  "coral",
+  "dune",
+  "ember",
+  "frost",
+  "glade",
+  "harbor",
+  "ivy",
+  "jasper",
+  "kelp",
+  "lotus",
+  "moss",
+  "nova",
+  "opal",
+  "pebble",
+  "plum",
+  "reed",
+  "sage",
+  "thorn",
+  "violet",
+  "willow",
+  "zephyr",
+];
+
+function randomElement<T>(arr: T[]): T {
+  return arr[Math.floor(Math.random() * arr.length)] as T;
+}
+
+export function generatePlanName(): string {
+  const adj1 = randomElement(adjectives);
+  const adj2 = randomElement(adjectives);
+  const noun = randomElement(nouns);
+  return `${adj1}-${adj2}-${noun}`;
+}
+
+export function generatePlanFilePath(): string {
+  const name = generatePlanName();
+  return `${homedir()}/.letta/plans/${name}.md`;
+}
--- a/src/permissions/checker.ts
+++ b/src/permissions/checker.ts
@@ -80,7 +80,7 @@ export function checkPermission(
  }

  // Check permission mode (applies before CLI allow rules but after deny rules)
-  const modeOverride = permissionMode.checkModeOverride(toolName);
+  const modeOverride = permissionMode.checkModeOverride(toolName, toolArgs);
  if (modeOverride) {
    const currentMode = permissionMode.getMode();
    return {
--- a/src/permissions/mode.ts
+++ b/src/permissions/mode.ts
@@ -10,9 +10,11 @@ export type PermissionMode =
 // Use globalThis to ensure singleton across bundle
 // This prevents Bun's bundler from creating duplicate instances of the mode manager
 const MODE_KEY = Symbol.for("@letta/permissionMode");
+const PLAN_FILE_KEY = Symbol.for("@letta/planFilePath");

 type GlobalWithMode = typeof globalThis & {
-  [key: symbol]: PermissionMode;
+  [MODE_KEY]: PermissionMode;
+  [PLAN_FILE_KEY]: string | null;
 };

 function getGlobalMode(): PermissionMode {
@@ -28,6 +30,16 @@ function setGlobalMode(value: PermissionMode): void {
  global[MODE_KEY] = value;
 }

+function getGlobalPlanFilePath(): string | null {
+  const global = globalThis as GlobalWithMode;
+  return global[PLAN_FILE_KEY] || null;
+}
+
+function setGlobalPlanFilePath(value: string | null): void {
+  const global = globalThis as GlobalWithMode;
+  global[PLAN_FILE_KEY] = value;
+}
+
 /**
 * Permission mode state for the current session.
 * Set via CLI --permission-mode flag or settings.json defaultMode.
@@ -46,6 +58,10 @@ class PermissionModeManager {
   */
  setMode(mode: PermissionMode): void {
    this.currentMode = mode;
+    // Clear plan file path when exiting plan mode
+    if (mode !== "plan") {
+      setGlobalPlanFilePath(null);
+    }
  }

  /**
@@ -55,11 +71,28 @@ class PermissionModeManager {
    return this.currentMode;
  }

+  /**
+   * Set the plan file path (only relevant when in plan mode)
+   */
+  setPlanFilePath(path: string | null): void {
+    setGlobalPlanFilePath(path);
+  }
+
+  /**
+   * Get the current plan file path
+   */
+  getPlanFilePath(): string | null {
+    return getGlobalPlanFilePath();
+  }
+
  /**
   * Check if a tool should be auto-allowed based on current mode
   * Returns null if mode doesn't apply to this tool
   */
-  checkModeOverride(toolName: string): "allow" | "deny" | null {
+  checkModeOverride(
+    toolName: string,
+    toolArgs?: Record<string, unknown>,
+  ): "allow" | "deny" | null {
    switch (this.currentMode) {
      case "bypassPermissions":
        // Auto-allow everything (except explicit deny rules checked earlier)
@@ -91,17 +124,25 @@ class PermissionModeManager {
          "NotebookRead",
          "TodoWrite",
        ];
-        const deniedInPlan = [
-          "Write",
-          "Edit",
-          "NotebookEdit",
-          "Bash",
-          "WebFetch",
-        ];
+        const writeTools = ["Write", "Edit", "MultiEdit", "NotebookEdit"];
+        const deniedInPlan = ["Bash", "WebFetch"];

        if (allowedInPlan.includes(toolName)) {
          return "allow";
        }
+
+        // Special case: allow writes to the plan file only
+        if (writeTools.includes(toolName)) {
+          const planFilePath = this.getPlanFilePath();
+          const targetPath =
+            (toolArgs?.file_path as string) || (toolArgs?.path as string);
+
+          if (planFilePath && targetPath && targetPath === planFilePath) {
+            return "allow";
+          }
+          return "deny";
+        }
+
        if (deniedInPlan.includes(toolName)) {
          return "deny";
        }
--- a/src/tests/tools/bash-background.test.ts
+++ b/src/tests/tools/bash-background.test.ts
@@ -23,7 +23,7 @@ describe("Bash background tools", () => {
      run_in_background: true,
    });

-    // Extract bash_id from the response text
+    // Extract shell_id from the response text
    const match = startResult.content[0]?.text.match(/bash_(\d+)/);
    expect(match).toBeDefined();
    const bashId = `bash_${match?.[1]}`;
@@ -32,13 +32,13 @@ describe("Bash background tools", () => {
    await new Promise((resolve) => setTimeout(resolve, 200));

    // Retrieve output
-    const outputResult = await bash_output({ bash_id: bashId });
+    const outputResult = await bash_output({ shell_id: bashId });

    expect(outputResult.message).toContain("background output");
  });

-  test("BashOutput handles non-existent bash_id gracefully", async () => {
-    const result = await bash_output({ bash_id: "nonexistent" });
+  test("BashOutput handles non-existent shell_id gracefully", async () => {
+    const result = await bash_output({ shell_id: "nonexistent" });

    expect(result.message).toContain("No background process found");
  });
--- a/src/tests/tools/exitplanmode.test.ts
+++ b/src/tests/tools/exitplanmode.test.ts
@@ -3,25 +3,16 @@ import { exit_plan_mode } from "../../tools/impl/ExitPlanMode";

 describe("ExitPlanMode tool", () => {
  test("returns approval message", async () => {
-    const result = await exit_plan_mode({
-      plan: "1. Do thing A\n2. Do thing B\n3. Profit",
-    });
+    const result = await exit_plan_mode();

    expect(result.message).toBeDefined();
    expect(result.message).toContain("approved");
  });

-  test("handles empty plan", async () => {
-    const result = await exit_plan_mode({ plan: "" });
+  test("returns message with coding guidance", async () => {
+    const result = await exit_plan_mode();

    expect(result.message).toBeDefined();
-  });
-
-  test("accepts markdown formatted plan", async () => {
-    const plan = "## Steps\n- Step 1\n- Step 2\n\n**Important:** Read the docs";
-    const result = await exit_plan_mode({ plan });
-
-    expect(result.message).toBeDefined();
-    expect(result.message).toContain("approved");
+    expect(result.message).toContain("todo list");
  });
 });
--- a/src/tests/tools/grep.test.ts
+++ b/src/tests/tools/grep.test.ts
@@ -62,4 +62,86 @@ describe("Grep tool", () => {
      /missing required parameter.*pattern/,
    );
  });
+
+  test("head_limit limits number of results", async () => {
+    testDir = new TestDirectory();
+    testDir.createFile("a.txt", "match");
+    testDir.createFile("b.txt", "match");
+    testDir.createFile("c.txt", "match");
+    testDir.createFile("d.txt", "match");
+
+    try {
+      const result = await grep({
+        pattern: "match",
+        path: testDir.path,
+        output_mode: "files_with_matches",
+        head_limit: 2,
+      });
+
+      expect(result.files).toBe(4);
+      expect(result.output).toContain("showing 2");
+      const lines = result.output.split("\n").filter(Boolean);
+      expect(lines.length).toBe(3); // header + 2 files
+    } catch (error) {
+      if (error instanceof Error && error.message.includes("ENOENT")) {
+        console.log("Skipping grep test - ripgrep not available");
+      } else {
+        throw error;
+      }
+    }
+  });
+
+  test("offset skips initial results", async () => {
+    testDir = new TestDirectory();
+    testDir.createFile("a.txt", "match");
+    testDir.createFile("b.txt", "match");
+    testDir.createFile("c.txt", "match");
+
+    try {
+      const result = await grep({
+        pattern: "match",
+        path: testDir.path,
+        output_mode: "files_with_matches",
+        offset: 1,
+      });
+
+      expect(result.files).toBe(3);
+      expect(result.output).toContain("showing 2");
+    } catch (error) {
+      if (error instanceof Error && error.message.includes("ENOENT")) {
+        console.log("Skipping grep test - ripgrep not available");
+      } else {
+        throw error;
+      }
+    }
+  });
+
+  test("offset and head_limit work together", async () => {
+    testDir = new TestDirectory();
+    testDir.createFile("a.txt", "match");
+    testDir.createFile("b.txt", "match");
+    testDir.createFile("c.txt", "match");
+    testDir.createFile("d.txt", "match");
+
+    try {
+      const result = await grep({
+        pattern: "match",
+        path: testDir.path,
+        output_mode: "files_with_matches",
+        offset: 1,
+        head_limit: 2,
+      });
+
+      expect(result.files).toBe(4);
+      expect(result.output).toContain("showing 2");
+      const lines = result.output.split("\n").filter(Boolean);
+      expect(lines.length).toBe(3); // header + 2 files
+    } catch (error) {
+      if (error instanceof Error && error.message.includes("ENOENT")) {
+        console.log("Skipping grep test - ripgrep not available");
+      } else {
+        throw error;
+      }
+    }
+  });
 });
--- a/src/tests/tools/read.test.ts
+++ b/src/tests/tools/read.test.ts
@@ -108,4 +108,24 @@ export default box;
      /missing required parameter.*file_path/,
    );
  });
+
+  test("returns system reminder for empty files", async () => {
+    testDir = new TestDirectory();
+    const file = testDir.createFile("empty.txt", "");
+
+    const result = await read({ file_path: file });
+
+    expect(result.content).toContain("<system-reminder>");
+    expect(result.content).toContain("empty contents");
+  });
+
+  test("returns system reminder for whitespace-only files", async () => {
+    testDir = new TestDirectory();
+    const file = testDir.createFile("whitespace.txt", "   \n\n  \t  ");
+
+    const result = await read({ file_path: file });
+
+    expect(result.content).toContain("<system-reminder>");
+    expect(result.content).toContain("empty contents");
+  });
 });
--- a/src/tests/tools/todowrite.test.ts
+++ b/src/tests/tools/todowrite.test.ts
@@ -5,8 +5,12 @@ describe("TodoWrite tool", () => {
  test("accepts valid todos with all required fields", async () => {
    const result = await todo_write({
      todos: [
-        { id: "1", content: "Task 1", status: "pending" },
-        { id: "2", content: "Task 2", status: "in_progress" },
+        {
+          content: "Run tests",
+          status: "pending",
+          activeForm: "Running tests",
+        },
+        { content: "Fix bug", status: "in_progress", activeForm: "Fixing bug" },
      ],
    });

@@ -14,15 +18,15 @@ describe("TodoWrite tool", () => {
    expect(result.message).toContain("modified successfully");
  });

-  test("requires id field", async () => {
+  test("requires activeForm field", async () => {
    await expect(
      todo_write({
        todos: [
          // @ts-expect-error - testing invalid input
-          { content: "Missing id", status: "pending" },
+          { content: "Missing activeForm", status: "pending" },
        ],
      }),
-    ).rejects.toThrow(/id string/);
+    ).rejects.toThrow(/activeForm string/);
  });

  test("requires content field", async () => {
@@ -30,7 +34,7 @@ describe("TodoWrite tool", () => {
      todo_write({
        todos: [
          // @ts-expect-error - testing invalid input
-          { id: "1", status: "pending" },
+          { activeForm: "Testing", status: "pending" },
        ],
      }),
    ).rejects.toThrow(/content string/);
@@ -41,7 +45,7 @@ describe("TodoWrite tool", () => {
      todo_write({
        todos: [
          // @ts-expect-error - testing invalid input
-          { id: "1", content: "Test" },
+          { content: "Test", activeForm: "Testing" },
        ],
      }),
    ).rejects.toThrow(/valid status/);
@@ -52,7 +56,7 @@ describe("TodoWrite tool", () => {
      todo_write({
        todos: [
          // @ts-expect-error - testing invalid status
-          { id: "1", content: "Test", status: "invalid" },
+          { content: "Test", status: "invalid", activeForm: "Testing" },
        ],
      }),
    ).rejects.toThrow(/valid status/);
@@ -63,36 +67,4 @@ describe("TodoWrite tool", () => {

    expect(result.message).toBeDefined();
  });
-
-  test("accepts optional priority field", async () => {
-    const result = await todo_write({
-      todos: [
-        {
-          id: "1",
-          content: "High priority task",
-          status: "pending",
-          priority: "high",
-        },
-        {
-          id: "2",
-          content: "Low priority task",
-          status: "pending",
-          priority: "low",
-        },
-      ],
-    });
-
-    expect(result.message).toContain("modified successfully");
-  });
-
-  test("validates priority values", async () => {
-    await expect(
-      todo_write({
-        todos: [
-          // @ts-expect-error - testing invalid priority
-          { id: "1", content: "Test", status: "pending", priority: "urgent" },
-        ],
-      }),
-    ).rejects.toThrow(/priority must be/);
-  });
 });
--- a/src/tests/tools/tool-truncation.test.ts
+++ b/src/tests/tools/tool-truncation.test.ts
@@ -139,6 +139,7 @@ describe("tool truncation integration tests", () => {
        pattern: "match",
        path: testDir,
        output_mode: "content",
+        head_limit: 0, // Disable head_limit to test truncation
      });

      expect(result.output.length).toBeLessThanOrEqual(15000); // 10K + notice
@@ -280,7 +281,7 @@ describe("tool truncation integration tests", () => {
        // Wait a bit for output to accumulate
        await new Promise((resolve) => setTimeout(resolve, 100));

-        const outputResult = await bash_output({ bash_id: bashId });
+        const outputResult = await bash_output({ shell_id: bashId });

        expect(outputResult.message.length).toBeLessThan(35000); // 30K + notice
        if (outputResult.message.length > 30000) {
--- a/src/tools/descriptions/AskUserQuestion.md
+++ b/src/tools/descriptions/AskUserQuestion.md
@@ -0,0 +1,11 @@
+# AskUserQuestion
+
+Use this tool when you need to ask the user questions during execution. This allows you to:
+1. Gather user preferences or requirements
+2. Clarify ambiguous instructions
+3. Get decisions on implementation choices as you work
+4. Offer choices to the user about what direction to take
+
+Usage notes:
+- Users will always be able to select "Other" to provide custom text input
+- Use multiSelect: true to allow multiple answers to be selected for a question
--- a/src/tools/descriptions/Bash.md
+++ b/src/tools/descriptions/Bash.md
@@ -2,11 +2,13 @@

 Executes a given bash command in a persistent shell session with optional timeout, ensuring proper handling and security measures.

+IMPORTANT: This tool is for terminal operations like git, npm, docker, etc. DO NOT use it for file operations (reading, writing, editing, searching, finding files) - use the specialized tools for this instead.
+
 Before executing the command, please follow these steps:

 1. Directory Verification:
-   - If the command will create new directories or files, first use the LS tool to verify the parent directory exists and is the correct location
-   - For example, before running "mkdir foo/bar", first use LS to check that "foo" exists and is the intended parent directory
+   - If the command will create new directories or files, first use `ls` to verify the parent directory exists and is the correct location
+   - For example, before running "mkdir foo/bar", first use `ls foo` to check that "foo" exists and is the intended parent directory

 2. Command Execution:
   - Always quote file paths that contain spaces with double quotes (e.g., cd "path with spaces/file.txt")
@@ -23,9 +25,20 @@ Usage notes:
  - You can specify an optional timeout in milliseconds (up to 600000ms / 10 minutes). If not specified, commands will timeout after 120000ms (2 minutes).
  - It is very helpful if you write a clear, concise description of what this command does in 5-10 words.
  - If the output exceeds 30000 characters, output will be truncated before being returned to you.
-  - VERY IMPORTANT: You MUST avoid using search commands like `find` and `grep`. Instead use Grep, Glob, or Task to search. You MUST avoid read tools like `cat`, `head`, `tail`, and `ls`, and use Read and LS to read files.
- - If you _still_ need to run `grep`, STOP. ALWAYS USE ripgrep at `rg` first, which all ${PRODUCT_NAME} users have pre-installed.
-  - When issuing multiple commands, use the ';' or '&&' operator to separate them. DO NOT use newlines (newlines are ok in quoted strings).
+  - You can use the `run_in_background` parameter to run the command in the background, which allows you to continue working while the command runs. You can monitor the output using the BashOutput tool as it becomes available. You do not need to use '&' at the end of the command when using this parameter.
+  
+  - Avoid using Bash with the `find`, `grep`, `cat`, `head`, `tail`, `sed`, `awk`, or `echo` commands unless you are explicitly instructed to or these commands are truly necessary for the task. Instead, always prefer using the dedicated tools for these commands:
+    - File search: Use Glob (NOT find or ls)
+    - Content search: Use Grep (NOT grep or rg)
+    - Read files: Use Read (NOT cat/head/tail)
+    - Edit files: Use Edit (NOT sed/awk)
+    - Write files: Use Write (NOT echo >/cat <<EOF)
+    - Communication: Output text directly (NOT echo/printf)
+  - When issuing multiple commands:
+    - If the commands are independent and can run in parallel, make multiple Bash tool calls in a single message. For example, if you need to run "git status" and "git diff", send a single message with two Bash tool calls in parallel.
+    - If the commands depend on each other and must run sequentially, use a single Bash call with '&&' to chain them together (e.g., `git add . && git commit -m "message" && git push`). For instance, if one operation must complete before another starts (like mkdir before cp, Write before Bash for git operations, or git add before git commit), run these operations sequentially instead.
+    - Use ';' only when you need to run commands sequentially but don't care if earlier commands fail
+    - DO NOT use newlines to separate commands (newlines are ok in quoted strings)
  - Try to maintain your current working directory throughout the session by using absolute paths and avoiding usage of `cd`. You may use `cd` if the User explicitly requests it.
    <good-example>
    pytest /foo/bar/tests
@@ -34,33 +47,42 @@ Usage notes:
    cd /foo/bar && pytest tests
    </bad-example>

-
-
-
 # Committing changes with git

-When the user asks you to create a new git commit, follow these steps carefully:
+Only create commits when requested by the user. If unclear, ask first. When the user asks you to create a new git commit, follow these steps carefully:

-1. You have the capability to call multiple tools in a single response. When multiple independent pieces of information are requested, batch your tool calls together for optimal performance. ALWAYS run the following bash commands in parallel, each using the Bash tool:
+Git Safety Protocol:
+- NEVER update the git config
+- NEVER run destructive/irreversible git commands (like push --force, hard reset, etc) unless the user explicitly requests them 
+- NEVER skip hooks (--no-verify, --no-gpg-sign, etc) unless the user explicitly requests it
+- NEVER force push to main/master; warn the user if they request it
+- Avoid git commit --amend. ONLY use --amend when either (1) the user explicitly requested an amend OR (2) you are adding edits from a pre-commit hook (additional instructions below)
+- Before amending: ALWAYS check authorship (git log -1 --format='%an %ae')
+- NEVER commit changes unless the user explicitly asks you to. It is VERY IMPORTANT to only commit when explicitly asked, otherwise the user will feel that you are being too proactive.
+
+1. You can call multiple tools in a single response. When multiple independent pieces of information are requested and all commands are likely to succeed, run multiple tool calls in parallel for optimal performance. Run the following bash commands in parallel, each using the Bash tool:
  - Run a git status command to see all untracked files.
  - Run a git diff command to see both staged and unstaged changes that will be committed.
  - Run a git log command to see recent commit messages, so that you can follow this repository's commit message style.
 2. Analyze all staged changes (both previously staged and newly added) and draft a commit message:
  - Summarize the nature of the changes (eg. new feature, enhancement to an existing feature, bug fix, refactoring, test, docs, etc.). Ensure the message accurately reflects the changes and their purpose (i.e. "add" means a wholly new feature, "update" means an enhancement to an existing feature, "fix" means a bug fix, etc.).
-  - Check for any sensitive information that shouldn't be committed
+  - Do not commit files that likely contain secrets (.env, credentials.json, etc). Warn the user if they specifically request to commit those files
  - Draft a concise (1-2 sentences) commit message that focuses on the "why" rather than the "what"
  - Ensure it accurately reflects the changes and their purpose
-3. You have the capability to call multiple tools in a single response. When multiple independent pieces of information are requested, batch your tool calls together for optimal performance. ALWAYS run the following commands in parallel:
+3. You can call multiple tools in a single response. When multiple independent pieces of information are requested and all commands are likely to succeed, run multiple tool calls in parallel for optimal performance. Run the following commands:
   - Add relevant untracked files to the staging area.
   - Create the commit with a message ending with:
   👾 Generated with [Letta Code](https://letta.com)

   Co-Authored-By: Letta <noreply@letta.com>
-   - Run git status to make sure the commit succeeded.
-4. If the commit fails due to pre-commit hook changes, retry the commit ONCE to include these automated changes. If it fails again, it usually means a pre-commit hook is preventing the commit. If the commit succeeds but you notice that files were modified by the pre-commit hook, you MUST amend your commit to include them.
+   - Run git status after the commit completes to verify success.
+   Note: git status depends on the commit completing, so run it sequentially after the commit.
+4. If the commit fails due to pre-commit hook changes, retry ONCE. If it succeeds but files were modified by the hook, verify it's safe to amend:
+   - Check authorship: git log -1 --format='%an %ae'
+   - Check not pushed: git status shows "Your branch is ahead"
+   - If both are true: amend your commit. Otherwise: create a NEW commit (never amend other developers' commits)

 Important notes:
- NEVER update the git config
 - NEVER run additional commands to read or explore code, besides git bash commands
 - NEVER use the TodoWrite or Task tools
 - DO NOT push to the remote repository unless the user explicitly asks you to do so
@@ -83,13 +105,13 @@ Use the gh command via the Bash tool for ALL GitHub-related tasks including work

 IMPORTANT: When the user asks you to create a pull request, follow these steps carefully:

-1. You have the capability to call multiple tools in a single response. When multiple independent pieces of information are requested, batch your tool calls together for optimal performance. ALWAYS run the following bash commands in parallel using the Bash tool, in order to understand the current state of the branch since it diverged from the main branch:
+1. You can call multiple tools in a single response. When multiple independent pieces of information are requested and all commands are likely to succeed, run multiple tool calls in parallel for optimal performance. Run the following bash commands in parallel using the Bash tool, in order to understand the current state of the branch since it diverged from the main branch:
   - Run a git status command to see all untracked files
   - Run a git diff command to see both staged and unstaged changes that will be committed
   - Check if the current branch tracks a remote branch and is up to date with the remote, so you know if you need to push to the remote
   - Run a git log command and `git diff [base-branch]...HEAD` to understand the full commit history for the current branch (from the time it diverged from the base branch)
 2. Analyze all changes that will be included in the pull request, making sure to look at all relevant commits (NOT just the latest commit, but ALL commits that will be included in the pull request!!!), and draft a pull request summary
-3. You have the capability to call multiple tools in a single response. When multiple independent pieces of information are requested, batch your tool calls together for optimal performance. ALWAYS run the following commands in parallel:
+3. You can call multiple tools in a single response. When multiple independent pieces of information are requested and all commands are likely to succeed, run multiple tool calls in parallel for optimal performance. Run the following commands in parallel:
   - Create new branch if needed
   - Push to remote with -u flag if needed
   - Create PR using gh pr create with the format below. Use a HEREDOC to pass the body to ensure correct formatting.
@@ -99,7 +121,7 @@ gh pr create --title "the pr title" --body "$(cat <<'EOF'
 <1-3 bullet points>

 ## Test plan
-[Checklist of TODOs for testing the pull request...]
+[Bulleted markdown checklist of TODOs for testing the pull request...]

 👾 Generated with [Letta Code](https://letta.com)
 EOF
@@ -107,9 +129,8 @@ EOF
 </example>

 Important:
- NEVER update the git config
 - DO NOT use the TodoWrite or Task tools
 - Return the PR URL when you're done, so the user can see it

 # Other common operations
- View comments on a Github PR: gh api repos/foo/bar/pulls/123/comments
+- View comments on a Github PR: gh api repos/foo/bar/pulls/123/comments
--- a/src/tools/descriptions/BashOutput.md
+++ b/src/tools/descriptions/BashOutput.md
@@ -1,10 +1,10 @@
 # BashOutput

 - Retrieves output from a running or completed background bash shell
- Takes a bash_id parameter identifying the shell
+- Takes a shell_id parameter identifying the shell
 - Always returns only new output since the last check
 - Returns stdout and stderr output along with shell status
 - Supports optional regex filtering to show only lines matching a pattern
 - Use this tool when you need to monitor or check the output of a long-running shell
 - Shell IDs can be found using the /bashes command
- If the accumulated output exceeds 30,000 characters, it will be truncated before being returned to you
+- If the accumulated output exceeds 30,000 characters, it will be truncated before being returned to you
--- a/src/tools/descriptions/EnterPlanMode.md
+++ b/src/tools/descriptions/EnterPlanMode.md
@@ -0,0 +1,75 @@
+# EnterPlanMode
+
+Use this tool when you encounter a complex task that requires careful planning and exploration before implementation. This tool transitions you into plan mode where you can thoroughly explore the codebase and design an implementation approach.
+
+## When to Use This Tool
+
+Use EnterPlanMode when ANY of these conditions apply:
+
+1. **Multiple Valid Approaches**: The task can be solved in several different ways, each with trade-offs
+   - Example: "Add caching to the API" - could use Redis, in-memory, file-based, etc.
+   - Example: "Improve performance" - many optimization strategies possible
+
+2. **Significant Architectural Decisions**: The task requires choosing between architectural patterns
+   - Example: "Add real-time updates" - WebSockets vs SSE vs polling
+   - Example: "Implement state management" - Redux vs Context vs custom solution
+
+3. **Large-Scale Changes**: The task touches many files or systems
+   - Example: "Refactor the authentication system"
+   - Example: "Migrate from REST to GraphQL"
+
+4. **Unclear Requirements**: You need to explore before understanding the full scope
+   - Example: "Make the app faster" - need to profile and identify bottlenecks
+   - Example: "Fix the bug in checkout" - need to investigate root cause
+
+5. **User Input Needed**: You'll need to ask clarifying questions before starting
+   - If you would use AskUserQuestion to clarify the approach, consider EnterPlanMode instead
+   - Plan mode lets you explore first, then present options with context
+
+## When NOT to Use This Tool
+
+Do NOT use EnterPlanMode for:
+- Simple, straightforward tasks with obvious implementation
+- Small bug fixes where the solution is clear
+- Adding a single function or small feature
+- Tasks you're already confident how to implement
+- Research-only tasks (use the Task tool with explore agent instead)
+
+## What Happens in Plan Mode
+
+In plan mode, you'll:
+1. Thoroughly explore the codebase using Glob, Grep, and Read tools
+2. Understand existing patterns and architecture
+3. Design an implementation approach
+4. Present your plan to the user for approval
+5. Use AskUserQuestion if you need to clarify approaches
+6. Exit plan mode with ExitPlanMode when ready to implement
+
+## Examples
+
+### GOOD - Use EnterPlanMode:
+User: "Add user authentication to the app"
+- This requires architectural decisions (session vs JWT, where to store tokens, middleware structure)
+
+User: "Optimize the database queries"
+- Multiple approaches possible, need to profile first, significant impact
+
+User: "Implement dark mode"
+- Architectural decision on theme system, affects many components
+
+### BAD - Don't use EnterPlanMode:
+User: "Fix the typo in the README"
+- Straightforward, no planning needed
+
+User: "Add a console.log to debug this function"
+- Simple, obvious implementation
+
+User: "What files handle routing?"
+- Research task, not implementation planning
+
+## Important Notes
+
+- This tool REQUIRES user approval - they must consent to entering plan mode
+- Be thoughtful about when to use it - unnecessary plan mode slows down simple tasks
+- If unsure whether to use it, err on the side of starting implementation
+- You can always ask the user "Would you like me to plan this out first?"
--- a/src/tools/descriptions/ExitPlanMode.md
+++ b/src/tools/descriptions/ExitPlanMode.md
@@ -1,4 +1,14 @@
-Use this tool when you are in plan mode and have finished presenting your plan and are ready to code. This will prompt the user to exit plan mode.
+# ExitPlanMode
+
+Use this tool when you are in plan mode and have finished writing your plan to the plan file and are ready for user approval.
+
+## How This Tool Works
+- You should have already written your plan to the plan file specified in the plan mode system message
+- This tool does NOT take the plan content as a parameter - it will read the plan from the file you wrote
+- This tool simply signals that you're done planning and ready for the user to review and approve
+- The user will see the contents of your plan file when they review it
+
+## When to Use This Tool
 IMPORTANT: Only use this tool when the task requires planning the implementation steps of a task that requires writing code. For research tasks where you're gathering information, searching files, reading files or in general trying to understand the codebase - do NOT use this tool.

 ## Handling Ambiguity in Plans
@@ -6,8 +16,8 @@ Before using this tool, ensure your plan is clear and unambiguous. If there are
 1. Use the AskUserQuestion tool to clarify with the user
 2. Ask about specific implementation choices (e.g., architectural patterns, which library to use)
 3. Clarify any assumptions that could affect the implementation
-4. Only proceed with ExitPlanMode after resolving ambiguities
-
+4. Edit your plan file to incorporate user feedback
+5. Only proceed with ExitPlanMode after resolving ambiguities and updating the plan file

 ## Examples

--- a/src/tools/descriptions/Read.md
+++ b/src/tools/descriptions/Read.md
@@ -9,8 +9,6 @@ Usage:
 - You can optionally specify a line offset and limit (especially handy for long files), but it's recommended to read the whole file by not providing these parameters
 - Any lines longer than 2000 characters will be truncated
 - Results are returned using cat -n format, with line numbers starting at 1
- This tool allows Claude Code to read images (eg PNG, JPG, etc). When reading an image file the contents are presented visually as Claude Code is a multimodal LLM.
- For Jupyter notebooks (.ipynb files), use the NotebookRead instead
- You have the capability to call multiple tools in a single response. It is always better to speculatively read multiple files as a batch that are potentially useful. 
- You will regularly be asked to read screenshots. If the user provides a path to a screenshot ALWAYS use this tool to view the file at the path. This tool will work with all temporary file paths like /var/folders/123/abc/T/TemporaryItems/NSIRD_screencaptureui_ZfB1tD/Screenshot.png
- If you read a file that exists but has empty contents you will receive a system reminder warning in place of file contents.
+- This tool can only read files, not directories. To read a directory, use the ls command via Bash.
+- You can call multiple tools in a single response. It is always better to speculatively read multiple potentially useful files in parallel.
+- If you read a file that exists but has empty contents you will receive a system reminder warning in place of file contents.
--- a/src/tools/descriptions/TodoWrite.md
+++ b/src/tools/descriptions/TodoWrite.md
@@ -30,11 +30,11 @@ NOTE that you should not use this tool if there is only one trivial task to do.
 User: I want to add a dark mode toggle to the application settings. Make sure you run the tests and build when you're done!
 Assistant: I'll help add a dark mode toggle to your application settings. Let me create a todo list to track this implementation.
 *Creates todo list with the following items:*
-1. Create dark mode toggle component in Settings page
-2. Add dark mode state management (context/store)
-3. Implement CSS-in-JS styles for dark theme
-4. Update existing components to support theme switching
-5. Run tests and build process, addressing any failures or errors that occur
+1. Creating dark mode toggle component in Settings page
+2. Adding dark mode state management (context/store)
+3. Implementing CSS-in-JS styles for dark theme
+4. Updating existing components to support theme switching
+5. Running tests and build process, addressing any failures or errors that occur
 *Begins working on the first task*

 <reasoning>
@@ -81,7 +81,7 @@ User: Can you help optimize my React application? It's rendering slowly and has
 Assistant: I'll help optimize your React application. First, let me examine your codebase to identify potential performance bottlenecks.
 *Reviews component structure, render patterns, state management, and data fetching*
 Assistant: After analyzing your codebase, I've identified several performance issues. Let me create a todo list to track our optimization efforts.
-*Creates todo list with items like: 1) Implement memoization for expensive calculations in ProductList, 2) Add virtualization for long lists in Dashboard, 3) Optimize image loading in Gallery component, 4) Fix state update loops in ShoppingCart, 5) Review bundle size and implement code splitting*
+*Creates todo list with items like: 1) Implementing memoization for expensive calculations in ProductList, 2) Adding virtualization for long lists in Dashboard, 3) Optimizing image loading in Gallery component, 4) Fixing state update loops in ShoppingCart, 5) Reviewing bundle size and implementing code splitting*
 Let's start by implementing memoization for the expensive calculations in your ProductList component.</assistant>

 <reasoning>
@@ -152,10 +152,14 @@ The assistant did not use the todo list because this is a single command executi
   - in_progress: Currently working on (limit to ONE task at a time)
   - completed: Task finished successfully

+   **IMPORTANT**: Task descriptions must have two forms:
+   - content: The imperative form describing what needs to be done (e.g., "Run tests", "Build the project")
+   - activeForm: The present continuous form shown during execution (e.g., "Running tests", "Building the project")
+
 2. **Task Management**:
   - Update task status in real-time as you work
   - Mark tasks complete IMMEDIATELY after finishing (don't batch completions)
-   - Only have ONE task in_progress at any time
+   - Exactly ONE task must be in_progress at any time (not less, not more)
   - Complete current tasks before starting new ones
   - Remove tasks that are no longer relevant from the list entirely

@@ -173,5 +177,8 @@ The assistant did not use the todo list because this is a single command executi
   - Create specific, actionable items
   - Break complex tasks into smaller, manageable steps
   - Use clear, descriptive task names
+   - Always provide both forms:
+     - content: "Fix authentication bug"
+     - activeForm: "Fixing authentication bug"

 When in doubt, use this tool. Being proactive with task management demonstrates attentiveness and ensures you complete all requirements successfully.
--- a/src/tools/impl/AskUserQuestion.ts
+++ b/src/tools/impl/AskUserQuestion.ts
@@ -0,0 +1,80 @@
+import { validateRequiredParams } from "./validation.js";
+
+interface QuestionOption {
+  label: string;
+  description: string;
+}
+
+interface Question {
+  question: string;
+  header: string;
+  options: QuestionOption[];
+  multiSelect: boolean;
+}
+
+interface AskUserQuestionArgs {
+  questions: Question[];
+  answers?: Record<string, string>;
+}
+
+interface AskUserQuestionResult {
+  message: string;
+}
+
+export async function ask_user_question(
+  args: AskUserQuestionArgs,
+): Promise<AskUserQuestionResult> {
+  validateRequiredParams(args, ["questions"], "AskUserQuestion");
+
+  if (!Array.isArray(args.questions) || args.questions.length === 0) {
+    throw new Error("questions must be a non-empty array");
+  }
+
+  if (args.questions.length > 4) {
+    throw new Error("Maximum of 4 questions allowed");
+  }
+
+  for (const q of args.questions) {
+    if (!q.question || typeof q.question !== "string") {
+      throw new Error("Each question must have a question string");
+    }
+    if (!q.header || typeof q.header !== "string") {
+      throw new Error("Each question must have a header string");
+    }
+    if (
+      !Array.isArray(q.options) ||
+      q.options.length < 2 ||
+      q.options.length > 4
+    ) {
+      throw new Error("Each question must have 2-4 options");
+    }
+    if (typeof q.multiSelect !== "boolean") {
+      throw new Error("Each question must have a multiSelect boolean");
+    }
+    for (const opt of q.options) {
+      if (!opt.label || typeof opt.label !== "string") {
+        throw new Error("Each option must have a label string");
+      }
+      if (!opt.description || typeof opt.description !== "string") {
+        throw new Error("Each option must have a description string");
+      }
+    }
+  }
+
+  // If answers are provided (filled in by UI layer), format the response
+  if (args.answers && Object.keys(args.answers).length > 0) {
+    const answerParts = args.questions.map((q) => {
+      const answer = args.answers?.[q.question] || "";
+      return `"${q.question}"="${answer}"`;
+    });
+    return {
+      message: `User has answered your questions: ${answerParts.join(", ")}. You can now continue with the user's answers in mind.`,
+    };
+  }
+
+  // Otherwise, return a placeholder - the UI layer should intercept this tool call
+  // and show the question UI before returning the actual response
+  return {
+    message: "Waiting for user response...",
+  };
+}
--- a/src/tools/impl/BashOutput.ts
+++ b/src/tools/impl/BashOutput.ts
@@ -3,7 +3,7 @@ import { LIMITS, truncateByChars } from "./truncation.js";
 import { validateRequiredParams } from "./validation.js";

 interface BashOutputArgs {
-  bash_id: string;
+  shell_id: string;
  filter?: string;
 }
 interface BashOutputResult {
@@ -13,11 +13,11 @@ interface BashOutputResult {
 export async function bash_output(
  args: BashOutputArgs,
 ): Promise<BashOutputResult> {
-  validateRequiredParams(args, ["bash_id"], "BashOutput");
-  const { bash_id, filter } = args;
-  const proc = backgroundProcesses.get(bash_id);
+  validateRequiredParams(args, ["shell_id"], "BashOutput");
+  const { shell_id, filter } = args;
+  const proc = backgroundProcesses.get(shell_id);
  if (!proc)
-    return { message: `No background process found with ID: ${bash_id}` };
+    return { message: `No background process found with ID: ${shell_id}` };
  const stdout = proc.stdout.join("\n");
  const stderr = proc.stderr.join("\n");
  let text = stdout;
--- a/src/tools/impl/EnterPlanMode.ts
+++ b/src/tools/impl/EnterPlanMode.ts
@@ -0,0 +1,32 @@
+interface EnterPlanModeArgs {
+  [key: string]: never;
+}
+
+interface EnterPlanModeResult {
+  message: string;
+}
+
+export async function enter_plan_mode(
+  _args: EnterPlanModeArgs,
+): Promise<EnterPlanModeResult> {
+  // This is handled by the UI layer which will:
+  // 1. Show approval dialog
+  // 2. On approve: toggle plan mode on, generate plan file path, inject system reminder
+  // 3. On reject: send rejection, agent proceeds without plan mode
+  //
+  // The message below is returned on successful entry into plan mode.
+  // The UI harness will also inject a <system-reminder> with the plan file path.
+  return {
+    message: `Entered plan mode. You should now focus on exploring the codebase and designing an implementation approach.
+
+In plan mode, you should:
+1. Thoroughly explore the codebase to understand existing patterns
+2. Identify similar features and architectural approaches
+3. Consider multiple approaches and their trade-offs
+4. Use AskUserQuestion if you need to clarify the approach
+5. Design a concrete implementation strategy
+6. When ready, use ExitPlanMode to present your plan for approval
+
+Remember: DO NOT write or edit any files yet. This is a read-only exploration and planning phase.`,
+  };
+}
--- a/src/tools/impl/ExitPlanMode.ts
+++ b/src/tools/impl/ExitPlanMode.ts
@@ -1,22 +1,11 @@
 /**
 * ExitPlanMode tool implementation
- * Exits plan mode by presenting the plan to the user for approval
+ * Exits plan mode - the plan is read from the plan file by the UI
 */

-import { validateRequiredParams } from "./validation.js";
-
-interface ExitPlanModeArgs {
-  plan: string;
-}
-
-export async function exit_plan_mode(
-  args: ExitPlanModeArgs,
-): Promise<{ message: string }> {
-  validateRequiredParams(args, ["plan"], "ExitPlanMode");
-  const { plan: _plan } = args;
-
+export async function exit_plan_mode(): Promise<{ message: string }> {
  // Return confirmation message that plan was approved
-  // Note: The plan itself should be displayed by the UI/system before this return is shown
+  // Note: The plan is read from the plan file by the UI before this return is shown
  return {
    message:
      "User has approved your plan. You can now start coding.\nStart with updating your todo list if applicable",
--- a/src/tools/impl/Grep.ts
+++ b/src/tools/impl/Grep.ts
@@ -21,6 +21,18 @@ function getRipgrepPath(): string {

 const rgPath = getRipgrepPath();

+function applyOffsetAndLimit<T>(
+  items: T[],
+  offset: number,
+  limit: number,
+): T[] {
+  const sliced = items.slice(offset);
+  if (limit > 0) {
+    return sliced.slice(0, limit);
+  }
+  return sliced; // 0 = unlimited
+}
+
 export interface GrepArgs {
  pattern: string;
  path?: string;
@@ -32,6 +44,8 @@ export interface GrepArgs {
  "-n"?: boolean;
  "-i"?: boolean;
  type?: string;
+  head_limit?: number;
+  offset?: number;
  multiline?: boolean;
 }

@@ -51,9 +65,11 @@ export async function grep(args: GrepArgs): Promise<GrepResult> {
    "-B": before,
    "-A": after,
    "-C": context,
-    "-n": lineNumbers,
+    "-n": lineNumbers = true,
    "-i": ignoreCase,
    type: fileType,
+    head_limit = 100,
+    offset = 0,
    multiline,
  } = args;

@@ -88,12 +104,14 @@ export async function grep(args: GrepArgs): Promise<GrepResult> {
      cwd: userCwd,
    });
    if (output_mode === "files_with_matches") {
-      const files = stdout.trim().split("\n").filter(Boolean);
+      const allFiles = stdout.trim().split("\n").filter(Boolean);
+      const files = applyOffsetAndLimit(allFiles, offset, head_limit);
      const fileCount = files.length;
-      if (fileCount === 0) return { output: "No files found", files: 0 };
+      const totalCount = allFiles.length;
+      if (totalCount === 0) return { output: "No files found", files: 0 };

      const fileList = files.join("\n");
-      const fullOutput = `Found ${fileCount} file${fileCount !== 1 ? "s" : ""}\n${fileList}`;
+      const fullOutput = `Found ${totalCount} file${totalCount !== 1 ? "s" : ""}${fileCount < totalCount ? ` (showing ${fileCount})` : ""}\n${fileList}`;

      // Apply character limit to prevent large file lists
      const { content: truncatedOutput } = truncateByChars(
@@ -104,13 +122,14 @@ export async function grep(args: GrepArgs): Promise<GrepResult> {

      return {
        output: truncatedOutput,
-        files: fileCount,
+        files: totalCount,
      };
    } else if (output_mode === "count") {
-      const lines = stdout.trim().split("\n").filter(Boolean);
+      const allLines = stdout.trim().split("\n").filter(Boolean);
+      const lines = applyOffsetAndLimit(allLines, offset, head_limit);
      let totalMatches = 0;
      let filesWithMatches = 0;
-      for (const line of lines) {
+      for (const line of allLines) {
        const parts = line.split(":");
        if (parts.length >= 2) {
          const lastPart = parts[parts.length - 1];
@@ -138,16 +157,20 @@ export async function grep(args: GrepArgs): Promise<GrepResult> {
      if (!stdout || stdout.trim() === "")
        return { output: "No matches found", matches: 0 };

+      const allLines = stdout.split("\n");
+      const lines = applyOffsetAndLimit(allLines, offset, head_limit);
+      const content = lines.join("\n");
+
      // Apply character limit to content output
      const { content: truncatedOutput } = truncateByChars(
-        stdout,
+        content,
        LIMITS.GREP_OUTPUT_CHARS,
        "Grep",
      );

      return {
        output: truncatedOutput,
-        matches: stdout.split("\n").filter(Boolean).length,
+        matches: allLines.filter(Boolean).length,
      };
    }
  } catch (error) {
--- a/src/tools/impl/Read.ts
+++ b/src/tools/impl/Read.ts
@@ -133,6 +133,11 @@ export async function read(args: ReadArgs): Promise<ReadResult> {
    if (await isBinaryFile(file_path))
      throw new Error(`Cannot read binary file: ${file_path}`);
    const content = await fs.readFile(file_path, "utf-8");
+    if (content.trim() === "") {
+      return {
+        content: `<system-reminder>\nThe file ${file_path} exists but has empty contents.\n</system-reminder>`,
+      };
+    }
    const formattedContent = formatWithLineNumbers(content, offset, limit);
    return { content: formattedContent };
  } catch (error) {
--- a/src/tools/impl/TodoWrite.ts
+++ b/src/tools/impl/TodoWrite.ts
@@ -3,8 +3,7 @@ import { validateRequiredParams } from "./validation.js";
 interface TodoItem {
  content: string;
  status: "pending" | "in_progress" | "completed";
-  id: string;
-  priority?: "high" | "medium" | "low";
+  activeForm: string;
 }
 interface TodoWriteArgs {
  todos: TodoItem[];
@@ -29,10 +28,8 @@ export async function todo_write(
      throw new Error(
        "Each todo must have a valid status (pending, in_progress, or completed)",
      );
-    if (!todo.id || typeof todo.id !== "string")
-      throw new Error("Each todo must have an id string");
-    if (todo.priority && !["high", "medium", "low"].includes(todo.priority))
-      throw new Error("If provided, priority must be high, medium, or low");
+    if (!todo.activeForm || typeof todo.activeForm !== "string")
+      throw new Error("Each todo must have an activeForm string");
  }
  return {
    message:
--- a/src/tools/manager.ts
+++ b/src/tools/manager.ts
@@ -47,19 +47,21 @@ export function getInternalToolName(serverName: string): string {
 }

 export const ANTHROPIC_DEFAULT_TOOLS: ToolName[] = [
+  "AskUserQuestion",
  "Bash",
  "BashOutput",
  "Edit",
+  "EnterPlanMode",
  "ExitPlanMode",
  "Glob",
  "Grep",
  "KillBash",
-  "LS",
-  "MultiEdit",
+  // "MultiEdit",
+  // "LS",
  "Read",
-  "Skill",
  "TodoWrite",
  "Write",
+  "Skill",
 ];

 export const OPENAI_DEFAULT_TOOLS: ToolName[] = [
@@ -113,9 +115,11 @@ export const GEMINI_PASCAL_TOOLS: ToolName[] = [

 // Tool permissions configuration
 const TOOL_PERMISSIONS: Record<ToolName, { requiresApproval: boolean }> = {
+  AskUserQuestion: { requiresApproval: true },
  Bash: { requiresApproval: true },
  BashOutput: { requiresApproval: false },
  Edit: { requiresApproval: true },
+  EnterPlanMode: { requiresApproval: true },
  ExitPlanMode: { requiresApproval: false },
  Glob: { requiresApproval: false },
  Grep: { requiresApproval: false },
--- a/src/tools/schemas/AskUserQuestion.json
+++ b/src/tools/schemas/AskUserQuestion.json
@@ -0,0 +1,61 @@
+{
+  "type": "object",
+  "properties": {
+    "questions": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "question": {
+            "type": "string",
+            "description": "The complete question to ask the user. Should be clear, specific, and end with a question mark. Example: \"Which library should we use for date formatting?\" If multiSelect is true, phrase it accordingly, e.g. \"Which features do you want to enable?\""
+          },
+          "header": {
+            "type": "string",
+            "description": "Very short label displayed as a chip/tag (max 12 chars). Examples: \"Auth method\", \"Library\", \"Approach\"."
+          },
+          "options": {
+            "type": "array",
+            "items": {
+              "type": "object",
+              "properties": {
+                "label": {
+                  "type": "string",
+                  "description": "The display text for this option that the user will see and select. Should be concise (1-5 words) and clearly describe the choice."
+                },
+                "description": {
+                  "type": "string",
+                  "description": "Explanation of what this option means or what will happen if chosen. Useful for providing context about trade-offs or implications."
+                }
+              },
+              "required": ["label", "description"],
+              "additionalProperties": false
+            },
+            "minItems": 2,
+            "maxItems": 4,
+            "description": "The available choices for this question. Must have 2-4 options. Each option should be a distinct, mutually exclusive choice (unless multiSelect is enabled). There should be no 'Other' option, that will be provided automatically."
+          },
+          "multiSelect": {
+            "type": "boolean",
+            "description": "Set to true to allow the user to select multiple options instead of just one. Use when choices are not mutually exclusive."
+          }
+        },
+        "required": ["question", "header", "options", "multiSelect"],
+        "additionalProperties": false
+      },
+      "minItems": 1,
+      "maxItems": 4,
+      "description": "Questions to ask the user (1-4 questions)"
+    },
+    "answers": {
+      "type": "object",
+      "additionalProperties": {
+        "type": "string"
+      },
+      "description": "User answers collected by the permission component"
+    }
+  },
+  "required": ["questions"],
+  "additionalProperties": false,
+  "$schema": "http://json-schema.org/draft-07/schema#"
+}
--- a/src/tools/schemas/Bash.json
+++ b/src/tools/schemas/Bash.json
@@ -11,7 +11,7 @@
    },
    "description": {
      "type": "string",
-      "description": " Clear, concise description of what this command does in 5-10 words. Examples:\nInput: ls\nOutput: Lists files in current directory\n\nInput: git status\nOutput: Shows working tree status\n\nInput: npm install\nOutput: Installs package dependencies\n\nInput: mkdir foo\nOutput: Creates directory 'foo'"
+      "description": "Clear, concise description of what this command does in 5-10 words, in active voice. Examples:\nInput: ls\nOutput: List files in current directory\n\nInput: git status\nOutput: Show working tree status\n\nInput: npm install\nOutput: Install package dependencies\n\nInput: mkdir foo\nOutput: Create directory 'foo'"
    },
    "run_in_background": {
      "type": "boolean",
--- a/src/tools/schemas/BashOutput.json
+++ b/src/tools/schemas/BashOutput.json
@@ -1,7 +1,7 @@
 {
  "type": "object",
  "properties": {
-    "bash_id": {
+    "shell_id": {
      "type": "string",
      "description": "The ID of the background shell to retrieve output from"
    },
@@ -10,7 +10,7 @@
      "description": "Optional regular expression to filter the output lines. Only lines matching this regex will be included in the result. Any lines that do not match will no longer be available to read."
    }
  },
-  "required": ["bash_id"],
+  "required": ["shell_id"],
  "additionalProperties": false,
  "$schema": "http://json-schema.org/draft-07/schema#"
 }
--- a/src/tools/schemas/EnterPlanMode.json
+++ b/src/tools/schemas/EnterPlanMode.json
@@ -0,0 +1,6 @@
+{
+  "type": "object",
+  "properties": {},
+  "additionalProperties": false,
+  "$schema": "http://json-schema.org/draft-07/schema#"
+}
--- a/src/tools/schemas/ExitPlanMode.json
+++ b/src/tools/schemas/ExitPlanMode.json
@@ -1,11 +1,6 @@
 {
-  "$schema": "http://json-schema.org/draft-07/schema#",
  "type": "object",
-  "properties": {
-    "plan": {
-      "type": "string"
-    }
-  },
-  "required": ["plan"],
-  "additionalProperties": false
+  "properties": {},
+  "additionalProperties": true,
+  "$schema": "http://json-schema.org/draft-07/schema#"
 }
--- a/src/tools/schemas/Grep.json
+++ b/src/tools/schemas/Grep.json
@@ -32,7 +32,7 @@
    },
    "-n": {
      "type": "boolean",
-      "description": "Show line numbers in output (rg -n). Requires output_mode: \"content\", ignored otherwise."
+      "description": "Show line numbers in output (rg -n). Requires output_mode: \"content\", ignored otherwise. Defaults to true."
    },
    "-i": {
      "type": "boolean",
@@ -44,7 +44,11 @@
    },
    "head_limit": {
      "type": "number",
-      "description": "Limit output to first N lines/entries, equivalent to \"| head -N\". Works across all output modes: content (limits output lines), files_with_matches (limits file paths), count (limits count entries). When unspecified, shows all results from ripgrep."
+      "description": "Limit output to first N lines/entries, equivalent to \"| head -N\". Works across all output modes: content (limits output lines), files_with_matches (limits file paths), count (limits count entries). Defaults to 100 (0 = unlimited)."
+    },
+    "offset": {
+      "type": "number",
+      "description": "Skip first N lines/entries before applying head_limit, equivalent to \"| tail -n +(N+1) | head -M\" where M is head_limit. Works across all output modes. Defaults to 0."
    },
    "multiline": {
      "type": "boolean",
--- a/src/tools/schemas/TodoWrite.json
+++ b/src/tools/schemas/TodoWrite.json
@@ -14,15 +14,12 @@
            "type": "string",
            "enum": ["pending", "in_progress", "completed"]
          },
-          "priority": {
+          "activeForm": {
            "type": "string",
-            "enum": ["high", "medium", "low"]
-          },
-          "id": {
-            "type": "string"
+            "minLength": 1
          }
        },
-        "required": ["content", "status", "id"],
+        "required": ["content", "status", "activeForm"],
        "additionalProperties": false
      },
      "description": "The updated todo list"
--- a/src/tools/toolDefinitions.ts
+++ b/src/tools/toolDefinitions.ts
@@ -1,7 +1,9 @@
 import ApplyPatchDescription from "./descriptions/ApplyPatch.md";
+import AskUserQuestionDescription from "./descriptions/AskUserQuestion.md";
 import BashDescription from "./descriptions/Bash.md";
 import BashOutputDescription from "./descriptions/BashOutput.md";
 import EditDescription from "./descriptions/Edit.md";
+import EnterPlanModeDescription from "./descriptions/EnterPlanMode.md";
 import ExitPlanModeDescription from "./descriptions/ExitPlanMode.md";
 import GlobDescription from "./descriptions/Glob.md";
 // Gemini toolset
@@ -29,9 +31,11 @@ import WriteDescription from "./descriptions/Write.md";
 import WriteFileGeminiDescription from "./descriptions/WriteFileGemini.md";
 import WriteTodosGeminiDescription from "./descriptions/WriteTodosGemini.md";
 import { apply_patch } from "./impl/ApplyPatch";
+import { ask_user_question } from "./impl/AskUserQuestion";
 import { bash } from "./impl/Bash";
 import { bash_output } from "./impl/BashOutput";
 import { edit } from "./impl/Edit";
+import { enter_plan_mode } from "./impl/EnterPlanMode";
 import { exit_plan_mode } from "./impl/ExitPlanMode";
 import { glob } from "./impl/Glob";
 // Gemini toolset
@@ -59,9 +63,11 @@ import { write } from "./impl/Write";
 import { write_file_gemini } from "./impl/WriteFileGemini";
 import { write_todos } from "./impl/WriteTodosGemini";
 import ApplyPatchSchema from "./schemas/ApplyPatch.json";
+import AskUserQuestionSchema from "./schemas/AskUserQuestion.json";
 import BashSchema from "./schemas/Bash.json";
 import BashOutputSchema from "./schemas/BashOutput.json";
 import EditSchema from "./schemas/Edit.json";
+import EnterPlanModeSchema from "./schemas/EnterPlanMode.json";
 import ExitPlanModeSchema from "./schemas/ExitPlanMode.json";
 import GlobSchema from "./schemas/Glob.json";
 // Gemini toolset
@@ -98,6 +104,11 @@ interface ToolAssets {
 }

 const toolDefinitions = {
+  AskUserQuestion: {
+    schema: AskUserQuestionSchema,
+    description: AskUserQuestionDescription.trim(),
+    impl: ask_user_question as unknown as ToolImplementation,
+  },
  Bash: {
    schema: BashSchema,
    description: BashDescription.trim(),
@@ -113,6 +124,11 @@ const toolDefinitions = {
    description: EditDescription.trim(),
    impl: edit as unknown as ToolImplementation,
  },
+  EnterPlanMode: {
+    schema: EnterPlanModeSchema,
+    description: EnterPlanModeDescription.trim(),
+    impl: enter_plan_mode as unknown as ToolImplementation,
+  },
  ExitPlanMode: {
    schema: ExitPlanModeSchema,
    description: ExitPlanModeDescription.trim(),