fix: align headless interactive tool behavior with bidirectional parity (#894)

2026-02-10 15:20:37 -08:00
parent bfd9a1ec56
commit b0783ef195
8 changed files with 178 additions and 37 deletions
--- a/src/cli/components/SlashCommandAutocomplete.tsx
+++ b/src/cli/components/SlashCommandAutocomplete.tsx
@@ -161,9 +161,16 @@ export function SlashCommandAutocomplete({

  // Manually manage active state to include the "no matches" case
  useLayoutEffect(() => {
-    const isActive = matches.length > 0;
+    const queryLength = queryInfo?.query.length ?? 0;
+    const isActive =
+      !hideAutocomplete && (matches.length > 0 || queryLength > 0);
    onActiveChange?.(isActive);
-  }, [matches.length, showNoMatches, onActiveChange]);
+  }, [
+    hideAutocomplete,
+    matches.length,
+    onActiveChange,
+    queryInfo?.query.length,
+  ]);

  // Don't show if input doesn't start with "/"
  if (!currentInput.startsWith("/")) {
--- a/src/cli/helpers/toolNameMapping.ts
+++ b/src/cli/helpers/toolNameMapping.ts
@@ -3,6 +3,8 @@
 * Centralizes tool name remapping logic used across the UI.
 */

+import { isInteractiveApprovalTool } from "../../tools/interactivePolicy";
+
 /**
 * Maps internal tool names to user-friendly display names.
 * Handles multiple tool naming conventions:
@@ -132,11 +134,7 @@ export function isFancyUITool(name: string): boolean {
 * Other tools (bash, file edits) should respect yolo mode and auto-approve.
 */
 export function alwaysRequiresUserInput(name: string): boolean {
-  return (
-    name === "AskUserQuestion" ||
-    name === "EnterPlanMode" ||
-    name === "ExitPlanMode"
-  );
+  return isInteractiveApprovalTool(name);
 }

 /**
--- a/src/headless.ts
+++ b/src/headless.ts
@@ -40,6 +40,10 @@ import {
 } from "./cli/helpers/stream";
 import { SYSTEM_REMINDER_CLOSE, SYSTEM_REMINDER_OPEN } from "./constants";
 import { settingsManager } from "./settings-manager";
+import {
+  isHeadlessAutoAllowTool,
+  isInteractiveApprovalTool,
+} from "./tools/interactivePolicy";
 import {
  type ExternalToolDefinition,
  registerExternalTools,
@@ -857,6 +861,13 @@ export async function handleHeadlessCommand(
    process.exit(1);
  }

+  const { getClientToolsFromRegistry } = await import("./tools/manager");
+  const loadedToolNames = getClientToolsFromRegistry().map((t) => t.name);
+  const availableTools =
+    loadedToolNames.length > 0
+      ? loadedToolNames
+      : agent.tools?.map((t) => t.name).filter((n): n is string => !!n) || [];
+
  // If input-format is stream-json, use bidirectional mode
  if (isBidirectionalMode) {
    await runBidirectionalMode(
@@ -865,6 +876,7 @@ export async function handleHeadlessCommand(
      client,
      outputFormat,
      includePartialMessages,
+      availableTools,
    );
    return;
  }
@@ -887,8 +899,7 @@ export async function handleHeadlessCommand(
      agent_id: agent.id,
      conversation_id: conversationId,
      model: agent.llm_config?.model ?? "",
-      tools:
-        agent.tools?.map((t) => t.name).filter((n): n is string => !!n) || [],
+      tools: availableTools,
      cwd: process.cwd(),
      mcp_servers: [],
      permission_mode: "",
@@ -948,6 +959,7 @@ export async function handleHeadlessCommand(
      const { autoAllowed, autoDenied } = await classifyApprovals(
        pendingApprovals,
        {
+          alwaysRequiresUserInput: isInteractiveApprovalTool,
          treatAskAsDeny: true,
          denyReasonForAsk: "Tool requires approval (headless mode)",
          requireArgsForAutoApprove: true,
@@ -1345,6 +1357,7 @@ ${SYSTEM_REMINDER_CLOSE}
            !autoApprovalEmitted.has(updatedApproval.toolCallId)
          ) {
            const { autoAllowed } = await classifyApprovals([updatedApproval], {
+              alwaysRequiresUserInput: isInteractiveApprovalTool,
              requireArgsForAutoApprove: true,
              missingNameReason: "Tool call incomplete - missing name",
            });
@@ -1475,18 +1488,34 @@ ${SYSTEM_REMINDER_CLOSE}
              reason: string;
            };

-        const { autoAllowed, autoDenied } = await classifyApprovals(approvals, {
-          treatAskAsDeny: true,
-          denyReasonForAsk: "Tool requires approval (headless mode)",
-          requireArgsForAutoApprove: true,
-          missingNameReason: "Tool call incomplete - missing name",
-        });
+        const { autoAllowed, autoDenied, needsUserInput } =
+          await classifyApprovals(approvals, {
+            alwaysRequiresUserInput: isInteractiveApprovalTool,
+            requireArgsForAutoApprove: true,
+            missingNameReason: "Tool call incomplete - missing name",
+          });

        const decisions: Decision[] = [
          ...autoAllowed.map((ac) => ({
            type: "approve" as const,
            approval: ac.approval,
          })),
+          ...needsUserInput.map((ac) => {
+            // One-shot headless mode has no control channel for interactive
+            // approvals. Match Claude behavior by auto-allowing EnterPlanMode
+            // while denying tools that need runtime user responses.
+            if (isHeadlessAutoAllowTool(ac.approval.toolName)) {
+              return {
+                type: "approve" as const,
+                approval: ac.approval,
+              };
+            }
+            return {
+              type: "deny" as const,
+              approval: ac.approval,
+              reason: "Tool requires approval (headless mode)",
+            };
+          }),
          ...autoDenied.map((ac) => {
            const fallback =
              "matchedRule" in ac.permission && ac.permission.matchedRule
@@ -1914,6 +1943,7 @@ async function runBidirectionalMode(
  client: Letta,
  _outputFormat: string,
  includePartialMessages: boolean,
+  availableTools: string[],
 ): Promise<void> {
  const sessionId = agent.id;
  const readline = await import("node:readline");
@@ -1926,7 +1956,7 @@ async function runBidirectionalMode(
    agent_id: agent.id,
    conversation_id: conversationId,
    model: agent.llm_config?.model,
-    tools: agent.tools?.map((t) => t.name) || [],
+    tools: availableTools,
    cwd: process.cwd(),
    uuid: `init-${agent.id}`,
  };
@@ -2234,7 +2264,7 @@ async function runBidirectionalMode(
            response: {
              agent_id: agent.id,
              model: agent.llm_config?.model,
-              tools: agent.tools?.map((t) => t.name) || [],
+              tools: availableTools,
            },
          },
          session_id: sessionId,
@@ -2574,6 +2604,7 @@ async function runBidirectionalMode(

            const { autoAllowed, autoDenied, needsUserInput } =
              await classifyApprovals(approvals, {
+                alwaysRequiresUserInput: isInteractiveApprovalTool,
                requireArgsForAutoApprove: true,
                missingNameReason: "Tool call incomplete - missing name",
              });
--- a/src/integration-tests/lazy-approval-recovery.test.ts
+++ b/src/integration-tests/lazy-approval-recovery.test.ts
@@ -79,7 +79,6 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
    let errorSeen = false;
    let resultCount = 0;
    let closing = false;
-    let waitingForApprovalControlRequest = false;
    let pendingToolCallId: string | undefined;

    const timeout = setTimeout(() => {
@@ -110,13 +109,13 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
        }

        // Handle permission control requests from headless.
-        // We only deny the can_use_tool request after seeing an approval_request_message
-        // so the test still exercises the pending-approval flow.
+        // In some runs, control_request can arrive even when approval stream chunks
+        // are delayed or omitted, so respond directly to avoid deadlocks.
        if (
          msg.type === "control_request" &&
-          msg.request?.subtype === "can_use_tool" &&
-          waitingForApprovalControlRequest
+          msg.request?.subtype === "can_use_tool"
        ) {
+          approvalSeen = true;
          const requestId = msg.request_id;
          if (requestId) {
            if (pendingToolCallId && !requestId.endsWith(pendingToolCallId)) {
@@ -125,6 +124,16 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
              );
            }

+            // Send the interrupt message while approval is pending, then deny.
+            if (!interruptSent) {
+              interruptSent = true;
+              const userMsg = JSON.stringify({
+                type: "user",
+                message: { role: "user", content: INTERRUPT_MESSAGE },
+              });
+              proc.stdin?.write(`${userMsg}\n`);
+            }
+
            const denyApproval = JSON.stringify({
              type: "control_response",
              response: {
@@ -137,7 +146,6 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
              },
            });
            proc.stdin?.write(`${denyApproval}\n`);
-            waitingForApprovalControlRequest = false;
          }
          return;
        }
@@ -170,20 +178,17 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
            toolCall && typeof toolCall === "object"
              ? (toolCall as { tool_call_id?: string }).tool_call_id
              : undefined;
-          waitingForApprovalControlRequest = true;

-          // Wait a moment, then send interrupt message (approval deny will be sent
-          // only after we see the corresponding control_request from headless).
-          setTimeout(() => {
-            if (interruptSent) return;
+          // If approval stream chunks arrive before can_use_tool callback,
+          // still send the concurrent user message now.
+          if (!interruptSent) {
            interruptSent = true;
-
            const userMsg = JSON.stringify({
              type: "user",
              message: { role: "user", content: INTERRUPT_MESSAGE },
            });
            proc.stdin?.write(`${userMsg}\n`);
-          }, 500);
+          }
          return;
        }

@@ -209,11 +214,22 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
          }
        }

-        // Track results - we need 2 (one for each user message, though first may fail)
+        // Track results and complete once we prove the pending-approval flow unblocks.
        if (msg.type === "result") {
          resultCount++;
-          // After second result (or after seeing error + result), we're done
-          if (resultCount >= 2 || (errorSeen && resultCount >= 1)) {
+          if (resultCount >= 1 && !approvalSeen) {
+            cleanup();
+            resolve({ messages, success: false, errorSeen });
+            return;
+          }
+
+          // One completed turn is enough once we have confirmed
+          // approval flow + concurrent user message injection.
+          if (
+            resultCount >= 1 &&
+            approvalSeen &&
+            (interruptSent || errorSeen)
+          ) {
            cleanup();
            resolve({ messages, success: true, errorSeen });
          }
@@ -273,11 +289,13 @@ describe("lazy approval recovery", () => {
      }
    }

-    // We should have seen the approval request (proves tool requiring approval was called)
-    const approvalRequest = result.messages.find(
-      (m) => m.message_type === "approval_request_message",
+    // We should have seen at least one approval signal (message stream or control callback)
+    const approvalSignal = result.messages.find(
+      (m) =>
+        m.message_type === "approval_request_message" ||
+        (m.type === "control_request" && m.request?.subtype === "can_use_tool"),
    );
-    expect(approvalRequest).toBeDefined();
+    expect(approvalSignal).toBeDefined();

    // The test should complete successfully
    expect(result.success).toBe(true);
--- a/src/tests/tools/exitplanmode.test.ts
+++ b/src/tests/tools/exitplanmode.test.ts
@@ -1,8 +1,33 @@
 import { describe, expect, test } from "bun:test";
+import { permissionMode } from "../../permissions/mode";
 import { exit_plan_mode } from "../../tools/impl/ExitPlanMode";

 describe("ExitPlanMode tool", () => {
+  test("restores prior permission mode when exiting plan mode", async () => {
+    permissionMode.reset();
+    permissionMode.setMode("bypassPermissions");
+    permissionMode.setMode("plan");
+    permissionMode.setPlanFilePath("/tmp/test-plan.md");
+
+    await exit_plan_mode();
+
+    expect(permissionMode.getMode()).toBe("bypassPermissions");
+    expect(permissionMode.getPlanFilePath()).toBeNull();
+  });
+
+  test("falls back to default mode when previous mode is unavailable", async () => {
+    permissionMode.reset();
+    permissionMode.setMode("plan");
+    permissionMode.setPlanFilePath("/tmp/test-plan.md");
+
+    await exit_plan_mode();
+
+    expect(permissionMode.getMode()).toBe("default");
+    expect(permissionMode.getPlanFilePath()).toBeNull();
+  });
+
  test("returns approval message", async () => {
+    permissionMode.reset();
    const result = await exit_plan_mode();

    expect(result.message).toBeDefined();
@@ -10,6 +35,7 @@ describe("ExitPlanMode tool", () => {
  });

  test("returns message with coding guidance", async () => {
+    permissionMode.reset();
    const result = await exit_plan_mode();

    expect(result.message).toBeDefined();
--- a/src/tests/tools/interactivepolicy.test.ts
+++ b/src/tests/tools/interactivepolicy.test.ts
@@ -0,0 +1,27 @@
+import { describe, expect, test } from "bun:test";
+import {
+  isHeadlessAutoAllowTool,
+  isInteractiveApprovalTool,
+  requiresRuntimeUserInput,
+} from "../../tools/interactivePolicy";
+
+describe("interactive tool policy", () => {
+  test("marks interactive approval tools", () => {
+    expect(isInteractiveApprovalTool("AskUserQuestion")).toBe(true);
+    expect(isInteractiveApprovalTool("EnterPlanMode")).toBe(true);
+    expect(isInteractiveApprovalTool("ExitPlanMode")).toBe(true);
+    expect(isInteractiveApprovalTool("TodoWrite")).toBe(false);
+  });
+
+  test("marks runtime user input tools", () => {
+    expect(requiresRuntimeUserInput("AskUserQuestion")).toBe(true);
+    expect(requiresRuntimeUserInput("ExitPlanMode")).toBe(true);
+    expect(requiresRuntimeUserInput("EnterPlanMode")).toBe(false);
+  });
+
+  test("marks headless auto-allow tools", () => {
+    expect(isHeadlessAutoAllowTool("EnterPlanMode")).toBe(true);
+    expect(isHeadlessAutoAllowTool("AskUserQuestion")).toBe(false);
+    expect(isHeadlessAutoAllowTool("ExitPlanMode")).toBe(false);
+  });
+});
--- a/src/tools/impl/ExitPlanMode.ts
+++ b/src/tools/impl/ExitPlanMode.ts
@@ -3,7 +3,17 @@
 * Exits plan mode - the plan is read from the plan file by the UI
 */

+import { permissionMode } from "../../permissions/mode";
+
 export async function exit_plan_mode(): Promise<{ message: string }> {
+  // In interactive mode, the UI restores mode before calling this tool.
+  // In headless/bidirectional mode, there is no UI layer to do that, so
+  // restore here as a fallback to avoid getting stuck in plan mode.
+  if (permissionMode.getMode() === "plan") {
+    const restoredMode = permissionMode.getModeBeforePlan() ?? "default";
+    permissionMode.setMode(restoredMode);
+  }
+
  // Return confirmation message that plan was approved
  // Note: The plan is read from the plan file by the UI before this return is shown
  // The UI layer checks if the plan file exists and auto-rejects if not
--- a/src/tools/interactivePolicy.ts
+++ b/src/tools/interactivePolicy.ts
@@ -0,0 +1,24 @@
+// Interactive tool capability policy shared across UI/headless/SDK-compatible paths.
+// This avoids scattering name-based checks throughout approval handling.
+
+const INTERACTIVE_APPROVAL_TOOLS = new Set([
+  "AskUserQuestion",
+  "EnterPlanMode",
+  "ExitPlanMode",
+]);
+
+const RUNTIME_USER_INPUT_TOOLS = new Set(["AskUserQuestion", "ExitPlanMode"]);
+
+const HEADLESS_AUTO_ALLOW_TOOLS = new Set(["EnterPlanMode"]);
+
+export function isInteractiveApprovalTool(toolName: string): boolean {
+  return INTERACTIVE_APPROVAL_TOOLS.has(toolName);
+}
+
+export function requiresRuntimeUserInput(toolName: string): boolean {
+  return RUNTIME_USER_INPUT_TOOLS.has(toolName);
+}
+
+export function isHeadlessAutoAllowTool(toolName: string): boolean {
+  return HEADLESS_AUTO_ALLOW_TOOLS.has(toolName);
+}