feat: reduce time-to-boot, remove default eager approval checks on inputs, auto-cancel stale approvals (#579)

Co-authored-by: Letta <noreply@letta.com>
2026-01-17 16:19:30 -08:00
parent f4eb921af7
commit 5f5c0df18e
13 changed files with 1376 additions and 93 deletions
--- a/src/tests/approval-recovery.test.ts
+++ b/src/tests/approval-recovery.test.ts
@@ -0,0 +1,249 @@
+import { describe, expect, test } from "bun:test";
+import type { Message } from "@letta-ai/letta-client/resources/agents/messages";
+import {
+  isApprovalPendingError,
+  isApprovalStateDesyncError,
+} from "../agent/approval-recovery";
+import { extractApprovals } from "../agent/check-approval";
+
+/**
+ * Tests for approval error detection helpers (LET-7101).
+ *
+ * These functions detect two opposite error conditions:
+ * 1. isApprovalStateDesyncError: Sent approval, but server has no pending approval
+ * 2. isApprovalPendingError: Sent user message, but server has pending approval waiting
+ */
+
+describe("isApprovalStateDesyncError", () => {
+  test("detects desync error in detail string", () => {
+    const detail = "No tool call is currently awaiting approval";
+    expect(isApprovalStateDesyncError(detail)).toBe(true);
+  });
+
+  test("detects desync error case-insensitively", () => {
+    const detail = "NO TOOL CALL IS CURRENTLY AWAITING APPROVAL";
+    expect(isApprovalStateDesyncError(detail)).toBe(true);
+  });
+
+  test("detects desync error in longer message", () => {
+    const detail =
+      "Error: No tool call is currently awaiting approval. The approval request may have expired.";
+    expect(isApprovalStateDesyncError(detail)).toBe(true);
+  });
+
+  test("returns false for unrelated errors", () => {
+    expect(isApprovalStateDesyncError("Connection timeout")).toBe(false);
+    expect(isApprovalStateDesyncError("Internal server error")).toBe(false);
+  });
+
+  test("returns false for non-string input", () => {
+    expect(isApprovalStateDesyncError(null)).toBe(false);
+    expect(isApprovalStateDesyncError(undefined)).toBe(false);
+    expect(isApprovalStateDesyncError(123)).toBe(false);
+    expect(isApprovalStateDesyncError({ error: "test" })).toBe(false);
+  });
+});
+
+describe("isApprovalPendingError", () => {
+  // This is the actual error format from the Letta backend (screenshot from LET-7101)
+  const REAL_ERROR_DETAIL =
+    "CONFLICT: Cannot send a new message: The agent is waiting for approval on a tool call. Please approve or deny the pending request before continuing.";
+
+  test("detects approval pending error in real error format", () => {
+    expect(isApprovalPendingError(REAL_ERROR_DETAIL)).toBe(true);
+  });
+
+  test("detects approval pending error case-insensitively", () => {
+    expect(isApprovalPendingError("CANNOT SEND A NEW MESSAGE")).toBe(true);
+    expect(isApprovalPendingError("cannot send a new message")).toBe(true);
+  });
+
+  test("detects partial match in longer message", () => {
+    const detail = "Error occurred: Cannot send a new message while processing";
+    expect(isApprovalPendingError(detail)).toBe(true);
+  });
+
+  test("returns false for desync errors (opposite case)", () => {
+    // These are the OPPOSITE error - when we send approval but there's nothing pending
+    expect(
+      isApprovalPendingError("No tool call is currently awaiting approval"),
+    ).toBe(false);
+  });
+
+  test("returns false for unrelated errors", () => {
+    expect(isApprovalPendingError("Connection timeout")).toBe(false);
+    expect(isApprovalPendingError("Rate limit exceeded")).toBe(false);
+    expect(isApprovalPendingError("Invalid API key")).toBe(false);
+  });
+
+  test("returns false for non-string input", () => {
+    expect(isApprovalPendingError(null)).toBe(false);
+    expect(isApprovalPendingError(undefined)).toBe(false);
+    expect(isApprovalPendingError(123)).toBe(false);
+    expect(isApprovalPendingError({ detail: REAL_ERROR_DETAIL })).toBe(false);
+  });
+});
+
+/**
+ * Tests for parallel tool call approval extraction.
+ * Ensures lazy recovery handles multiple simultaneous tool calls correctly.
+ */
+describe("extractApprovals", () => {
+  // Helper to create a minimal Message-like object for testing
+  // We use 'as Message' cast because the real Message type is complex
+  const createMessage = (overrides: {
+    tool_calls?: Array<{
+      tool_call_id?: string;
+      name?: string;
+      arguments?: string;
+    }>;
+    tool_call?: {
+      tool_call_id?: string;
+      name?: string;
+      arguments?: string;
+    };
+  }): Message =>
+    ({
+      id: "test-msg-id",
+      date: new Date().toISOString(),
+      message_type: "approval_request_message",
+      ...overrides,
+    }) as unknown as Message;
+
+  test("extracts single tool call from tool_calls array", () => {
+    const msg = createMessage({
+      tool_calls: [
+        {
+          tool_call_id: "call-1",
+          name: "Bash",
+          arguments: '{"command": "echo hello"}',
+        },
+      ],
+    });
+
+    const result = extractApprovals(msg);
+
+    expect(result.pendingApprovals).toHaveLength(1);
+    expect(result.pendingApprovals[0]!.toolCallId).toBe("call-1");
+    expect(result.pendingApprovals[0]!.toolName).toBe("Bash");
+    expect(result.pendingApproval?.toolCallId).toBe("call-1");
+  });
+
+  test("extracts multiple parallel tool calls", () => {
+    const msg = createMessage({
+      tool_calls: [
+        {
+          tool_call_id: "call-1",
+          name: "Bash",
+          arguments: '{"command": "echo hello"}',
+        },
+        {
+          tool_call_id: "call-2",
+          name: "web_search",
+          arguments: '{"query": "test"}',
+        },
+        {
+          tool_call_id: "call-3",
+          name: "Read",
+          arguments: '{"file_path": "/tmp/test.txt"}',
+        },
+      ],
+    });
+
+    const result = extractApprovals(msg);
+
+    expect(result.pendingApprovals).toHaveLength(3);
+    expect(result.pendingApprovals[0]!.toolCallId).toBe("call-1");
+    expect(result.pendingApprovals[0]!.toolName).toBe("Bash");
+    expect(result.pendingApprovals[1]!.toolCallId).toBe("call-2");
+    expect(result.pendingApprovals[1]!.toolName).toBe("web_search");
+    expect(result.pendingApprovals[2]!.toolCallId).toBe("call-3");
+    expect(result.pendingApprovals[2]!.toolName).toBe("Read");
+    // pendingApproval is deprecated, should be first item
+    expect(result.pendingApproval?.toolCallId).toBe("call-1");
+  });
+
+  test("handles deprecated single tool_call field", () => {
+    const msg = createMessage({
+      tool_call: {
+        tool_call_id: "call-legacy",
+        name: "Write",
+        arguments: '{"file_path": "/tmp/out.txt"}',
+      },
+    });
+
+    const result = extractApprovals(msg);
+
+    expect(result.pendingApprovals).toHaveLength(1);
+    expect(result.pendingApprovals[0]!.toolCallId).toBe("call-legacy");
+    expect(result.pendingApprovals[0]!.toolName).toBe("Write");
+  });
+
+  test("prefers tool_calls array over deprecated tool_call", () => {
+    const msg = createMessage({
+      tool_calls: [{ tool_call_id: "call-new", name: "Bash", arguments: "{}" }],
+      tool_call: {
+        tool_call_id: "call-old",
+        name: "Write",
+        arguments: "{}",
+      },
+    });
+
+    const result = extractApprovals(msg);
+
+    // Should use tool_calls, not tool_call
+    expect(result.pendingApprovals).toHaveLength(1);
+    expect(result.pendingApprovals[0]!.toolCallId).toBe("call-new");
+  });
+
+  test("filters out tool calls without tool_call_id", () => {
+    const msg = createMessage({
+      tool_calls: [
+        { tool_call_id: "call-valid", name: "Bash", arguments: "{}" },
+        { name: "Invalid", arguments: "{}" }, // Missing tool_call_id
+        { tool_call_id: "", name: "Empty", arguments: "{}" }, // Empty tool_call_id
+        { tool_call_id: "call-valid-2", name: "Read", arguments: "{}" },
+      ],
+    });
+
+    const result = extractApprovals(msg);
+
+    // Should only include entries with valid tool_call_id
+    expect(result.pendingApprovals).toHaveLength(2);
+    expect(result.pendingApprovals[0]!.toolCallId).toBe("call-valid");
+    expect(result.pendingApprovals[1]!.toolCallId).toBe("call-valid-2");
+  });
+
+  test("returns empty array when no tool calls present", () => {
+    const msg = createMessage({});
+
+    const result = extractApprovals(msg);
+
+    expect(result.pendingApprovals).toHaveLength(0);
+    expect(result.pendingApproval).toBeNull();
+  });
+
+  test("handles missing name and arguments gracefully", () => {
+    const msg = createMessage({
+      tool_calls: [{ tool_call_id: "call-minimal" }],
+    });
+
+    const result = extractApprovals(msg);
+
+    expect(result.pendingApprovals).toHaveLength(1);
+    expect(result.pendingApprovals[0]!.toolCallId).toBe("call-minimal");
+    expect(result.pendingApprovals[0]!.toolName).toBe("");
+    expect(result.pendingApprovals[0]!.toolArgs).toBe("");
+  });
+});
+
+/**
+ * Note: Full integration testing of lazy approval recovery requires:
+ * 1. Starting CLI without --yolo
+ * 2. Sending a prompt that triggers a tool call requiring approval
+ * 3. Instead of approving, sending another user message
+ * 4. Verifying the CONFLICT error is detected and recovery happens
+ *
+ * This is complex to automate reliably in unit tests.
+ * Manual testing or a dedicated integration test suite is recommended.
+ */
--- a/src/tests/lazy-approval-recovery.test.ts
+++ b/src/tests/lazy-approval-recovery.test.ts
@@ -0,0 +1,256 @@
+import { describe, expect, test } from "bun:test";
+import { spawn } from "node:child_process";
+
+/**
+ * Integration test for lazy approval recovery (LET-7101).
+ *
+ * NOTE: The lazy approval recovery is primarily designed for TUI mode where:
+ * 1. User has a session with pending approvals (e.g., from a previous run)
+ * 2. User sends a new message before responding to the approval
+ * 3. Server returns CONFLICT error
+ * 4. CLI recovers by auto-denying stale approvals and retrying
+ *
+ * In bidirectional mode, messages sent during permission wait are dropped
+ * (see headless.ts line 1710-1714), so we can't directly test the CONFLICT
+ * scenario here. This test validates that the flow doesn't crash when
+ * messages are sent while approvals are pending.
+ *
+ * The RecoveryMessage emission can be tested by:
+ * 1. Manual testing in TUI mode (start session with orphaned approval)
+ * 2. Or by modifying headless mode to not drop messages during permission wait
+ */
+
+// Prompt that will trigger a Bash tool call requiring approval
+const BASH_TRIGGER_PROMPT =
+  "Run this exact bash command: echo test123. Do not use any other tools.";
+
+// Second message to send while approval is pending
+const INTERRUPT_MESSAGE =
+  "Actually, just say OK instead. Do not call any tools.";
+
+interface StreamMessage {
+  type: string;
+  subtype?: string;
+  message_type?: string;
+  stop_reason?: string;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  [key: string]: any;
+}
+
+/**
+ * Run bidirectional test with custom message handling.
+ * Allows sending messages at specific points in the flow.
+ */
+async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
+  messages: StreamMessage[];
+  success: boolean;
+  errorSeen: boolean;
+}> {
+  return new Promise((resolve, reject) => {
+    const proc = spawn(
+      "bun",
+      [
+        "run",
+        "dev",
+        "-p",
+        "--input-format",
+        "stream-json",
+        "--output-format",
+        "stream-json",
+        "--new-agent",
+        "-m",
+        "haiku",
+        // NOTE: No --yolo flag - approvals are required
+      ],
+      {
+        cwd: process.cwd(),
+        env: { ...process.env },
+      },
+    );
+
+    const messages: StreamMessage[] = [];
+    let buffer = "";
+    let initReceived = false;
+    let approvalSeen = false;
+    let interruptSent = false;
+    let errorSeen = false;
+    let resultCount = 0;
+    let closing = false;
+
+    const timeout = setTimeout(() => {
+      if (!closing) {
+        proc.kill();
+        reject(new Error(`Test timed out after ${timeoutMs}ms`));
+      }
+    }, timeoutMs);
+
+    const cleanup = () => {
+      closing = true;
+      clearTimeout(timeout);
+      setTimeout(() => {
+        proc.stdin?.end();
+        proc.kill();
+      }, 500);
+    };
+
+    const processLine = (line: string) => {
+      if (!line.trim()) return;
+      try {
+        const msg: StreamMessage = JSON.parse(line);
+        messages.push(msg);
+
+        // Debug output
+        if (process.env.DEBUG_TEST) {
+          console.log("MSG:", JSON.stringify(msg, null, 2));
+        }
+
+        // Step 1: Wait for init, then send bash trigger prompt
+        if (msg.type === "system" && msg.subtype === "init" && !initReceived) {
+          initReceived = true;
+          const userMsg = JSON.stringify({
+            type: "user",
+            message: { role: "user", content: BASH_TRIGGER_PROMPT },
+          });
+          proc.stdin?.write(`${userMsg}\n`);
+          return;
+        }
+
+        // Step 2: When we see approval request, send another user message instead
+        if (
+          msg.type === "message" &&
+          msg.message_type === "approval_request_message" &&
+          !approvalSeen
+        ) {
+          approvalSeen = true;
+          // Wait a moment, then send interrupt message (NOT an approval)
+          setTimeout(() => {
+            if (!interruptSent) {
+              interruptSent = true;
+              const userMsg = JSON.stringify({
+                type: "user",
+                message: { role: "user", content: INTERRUPT_MESSAGE },
+              });
+              proc.stdin?.write(`${userMsg}\n`);
+            }
+          }, 500);
+          return;
+        }
+
+        // Track recovery messages - this is the key signal that lazy recovery worked
+        if (
+          msg.type === "recovery" &&
+          msg.recovery_type === "approval_pending"
+        ) {
+          errorSeen = true; // reusing this flag to mean "recovery message seen"
+        }
+
+        // Also track raw errors (shouldn't see these if recovery works properly)
+        if (
+          msg.type === "error" ||
+          (msg.type === "message" && msg.message_type === "error_message")
+        ) {
+          const detail = msg.detail || msg.message || "";
+          if (detail.toLowerCase().includes("cannot send a new message")) {
+            // Raw error leaked through - recovery may have failed
+            console.log(
+              "WARNING: Raw CONFLICT error seen (recovery may have failed)",
+            );
+          }
+        }
+
+        // Track results - we need 2 (one for each user message, though first may fail)
+        if (msg.type === "result") {
+          resultCount++;
+          // After second result (or after seeing error + result), we're done
+          if (resultCount >= 2 || (errorSeen && resultCount >= 1)) {
+            cleanup();
+            resolve({ messages, success: true, errorSeen });
+          }
+        }
+      } catch {
+        // Not valid JSON, ignore
+      }
+    };
+
+    proc.stdout?.on("data", (data) => {
+      buffer += data.toString();
+      const lines = buffer.split("\n");
+      buffer = lines.pop() || "";
+      for (const line of lines) {
+        processLine(line);
+      }
+    });
+
+    let _stderr = "";
+    proc.stderr?.on("data", (data) => {
+      _stderr += data.toString();
+    });
+
+    proc.on("close", (_code) => {
+      clearTimeout(timeout);
+      // Process any remaining buffer
+      if (buffer.trim()) {
+        processLine(buffer);
+      }
+
+      if (!closing) {
+        // If we got here without resolving, check what we have
+        resolve({
+          messages,
+          success: resultCount > 0,
+          errorSeen,
+        });
+      }
+    });
+
+    proc.on("error", (err) => {
+      clearTimeout(timeout);
+      reject(err);
+    });
+  });
+}
+
+describe("lazy approval recovery", () => {
+  test("handles concurrent message while approval is pending", async () => {
+    const result = await runLazyRecoveryTest();
+
+    // Log messages for debugging if test fails
+    if (!result.success) {
+      console.log("All messages received:");
+      for (const msg of result.messages) {
+        console.log(JSON.stringify(msg, null, 2));
+      }
+    }
+
+    // We should have seen the approval request (proves tool requiring approval was called)
+    const approvalRequest = result.messages.find(
+      (m) => m.message_type === "approval_request_message",
+    );
+    expect(approvalRequest).toBeDefined();
+
+    // The test should complete successfully
+    expect(result.success).toBe(true);
+
+    // Count results - we should get at least 1 (the second message should always complete)
+    const resultCount = result.messages.filter(
+      (m) => m.type === "result",
+    ).length;
+    expect(resultCount).toBeGreaterThanOrEqual(1);
+
+    // KEY ASSERTION: Check if we saw the recovery message
+    // This proves the lazy recovery mechanism was triggered
+    const recoveryMessage = result.messages.find(
+      (m) => m.type === "recovery" && m.recovery_type === "approval_pending",
+    );
+    if (recoveryMessage) {
+      console.log("Recovery message detected - lazy recovery worked correctly");
+      expect(result.errorSeen).toBe(true); // Should have been set when we saw recovery
+    } else {
+      // Recovery might not be triggered if approval was auto-handled before second message
+      // This can happen due to timing - the test still validates the flow works
+      console.log(
+        "Note: No recovery message seen - approval may have been handled before conflict",
+      );
+    }
+  }, 180000); // 3 minute timeout for CI
+});