feat: reduce time-to-boot, remove default eager approval checks on inputs, auto-cancel stale approvals (#579)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Charles Packer
2026-01-17 16:19:30 -08:00
committed by GitHub
parent f4eb921af7
commit 5f5c0df18e
13 changed files with 1376 additions and 93 deletions

View File

@@ -0,0 +1,249 @@
import { describe, expect, test } from "bun:test";
import type { Message } from "@letta-ai/letta-client/resources/agents/messages";
import {
isApprovalPendingError,
isApprovalStateDesyncError,
} from "../agent/approval-recovery";
import { extractApprovals } from "../agent/check-approval";
/**
* Tests for approval error detection helpers (LET-7101).
*
* These functions detect two opposite error conditions:
* 1. isApprovalStateDesyncError: Sent approval, but server has no pending approval
* 2. isApprovalPendingError: Sent user message, but server has pending approval waiting
*/
describe("isApprovalStateDesyncError", () => {
test("detects desync error in detail string", () => {
const detail = "No tool call is currently awaiting approval";
expect(isApprovalStateDesyncError(detail)).toBe(true);
});
test("detects desync error case-insensitively", () => {
const detail = "NO TOOL CALL IS CURRENTLY AWAITING APPROVAL";
expect(isApprovalStateDesyncError(detail)).toBe(true);
});
test("detects desync error in longer message", () => {
const detail =
"Error: No tool call is currently awaiting approval. The approval request may have expired.";
expect(isApprovalStateDesyncError(detail)).toBe(true);
});
test("returns false for unrelated errors", () => {
expect(isApprovalStateDesyncError("Connection timeout")).toBe(false);
expect(isApprovalStateDesyncError("Internal server error")).toBe(false);
});
test("returns false for non-string input", () => {
expect(isApprovalStateDesyncError(null)).toBe(false);
expect(isApprovalStateDesyncError(undefined)).toBe(false);
expect(isApprovalStateDesyncError(123)).toBe(false);
expect(isApprovalStateDesyncError({ error: "test" })).toBe(false);
});
});
describe("isApprovalPendingError", () => {
// This is the actual error format from the Letta backend (screenshot from LET-7101)
const REAL_ERROR_DETAIL =
"CONFLICT: Cannot send a new message: The agent is waiting for approval on a tool call. Please approve or deny the pending request before continuing.";
test("detects approval pending error in real error format", () => {
expect(isApprovalPendingError(REAL_ERROR_DETAIL)).toBe(true);
});
test("detects approval pending error case-insensitively", () => {
expect(isApprovalPendingError("CANNOT SEND A NEW MESSAGE")).toBe(true);
expect(isApprovalPendingError("cannot send a new message")).toBe(true);
});
test("detects partial match in longer message", () => {
const detail = "Error occurred: Cannot send a new message while processing";
expect(isApprovalPendingError(detail)).toBe(true);
});
test("returns false for desync errors (opposite case)", () => {
// These are the OPPOSITE error - when we send approval but there's nothing pending
expect(
isApprovalPendingError("No tool call is currently awaiting approval"),
).toBe(false);
});
test("returns false for unrelated errors", () => {
expect(isApprovalPendingError("Connection timeout")).toBe(false);
expect(isApprovalPendingError("Rate limit exceeded")).toBe(false);
expect(isApprovalPendingError("Invalid API key")).toBe(false);
});
test("returns false for non-string input", () => {
expect(isApprovalPendingError(null)).toBe(false);
expect(isApprovalPendingError(undefined)).toBe(false);
expect(isApprovalPendingError(123)).toBe(false);
expect(isApprovalPendingError({ detail: REAL_ERROR_DETAIL })).toBe(false);
});
});
/**
* Tests for parallel tool call approval extraction.
* Ensures lazy recovery handles multiple simultaneous tool calls correctly.
*/
describe("extractApprovals", () => {
// Helper to create a minimal Message-like object for testing
// We use 'as Message' cast because the real Message type is complex
const createMessage = (overrides: {
tool_calls?: Array<{
tool_call_id?: string;
name?: string;
arguments?: string;
}>;
tool_call?: {
tool_call_id?: string;
name?: string;
arguments?: string;
};
}): Message =>
({
id: "test-msg-id",
date: new Date().toISOString(),
message_type: "approval_request_message",
...overrides,
}) as unknown as Message;
test("extracts single tool call from tool_calls array", () => {
const msg = createMessage({
tool_calls: [
{
tool_call_id: "call-1",
name: "Bash",
arguments: '{"command": "echo hello"}',
},
],
});
const result = extractApprovals(msg);
expect(result.pendingApprovals).toHaveLength(1);
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-1");
expect(result.pendingApprovals[0]!.toolName).toBe("Bash");
expect(result.pendingApproval?.toolCallId).toBe("call-1");
});
test("extracts multiple parallel tool calls", () => {
const msg = createMessage({
tool_calls: [
{
tool_call_id: "call-1",
name: "Bash",
arguments: '{"command": "echo hello"}',
},
{
tool_call_id: "call-2",
name: "web_search",
arguments: '{"query": "test"}',
},
{
tool_call_id: "call-3",
name: "Read",
arguments: '{"file_path": "/tmp/test.txt"}',
},
],
});
const result = extractApprovals(msg);
expect(result.pendingApprovals).toHaveLength(3);
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-1");
expect(result.pendingApprovals[0]!.toolName).toBe("Bash");
expect(result.pendingApprovals[1]!.toolCallId).toBe("call-2");
expect(result.pendingApprovals[1]!.toolName).toBe("web_search");
expect(result.pendingApprovals[2]!.toolCallId).toBe("call-3");
expect(result.pendingApprovals[2]!.toolName).toBe("Read");
// pendingApproval is deprecated, should be first item
expect(result.pendingApproval?.toolCallId).toBe("call-1");
});
test("handles deprecated single tool_call field", () => {
const msg = createMessage({
tool_call: {
tool_call_id: "call-legacy",
name: "Write",
arguments: '{"file_path": "/tmp/out.txt"}',
},
});
const result = extractApprovals(msg);
expect(result.pendingApprovals).toHaveLength(1);
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-legacy");
expect(result.pendingApprovals[0]!.toolName).toBe("Write");
});
test("prefers tool_calls array over deprecated tool_call", () => {
const msg = createMessage({
tool_calls: [{ tool_call_id: "call-new", name: "Bash", arguments: "{}" }],
tool_call: {
tool_call_id: "call-old",
name: "Write",
arguments: "{}",
},
});
const result = extractApprovals(msg);
// Should use tool_calls, not tool_call
expect(result.pendingApprovals).toHaveLength(1);
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-new");
});
test("filters out tool calls without tool_call_id", () => {
const msg = createMessage({
tool_calls: [
{ tool_call_id: "call-valid", name: "Bash", arguments: "{}" },
{ name: "Invalid", arguments: "{}" }, // Missing tool_call_id
{ tool_call_id: "", name: "Empty", arguments: "{}" }, // Empty tool_call_id
{ tool_call_id: "call-valid-2", name: "Read", arguments: "{}" },
],
});
const result = extractApprovals(msg);
// Should only include entries with valid tool_call_id
expect(result.pendingApprovals).toHaveLength(2);
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-valid");
expect(result.pendingApprovals[1]!.toolCallId).toBe("call-valid-2");
});
test("returns empty array when no tool calls present", () => {
const msg = createMessage({});
const result = extractApprovals(msg);
expect(result.pendingApprovals).toHaveLength(0);
expect(result.pendingApproval).toBeNull();
});
test("handles missing name and arguments gracefully", () => {
const msg = createMessage({
tool_calls: [{ tool_call_id: "call-minimal" }],
});
const result = extractApprovals(msg);
expect(result.pendingApprovals).toHaveLength(1);
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-minimal");
expect(result.pendingApprovals[0]!.toolName).toBe("");
expect(result.pendingApprovals[0]!.toolArgs).toBe("");
});
});
/**
* Note: Full integration testing of lazy approval recovery requires:
* 1. Starting CLI without --yolo
* 2. Sending a prompt that triggers a tool call requiring approval
* 3. Instead of approving, sending another user message
* 4. Verifying the CONFLICT error is detected and recovery happens
*
* This is complex to automate reliably in unit tests.
* Manual testing or a dedicated integration test suite is recommended.
*/

View File

@@ -0,0 +1,256 @@
import { describe, expect, test } from "bun:test";
import { spawn } from "node:child_process";
/**
* Integration test for lazy approval recovery (LET-7101).
*
* NOTE: The lazy approval recovery is primarily designed for TUI mode where:
* 1. User has a session with pending approvals (e.g., from a previous run)
* 2. User sends a new message before responding to the approval
* 3. Server returns CONFLICT error
* 4. CLI recovers by auto-denying stale approvals and retrying
*
* In bidirectional mode, messages sent during permission wait are dropped
* (see headless.ts line 1710-1714), so we can't directly test the CONFLICT
* scenario here. This test validates that the flow doesn't crash when
* messages are sent while approvals are pending.
*
* The RecoveryMessage emission can be tested by:
* 1. Manual testing in TUI mode (start session with orphaned approval)
* 2. Or by modifying headless mode to not drop messages during permission wait
*/
// Prompt that will trigger a Bash tool call requiring approval
const BASH_TRIGGER_PROMPT =
"Run this exact bash command: echo test123. Do not use any other tools.";
// Second message to send while approval is pending
const INTERRUPT_MESSAGE =
"Actually, just say OK instead. Do not call any tools.";
interface StreamMessage {
type: string;
subtype?: string;
message_type?: string;
stop_reason?: string;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
[key: string]: any;
}
/**
* Run bidirectional test with custom message handling.
* Allows sending messages at specific points in the flow.
*/
async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
messages: StreamMessage[];
success: boolean;
errorSeen: boolean;
}> {
return new Promise((resolve, reject) => {
const proc = spawn(
"bun",
[
"run",
"dev",
"-p",
"--input-format",
"stream-json",
"--output-format",
"stream-json",
"--new-agent",
"-m",
"haiku",
// NOTE: No --yolo flag - approvals are required
],
{
cwd: process.cwd(),
env: { ...process.env },
},
);
const messages: StreamMessage[] = [];
let buffer = "";
let initReceived = false;
let approvalSeen = false;
let interruptSent = false;
let errorSeen = false;
let resultCount = 0;
let closing = false;
const timeout = setTimeout(() => {
if (!closing) {
proc.kill();
reject(new Error(`Test timed out after ${timeoutMs}ms`));
}
}, timeoutMs);
const cleanup = () => {
closing = true;
clearTimeout(timeout);
setTimeout(() => {
proc.stdin?.end();
proc.kill();
}, 500);
};
const processLine = (line: string) => {
if (!line.trim()) return;
try {
const msg: StreamMessage = JSON.parse(line);
messages.push(msg);
// Debug output
if (process.env.DEBUG_TEST) {
console.log("MSG:", JSON.stringify(msg, null, 2));
}
// Step 1: Wait for init, then send bash trigger prompt
if (msg.type === "system" && msg.subtype === "init" && !initReceived) {
initReceived = true;
const userMsg = JSON.stringify({
type: "user",
message: { role: "user", content: BASH_TRIGGER_PROMPT },
});
proc.stdin?.write(`${userMsg}\n`);
return;
}
// Step 2: When we see approval request, send another user message instead
if (
msg.type === "message" &&
msg.message_type === "approval_request_message" &&
!approvalSeen
) {
approvalSeen = true;
// Wait a moment, then send interrupt message (NOT an approval)
setTimeout(() => {
if (!interruptSent) {
interruptSent = true;
const userMsg = JSON.stringify({
type: "user",
message: { role: "user", content: INTERRUPT_MESSAGE },
});
proc.stdin?.write(`${userMsg}\n`);
}
}, 500);
return;
}
// Track recovery messages - this is the key signal that lazy recovery worked
if (
msg.type === "recovery" &&
msg.recovery_type === "approval_pending"
) {
errorSeen = true; // reusing this flag to mean "recovery message seen"
}
// Also track raw errors (shouldn't see these if recovery works properly)
if (
msg.type === "error" ||
(msg.type === "message" && msg.message_type === "error_message")
) {
const detail = msg.detail || msg.message || "";
if (detail.toLowerCase().includes("cannot send a new message")) {
// Raw error leaked through - recovery may have failed
console.log(
"WARNING: Raw CONFLICT error seen (recovery may have failed)",
);
}
}
// Track results - we need 2 (one for each user message, though first may fail)
if (msg.type === "result") {
resultCount++;
// After second result (or after seeing error + result), we're done
if (resultCount >= 2 || (errorSeen && resultCount >= 1)) {
cleanup();
resolve({ messages, success: true, errorSeen });
}
}
} catch {
// Not valid JSON, ignore
}
};
proc.stdout?.on("data", (data) => {
buffer += data.toString();
const lines = buffer.split("\n");
buffer = lines.pop() || "";
for (const line of lines) {
processLine(line);
}
});
let _stderr = "";
proc.stderr?.on("data", (data) => {
_stderr += data.toString();
});
proc.on("close", (_code) => {
clearTimeout(timeout);
// Process any remaining buffer
if (buffer.trim()) {
processLine(buffer);
}
if (!closing) {
// If we got here without resolving, check what we have
resolve({
messages,
success: resultCount > 0,
errorSeen,
});
}
});
proc.on("error", (err) => {
clearTimeout(timeout);
reject(err);
});
});
}
describe("lazy approval recovery", () => {
test("handles concurrent message while approval is pending", async () => {
const result = await runLazyRecoveryTest();
// Log messages for debugging if test fails
if (!result.success) {
console.log("All messages received:");
for (const msg of result.messages) {
console.log(JSON.stringify(msg, null, 2));
}
}
// We should have seen the approval request (proves tool requiring approval was called)
const approvalRequest = result.messages.find(
(m) => m.message_type === "approval_request_message",
);
expect(approvalRequest).toBeDefined();
// The test should complete successfully
expect(result.success).toBe(true);
// Count results - we should get at least 1 (the second message should always complete)
const resultCount = result.messages.filter(
(m) => m.type === "result",
).length;
expect(resultCount).toBeGreaterThanOrEqual(1);
// KEY ASSERTION: Check if we saw the recovery message
// This proves the lazy recovery mechanism was triggered
const recoveryMessage = result.messages.find(
(m) => m.type === "recovery" && m.recovery_type === "approval_pending",
);
if (recoveryMessage) {
console.log("Recovery message detected - lazy recovery worked correctly");
expect(result.errorSeen).toBe(true); // Should have been set when we saw recovery
} else {
// Recovery might not be triggered if approval was auto-handled before second message
// This can happen due to timing - the test still validates the flow works
console.log(
"Note: No recovery message seen - approval may have been handled before conflict",
);
}
}, 180000); // 3 minute timeout for CI
});