feat: clean up keep-alive fallback handling (#663)
This commit is contained in:
@@ -1,10 +1,4 @@
|
||||
import type { MessageCreate } from "@letta-ai/letta-client/resources/agents/agents";
|
||||
import { getClient } from "./client";
|
||||
import { APPROVAL_RECOVERY_PROMPT } from "./promptAssets";
|
||||
|
||||
// Error when trying to SEND approval but server has no pending approval
|
||||
const APPROVAL_RECOVERY_DETAIL_FRAGMENT =
|
||||
"no tool call is currently awaiting approval";
|
||||
|
||||
// Error when approval tool call IDs don't match what server expects
|
||||
// Format: "Invalid tool call IDs: Expected [...], got [...]"
|
||||
@@ -30,15 +24,6 @@ type RunErrorMetadata =
|
||||
| undefined
|
||||
| null;
|
||||
|
||||
export function isApprovalStateDesyncError(detail: unknown): boolean {
|
||||
if (typeof detail !== "string") return false;
|
||||
const lower = detail.toLowerCase();
|
||||
return (
|
||||
lower.includes(APPROVAL_RECOVERY_DETAIL_FRAGMENT) ||
|
||||
lower.includes(INVALID_TOOL_CALL_IDS_FRAGMENT)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if error specifically indicates tool call ID mismatch.
|
||||
* This is a subtype of desync where the server HAS pending approvals,
|
||||
@@ -100,11 +85,3 @@ export async function fetchRunErrorDetail(
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
export function buildApprovalRecoveryMessage(): MessageCreate {
|
||||
return {
|
||||
type: "message",
|
||||
role: "user",
|
||||
content: [{ type: "text", text: APPROVAL_RECOVERY_PROMPT }],
|
||||
};
|
||||
}
|
||||
|
||||
158
src/cli/App.tsx
158
src/cli/App.tsx
@@ -29,10 +29,8 @@ import {
|
||||
getDisplayableToolReturn,
|
||||
} from "../agent/approval-execution";
|
||||
import {
|
||||
buildApprovalRecoveryMessage,
|
||||
fetchRunErrorDetail,
|
||||
isApprovalPendingError,
|
||||
isApprovalStateDesyncError,
|
||||
isConversationBusyError,
|
||||
isInvalidToolCallIdsError,
|
||||
} from "../agent/approval-recovery";
|
||||
@@ -2506,72 +2504,11 @@ export default function App({
|
||||
sendDesktopNotification("Approval needed");
|
||||
return;
|
||||
}
|
||||
// No approvals found - fall through to general desync recovery
|
||||
// No approvals found - fall through to error handling below
|
||||
} catch {
|
||||
// Fetch failed - fall through to general desync recovery
|
||||
// Fetch failed - fall through to error handling below
|
||||
}
|
||||
}
|
||||
|
||||
// General desync: "no tool call awaiting" or fetch failed above
|
||||
// Recover with keep-alive prompt or strip stale approvals
|
||||
if (
|
||||
isApprovalStateDesyncError(errorDetail) &&
|
||||
llmApiErrorRetriesRef.current < LLM_API_ERROR_MAX_RETRIES
|
||||
) {
|
||||
llmApiErrorRetriesRef.current += 1;
|
||||
|
||||
// Show transient status (matches post-stream desync handler UX)
|
||||
const statusId = uid("status");
|
||||
buffersRef.current.byId.set(statusId, {
|
||||
kind: "status",
|
||||
id: statusId,
|
||||
lines: [
|
||||
"Approval state desynced; resending keep-alive recovery prompt...",
|
||||
],
|
||||
});
|
||||
buffersRef.current.order.push(statusId);
|
||||
refreshDerived();
|
||||
|
||||
// Swap payload to recovery message (or strip stale approvals)
|
||||
const isApprovalOnlyPayload =
|
||||
hasApprovalInPayload && currentInput.length === 1;
|
||||
if (isApprovalOnlyPayload) {
|
||||
currentInput.splice(
|
||||
0,
|
||||
currentInput.length,
|
||||
buildApprovalRecoveryMessage(),
|
||||
);
|
||||
} else {
|
||||
// Mixed payload: strip stale approvals, keep user message
|
||||
const messageItems = currentInput.filter(
|
||||
(item) => item?.type !== "approval",
|
||||
);
|
||||
if (messageItems.length > 0) {
|
||||
currentInput.splice(
|
||||
0,
|
||||
currentInput.length,
|
||||
...messageItems,
|
||||
);
|
||||
} else {
|
||||
currentInput.splice(
|
||||
0,
|
||||
currentInput.length,
|
||||
buildApprovalRecoveryMessage(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove transient status before retry
|
||||
buffersRef.current.byId.delete(statusId);
|
||||
buffersRef.current.order = buffersRef.current.order.filter(
|
||||
(id) => id !== statusId,
|
||||
);
|
||||
refreshDerived();
|
||||
|
||||
// Reset interrupted flag so retry stream chunks are processed
|
||||
buffersRef.current.interrupted = false;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Not a recoverable desync - re-throw to outer catch
|
||||
@@ -3332,8 +3269,6 @@ export default function App({
|
||||
const hasApprovalInPayload = currentInput.some(
|
||||
(item) => item?.type === "approval",
|
||||
);
|
||||
const isApprovalOnlyPayload =
|
||||
hasApprovalInPayload && currentInput.length === 1;
|
||||
|
||||
// Capture the most recent error text in this turn (if any)
|
||||
let latestErrorText: string | null = null;
|
||||
@@ -3347,17 +3282,9 @@ export default function App({
|
||||
}
|
||||
}
|
||||
|
||||
// Detect approval desync once per turn
|
||||
const detailFromRun = await fetchRunErrorDetail(lastRunId);
|
||||
const desyncDetected =
|
||||
isApprovalStateDesyncError(detailFromRun) ||
|
||||
isApprovalStateDesyncError(latestErrorText);
|
||||
|
||||
// Track last failure info so we can emit it if retries stop
|
||||
const lastFailureMessage = latestErrorText || detailFromRun || null;
|
||||
|
||||
// "Invalid tool call IDs" means server HAS pending approvals but with different IDs.
|
||||
// Check for "Invalid tool call IDs" error - server HAS pending approvals but with different IDs.
|
||||
// Fetch the actual pending approvals and show them to the user.
|
||||
const detailFromRun = await fetchRunErrorDetail(lastRunId);
|
||||
const invalidIdsDetected =
|
||||
isInvalidToolCallIdsError(detailFromRun) ||
|
||||
isInvalidToolCallIdsError(latestErrorText);
|
||||
@@ -3438,85 +3365,12 @@ export default function App({
|
||||
sendDesktopNotification("Approval needed");
|
||||
return;
|
||||
}
|
||||
// No approvals found - fall through to general desync recovery
|
||||
// No approvals found - fall through to error handling below
|
||||
} catch {
|
||||
// Fetch failed - fall through to general desync recovery
|
||||
// Fetch failed - fall through to error handling below
|
||||
}
|
||||
}
|
||||
|
||||
// Check for approval desync errors even if stop_reason isn't llm_api_error.
|
||||
// Handle both approval-only payloads and mixed [approval, message] payloads.
|
||||
if (hasApprovalInPayload && desyncDetected) {
|
||||
if (llmApiErrorRetriesRef.current < LLM_API_ERROR_MAX_RETRIES) {
|
||||
llmApiErrorRetriesRef.current += 1;
|
||||
const statusId = uid("status");
|
||||
buffersRef.current.byId.set(statusId, {
|
||||
kind: "status",
|
||||
id: statusId,
|
||||
lines: [
|
||||
"Approval state desynced; resending keep-alive recovery prompt...",
|
||||
],
|
||||
});
|
||||
buffersRef.current.order.push(statusId);
|
||||
refreshDerived();
|
||||
|
||||
if (isApprovalOnlyPayload) {
|
||||
// Approval-only payload: send recovery prompt
|
||||
currentInput.splice(
|
||||
0,
|
||||
currentInput.length,
|
||||
buildApprovalRecoveryMessage(),
|
||||
);
|
||||
} else {
|
||||
// Mixed payload [approval, message]: strip stale approval, keep user message
|
||||
const messageItems = currentInput.filter(
|
||||
(item) => item?.type !== "approval",
|
||||
);
|
||||
if (messageItems.length > 0) {
|
||||
currentInput.splice(0, currentInput.length, ...messageItems);
|
||||
} else {
|
||||
// Fallback if somehow no message items remain
|
||||
currentInput.splice(
|
||||
0,
|
||||
currentInput.length,
|
||||
buildApprovalRecoveryMessage(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove the transient status before retrying
|
||||
buffersRef.current.byId.delete(statusId);
|
||||
buffersRef.current.order = buffersRef.current.order.filter(
|
||||
(id) => id !== statusId,
|
||||
);
|
||||
refreshDerived();
|
||||
|
||||
// Reset interrupted flag so retry stream chunks are processed
|
||||
buffersRef.current.interrupted = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
// No retries left: emit the failure and exit
|
||||
const errorToShow =
|
||||
lastFailureMessage ||
|
||||
`An error occurred during agent execution\n(run_id: ${lastRunId ?? "unknown"}, stop_reason: ${stopReasonToHandle})`;
|
||||
appendError(errorToShow, true);
|
||||
appendError(ERROR_FEEDBACK_HINT, true);
|
||||
|
||||
// Restore dequeued message to input on error
|
||||
if (lastDequeuedMessageRef.current) {
|
||||
setRestoredInput(lastDequeuedMessageRef.current);
|
||||
lastDequeuedMessageRef.current = null;
|
||||
}
|
||||
// Clear any remaining queue on error
|
||||
setMessageQueue([]);
|
||||
|
||||
setStreaming(false);
|
||||
sendDesktopNotification("Agent execution error", "error");
|
||||
refreshDerived();
|
||||
return;
|
||||
}
|
||||
|
||||
// Check for approval pending error (sent user message while approval waiting)
|
||||
// This is the lazy recovery path for when needsEagerApprovalCheck is false
|
||||
const approvalPendingDetected =
|
||||
|
||||
139
src/headless.ts
139
src/headless.ts
@@ -9,10 +9,8 @@ import type { ApprovalCreate } from "@letta-ai/letta-client/resources/agents/mes
|
||||
import type { StopReasonType } from "@letta-ai/letta-client/resources/runs/runs";
|
||||
import type { ApprovalResult } from "./agent/approval-execution";
|
||||
import {
|
||||
buildApprovalRecoveryMessage,
|
||||
fetchRunErrorDetail,
|
||||
isApprovalPendingError,
|
||||
isApprovalStateDesyncError,
|
||||
isConversationBusyError,
|
||||
isInvalidToolCallIdsError,
|
||||
} from "./agent/approval-recovery";
|
||||
@@ -1385,26 +1383,11 @@ export async function handleHeadlessCommand(
|
||||
}
|
||||
}
|
||||
|
||||
// Detect approval desync once per turn
|
||||
// Fetch run error detail for invalid tool call ID detection
|
||||
const detailFromRun = await fetchRunErrorDetail(lastRunId);
|
||||
const approvalDesynced =
|
||||
currentInput.length === 1 &&
|
||||
currentInput[0]?.type === "approval" &&
|
||||
(isApprovalStateDesyncError(detailFromRun) ||
|
||||
isApprovalStateDesyncError(latestErrorText));
|
||||
|
||||
// Track last failure text for emitting on exit
|
||||
const lastFailureText =
|
||||
latestErrorText ||
|
||||
detailFromRun ||
|
||||
(lastRunId
|
||||
? `An error occurred during agent execution\n(run_id: ${lastRunId}, stop_reason: ${stopReason})`
|
||||
: `An error occurred during agent execution\n(stop_reason: ${stopReason})`);
|
||||
|
||||
// Case 3: Transient LLM API error - retry with exponential backoff up to a limit
|
||||
if (stopReason === "llm_api_error") {
|
||||
const shouldUseApprovalRecovery = approvalDesynced;
|
||||
|
||||
if (llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) {
|
||||
const attempt = llmApiErrorRetries + 1;
|
||||
const baseDelayMs = 1000;
|
||||
@@ -1426,104 +1409,64 @@ export async function handleHeadlessCommand(
|
||||
console.log(JSON.stringify(retryMsg));
|
||||
} else {
|
||||
const delaySeconds = Math.round(delayMs / 1000);
|
||||
const recoveryNote = shouldUseApprovalRecovery
|
||||
? " (approval state desynced - sending keep-going prompt)"
|
||||
: "";
|
||||
console.error(
|
||||
`LLM API error encountered (attempt ${attempt} of ${LLM_API_ERROR_MAX_RETRIES}), retrying in ${delaySeconds}s...${recoveryNote}`,
|
||||
`LLM API error encountered (attempt ${attempt} of ${LLM_API_ERROR_MAX_RETRIES}), retrying in ${delaySeconds}s...`,
|
||||
);
|
||||
}
|
||||
|
||||
// Exponential backoff before retrying the same input
|
||||
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
||||
|
||||
if (shouldUseApprovalRecovery) {
|
||||
currentInput = [buildApprovalRecoveryMessage()];
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: if we were sending only approvals and hit an internal error that
|
||||
// says there is no pending approval, resend using the keep-alive recovery prompt.
|
||||
if (approvalDesynced) {
|
||||
// "Invalid tool call IDs" means server HAS pending approvals but with different IDs.
|
||||
// Fetch the actual pending approvals and process them before retrying.
|
||||
if (
|
||||
isInvalidToolCallIdsError(detailFromRun) ||
|
||||
isInvalidToolCallIdsError(latestErrorText)
|
||||
) {
|
||||
if (outputFormat === "stream-json") {
|
||||
const recoveryMsg: RecoveryMessage = {
|
||||
type: "recovery",
|
||||
recovery_type: "invalid_tool_call_ids",
|
||||
message:
|
||||
"Tool call ID mismatch; fetching actual pending approvals and resyncing",
|
||||
run_id: lastRunId ?? undefined,
|
||||
session_id: sessionId,
|
||||
uuid: `recovery-${lastRunId || crypto.randomUUID()}`,
|
||||
};
|
||||
console.log(JSON.stringify(recoveryMsg));
|
||||
} else {
|
||||
console.error(
|
||||
"Tool call ID mismatch; fetching actual pending approvals...",
|
||||
);
|
||||
}
|
||||
// "Invalid tool call IDs" means server HAS pending approvals but with different IDs.
|
||||
// Fetch the actual pending approvals and process them before retrying.
|
||||
const invalidIdsDetected =
|
||||
isInvalidToolCallIdsError(detailFromRun) ||
|
||||
isInvalidToolCallIdsError(latestErrorText);
|
||||
|
||||
try {
|
||||
// Fetch and process actual pending approvals from server
|
||||
await resolveAllPendingApprovals();
|
||||
// After processing, continue to next iteration (fresh state)
|
||||
continue;
|
||||
} catch {
|
||||
// If fetch fails, fall through to general desync recovery
|
||||
}
|
||||
}
|
||||
|
||||
if (llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) {
|
||||
llmApiErrorRetries += 1;
|
||||
|
||||
const retryReason = stopReason ?? "error";
|
||||
if (outputFormat === "stream-json") {
|
||||
const retryMsg: RetryMessage = {
|
||||
type: "retry",
|
||||
reason: retryReason,
|
||||
attempt: llmApiErrorRetries,
|
||||
max_attempts: LLM_API_ERROR_MAX_RETRIES,
|
||||
delay_ms: 0,
|
||||
run_id: lastRunId ?? undefined,
|
||||
session_id: sessionId,
|
||||
uuid: `retry-${lastRunId || crypto.randomUUID()}`,
|
||||
};
|
||||
console.log(JSON.stringify(retryMsg));
|
||||
} else {
|
||||
console.error(
|
||||
"Approval state desynced; resending keep-alive recovery prompt...",
|
||||
);
|
||||
}
|
||||
|
||||
// Small pause to avoid rapid-fire retries
|
||||
await new Promise((resolve) => setTimeout(resolve, 250));
|
||||
|
||||
currentInput = [buildApprovalRecoveryMessage()];
|
||||
continue;
|
||||
}
|
||||
|
||||
// No retries left or non-retriable: emit error and exit
|
||||
if (invalidIdsDetected) {
|
||||
if (outputFormat === "stream-json") {
|
||||
const errorMsg: ErrorMessage = {
|
||||
type: "error",
|
||||
message: lastFailureText,
|
||||
stop_reason: stopReason,
|
||||
const recoveryMsg: RecoveryMessage = {
|
||||
type: "recovery",
|
||||
recovery_type: "invalid_tool_call_ids",
|
||||
message:
|
||||
"Tool call ID mismatch; fetching actual pending approvals and resyncing",
|
||||
run_id: lastRunId ?? undefined,
|
||||
session_id: sessionId,
|
||||
uuid: `error-${lastRunId || crypto.randomUUID()}`,
|
||||
uuid: `recovery-${lastRunId || crypto.randomUUID()}`,
|
||||
};
|
||||
console.log(JSON.stringify(errorMsg));
|
||||
console.log(JSON.stringify(recoveryMsg));
|
||||
} else {
|
||||
console.error(lastFailureText);
|
||||
console.error(
|
||||
"Tool call ID mismatch; fetching actual pending approvals...",
|
||||
);
|
||||
}
|
||||
|
||||
try {
|
||||
// Fetch and process actual pending approvals from server
|
||||
await resolveAllPendingApprovals();
|
||||
// After processing, continue to next iteration (fresh state)
|
||||
continue;
|
||||
} catch {
|
||||
// If fetch fails, exit with error
|
||||
if (outputFormat === "stream-json") {
|
||||
const errorMsg: ErrorMessage = {
|
||||
type: "error",
|
||||
message: "Failed to fetch pending approvals for resync",
|
||||
stop_reason: stopReason,
|
||||
run_id: lastRunId ?? undefined,
|
||||
session_id: sessionId,
|
||||
uuid: `error-${lastRunId || crypto.randomUUID()}`,
|
||||
};
|
||||
console.log(JSON.stringify(errorMsg));
|
||||
} else {
|
||||
console.error("Failed to fetch pending approvals for resync");
|
||||
}
|
||||
process.exit(1);
|
||||
}
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Unexpected stop reason (error, llm_api_error, etc.)
|
||||
|
||||
@@ -2,7 +2,7 @@ import { describe, expect, test } from "bun:test";
|
||||
import type { Message } from "@letta-ai/letta-client/resources/agents/messages";
|
||||
import {
|
||||
isApprovalPendingError,
|
||||
isApprovalStateDesyncError,
|
||||
isConversationBusyError,
|
||||
isInvalidToolCallIdsError,
|
||||
} from "../agent/approval-recovery";
|
||||
import { extractApprovals } from "../agent/check-approval";
|
||||
@@ -10,56 +10,12 @@ import { extractApprovals } from "../agent/check-approval";
|
||||
/**
|
||||
* Tests for approval error detection helpers (LET-7101).
|
||||
*
|
||||
* These functions detect two opposite error conditions:
|
||||
* 1. isApprovalStateDesyncError: Sent approval, but server has no pending approval
|
||||
* These functions detect different error conditions related to approvals:
|
||||
* 1. isInvalidToolCallIdsError: Tool call IDs don't match server's pending approvals
|
||||
* 2. isApprovalPendingError: Sent user message, but server has pending approval waiting
|
||||
* 3. isConversationBusyError: Another request is being processed (409 CONFLICT)
|
||||
*/
|
||||
|
||||
describe("isApprovalStateDesyncError", () => {
|
||||
test("detects desync error in detail string", () => {
|
||||
const detail = "No tool call is currently awaiting approval";
|
||||
expect(isApprovalStateDesyncError(detail)).toBe(true);
|
||||
});
|
||||
|
||||
test("detects desync error case-insensitively", () => {
|
||||
const detail = "NO TOOL CALL IS CURRENTLY AWAITING APPROVAL";
|
||||
expect(isApprovalStateDesyncError(detail)).toBe(true);
|
||||
});
|
||||
|
||||
test("detects desync error in longer message", () => {
|
||||
const detail =
|
||||
"Error: No tool call is currently awaiting approval. The approval request may have expired.";
|
||||
expect(isApprovalStateDesyncError(detail)).toBe(true);
|
||||
});
|
||||
|
||||
test("detects invalid tool call IDs error", () => {
|
||||
const detail =
|
||||
"Invalid tool call IDs: Expected ['tc_abc123'], got ['tc_xyz789']";
|
||||
expect(isApprovalStateDesyncError(detail)).toBe(true);
|
||||
});
|
||||
|
||||
test("detects invalid tool call IDs error case-insensitively", () => {
|
||||
expect(
|
||||
isApprovalStateDesyncError("INVALID TOOL CALL IDS: Expected X, got Y"),
|
||||
).toBe(true);
|
||||
expect(isApprovalStateDesyncError("invalid tool call ids: mismatch")).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
test("returns false for unrelated errors", () => {
|
||||
expect(isApprovalStateDesyncError("Connection timeout")).toBe(false);
|
||||
expect(isApprovalStateDesyncError("Internal server error")).toBe(false);
|
||||
});
|
||||
|
||||
test("returns false for non-string input", () => {
|
||||
expect(isApprovalStateDesyncError(null)).toBe(false);
|
||||
expect(isApprovalStateDesyncError(undefined)).toBe(false);
|
||||
expect(isApprovalStateDesyncError(123)).toBe(false);
|
||||
expect(isApprovalStateDesyncError({ error: "test" })).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("isInvalidToolCallIdsError", () => {
|
||||
test("detects invalid tool call IDs error", () => {
|
||||
const detail =
|
||||
@@ -137,6 +93,51 @@ describe("isApprovalPendingError", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("isConversationBusyError", () => {
|
||||
const REAL_ERROR_DETAIL =
|
||||
"CONFLICT: Cannot send a new message: Another request is currently being processed for this conversation.";
|
||||
|
||||
test("detects conversation busy error in real error format", () => {
|
||||
expect(isConversationBusyError(REAL_ERROR_DETAIL)).toBe(true);
|
||||
});
|
||||
|
||||
test("detects conversation busy error case-insensitively", () => {
|
||||
expect(
|
||||
isConversationBusyError("ANOTHER REQUEST IS CURRENTLY BEING PROCESSED"),
|
||||
).toBe(true);
|
||||
expect(
|
||||
isConversationBusyError("another request is currently being processed"),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("detects partial match in longer message", () => {
|
||||
const detail =
|
||||
"Error: Another request is currently being processed. Please wait.";
|
||||
expect(isConversationBusyError(detail)).toBe(true);
|
||||
});
|
||||
|
||||
test("returns false for approval pending errors (different case)", () => {
|
||||
expect(
|
||||
isConversationBusyError(
|
||||
"Cannot send a new message: The agent is waiting for approval",
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
test("returns false for unrelated errors", () => {
|
||||
expect(isConversationBusyError("Connection timeout")).toBe(false);
|
||||
expect(isConversationBusyError("Rate limit exceeded")).toBe(false);
|
||||
expect(isConversationBusyError("Invalid API key")).toBe(false);
|
||||
});
|
||||
|
||||
test("returns false for non-string input", () => {
|
||||
expect(isConversationBusyError(null)).toBe(false);
|
||||
expect(isConversationBusyError(undefined)).toBe(false);
|
||||
expect(isConversationBusyError(123)).toBe(false);
|
||||
expect(isConversationBusyError({ detail: REAL_ERROR_DETAIL })).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* Tests for parallel tool call approval extraction.
|
||||
* Ensures lazy recovery handles multiple simultaneous tool calls correctly.
|
||||
|
||||
Reference in New Issue
Block a user