From 6587237fa6116c5b3f4e1b6295820bff2e666477 Mon Sep 17 00:00:00 2001 From: Charles Packer Date: Fri, 2 Jan 2026 16:20:40 -0800 Subject: [PATCH] fix: patch desync fallback (#452) --- src/cli/App.tsx | 50 +++++++++++++------- src/headless.ts | 121 +++++++++++++++++++++++++++++------------------- 2 files changed, 105 insertions(+), 66 deletions(-) diff --git a/src/cli/App.tsx b/src/cli/App.tsx index 0de5dc2..8c622bb 100644 --- a/src/cli/App.tsx +++ b/src/cli/App.tsx @@ -1671,29 +1671,33 @@ export default function App({ } // Unexpected stop reason (error, llm_api_error, etc.) - // Check for approval desync errors even if stop_reason isn't llm_api_error. + // Cache desync detection and last failure for consistent handling const isApprovalPayload = currentInput.length === 1 && currentInput[0]?.type === "approval"; - const approvalDesyncDetected = async () => { - // 1) Check run metadata - const detailFromRun = await fetchRunErrorDetail(lastRunId); - if (isApprovalStateDesyncError(detailFromRun)) return true; - - // 2) Check the most recent streamed error line in this turn - for (let i = buffersRef.current.order.length - 1; i >= 0; i -= 1) { - const id = buffersRef.current.order[i]; - if (!id) continue; - const entry = buffersRef.current.byId.get(id); - if (entry?.kind === "error") { - return isApprovalStateDesyncError(entry.text); - } + // Capture the most recent error text in this turn (if any) + let latestErrorText: string | null = null; + for (let i = buffersRef.current.order.length - 1; i >= 0; i -= 1) { + const id = buffersRef.current.order[i]; + if (!id) continue; + const entry = buffersRef.current.byId.get(id); + if (entry?.kind === "error" && typeof entry.text === "string") { + latestErrorText = entry.text; + break; } - return false; - }; + } - if (isApprovalPayload && (await approvalDesyncDetected())) { - // Limit how many times we try this recovery to avoid loops + // Detect approval desync once per turn + const detailFromRun = await 
fetchRunErrorDetail(lastRunId); + const desyncDetected = + isApprovalStateDesyncError(detailFromRun) || + isApprovalStateDesyncError(latestErrorText); + + // Track last failure info so we can emit it if retries stop + const lastFailureMessage = latestErrorText || detailFromRun || null; + + // Check for approval desync errors even if stop_reason isn't llm_api_error. + if (isApprovalPayload && desyncDetected) { if (llmApiErrorRetriesRef.current < LLM_API_ERROR_MAX_RETRIES) { llmApiErrorRetriesRef.current += 1; const statusId = uid("status"); @@ -1721,6 +1725,16 @@ export default function App({ refreshDerived(); continue; } + + // No retries left: emit the failure and exit + const errorToShow = + lastFailureMessage || + `An error occurred during agent execution\n(run_id: ${lastRunId ?? "unknown"}, stop_reason: ${stopReasonToHandle})`; + appendError(errorToShow, true); + setStreaming(false); + sendDesktopNotification(); + refreshDerived(); + return; } // Check if this is a retriable error (transient LLM API error) diff --git a/src/headless.ts b/src/headless.ts index 9416094..f79da5e 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -1132,12 +1132,40 @@ export async function handleHeadlessCommand( continue; } + // Cache latest error text for this turn + let latestErrorText: string | null = null; + const linesForTurn = toLines(buffers); + for (let i = linesForTurn.length - 1; i >= 0; i -= 1) { + const line = linesForTurn[i]; + if ( + line?.kind === "error" && + "text" in line && + typeof line.text === "string" + ) { + latestErrorText = line.text; + break; + } + } + + // Detect approval desync once per turn + const detailFromRun = await fetchRunErrorDetail(lastRunId); + const approvalDesynced = + currentInput.length === 1 && + currentInput[0]?.type === "approval" && + (isApprovalStateDesyncError(detailFromRun) || + isApprovalStateDesyncError(latestErrorText)); + + // Track last failure text for emitting on exit + const lastFailureText = + latestErrorText || + 
detailFromRun || + (lastRunId + ? `An error occurred during agent execution\n(run_id: ${lastRunId}, stop_reason: ${stopReason})` + : `An error occurred during agent execution\n(stop_reason: ${stopReason})`); + // Case 3: Transient LLM API error - retry with exponential backoff up to a limit if (stopReason === "llm_api_error") { - const shouldUseApprovalRecovery = - currentInput.length === 1 && - currentInput[0]?.type === "approval" && - isApprovalStateDesyncError(await fetchRunErrorDetail(lastRunId)); + const shouldUseApprovalRecovery = approvalDesynced; if (llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) { const attempt = llmApiErrorRetries + 1; @@ -1180,54 +1208,51 @@ export async function handleHeadlessCommand( // Fallback: if we were sending only approvals and hit an internal error that // says there is no pending approval, resend using the keep-alive recovery prompt. - const isApprovalPayload = - currentInput.length === 1 && currentInput[0]?.type === "approval"; - const approvalDesynced = - isApprovalPayload && - (isApprovalStateDesyncError(await fetchRunErrorDetail(lastRunId)) || - (() => { - const lines = toLines(buffers); - for (let i = lines.length - 1; i >= 0; i -= 1) { - const line = lines[i]; - if (!line) continue; - if ( - line.kind === "error" && - "text" in line && - typeof line.text === "string" - ) { - return isApprovalStateDesyncError(line.text ?? null); - } - } - return false; - })()); + if (approvalDesynced) { + if (llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) { + llmApiErrorRetries += 1; - if (approvalDesynced && llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) { - llmApiErrorRetries += 1; + const retryReason = stopReason ?? "error"; + if (outputFormat === "stream-json") { + const retryMsg: RetryMessage = { + type: "retry", + reason: retryReason, + attempt: llmApiErrorRetries, + max_attempts: LLM_API_ERROR_MAX_RETRIES, + delay_ms: 0, + run_id: lastRunId ?? 
undefined, + session_id: sessionId, + uuid: `retry-${lastRunId || crypto.randomUUID()}`, + }; + console.log(JSON.stringify(retryMsg)); + } else { + console.error( + "Approval state desynced; resending keep-alive recovery prompt...", + ); + } - const retryReason = stopReason ?? "error"; - if (outputFormat === "stream-json") { - const retryMsg: RetryMessage = { - type: "retry", - reason: retryReason, - attempt: llmApiErrorRetries, - max_attempts: LLM_API_ERROR_MAX_RETRIES, - delay_ms: 0, - run_id: lastRunId ?? undefined, - session_id: sessionId, - uuid: `retry-${lastRunId || crypto.randomUUID()}`, - }; - console.log(JSON.stringify(retryMsg)); - } else { - console.error( - "Approval state desynced; resending keep-alive recovery prompt...", - ); + // Small pause to avoid rapid-fire retries + await new Promise((resolve) => setTimeout(resolve, 250)); + + currentInput = [buildApprovalRecoveryMessage()]; + continue; } - // Small pause to avoid rapid-fire retries - await new Promise((resolve) => setTimeout(resolve, 250)); - - currentInput = [buildApprovalRecoveryMessage()]; - continue; + // No retries left for desync recovery: emit the last failure and exit + if (outputFormat === "stream-json") { + const errorMsg: ErrorMessage = { + type: "error", + message: lastFailureText, + stop_reason: stopReason, + run_id: lastRunId ?? undefined, + session_id: sessionId, + uuid: `error-${lastRunId || crypto.randomUUID()}`, + }; + console.log(JSON.stringify(errorMsg)); + } else { + console.error(lastFailureText); + } + process.exit(1); } // Unexpected stop reason (error, llm_api_error, etc.)