fix: patch desync fallback (#452)

2026-01-02 16:20:40 -08:00
parent a956a15db4
commit 6587237fa6
2 changed files with 105 additions and 66 deletions
--- a/src/cli/App.tsx
+++ b/src/cli/App.tsx
@@ -1671,29 +1671,33 @@ export default function App({
          }

          // Unexpected stop reason (error, llm_api_error, etc.)
-          // Check for approval desync errors even if stop_reason isn't llm_api_error.
+          // Cache desync detection and last failure for consistent handling
          const isApprovalPayload =
            currentInput.length === 1 && currentInput[0]?.type === "approval";

-          const approvalDesyncDetected = async () => {
-            // 1) Check run metadata
-            const detailFromRun = await fetchRunErrorDetail(lastRunId);
-            if (isApprovalStateDesyncError(detailFromRun)) return true;
-
-            // 2) Check the most recent streamed error line in this turn
-            for (let i = buffersRef.current.order.length - 1; i >= 0; i -= 1) {
-              const id = buffersRef.current.order[i];
-              if (!id) continue;
-              const entry = buffersRef.current.byId.get(id);
-              if (entry?.kind === "error") {
-                return isApprovalStateDesyncError(entry.text);
-              }
+          // Capture the most recent error text in this turn (if any)
+          let latestErrorText: string | null = null;
+          for (let i = buffersRef.current.order.length - 1; i >= 0; i -= 1) {
+            const id = buffersRef.current.order[i];
+            if (!id) continue;
+            const entry = buffersRef.current.byId.get(id);
+            if (entry?.kind === "error" && typeof entry.text === "string") {
+              latestErrorText = entry.text;
+              break;
            }
-            return false;
-          };
+          }

-          if (isApprovalPayload && (await approvalDesyncDetected())) {
-            // Limit how many times we try this recovery to avoid loops
+          // Detect approval desync once per turn
+          const detailFromRun = await fetchRunErrorDetail(lastRunId);
+          const desyncDetected =
+            isApprovalStateDesyncError(detailFromRun) ||
+            isApprovalStateDesyncError(latestErrorText);
+
+          // Track last failure info so we can emit it if retries stop
+          const lastFailureMessage = latestErrorText || detailFromRun || null;
+
+          // Check for approval desync errors even if stop_reason isn't llm_api_error.
+          if (isApprovalPayload && desyncDetected) {
            if (llmApiErrorRetriesRef.current < LLM_API_ERROR_MAX_RETRIES) {
              llmApiErrorRetriesRef.current += 1;
              const statusId = uid("status");
@@ -1721,6 +1725,16 @@ export default function App({
              refreshDerived();
              continue;
            }
+
+            // No retries left: emit the failure and exit
+            const errorToShow =
+              lastFailureMessage ||
+              `An error occurred during agent execution\n(run_id: ${lastRunId ?? "unknown"}, stop_reason: ${stopReasonToHandle})`;
+            appendError(errorToShow, true);
+            setStreaming(false);
+            sendDesktopNotification();
+            refreshDerived();
+            return;
          }

          // Check if this is a retriable error (transient LLM API error)
--- a/src/headless.ts
+++ b/src/headless.ts
@@ -1132,12 +1132,40 @@ export async function handleHeadlessCommand(
        continue;
      }

+      // Cache latest error text for this turn
+      let latestErrorText: string | null = null;
+      const linesForTurn = toLines(buffers);
+      for (let i = linesForTurn.length - 1; i >= 0; i -= 1) {
+        const line = linesForTurn[i];
+        if (
+          line?.kind === "error" &&
+          "text" in line &&
+          typeof line.text === "string"
+        ) {
+          latestErrorText = line.text;
+          break;
+        }
+      }
+
+      // Detect approval desync once per turn
+      const detailFromRun = await fetchRunErrorDetail(lastRunId);
+      const approvalDesynced =
+        currentInput.length === 1 &&
+        currentInput[0]?.type === "approval" &&
+        (isApprovalStateDesyncError(detailFromRun) ||
+          isApprovalStateDesyncError(latestErrorText));
+
+      // Track last failure text for emitting on exit
+      const lastFailureText =
+        latestErrorText ||
+        detailFromRun ||
+        (lastRunId
+          ? `An error occurred during agent execution\n(run_id: ${lastRunId}, stop_reason: ${stopReason})`
+          : `An error occurred during agent execution\n(stop_reason: ${stopReason})`);
+
      // Case 3: Transient LLM API error - retry with exponential backoff up to a limit
      if (stopReason === "llm_api_error") {
-        const shouldUseApprovalRecovery =
-          currentInput.length === 1 &&
-          currentInput[0]?.type === "approval" &&
-          isApprovalStateDesyncError(await fetchRunErrorDetail(lastRunId));
+        const shouldUseApprovalRecovery = approvalDesynced;

        if (llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) {
          const attempt = llmApiErrorRetries + 1;
@@ -1180,54 +1208,51 @@ export async function handleHeadlessCommand(

      // Fallback: if we were sending only approvals and hit an internal error that
      // says there is no pending approval, resend using the keep-alive recovery prompt.
-      const isApprovalPayload =
-        currentInput.length === 1 && currentInput[0]?.type === "approval";
-      const approvalDesynced =
-        isApprovalPayload &&
-        (isApprovalStateDesyncError(await fetchRunErrorDetail(lastRunId)) ||
-          (() => {
-            const lines = toLines(buffers);
-            for (let i = lines.length - 1; i >= 0; i -= 1) {
-              const line = lines[i];
-              if (!line) continue;
-              if (
-                line.kind === "error" &&
-                "text" in line &&
-                typeof line.text === "string"
-              ) {
-                return isApprovalStateDesyncError(line.text ?? null);
-              }
-            }
-            return false;
-          })());
+      if (approvalDesynced) {
+        if (llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) {
+          llmApiErrorRetries += 1;

-      if (approvalDesynced && llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) {
-        llmApiErrorRetries += 1;
+          const retryReason = stopReason ?? "error";
+          if (outputFormat === "stream-json") {
+            const retryMsg: RetryMessage = {
+              type: "retry",
+              reason: retryReason,
+              attempt: llmApiErrorRetries,
+              max_attempts: LLM_API_ERROR_MAX_RETRIES,
+              delay_ms: 0,
+              run_id: lastRunId ?? undefined,
+              session_id: sessionId,
+              uuid: `retry-${lastRunId || crypto.randomUUID()}`,
+            };
+            console.log(JSON.stringify(retryMsg));
+          } else {
+            console.error(
+              "Approval state desynced; resending keep-alive recovery prompt...",
+            );
+          }

-        const retryReason = stopReason ?? "error";
-        if (outputFormat === "stream-json") {
-          const retryMsg: RetryMessage = {
-            type: "retry",
-            reason: retryReason,
-            attempt: llmApiErrorRetries,
-            max_attempts: LLM_API_ERROR_MAX_RETRIES,
-            delay_ms: 0,
-            run_id: lastRunId ?? undefined,
-            session_id: sessionId,
-            uuid: `retry-${lastRunId || crypto.randomUUID()}`,
-          };
-          console.log(JSON.stringify(retryMsg));
-        } else {
-          console.error(
-            "Approval state desynced; resending keep-alive recovery prompt...",
-          );
+          // Small pause to avoid rapid-fire retries
+          await new Promise((resolve) => setTimeout(resolve, 250));
+
+          currentInput = [buildApprovalRecoveryMessage()];
+          continue;
        }

-        // Small pause to avoid rapid-fire retries
-        await new Promise((resolve) => setTimeout(resolve, 250));
-
-        currentInput = [buildApprovalRecoveryMessage()];
-        continue;
+        // Retry budget exhausted for desync recovery: emit the last failure text and exit with status 1
+        if (outputFormat === "stream-json") {
+          const errorMsg: ErrorMessage = {
+            type: "error",
+            message: lastFailureText,
+            stop_reason: stopReason,
+            run_id: lastRunId ?? undefined,
+            session_id: sessionId,
+            uuid: `error-${lastRunId || crypto.randomUUID()}`,
+          };
+          console.log(JSON.stringify(errorMsg));
+        } else {
+          console.error(lastFailureText);
+        }
+        process.exit(1);
      }

      // Unexpected stop reason (error, llm_api_error, etc.)