From a57cf84e035873451c92612fa6f1192ae67e5be3 Mon Sep 17 00:00:00 2001
From: jnjpng <jinjpeng@gmail.com>
Date: Mon, 9 Mar 2026 14:51:41 -0700
Subject: [PATCH] fix(retry): increase Cloudflare transient backoff to 5/10/20
 (#1307)

---
 src/agent/approval-recovery.ts         |  3 +
 src/agent/turn-recovery-policy.ts      | 55 ++++++++++++++++
 src/cli/App.tsx                        | 27 +++++---
 src/headless.ts                        | 42 +++++++++----
 src/tests/turn-recovery-policy.test.ts | 86 ++++++++++++++++++++++++++
 src/websocket/listen-client.ts         | 15 ++++-
 6 files changed, 207 insertions(+), 21 deletions(-)

diff --git a/src/agent/approval-recovery.ts b/src/agent/approval-recovery.ts
index f00c30a..d1ec9f3 100644
--- a/src/agent/approval-recovery.ts
+++ b/src/agent/approval-recovery.ts
@@ -13,12 +13,15 @@ export type {
   PreStreamConflictKind,
   PreStreamErrorAction,
   PreStreamErrorOptions,
+  RetryDelayCategory,
 } from "./turn-recovery-policy";
 // ── Re-export pure policy helpers (single source of truth) ──────────
 export {
   classifyPreStreamConflict,
   extractConflictDetail,
   getPreStreamErrorAction,
+  getRetryDelayMs,
+  getTransientRetryDelayMs,
   isApprovalPendingError,
   isConversationBusyError,
   isEmptyResponseError,
diff --git a/src/agent/turn-recovery-policy.ts b/src/agent/turn-recovery-policy.ts
index f7c7104..3277a89 100644
--- a/src/agent/turn-recovery-policy.ts
+++ b/src/agent/turn-recovery-policy.ts
@@ -65,6 +65,11 @@ const NON_RETRYABLE_QUOTA_DETAIL_PATTERNS = [
 ];
 const NON_RETRYABLE_4XX_PATTERN = /Error code:\s*4(0[0-8]|1\d|2\d|3\d|4\d|51)/i;
 const RETRYABLE_429_PATTERN = /Error code:\s*429|rate limit|too many requests/i;
+const DEFAULT_TRANSIENT_RETRY_BASE_DELAY_MS = 1000;
+const CLOUDFLARE_EDGE_52X_RETRY_BASE_DELAY_MS = 5000;
+const CONVERSATION_BUSY_RETRY_BASE_DELAY_MS = 10000;
+const EMPTY_RESPONSE_RETRY_BASE_DELAY_MS = 500;
+
 function isCloudflareEdge52xDetail(detail: unknown): boolean {
   if (typeof detail !== "string") return false;
   return isCloudflareEdge52xHtmlError(detail);
@@ -206,6 +211,56 @@ export function parseRetryAfterHeaderMs(
   return delayMs > 0 ? delayMs : 0;
 }
 
+export type RetryDelayCategory =
+  | "transient_provider"
+  | "conversation_busy"
+  | "empty_response";
+
+/**
+ * Compute retry delay for known retry classes.
+ * - `transient_provider`: exponential (Cloudflare-specific base) with Retry-After override
+ * - `conversation_busy`: exponential
+ * - `empty_response`: linear
+ */
+export function getRetryDelayMs(opts: {
+  category: RetryDelayCategory;
+  attempt: number;
+  detail?: unknown;
+  retryAfterMs?: number | null;
+}): number {
+  const { category, attempt, detail, retryAfterMs = null } = opts;
+
+  if (category === "transient_provider") {
+    if (retryAfterMs !== null) return retryAfterMs;
+    const baseDelayMs = isCloudflareEdge52xDetail(detail)
+      ? CLOUDFLARE_EDGE_52X_RETRY_BASE_DELAY_MS
+      : DEFAULT_TRANSIENT_RETRY_BASE_DELAY_MS;
+    return baseDelayMs * 2 ** (attempt - 1);
+  }
+
+  if (category === "conversation_busy") {
+    return CONVERSATION_BUSY_RETRY_BASE_DELAY_MS * 2 ** (attempt - 1);
+  }
+
+  return EMPTY_RESPONSE_RETRY_BASE_DELAY_MS * attempt;
+}
+
+/**
+ * Backward-compatible wrapper for transient provider retries.
+ */
+export function getTransientRetryDelayMs(opts: {
+  attempt: number;
+  detail: unknown;
+  retryAfterMs?: number | null;
+}): number {
+  return getRetryDelayMs({
+    category: "transient_provider",
+    attempt: opts.attempt,
+    detail: opts.detail,
+    retryAfterMs: opts.retryAfterMs,
+  });
+}
+
 // ── Pre-stream conflict routing ─────────────────────────────────────
 
 export type PreStreamConflictKind =
diff --git a/src/cli/App.tsx b/src/cli/App.tsx
index d3218fe..5efb444 100644
--- a/src/cli/App.tsx
+++ b/src/cli/App.tsx
@@ -32,6 +32,7 @@ import {
   extractConflictDetail,
   fetchRunErrorDetail,
   getPreStreamErrorAction,
+  getRetryDelayMs,
   isApprovalPendingError,
   isEmptyResponseRetryable,
   isInvalidToolCallIdsError,
@@ -331,7 +332,6 @@ const EMPTY_RESPONSE_MAX_RETRIES = 2;
 
 // Retry config for 409 "conversation busy" errors (exponential backoff)
 const CONVERSATION_BUSY_MAX_RETRIES = 3; // 10s -> 20s -> 40s
-const CONVERSATION_BUSY_RETRY_BASE_DELAY_MS = 10000; // 10 seconds
 
 // Message shown when user interrupts the stream
 const INTERRUPT_MESSAGE =
@@ -4072,9 +4072,10 @@ export default function App({
             // Check for 409 "conversation busy" error - retry with exponential backoff
             if (preStreamAction === "retry_conversation_busy") {
               conversationBusyRetriesRef.current += 1;
-              const retryDelayMs =
-                CONVERSATION_BUSY_RETRY_BASE_DELAY_MS *
-                2 ** (conversationBusyRetriesRef.current - 1);
+              const retryDelayMs = getRetryDelayMs({
+                category: "conversation_busy",
+                attempt: conversationBusyRetriesRef.current,
+              });
 
               // Log the conversation-busy error
               telemetry.trackError(
@@ -4142,7 +4143,12 @@ export default function App({
                       preStreamError.headers?.get("retry-after"),
                     )
                   : null;
-              const delayMs = retryAfterMs ?? 1000 * 2 ** (attempt - 1);
+              const delayMs = getRetryDelayMs({
+                category: "transient_provider",
+                attempt,
+                detail: errorDetail,
+                retryAfterMs,
+              });
 
               // Log the error that triggered the retry
               telemetry.trackError(
@@ -5348,7 +5354,10 @@ export default function App({
           ) {
             emptyResponseRetriesRef.current += 1;
             const attempt = emptyResponseRetriesRef.current;
-            const delayMs = 500 * attempt;
+            const delayMs = getRetryDelayMs({
+              category: "empty_response",
+              attempt,
+            });
 
             // Only append a nudge on the last attempt
             if (attempt >= EMPTY_RESPONSE_MAX_RETRIES) {
@@ -5397,7 +5406,11 @@ export default function App({
           ) {
             llmApiErrorRetriesRef.current += 1;
             const attempt = llmApiErrorRetriesRef.current;
-            const delayMs = 1000 * 2 ** (attempt - 1); // 1s, 2s, 4s
+            const delayMs = getRetryDelayMs({
+              category: "transient_provider",
+              attempt,
+              detail: detailFromRun ?? fallbackError,
+            });
 
             // Log the error that triggered the retry
             telemetry.trackError(
diff --git a/src/headless.ts b/src/headless.ts
index 78a1857..43b6df6 100644
--- a/src/headless.ts
+++ b/src/headless.ts
@@ -12,6 +12,7 @@ import {
   extractConflictDetail,
   fetchRunErrorDetail,
   getPreStreamErrorAction,
+  getRetryDelayMs,
   isApprovalPendingError,
   isEmptyResponseRetryable,
   isInvalidToolCallIdsError,
@@ -133,7 +134,6 @@ const EMPTY_RESPONSE_MAX_RETRIES = 2;
 
 // Retry config for 409 "conversation busy" errors (exponential backoff)
 const CONVERSATION_BUSY_MAX_RETRIES = 3; // 10s -> 20s -> 40s
-const CONVERSATION_BUSY_RETRY_BASE_DELAY_MS = 10000; // 10 seconds
 
 export type BidirectionalQueuedInput = QueuedTurnInput<
   MessageCreate["content"]
@@ -1544,9 +1544,10 @@ ${SYSTEM_REMINDER_CLOSE}
         // Check for 409 "conversation busy" error - retry once with delay
         if (preStreamAction === "retry_conversation_busy") {
           conversationBusyRetries += 1;
-          const retryDelayMs =
-            CONVERSATION_BUSY_RETRY_BASE_DELAY_MS *
-            2 ** (conversationBusyRetries - 1);
+          const retryDelayMs = getRetryDelayMs({
+            category: "conversation_busy",
+            attempt: conversationBusyRetries,
+          });
 
           // Emit retry message for stream-json mode
           if (outputFormat === "stream-json") {
@@ -1579,7 +1580,12 @@ ${SYSTEM_REMINDER_CLOSE}
                   preStreamError.headers?.get("retry-after"),
                 )
               : null;
-          const delayMs = retryAfterMs ?? 1000 * 2 ** (attempt - 1);
+          const delayMs = getRetryDelayMs({
+            category: "transient_provider",
+            attempt,
+            detail: errorDetail,
+            retryAfterMs,
+          });
 
           llmApiErrorRetries = attempt;
 
@@ -1910,8 +1916,11 @@ ${SYSTEM_REMINDER_CLOSE}
       if (stopReason === "llm_api_error") {
         if (llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) {
           const attempt = llmApiErrorRetries + 1;
-          const baseDelayMs = 1000;
-          const delayMs = baseDelayMs * 2 ** (attempt - 1);
+          const delayMs = getRetryDelayMs({
+            category: "transient_provider",
+            attempt,
+            detail: detailFromRun,
+          });
 
           llmApiErrorRetries = attempt;
 
@@ -2038,7 +2047,10 @@ ${SYSTEM_REMINDER_CLOSE}
             )
           ) {
             const attempt = emptyResponseRetries + 1;
-            const delayMs = 500 * attempt;
+            const delayMs = getRetryDelayMs({
+              category: "empty_response",
+              attempt,
+            });
 
             emptyResponseRetries = attempt;
 
@@ -2075,8 +2087,11 @@ ${SYSTEM_REMINDER_CLOSE}
 
           if (shouldRetryRunMetadataError(errorType, detail)) {
             const attempt = llmApiErrorRetries + 1;
-            const baseDelayMs = 1000;
-            const delayMs = baseDelayMs * 2 ** (attempt - 1);
+            const delayMs = getRetryDelayMs({
+              category: "transient_provider",
+              attempt,
+              detail,
+            });
 
             llmApiErrorRetries = attempt;
 
@@ -3169,7 +3184,12 @@ async function runBidirectionalMode(
                       preStreamError.headers?.get("retry-after"),
                     )
                   : null;
-              const delayMs = retryAfterMs ?? 1000 * 2 ** (attempt - 1);
+              const delayMs = getRetryDelayMs({
+                category: "transient_provider",
+                attempt,
+                detail: errorDetail,
+                retryAfterMs,
+              });
               preStreamTransientRetries = attempt;
 
               const retryMsg: RetryMessage = {
diff --git a/src/tests/turn-recovery-policy.test.ts b/src/tests/turn-recovery-policy.test.ts
index e9a0101..5f3d33e 100644
--- a/src/tests/turn-recovery-policy.test.ts
+++ b/src/tests/turn-recovery-policy.test.ts
@@ -3,6 +3,8 @@ import {
   classifyPreStreamConflict,
   extractConflictDetail,
   getPreStreamErrorAction,
+  getRetryDelayMs,
+  getTransientRetryDelayMs,
   isApprovalPendingError,
   isConversationBusyError,
   isEmptyResponseError,
@@ -284,6 +286,90 @@ describe("parseRetryAfterHeaderMs", () => {
   });
 });
 
+describe("getRetryDelayMs", () => {
+  test("uses default transient backoff for non-Cloudflare details", () => {
+    expect(
+      getRetryDelayMs({
+        category: "transient_provider",
+        attempt: 1,
+        detail: "Connection error during streaming",
+      }),
+    ).toBe(1000);
+    expect(
+      getRetryDelayMs({
+        category: "transient_provider",
+        attempt: 2,
+        detail: "Connection error during streaming",
+      }),
+    ).toBe(2000);
+  });
+
+  test("uses larger transient base for Cloudflare edge 52x details", () => {
+    const detail =
+      "521 <!DOCTYPE html><html><head><title>api.letta.com | 521: Web server is down</title></head><body>Cloudflare Ray ID: 9d431b5f6f656c08</body></html>";
+
+    expect(
+      getRetryDelayMs({
+        category: "transient_provider",
+        attempt: 1,
+        detail,
+      }),
+    ).toBe(5000);
+    expect(
+      getRetryDelayMs({
+        category: "transient_provider",
+        attempt: 3,
+        detail,
+      }),
+    ).toBe(20000);
+  });
+
+  test("uses Retry-After delay when provided for transient retries", () => {
+    const detail =
+      "521 <!DOCTYPE html><html><head><title>api.letta.com | 521: Web server is down</title></head><body>Cloudflare Ray ID: 9d431b5f6f656c08</body></html>";
+
+    expect(
+      getRetryDelayMs({
+        category: "transient_provider",
+        attempt: 3,
+        detail,
+        retryAfterMs: 7000,
+      }),
+    ).toBe(7000);
+  });
+
+  test("uses exponential conversation_busy profile", () => {
+    expect(getRetryDelayMs({ category: "conversation_busy", attempt: 1 })).toBe(
+      10000,
+    );
+    expect(getRetryDelayMs({ category: "conversation_busy", attempt: 2 })).toBe(
+      20000,
+    );
+  });
+
+  test("uses linear empty_response profile", () => {
+    expect(getRetryDelayMs({ category: "empty_response", attempt: 1 })).toBe(
+      500,
+    );
+    expect(getRetryDelayMs({ category: "empty_response", attempt: 2 })).toBe(
+      1000,
+    );
+  });
+});
+
+describe("getTransientRetryDelayMs", () => {
+  test("matches transient_provider category behavior", () => {
+    const detail = "Connection error during streaming";
+    expect(getTransientRetryDelayMs({ attempt: 2, detail })).toBe(
+      getRetryDelayMs({
+        category: "transient_provider",
+        attempt: 2,
+        detail,
+      }),
+    );
+  });
+});
+
 // ── Error text extraction ───────────────────────────────────────────
 
 describe("extractConflictDetail", () => {
diff --git a/src/websocket/listen-client.ts b/src/websocket/listen-client.ts
index fa6ddc8..69ed17f 100644
--- a/src/websocket/listen-client.ts
+++ b/src/websocket/listen-client.ts
@@ -23,6 +23,7 @@ import { getStreamToolContextId, sendMessageStream } from "../agent/message";
 import {
   extractConflictDetail,
   getPreStreamErrorAction,
+  getRetryDelayMs,
   isApprovalPendingError,
   isInvalidToolCallIdsError,
   parseRetryAfterHeaderMs,
@@ -1536,7 +1537,7 @@ async function sendMessageStreamWithRetry(
   let transientRetries = 0;
   let conversationBusyRetries = 0;
   let preStreamRecoveryAttempts = 0;
-  const MAX_CONVERSATION_BUSY_RETRIES = 1;
+  const MAX_CONVERSATION_BUSY_RETRIES = 3;
 
   // eslint-disable-next-line no-constant-condition
   while (true) {
@@ -1609,7 +1610,12 @@ async function sendMessageStreamWithRetry(
                 preStreamError.headers?.get("retry-after"),
               )
             : null;
-        const delayMs = retryAfterMs ?? 1000 * 2 ** (attempt - 1);
+        const delayMs = getRetryDelayMs({
+          category: "transient_provider",
+          attempt,
+          detail: errorDetail,
+          retryAfterMs,
+        });
         transientRetries = attempt;
 
         emitToWS(socket, {
@@ -1631,7 +1637,10 @@ async function sendMessageStreamWithRetry(
 
       if (action === "retry_conversation_busy") {
         const attempt = conversationBusyRetries + 1;
-        const delayMs = 2500;
+        const delayMs = getRetryDelayMs({
+          category: "conversation_busy",
+          attempt,
+        });
         conversationBusyRetries = attempt;
 
         emitToWS(socket, {