fix(retry): increase Cloudflare transient backoff to 5/10/20 (#1307)

This commit is contained in:
jnjpng
2026-03-09 14:51:41 -07:00
committed by GitHub
parent 28039dcb43
commit a57cf84e03
6 changed files with 207 additions and 21 deletions

View File

@@ -13,12 +13,15 @@ export type {
PreStreamConflictKind, PreStreamConflictKind,
PreStreamErrorAction, PreStreamErrorAction,
PreStreamErrorOptions, PreStreamErrorOptions,
RetryDelayCategory,
} from "./turn-recovery-policy"; } from "./turn-recovery-policy";
// ── Re-export pure policy helpers (single source of truth) ────────── // ── Re-export pure policy helpers (single source of truth) ──────────
export { export {
classifyPreStreamConflict, classifyPreStreamConflict,
extractConflictDetail, extractConflictDetail,
getPreStreamErrorAction, getPreStreamErrorAction,
getRetryDelayMs,
getTransientRetryDelayMs,
isApprovalPendingError, isApprovalPendingError,
isConversationBusyError, isConversationBusyError,
isEmptyResponseError, isEmptyResponseError,

View File

@@ -65,6 +65,11 @@ const NON_RETRYABLE_QUOTA_DETAIL_PATTERNS = [
]; ];
const NON_RETRYABLE_4XX_PATTERN = /Error code:\s*4(0[0-8]|1\d|2\d|3\d|4\d|51)/i; const NON_RETRYABLE_4XX_PATTERN = /Error code:\s*4(0[0-8]|1\d|2\d|3\d|4\d|51)/i;
const RETRYABLE_429_PATTERN = /Error code:\s*429|rate limit|too many requests/i; const RETRYABLE_429_PATTERN = /Error code:\s*429|rate limit|too many requests/i;
// Base backoff for generic transient provider errors: 1s -> 2s -> 4s (exponential).
const DEFAULT_TRANSIENT_RETRY_BASE_DELAY_MS = 1000;
// Cloudflare edge 52x failures get a larger base: 5s -> 10s -> 20s (exponential).
const CLOUDFLARE_EDGE_52X_RETRY_BASE_DELAY_MS = 5000;
// 409 "conversation busy" retries: 10s -> 20s -> 40s (exponential).
const CONVERSATION_BUSY_RETRY_BASE_DELAY_MS = 10000;
// Empty-response retries ramp linearly: 500ms, 1000ms, ...
const EMPTY_RESPONSE_RETRY_BASE_DELAY_MS = 500;
function isCloudflareEdge52xDetail(detail: unknown): boolean { function isCloudflareEdge52xDetail(detail: unknown): boolean {
if (typeof detail !== "string") return false; if (typeof detail !== "string") return false;
return isCloudflareEdge52xHtmlError(detail); return isCloudflareEdge52xHtmlError(detail);
@@ -206,6 +211,56 @@ export function parseRetryAfterHeaderMs(
return delayMs > 0 ? delayMs : 0; return delayMs > 0 ? delayMs : 0;
} }
export type RetryDelayCategory =
  | "transient_provider"
  | "conversation_busy"
  | "empty_response";

/**
 * Compute the retry delay (in ms) for a known retry class.
 * - `transient_provider`: exponential backoff with a Cloudflare-specific
 *   larger base for edge 52x errors; a Retry-After value always overrides.
 * - `conversation_busy`: exponential backoff.
 * - `empty_response`: linear backoff.
 */
export function getRetryDelayMs(opts: {
  category: RetryDelayCategory;
  attempt: number;
  detail?: unknown;
  retryAfterMs?: number | null;
}): number {
  const { category, attempt, detail, retryAfterMs = null } = opts;
  switch (category) {
    case "transient_provider": {
      // A server-provided Retry-After takes precedence over computed backoff.
      if (retryAfterMs !== null) return retryAfterMs;
      const baseMs = isCloudflareEdge52xDetail(detail)
        ? CLOUDFLARE_EDGE_52X_RETRY_BASE_DELAY_MS
        : DEFAULT_TRANSIENT_RETRY_BASE_DELAY_MS;
      return baseMs * 2 ** (attempt - 1);
    }
    case "conversation_busy":
      return CONVERSATION_BUSY_RETRY_BASE_DELAY_MS * 2 ** (attempt - 1);
    default:
      // empty_response: linear ramp rather than exponential.
      return EMPTY_RESPONSE_RETRY_BASE_DELAY_MS * attempt;
  }
}
/**
 * Backward-compatible wrapper around {@link getRetryDelayMs} for the
 * `transient_provider` category.
 */
export function getTransientRetryDelayMs({
  attempt,
  detail,
  retryAfterMs,
}: {
  attempt: number;
  detail: unknown;
  retryAfterMs?: number | null;
}): number {
  return getRetryDelayMs({
    category: "transient_provider",
    attempt,
    detail,
    retryAfterMs,
  });
}
// ── Pre-stream conflict routing ───────────────────────────────────── // ── Pre-stream conflict routing ─────────────────────────────────────
export type PreStreamConflictKind = export type PreStreamConflictKind =

View File

@@ -32,6 +32,7 @@ import {
extractConflictDetail, extractConflictDetail,
fetchRunErrorDetail, fetchRunErrorDetail,
getPreStreamErrorAction, getPreStreamErrorAction,
getRetryDelayMs,
isApprovalPendingError, isApprovalPendingError,
isEmptyResponseRetryable, isEmptyResponseRetryable,
isInvalidToolCallIdsError, isInvalidToolCallIdsError,
@@ -331,7 +332,6 @@ const EMPTY_RESPONSE_MAX_RETRIES = 2;
// Retry config for 409 "conversation busy" errors (exponential backoff) // Retry config for 409 "conversation busy" errors (exponential backoff)
const CONVERSATION_BUSY_MAX_RETRIES = 3; // 10s -> 20s -> 40s const CONVERSATION_BUSY_MAX_RETRIES = 3; // 10s -> 20s -> 40s
const CONVERSATION_BUSY_RETRY_BASE_DELAY_MS = 10000; // 10 seconds
// Message shown when user interrupts the stream // Message shown when user interrupts the stream
const INTERRUPT_MESSAGE = const INTERRUPT_MESSAGE =
@@ -4072,9 +4072,10 @@ export default function App({
// Check for 409 "conversation busy" error - retry with exponential backoff // Check for 409 "conversation busy" error - retry with exponential backoff
if (preStreamAction === "retry_conversation_busy") { if (preStreamAction === "retry_conversation_busy") {
conversationBusyRetriesRef.current += 1; conversationBusyRetriesRef.current += 1;
const retryDelayMs = const retryDelayMs = getRetryDelayMs({
CONVERSATION_BUSY_RETRY_BASE_DELAY_MS * category: "conversation_busy",
2 ** (conversationBusyRetriesRef.current - 1); attempt: conversationBusyRetriesRef.current,
});
// Log the conversation-busy error // Log the conversation-busy error
telemetry.trackError( telemetry.trackError(
@@ -4142,7 +4143,12 @@ export default function App({
preStreamError.headers?.get("retry-after"), preStreamError.headers?.get("retry-after"),
) )
: null; : null;
const delayMs = retryAfterMs ?? 1000 * 2 ** (attempt - 1); const delayMs = getRetryDelayMs({
category: "transient_provider",
attempt,
detail: errorDetail,
retryAfterMs,
});
// Log the error that triggered the retry // Log the error that triggered the retry
telemetry.trackError( telemetry.trackError(
@@ -5348,7 +5354,10 @@ export default function App({
) { ) {
emptyResponseRetriesRef.current += 1; emptyResponseRetriesRef.current += 1;
const attempt = emptyResponseRetriesRef.current; const attempt = emptyResponseRetriesRef.current;
const delayMs = 500 * attempt; const delayMs = getRetryDelayMs({
category: "empty_response",
attempt,
});
// Only append a nudge on the last attempt // Only append a nudge on the last attempt
if (attempt >= EMPTY_RESPONSE_MAX_RETRIES) { if (attempt >= EMPTY_RESPONSE_MAX_RETRIES) {
@@ -5397,7 +5406,11 @@ export default function App({
) { ) {
llmApiErrorRetriesRef.current += 1; llmApiErrorRetriesRef.current += 1;
const attempt = llmApiErrorRetriesRef.current; const attempt = llmApiErrorRetriesRef.current;
const delayMs = 1000 * 2 ** (attempt - 1); // 1s, 2s, 4s const delayMs = getRetryDelayMs({
category: "transient_provider",
attempt,
detail: detailFromRun ?? fallbackError,
});
// Log the error that triggered the retry // Log the error that triggered the retry
telemetry.trackError( telemetry.trackError(

View File

@@ -12,6 +12,7 @@ import {
extractConflictDetail, extractConflictDetail,
fetchRunErrorDetail, fetchRunErrorDetail,
getPreStreamErrorAction, getPreStreamErrorAction,
getRetryDelayMs,
isApprovalPendingError, isApprovalPendingError,
isEmptyResponseRetryable, isEmptyResponseRetryable,
isInvalidToolCallIdsError, isInvalidToolCallIdsError,
@@ -133,7 +134,6 @@ const EMPTY_RESPONSE_MAX_RETRIES = 2;
// Retry config for 409 "conversation busy" errors (exponential backoff) // Retry config for 409 "conversation busy" errors (exponential backoff)
const CONVERSATION_BUSY_MAX_RETRIES = 3; // 10s -> 20s -> 40s const CONVERSATION_BUSY_MAX_RETRIES = 3; // 10s -> 20s -> 40s
const CONVERSATION_BUSY_RETRY_BASE_DELAY_MS = 10000; // 10 seconds
export type BidirectionalQueuedInput = QueuedTurnInput< export type BidirectionalQueuedInput = QueuedTurnInput<
MessageCreate["content"] MessageCreate["content"]
@@ -1544,9 +1544,10 @@ ${SYSTEM_REMINDER_CLOSE}
// Check for 409 "conversation busy" error - retry once with delay // Check for 409 "conversation busy" error - retry once with delay
if (preStreamAction === "retry_conversation_busy") { if (preStreamAction === "retry_conversation_busy") {
conversationBusyRetries += 1; conversationBusyRetries += 1;
const retryDelayMs = const retryDelayMs = getRetryDelayMs({
CONVERSATION_BUSY_RETRY_BASE_DELAY_MS * category: "conversation_busy",
2 ** (conversationBusyRetries - 1); attempt: conversationBusyRetries,
});
// Emit retry message for stream-json mode // Emit retry message for stream-json mode
if (outputFormat === "stream-json") { if (outputFormat === "stream-json") {
@@ -1579,7 +1580,12 @@ ${SYSTEM_REMINDER_CLOSE}
preStreamError.headers?.get("retry-after"), preStreamError.headers?.get("retry-after"),
) )
: null; : null;
const delayMs = retryAfterMs ?? 1000 * 2 ** (attempt - 1); const delayMs = getRetryDelayMs({
category: "transient_provider",
attempt,
detail: errorDetail,
retryAfterMs,
});
llmApiErrorRetries = attempt; llmApiErrorRetries = attempt;
@@ -1910,8 +1916,11 @@ ${SYSTEM_REMINDER_CLOSE}
if (stopReason === "llm_api_error") { if (stopReason === "llm_api_error") {
if (llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) { if (llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) {
const attempt = llmApiErrorRetries + 1; const attempt = llmApiErrorRetries + 1;
const baseDelayMs = 1000; const delayMs = getRetryDelayMs({
const delayMs = baseDelayMs * 2 ** (attempt - 1); category: "transient_provider",
attempt,
detail: detailFromRun,
});
llmApiErrorRetries = attempt; llmApiErrorRetries = attempt;
@@ -2038,7 +2047,10 @@ ${SYSTEM_REMINDER_CLOSE}
) )
) { ) {
const attempt = emptyResponseRetries + 1; const attempt = emptyResponseRetries + 1;
const delayMs = 500 * attempt; const delayMs = getRetryDelayMs({
category: "empty_response",
attempt,
});
emptyResponseRetries = attempt; emptyResponseRetries = attempt;
@@ -2075,8 +2087,11 @@ ${SYSTEM_REMINDER_CLOSE}
if (shouldRetryRunMetadataError(errorType, detail)) { if (shouldRetryRunMetadataError(errorType, detail)) {
const attempt = llmApiErrorRetries + 1; const attempt = llmApiErrorRetries + 1;
const baseDelayMs = 1000; const delayMs = getRetryDelayMs({
const delayMs = baseDelayMs * 2 ** (attempt - 1); category: "transient_provider",
attempt,
detail,
});
llmApiErrorRetries = attempt; llmApiErrorRetries = attempt;
@@ -3169,7 +3184,12 @@ async function runBidirectionalMode(
preStreamError.headers?.get("retry-after"), preStreamError.headers?.get("retry-after"),
) )
: null; : null;
const delayMs = retryAfterMs ?? 1000 * 2 ** (attempt - 1); const delayMs = getRetryDelayMs({
category: "transient_provider",
attempt,
detail: errorDetail,
retryAfterMs,
});
preStreamTransientRetries = attempt; preStreamTransientRetries = attempt;
const retryMsg: RetryMessage = { const retryMsg: RetryMessage = {

View File

@@ -3,6 +3,8 @@ import {
classifyPreStreamConflict, classifyPreStreamConflict,
extractConflictDetail, extractConflictDetail,
getPreStreamErrorAction, getPreStreamErrorAction,
getRetryDelayMs,
getTransientRetryDelayMs,
isApprovalPendingError, isApprovalPendingError,
isConversationBusyError, isConversationBusyError,
isEmptyResponseError, isEmptyResponseError,
@@ -284,6 +286,90 @@ describe("parseRetryAfterHeaderMs", () => {
}); });
}); });
describe("getRetryDelayMs", () => {
test("uses default transient backoff for non-Cloudflare details", () => {
expect(
getRetryDelayMs({
category: "transient_provider",
attempt: 1,
detail: "Connection error during streaming",
}),
).toBe(1000);
expect(
getRetryDelayMs({
category: "transient_provider",
attempt: 2,
detail: "Connection error during streaming",
}),
).toBe(2000);
});
test("uses larger transient base for Cloudflare edge 52x details", () => {
const detail =
"521 <!DOCTYPE html><html><head><title>api.letta.com | 521: Web server is down</title></head><body>Cloudflare Ray ID: 9d431b5f6f656c08</body></html>";
expect(
getRetryDelayMs({
category: "transient_provider",
attempt: 1,
detail,
}),
).toBe(5000);
expect(
getRetryDelayMs({
category: "transient_provider",
attempt: 3,
detail,
}),
).toBe(20000);
});
test("uses Retry-After delay when provided for transient retries", () => {
const detail =
"521 <!DOCTYPE html><html><head><title>api.letta.com | 521: Web server is down</title></head><body>Cloudflare Ray ID: 9d431b5f6f656c08</body></html>";
expect(
getRetryDelayMs({
category: "transient_provider",
attempt: 3,
detail,
retryAfterMs: 7000,
}),
).toBe(7000);
});
test("uses exponential conversation_busy profile", () => {
expect(getRetryDelayMs({ category: "conversation_busy", attempt: 1 })).toBe(
10000,
);
expect(getRetryDelayMs({ category: "conversation_busy", attempt: 2 })).toBe(
20000,
);
});
test("uses linear empty_response profile", () => {
expect(getRetryDelayMs({ category: "empty_response", attempt: 1 })).toBe(
500,
);
expect(getRetryDelayMs({ category: "empty_response", attempt: 2 })).toBe(
1000,
);
});
});
describe("getTransientRetryDelayMs", () => {
test("matches transient_provider category behavior", () => {
const detail = "Connection error during streaming";
expect(getTransientRetryDelayMs({ attempt: 2, detail })).toBe(
getRetryDelayMs({
category: "transient_provider",
attempt: 2,
detail,
}),
);
});
});
// ── Error text extraction ─────────────────────────────────────────── // ── Error text extraction ───────────────────────────────────────────
describe("extractConflictDetail", () => { describe("extractConflictDetail", () => {

View File

@@ -23,6 +23,7 @@ import { getStreamToolContextId, sendMessageStream } from "../agent/message";
import { import {
extractConflictDetail, extractConflictDetail,
getPreStreamErrorAction, getPreStreamErrorAction,
getRetryDelayMs,
isApprovalPendingError, isApprovalPendingError,
isInvalidToolCallIdsError, isInvalidToolCallIdsError,
parseRetryAfterHeaderMs, parseRetryAfterHeaderMs,
@@ -1536,7 +1537,7 @@ async function sendMessageStreamWithRetry(
let transientRetries = 0; let transientRetries = 0;
let conversationBusyRetries = 0; let conversationBusyRetries = 0;
let preStreamRecoveryAttempts = 0; let preStreamRecoveryAttempts = 0;
const MAX_CONVERSATION_BUSY_RETRIES = 1; const MAX_CONVERSATION_BUSY_RETRIES = 3;
// eslint-disable-next-line no-constant-condition // eslint-disable-next-line no-constant-condition
while (true) { while (true) {
@@ -1609,7 +1610,12 @@ async function sendMessageStreamWithRetry(
preStreamError.headers?.get("retry-after"), preStreamError.headers?.get("retry-after"),
) )
: null; : null;
const delayMs = retryAfterMs ?? 1000 * 2 ** (attempt - 1); const delayMs = getRetryDelayMs({
category: "transient_provider",
attempt,
detail: errorDetail,
retryAfterMs,
});
transientRetries = attempt; transientRetries = attempt;
emitToWS(socket, { emitToWS(socket, {
@@ -1631,7 +1637,10 @@ async function sendMessageStreamWithRetry(
if (action === "retry_conversation_busy") { if (action === "retry_conversation_busy") {
const attempt = conversationBusyRetries + 1; const attempt = conversationBusyRetries + 1;
const delayMs = 2500; const delayMs = getRetryDelayMs({
category: "conversation_busy",
attempt,
});
conversationBusyRetries = attempt; conversationBusyRetries = attempt;
emitToWS(socket, { emitToWS(socket, {