fix(retry): increase Cloudflare transient backoff to 5/10/20 (#1307)
This commit is contained in:
@@ -13,12 +13,15 @@ export type {
|
||||
PreStreamConflictKind,
|
||||
PreStreamErrorAction,
|
||||
PreStreamErrorOptions,
|
||||
RetryDelayCategory,
|
||||
} from "./turn-recovery-policy";
|
||||
// ── Re-export pure policy helpers (single source of truth) ──────────
|
||||
export {
|
||||
classifyPreStreamConflict,
|
||||
extractConflictDetail,
|
||||
getPreStreamErrorAction,
|
||||
getRetryDelayMs,
|
||||
getTransientRetryDelayMs,
|
||||
isApprovalPendingError,
|
||||
isConversationBusyError,
|
||||
isEmptyResponseError,
|
||||
|
||||
@@ -65,6 +65,11 @@ const NON_RETRYABLE_QUOTA_DETAIL_PATTERNS = [
|
||||
];
|
||||
/** Matches an "Error code: 4xx" marker for the status codes listed in the alternation (case-insensitive). */
const NON_RETRYABLE_4XX_PATTERN = /Error code:\s*4(0[0-8]|1\d|2\d|3\d|4\d|51)/i;

/** Matches an explicit 429 error code or rate-limit wording (case-insensitive). */
const RETRYABLE_429_PATTERN = /Error code:\s*429|rate limit|too many requests/i;

/** Base delay for generic transient provider retries (doubled on each attempt). */
const DEFAULT_TRANSIENT_RETRY_BASE_DELAY_MS = 1_000;

/** Base delay for Cloudflare edge 52x responses: 5s -> 10s -> 20s when doubled per attempt. */
const CLOUDFLARE_EDGE_52X_RETRY_BASE_DELAY_MS = 5_000;

/** Base delay for "conversation busy" retries: 10s -> 20s -> 40s when doubled per attempt. */
const CONVERSATION_BUSY_RETRY_BASE_DELAY_MS = 10_000;

/** Base delay for empty-response retries (multiplied by the attempt number). */
const EMPTY_RESPONSE_RETRY_BASE_DELAY_MS = 500;
|
||||
|
||||
function isCloudflareEdge52xDetail(detail: unknown): boolean {
|
||||
if (typeof detail !== "string") return false;
|
||||
return isCloudflareEdge52xHtmlError(detail);
|
||||
@@ -206,6 +211,56 @@ export function parseRetryAfterHeaderMs(
|
||||
return delayMs > 0 ? delayMs : 0;
|
||||
}
|
||||
|
||||
/**
 * Retry classes that `getRetryDelayMs` knows how to compute a delay for.
 * Each member selects a different backoff profile.
 */
export type RetryDelayCategory =
  | "transient_provider"
  | "conversation_busy"
  | "empty_response";
|
||||
|
||||
/**
 * Compute how long to wait before the next retry for a known retry class.
 *
 * Profiles:
 * - `transient_provider`: a usable `retryAfterMs` wins outright; otherwise
 *   exponential backoff (`base * 2^(attempt - 1)`), where the base is larger
 *   when `detail` is recognised as a Cloudflare edge 52x HTML error.
 * - `conversation_busy`: exponential backoff from the conversation-busy base.
 * - `empty_response`: linear backoff (`base * attempt`).
 *
 * @param opts.category - Which retry profile to apply.
 * @param opts.attempt - 1-based retry attempt number. Values that are not a
 *   finite number >= 1 are treated as attempt 1; fractional values are floored.
 * @param opts.detail - Error detail, inspected only to detect Cloudflare edge
 *   52x responses for the `transient_provider` profile.
 * @param opts.retryAfterMs - Server-provided delay in milliseconds. Ignored
 *   unless it is a finite, non-negative number; only consulted for
 *   `transient_provider`.
 * @returns The delay in milliseconds.
 */
export function getRetryDelayMs(opts: {
  category: RetryDelayCategory;
  attempt: number;
  detail?: unknown;
  retryAfterMs?: number | null;
}): number {
  const { category, attempt, detail, retryAfterMs = null } = opts;

  // Callers pass 1-based counters. Guard against 0 / negative / fractional /
  // NaN input so the result is never a fractional, zero, or NaN backoff.
  const attemptNumber =
    Number.isFinite(attempt) && attempt >= 1 ? Math.floor(attempt) : 1;
  const exponentialFactor = 2 ** (attemptNumber - 1);

  switch (category) {
    case "transient_provider": {
      // Only honour a Retry-After value that is actually usable as a delay.
      if (
        retryAfterMs !== null &&
        Number.isFinite(retryAfterMs) &&
        retryAfterMs >= 0
      ) {
        return retryAfterMs;
      }
      const baseDelayMs = isCloudflareEdge52xDetail(detail)
        ? CLOUDFLARE_EDGE_52X_RETRY_BASE_DELAY_MS
        : DEFAULT_TRANSIENT_RETRY_BASE_DELAY_MS;
      return baseDelayMs * exponentialFactor;
    }
    case "conversation_busy":
      return CONVERSATION_BUSY_RETRY_BASE_DELAY_MS * exponentialFactor;
    default:
      // "empty_response". Any other value also lands here, matching the
      // previous fall-through behaviour for untyped callers.
      return EMPTY_RESPONSE_RETRY_BASE_DELAY_MS * attemptNumber;
  }
}
|
||||
|
||||
/**
 * Legacy entry point for transient provider retries.
 *
 * Forwards its options to `getRetryDelayMs` with the category fixed to
 * `"transient_provider"`, so both functions return the same delay.
 *
 * @param opts.attempt - Retry attempt number, forwarded unchanged.
 * @param opts.detail - Error detail, forwarded unchanged.
 * @param opts.retryAfterMs - Optional server-provided delay, forwarded unchanged.
 * @returns The delay in milliseconds computed by `getRetryDelayMs`.
 */
export function getTransientRetryDelayMs(opts: {
  attempt: number;
  detail: unknown;
  retryAfterMs?: number | null;
}): number {
  const { attempt, detail, retryAfterMs } = opts;
  return getRetryDelayMs({
    category: "transient_provider",
    attempt,
    detail,
    retryAfterMs,
  });
}
|
||||
|
||||
// ── Pre-stream conflict routing ─────────────────────────────────────
|
||||
|
||||
export type PreStreamConflictKind =
|
||||
|
||||
@@ -32,6 +32,7 @@ import {
|
||||
extractConflictDetail,
|
||||
fetchRunErrorDetail,
|
||||
getPreStreamErrorAction,
|
||||
getRetryDelayMs,
|
||||
isApprovalPendingError,
|
||||
isEmptyResponseRetryable,
|
||||
isInvalidToolCallIdsError,
|
||||
@@ -331,7 +332,6 @@ const EMPTY_RESPONSE_MAX_RETRIES = 2;
|
||||
|
||||
// Retry config for 409 "conversation busy" errors (exponential backoff)
|
||||
const CONVERSATION_BUSY_MAX_RETRIES = 3; // 10s -> 20s -> 40s
|
||||
const CONVERSATION_BUSY_RETRY_BASE_DELAY_MS = 10000; // 10 seconds
|
||||
|
||||
// Message shown when user interrupts the stream
|
||||
const INTERRUPT_MESSAGE =
|
||||
@@ -4072,9 +4072,10 @@ export default function App({
|
||||
// Check for 409 "conversation busy" error - retry with exponential backoff
|
||||
if (preStreamAction === "retry_conversation_busy") {
|
||||
conversationBusyRetriesRef.current += 1;
|
||||
const retryDelayMs =
|
||||
CONVERSATION_BUSY_RETRY_BASE_DELAY_MS *
|
||||
2 ** (conversationBusyRetriesRef.current - 1);
|
||||
const retryDelayMs = getRetryDelayMs({
|
||||
category: "conversation_busy",
|
||||
attempt: conversationBusyRetriesRef.current,
|
||||
});
|
||||
|
||||
// Log the conversation-busy error
|
||||
telemetry.trackError(
|
||||
@@ -4142,7 +4143,12 @@ export default function App({
|
||||
preStreamError.headers?.get("retry-after"),
|
||||
)
|
||||
: null;
|
||||
const delayMs = retryAfterMs ?? 1000 * 2 ** (attempt - 1);
|
||||
const delayMs = getRetryDelayMs({
|
||||
category: "transient_provider",
|
||||
attempt,
|
||||
detail: errorDetail,
|
||||
retryAfterMs,
|
||||
});
|
||||
|
||||
// Log the error that triggered the retry
|
||||
telemetry.trackError(
|
||||
@@ -5348,7 +5354,10 @@ export default function App({
|
||||
) {
|
||||
emptyResponseRetriesRef.current += 1;
|
||||
const attempt = emptyResponseRetriesRef.current;
|
||||
const delayMs = 500 * attempt;
|
||||
const delayMs = getRetryDelayMs({
|
||||
category: "empty_response",
|
||||
attempt,
|
||||
});
|
||||
|
||||
// Only append a nudge on the last attempt
|
||||
if (attempt >= EMPTY_RESPONSE_MAX_RETRIES) {
|
||||
@@ -5397,7 +5406,11 @@ export default function App({
|
||||
) {
|
||||
llmApiErrorRetriesRef.current += 1;
|
||||
const attempt = llmApiErrorRetriesRef.current;
|
||||
const delayMs = 1000 * 2 ** (attempt - 1); // 1s, 2s, 4s
|
||||
const delayMs = getRetryDelayMs({
|
||||
category: "transient_provider",
|
||||
attempt,
|
||||
detail: detailFromRun ?? fallbackError,
|
||||
});
|
||||
|
||||
// Log the error that triggered the retry
|
||||
telemetry.trackError(
|
||||
|
||||
@@ -12,6 +12,7 @@ import {
|
||||
extractConflictDetail,
|
||||
fetchRunErrorDetail,
|
||||
getPreStreamErrorAction,
|
||||
getRetryDelayMs,
|
||||
isApprovalPendingError,
|
||||
isEmptyResponseRetryable,
|
||||
isInvalidToolCallIdsError,
|
||||
@@ -133,7 +134,6 @@ const EMPTY_RESPONSE_MAX_RETRIES = 2;
|
||||
|
||||
// Retry config for 409 "conversation busy" errors (exponential backoff)
|
||||
const CONVERSATION_BUSY_MAX_RETRIES = 3; // 10s -> 20s -> 40s
|
||||
const CONVERSATION_BUSY_RETRY_BASE_DELAY_MS = 10000; // 10 seconds
|
||||
|
||||
export type BidirectionalQueuedInput = QueuedTurnInput<
|
||||
MessageCreate["content"]
|
||||
@@ -1544,9 +1544,10 @@ ${SYSTEM_REMINDER_CLOSE}
|
||||
// Check for 409 "conversation busy" error - retry with exponential backoff
|
||||
if (preStreamAction === "retry_conversation_busy") {
|
||||
conversationBusyRetries += 1;
|
||||
const retryDelayMs =
|
||||
CONVERSATION_BUSY_RETRY_BASE_DELAY_MS *
|
||||
2 ** (conversationBusyRetries - 1);
|
||||
const retryDelayMs = getRetryDelayMs({
|
||||
category: "conversation_busy",
|
||||
attempt: conversationBusyRetries,
|
||||
});
|
||||
|
||||
// Emit retry message for stream-json mode
|
||||
if (outputFormat === "stream-json") {
|
||||
@@ -1579,7 +1580,12 @@ ${SYSTEM_REMINDER_CLOSE}
|
||||
preStreamError.headers?.get("retry-after"),
|
||||
)
|
||||
: null;
|
||||
const delayMs = retryAfterMs ?? 1000 * 2 ** (attempt - 1);
|
||||
const delayMs = getRetryDelayMs({
|
||||
category: "transient_provider",
|
||||
attempt,
|
||||
detail: errorDetail,
|
||||
retryAfterMs,
|
||||
});
|
||||
|
||||
llmApiErrorRetries = attempt;
|
||||
|
||||
@@ -1910,8 +1916,11 @@ ${SYSTEM_REMINDER_CLOSE}
|
||||
if (stopReason === "llm_api_error") {
|
||||
if (llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) {
|
||||
const attempt = llmApiErrorRetries + 1;
|
||||
const baseDelayMs = 1000;
|
||||
const delayMs = baseDelayMs * 2 ** (attempt - 1);
|
||||
const delayMs = getRetryDelayMs({
|
||||
category: "transient_provider",
|
||||
attempt,
|
||||
detail: detailFromRun,
|
||||
});
|
||||
|
||||
llmApiErrorRetries = attempt;
|
||||
|
||||
@@ -2038,7 +2047,10 @@ ${SYSTEM_REMINDER_CLOSE}
|
||||
)
|
||||
) {
|
||||
const attempt = emptyResponseRetries + 1;
|
||||
const delayMs = 500 * attempt;
|
||||
const delayMs = getRetryDelayMs({
|
||||
category: "empty_response",
|
||||
attempt,
|
||||
});
|
||||
|
||||
emptyResponseRetries = attempt;
|
||||
|
||||
@@ -2075,8 +2087,11 @@ ${SYSTEM_REMINDER_CLOSE}
|
||||
|
||||
if (shouldRetryRunMetadataError(errorType, detail)) {
|
||||
const attempt = llmApiErrorRetries + 1;
|
||||
const baseDelayMs = 1000;
|
||||
const delayMs = baseDelayMs * 2 ** (attempt - 1);
|
||||
const delayMs = getRetryDelayMs({
|
||||
category: "transient_provider",
|
||||
attempt,
|
||||
detail,
|
||||
});
|
||||
|
||||
llmApiErrorRetries = attempt;
|
||||
|
||||
@@ -3169,7 +3184,12 @@ async function runBidirectionalMode(
|
||||
preStreamError.headers?.get("retry-after"),
|
||||
)
|
||||
: null;
|
||||
const delayMs = retryAfterMs ?? 1000 * 2 ** (attempt - 1);
|
||||
const delayMs = getRetryDelayMs({
|
||||
category: "transient_provider",
|
||||
attempt,
|
||||
detail: errorDetail,
|
||||
retryAfterMs,
|
||||
});
|
||||
preStreamTransientRetries = attempt;
|
||||
|
||||
const retryMsg: RetryMessage = {
|
||||
|
||||
@@ -3,6 +3,8 @@ import {
|
||||
classifyPreStreamConflict,
|
||||
extractConflictDetail,
|
||||
getPreStreamErrorAction,
|
||||
getRetryDelayMs,
|
||||
getTransientRetryDelayMs,
|
||||
isApprovalPendingError,
|
||||
isConversationBusyError,
|
||||
isEmptyResponseError,
|
||||
@@ -284,6 +286,90 @@ describe("parseRetryAfterHeaderMs", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("getRetryDelayMs", () => {
  // Fixture: a generic transient failure with no Cloudflare markers.
  const connectionErrorDetail = "Connection error during streaming";

  // Fixture: a Cloudflare 521 HTML error body as seen in error details.
  const cloudflare521Detail =
    "521 <!DOCTYPE html><html><head><title>api.letta.com | 521: Web server is down</title></head><body>Cloudflare Ray ID: 9d431b5f6f656c08</body></html>";

  // Shorthand for the transient_provider profile without a Retry-After value.
  const transientDelay = (attempt: number, detail: string): number =>
    getRetryDelayMs({ category: "transient_provider", attempt, detail });

  test("uses default transient backoff for non-Cloudflare details", () => {
    expect(transientDelay(1, connectionErrorDetail)).toBe(1000);
    expect(transientDelay(2, connectionErrorDetail)).toBe(2000);
  });

  test("uses larger transient base for Cloudflare edge 52x details", () => {
    expect(transientDelay(1, cloudflare521Detail)).toBe(5000);
    expect(transientDelay(3, cloudflare521Detail)).toBe(20000);
  });

  test("uses Retry-After delay when provided for transient retries", () => {
    const delayMs = getRetryDelayMs({
      category: "transient_provider",
      attempt: 3,
      detail: cloudflare521Detail,
      retryAfterMs: 7000,
    });

    expect(delayMs).toBe(7000);
  });

  test("uses exponential conversation_busy profile", () => {
    const firstDelayMs = getRetryDelayMs({
      category: "conversation_busy",
      attempt: 1,
    });
    expect(firstDelayMs).toBe(10000);

    const secondDelayMs = getRetryDelayMs({
      category: "conversation_busy",
      attempt: 2,
    });
    expect(secondDelayMs).toBe(20000);
  });

  test("uses linear empty_response profile", () => {
    const firstDelayMs = getRetryDelayMs({
      category: "empty_response",
      attempt: 1,
    });
    expect(firstDelayMs).toBe(500);

    const secondDelayMs = getRetryDelayMs({
      category: "empty_response",
      attempt: 2,
    });
    expect(secondDelayMs).toBe(1000);
  });
});
|
||||
|
||||
describe("getTransientRetryDelayMs", () => {
  test("matches transient_provider category behavior", () => {
    const detail = "Connection error during streaming";

    // The wrapper must agree with the category-based API for the same input.
    const viaCategoryApi = getRetryDelayMs({
      category: "transient_provider",
      attempt: 2,
      detail,
    });
    const viaWrapper = getTransientRetryDelayMs({ attempt: 2, detail });

    expect(viaWrapper).toBe(viaCategoryApi);
  });
});
|
||||
|
||||
// ── Error text extraction ───────────────────────────────────────────
|
||||
|
||||
describe("extractConflictDetail", () => {
|
||||
|
||||
@@ -23,6 +23,7 @@ import { getStreamToolContextId, sendMessageStream } from "../agent/message";
|
||||
import {
|
||||
extractConflictDetail,
|
||||
getPreStreamErrorAction,
|
||||
getRetryDelayMs,
|
||||
isApprovalPendingError,
|
||||
isInvalidToolCallIdsError,
|
||||
parseRetryAfterHeaderMs,
|
||||
@@ -1536,7 +1537,7 @@ async function sendMessageStreamWithRetry(
|
||||
let transientRetries = 0;
|
||||
let conversationBusyRetries = 0;
|
||||
let preStreamRecoveryAttempts = 0;
|
||||
const MAX_CONVERSATION_BUSY_RETRIES = 1;
|
||||
const MAX_CONVERSATION_BUSY_RETRIES = 3;
|
||||
|
||||
// eslint-disable-next-line no-constant-condition
|
||||
while (true) {
|
||||
@@ -1609,7 +1610,12 @@ async function sendMessageStreamWithRetry(
|
||||
preStreamError.headers?.get("retry-after"),
|
||||
)
|
||||
: null;
|
||||
const delayMs = retryAfterMs ?? 1000 * 2 ** (attempt - 1);
|
||||
const delayMs = getRetryDelayMs({
|
||||
category: "transient_provider",
|
||||
attempt,
|
||||
detail: errorDetail,
|
||||
retryAfterMs,
|
||||
});
|
||||
transientRetries = attempt;
|
||||
|
||||
emitToWS(socket, {
|
||||
@@ -1631,7 +1637,10 @@ async function sendMessageStreamWithRetry(
|
||||
|
||||
if (action === "retry_conversation_busy") {
|
||||
const attempt = conversationBusyRetries + 1;
|
||||
const delayMs = 2500;
|
||||
const delayMs = getRetryDelayMs({
|
||||
category: "conversation_busy",
|
||||
attempt,
|
||||
});
|
||||
conversationBusyRetries = attempt;
|
||||
|
||||
emitToWS(socket, {
|
||||
|
||||
Reference in New Issue
Block a user