fix(retry): increase Cloudflare transient backoff to 5/10/20 (#1307)

This commit is contained in:
jnjpng
2026-03-09 14:51:41 -07:00
committed by GitHub
parent 28039dcb43
commit a57cf84e03
6 changed files with 207 additions and 21 deletions

View File

@@ -12,6 +12,7 @@ import {
extractConflictDetail,
fetchRunErrorDetail,
getPreStreamErrorAction,
getRetryDelayMs,
isApprovalPendingError,
isEmptyResponseRetryable,
isInvalidToolCallIdsError,
@@ -133,7 +134,6 @@ const EMPTY_RESPONSE_MAX_RETRIES = 2;
// Retry config for 409 "conversation busy" errors (exponential backoff)
const CONVERSATION_BUSY_MAX_RETRIES = 3; // 10s -> 20s -> 40s
const CONVERSATION_BUSY_RETRY_BASE_DELAY_MS = 10000; // 10 seconds
export type BidirectionalQueuedInput = QueuedTurnInput<
MessageCreate["content"]
@@ -1544,9 +1544,10 @@ ${SYSTEM_REMINDER_CLOSE}
// Check for 409 "conversation busy" error - retry with exponential backoff
if (preStreamAction === "retry_conversation_busy") {
conversationBusyRetries += 1;
const retryDelayMs =
CONVERSATION_BUSY_RETRY_BASE_DELAY_MS *
2 ** (conversationBusyRetries - 1);
const retryDelayMs = getRetryDelayMs({
category: "conversation_busy",
attempt: conversationBusyRetries,
});
// Emit retry message for stream-json mode
if (outputFormat === "stream-json") {
@@ -1579,7 +1580,12 @@ ${SYSTEM_REMINDER_CLOSE}
preStreamError.headers?.get("retry-after"),
)
: null;
const delayMs = retryAfterMs ?? 1000 * 2 ** (attempt - 1);
const delayMs = getRetryDelayMs({
category: "transient_provider",
attempt,
detail: errorDetail,
retryAfterMs,
});
llmApiErrorRetries = attempt;
@@ -1910,8 +1916,11 @@ ${SYSTEM_REMINDER_CLOSE}
if (stopReason === "llm_api_error") {
if (llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) {
const attempt = llmApiErrorRetries + 1;
const baseDelayMs = 1000;
const delayMs = baseDelayMs * 2 ** (attempt - 1);
const delayMs = getRetryDelayMs({
category: "transient_provider",
attempt,
detail: detailFromRun,
});
llmApiErrorRetries = attempt;
@@ -2038,7 +2047,10 @@ ${SYSTEM_REMINDER_CLOSE}
)
) {
const attempt = emptyResponseRetries + 1;
const delayMs = 500 * attempt;
const delayMs = getRetryDelayMs({
category: "empty_response",
attempt,
});
emptyResponseRetries = attempt;
@@ -2075,8 +2087,11 @@ ${SYSTEM_REMINDER_CLOSE}
if (shouldRetryRunMetadataError(errorType, detail)) {
const attempt = llmApiErrorRetries + 1;
const baseDelayMs = 1000;
const delayMs = baseDelayMs * 2 ** (attempt - 1);
const delayMs = getRetryDelayMs({
category: "transient_provider",
attempt,
detail,
});
llmApiErrorRetries = attempt;
@@ -3169,7 +3184,12 @@ async function runBidirectionalMode(
preStreamError.headers?.get("retry-after"),
)
: null;
const delayMs = retryAfterMs ?? 1000 * 2 ** (attempt - 1);
const delayMs = getRetryDelayMs({
category: "transient_provider",
attempt,
detail: errorDetail,
retryAfterMs,
});
preStreamTransientRetries = attempt;
const retryMsg: RetryMessage = {