fix: expand chatgpt oauth retry classification (#980)

This commit is contained in:
Charles Packer
2026-02-16 14:36:15 -08:00
committed by GitHub
parent f2f59e4591
commit 5435f44c45
10 changed files with 430 additions and 49 deletions

View File

@@ -34,8 +34,10 @@ import {
getPreStreamErrorAction,
isApprovalPendingError,
isInvalidToolCallIdsError,
parseRetryAfterHeaderMs,
rebuildInputWithFreshDenials,
shouldAttemptApprovalRecovery,
shouldRetryRunMetadataError,
} from "../agent/approval-recovery";
import { prefetchAvailableModelHandles } from "../agent/available-models";
import { getResumeData } from "../agent/check-approval";
@@ -482,29 +484,7 @@ async function isRetriableError(
const errorType = metaError?.error_type ?? metaError?.error?.error_type;
const detail = metaError?.detail ?? metaError?.error?.detail ?? "";
// Don't retry 4xx client errors (validation, auth, malformed requests)
// These are not transient and won't succeed on retry
const is4xxError = /Error code: 4\d{2}/.test(detail);
if (errorType === "llm_error" && !is4xxError) return true;
// Fallback: detect LLM provider errors from detail even if misclassified
// This handles edge cases where streaming errors weren't properly converted to LLMError
// Patterns are derived from handle_llm_error() message formats in the backend
const llmProviderPatterns = [
"Anthropic API error", // anthropic_client.py:759
"OpenAI API error", // openai_client.py:1034
"ChatGPT API error", // chatgpt_oauth_client.py - upstream connect errors
"Google Vertex API error", // google_vertex_client.py:848
"overloaded", // anthropic_client.py:753 - used for LLMProviderOverloaded
"api_error", // Anthropic SDK error type field
"Network error", // Transient network failures during streaming
"Connection error during", // Peer disconnections, incomplete chunked reads (Anthropic, ChatGPT streaming)
];
if (
llmProviderPatterns.some((pattern) => detail.includes(pattern)) &&
!is4xxError
) {
if (shouldRetryRunMetadataError(errorType, detail)) {
return true;
}
@@ -3156,6 +3136,14 @@ export default function App({
errorDetail,
conversationBusyRetriesRef.current,
CONVERSATION_BUSY_MAX_RETRIES,
{
status:
preStreamError instanceof APIError
? preStreamError.status
: undefined,
transientRetries: llmApiErrorRetriesRef.current,
maxTransientRetries: LLM_API_ERROR_MAX_RETRIES,
},
);
// Resolve stale approval conflict: fetch real pending approvals, auto-deny, retry.
@@ -3238,6 +3226,54 @@ export default function App({
// User pressed ESC - fall through to error handling
}
// Retry pre-stream transient errors (429/5xx/network) with shared LLM retry budget
if (preStreamAction === "retry_transient") {
llmApiErrorRetriesRef.current += 1;
const attempt = llmApiErrorRetriesRef.current;
const retryAfterMs =
preStreamError instanceof APIError
? parseRetryAfterHeaderMs(
preStreamError.headers?.get("retry-after"),
)
: null;
const delayMs = retryAfterMs ?? 1000 * 2 ** (attempt - 1);
const statusId = uid("status");
buffersRef.current.byId.set(statusId, {
kind: "status",
id: statusId,
lines: [getRetryStatusMessage(errorDetail)],
});
buffersRef.current.order.push(statusId);
refreshDerived();
let cancelled = false;
const startTime = Date.now();
while (Date.now() - startTime < delayMs) {
if (
abortControllerRef.current?.signal.aborted ||
userCancelledRef.current
) {
cancelled = true;
break;
}
await new Promise((resolve) => setTimeout(resolve, 100));
}
buffersRef.current.byId.delete(statusId);
buffersRef.current.order = buffersRef.current.order.filter(
(id) => id !== statusId,
);
refreshDerived();
if (!cancelled) {
buffersRef.current.interrupted = false;
conversationBusyRetriesRef.current = 0;
continue;
}
// User pressed ESC - fall through to error handling
}
// Reset conversation busy retry counter on non-busy error
conversationBusyRetriesRef.current = 0;

View File

@@ -448,6 +448,22 @@ export function getRetryStatusMessage(
if (errorDetail.includes("Anthropic API is overloaded"))
return "Anthropic API is overloaded, retrying...";
if (
errorDetail.includes("ChatGPT API error") ||
errorDetail.includes("ChatGPT server error") ||
errorDetail.includes("upstream connect error")
) {
return "OpenAI ChatGPT backend connection failed, retrying...";
}
if (
errorDetail.includes("Connection error during streaming") ||
errorDetail.includes("incomplete chunked read") ||
errorDetail.includes("connection termination")
) {
return "OpenAI ChatGPT streaming connection dropped, retrying...";
}
if (errorDetail.includes("OpenAI API error"))
return "OpenAI API error, retrying...";
return DEFAULT_RETRY_MESSAGE;
}

View File

@@ -226,8 +226,9 @@ export async function drainStream(
fallbackError = errorMessage;
}
// Set error stop reason so drainStreamWithResume can try to reconnect
stopReason = "error";
// Preserve a stop reason already parsed from stream chunks (e.g. llm_api_error)
// and only fall back to generic "error" when none is available.
stopReason = streamProcessor.stopReason || "error";
markIncompleteToolsAsCancelled(buffers, true, "stream_error");
queueMicrotask(refresh);
} finally {