From a58dddaf3d587d03ab3fc58dc929aee52b2bd4cb Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Fri, 12 Dec 2025 09:51:49 -0800 Subject: [PATCH] feat: Add retries for stop_reason `error` with metadata (#191) --- src/headless.ts | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/headless.ts b/src/headless.ts index 63f4273..4df8a12 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -830,6 +830,55 @@ export async function handleHeadlessCommand( } // Unexpected stop reason (error, llm_api_error, etc.) + // Before failing, check run metadata to see if this is a retriable llm_api_error + if ( + stopReason === "error" && + lastRunId && + llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES + ) { + try { + const run = await client.runs.retrieve(lastRunId); + const metaError = run.metadata?.error as + | { + type?: string; + message?: string; + detail?: string; + } + | undefined; + + if (metaError?.type === "llm_api_error") { + const attempt = llmApiErrorRetries + 1; + const baseDelayMs = 1000; + const delayMs = baseDelayMs * 2 ** (attempt - 1); + + llmApiErrorRetries = attempt; + + if (outputFormat === "stream-json") { + console.log( + JSON.stringify({ + type: "retry", + reason: "llm_api_error", + attempt, + max_attempts: LLM_API_ERROR_MAX_RETRIES, + delay_ms: delayMs, + run_id: lastRunId, + }), + ); + } else { + const delaySeconds = Math.round(delayMs / 1000); + console.error( + `LLM API error encountered (attempt ${attempt} of ${LLM_API_ERROR_MAX_RETRIES}), retrying in ${delaySeconds}s...`, + ); + } + + await new Promise((resolve) => setTimeout(resolve, delayMs)); + continue; + } + } catch (_e) { + // If we can't fetch run metadata, fall through to normal error handling + } + } + // Mark incomplete tool calls as cancelled to prevent stuck state markIncompleteToolsAsCancelled(buffers);