feat: Add retries for stop_reason error with metadata (#191)

This commit is contained in:
Kevin Lin
2025-12-12 09:51:49 -08:00
committed by GitHub
parent 97e6b7e2a7
commit a58dddaf3d

View File

@@ -830,6 +830,55 @@ export async function handleHeadlessCommand(
}
// Unexpected stop reason (error, llm_api_error, etc.)
// Before failing, check run metadata to see if this is a retriable llm_api_error
//
// The retry path is only considered when all three conditions hold:
//   - the stop reason is exactly "error",
//   - a run id is available to look up,
//   - the retry counter is still below LLM_API_ERROR_MAX_RETRIES.
// Otherwise execution falls straight through to the error handling below.
if (
stopReason === "error" &&
lastRunId &&
llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES
) {
try {
// Fetch the run so its metadata can be inspected for the underlying error type.
const run = await client.runs.retrieve(lastRunId);
// Unchecked type assertion: the shape of metadata.error is not validated at
// runtime. All fields are optional, and only `type` is read in this block.
const metaError = run.metadata?.error as
| {
type?: string;
message?: string;
detail?: string;
}
| undefined;
if (metaError?.type === "llm_api_error") {
// Exponential backoff: the delay doubles with each attempt
// (attempt 1 -> 1000ms, attempt 2 -> 2000ms, attempt 3 -> 4000ms, ...).
const attempt = llmApiErrorRetries + 1;
const baseDelayMs = 1000;
const delayMs = baseDelayMs * 2 ** (attempt - 1);
// Record the attempt before sleeping so the guard above sees the updated count
// the next time this code is reached.
llmApiErrorRetries = attempt;
if (outputFormat === "stream-json") {
// Machine-readable mode: emit one JSON "retry" event via console.log.
console.log(
JSON.stringify({
type: "retry",
reason: "llm_api_error",
attempt,
max_attempts: LLM_API_ERROR_MAX_RETRIES,
delay_ms: delayMs,
run_id: lastRunId,
}),
);
} else {
// Other output formats: print a human-readable notice via console.error.
// delayMs is always a multiple of 1000 here, so the rounding is exact.
const delaySeconds = Math.round(delayMs / 1000);
console.error(
`LLM API error encountered (attempt ${attempt} of ${LLM_API_ERROR_MAX_RETRIES}), retrying in ${delaySeconds}s...`,
);
}
// Wait out the backoff delay, then jump to the next iteration of the enclosing
// loop. NOTE(review): the loop itself is outside this view — presumably it
// re-issues the request; confirm against the surrounding function.
await new Promise((resolve) => setTimeout(resolve, delayMs));
continue;
}
} catch (_e) {
// If we can't fetch run metadata, fall through to normal error handling
// (any exception thrown inside the try block is deliberately ignored here).
}
}
// Mark incomplete tool calls as cancelled to prevent stuck state
markIncompleteToolsAsCancelled(buffers);