feat: Add retries for stop_reason error with metadata (#191)
This commit is contained in:
@@ -830,6 +830,55 @@ export async function handleHeadlessCommand(
|
||||
}
|
||||
|
||||
// Unexpected stop reason (error, llm_api_error, etc.)
|
||||
// Before failing, check run metadata to see if this is a retriable llm_api_error
|
||||
if (
|
||||
stopReason === "error" &&
|
||||
lastRunId &&
|
||||
llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES
|
||||
) {
|
||||
try {
|
||||
const run = await client.runs.retrieve(lastRunId);
|
||||
const metaError = run.metadata?.error as
|
||||
| {
|
||||
type?: string;
|
||||
message?: string;
|
||||
detail?: string;
|
||||
}
|
||||
| undefined;
|
||||
|
||||
if (metaError?.type === "llm_api_error") {
|
||||
const attempt = llmApiErrorRetries + 1;
|
||||
const baseDelayMs = 1000;
|
||||
const delayMs = baseDelayMs * 2 ** (attempt - 1);
|
||||
|
||||
llmApiErrorRetries = attempt;
|
||||
|
||||
if (outputFormat === "stream-json") {
|
||||
console.log(
|
||||
JSON.stringify({
|
||||
type: "retry",
|
||||
reason: "llm_api_error",
|
||||
attempt,
|
||||
max_attempts: LLM_API_ERROR_MAX_RETRIES,
|
||||
delay_ms: delayMs,
|
||||
run_id: lastRunId,
|
||||
}),
|
||||
);
|
||||
} else {
|
||||
const delaySeconds = Math.round(delayMs / 1000);
|
||||
console.error(
|
||||
`LLM API error encountered (attempt ${attempt} of ${LLM_API_ERROR_MAX_RETRIES}), retrying in ${delaySeconds}s...`,
|
||||
);
|
||||
}
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
||||
continue;
|
||||
}
|
||||
} catch (_e) {
|
||||
// If we can't fetch run metadata, fall through to normal error handling
|
||||
}
|
||||
}
|
||||
|
||||
// Mark incomplete tool calls as cancelled to prevent stuck state
|
||||
markIncompleteToolsAsCancelled(buffers);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user