feat: Exponential backoff for llm_api_error (#181)
This commit is contained in:
@@ -24,6 +24,12 @@ import { drainStreamWithResume } from "./cli/helpers/stream";
|
||||
import { settingsManager } from "./settings-manager";
|
||||
import { checkToolPermission } from "./tools/manager";
|
||||
|
||||
// Maximum number of times to retry a turn when the backend
|
||||
// reports an `llm_api_error` stop reason. This helps smooth
|
||||
// over transient LLM/backend issues without requiring the
|
||||
// caller to manually resubmit the prompt.
|
||||
const LLM_API_ERROR_MAX_RETRIES = 3;
|
||||
|
||||
export async function handleHeadlessCommand(
|
||||
argv: string[],
|
||||
model?: string,
|
||||
@@ -439,6 +445,7 @@ export async function handleHeadlessCommand(
|
||||
|
||||
// Track lastRunId outside the while loop so it's available in catch block
|
||||
let lastKnownRunId: string | null = null;
|
||||
let llmApiErrorRetries = 0;
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
@@ -789,6 +796,39 @@ export async function handleHeadlessCommand(
|
||||
continue;
|
||||
}
|
||||
|
||||
// Case 3: Transient LLM API error - retry with exponential backoff up to a limit
|
||||
if (stopReason === "llm_api_error") {
|
||||
if (llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) {
|
||||
const attempt = llmApiErrorRetries + 1;
|
||||
const baseDelayMs = 1000;
|
||||
const delayMs = baseDelayMs * 2 ** (attempt - 1);
|
||||
|
||||
llmApiErrorRetries = attempt;
|
||||
|
||||
if (outputFormat === "stream-json") {
|
||||
console.log(
|
||||
JSON.stringify({
|
||||
type: "retry",
|
||||
reason: "llm_api_error",
|
||||
attempt,
|
||||
max_attempts: LLM_API_ERROR_MAX_RETRIES,
|
||||
delay_ms: delayMs,
|
||||
run_id: lastRunId,
|
||||
}),
|
||||
);
|
||||
} else {
|
||||
const delaySeconds = Math.round(delayMs / 1000);
|
||||
console.error(
|
||||
`LLM API error encountered (attempt ${attempt} of ${LLM_API_ERROR_MAX_RETRIES}), retrying in ${delaySeconds}s...`,
|
||||
);
|
||||
}
|
||||
|
||||
// Exponential backoff before retrying the same input
|
||||
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Unexpected stop reason (error, llm_api_error, etc.)
|
||||
// Mark incomplete tool calls as cancelled to prevent stuck state
|
||||
markIncompleteToolsAsCancelled(buffers);
|
||||
|
||||
Reference in New Issue
Block a user