fix: patch desync fallback (#452)
This commit is contained in:
@@ -1671,29 +1671,33 @@ export default function App({
|
||||
}
|
||||
|
||||
// Unexpected stop reason (error, llm_api_error, etc.)
|
||||
// Check for approval desync errors even if stop_reason isn't llm_api_error.
|
||||
// Cache desync detection and last failure for consistent handling
|
||||
const isApprovalPayload =
|
||||
currentInput.length === 1 && currentInput[0]?.type === "approval";
|
||||
|
||||
const approvalDesyncDetected = async () => {
|
||||
// 1) Check run metadata
|
||||
const detailFromRun = await fetchRunErrorDetail(lastRunId);
|
||||
if (isApprovalStateDesyncError(detailFromRun)) return true;
|
||||
|
||||
// 2) Check the most recent streamed error line in this turn
|
||||
for (let i = buffersRef.current.order.length - 1; i >= 0; i -= 1) {
|
||||
const id = buffersRef.current.order[i];
|
||||
if (!id) continue;
|
||||
const entry = buffersRef.current.byId.get(id);
|
||||
if (entry?.kind === "error") {
|
||||
return isApprovalStateDesyncError(entry.text);
|
||||
}
|
||||
// Capture the most recent error text in this turn (if any)
|
||||
let latestErrorText: string | null = null;
|
||||
for (let i = buffersRef.current.order.length - 1; i >= 0; i -= 1) {
|
||||
const id = buffersRef.current.order[i];
|
||||
if (!id) continue;
|
||||
const entry = buffersRef.current.byId.get(id);
|
||||
if (entry?.kind === "error" && typeof entry.text === "string") {
|
||||
latestErrorText = entry.text;
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
}
|
||||
|
||||
if (isApprovalPayload && (await approvalDesyncDetected())) {
|
||||
// Limit how many times we try this recovery to avoid loops
|
||||
// Detect approval desync once per turn
|
||||
const detailFromRun = await fetchRunErrorDetail(lastRunId);
|
||||
const desyncDetected =
|
||||
isApprovalStateDesyncError(detailFromRun) ||
|
||||
isApprovalStateDesyncError(latestErrorText);
|
||||
|
||||
// Track last failure info so we can emit it if retries stop
|
||||
const lastFailureMessage = latestErrorText || detailFromRun || null;
|
||||
|
||||
// Check for approval desync errors even if stop_reason isn't llm_api_error.
|
||||
if (isApprovalPayload && desyncDetected) {
|
||||
if (llmApiErrorRetriesRef.current < LLM_API_ERROR_MAX_RETRIES) {
|
||||
llmApiErrorRetriesRef.current += 1;
|
||||
const statusId = uid("status");
|
||||
@@ -1721,6 +1725,16 @@ export default function App({
|
||||
refreshDerived();
|
||||
continue;
|
||||
}
|
||||
|
||||
// No retries left: emit the failure and exit
|
||||
const errorToShow =
|
||||
lastFailureMessage ||
|
||||
`An error occurred during agent execution\n(run_id: ${lastRunId ?? "unknown"}, stop_reason: ${stopReasonToHandle})`;
|
||||
appendError(errorToShow, true);
|
||||
setStreaming(false);
|
||||
sendDesktopNotification();
|
||||
refreshDerived();
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if this is a retriable error (transient LLM API error)
|
||||
|
||||
121
src/headless.ts
121
src/headless.ts
@@ -1132,12 +1132,40 @@ export async function handleHeadlessCommand(
|
||||
continue;
|
||||
}
|
||||
|
||||
// Cache latest error text for this turn
|
||||
let latestErrorText: string | null = null;
|
||||
const linesForTurn = toLines(buffers);
|
||||
for (let i = linesForTurn.length - 1; i >= 0; i -= 1) {
|
||||
const line = linesForTurn[i];
|
||||
if (
|
||||
line?.kind === "error" &&
|
||||
"text" in line &&
|
||||
typeof line.text === "string"
|
||||
) {
|
||||
latestErrorText = line.text;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Detect approval desync once per turn
|
||||
const detailFromRun = await fetchRunErrorDetail(lastRunId);
|
||||
const approvalDesynced =
|
||||
currentInput.length === 1 &&
|
||||
currentInput[0]?.type === "approval" &&
|
||||
(isApprovalStateDesyncError(detailFromRun) ||
|
||||
isApprovalStateDesyncError(latestErrorText));
|
||||
|
||||
// Track last failure text for emitting on exit
|
||||
const lastFailureText =
|
||||
latestErrorText ||
|
||||
detailFromRun ||
|
||||
(lastRunId
|
||||
? `An error occurred during agent execution\n(run_id: ${lastRunId}, stop_reason: ${stopReason})`
|
||||
: `An error occurred during agent execution\n(stop_reason: ${stopReason})`);
|
||||
|
||||
// Case 3: Transient LLM API error - retry with exponential backoff up to a limit
|
||||
if (stopReason === "llm_api_error") {
|
||||
const shouldUseApprovalRecovery =
|
||||
currentInput.length === 1 &&
|
||||
currentInput[0]?.type === "approval" &&
|
||||
isApprovalStateDesyncError(await fetchRunErrorDetail(lastRunId));
|
||||
const shouldUseApprovalRecovery = approvalDesynced;
|
||||
|
||||
if (llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) {
|
||||
const attempt = llmApiErrorRetries + 1;
|
||||
@@ -1180,54 +1208,51 @@ export async function handleHeadlessCommand(
|
||||
|
||||
// Fallback: if we were sending only approvals and hit an internal error that
|
||||
// says there is no pending approval, resend using the keep-alive recovery prompt.
|
||||
const isApprovalPayload =
|
||||
currentInput.length === 1 && currentInput[0]?.type === "approval";
|
||||
const approvalDesynced =
|
||||
isApprovalPayload &&
|
||||
(isApprovalStateDesyncError(await fetchRunErrorDetail(lastRunId)) ||
|
||||
(() => {
|
||||
const lines = toLines(buffers);
|
||||
for (let i = lines.length - 1; i >= 0; i -= 1) {
|
||||
const line = lines[i];
|
||||
if (!line) continue;
|
||||
if (
|
||||
line.kind === "error" &&
|
||||
"text" in line &&
|
||||
typeof line.text === "string"
|
||||
) {
|
||||
return isApprovalStateDesyncError(line.text ?? null);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
})());
|
||||
if (approvalDesynced) {
|
||||
if (llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) {
|
||||
llmApiErrorRetries += 1;
|
||||
|
||||
if (approvalDesynced && llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) {
|
||||
llmApiErrorRetries += 1;
|
||||
const retryReason = stopReason ?? "error";
|
||||
if (outputFormat === "stream-json") {
|
||||
const retryMsg: RetryMessage = {
|
||||
type: "retry",
|
||||
reason: retryReason,
|
||||
attempt: llmApiErrorRetries,
|
||||
max_attempts: LLM_API_ERROR_MAX_RETRIES,
|
||||
delay_ms: 0,
|
||||
run_id: lastRunId ?? undefined,
|
||||
session_id: sessionId,
|
||||
uuid: `retry-${lastRunId || crypto.randomUUID()}`,
|
||||
};
|
||||
console.log(JSON.stringify(retryMsg));
|
||||
} else {
|
||||
console.error(
|
||||
"Approval state desynced; resending keep-alive recovery prompt...",
|
||||
);
|
||||
}
|
||||
|
||||
const retryReason = stopReason ?? "error";
|
||||
if (outputFormat === "stream-json") {
|
||||
const retryMsg: RetryMessage = {
|
||||
type: "retry",
|
||||
reason: retryReason,
|
||||
attempt: llmApiErrorRetries,
|
||||
max_attempts: LLM_API_ERROR_MAX_RETRIES,
|
||||
delay_ms: 0,
|
||||
run_id: lastRunId ?? undefined,
|
||||
session_id: sessionId,
|
||||
uuid: `retry-${lastRunId || crypto.randomUUID()}`,
|
||||
};
|
||||
console.log(JSON.stringify(retryMsg));
|
||||
} else {
|
||||
console.error(
|
||||
"Approval state desynced; resending keep-alive recovery prompt...",
|
||||
);
|
||||
// Small pause to avoid rapid-fire retries
|
||||
await new Promise((resolve) => setTimeout(resolve, 250));
|
||||
|
||||
currentInput = [buildApprovalRecoveryMessage()];
|
||||
continue;
|
||||
}
|
||||
|
||||
// Small pause to avoid rapid-fire retries
|
||||
await new Promise((resolve) => setTimeout(resolve, 250));
|
||||
|
||||
currentInput = [buildApprovalRecoveryMessage()];
|
||||
continue;
|
||||
// No retries left or non-retriable: emit error and exit
|
||||
if (outputFormat === "stream-json") {
|
||||
const errorMsg: ErrorMessage = {
|
||||
type: "error",
|
||||
message: lastFailureText,
|
||||
stop_reason: stopReason,
|
||||
run_id: lastRunId ?? undefined,
|
||||
session_id: sessionId,
|
||||
uuid: `error-${lastRunId || crypto.randomUUID()}`,
|
||||
};
|
||||
console.log(JSON.stringify(errorMsg));
|
||||
} else {
|
||||
console.error(lastFailureText);
|
||||
}
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Unexpected stop reason (error, llm_api_error, etc.)
|
||||
|
||||
Reference in New Issue
Block a user