feat: clean up keep-alive fallback handling (#663)
This commit is contained in:
139
src/headless.ts
139
src/headless.ts
@@ -9,10 +9,8 @@ import type { ApprovalCreate } from "@letta-ai/letta-client/resources/agents/mes
|
||||
import type { StopReasonType } from "@letta-ai/letta-client/resources/runs/runs";
|
||||
import type { ApprovalResult } from "./agent/approval-execution";
|
||||
import {
|
||||
buildApprovalRecoveryMessage,
|
||||
fetchRunErrorDetail,
|
||||
isApprovalPendingError,
|
||||
isApprovalStateDesyncError,
|
||||
isConversationBusyError,
|
||||
isInvalidToolCallIdsError,
|
||||
} from "./agent/approval-recovery";
|
||||
@@ -1385,26 +1383,11 @@ export async function handleHeadlessCommand(
|
||||
}
|
||||
}
|
||||
|
||||
// Detect approval desync once per turn
|
||||
// Fetch run error detail for invalid tool call ID detection
|
||||
const detailFromRun = await fetchRunErrorDetail(lastRunId);
|
||||
const approvalDesynced =
|
||||
currentInput.length === 1 &&
|
||||
currentInput[0]?.type === "approval" &&
|
||||
(isApprovalStateDesyncError(detailFromRun) ||
|
||||
isApprovalStateDesyncError(latestErrorText));
|
||||
|
||||
// Track last failure text for emitting on exit
|
||||
const lastFailureText =
|
||||
latestErrorText ||
|
||||
detailFromRun ||
|
||||
(lastRunId
|
||||
? `An error occurred during agent execution\n(run_id: ${lastRunId}, stop_reason: ${stopReason})`
|
||||
: `An error occurred during agent execution\n(stop_reason: ${stopReason})`);
|
||||
|
||||
// Case 3: Transient LLM API error - retry with exponential backoff up to a limit
|
||||
if (stopReason === "llm_api_error") {
|
||||
const shouldUseApprovalRecovery = approvalDesynced;
|
||||
|
||||
if (llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) {
|
||||
const attempt = llmApiErrorRetries + 1;
|
||||
const baseDelayMs = 1000;
|
||||
@@ -1426,104 +1409,64 @@ export async function handleHeadlessCommand(
|
||||
console.log(JSON.stringify(retryMsg));
|
||||
} else {
|
||||
const delaySeconds = Math.round(delayMs / 1000);
|
||||
const recoveryNote = shouldUseApprovalRecovery
|
||||
? " (approval state desynced - sending keep-going prompt)"
|
||||
: "";
|
||||
console.error(
|
||||
`LLM API error encountered (attempt ${attempt} of ${LLM_API_ERROR_MAX_RETRIES}), retrying in ${delaySeconds}s...${recoveryNote}`,
|
||||
`LLM API error encountered (attempt ${attempt} of ${LLM_API_ERROR_MAX_RETRIES}), retrying in ${delaySeconds}s...`,
|
||||
);
|
||||
}
|
||||
|
||||
// Exponential backoff before retrying the same input
|
||||
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
||||
|
||||
if (shouldUseApprovalRecovery) {
|
||||
currentInput = [buildApprovalRecoveryMessage()];
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: if we were sending only approvals and hit an internal error that
|
||||
// says there is no pending approval, resend using the keep-alive recovery prompt.
|
||||
if (approvalDesynced) {
|
||||
// "Invalid tool call IDs" means server HAS pending approvals but with different IDs.
|
||||
// Fetch the actual pending approvals and process them before retrying.
|
||||
if (
|
||||
isInvalidToolCallIdsError(detailFromRun) ||
|
||||
isInvalidToolCallIdsError(latestErrorText)
|
||||
) {
|
||||
if (outputFormat === "stream-json") {
|
||||
const recoveryMsg: RecoveryMessage = {
|
||||
type: "recovery",
|
||||
recovery_type: "invalid_tool_call_ids",
|
||||
message:
|
||||
"Tool call ID mismatch; fetching actual pending approvals and resyncing",
|
||||
run_id: lastRunId ?? undefined,
|
||||
session_id: sessionId,
|
||||
uuid: `recovery-${lastRunId || crypto.randomUUID()}`,
|
||||
};
|
||||
console.log(JSON.stringify(recoveryMsg));
|
||||
} else {
|
||||
console.error(
|
||||
"Tool call ID mismatch; fetching actual pending approvals...",
|
||||
);
|
||||
}
|
||||
// "Invalid tool call IDs" means server HAS pending approvals but with different IDs.
|
||||
// Fetch the actual pending approvals and process them before retrying.
|
||||
const invalidIdsDetected =
|
||||
isInvalidToolCallIdsError(detailFromRun) ||
|
||||
isInvalidToolCallIdsError(latestErrorText);
|
||||
|
||||
try {
|
||||
// Fetch and process actual pending approvals from server
|
||||
await resolveAllPendingApprovals();
|
||||
// After processing, continue to next iteration (fresh state)
|
||||
continue;
|
||||
} catch {
|
||||
// If fetch fails, fall through to general desync recovery
|
||||
}
|
||||
}
|
||||
|
||||
if (llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) {
|
||||
llmApiErrorRetries += 1;
|
||||
|
||||
const retryReason = stopReason ?? "error";
|
||||
if (outputFormat === "stream-json") {
|
||||
const retryMsg: RetryMessage = {
|
||||
type: "retry",
|
||||
reason: retryReason,
|
||||
attempt: llmApiErrorRetries,
|
||||
max_attempts: LLM_API_ERROR_MAX_RETRIES,
|
||||
delay_ms: 0,
|
||||
run_id: lastRunId ?? undefined,
|
||||
session_id: sessionId,
|
||||
uuid: `retry-${lastRunId || crypto.randomUUID()}`,
|
||||
};
|
||||
console.log(JSON.stringify(retryMsg));
|
||||
} else {
|
||||
console.error(
|
||||
"Approval state desynced; resending keep-alive recovery prompt...",
|
||||
);
|
||||
}
|
||||
|
||||
// Small pause to avoid rapid-fire retries
|
||||
await new Promise((resolve) => setTimeout(resolve, 250));
|
||||
|
||||
currentInput = [buildApprovalRecoveryMessage()];
|
||||
continue;
|
||||
}
|
||||
|
||||
// No retries left or non-retriable: emit error and exit
|
||||
if (invalidIdsDetected) {
|
||||
if (outputFormat === "stream-json") {
|
||||
const errorMsg: ErrorMessage = {
|
||||
type: "error",
|
||||
message: lastFailureText,
|
||||
stop_reason: stopReason,
|
||||
const recoveryMsg: RecoveryMessage = {
|
||||
type: "recovery",
|
||||
recovery_type: "invalid_tool_call_ids",
|
||||
message:
|
||||
"Tool call ID mismatch; fetching actual pending approvals and resyncing",
|
||||
run_id: lastRunId ?? undefined,
|
||||
session_id: sessionId,
|
||||
uuid: `error-${lastRunId || crypto.randomUUID()}`,
|
||||
uuid: `recovery-${lastRunId || crypto.randomUUID()}`,
|
||||
};
|
||||
console.log(JSON.stringify(errorMsg));
|
||||
console.log(JSON.stringify(recoveryMsg));
|
||||
} else {
|
||||
console.error(lastFailureText);
|
||||
console.error(
|
||||
"Tool call ID mismatch; fetching actual pending approvals...",
|
||||
);
|
||||
}
|
||||
|
||||
try {
|
||||
// Fetch and process actual pending approvals from server
|
||||
await resolveAllPendingApprovals();
|
||||
// After processing, continue to next iteration (fresh state)
|
||||
continue;
|
||||
} catch {
|
||||
// If fetch fails, exit with error
|
||||
if (outputFormat === "stream-json") {
|
||||
const errorMsg: ErrorMessage = {
|
||||
type: "error",
|
||||
message: "Failed to fetch pending approvals for resync",
|
||||
stop_reason: stopReason,
|
||||
run_id: lastRunId ?? undefined,
|
||||
session_id: sessionId,
|
||||
uuid: `error-${lastRunId || crypto.randomUUID()}`,
|
||||
};
|
||||
console.log(JSON.stringify(errorMsg));
|
||||
} else {
|
||||
console.error("Failed to fetch pending approvals for resync");
|
||||
}
|
||||
process.exit(1);
|
||||
}
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Unexpected stop reason (error, llm_api_error, etc.)
|
||||
|
||||
Reference in New Issue
Block a user