feat: add 409 retry, error improvements, and queue restoration (#618)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Charles Packer
2026-01-21 14:57:48 -08:00
committed by GitHub
parent 802136c868
commit 6a0bcdd683
5 changed files with 281 additions and 36 deletions

View File

@@ -13,6 +13,7 @@ import {
fetchRunErrorDetail,
isApprovalPendingError,
isApprovalStateDesyncError,
isConversationBusyError,
} from "./agent/approval-recovery";
import { getClient } from "./agent/client";
import { initializeLoadedSkillsFlag, setAgentContext } from "./agent/context";
@@ -59,6 +60,10 @@ import {
// caller to manually resubmit the prompt.
const LLM_API_ERROR_MAX_RETRIES = 3;
// Retry config for 409 "conversation busy" errors, which sendMessageStream can
// throw before any stream is returned (pre-stream error).
const CONVERSATION_BUSY_MAX_RETRIES = 1; // Only retry once, fail on 2nd 409
// Fixed wait before the retry; also reported as delay_ms in stream-json retry messages.
const CONVERSATION_BUSY_RETRY_DELAY_MS = 2500; // 2.5 seconds
export async function handleHeadlessCommand(
argv: string[],
model?: string,
@@ -945,15 +950,83 @@ export async function handleHeadlessCommand(
// Track lastRunId outside the while loop so it's available in catch block
let lastKnownRunId: string | null = null;
let llmApiErrorRetries = 0;
let conversationBusyRetries = 0;
markMilestone("HEADLESS_FIRST_STREAM_START");
measureSinceMilestone("headless-setup-total", "HEADLESS_CLIENT_READY");
try {
while (true) {
const stream = await sendMessageStream(conversationId, currentInput, {
agentId: agent.id,
});
// Wrap sendMessageStream in try-catch to handle pre-stream errors (e.g., 409)
let stream: Awaited<ReturnType<typeof sendMessageStream>>;
try {
stream = await sendMessageStream(conversationId, currentInput, {
agentId: agent.id,
});
} catch (preStreamError) {
// Extract error detail from APIError
let errorDetail = "";
if (
preStreamError instanceof APIError &&
preStreamError.error &&
typeof preStreamError.error === "object"
) {
const errObj = preStreamError.error as Record<string, unknown>;
if (
errObj.error &&
typeof errObj.error === "object" &&
"detail" in errObj.error
) {
const nested = errObj.error as Record<string, unknown>;
errorDetail =
typeof nested.detail === "string" ? nested.detail : "";
}
if (!errorDetail && typeof errObj.detail === "string") {
errorDetail = errObj.detail;
}
}
if (!errorDetail && preStreamError instanceof Error) {
errorDetail = preStreamError.message;
}
// Check for 409 "conversation busy" error - retry once with delay
if (
isConversationBusyError(errorDetail) &&
conversationBusyRetries < CONVERSATION_BUSY_MAX_RETRIES
) {
conversationBusyRetries += 1;
// Emit retry message for stream-json mode
if (outputFormat === "stream-json") {
const retryMsg: RetryMessage = {
type: "retry",
reason: "error", // 409 conversation busy is a pre-stream error
attempt: conversationBusyRetries,
max_attempts: CONVERSATION_BUSY_MAX_RETRIES,
delay_ms: CONVERSATION_BUSY_RETRY_DELAY_MS,
session_id: sessionId,
uuid: `retry-conversation-busy-${crypto.randomUUID()}`,
};
console.log(JSON.stringify(retryMsg));
} else {
console.error(
`Conversation is busy, waiting ${CONVERSATION_BUSY_RETRY_DELAY_MS / 1000}s and retrying...`,
);
}
// Wait before retry
await new Promise((resolve) =>
setTimeout(resolve, CONVERSATION_BUSY_RETRY_DELAY_MS),
);
continue;
}
// Reset conversation busy retry counter on other errors
conversationBusyRetries = 0;
// Re-throw to outer catch for other errors
throw preStreamError;
}
// For stream-json, output each chunk as it arrives
let stopReason: StopReasonType | null = null;
@@ -1147,6 +1220,9 @@ export async function handleHeadlessCommand(
// Case 1: Turn ended normally
if (stopReason === "end_turn") {
// Reset retry counters on success
llmApiErrorRetries = 0;
conversationBusyRetries = 0;
break;
}