feat: retry on empty LLM response (LET-7679) (#1130)
Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com> Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
@@ -21,6 +21,8 @@ export {
|
||||
getPreStreamErrorAction,
|
||||
isApprovalPendingError,
|
||||
isConversationBusyError,
|
||||
isEmptyResponseError,
|
||||
isEmptyResponseRetryable,
|
||||
isInvalidToolCallIdsError,
|
||||
isNonRetryableProviderErrorDetail,
|
||||
isRetryableProviderErrorDetail,
|
||||
|
||||
@@ -16,6 +16,7 @@ const INVALID_TOOL_CALL_IDS_FRAGMENT = "invalid tool call ids";
|
||||
// Case-insensitive substring fragments matched against a run's error `detail`
// string (callers lowercase the detail before `.includes(...)`).
const APPROVAL_PENDING_DETAIL_FRAGMENT = "waiting for approval";
const CONVERSATION_BUSY_DETAIL_FRAGMENT =
  "another request is currently being processed";
// Matches both "empty content in response" and
// "empty content in streaming response" provider error details.
const EMPTY_RESPONSE_DETAIL_FRAGMENT = "empty content in";
|
||||
const RETRYABLE_PROVIDER_DETAIL_PATTERNS = [
|
||||
"Anthropic API error",
|
||||
"OpenAI API error",
|
||||
@@ -94,6 +95,16 @@ export function isConversationBusyError(detail: unknown): boolean {
|
||||
return detail.toLowerCase().includes(CONVERSATION_BUSY_DETAIL_FRAGMENT);
|
||||
}
|
||||
|
||||
/**
|
||||
* LLM returned an empty response (no content and no tool calls).
|
||||
* This can happen with models like Opus 4.6 that occasionally return empty content.
|
||||
* These are retryable with a cache-busting system message modification.
|
||||
*/
|
||||
export function isEmptyResponseError(detail: unknown): boolean {
|
||||
if (typeof detail !== "string") return false;
|
||||
return detail.toLowerCase().includes(EMPTY_RESPONSE_DETAIL_FRAGMENT);
|
||||
}
|
||||
|
||||
/** Transient provider/network detail that is usually safe to retry. */
|
||||
export function isRetryableProviderErrorDetail(detail: unknown): boolean {
|
||||
if (typeof detail !== "string") return false;
|
||||
@@ -131,6 +142,24 @@ export function shouldRetryRunMetadataError(
|
||||
return retryable429Detail || retryableDetail;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if this is an empty response error that should be retried.
|
||||
*
|
||||
* Empty responses from models like Opus 4.6 are retryable. The caller
|
||||
* decides whether to retry with the same input or append a system
|
||||
* reminder nudge (typically on the last attempt).
|
||||
*/
|
||||
export function isEmptyResponseRetryable(
|
||||
errorType: unknown,
|
||||
detail: unknown,
|
||||
emptyResponseRetries: number,
|
||||
maxEmptyResponseRetries: number,
|
||||
): boolean {
|
||||
if (emptyResponseRetries >= maxEmptyResponseRetries) return false;
|
||||
if (errorType !== "llm_error") return false;
|
||||
return isEmptyResponseError(detail);
|
||||
}
|
||||
|
||||
/** Retry decision for pre-stream send failures before any chunks are yielded. */
|
||||
export function shouldRetryPreStreamTransientError(opts: {
|
||||
status: number | undefined;
|
||||
|
||||
@@ -33,6 +33,7 @@ import {
|
||||
fetchRunErrorDetail,
|
||||
getPreStreamErrorAction,
|
||||
isApprovalPendingError,
|
||||
isEmptyResponseRetryable,
|
||||
isInvalidToolCallIdsError,
|
||||
parseRetryAfterHeaderMs,
|
||||
rebuildInputWithFreshDenials,
|
||||
@@ -299,6 +300,10 @@ const EAGER_CANCEL = true;
|
||||
// Maximum retries for transient LLM API errors (matches headless.ts)
|
||||
const LLM_API_ERROR_MAX_RETRIES = 3;
|
||||
|
||||
// Retry config for empty response errors (Opus 4.6 SADs)
|
||||
// Retry 1: same input. Retry 2: with system reminder nudge.
|
||||
const EMPTY_RESPONSE_MAX_RETRIES = 2;
|
||||
|
||||
// Retry config for 409 "conversation busy" errors (exponential backoff)
|
||||
const CONVERSATION_BUSY_MAX_RETRIES = 3; // 2.5s -> 5s -> 10s
|
||||
const CONVERSATION_BUSY_RETRY_BASE_DELAY_MS = 2500; // 2.5 seconds
|
||||
@@ -1610,6 +1615,7 @@ export default function App({
|
||||
|
||||
// Retry counter for transient LLM API errors (ref for synchronous access in loop)
|
||||
const llmApiErrorRetriesRef = useRef(0);
|
||||
const emptyResponseRetriesRef = useRef(0);
|
||||
|
||||
// Retry counter for 409 "conversation busy" errors
|
||||
const conversationBusyRetriesRef = useRef(0);
|
||||
@@ -3360,6 +3366,7 @@ export default function App({
|
||||
// Reset retry counters for new conversation turns (fresh budget per user message)
|
||||
if (!allowReentry) {
|
||||
llmApiErrorRetriesRef.current = 0;
|
||||
emptyResponseRetriesRef.current = 0;
|
||||
conversationBusyRetriesRef.current = 0;
|
||||
}
|
||||
|
||||
@@ -3954,6 +3961,7 @@ export default function App({
|
||||
})();
|
||||
closeTrajectorySegment();
|
||||
llmApiErrorRetriesRef.current = 0; // Reset retry counter on success
|
||||
emptyResponseRetriesRef.current = 0;
|
||||
conversationBusyRetriesRef.current = 0;
|
||||
lastDequeuedMessageRef.current = null; // Clear - message was processed successfully
|
||||
lastSentInputRef.current = null; // Clear - no recovery needed
|
||||
@@ -4800,6 +4808,55 @@ export default function App({
|
||||
continue;
|
||||
}
|
||||
|
||||
// Empty LLM response retry (e.g. Opus 4.6 occasionally returns no content).
|
||||
// Retry 1: same input unchanged. Retry 2: append system reminder nudging the model.
|
||||
if (
|
||||
isEmptyResponseRetryable(
|
||||
stopReasonToHandle === "llm_api_error" ? "llm_error" : undefined,
|
||||
detailFromRun,
|
||||
emptyResponseRetriesRef.current,
|
||||
EMPTY_RESPONSE_MAX_RETRIES,
|
||||
)
|
||||
) {
|
||||
emptyResponseRetriesRef.current += 1;
|
||||
const attempt = emptyResponseRetriesRef.current;
|
||||
const delayMs = 500 * attempt;
|
||||
|
||||
// Only append a nudge on the last attempt
|
||||
if (attempt >= EMPTY_RESPONSE_MAX_RETRIES) {
|
||||
currentInput = [
|
||||
...currentInput,
|
||||
{
|
||||
type: "message" as const,
|
||||
role: "system" as const,
|
||||
content: `<system-reminder>The previous response was empty. Please provide a response with either text content or a tool call.</system-reminder>`,
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
const statusId = uid("status");
|
||||
buffersRef.current.byId.set(statusId, {
|
||||
kind: "status",
|
||||
id: statusId,
|
||||
lines: [
|
||||
`Empty LLM response, retrying (attempt ${attempt}/${EMPTY_RESPONSE_MAX_RETRIES})...`,
|
||||
],
|
||||
});
|
||||
buffersRef.current.order.push(statusId);
|
||||
refreshDerived();
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
||||
|
||||
buffersRef.current.byId.delete(statusId);
|
||||
buffersRef.current.order = buffersRef.current.order.filter(
|
||||
(id) => id !== statusId,
|
||||
);
|
||||
refreshDerived();
|
||||
|
||||
buffersRef.current.interrupted = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if this is a retriable error (transient LLM API error)
|
||||
const retriable = await isRetriableError(
|
||||
stopReasonToHandle,
|
||||
@@ -4870,6 +4927,7 @@ export default function App({
|
||||
|
||||
// Reset retry counters on non-retriable error (or max retries exceeded)
|
||||
llmApiErrorRetriesRef.current = 0;
|
||||
emptyResponseRetriesRef.current = 0;
|
||||
conversationBusyRetriesRef.current = 0;
|
||||
|
||||
// Mark incomplete tool calls as finished to prevent stuck blinking UI
|
||||
|
||||
@@ -14,6 +14,7 @@ import {
|
||||
fetchRunErrorDetail,
|
||||
getPreStreamErrorAction,
|
||||
isApprovalPendingError,
|
||||
isEmptyResponseRetryable,
|
||||
isInvalidToolCallIdsError,
|
||||
parseRetryAfterHeaderMs,
|
||||
shouldRetryRunMetadataError,
|
||||
@@ -110,6 +111,10 @@ import {
|
||||
// caller to manually resubmit the prompt.
|
||||
const LLM_API_ERROR_MAX_RETRIES = 3;
|
||||
|
||||
// Retry config for empty response errors (Opus 4.6 SADs)
|
||||
// Retry 1: same input. Retry 2: with system reminder nudge.
|
||||
const EMPTY_RESPONSE_MAX_RETRIES = 2;
|
||||
|
||||
// Retry config for 409 "conversation busy" errors
|
||||
const CONVERSATION_BUSY_MAX_RETRIES = 1; // Only retry once, fail on 2nd 409
|
||||
const CONVERSATION_BUSY_RETRY_DELAY_MS = 2500; // 2.5 seconds
|
||||
@@ -1443,6 +1448,7 @@ ${SYSTEM_REMINDER_CLOSE}
|
||||
// Track lastRunId outside the while loop so it's available in catch block
|
||||
let lastKnownRunId: string | null = null;
|
||||
let llmApiErrorRetries = 0;
|
||||
let emptyResponseRetries = 0;
|
||||
let conversationBusyRetries = 0;
|
||||
markMilestone("HEADLESS_FIRST_STREAM_START");
|
||||
measureSinceMilestone("headless-setup-total", "HEADLESS_CLIENT_READY");
|
||||
@@ -1792,6 +1798,7 @@ ${SYSTEM_REMINDER_CLOSE}
|
||||
if (stopReason === "end_turn") {
|
||||
// Reset retry counters on success
|
||||
llmApiErrorRetries = 0;
|
||||
emptyResponseRetries = 0;
|
||||
conversationBusyRetries = 0;
|
||||
break;
|
||||
}
|
||||
@@ -2026,6 +2033,53 @@ ${SYSTEM_REMINDER_CLOSE}
|
||||
|
||||
const detail = metaError?.detail ?? metaError?.error?.detail ?? "";
|
||||
|
||||
// Special handling for empty response errors (Opus 4.6 SADs)
|
||||
// Empty LLM response retry (e.g. Opus 4.6 occasionally returns no content).
|
||||
// Retry 1: same input unchanged. Retry 2: append system reminder nudging the model.
|
||||
if (
|
||||
isEmptyResponseRetryable(
|
||||
errorType,
|
||||
detail,
|
||||
emptyResponseRetries,
|
||||
EMPTY_RESPONSE_MAX_RETRIES,
|
||||
)
|
||||
) {
|
||||
const attempt = emptyResponseRetries + 1;
|
||||
const delayMs = 500 * attempt;
|
||||
|
||||
emptyResponseRetries = attempt;
|
||||
|
||||
// Only append a nudge on the last attempt
|
||||
if (attempt >= EMPTY_RESPONSE_MAX_RETRIES) {
|
||||
const nudgeMessage: MessageCreate = {
|
||||
role: "system",
|
||||
content: `<system-reminder>The previous response was empty. Please provide a response with either text content or a tool call.</system-reminder>`,
|
||||
};
|
||||
currentInput = [...currentInput, nudgeMessage];
|
||||
}
|
||||
|
||||
if (outputFormat === "stream-json") {
|
||||
const retryMsg: RetryMessage = {
|
||||
type: "retry",
|
||||
reason: "llm_api_error",
|
||||
attempt,
|
||||
max_attempts: EMPTY_RESPONSE_MAX_RETRIES,
|
||||
delay_ms: delayMs,
|
||||
run_id: lastRunId ?? undefined,
|
||||
session_id: sessionId,
|
||||
uuid: `retry-empty-${lastRunId || randomUUID()}`,
|
||||
};
|
||||
console.log(JSON.stringify(retryMsg));
|
||||
} else {
|
||||
console.error(
|
||||
`Empty LLM response, retrying (attempt ${attempt} of ${EMPTY_RESPONSE_MAX_RETRIES})...`,
|
||||
);
|
||||
}
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (shouldRetryRunMetadataError(errorType, detail)) {
|
||||
const attempt = llmApiErrorRetries + 1;
|
||||
const baseDelayMs = 1000;
|
||||
|
||||
@@ -5,6 +5,8 @@ import {
|
||||
getPreStreamErrorAction,
|
||||
isApprovalPendingError,
|
||||
isConversationBusyError,
|
||||
isEmptyResponseError,
|
||||
isEmptyResponseRetryable,
|
||||
isInvalidToolCallIdsError,
|
||||
isNonRetryableProviderErrorDetail,
|
||||
isRetryableProviderErrorDetail,
|
||||
@@ -455,3 +457,90 @@ describe("shouldAttemptApprovalRecovery", () => {
|
||||
expect(tuiResult).toBe(headlessResult);
|
||||
});
|
||||
});
|
||||
|
||||
// ── Empty response error detection (LET-7679) ────────────────────────
|
||||
|
||||
describe("isEmptyResponseError", () => {
|
||||
test("detects empty content in response", () => {
|
||||
expect(
|
||||
isEmptyResponseError(
|
||||
"LLM provider returned empty content in response (ID: msg_123, model: claude-opus-4-6)",
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("detects empty content in streaming response", () => {
|
||||
expect(
|
||||
isEmptyResponseError(
|
||||
"LLM provider returned empty content in streaming response (model: claude-opus-4-6)",
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("case insensitive", () => {
|
||||
expect(isEmptyResponseError("EMPTY CONTENT IN RESPONSE")).toBe(true);
|
||||
});
|
||||
|
||||
test("returns false for unrelated errors", () => {
|
||||
expect(isEmptyResponseError("Connection error")).toBe(false);
|
||||
expect(isEmptyResponseError("Rate limit exceeded")).toBe(false);
|
||||
});
|
||||
|
||||
test("returns false for non-string input", () => {
|
||||
expect(isEmptyResponseError(null)).toBe(false);
|
||||
expect(isEmptyResponseError(undefined)).toBe(false);
|
||||
expect(isEmptyResponseError(123)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("isEmptyResponseRetryable", () => {
|
||||
test("true when llm_error and empty response detail and under retry budget", () => {
|
||||
expect(
|
||||
isEmptyResponseRetryable(
|
||||
"llm_error",
|
||||
"LLM provider returned empty content in response",
|
||||
0,
|
||||
2,
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("true at boundary (retries < max)", () => {
|
||||
expect(
|
||||
isEmptyResponseRetryable(
|
||||
"llm_error",
|
||||
"LLM provider returned empty content in streaming response",
|
||||
1,
|
||||
2,
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("false when retry budget exhausted", () => {
|
||||
expect(
|
||||
isEmptyResponseRetryable(
|
||||
"llm_error",
|
||||
"LLM provider returned empty content in response",
|
||||
2,
|
||||
2,
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
test("false when not llm_error type", () => {
|
||||
expect(
|
||||
isEmptyResponseRetryable(
|
||||
"internal_error",
|
||||
"LLM provider returned empty content in response",
|
||||
0,
|
||||
2,
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
test("false when not empty response error", () => {
|
||||
expect(
|
||||
isEmptyResponseRetryable("llm_error", "Connection error occurred", 0, 2),
|
||||
).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user