feat: retry on empty LLM response (LET-7679) (#1130)

Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com>
Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
cthomas
2026-02-25 11:17:55 -08:00
committed by GitHub
parent 0023b9c7e5
commit be5fbfca74
5 changed files with 232 additions and 0 deletions

View File

@@ -21,6 +21,8 @@ export {
getPreStreamErrorAction,
isApprovalPendingError,
isConversationBusyError,
isEmptyResponseError,
isEmptyResponseRetryable,
isInvalidToolCallIdsError,
isNonRetryableProviderErrorDetail,
isRetryableProviderErrorDetail,

View File

@@ -16,6 +16,7 @@ const INVALID_TOOL_CALL_IDS_FRAGMENT = "invalid tool call ids";
const APPROVAL_PENDING_DETAIL_FRAGMENT = "waiting for approval";
const CONVERSATION_BUSY_DETAIL_FRAGMENT =
"another request is currently being processed";
const EMPTY_RESPONSE_DETAIL_FRAGMENT = "empty content in";
const RETRYABLE_PROVIDER_DETAIL_PATTERNS = [
"Anthropic API error",
"OpenAI API error",
@@ -94,6 +95,16 @@ export function isConversationBusyError(detail: unknown): boolean {
return detail.toLowerCase().includes(CONVERSATION_BUSY_DETAIL_FRAGMENT);
}
/**
 * Returns true when the error detail describes an empty LLM response
 * (no text content and no tool calls in the completion).
 *
 * Models such as Opus 4.6 occasionally emit empty content; callers treat
 * these as retryable, optionally adding a cache-busting system message.
 *
 * @param detail - Raw error detail from the run metadata; may be any type.
 * @returns true only for string details containing the empty-response fragment.
 */
export function isEmptyResponseError(detail: unknown): boolean {
  // Non-string details (null, objects, numbers) can never match.
  return (
    typeof detail === "string" &&
    detail.toLowerCase().includes(EMPTY_RESPONSE_DETAIL_FRAGMENT)
  );
}
/** Transient provider/network detail that is usually safe to retry. */
export function isRetryableProviderErrorDetail(detail: unknown): boolean {
if (typeof detail !== "string") return false;
@@ -131,6 +142,24 @@ export function shouldRetryRunMetadataError(
return retryable429Detail || retryableDetail;
}
/**
 * Decide whether an empty-response error should be retried.
 *
 * Empty responses from models like Opus 4.6 are retryable. The caller
 * decides whether to retry with the same input or append a system
 * reminder nudge (typically on the last attempt).
 *
 * @param errorType - Error type reported by the run (must be "llm_error").
 * @param detail - Raw error detail; matched against the empty-response fragment.
 * @param emptyResponseRetries - Retries already consumed for this turn.
 * @param maxEmptyResponseRetries - Retry budget for empty responses.
 * @returns true when the budget remains, the type matches, and the detail
 *   describes an empty response.
 */
export function isEmptyResponseRetryable(
  errorType: unknown,
  detail: unknown,
  emptyResponseRetries: number,
  maxEmptyResponseRetries: number,
): boolean {
  const budgetRemaining = emptyResponseRetries < maxEmptyResponseRetries;
  return (
    budgetRemaining &&
    errorType === "llm_error" &&
    isEmptyResponseError(detail)
  );
}
/** Retry decision for pre-stream send failures before any chunks are yielded. */
export function shouldRetryPreStreamTransientError(opts: {
status: number | undefined;

View File

@@ -33,6 +33,7 @@ import {
fetchRunErrorDetail,
getPreStreamErrorAction,
isApprovalPendingError,
isEmptyResponseRetryable,
isInvalidToolCallIdsError,
parseRetryAfterHeaderMs,
rebuildInputWithFreshDenials,
@@ -299,6 +300,10 @@ const EAGER_CANCEL = true;
// Maximum retries for transient LLM API errors (matches headless.ts)
const LLM_API_ERROR_MAX_RETRIES = 3;
// Retry config for empty response errors (Opus 4.6 SADs)
// Retry 1: same input. Retry 2: with system reminder nudge.
const EMPTY_RESPONSE_MAX_RETRIES = 2;
// Retry config for 409 "conversation busy" errors (exponential backoff)
const CONVERSATION_BUSY_MAX_RETRIES = 3; // 2.5s -> 5s -> 10s
const CONVERSATION_BUSY_RETRY_BASE_DELAY_MS = 2500; // 2.5 seconds
@@ -1610,6 +1615,7 @@ export default function App({
// Retry counter for transient LLM API errors (ref for synchronous access in loop)
const llmApiErrorRetriesRef = useRef(0);
const emptyResponseRetriesRef = useRef(0);
// Retry counter for 409 "conversation busy" errors
const conversationBusyRetriesRef = useRef(0);
@@ -3360,6 +3366,7 @@ export default function App({
// Reset retry counters for new conversation turns (fresh budget per user message)
if (!allowReentry) {
llmApiErrorRetriesRef.current = 0;
emptyResponseRetriesRef.current = 0;
conversationBusyRetriesRef.current = 0;
}
@@ -3954,6 +3961,7 @@ export default function App({
})();
closeTrajectorySegment();
llmApiErrorRetriesRef.current = 0; // Reset retry counter on success
emptyResponseRetriesRef.current = 0;
conversationBusyRetriesRef.current = 0;
lastDequeuedMessageRef.current = null; // Clear - message was processed successfully
lastSentInputRef.current = null; // Clear - no recovery needed
@@ -4800,6 +4808,55 @@ export default function App({
continue;
}
// Empty LLM response retry (e.g. Opus 4.6 occasionally returns no content).
// Retry 1: same input unchanged. Retry 2: append system reminder nudging the model.
if (
isEmptyResponseRetryable(
stopReasonToHandle === "llm_api_error" ? "llm_error" : undefined,
detailFromRun,
emptyResponseRetriesRef.current,
EMPTY_RESPONSE_MAX_RETRIES,
)
) {
emptyResponseRetriesRef.current += 1;
const attempt = emptyResponseRetriesRef.current;
const delayMs = 500 * attempt;
// Only append a nudge on the last attempt
if (attempt >= EMPTY_RESPONSE_MAX_RETRIES) {
currentInput = [
...currentInput,
{
type: "message" as const,
role: "system" as const,
content: `<system-reminder>The previous response was empty. Please provide a response with either text content or a tool call.</system-reminder>`,
},
];
}
const statusId = uid("status");
buffersRef.current.byId.set(statusId, {
kind: "status",
id: statusId,
lines: [
`Empty LLM response, retrying (attempt ${attempt}/${EMPTY_RESPONSE_MAX_RETRIES})...`,
],
});
buffersRef.current.order.push(statusId);
refreshDerived();
await new Promise((resolve) => setTimeout(resolve, delayMs));
buffersRef.current.byId.delete(statusId);
buffersRef.current.order = buffersRef.current.order.filter(
(id) => id !== statusId,
);
refreshDerived();
buffersRef.current.interrupted = false;
continue;
}
// Check if this is a retriable error (transient LLM API error)
const retriable = await isRetriableError(
stopReasonToHandle,
@@ -4870,6 +4927,7 @@ export default function App({
// Reset retry counters on non-retriable error (or max retries exceeded)
llmApiErrorRetriesRef.current = 0;
emptyResponseRetriesRef.current = 0;
conversationBusyRetriesRef.current = 0;
// Mark incomplete tool calls as finished to prevent stuck blinking UI

View File

@@ -14,6 +14,7 @@ import {
fetchRunErrorDetail,
getPreStreamErrorAction,
isApprovalPendingError,
isEmptyResponseRetryable,
isInvalidToolCallIdsError,
parseRetryAfterHeaderMs,
shouldRetryRunMetadataError,
@@ -110,6 +111,10 @@ import {
// caller to manually resubmit the prompt.
const LLM_API_ERROR_MAX_RETRIES = 3;
// Retry config for empty response errors (Opus 4.6 SADs)
// Retry 1: same input. Retry 2: with system reminder nudge.
const EMPTY_RESPONSE_MAX_RETRIES = 2;
// Retry config for 409 "conversation busy" errors
const CONVERSATION_BUSY_MAX_RETRIES = 1; // Only retry once, fail on 2nd 409
const CONVERSATION_BUSY_RETRY_DELAY_MS = 2500; // 2.5 seconds
@@ -1443,6 +1448,7 @@ ${SYSTEM_REMINDER_CLOSE}
// Track lastRunId outside the while loop so it's available in catch block
let lastKnownRunId: string | null = null;
let llmApiErrorRetries = 0;
let emptyResponseRetries = 0;
let conversationBusyRetries = 0;
markMilestone("HEADLESS_FIRST_STREAM_START");
measureSinceMilestone("headless-setup-total", "HEADLESS_CLIENT_READY");
@@ -1792,6 +1798,7 @@ ${SYSTEM_REMINDER_CLOSE}
if (stopReason === "end_turn") {
// Reset retry counters on success
llmApiErrorRetries = 0;
emptyResponseRetries = 0;
conversationBusyRetries = 0;
break;
}
@@ -2026,6 +2033,53 @@ ${SYSTEM_REMINDER_CLOSE}
const detail = metaError?.detail ?? metaError?.error?.detail ?? "";
// Special handling for empty response errors (Opus 4.6 SADs)
// Empty LLM response retry (e.g. Opus 4.6 occasionally returns no content).
// Retry 1: same input unchanged. Retry 2: append system reminder nudging the model.
if (
isEmptyResponseRetryable(
errorType,
detail,
emptyResponseRetries,
EMPTY_RESPONSE_MAX_RETRIES,
)
) {
const attempt = emptyResponseRetries + 1;
const delayMs = 500 * attempt;
emptyResponseRetries = attempt;
// Only append a nudge on the last attempt
if (attempt >= EMPTY_RESPONSE_MAX_RETRIES) {
const nudgeMessage: MessageCreate = {
role: "system",
content: `<system-reminder>The previous response was empty. Please provide a response with either text content or a tool call.</system-reminder>`,
};
currentInput = [...currentInput, nudgeMessage];
}
if (outputFormat === "stream-json") {
const retryMsg: RetryMessage = {
type: "retry",
reason: "llm_api_error",
attempt,
max_attempts: EMPTY_RESPONSE_MAX_RETRIES,
delay_ms: delayMs,
run_id: lastRunId ?? undefined,
session_id: sessionId,
uuid: `retry-empty-${lastRunId || randomUUID()}`,
};
console.log(JSON.stringify(retryMsg));
} else {
console.error(
`Empty LLM response, retrying (attempt ${attempt} of ${EMPTY_RESPONSE_MAX_RETRIES})...`,
);
}
await new Promise((resolve) => setTimeout(resolve, delayMs));
continue;
}
if (shouldRetryRunMetadataError(errorType, detail)) {
const attempt = llmApiErrorRetries + 1;
const baseDelayMs = 1000;

View File

@@ -5,6 +5,8 @@ import {
getPreStreamErrorAction,
isApprovalPendingError,
isConversationBusyError,
isEmptyResponseError,
isEmptyResponseRetryable,
isInvalidToolCallIdsError,
isNonRetryableProviderErrorDetail,
isRetryableProviderErrorDetail,
@@ -455,3 +457,90 @@ describe("shouldAttemptApprovalRecovery", () => {
expect(tuiResult).toBe(headlessResult);
});
});
// ── Empty response error detection (LET-7679) ────────────────────────
describe("isEmptyResponseError", () => {
  // Details that must be recognized as empty-response errors.
  // Titles and detail strings are kept identical to the original cases.
  const matchingCases: Array<[title: string, detail: string]> = [
    [
      "detects empty content in response",
      "LLM provider returned empty content in response (ID: msg_123, model: claude-opus-4-6)",
    ],
    [
      "detects empty content in streaming response",
      "LLM provider returned empty content in streaming response (model: claude-opus-4-6)",
    ],
    ["case insensitive", "EMPTY CONTENT IN RESPONSE"],
  ];
  for (const [title, detail] of matchingCases) {
    test(title, () => {
      expect(isEmptyResponseError(detail)).toBe(true);
    });
  }
  test("returns false for unrelated errors", () => {
    for (const detail of ["Connection error", "Rate limit exceeded"]) {
      expect(isEmptyResponseError(detail)).toBe(false);
    }
  });
  test("returns false for non-string input", () => {
    for (const value of [null, undefined, 123]) {
      expect(isEmptyResponseError(value)).toBe(false);
    }
  });
});
describe("isEmptyResponseRetryable", () => {
  // One row per scenario; titles and inputs match the original test cases.
  type RetryCase = {
    title: string;
    errorType: unknown;
    detail: string;
    retries: number;
    max: number;
    expected: boolean;
  };
  const cases: RetryCase[] = [
    {
      title:
        "true when llm_error and empty response detail and under retry budget",
      errorType: "llm_error",
      detail: "LLM provider returned empty content in response",
      retries: 0,
      max: 2,
      expected: true,
    },
    {
      title: "true at boundary (retries < max)",
      errorType: "llm_error",
      detail: "LLM provider returned empty content in streaming response",
      retries: 1,
      max: 2,
      expected: true,
    },
    {
      title: "false when retry budget exhausted",
      errorType: "llm_error",
      detail: "LLM provider returned empty content in response",
      retries: 2,
      max: 2,
      expected: false,
    },
    {
      title: "false when not llm_error type",
      errorType: "internal_error",
      detail: "LLM provider returned empty content in response",
      retries: 0,
      max: 2,
      expected: false,
    },
    {
      title: "false when not empty response error",
      errorType: "llm_error",
      detail: "Connection error occurred",
      retries: 0,
      max: 2,
      expected: false,
    },
  ];
  for (const c of cases) {
    test(c.title, () => {
      expect(
        isEmptyResponseRetryable(c.errorType, c.detail, c.retries, c.max),
      ).toBe(c.expected);
    });
  }
});