feat: retry on empty LLM response (LET-7679) (#1130)
Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com> Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
@@ -21,6 +21,8 @@ export {
|
||||
getPreStreamErrorAction,
|
||||
isApprovalPendingError,
|
||||
isConversationBusyError,
|
||||
isEmptyResponseError,
|
||||
isEmptyResponseRetryable,
|
||||
isInvalidToolCallIdsError,
|
||||
isNonRetryableProviderErrorDetail,
|
||||
isRetryableProviderErrorDetail,
|
||||
|
||||
@@ -16,6 +16,7 @@ const INVALID_TOOL_CALL_IDS_FRAGMENT = "invalid tool call ids";
|
||||
// Case-insensitive substring fragments matched against a run's error `detail`
// string (callers lowercase the detail before `.includes(...)`).
const APPROVAL_PENDING_DETAIL_FRAGMENT = "waiting for approval";
const CONVERSATION_BUSY_DETAIL_FRAGMENT =
  "another request is currently being processed";
// Matches both "empty content in response" and
// "empty content in streaming response" provider error details.
const EMPTY_RESPONSE_DETAIL_FRAGMENT = "empty content in";
|
||||
const RETRYABLE_PROVIDER_DETAIL_PATTERNS = [
|
||||
"Anthropic API error",
|
||||
"OpenAI API error",
|
||||
@@ -94,6 +95,16 @@ export function isConversationBusyError(detail: unknown): boolean {
|
||||
return detail.toLowerCase().includes(CONVERSATION_BUSY_DETAIL_FRAGMENT);
|
||||
}
|
||||
|
||||
/**
|
||||
* LLM returned an empty response (no content and no tool calls).
|
||||
* This can happen with models like Opus 4.6 that occasionally return empty content.
|
||||
* These are retryable with a cache-busting system message modification.
|
||||
*/
|
||||
export function isEmptyResponseError(detail: unknown): boolean {
|
||||
if (typeof detail !== "string") return false;
|
||||
return detail.toLowerCase().includes(EMPTY_RESPONSE_DETAIL_FRAGMENT);
|
||||
}
|
||||
|
||||
/** Transient provider/network detail that is usually safe to retry. */
|
||||
export function isRetryableProviderErrorDetail(detail: unknown): boolean {
|
||||
if (typeof detail !== "string") return false;
|
||||
@@ -131,6 +142,24 @@ export function shouldRetryRunMetadataError(
|
||||
return retryable429Detail || retryableDetail;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if this is an empty response error that should be retried.
|
||||
*
|
||||
* Empty responses from models like Opus 4.6 are retryable. The caller
|
||||
* decides whether to retry with the same input or append a system
|
||||
* reminder nudge (typically on the last attempt).
|
||||
*/
|
||||
export function isEmptyResponseRetryable(
|
||||
errorType: unknown,
|
||||
detail: unknown,
|
||||
emptyResponseRetries: number,
|
||||
maxEmptyResponseRetries: number,
|
||||
): boolean {
|
||||
if (emptyResponseRetries >= maxEmptyResponseRetries) return false;
|
||||
if (errorType !== "llm_error") return false;
|
||||
return isEmptyResponseError(detail);
|
||||
}
|
||||
|
||||
/** Retry decision for pre-stream send failures before any chunks are yielded. */
|
||||
export function shouldRetryPreStreamTransientError(opts: {
|
||||
status: number | undefined;
|
||||
|
||||
@@ -33,6 +33,7 @@ import {
|
||||
fetchRunErrorDetail,
|
||||
getPreStreamErrorAction,
|
||||
isApprovalPendingError,
|
||||
isEmptyResponseRetryable,
|
||||
isInvalidToolCallIdsError,
|
||||
parseRetryAfterHeaderMs,
|
||||
rebuildInputWithFreshDenials,
|
||||
@@ -299,6 +300,10 @@ const EAGER_CANCEL = true;
|
||||
// Maximum retries for transient LLM API errors (matches headless.ts)
|
||||
const LLM_API_ERROR_MAX_RETRIES = 3;
|
||||
|
||||
// Retry config for empty response errors (Opus 4.6 SADs)
|
||||
// Retry 1: same input. Retry 2: with system reminder nudge.
|
||||
const EMPTY_RESPONSE_MAX_RETRIES = 2;
|
||||
|
||||
// Retry config for 409 "conversation busy" errors (exponential backoff)
|
||||
const CONVERSATION_BUSY_MAX_RETRIES = 3; // 2.5s -> 5s -> 10s
|
||||
const CONVERSATION_BUSY_RETRY_BASE_DELAY_MS = 2500; // 2.5 seconds
|
||||
@@ -1610,6 +1615,7 @@ export default function App({
|
||||
|
||||
// Retry counter for transient LLM API errors (ref for synchronous access in loop)
|
||||
const llmApiErrorRetriesRef = useRef(0);
|
||||
const emptyResponseRetriesRef = useRef(0);
|
||||
|
||||
// Retry counter for 409 "conversation busy" errors
|
||||
const conversationBusyRetriesRef = useRef(0);
|
||||
@@ -3360,6 +3366,7 @@ export default function App({
|
||||
// Reset retry counters for new conversation turns (fresh budget per user message)
|
||||
if (!allowReentry) {
|
||||
llmApiErrorRetriesRef.current = 0;
|
||||
emptyResponseRetriesRef.current = 0;
|
||||
conversationBusyRetriesRef.current = 0;
|
||||
}
|
||||
|
||||
@@ -3954,6 +3961,7 @@ export default function App({
|
||||
})();
|
||||
closeTrajectorySegment();
|
||||
llmApiErrorRetriesRef.current = 0; // Reset retry counter on success
|
||||
emptyResponseRetriesRef.current = 0;
|
||||
conversationBusyRetriesRef.current = 0;
|
||||
lastDequeuedMessageRef.current = null; // Clear - message was processed successfully
|
||||
lastSentInputRef.current = null; // Clear - no recovery needed
|
||||
@@ -4800,6 +4808,55 @@ export default function App({
|
||||
continue;
|
||||
}
|
||||
|
||||
// Empty LLM response retry (e.g. Opus 4.6 occasionally returns no content).
|
||||
// Retry 1: same input unchanged. Retry 2: append system reminder nudging the model.
|
||||
if (
|
||||
isEmptyResponseRetryable(
|
||||
stopReasonToHandle === "llm_api_error" ? "llm_error" : undefined,
|
||||
detailFromRun,
|
||||
emptyResponseRetriesRef.current,
|
||||
EMPTY_RESPONSE_MAX_RETRIES,
|
||||
)
|
||||
) {
|
||||
emptyResponseRetriesRef.current += 1;
|
||||
const attempt = emptyResponseRetriesRef.current;
|
||||
const delayMs = 500 * attempt;
|
||||
|
||||
// Only append a nudge on the last attempt
|
||||
if (attempt >= EMPTY_RESPONSE_MAX_RETRIES) {
|
||||
currentInput = [
|
||||
...currentInput,
|
||||
{
|
||||
type: "message" as const,
|
||||
role: "system" as const,
|
||||
content: `<system-reminder>The previous response was empty. Please provide a response with either text content or a tool call.</system-reminder>`,
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
const statusId = uid("status");
|
||||
buffersRef.current.byId.set(statusId, {
|
||||
kind: "status",
|
||||
id: statusId,
|
||||
lines: [
|
||||
`Empty LLM response, retrying (attempt ${attempt}/${EMPTY_RESPONSE_MAX_RETRIES})...`,
|
||||
],
|
||||
});
|
||||
buffersRef.current.order.push(statusId);
|
||||
refreshDerived();
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
||||
|
||||
buffersRef.current.byId.delete(statusId);
|
||||
buffersRef.current.order = buffersRef.current.order.filter(
|
||||
(id) => id !== statusId,
|
||||
);
|
||||
refreshDerived();
|
||||
|
||||
buffersRef.current.interrupted = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if this is a retriable error (transient LLM API error)
|
||||
const retriable = await isRetriableError(
|
||||
stopReasonToHandle,
|
||||
@@ -4870,6 +4927,7 @@ export default function App({
|
||||
|
||||
// Reset retry counters on non-retriable error (or max retries exceeded)
|
||||
llmApiErrorRetriesRef.current = 0;
|
||||
emptyResponseRetriesRef.current = 0;
|
||||
conversationBusyRetriesRef.current = 0;
|
||||
|
||||
// Mark incomplete tool calls as finished to prevent stuck blinking UI
|
||||
|
||||
@@ -14,6 +14,7 @@ import {
|
||||
fetchRunErrorDetail,
|
||||
getPreStreamErrorAction,
|
||||
isApprovalPendingError,
|
||||
isEmptyResponseRetryable,
|
||||
isInvalidToolCallIdsError,
|
||||
parseRetryAfterHeaderMs,
|
||||
shouldRetryRunMetadataError,
|
||||
@@ -110,6 +111,10 @@ import {
|
||||
// caller to manually resubmit the prompt.
|
||||
const LLM_API_ERROR_MAX_RETRIES = 3;
|
||||
|
||||
// Retry config for empty response errors (Opus 4.6 SADs)
|
||||
// Retry 1: same input. Retry 2: with system reminder nudge.
|
||||
const EMPTY_RESPONSE_MAX_RETRIES = 2;
|
||||
|
||||
// Retry config for 409 "conversation busy" errors
|
||||
const CONVERSATION_BUSY_MAX_RETRIES = 1; // Only retry once, fail on 2nd 409
|
||||
const CONVERSATION_BUSY_RETRY_DELAY_MS = 2500; // 2.5 seconds
|
||||
@@ -1443,6 +1448,7 @@ ${SYSTEM_REMINDER_CLOSE}
|
||||
// Track lastRunId outside the while loop so it's available in catch block
|
||||
let lastKnownRunId: string | null = null;
|
||||
let llmApiErrorRetries = 0;
|
||||
let emptyResponseRetries = 0;
|
||||
let conversationBusyRetries = 0;
|
||||
markMilestone("HEADLESS_FIRST_STREAM_START");
|
||||
measureSinceMilestone("headless-setup-total", "HEADLESS_CLIENT_READY");
|
||||
@@ -1792,6 +1798,7 @@ ${SYSTEM_REMINDER_CLOSE}
|
||||
if (stopReason === "end_turn") {
|
||||
// Reset retry counters on success
|
||||
llmApiErrorRetries = 0;
|
||||
emptyResponseRetries = 0;
|
||||
conversationBusyRetries = 0;
|
||||
break;
|
||||
}
|
||||
@@ -2026,6 +2033,53 @@ ${SYSTEM_REMINDER_CLOSE}
|
||||
|
||||
const detail = metaError?.detail ?? metaError?.error?.detail ?? "";
|
||||
|
||||
// Special handling for empty response errors (Opus 4.6 SADs)
|
||||
// Empty LLM response retry (e.g. Opus 4.6 occasionally returns no content).
|
||||
// Retry 1: same input unchanged. Retry 2: append system reminder nudging the model.
|
||||
if (
|
||||
isEmptyResponseRetryable(
|
||||
errorType,
|
||||
detail,
|
||||
emptyResponseRetries,
|
||||
EMPTY_RESPONSE_MAX_RETRIES,
|
||||
)
|
||||
) {
|
||||
const attempt = emptyResponseRetries + 1;
|
||||
const delayMs = 500 * attempt;
|
||||
|
||||
emptyResponseRetries = attempt;
|
||||
|
||||
// Only append a nudge on the last attempt
|
||||
if (attempt >= EMPTY_RESPONSE_MAX_RETRIES) {
|
||||
const nudgeMessage: MessageCreate = {
|
||||
role: "system",
|
||||
content: `<system-reminder>The previous response was empty. Please provide a response with either text content or a tool call.</system-reminder>`,
|
||||
};
|
||||
currentInput = [...currentInput, nudgeMessage];
|
||||
}
|
||||
|
||||
if (outputFormat === "stream-json") {
|
||||
const retryMsg: RetryMessage = {
|
||||
type: "retry",
|
||||
reason: "llm_api_error",
|
||||
attempt,
|
||||
max_attempts: EMPTY_RESPONSE_MAX_RETRIES,
|
||||
delay_ms: delayMs,
|
||||
run_id: lastRunId ?? undefined,
|
||||
session_id: sessionId,
|
||||
uuid: `retry-empty-${lastRunId || randomUUID()}`,
|
||||
};
|
||||
console.log(JSON.stringify(retryMsg));
|
||||
} else {
|
||||
console.error(
|
||||
`Empty LLM response, retrying (attempt ${attempt} of ${EMPTY_RESPONSE_MAX_RETRIES})...`,
|
||||
);
|
||||
}
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (shouldRetryRunMetadataError(errorType, detail)) {
|
||||
const attempt = llmApiErrorRetries + 1;
|
||||
const baseDelayMs = 1000;
|
||||
|
||||
@@ -5,6 +5,8 @@ import {
|
||||
getPreStreamErrorAction,
|
||||
isApprovalPendingError,
|
||||
isConversationBusyError,
|
||||
isEmptyResponseError,
|
||||
isEmptyResponseRetryable,
|
||||
isInvalidToolCallIdsError,
|
||||
isNonRetryableProviderErrorDetail,
|
||||
isRetryableProviderErrorDetail,
|
||||
@@ -455,3 +457,90 @@ describe("shouldAttemptApprovalRecovery", () => {
|
||||
expect(tuiResult).toBe(headlessResult);
|
||||
});
|
||||
});
|
||||
|
||||
// ── Empty response error detection (LET-7679) ────────────────────────
|
||||
|
||||
describe("isEmptyResponseError", () => {
|
||||
test("detects empty content in response", () => {
|
||||
expect(
|
||||
isEmptyResponseError(
|
||||
"LLM provider returned empty content in response (ID: msg_123, model: claude-opus-4-6)",
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("detects empty content in streaming response", () => {
|
||||
expect(
|
||||
isEmptyResponseError(
|
||||
"LLM provider returned empty content in streaming response (model: claude-opus-4-6)",
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("case insensitive", () => {
|
||||
expect(isEmptyResponseError("EMPTY CONTENT IN RESPONSE")).toBe(true);
|
||||
});
|
||||
|
||||
test("returns false for unrelated errors", () => {
|
||||
expect(isEmptyResponseError("Connection error")).toBe(false);
|
||||
expect(isEmptyResponseError("Rate limit exceeded")).toBe(false);
|
||||
});
|
||||
|
||||
test("returns false for non-string input", () => {
|
||||
expect(isEmptyResponseError(null)).toBe(false);
|
||||
expect(isEmptyResponseError(undefined)).toBe(false);
|
||||
expect(isEmptyResponseError(123)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("isEmptyResponseRetryable", () => {
|
||||
test("true when llm_error and empty response detail and under retry budget", () => {
|
||||
expect(
|
||||
isEmptyResponseRetryable(
|
||||
"llm_error",
|
||||
"LLM provider returned empty content in response",
|
||||
0,
|
||||
2,
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("true at boundary (retries < max)", () => {
|
||||
expect(
|
||||
isEmptyResponseRetryable(
|
||||
"llm_error",
|
||||
"LLM provider returned empty content in streaming response",
|
||||
1,
|
||||
2,
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("false when retry budget exhausted", () => {
|
||||
expect(
|
||||
isEmptyResponseRetryable(
|
||||
"llm_error",
|
||||
"LLM provider returned empty content in response",
|
||||
2,
|
||||
2,
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
test("false when not llm_error type", () => {
|
||||
expect(
|
||||
isEmptyResponseRetryable(
|
||||
"internal_error",
|
||||
"LLM provider returned empty content in response",
|
||||
0,
|
||||
2,
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
test("false when not empty response error", () => {
|
||||
expect(
|
||||
isEmptyResponseRetryable("llm_error", "Connection error occurred", 0, 2),
|
||||
).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user