/** Lowercase substring that identifies an "empty response" error detail. */
const EMPTY_RESPONSE_DETAIL_FRAGMENT = "empty content in";

/**
 * Reports whether an error detail describes an empty LLM response
 * (no content and no tool calls).
 *
 * Detection is a case-insensitive substring match against
 * `EMPTY_RESPONSE_DETAIL_FRAGMENT`; any non-string value never matches.
 *
 * @param detail - Error detail of unknown shape, as reported for the run.
 * @returns `true` only when `detail` is a string containing the fragment.
 */
export function isEmptyResponseError(detail: unknown): boolean {
  return (
    typeof detail === "string" &&
    detail.toLowerCase().includes(EMPTY_RESPONSE_DETAIL_FRAGMENT)
  );
}

/**
 * Decides whether an empty-response failure is still eligible for a retry.
 *
 * All three conditions must hold: the retry budget is not used up, the
 * error type is exactly `"llm_error"`, and the detail matches
 * `isEmptyResponseError`. How the retry is performed (same input, or with
 * an extra nudge message) is left to the caller.
 *
 * @param errorType - Error type reported for the run.
 * @param detail - Error detail reported for the run.
 * @param emptyResponseRetries - Number of empty-response retries already made.
 * @param maxEmptyResponseRetries - Maximum number of empty-response retries.
 * @returns `true` when the caller should retry.
 */
export function isEmptyResponseRetryable(
  errorType: unknown,
  detail: unknown,
  emptyResponseRetries: number,
  maxEmptyResponseRetries: number,
): boolean {
  // Expressed as a negated ">=" so NaN inputs behave exactly like the guard form.
  const budgetExhausted = emptyResponseRetries >= maxEmptyResponseRetries;
  const isLlmError = errorType === "llm_error";
  return !budgetExhausted && isLlmError && isEmptyResponseError(detail);
}
+const EMPTY_RESPONSE_MAX_RETRIES = 2; + // Retry config for 409 "conversation busy" errors (exponential backoff) const CONVERSATION_BUSY_MAX_RETRIES = 3; // 2.5s -> 5s -> 10s const CONVERSATION_BUSY_RETRY_BASE_DELAY_MS = 2500; // 2.5 seconds @@ -1610,6 +1615,7 @@ export default function App({ // Retry counter for transient LLM API errors (ref for synchronous access in loop) const llmApiErrorRetriesRef = useRef(0); + const emptyResponseRetriesRef = useRef(0); // Retry counter for 409 "conversation busy" errors const conversationBusyRetriesRef = useRef(0); @@ -3360,6 +3366,7 @@ export default function App({ // Reset retry counters for new conversation turns (fresh budget per user message) if (!allowReentry) { llmApiErrorRetriesRef.current = 0; + emptyResponseRetriesRef.current = 0; conversationBusyRetriesRef.current = 0; } @@ -3954,6 +3961,7 @@ export default function App({ })(); closeTrajectorySegment(); llmApiErrorRetriesRef.current = 0; // Reset retry counter on success + emptyResponseRetriesRef.current = 0; conversationBusyRetriesRef.current = 0; lastDequeuedMessageRef.current = null; // Clear - message was processed successfully lastSentInputRef.current = null; // Clear - no recovery needed @@ -4800,6 +4808,55 @@ export default function App({ continue; } + // Empty LLM response retry (e.g. Opus 4.6 occasionally returns no content). + // Retry 1: same input unchanged. Retry 2: append system reminder nudging the model. + if ( + isEmptyResponseRetryable( + stopReasonToHandle === "llm_api_error" ? "llm_error" : undefined, + detailFromRun, + emptyResponseRetriesRef.current, + EMPTY_RESPONSE_MAX_RETRIES, + ) + ) { + emptyResponseRetriesRef.current += 1; + const attempt = emptyResponseRetriesRef.current; + const delayMs = 500 * attempt; + + // Only append a nudge on the last attempt + if (attempt >= EMPTY_RESPONSE_MAX_RETRIES) { + currentInput = [ + ...currentInput, + { + type: "message" as const, + role: "system" as const, + content: `The previous response was empty. 
Please provide a response with either text content or a tool call.`, + }, + ]; + } + + const statusId = uid("status"); + buffersRef.current.byId.set(statusId, { + kind: "status", + id: statusId, + lines: [ + `Empty LLM response, retrying (attempt ${attempt}/${EMPTY_RESPONSE_MAX_RETRIES})...`, + ], + }); + buffersRef.current.order.push(statusId); + refreshDerived(); + + await new Promise((resolve) => setTimeout(resolve, delayMs)); + + buffersRef.current.byId.delete(statusId); + buffersRef.current.order = buffersRef.current.order.filter( + (id) => id !== statusId, + ); + refreshDerived(); + + buffersRef.current.interrupted = false; + continue; + } + // Check if this is a retriable error (transient LLM API error) const retriable = await isRetriableError( stopReasonToHandle, @@ -4870,6 +4927,7 @@ export default function App({ // Reset retry counters on non-retriable error (or max retries exceeded) llmApiErrorRetriesRef.current = 0; + emptyResponseRetriesRef.current = 0; conversationBusyRetriesRef.current = 0; // Mark incomplete tool calls as finished to prevent stuck blinking UI diff --git a/src/headless.ts b/src/headless.ts index 421f567..e2495a6 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -14,6 +14,7 @@ import { fetchRunErrorDetail, getPreStreamErrorAction, isApprovalPendingError, + isEmptyResponseRetryable, isInvalidToolCallIdsError, parseRetryAfterHeaderMs, shouldRetryRunMetadataError, @@ -110,6 +111,10 @@ import { // caller to manually resubmit the prompt. const LLM_API_ERROR_MAX_RETRIES = 3; +// Retry config for empty response errors (Opus 4.6 SADs) +// Retry 1: same input. Retry 2: with system reminder nudge. 
+const EMPTY_RESPONSE_MAX_RETRIES = 2; + // Retry config for 409 "conversation busy" errors const CONVERSATION_BUSY_MAX_RETRIES = 1; // Only retry once, fail on 2nd 409 const CONVERSATION_BUSY_RETRY_DELAY_MS = 2500; // 2.5 seconds @@ -1443,6 +1448,7 @@ ${SYSTEM_REMINDER_CLOSE} // Track lastRunId outside the while loop so it's available in catch block let lastKnownRunId: string | null = null; let llmApiErrorRetries = 0; + let emptyResponseRetries = 0; let conversationBusyRetries = 0; markMilestone("HEADLESS_FIRST_STREAM_START"); measureSinceMilestone("headless-setup-total", "HEADLESS_CLIENT_READY"); @@ -1792,6 +1798,7 @@ ${SYSTEM_REMINDER_CLOSE} if (stopReason === "end_turn") { // Reset retry counters on success llmApiErrorRetries = 0; + emptyResponseRetries = 0; conversationBusyRetries = 0; break; } @@ -2026,6 +2033,53 @@ ${SYSTEM_REMINDER_CLOSE} const detail = metaError?.detail ?? metaError?.error?.detail ?? ""; + // Special handling for empty response errors (Opus 4.6 SADs) + // Empty LLM response retry (e.g. Opus 4.6 occasionally returns no content). + // Retry 1: same input unchanged. Retry 2: append system reminder nudging the model. + if ( + isEmptyResponseRetryable( + errorType, + detail, + emptyResponseRetries, + EMPTY_RESPONSE_MAX_RETRIES, + ) + ) { + const attempt = emptyResponseRetries + 1; + const delayMs = 500 * attempt; + + emptyResponseRetries = attempt; + + // Only append a nudge on the last attempt + if (attempt >= EMPTY_RESPONSE_MAX_RETRIES) { + const nudgeMessage: MessageCreate = { + role: "system", + content: `The previous response was empty. Please provide a response with either text content or a tool call.`, + }; + currentInput = [...currentInput, nudgeMessage]; + } + + if (outputFormat === "stream-json") { + const retryMsg: RetryMessage = { + type: "retry", + reason: "llm_api_error", + attempt, + max_attempts: EMPTY_RESPONSE_MAX_RETRIES, + delay_ms: delayMs, + run_id: lastRunId ?? 
// ── Empty response error detection (LET-7679) ────────────────────────

// Suite for the substring-based detector: positive matches, case handling,
// unrelated details, and non-string inputs.
describe("isEmptyResponseError", () => {
  test("detects empty content in response", () => {
    const detail =
      "LLM provider returned empty content in response (ID: msg_123, model: claude-opus-4-6)";
    expect(isEmptyResponseError(detail)).toBe(true);
  });

  test("detects empty content in streaming response", () => {
    const detail =
      "LLM provider returned empty content in streaming response (model: claude-opus-4-6)";
    expect(isEmptyResponseError(detail)).toBe(true);
  });

  test("case insensitive", () => {
    const shouted = "EMPTY CONTENT IN RESPONSE";
    expect(isEmptyResponseError(shouted)).toBe(true);
  });

  test("returns false for unrelated errors", () => {
    for (const detail of ["Connection error", "Rate limit exceeded"]) {
      expect(isEmptyResponseError(detail)).toBe(false);
    }
  });

  test("returns false for non-string input", () => {
    for (const value of [null, undefined, 123]) {
      expect(isEmptyResponseError(value)).toBe(false);
    }
  });
});

// Suite for the retry decision: budget boundaries, error type gate, and
// detail gate.
describe("isEmptyResponseRetryable", () => {
  const emptyDetail = "LLM provider returned empty content in response";
  const emptyStreamingDetail =
    "LLM provider returned empty content in streaming response";
  const maxRetries = 2;

  test("true when llm_error and empty response detail and under retry budget", () => {
    const result = isEmptyResponseRetryable("llm_error", emptyDetail, 0, maxRetries);
    expect(result).toBe(true);
  });

  test("true at boundary (retries < max)", () => {
    const result = isEmptyResponseRetryable(
      "llm_error",
      emptyStreamingDetail,
      1,
      maxRetries,
    );
    expect(result).toBe(true);
  });

  test("false when retry budget exhausted", () => {
    const result = isEmptyResponseRetryable("llm_error", emptyDetail, 2, maxRetries);
    expect(result).toBe(false);
  });

  test("false when not llm_error type", () => {
    const result = isEmptyResponseRetryable(
      "internal_error",
      emptyDetail,
      0,
      maxRetries,
    );
    expect(result).toBe(false);
  });

  test("false when not empty response error", () => {
    const result = isEmptyResponseRetryable(
      "llm_error",
      "Connection error occurred",
      0,
      maxRetries,
    );
    expect(result).toBe(false);
  });
});