From 150ff0d9989c66acce35aa77aa8f360626e216e6 Mon Sep 17 00:00:00 2001 From: Charles Packer Date: Fri, 20 Feb 2026 16:30:20 -0800 Subject: [PATCH] fix: no retry on quota limit errors (#1072) --- src/agent/turn-recovery-policy.ts | 36 +++++++++++++---- src/cli/helpers/errorFormatter.ts | 22 +++++++---- src/tests/cli/errorFormatter.test.ts | 54 ++++++++++++++++++++++++++ src/tests/turn-recovery-policy.test.ts | 19 +++++++++ 4 files changed, 116 insertions(+), 15 deletions(-) diff --git a/src/agent/turn-recovery-policy.ts b/src/agent/turn-recovery-policy.ts index fe428ad..74f4a43 100644 --- a/src/agent/turn-recovery-policy.ts +++ b/src/agent/turn-recovery-policy.ts @@ -47,10 +47,33 @@ const NON_RETRYABLE_PROVIDER_DETAIL_PATTERNS = [ "context_length_exceeded", "invalid_encrypted_content", ]; -const NON_RETRYABLE_429_REASONS = ["agents-limit-exceeded"]; +const NON_RETRYABLE_429_REASONS = [ + "agents-limit-exceeded", + "exceeded-quota", + "free-usage-exceeded", + "premium-usage-exceeded", + "standard-usage-exceeded", + "basic-usage-exceeded", + "not-enough-credits", +]; +const NON_RETRYABLE_QUOTA_DETAIL_PATTERNS = [ + "hosted model usage limit", + "out of credits", +]; const NON_RETRYABLE_4XX_PATTERN = /Error code:\s*4(0[0-8]|1\d|2\d|3\d|4\d|51)/i; const RETRYABLE_429_PATTERN = /Error code:\s*429|rate limit|too many requests/i; +function hasNonRetryableQuotaDetail(detail: unknown): boolean { + if (typeof detail !== "string") return false; + const normalized = detail.toLowerCase(); + return ( + NON_RETRYABLE_429_REASONS.some((reason) => normalized.includes(reason)) || + NON_RETRYABLE_QUOTA_DETAIL_PATTERNS.some((pattern) => + normalized.includes(pattern), + ) + ); +} + // ── Classifiers ───────────────────────────────────────────────────── /** Tool call IDs don't match what the server expects. */ @@ -96,11 +119,13 @@ export function shouldRetryRunMetadataError( detail: unknown, ): boolean { const explicitLlmError = errorType === "llm_error"; + const nonRetryableQuotaDetail = hasNonRetryableQuotaDetail(detail); const retryable429Detail = typeof detail === "string" && RETRYABLE_429_PATTERN.test(detail); const retryableDetail = isRetryableProviderErrorDetail(detail); const nonRetryableDetail = isNonRetryableProviderErrorDetail(detail); + if (nonRetryableQuotaDetail) return false; if (nonRetryableDetail && !retryable429Detail) return false; if (explicitLlmError) return true; return retryable429Detail || retryableDetail; @@ -112,14 +137,9 @@ export function shouldRetryPreStreamTransientError(opts: { detail: unknown; }): boolean { const { status, detail } = opts; + if (hasNonRetryableQuotaDetail(detail)) return false; + if (status === 429) { - // Don't retry non-recoverable 429s (e.g. agent limit reached) - if ( - typeof detail === "string" && - NON_RETRYABLE_429_REASONS.some((r) => detail.includes(r)) - ) { - return false; - } return true; } if (status !== undefined && status >= 500) return true; diff --git a/src/cli/helpers/errorFormatter.ts b/src/cli/helpers/errorFormatter.ts index 0da58ad..38a5b36 100644 --- a/src/cli/helpers/errorFormatter.ts +++ b/src/cli/helpers/errorFormatter.ts @@ -167,6 +167,19 @@ function isCreditExhaustedError(e: APIError, reasons?: string[]): boolean { return hasErrorReason(e, "not-enough-credits", reasons); } +function getTierUsageLimitMessage(reasons: string[]): string | undefined { + if (reasons.includes("premium-usage-exceeded")) { + return `You've reached your Premium model usage limit. Try switching to Standard or Basic hosted models with /model. View your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`; + } + if (reasons.includes("standard-usage-exceeded")) { + return `You've reached your Standard model usage limit. Try switching to Basic hosted models with /model. View your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`; + } + if (reasons.includes("basic-usage-exceeded")) { + return `You've reached your Basic model usage limit. Try switching models with /model, view your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`; + } + return undefined; +} + const ENCRYPTED_CONTENT_HINT = [ "", "This occurs when the conversation contains messages with encrypted", @@ -356,13 +369,8 @@ export function formatErrorDetails( return `Your account is out of credits for hosted inference. Add credits, enable auto-recharge, or upgrade at ${LETTA_USAGE_URL}. You can also connect your own provider keys with /connect.`; } - if ( - hasErrorReason(e, "premium-usage-exceeded", reasons) || - hasErrorReason(e, "standard-usage-exceeded", reasons) || - hasErrorReason(e, "basic-usage-exceeded", reasons) - ) { - return `You've reached your hosted model usage limit. View your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`; - } + const tierUsageLimitMsg = getTierUsageLimitMessage(reasons); + if (tierUsageLimitMsg) return tierUsageLimitMsg; if (hasErrorReason(e, "byok-not-available-on-free-tier", reasons)) { const { modelDisplayName } = getErrorContext(); diff --git a/src/tests/cli/errorFormatter.test.ts b/src/tests/cli/errorFormatter.test.ts index 1a5c5ba..f1a0b62 100644 --- a/src/tests/cli/errorFormatter.test.ts +++ b/src/tests/cli/errorFormatter.test.ts @@ -182,6 +182,60 @@ describe("formatErrorDetails", () => { expect(message).toContain("/model"); }); + test("uses premium-specific guidance for premium-usage-exceeded", () => { + const error = new APIError( + 429, + { + error: "Rate limited", + reasons: ["premium-usage-exceeded"], + }, + undefined, + new Headers(), + ); + + const message = formatErrorDetails(error); + + expect(message).toContain("Premium model usage limit"); + expect(message).toContain("Standard or Basic hosted models"); + expect(message).toContain("/model"); + expect(message).not.toContain("hosted model usage limit"); + }); + + test("uses standard-specific guidance for standard-usage-exceeded", () => { + const error = new APIError( + 429, + { + error: "Rate limited", + reasons: ["standard-usage-exceeded"], + }, + undefined, + new Headers(), + ); + + const message = formatErrorDetails(error); + + expect(message).toContain("Standard model usage limit"); + expect(message).toContain("Basic hosted models"); + expect(message).toContain("/model"); + }); + + test("uses basic-specific guidance for basic-usage-exceeded", () => { + const error = new APIError( + 429, + { + error: "Rate limited", + reasons: ["basic-usage-exceeded"], + }, + undefined, + new Headers(), + ); + + const message = formatErrorDetails(error); + + expect(message).toContain("Basic model usage limit"); + expect(message).toContain("/model"); + }); + test("formats Z.ai error from APIError with embedded error code", () => { const error = new APIError( 429, diff --git a/src/tests/turn-recovery-policy.test.ts b/src/tests/turn-recovery-policy.test.ts index aa7b4dc..26847c8 100644 --- a/src/tests/turn-recovery-policy.test.ts +++ b/src/tests/turn-recovery-policy.test.ts @@ -195,6 +195,18 @@ describe("provider detail retry helpers", () => { "OpenAI API error: invalid_request_error", ), ).toBe(false); + expect( + shouldRetryRunMetadataError( + "llm_error", + '429 {"error":"Rate limited","reasons":["exceeded-quota"]}', + ), + ).toBe(false); + expect( + shouldRetryRunMetadataError( + "llm_error", + "You've reached your hosted model usage limit.", + ), + ).toBe(false); }); test("pre-stream transient classifier handles status and detail", () => { @@ -218,6 +230,13 @@ describe("provider detail retry helpers", () => { '429 {"error":"Rate limited","reasons":["agents-limit-exceeded"]}', }), ).toBe(false); + expect( + shouldRetryPreStreamTransientError({ + status: 429, + detail: + '429 {"error":"Rate limited","reasons":["premium-usage-exceeded"]}', + }), + ).toBe(false); expect( shouldRetryPreStreamTransientError({ status: 401,