From 150ff0d9989c66acce35aa77aa8f360626e216e6 Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Fri, 20 Feb 2026 16:30:20 -0800
Subject: [PATCH] fix: no retry on quota limit errors (#1072)

---
 src/agent/turn-recovery-policy.ts      | 36 +++++++++++++----
 src/cli/helpers/errorFormatter.ts      | 22 +++++++----
 src/tests/cli/errorFormatter.test.ts   | 54 ++++++++++++++++++++++++++
 src/tests/turn-recovery-policy.test.ts | 19 +++++++++
 4 files changed, 116 insertions(+), 15 deletions(-)

diff --git a/src/agent/turn-recovery-policy.ts b/src/agent/turn-recovery-policy.ts
index fe428ad..74f4a43 100644
--- a/src/agent/turn-recovery-policy.ts
+++ b/src/agent/turn-recovery-policy.ts
@@ -47,10 +47,33 @@ const NON_RETRYABLE_PROVIDER_DETAIL_PATTERNS = [
   "context_length_exceeded",
   "invalid_encrypted_content",
 ];
-const NON_RETRYABLE_429_REASONS = ["agents-limit-exceeded"];
+const NON_RETRYABLE_429_REASONS = [
+  "agents-limit-exceeded", // keep entries lowercase: compared to detail.toLowerCase()
+  "exceeded-quota",
+  "free-usage-exceeded",
+  "premium-usage-exceeded",
+  "standard-usage-exceeded",
+  "basic-usage-exceeded",
+  "not-enough-credits",
+];
+const NON_RETRYABLE_QUOTA_DETAIL_PATTERNS = [
+  "hosted model usage limit", // lowercase substring of detail.toLowerCase()
+  "out of credits",
+];
 const NON_RETRYABLE_4XX_PATTERN = /Error code:\s*4(0[0-8]|1\d|2\d|3\d|4\d|51)/i;
 const RETRYABLE_429_PATTERN = /Error code:\s*429|rate limit|too many requests/i;
 
+/** Whether `detail` (lowercased) contains a known quota/credit-limit marker. */
+function hasNonRetryableQuotaDetail(detail: unknown): boolean {
+  if (typeof detail !== "string") return false;
+  const haystack = detail.toLowerCase();
+  const matches = (needle: string) => haystack.includes(needle);
+  return (
+    NON_RETRYABLE_429_REASONS.some(matches) ||
+    NON_RETRYABLE_QUOTA_DETAIL_PATTERNS.some(matches)
+  );
+}
+
 // ── Classifiers ─────────────────────────────────────────────────────
 
 /** Tool call IDs don't match what the server expects. */
@@ -96,11 +119,13 @@ export function shouldRetryRunMetadataError(
   detail: unknown,
 ): boolean {
   const explicitLlmError = errorType === "llm_error";
+  const nonRetryableQuotaDetail = hasNonRetryableQuotaDetail(detail);
   const retryable429Detail =
     typeof detail === "string" && RETRYABLE_429_PATTERN.test(detail);
   const retryableDetail = isRetryableProviderErrorDetail(detail);
   const nonRetryableDetail = isNonRetryableProviderErrorDetail(detail);
 
+  if (nonRetryableQuotaDetail) return false;
   if (nonRetryableDetail && !retryable429Detail) return false;
   if (explicitLlmError) return true;
   return retryable429Detail || retryableDetail;
@@ -112,14 +137,9 @@ export function shouldRetryPreStreamTransientError(opts: {
   detail: unknown;
 }): boolean {
   const { status, detail } = opts;
+  if (hasNonRetryableQuotaDetail(detail)) return false;
+
   if (status === 429) {
-    // Don't retry non-recoverable 429s (e.g. agent limit reached)
-    if (
-      typeof detail === "string" &&
-      NON_RETRYABLE_429_REASONS.some((r) => detail.includes(r))
-    ) {
-      return false;
-    }
     return true;
   }
   if (status !== undefined && status >= 500) return true;
diff --git a/src/cli/helpers/errorFormatter.ts b/src/cli/helpers/errorFormatter.ts
index 0da58ad..38a5b36 100644
--- a/src/cli/helpers/errorFormatter.ts
+++ b/src/cli/helpers/errorFormatter.ts
@@ -167,6 +167,19 @@ function isCreditExhaustedError(e: APIError, reasons?: string[]): boolean {
   return hasErrorReason(e, "not-enough-credits", reasons);
 }
 
+function getTierUsageLimitMessage(reasons: string[]): string | undefined {
+  // Guidance keyed by reason code. Insertion order doubles as precedence, so
+  // premium is checked before standard, and standard before basic.
+  const guidanceByReason: Record<string, string> = {
+    "premium-usage-exceeded": `You've reached your Premium model usage limit. Try switching to Standard or Basic hosted models with /model. View your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`,
+    "standard-usage-exceeded": `You've reached your Standard model usage limit. Try switching to Basic hosted models with /model. View your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`,
+    "basic-usage-exceeded": `You've reached your Basic model usage limit. Try switching models with /model, view your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`,
+  };
+  const hit = Object.keys(guidanceByReason).find((r) => reasons.includes(r));
+  // No tier reason present: the caller falls through to its other checks.
+  return hit === undefined ? undefined : guidanceByReason[hit];
+}
+
 const ENCRYPTED_CONTENT_HINT = [
   "",
   "This occurs when the conversation contains messages with encrypted",
@@ -356,13 +369,8 @@ export function formatErrorDetails(
       return `Your account is out of credits for hosted inference. Add credits, enable auto-recharge, or upgrade at ${LETTA_USAGE_URL}. You can also connect your own provider keys with /connect.`;
     }
 
-    if (
-      hasErrorReason(e, "premium-usage-exceeded", reasons) ||
-      hasErrorReason(e, "standard-usage-exceeded", reasons) ||
-      hasErrorReason(e, "basic-usage-exceeded", reasons)
-    ) {
-      return `You've reached your hosted model usage limit. View your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`;
-    }
+    const tierUsageLimitMsg = getTierUsageLimitMessage(reasons);
+    if (tierUsageLimitMsg) return tierUsageLimitMsg;
 
     if (hasErrorReason(e, "byok-not-available-on-free-tier", reasons)) {
       const { modelDisplayName } = getErrorContext();
diff --git a/src/tests/cli/errorFormatter.test.ts b/src/tests/cli/errorFormatter.test.ts
index 1a5c5ba..f1a0b62 100644
--- a/src/tests/cli/errorFormatter.test.ts
+++ b/src/tests/cli/errorFormatter.test.ts
@@ -182,6 +182,60 @@ describe("formatErrorDetails", () => {
     expect(message).toContain("/model");
   });
 
+  test("uses premium-specific guidance for premium-usage-exceeded", () => {
+    // Build a 429 APIError whose body lists only the premium reason code;
+    // the third argument is undefined and the headers are empty.
+    const body = {
+      error: "Rate limited",
+      reasons: ["premium-usage-exceeded"],
+    };
+    const apiError = new APIError(429, body, undefined, new Headers());
+
+    const formatted = formatErrorDetails(apiError);
+
+    // Tier-specific wording plus the /model hint toward the other tiers.
+    expect(formatted).toContain("Premium model usage limit");
+    expect(formatted).toContain("Standard or Basic hosted models");
+    expect(formatted).toContain("/model");
+    // The tier-agnostic phrase "hosted model usage limit" must not appear.
+    expect(formatted).not.toContain("hosted model usage limit");
+  });
+
+  test("uses standard-specific guidance for standard-usage-exceeded", () => {
+    // Build a 429 APIError whose body lists only the standard reason code;
+    // the third argument is undefined and the headers are empty.
+    const body = {
+      error: "Rate limited",
+      reasons: ["standard-usage-exceeded"],
+    };
+    const apiError = new APIError(429, body, undefined, new Headers());
+
+    const formatted = formatErrorDetails(apiError);
+
+    // Expect the Standard-tier wording, the suggestion to use Basic hosted
+    // models, and the /model command in the formatted text.
+    expect(formatted).toContain("Standard model usage limit");
+    expect(formatted).toContain("Basic hosted models");
+    expect(formatted).toContain("/model");
+  });
+
+  test("uses basic-specific guidance for basic-usage-exceeded", () => {
+    // Build a 429 APIError whose body lists only the basic reason code;
+    // the third argument is undefined and the headers are empty.
+    const body = {
+      error: "Rate limited",
+      reasons: ["basic-usage-exceeded"],
+    };
+    const apiError = new APIError(429, body, undefined, new Headers());
+
+    const formatted = formatErrorDetails(apiError);
+
+    // Expect the Basic-tier wording and the /model command in the
+    // formatted text.
+    expect(formatted).toContain("Basic model usage limit");
+    expect(formatted).toContain("/model");
+  });
+
   test("formats Z.ai error from APIError with embedded error code", () => {
     const error = new APIError(
       429,
diff --git a/src/tests/turn-recovery-policy.test.ts b/src/tests/turn-recovery-policy.test.ts
index aa7b4dc..26847c8 100644
--- a/src/tests/turn-recovery-policy.test.ts
+++ b/src/tests/turn-recovery-policy.test.ts
@@ -195,6 +195,18 @@ describe("provider detail retry helpers", () => {
         "OpenAI API error: invalid_request_error",
       ),
     ).toBe(false);
+    expect(
+      shouldRetryRunMetadataError(
+        "llm_error",
+        '429 {"error":"Rate limited","reasons":["exceeded-quota"]}',
+      ),
+    ).toBe(false);
+    expect(
+      shouldRetryRunMetadataError(
+        "llm_error",
+        "You've reached your hosted model usage limit.",
+      ),
+    ).toBe(false);
   });
 
   test("pre-stream transient classifier handles status and detail", () => {
@@ -218,6 +230,13 @@ describe("provider detail retry helpers", () => {
           '429 {"error":"Rate limited","reasons":["agents-limit-exceeded"]}',
       }),
     ).toBe(false);
+    expect(
+      shouldRetryPreStreamTransientError({
+        status: 429,
+        detail:
+          '429 {"error":"Rate limited","reasons":["premium-usage-exceeded"]}',
+      }),
+    ).toBe(false);
     expect(
       shouldRetryPreStreamTransientError({
         status: 401,