fix: no retry on quota limit errors (#1072)

2026-02-20 16:30:20 -08:00
parent 2e8b00f8cc
commit 150ff0d998
4 changed files with 116 additions and 15 deletions
--- a/src/agent/turn-recovery-policy.ts
+++ b/src/agent/turn-recovery-policy.ts
@@ -47,10 +47,33 @@ const NON_RETRYABLE_PROVIDER_DETAIL_PATTERNS = [
  "context_length_exceeded",
  "invalid_encrypted_content",
 ];
-const NON_RETRYABLE_429_REASONS = ["agents-limit-exceeded"];
+const NON_RETRYABLE_429_REASONS = [ // keep entries lowercase: hasNonRetryableQuotaDetail substring-matches them against a lowercased detail string
+  "agents-limit-exceeded",
+  "exceeded-quota",
+  "free-usage-exceeded",
+  "premium-usage-exceeded",
+  "standard-usage-exceeded",
+  "basic-usage-exceeded",
+  "not-enough-credits",
+];
+const NON_RETRYABLE_QUOTA_DETAIL_PATTERNS = [
+  "model usage limit", // lowercase, tier-agnostic: covers "hosted", "Premium", "Standard" and "Basic model usage limit" wording
+  "out of credits",
+];
 const NON_RETRYABLE_4XX_PATTERN = /Error code:\s*4(0[0-8]|1\d|2\d|3\d|4\d|51)/i;
 const RETRYABLE_429_PATTERN = /Error code:\s*429|rate limit|too many requests/i;

+/** True when `detail` is a string containing any non-retryable 429 reason or quota detail pattern. */
+function hasNonRetryableQuotaDetail(detail: unknown): boolean {
+  if (typeof detail !== "string") return false;
+  // Markers are lowercase, so compare against a lowercased copy of the detail.
+  const haystack = detail.toLowerCase();
+  return [
+    ...NON_RETRYABLE_429_REASONS,
+    ...NON_RETRYABLE_QUOTA_DETAIL_PATTERNS,
+  ].some((marker) => haystack.includes(marker));
+}
+
 // ── Classifiers ─────────────────────────────────────────────────────

 /** Tool call IDs don't match what the server expects. */
@@ -96,11 +119,13 @@ export function shouldRetryRunMetadataError(
  detail: unknown,
 ): boolean {
  const explicitLlmError = errorType === "llm_error";
+  const nonRetryableQuotaDetail = hasNonRetryableQuotaDetail(detail);
  const retryable429Detail =
    typeof detail === "string" && RETRYABLE_429_PATTERN.test(detail);
  const retryableDetail = isRetryableProviderErrorDetail(detail);
  const nonRetryableDetail = isNonRetryableProviderErrorDetail(detail);

+  if (nonRetryableQuotaDetail) return false;
  if (nonRetryableDetail && !retryable429Detail) return false;
  if (explicitLlmError) return true;
  return retryable429Detail || retryableDetail;
@@ -112,14 +137,9 @@ export function shouldRetryPreStreamTransientError(opts: {
  detail: unknown;
 }): boolean {
  const { status, detail } = opts;
+  if (hasNonRetryableQuotaDetail(detail)) return false;
+
  if (status === 429) {
-    // Don't retry non-recoverable 429s (e.g. agent limit reached)
-    if (
-      typeof detail === "string" &&
-      NON_RETRYABLE_429_REASONS.some((r) => detail.includes(r))
-    ) {
-      return false;
-    }
    return true;
  }
  if (status !== undefined && status >= 500) return true;
--- a/src/cli/helpers/errorFormatter.ts
+++ b/src/cli/helpers/errorFormatter.ts
@@ -167,6 +167,19 @@ function isCreditExhaustedError(e: APIError, reasons?: string[]): boolean {
  return hasErrorReason(e, "not-enough-credits", reasons);
 }

+/**
+ * Usage-limit guidance for the first tier reason found in `reasons` (premium, then standard, then basic), else undefined.
+ */
+function getTierUsageLimitMessage(reasons: string[]): string | undefined {
+  const guidanceByReason: Array<[string, string]> = [
+    ["premium-usage-exceeded", `You've reached your Premium model usage limit. Try switching to Standard or Basic hosted models with /model. View your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`],
+    ["standard-usage-exceeded", `You've reached your Standard model usage limit. Try switching to Basic hosted models with /model. View your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`],
+    ["basic-usage-exceeded", `You've reached your Basic model usage limit. Try switching models with /model, view your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`],
+  ];
+  const hit = guidanceByReason.find(([reason]) => reasons.includes(reason));
+  return hit ? hit[1] : undefined;
+}
+
 const ENCRYPTED_CONTENT_HINT = [
  "",
  "This occurs when the conversation contains messages with encrypted",
@@ -356,13 +369,8 @@ export function formatErrorDetails(
      return `Your account is out of credits for hosted inference. Add credits, enable auto-recharge, or upgrade at ${LETTA_USAGE_URL}. You can also connect your own provider keys with /connect.`;
    }

-    if (
-      hasErrorReason(e, "premium-usage-exceeded", reasons) ||
-      hasErrorReason(e, "standard-usage-exceeded", reasons) ||
-      hasErrorReason(e, "basic-usage-exceeded", reasons)
-    ) {
-      return `You've reached your hosted model usage limit. View your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`;
-    }
+    const tierUsageLimitMsg = getTierUsageLimitMessage(reasons);
+    if (tierUsageLimitMsg) return tierUsageLimitMsg;

    if (hasErrorReason(e, "byok-not-available-on-free-tier", reasons)) {
      const { modelDisplayName } = getErrorContext();
--- a/src/tests/cli/errorFormatter.test.ts
+++ b/src/tests/cli/errorFormatter.test.ts
@@ -182,6 +182,60 @@ describe("formatErrorDetails", () => {
    expect(message).toContain("/model");
  });

+  test("uses premium-specific guidance for premium-usage-exceeded", () => {
+    const error = new APIError(
+      429,
+      {
+        error: "Rate limited",
+        reasons: ["premium-usage-exceeded"],
+      },
+      undefined,
+      new Headers(),
+    );
+    // Format a 429 whose only reason is the premium tier limit.
+    const message = formatErrorDetails(error);
+    // Premium copy must point at Standard/Basic via /model and avoid the generic "hosted model usage limit" phrase.
+    expect(message).toContain("Premium model usage limit");
+    expect(message).toContain("Standard or Basic hosted models");
+    expect(message).toContain("/model");
+    expect(message).not.toContain("hosted model usage limit");
+  });
+
+  test("uses standard-specific guidance for standard-usage-exceeded", () => {
+    const error = new APIError(
+      429,
+      {
+        error: "Rate limited",
+        reasons: ["standard-usage-exceeded"],
+      },
+      undefined,
+      new Headers(),
+    );
+    // Format a 429 whose only reason is the standard tier limit.
+    const message = formatErrorDetails(error);
+    // Standard copy must name the tier and suggest Basic hosted models via /model.
+    expect(message).toContain("Standard model usage limit");
+    expect(message).toContain("Basic hosted models");
+    expect(message).toContain("/model");
+  });
+
+  test("uses basic-specific guidance for basic-usage-exceeded", () => {
+    const error = new APIError(
+      429,
+      {
+        error: "Rate limited",
+        reasons: ["basic-usage-exceeded"],
+      },
+      undefined,
+      new Headers(),
+    );
+    // Format a 429 whose only reason is the basic tier limit.
+    const message = formatErrorDetails(error);
+    // Basic copy must name the tier and still mention /model.
+    expect(message).toContain("Basic model usage limit");
+    expect(message).toContain("/model");
+  });
+
  test("formats Z.ai error from APIError with embedded error code", () => {
    const error = new APIError(
      429,
--- a/src/tests/turn-recovery-policy.test.ts
+++ b/src/tests/turn-recovery-policy.test.ts
@@ -195,6 +195,18 @@ describe("provider detail retry helpers", () => {
        "OpenAI API error: invalid_request_error",
      ),
    ).toBe(false);
+    expect(
+      shouldRetryRunMetadataError(
+        "llm_error",
+        '429 {"error":"Rate limited","reasons":["exceeded-quota"]}',
+      ),
+    ).toBe(false);
+    expect(
+      shouldRetryRunMetadataError(
+        "llm_error",
+        "You've reached your hosted model usage limit.",
+      ),
+    ).toBe(false);
  });

  test("pre-stream transient classifier handles status and detail", () => {
@@ -218,6 +230,13 @@ describe("provider detail retry helpers", () => {
          '429 {"error":"Rate limited","reasons":["agents-limit-exceeded"]}',
      }),
    ).toBe(false);
+    expect(
+      shouldRetryPreStreamTransientError({
+        status: 429,
+        detail:
+          '429 {"error":"Rate limited","reasons":["premium-usage-exceeded"]}',
+      }),
+    ).toBe(false);
    expect(
      shouldRetryPreStreamTransientError({
        status: 401,