fix: no retry on quota limit errors (#1072)

This commit is contained in:
Charles Packer
2026-02-20 16:30:20 -08:00
committed by GitHub
parent 2e8b00f8cc
commit 150ff0d998
4 changed files with 116 additions and 15 deletions

View File

@@ -47,10 +47,33 @@ const NON_RETRYABLE_PROVIDER_DETAIL_PATTERNS = [
"context_length_exceeded",
"invalid_encrypted_content",
];
const NON_RETRYABLE_429_REASONS = ["agents-limit-exceeded"];
/**
 * Reason codes that mark an error as a hard limit rather than a transient
 * rate limit. They are matched as substrings of lower-cased detail text, so
 * every entry must stay lower-case.
 */
const NON_RETRYABLE_429_REASONS = [
  "agents-limit-exceeded",
  "exceeded-quota",
  "free-usage-exceeded",
  "premium-usage-exceeded",
  "standard-usage-exceeded",
  "basic-usage-exceeded",
  "not-enough-credits",
];

/**
 * Free-form phrases that signal quota exhaustion. Also matched against
 * lower-cased detail text, so entries must stay lower-case.
 */
const NON_RETRYABLE_QUOTA_DETAIL_PATTERNS = [
  "hosted model usage limit",
  "out of credits",
];

/**
 * Case-insensitive match for "Error code: 4xx" covering 400-408, 410-449 and
 * 451. Note that 429 falls inside this range while 409 and 450 do not.
 */
const NON_RETRYABLE_4XX_PATTERN = /Error code:\s*4(0[0-8]|1\d|2\d|3\d|4\d|51)/i;

/** Case-insensitive match for "Error code: 429" or rate-limit wording. */
const RETRYABLE_429_PATTERN = /Error code:\s*429|rate limit|too many requests/i;

/**
 * Reports whether an error detail describes a quota or usage-limit failure.
 *
 * @param detail - Error detail of unknown shape; anything that is not a
 *   string is treated as "no quota detail".
 * @returns `true` when the lower-cased detail contains any entry of
 *   `NON_RETRYABLE_429_REASONS` or `NON_RETRYABLE_QUOTA_DETAIL_PATTERNS`.
 */
function hasNonRetryableQuotaDetail(detail: unknown): boolean {
  if (typeof detail !== "string") {
    return false;
  }

  // Both lists are lower-case, so one lower-cased copy gives a
  // case-insensitive substring search.
  const haystack = detail.toLowerCase();
  const mentions = (needle: string): boolean => haystack.includes(needle);

  if (NON_RETRYABLE_429_REASONS.some(mentions)) {
    return true;
  }
  return NON_RETRYABLE_QUOTA_DETAIL_PATTERNS.some(mentions);
}
// ── Classifiers ─────────────────────────────────────────────────────
/** Tool call IDs don't match what the server expects. */
@@ -96,11 +119,13 @@ export function shouldRetryRunMetadataError(
detail: unknown,
): boolean {
const explicitLlmError = errorType === "llm_error";
const nonRetryableQuotaDetail = hasNonRetryableQuotaDetail(detail);
const retryable429Detail =
typeof detail === "string" && RETRYABLE_429_PATTERN.test(detail);
const retryableDetail = isRetryableProviderErrorDetail(detail);
const nonRetryableDetail = isNonRetryableProviderErrorDetail(detail);
if (nonRetryableQuotaDetail) return false;
if (nonRetryableDetail && !retryable429Detail) return false;
if (explicitLlmError) return true;
return retryable429Detail || retryableDetail;
@@ -112,14 +137,9 @@ export function shouldRetryPreStreamTransientError(opts: {
detail: unknown;
}): boolean {
const { status, detail } = opts;
if (hasNonRetryableQuotaDetail(detail)) return false;
if (status === 429) {
// Don't retry non-recoverable 429s (e.g. agent limit reached)
if (
typeof detail === "string" &&
NON_RETRYABLE_429_REASONS.some((r) => detail.includes(r))
) {
return false;
}
return true;
}
if (status !== undefined && status >= 500) return true;

View File

@@ -167,6 +167,19 @@ function isCreditExhaustedError(e: APIError, reasons?: string[]): boolean {
return hasErrorReason(e, "not-enough-credits", reasons);
}
/**
 * Builds the user-facing guidance for a per-tier hosted usage limit.
 *
 * Tiers are checked from Premium down to Basic, so when several tier reasons
 * are present the highest tier's message is returned.
 *
 * @param reasons - Reason codes reported with the error.
 * @returns The tier-specific message, or `undefined` when `reasons` contains
 *   none of the tier usage codes.
 */
function getTierUsageLimitMessage(reasons: string[]): string | undefined {
  // Priority order matters: the first code found in `reasons` wins.
  const exceededTier = [
    "premium-usage-exceeded",
    "standard-usage-exceeded",
    "basic-usage-exceeded",
  ].find((code) => reasons.includes(code));

  switch (exceededTier) {
    case "premium-usage-exceeded":
      return `You've reached your Premium model usage limit. Try switching to Standard or Basic hosted models with /model. View your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`;
    case "standard-usage-exceeded":
      return `You've reached your Standard model usage limit. Try switching to Basic hosted models with /model. View your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`;
    case "basic-usage-exceeded":
      return `You've reached your Basic model usage limit. Try switching models with /model, view your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`;
    default:
      return undefined;
  }
}
const ENCRYPTED_CONTENT_HINT = [
"",
"This occurs when the conversation contains messages with encrypted",
@@ -356,13 +369,8 @@ export function formatErrorDetails(
return `Your account is out of credits for hosted inference. Add credits, enable auto-recharge, or upgrade at ${LETTA_USAGE_URL}. You can also connect your own provider keys with /connect.`;
}
if (
hasErrorReason(e, "premium-usage-exceeded", reasons) ||
hasErrorReason(e, "standard-usage-exceeded", reasons) ||
hasErrorReason(e, "basic-usage-exceeded", reasons)
) {
return `You've reached your hosted model usage limit. View your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`;
}
const tierUsageLimitMsg = getTierUsageLimitMessage(reasons);
if (tierUsageLimitMsg) return tierUsageLimitMsg;
if (hasErrorReason(e, "byok-not-available-on-free-tier", reasons)) {
const { modelDisplayName } = getErrorContext();

View File

@@ -182,6 +182,60 @@ describe("formatErrorDetails", () => {
expect(message).toContain("/model");
});
// A 429 carrying only the premium tier reason must produce the
// Premium-specific message rather than the generic usage-limit wording.
test("uses premium-specific guidance for premium-usage-exceeded", () => {
  const formatted = formatErrorDetails(
    new APIError(
      429,
      {
        error: "Rate limited",
        reasons: ["premium-usage-exceeded"],
      },
      undefined,
      new Headers(),
    ),
  );

  const expectedFragments = [
    "Premium model usage limit",
    "Standard or Basic hosted models",
    "/model",
  ];
  for (const fragment of expectedFragments) {
    expect(formatted).toContain(fragment);
  }

  // The generic phrasing must not be used for a tier-specific limit.
  expect(formatted).not.toContain("hosted model usage limit");
});
// A 429 carrying only the standard tier reason must produce the
// Standard-specific message rather than the generic usage-limit wording.
test("uses standard-specific guidance for standard-usage-exceeded", () => {
  const error = new APIError(
    429,
    {
      error: "Rate limited",
      reasons: ["standard-usage-exceeded"],
    },
    undefined,
    new Headers(),
  );

  const message = formatErrorDetails(error);

  expect(message).toContain("Standard model usage limit");
  expect(message).toContain("Basic hosted models");
  expect(message).toContain("/model");
  // Same guard as the premium case: a regression to the generic message
  // would still mention /model, so the generic phrasing is excluded explicitly.
  expect(message).not.toContain("hosted model usage limit");
});
// A 429 carrying only the basic tier reason must produce the
// Basic-specific message rather than the generic usage-limit wording.
test("uses basic-specific guidance for basic-usage-exceeded", () => {
  const error = new APIError(
    429,
    {
      error: "Rate limited",
      reasons: ["basic-usage-exceeded"],
    },
    undefined,
    new Headers(),
  );

  const message = formatErrorDetails(error);

  expect(message).toContain("Basic model usage limit");
  expect(message).toContain("/model");
  // Same guard as the premium case: a regression to the generic message
  // would still mention /model, so the generic phrasing is excluded explicitly.
  expect(message).not.toContain("hosted model usage limit");
});
test("formats Z.ai error from APIError with embedded error code", () => {
const error = new APIError(
429,

View File

@@ -195,6 +195,18 @@ describe("provider detail retry helpers", () => {
"OpenAI API error: invalid_request_error",
),
).toBe(false);
expect(
shouldRetryRunMetadataError(
"llm_error",
'429 {"error":"Rate limited","reasons":["exceeded-quota"]}',
),
).toBe(false);
expect(
shouldRetryRunMetadataError(
"llm_error",
"You've reached your hosted model usage limit.",
),
).toBe(false);
});
test("pre-stream transient classifier handles status and detail", () => {
@@ -218,6 +230,13 @@ describe("provider detail retry helpers", () => {
'429 {"error":"Rate limited","reasons":["agents-limit-exceeded"]}',
}),
).toBe(false);
expect(
shouldRetryPreStreamTransientError({
status: 429,
detail:
'429 {"error":"Rate limited","reasons":["premium-usage-exceeded"]}',
}),
).toBe(false);
expect(
shouldRetryPreStreamTransientError({
status: 401,