fix: no retry on quota limit errors (#1072)
This commit is contained in:
@@ -47,10 +47,33 @@ const NON_RETRYABLE_PROVIDER_DETAIL_PATTERNS = [
|
||||
"context_length_exceeded",
|
||||
"invalid_encrypted_content",
|
||||
];
|
||||
/**
 * Reason codes that mark a 429 as non-recoverable (agent limits, usage
 * quotas, exhausted credits). All entries are lower-case because
 * `hasNonRetryableQuotaDetail` matches them against lower-cased detail text.
 */
const NON_RETRYABLE_429_REASONS = [
  "agents-limit-exceeded",
  "exceeded-quota",
  "free-usage-exceeded",
  "premium-usage-exceeded",
  "standard-usage-exceeded",
  "basic-usage-exceeded",
  "not-enough-credits",
];

/**
 * Free-form phrases that identify a quota failure when no reason code is
 * present in the detail text. Must stay lower-case: they are compared against
 * a lower-cased copy of the detail.
 */
const NON_RETRYABLE_QUOTA_DETAIL_PATTERNS = [
  "hosted model usage limit",
  "out of credits",
];

/**
 * Matches "Error code: 4xx" (case-insensitive) for 400-408, 410-449 and 451.
 * Note that this range includes 429; 409 and 450 are not matched.
 */
const NON_RETRYABLE_4XX_PATTERN = /Error code:\s*4(0[0-8]|1\d|2\d|3\d|4\d|51)/i;

/**
 * Matches detail text that signals rate limiting (case-insensitive):
 * "Error code: 429", "rate limit", or "too many requests".
 */
const RETRYABLE_429_PATTERN = /Error code:\s*429|rate limit|too many requests/i;

/**
 * Reports whether `detail` describes a quota / usage-limit failure.
 *
 * @param detail - Error detail of unknown shape; only strings are inspected.
 * @returns `true` when the lower-cased detail contains any entry of
 *   `NON_RETRYABLE_429_REASONS` or `NON_RETRYABLE_QUOTA_DETAIL_PATTERNS`;
 *   `false` otherwise, including for every non-string input.
 */
function hasNonRetryableQuotaDetail(detail: unknown): boolean {
  if (typeof detail !== "string") return false;

  // Both lists hold lower-case entries, so match case-insensitively by
  // lower-casing the detail once up front.
  const haystack = detail.toLowerCase();
  const occursInDetail = (needle: string): boolean => haystack.includes(needle);

  return (
    NON_RETRYABLE_429_REASONS.some(occursInDetail) ||
    NON_RETRYABLE_QUOTA_DETAIL_PATTERNS.some(occursInDetail)
  );
}
|
||||
|
||||
// ── Classifiers ─────────────────────────────────────────────────────
|
||||
|
||||
/** Tool call IDs don't match what the server expects. */
|
||||
@@ -96,11 +119,13 @@ export function shouldRetryRunMetadataError(
|
||||
detail: unknown,
|
||||
): boolean {
|
||||
const explicitLlmError = errorType === "llm_error";
|
||||
const nonRetryableQuotaDetail = hasNonRetryableQuotaDetail(detail);
|
||||
const retryable429Detail =
|
||||
typeof detail === "string" && RETRYABLE_429_PATTERN.test(detail);
|
||||
const retryableDetail = isRetryableProviderErrorDetail(detail);
|
||||
const nonRetryableDetail = isNonRetryableProviderErrorDetail(detail);
|
||||
|
||||
if (nonRetryableQuotaDetail) return false;
|
||||
if (nonRetryableDetail && !retryable429Detail) return false;
|
||||
if (explicitLlmError) return true;
|
||||
return retryable429Detail || retryableDetail;
|
||||
@@ -112,14 +137,9 @@ export function shouldRetryPreStreamTransientError(opts: {
|
||||
detail: unknown;
|
||||
}): boolean {
|
||||
const { status, detail } = opts;
|
||||
if (hasNonRetryableQuotaDetail(detail)) return false;
|
||||
|
||||
if (status === 429) {
|
||||
// Don't retry non-recoverable 429s (e.g. agent limit reached)
|
||||
if (
|
||||
typeof detail === "string" &&
|
||||
NON_RETRYABLE_429_REASONS.some((r) => detail.includes(r))
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if (status !== undefined && status >= 500) return true;
|
||||
|
||||
@@ -167,6 +167,19 @@ function isCreditExhaustedError(e: APIError, reasons?: string[]): boolean {
|
||||
return hasErrorReason(e, "not-enough-credits", reasons);
|
||||
}
|
||||
|
||||
/**
 * Builds the tier-specific "usage limit reached" message for a set of error
 * reason codes.
 *
 * Tiers are checked in the order premium, standard, basic, so when several
 * tier reasons are present the first match in that order wins.
 *
 * @param reasons - Reason codes attached to the error.
 * @returns The user-facing message for the matched tier, or `undefined` when
 *   none of the three tier reason codes is present.
 */
function getTierUsageLimitMessage(reasons: string[]): string | undefined {
  if (reasons.includes("premium-usage-exceeded")) {
    return `You've reached your Premium model usage limit. Try switching to Standard or Basic hosted models with /model. View your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`;
  }

  if (reasons.includes("standard-usage-exceeded")) {
    return `You've reached your Standard model usage limit. Try switching to Basic hosted models with /model. View your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`;
  }

  if (reasons.includes("basic-usage-exceeded")) {
    return `You've reached your Basic model usage limit. Try switching models with /model, view your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`;
  }

  return undefined;
}
|
||||
|
||||
const ENCRYPTED_CONTENT_HINT = [
|
||||
"",
|
||||
"This occurs when the conversation contains messages with encrypted",
|
||||
@@ -356,13 +369,8 @@ export function formatErrorDetails(
|
||||
return `Your account is out of credits for hosted inference. Add credits, enable auto-recharge, or upgrade at ${LETTA_USAGE_URL}. You can also connect your own provider keys with /connect.`;
|
||||
}
|
||||
|
||||
if (
|
||||
hasErrorReason(e, "premium-usage-exceeded", reasons) ||
|
||||
hasErrorReason(e, "standard-usage-exceeded", reasons) ||
|
||||
hasErrorReason(e, "basic-usage-exceeded", reasons)
|
||||
) {
|
||||
return `You've reached your hosted model usage limit. View your plan and usage at ${LETTA_USAGE_URL}, or connect your own provider keys with /connect.`;
|
||||
}
|
||||
const tierUsageLimitMsg = getTierUsageLimitMessage(reasons);
|
||||
if (tierUsageLimitMsg) return tierUsageLimitMsg;
|
||||
|
||||
if (hasErrorReason(e, "byok-not-available-on-free-tier", reasons)) {
|
||||
const { modelDisplayName } = getErrorContext();
|
||||
|
||||
@@ -182,6 +182,60 @@ describe("formatErrorDetails", () => {
|
||||
expect(message).toContain("/model");
|
||||
});
|
||||
|
||||
// A 429 carrying the "premium-usage-exceeded" reason must produce the
// Premium-specific guidance rather than the generic hosted-model message.
test("uses premium-specific guidance for premium-usage-exceeded", () => {
  const error = new APIError(
    429,
    {
      error: "Rate limited",
      reasons: ["premium-usage-exceeded"],
    },
    undefined,
    new Headers(),
  );

  const message = formatErrorDetails(error);

  expect(message).toContain("Premium model usage limit");
  expect(message).toContain("Standard or Basic hosted models");
  expect(message).toContain("/model");
  // The generic wording must no longer appear.
  expect(message).not.toContain("hosted model usage limit");
});
|
||||
|
||||
// A 429 carrying the "standard-usage-exceeded" reason must produce the
// Standard-specific guidance, which points the user at Basic hosted models.
test("uses standard-specific guidance for standard-usage-exceeded", () => {
  const error = new APIError(
    429,
    {
      error: "Rate limited",
      reasons: ["standard-usage-exceeded"],
    },
    undefined,
    new Headers(),
  );

  const message = formatErrorDetails(error);

  expect(message).toContain("Standard model usage limit");
  expect(message).toContain("Basic hosted models");
  expect(message).toContain("/model");
});
|
||||
|
||||
// A 429 carrying the "basic-usage-exceeded" reason must produce the
// Basic-specific guidance and still mention the /model command.
test("uses basic-specific guidance for basic-usage-exceeded", () => {
  const error = new APIError(
    429,
    {
      error: "Rate limited",
      reasons: ["basic-usage-exceeded"],
    },
    undefined,
    new Headers(),
  );

  const message = formatErrorDetails(error);

  expect(message).toContain("Basic model usage limit");
  expect(message).toContain("/model");
});
|
||||
|
||||
test("formats Z.ai error from APIError with embedded error code", () => {
|
||||
const error = new APIError(
|
||||
429,
|
||||
|
||||
@@ -195,6 +195,18 @@ describe("provider detail retry helpers", () => {
|
||||
"OpenAI API error: invalid_request_error",
|
||||
),
|
||||
).toBe(false);
|
||||
expect(
|
||||
shouldRetryRunMetadataError(
|
||||
"llm_error",
|
||||
'429 {"error":"Rate limited","reasons":["exceeded-quota"]}',
|
||||
),
|
||||
).toBe(false);
|
||||
expect(
|
||||
shouldRetryRunMetadataError(
|
||||
"llm_error",
|
||||
"You've reached your hosted model usage limit.",
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
test("pre-stream transient classifier handles status and detail", () => {
|
||||
@@ -218,6 +230,13 @@ describe("provider detail retry helpers", () => {
|
||||
'429 {"error":"Rate limited","reasons":["agents-limit-exceeded"]}',
|
||||
}),
|
||||
).toBe(false);
|
||||
expect(
|
||||
shouldRetryPreStreamTransientError({
|
||||
status: 429,
|
||||
detail:
|
||||
'429 {"error":"Rate limited","reasons":["premium-usage-exceeded"]}',
|
||||
}),
|
||||
).toBe(false);
|
||||
expect(
|
||||
shouldRetryPreStreamTransientError({
|
||||
status: 401,
|
||||
|
||||
Reference in New Issue
Block a user