fix: handle Cloudflare HTML 5xx errors gracefully (#1179)
Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
@@ -8,6 +8,7 @@
|
||||
|
||||
import type { MessageCreate } from "@letta-ai/letta-client/resources/agents/agents";
|
||||
import type { ApprovalCreate } from "@letta-ai/letta-client/resources/agents/messages";
|
||||
import { isCloudflareEdge52xHtmlError } from "../cli/helpers/errorFormatter";
|
||||
import { isZaiNonRetryableError } from "../cli/helpers/zaiErrors";
|
||||
|
||||
// ── Error fragment constants ────────────────────────────────────────
|
||||
@@ -64,6 +65,10 @@ const NON_RETRYABLE_QUOTA_DETAIL_PATTERNS = [
|
||||
];
|
||||
// Matches "Error code:" followed by optional whitespace and a status of
// 400-408, 410-449, or 451 (case-insensitive).
// NOTE(review): the `2\d` alternative also matches 429, which
// RETRYABLE_429_PATTERN targets as well — confirm callers test the 429 pattern first.
const NON_RETRYABLE_4XX_PATTERN = /Error code:\s*4(0[0-8]|1\d|2\d|3\d|4\d|51)/i;
|
||||
// Matches "Error code: 429" or the phrases "rate limit" / "too many requests"
// anywhere in the text (case-insensitive).
const RETRYABLE_429_PATTERN = /Error code:\s*429|rate limit|too many requests/i;
|
||||
/**
 * Report whether a provider error detail is a Cloudflare HTML 502/52x page.
 *
 * @param detail - Error detail of unknown shape; only strings can match.
 * @returns `true` when `detail` is a string that
 *   `isCloudflareEdge52xHtmlError` recognises, otherwise `false`.
 */
function isCloudflareEdge52xDetail(detail: unknown): boolean {
  return typeof detail === "string" && isCloudflareEdge52xHtmlError(detail);
}
|
||||
|
||||
function hasNonRetryableQuotaDetail(detail: unknown): boolean {
|
||||
if (typeof detail !== "string") return false;
|
||||
@@ -108,6 +113,7 @@ export function isEmptyResponseError(detail: unknown): boolean {
|
||||
|
||||
/** Transient provider/network detail that is usually safe to retry. */
|
||||
export function isRetryableProviderErrorDetail(detail: unknown): boolean {
|
||||
if (isCloudflareEdge52xDetail(detail)) return true;
|
||||
if (typeof detail !== "string") return false;
|
||||
return RETRYABLE_PROVIDER_DETAIL_PATTERNS.some((pattern) =>
|
||||
detail.includes(pattern),
|
||||
|
||||
@@ -14,6 +14,75 @@ function extractReasonList(value: unknown): string[] {
|
||||
.map((reason) => reason.toLowerCase());
|
||||
}
|
||||
|
||||
/**
 * Details scraped from a Cloudflare-branded HTML error page.
 * Every field is optional because each one is extracted by its own pattern.
 */
interface CloudflareEdgeErrorInfo {
  /** Status code found on the page: 502 or 520-526. */
  code?: string;
  /** Human-readable status, e.g. "Web server is down". */
  statusText?: string;
  /** Hostname taken from the `utm_campaign` link parameter or a `truncate` span. */
  host?: string;
  /** Identifier following "Cloudflare Ray ID:" on the page. */
  rayId?: string;
}

/**
 * Ordered patterns that locate the status code (capture group 1) in a
 * Cloudflare error page. Detection and extraction share this single list so
 * that any text recognised as a Cloudflare page always yields its code.
 */
const CLOUDFLARE_EDGE_CODE_PATTERNS: readonly RegExp[] = [
  // Status code glued in front of the HTML body: "521 <!DOCTYPE html>...".
  /(?:^|\s)(502|52[0-6])\s*<!doctype html/i,
  // "Error code 521" label; the lookahead rejects longer numbers such as 5210.
  /error code\s*(502|52[0-6])(?!\d)/i,
  // Page title of the form "<host> | 521: <status>".
  /\|\s*(502|52[0-6])\s*:/i,
];

/** Return the first 502/52x status code found by the shared patterns, if any. */
function extractCloudflareEdgeCode(text: string): string | undefined {
  for (const pattern of CLOUDFLARE_EDGE_CODE_PATTERNS) {
    const code = pattern.exec(text)?.[1];
    if (code) return code;
  }
  return undefined;
}

/** Collapse whitespace runs in a regex capture; blank captures become `undefined`. */
function cleanCloudflareCapture(value: string | undefined): string | undefined {
  const collapsed = value?.replace(/\s+/g, " ").trim();
  return collapsed ? collapsed : undefined;
}

/**
 * Detect a Cloudflare edge error page embedded in an error string.
 *
 * Despite the "52x" in the name, HTTP 502 is recognised alongside 520-526.
 * All three conditions must hold: the text mentions "cloudflare", it looks
 * like an error page (`<!doctype html`, `<html`, or an "error code" label),
 * and one of the shared code patterns matches.
 *
 * @param text - Raw error text, typically a status code followed by HTML.
 * @returns `true` when the text is recognised as a Cloudflare 502/52x page.
 */
export function isCloudflareEdge52xHtmlError(text: string): boolean {
  const normalized = text.toLowerCase();
  if (!normalized.includes("cloudflare")) return false;

  const looksLikeErrorPage =
    normalized.includes("<!doctype html") ||
    normalized.includes("<html") ||
    normalized.includes("error code");

  return looksLikeErrorPage && extractCloudflareEdgeCode(text) !== undefined;
}

/**
 * Extract the status code, status text, host and Ray ID from a Cloudflare
 * error page.
 *
 * @param text - Raw error text.
 * @returns The extracted details, or `undefined` when `text` is not a
 *   Cloudflare 502/52x page. `code` is always set on a returned object
 *   because detection and extraction use the same patterns.
 */
function parseCloudflareEdgeError(
  text: string,
): CloudflareEdgeErrorInfo | undefined {
  if (!isCloudflareEdge52xHtmlError(text)) return undefined;

  const code = extractCloudflareEdgeCode(text);

  // Prefer the status from the <title>; fall back to the inline status span
  // when the title carries no usable text.
  const statusText =
    cleanCloudflareCapture(
      text.match(/<title>[^<|]*\|\s*(?:502|52[0-6])\s*:\s*([^<]+)/i)?.[1],
    ) ??
    cleanCloudflareCapture(
      text.match(/<span\s+class="inline-block">([^<]+)<\/span>/i)?.[1],
    );

  const host =
    text.match(/utm_campaign=([a-z0-9.-]+)/i)?.[1] ??
    text.match(/<span[^>]*truncate[^>]*>([a-z0-9.-]+)<\/span>/i)?.[1];

  // The optional <strong> wrapper means this one pattern also covers the
  // plain-text form "Cloudflare Ray ID: abc123".
  const rayId = text.match(
    /Cloudflare Ray ID:\s*(?:<strong[^>]*>)?([a-z0-9]+)(?:<\/strong>)?/i,
  )?.[1];

  return { code, statusText, host, rayId };
}

/**
 * Turn a Cloudflare HTML error page into a one-line, user-facing message.
 *
 * @param text - Raw error text.
 * @returns A message such as
 *   "Cloudflare 521: Web server is down for api.letta.com (Ray ID: ...). ..."
 *   or `undefined` when `text` is not a Cloudflare 502/52x page.
 */
export function checkCloudflareEdgeError(text: string): string | undefined {
  const info = parseCloudflareEdgeError(text);
  if (!info) return undefined;

  const codeLabel = info.code ? `Cloudflare ${info.code}` : "Cloudflare";
  const statusSegment = info.statusText
    ? `: ${info.statusText}`
    : " upstream error";
  const hostSegment = info.host ? ` for ${info.host}` : "";
  const raySegment = info.rayId ? ` (Ray ID: ${info.rayId})` : "";

  return `${codeLabel}${statusSegment}${hostSegment}${raySegment}. This is usually a temporary edge/origin outage. Please retry in a moment.`;
}
|
||||
|
||||
function getErrorReasons(e: APIError): string[] {
|
||||
const reasons = new Set<string>();
|
||||
|
||||
@@ -90,6 +159,62 @@ function getRateLimitResetMs(e: APIError): number | undefined {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
 * Search an error value for a Cloudflare HTML 502/52x page and return the
 * friendly message for the first one found.
 *
 * Lookup order: a bare string is checked directly; an `Error`'s `message` is
 * checked first; then `detail` and `message` are checked on the value itself,
 * on its `error` property, and on that object's own `error` property.
 *
 * @param e - Error value of unknown shape.
 * @returns The formatted message, or `undefined` when no candidate matches.
 */
function findAndFormatCloudflareEdgeError(e: unknown): string | undefined {
  if (typeof e === "string") return checkCloudflareEdgeError(e);
  if (typeof e !== "object" || e === null) return undefined;

  if (e instanceof Error) {
    const fromErrorMessage = checkCloudflareEdgeError(e.message);
    if (fromErrorMessage) return fromErrorMessage;
  }

  // Inspect the value, value.error and value.error.error — no deeper.
  const maxLevels = 3;
  let current: unknown = e;

  for (let level = 0; level < maxLevels; level++) {
    if (typeof current !== "object" || current === null) break;

    const record = current as Record<string, unknown>;
    for (const candidate of [record.detail, record.message]) {
      if (typeof candidate !== "string") continue;
      const formatted = checkCloudflareEdgeError(candidate);
      if (formatted) return formatted;
    }

    current = record.error;
  }

  return undefined;
}
|
||||
|
||||
/**
|
||||
* Format a time duration in milliseconds to a human-readable string
|
||||
*/
|
||||
@@ -400,6 +525,9 @@ export function formatErrorDetails(
|
||||
const chatGptUsageLimitMsg = findAndFormatChatGptUsageLimit(e);
|
||||
if (chatGptUsageLimitMsg) return chatGptUsageLimitMsg;
|
||||
|
||||
const cloudflareEdgeMsg = findAndFormatCloudflareEdgeError(e);
|
||||
if (cloudflareEdgeMsg) return cloudflareEdgeMsg;
|
||||
|
||||
// Check for Z.ai provider errors (wrapped in generic "OpenAI" messages)
|
||||
const errorText =
|
||||
e instanceof APIError
|
||||
@@ -562,6 +690,12 @@ export function getRetryStatusMessage(
|
||||
): string {
|
||||
if (!errorDetail) return DEFAULT_RETRY_MESSAGE;
|
||||
|
||||
const cloudflareInfo = parseCloudflareEdgeError(errorDetail);
|
||||
if (cloudflareInfo) {
|
||||
const codeSegment = cloudflareInfo.code ? ` ${cloudflareInfo.code}` : "";
|
||||
return `Cloudflare${codeSegment} upstream outage, retrying...`;
|
||||
}
|
||||
|
||||
if (checkZaiError(errorDetail)) return "Z.ai API error, retrying...";
|
||||
|
||||
if (errorDetail.includes("Anthropic API is overloaded"))
|
||||
|
||||
@@ -6,6 +6,7 @@ import {
|
||||
} from "../../cli/helpers/errorContext";
|
||||
import {
|
||||
checkChatGptUsageLimitError,
|
||||
checkCloudflareEdgeError,
|
||||
formatErrorDetails,
|
||||
} from "../../cli/helpers/errorFormatter";
|
||||
|
||||
@@ -341,4 +342,75 @@ describe("formatErrorDetails", () => {
|
||||
expect(message).toContain("High concurrency usage exceeds limits");
|
||||
expect(message).not.toContain("OpenAI");
|
||||
});
|
||||
|
||||
// Tests for turning Cloudflare HTML 502/52x error pages into friendly messages.
describe("Cloudflare HTML 52x errors", () => {
|
||||
const cloudflare521Html = `521 <!DOCTYPE html>
|
||||
<html lang="en-US">
|
||||
<head>
|
||||
<title>api.letta.com | 521: Web server is down</title>
|
||||
</head>
|
||||
<body>
|
||||
<span class="inline-block">Web server is down</span>
|
||||
<a href="https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_521&utm_campaign=api.letta.com">cloudflare.com</a>
|
||||
Cloudflare Ray ID: <strong>9d431b5f6f656c08</strong>
|
||||
</body>
|
||||
</html>`;
|
||||
|
||||
// The message must carry the code, status text, host and Ray ID, include a
// retry hint, and must not echo the raw doctype.
test("formats Cloudflare HTML into a concise friendly message", () => {
|
||||
const result = checkCloudflareEdgeError(cloudflare521Html);
|
||||
|
||||
expect(result).toBeDefined();
|
||||
expect(result).toContain("Cloudflare 521");
|
||||
expect(result).toContain("Web server is down");
|
||||
expect(result).toContain("api.letta.com");
|
||||
expect(result).toContain("Ray ID: 9d431b5f6f656c08");
|
||||
expect(result).toContain("retry");
|
||||
expect(result).not.toContain("<!DOCTYPE html>");
|
||||
});
|
||||
|
||||
// The page is placed two levels deep, at error.error.detail, and passed to
// formatErrorDetails.
test("formats via formatErrorDetails for run metadata nested detail", () => {
|
||||
const errorObject = {
|
||||
error: {
|
||||
error: {
|
||||
detail: cloudflare521Html,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const result = formatErrorDetails(errorObject);
|
||||
|
||||
expect(result).toContain("Cloudflare 521");
|
||||
expect(result).toContain("Web server is down");
|
||||
expect(result).not.toContain("<html");
|
||||
});
|
||||
|
||||
// Plain HTML with no Cloudflare markers must not be reformatted.
test("returns undefined for non-cloudflare html", () => {
|
||||
const result = checkCloudflareEdgeError(
|
||||
"<!DOCTYPE html><html><head><title>Example</title></head><body>hello</body></html>",
|
||||
);
|
||||
expect(result).toBeUndefined();
|
||||
});
|
||||
|
||||
// 502 page: the title names letta.com, but the asserted host api.letta.com
// appears only in the utm_campaign link parameter.
test("formats Cloudflare 502 bad gateway pages", () => {
|
||||
const cloudflare502Html = `502 <!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>letta.com | 502: Bad gateway</title>
|
||||
</head>
|
||||
<body>
|
||||
<span class="code-label">Error code 502</span>
|
||||
Cloudflare Ray ID: <strong>9d43b2d6dab269e2</strong>
|
||||
<a href="https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.letta.com">cloudflare.com</a>
|
||||
</body>
|
||||
</html>`;
|
||||
|
||||
const result = checkCloudflareEdgeError(cloudflare502Html);
|
||||
|
||||
expect(result).toBeDefined();
|
||||
expect(result).toContain("Cloudflare 502");
|
||||
expect(result).toContain("Bad gateway");
|
||||
expect(result).toContain("api.letta.com");
|
||||
expect(result).toContain("Ray ID: 9d43b2d6dab269e2");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -221,6 +221,16 @@ describe("provider detail retry helpers", () => {
|
||||
);
|
||||
});
|
||||
|
||||
// A Cloudflare 521 HTML page in the error detail must be classified as
// retryable by both the run-metadata and the pre-stream classifiers.
test("Cloudflare 521 HTML is retryable", () => {
  const cloudflare521Detail =
    "521 <!DOCTYPE html><html><head><title>api.letta.com | 521: Web server is down</title></head><body>Cloudflare Ray ID: 9d431b5f6f656c08</body></html>";

  const retriesRunMetadata = shouldRetryRunMetadataError(
    "llm_error",
    cloudflare521Detail,
  );
  expect(retriesRunMetadata).toBe(true);

  // No HTTP status is supplied, so the decision rests on the detail text alone.
  const retriesPreStream = shouldRetryPreStreamTransientError({
    status: undefined,
    detail: cloudflare521Detail,
  });
  expect(retriesPreStream).toBe(true);
});
|
||||
|
||||
test("pre-stream transient classifier handles status and detail", () => {
|
||||
expect(
|
||||
shouldRetryPreStreamTransientError({
|
||||
|
||||
Reference in New Issue
Block a user