fix: handle Cloudflare HTML 5xx errors gracefully (#1179)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
jnjpng
2026-02-26 18:26:09 -08:00
committed by GitHub
parent 70fac1d1f3
commit 986cb7dc79
4 changed files with 222 additions and 0 deletions

View File

@@ -8,6 +8,7 @@
import type { MessageCreate } from "@letta-ai/letta-client/resources/agents/agents";
import type { ApprovalCreate } from "@letta-ai/letta-client/resources/agents/messages";
import { isCloudflareEdge52xHtmlError } from "../cli/helpers/errorFormatter";
import { isZaiNonRetryableError } from "../cli/helpers/zaiErrors";
// ── Error fragment constants ────────────────────────────────────────
@@ -64,6 +65,10 @@ const NON_RETRYABLE_QUOTA_DETAIL_PATTERNS = [
];
const NON_RETRYABLE_4XX_PATTERN = /Error code:\s*4(0[0-8]|1\d|2\d|3\d|4\d|51)/i;
const RETRYABLE_429_PATTERN = /Error code:\s*429|rate limit|too many requests/i;
/**
 * Reports whether an error detail is a Cloudflare edge HTML error page.
 *
 * Non-string details are never treated as Cloudflare pages; string details
 * are classified by `isCloudflareEdge52xHtmlError`.
 */
function isCloudflareEdge52xDetail(detail: unknown): boolean {
  return typeof detail === "string" && isCloudflareEdge52xHtmlError(detail);
}
function hasNonRetryableQuotaDetail(detail: unknown): boolean {
if (typeof detail !== "string") return false;
@@ -108,6 +113,7 @@ export function isEmptyResponseError(detail: unknown): boolean {
/** Transient provider/network detail that is usually safe to retry. */
export function isRetryableProviderErrorDetail(detail: unknown): boolean {
if (isCloudflareEdge52xDetail(detail)) return true;
if (typeof detail !== "string") return false;
return RETRYABLE_PROVIDER_DETAIL_PATTERNS.some((pattern) =>
detail.includes(pattern),

View File

@@ -14,6 +14,75 @@ function extractReasonList(value: unknown): string[] {
.map((reason) => reason.toLowerCase());
}
/**
 * Fields pulled out of a Cloudflare edge HTML error page.
 * Each field is matched independently, so any of them may be absent.
 */
interface CloudflareEdgeErrorInfo {
/** Edge status code as it appears in the page text (502 or 520-526). */
code?: string;
/** Human-readable status taken from the page title or the "inline-block" span. */
statusText?: string;
/** Host taken from the `utm_campaign` link parameter or a "truncate" span. */
host?: string;
/** Identifier that follows the "Cloudflare Ray ID:" label. */
rayId?: string;
}
/**
 * Matches an edge status code (502 or 520-526) either directly before the
 * HTML doctype ("<code> <!doctype html") or after an "error code" label.
 * The `(?!\d)` lookahead stops a longer number such as "5210" from being
 * read as 521.
 */
const CLOUDFLARE_EDGE_5XX_MARKER_PATTERN =
  /(^|\s)(502|52[0-6])\s*<!doctype html|error code\s*(502|52[0-6])(?!\d)/i;

/** Matches the "| <code>:" shape, as found in a "<host> | <code>: <status>" title. */
const CLOUDFLARE_EDGE_5XX_TITLE_PATTERN = /\|\s*(502|52[0-6])\s*:/i;

/**
 * Decides whether `text` looks like a Cloudflare edge error page for status
 * 502 or 520-526.
 *
 * All three signals must be present (checked case-insensitively):
 * the word "cloudflare", a page marker (a doctype, an `<html` tag, or an
 * "error code" label), and one of the status-code shapes above.
 *
 * @param text Raw error text to classify.
 * @returns `true` when every signal is found, otherwise `false`.
 */
export function isCloudflareEdge52xHtmlError(text: string): boolean {
  const normalized = text.toLowerCase();

  if (!normalized.includes("cloudflare")) return false;

  const hasPageMarker =
    normalized.includes("<!doctype html") ||
    normalized.includes("<html") ||
    normalized.includes("error code");
  if (!hasPageMarker) return false;

  // Both patterns are case-insensitive, so they run on the original text.
  return (
    CLOUDFLARE_EDGE_5XX_MARKER_PATTERN.test(text) ||
    CLOUDFLARE_EDGE_5XX_TITLE_PATTERN.test(text)
  );
}
/**
 * Extracts the displayable fields from a Cloudflare edge HTML error page.
 *
 * @param text Raw error text to inspect.
 * @returns The extracted fields, or `undefined` when `text` is not recognised
 *   by `isCloudflareEdge52xHtmlError` or no field could be extracted.
 */
function parseCloudflareEdgeError(
  text: string,
): CloudflareEdgeErrorInfo | undefined {
  if (!isCloudflareEdge52xHtmlError(text)) return undefined;

  // Status code, in order of preference: "<code> <!doctype html",
  // "error code <code>", then the "| <code>:" title shape.
  // The doctype form accepts the code at the start of the text or after any
  // whitespace, the same anchor the detector uses, so a page that is prefixed
  // with other text still yields its code instead of parsing to nothing.
  const code =
    text.match(/(?:^|\s)(502|52[0-6])\s*<!doctype html/i)?.[1] ??
    text.match(/error code\s*(502|52[0-6])/i)?.[1] ??
    text.match(/\|\s*(502|52[0-6])\s*:/i)?.[1];

  // Status text: the part of the <title> after "<code>:", else the
  // "inline-block" span.
  const statusText =
    text
      .match(/<title>[^<|]*\|\s*(?:502|52[0-6])\s*:\s*([^<]+)/i)?.[1]
      ?.trim() ??
    text.match(/<span\s+class="inline-block">([^<]+)<\/span>/i)?.[1]?.trim();

  // Host: the utm_campaign link parameter, else a span whose tag mentions
  // "truncate".
  const host =
    text.match(/utm_campaign=([a-z0-9.-]+)/i)?.[1] ??
    text.match(/<span[^>]*truncate[^>]*>([a-z0-9.-]+)<\/span>/i)?.[1];

  // Ray ID: the optional <strong> wrapper covers both the wrapped and the
  // bare form, so a single pattern is enough.
  const rayId = text.match(
    /Cloudflare Ray ID:\s*(?:<strong[^>]*>)?([a-z0-9]+)/i,
  )?.[1];

  if (!code && !statusText && !host && !rayId) return undefined;
  return { code, statusText, host, rayId };
}
/**
 * Turns a Cloudflare edge HTML error page into a one-line, user-facing message.
 *
 * @param text Raw error text to inspect.
 * @returns The formatted message, or `undefined` when
 *   `parseCloudflareEdgeError` does not recognise `text`.
 */
export function checkCloudflareEdgeError(text: string): string | undefined {
  const parsed = parseCloudflareEdgeError(text);
  if (parsed === undefined) return undefined;

  const { code, statusText, host, rayId } = parsed;

  // Assemble the sentence piece by piece; optional pieces are simply omitted.
  const segments: string[] = [];
  segments.push(code ? `Cloudflare ${code}` : "Cloudflare");
  segments.push(statusText ? `: ${statusText}` : " upstream error");
  if (host) segments.push(` for ${host}`);
  if (rayId) segments.push(` (Ray ID: ${rayId})`);
  segments.push(
    ". This is usually a temporary edge/origin outage. Please retry in a moment.",
  );

  return segments.join("");
}
function getErrorReasons(e: APIError): string[] {
const reasons = new Set<string>();
@@ -90,6 +159,62 @@ function getRateLimitResetMs(e: APIError): number | undefined {
return undefined;
}
/**
 * Search an error value for a Cloudflare HTML 52x page and format it.
 *
 * A string is checked directly. For an object, the string fields `detail`
 * and then `message` are checked on the value itself, on `.error`, and on
 * `.error.error`, in that order; the first field that formats wins. An
 * `Error` instance has its `message` checked before anything else.
 */
function findAndFormatCloudflareEdgeError(e: unknown): string | undefined {
  if (typeof e === "string") return checkCloudflareEdgeError(e);
  if (typeof e !== "object" || e === null) return undefined;

  if (e instanceof Error) {
    const fromErrorMessage = checkCloudflareEdgeError(e.message);
    if (fromErrorMessage) return fromErrorMessage;
  }

  const candidateKeys = ["detail", "message"] as const;
  const deepestLevel = 2; // 0 = e, 1 = e.error, 2 = e.error.error

  let current = e as Record<string, unknown>;
  for (let level = 0; ; level += 1) {
    for (const key of candidateKeys) {
      const value = current[key];
      if (typeof value !== "string") continue;
      const formatted = checkCloudflareEdgeError(value);
      if (formatted) return formatted;
    }

    if (level === deepestLevel) return undefined;

    // Descend only through a truthy, object-typed `error` property.
    const nested = current.error;
    if (!nested || typeof nested !== "object") return undefined;
    current = nested as Record<string, unknown>;
  }
}
/**
* Format a time duration in milliseconds to a human-readable string
*/
@@ -400,6 +525,9 @@ export function formatErrorDetails(
const chatGptUsageLimitMsg = findAndFormatChatGptUsageLimit(e);
if (chatGptUsageLimitMsg) return chatGptUsageLimitMsg;
const cloudflareEdgeMsg = findAndFormatCloudflareEdgeError(e);
if (cloudflareEdgeMsg) return cloudflareEdgeMsg;
// Check for Z.ai provider errors (wrapped in generic "OpenAI" messages)
const errorText =
e instanceof APIError
@@ -562,6 +690,12 @@ export function getRetryStatusMessage(
): string {
if (!errorDetail) return DEFAULT_RETRY_MESSAGE;
const cloudflareInfo = parseCloudflareEdgeError(errorDetail);
if (cloudflareInfo) {
const codeSegment = cloudflareInfo.code ? ` ${cloudflareInfo.code}` : "";
return `Cloudflare${codeSegment} upstream outage, retrying...`;
}
if (checkZaiError(errorDetail)) return "Z.ai API error, retrying...";
if (errorDetail.includes("Anthropic API is overloaded"))

View File

@@ -6,6 +6,7 @@ import {
} from "../../cli/helpers/errorContext";
import {
checkChatGptUsageLimitError,
checkCloudflareEdgeError,
formatErrorDetails,
} from "../../cli/helpers/errorFormatter";
@@ -341,4 +342,75 @@ describe("formatErrorDetails", () => {
expect(message).toContain("High concurrency usage exceeds limits");
expect(message).not.toContain("OpenAI");
});
// Covers formatting of Cloudflare HTML error pages by checkCloudflareEdgeError
// and by formatErrorDetails.
describe("Cloudflare HTML 52x errors", () => {
// Fixture: a minimal 521 page prefixed with the status code. It carries the
// title, the "inline-block" status span, the utm_campaign link and the Ray ID
// that the assertions below look for.
const cloudflare521Html = `521 <!DOCTYPE html>
<html lang="en-US">
<head>
<title>api.letta.com | 521: Web server is down</title>
</head>
<body>
<span class="inline-block">Web server is down</span>
<a href="https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_521&utm_campaign=api.letta.com">cloudflare.com</a>
Cloudflare Ray ID: <strong>9d431b5f6f656c08</strong>
</body>
</html>`;
// The formatted message must name the code, status, host and Ray ID, and must
// not leak the raw markup.
test("formats Cloudflare HTML into a concise friendly message", () => {
const result = checkCloudflareEdgeError(cloudflare521Html);
expect(result).toBeDefined();
expect(result).toContain("Cloudflare 521");
expect(result).toContain("Web server is down");
expect(result).toContain("api.letta.com");
expect(result).toContain("Ray ID: 9d431b5f6f656c08");
expect(result).toContain("retry");
expect(result).not.toContain("<!DOCTYPE html>");
});
// The page sits at error.error.detail, so formatErrorDetails has to look two
// levels below the top-level object to find it.
test("formats via formatErrorDetails for run metadata nested detail", () => {
const errorObject = {
error: {
error: {
detail: cloudflare521Html,
},
},
};
const result = formatErrorDetails(errorObject);
expect(result).toContain("Cloudflare 521");
expect(result).toContain("Web server is down");
expect(result).not.toContain("<html");
});
// Plain HTML with no Cloudflare markers must not be formatted.
test("returns undefined for non-cloudflare html", () => {
const result = checkCloudflareEdgeError(
"<!DOCTYPE html><html><head><title>Example</title></head><body>hello</body></html>",
);
expect(result).toBeUndefined();
});
// 502 variant: uses an "Error code 502" label instead of the "inline-block"
// span, and its title host (letta.com) differs from the utm_campaign host
// (api.letta.com) that the message is expected to contain.
test("formats Cloudflare 502 bad gateway pages", () => {
const cloudflare502Html = `502 <!DOCTYPE html>
<html>
<head>
<title>letta.com | 502: Bad gateway</title>
</head>
<body>
<span class="code-label">Error code 502</span>
Cloudflare Ray ID: <strong>9d43b2d6dab269e2</strong>
<a href="https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.letta.com">cloudflare.com</a>
</body>
</html>`;
const result = checkCloudflareEdgeError(cloudflare502Html);
expect(result).toBeDefined();
expect(result).toContain("Cloudflare 502");
expect(result).toContain("Bad gateway");
expect(result).toContain("api.letta.com");
expect(result).toContain("Ray ID: 9d43b2d6dab269e2");
});
});
});

View File

@@ -221,6 +221,16 @@ describe("provider detail retry helpers", () => {
);
});
// A single-line Cloudflare 521 page (status prefix, title, bare Ray ID) is
// expected to be classified as retryable by both helpers: as run-metadata
// detail for an "llm_error", and as a pre-stream error with no HTTP status.
test("Cloudflare 521 HTML is retryable", () => {
const detail =
"521 <!DOCTYPE html><html><head><title>api.letta.com | 521: Web server is down</title></head><body>Cloudflare Ray ID: 9d431b5f6f656c08</body></html>";
expect(shouldRetryRunMetadataError("llm_error", detail)).toBe(true);
expect(
shouldRetryPreStreamTransientError({ status: undefined, detail }),
).toBe(true);
});
test("pre-stream transient classifier handles status and detail", () => {
expect(
shouldRetryPreStreamTransientError({