From 986cb7dc793c5ff2d95aab6ae40f3418250d059b Mon Sep 17 00:00:00 2001 From: jnjpng Date: Thu, 26 Feb 2026 18:26:09 -0800 Subject: [PATCH] fix: handle Cloudflare HTML 5xx errors gracefully (#1179) Co-authored-by: Letta --- src/agent/turn-recovery-policy.ts | 6 ++ src/cli/helpers/errorFormatter.ts | 134 +++++++++++++++++++++++++ src/tests/cli/errorFormatter.test.ts | 72 +++++++++++++ src/tests/turn-recovery-policy.test.ts | 10 ++ 4 files changed, 222 insertions(+) diff --git a/src/agent/turn-recovery-policy.ts b/src/agent/turn-recovery-policy.ts index 5ccda64..f7c7104 100644 --- a/src/agent/turn-recovery-policy.ts +++ b/src/agent/turn-recovery-policy.ts @@ -8,6 +8,7 @@ import type { MessageCreate } from "@letta-ai/letta-client/resources/agents/agents"; import type { ApprovalCreate } from "@letta-ai/letta-client/resources/agents/messages"; +import { isCloudflareEdge52xHtmlError } from "../cli/helpers/errorFormatter"; import { isZaiNonRetryableError } from "../cli/helpers/zaiErrors"; // ── Error fragment constants ──────────────────────────────────────── @@ -64,6 +65,10 @@ const NON_RETRYABLE_QUOTA_DETAIL_PATTERNS = [ ]; const NON_RETRYABLE_4XX_PATTERN = /Error code:\s*4(0[0-8]|1\d|2\d|3\d|4\d|51)/i; const RETRYABLE_429_PATTERN = /Error code:\s*429|rate limit|too many requests/i; +function isCloudflareEdge52xDetail(detail: unknown): boolean { + if (typeof detail !== "string") return false; + return isCloudflareEdge52xHtmlError(detail); +} function hasNonRetryableQuotaDetail(detail: unknown): boolean { if (typeof detail !== "string") return false; @@ -108,6 +113,7 @@ export function isEmptyResponseError(detail: unknown): boolean { /** Transient provider/network detail that is usually safe to retry. */ export function isRetryableProviderErrorDetail(detail: unknown): boolean { + if (isCloudflareEdge52xDetail(detail)) return true; if (typeof detail !== "string") return false; return RETRYABLE_PROVIDER_DETAIL_PATTERNS.some((pattern) => detail.includes(pattern), diff --git a/src/cli/helpers/errorFormatter.ts b/src/cli/helpers/errorFormatter.ts index 206ce3a..4a3a523 100644 --- a/src/cli/helpers/errorFormatter.ts +++ b/src/cli/helpers/errorFormatter.ts @@ -14,6 +14,75 @@ function extractReasonList(value: unknown): string[] { .map((reason) => reason.toLowerCase()); } +interface CloudflareEdgeErrorInfo { + code?: string; + statusText?: string; + host?: string; + rayId?: string; +} + +const CLOUDFLARE_EDGE_5XX_MARKER_PATTERN = + /(^|\s)(502|52[0-6])\s*[^<|]*\|\s*(?:502|52[0-6])\s*:\s*([^<]+)/i)?.[1] + ?.trim() ?? + text.match(/([^<]+)<\/span>/i)?.[1]?.trim(); + + const host = + text.match(/utm_campaign=([a-z0-9.-]+)/i)?.[1] ?? + text.match(/]*truncate[^>]*>([a-z0-9.-]+)<\/span>/i)?.[1]; + + const rayId = + text.match( + /Cloudflare Ray ID:\s*(?:]*>)?([a-z0-9]+)(?:<\/strong>)?/i, + )?.[1] ?? text.match(/Cloudflare Ray ID:\s*([a-z0-9]+)/i)?.[1]; + + if (!code && !statusText && !host && !rayId) return undefined; + + return { code, statusText, host, rayId }; +} + +export function checkCloudflareEdgeError(text: string): string | undefined { + const info = parseCloudflareEdgeError(text); + if (!info) return undefined; + + const codeLabel = info.code ? `Cloudflare ${info.code}` : "Cloudflare"; + const statusSegment = info.statusText + ? `: ${info.statusText}` + : " upstream error"; + const hostSegment = info.host ? ` for ${info.host}` : ""; + const raySegment = info.rayId ? ` (Ray ID: ${info.rayId})` : ""; + + return `${codeLabel}${statusSegment}${hostSegment}${raySegment}. This is usually a temporary edge/origin outage. Please retry in a moment.`; +} + function getErrorReasons(e: APIError): string[] { const reasons = new Set(); @@ -90,6 +159,62 @@ function getRateLimitResetMs(e: APIError): number | undefined { return undefined; } +/** + * Walk an error object to find and format Cloudflare HTML 52x pages. + */ +function findAndFormatCloudflareEdgeError(e: unknown): string | undefined { + if (typeof e === "string") return checkCloudflareEdgeError(e); + + if (typeof e !== "object" || e === null) return undefined; + + if (e instanceof Error) { + const msg = checkCloudflareEdgeError(e.message); + if (msg) return msg; + } + + const obj = e as Record; + + if (typeof obj.detail === "string") { + const msg = checkCloudflareEdgeError(obj.detail); + if (msg) return msg; + } + + if (typeof obj.message === "string") { + const msg = checkCloudflareEdgeError(obj.message); + if (msg) return msg; + } + + if (obj.error && typeof obj.error === "object") { + const errObj = obj.error as Record; + + if (typeof errObj.detail === "string") { + const msg = checkCloudflareEdgeError(errObj.detail); + if (msg) return msg; + } + + if (typeof errObj.message === "string") { + const msg = checkCloudflareEdgeError(errObj.message); + if (msg) return msg; + } + + if (errObj.error && typeof errObj.error === "object") { + const inner = errObj.error as Record; + + if (typeof inner.detail === "string") { + const msg = checkCloudflareEdgeError(inner.detail); + if (msg) return msg; + } + + if (typeof inner.message === "string") { + const msg = checkCloudflareEdgeError(inner.message); + if (msg) return msg; + } + } + } + + return undefined; +} + /** * Format a time duration in milliseconds to a human-readable string */ @@ -400,6 +525,9 @@ export function formatErrorDetails( const chatGptUsageLimitMsg = findAndFormatChatGptUsageLimit(e); if (chatGptUsageLimitMsg) return chatGptUsageLimitMsg; + const cloudflareEdgeMsg = findAndFormatCloudflareEdgeError(e); + if (cloudflareEdgeMsg) return cloudflareEdgeMsg; + // Check for Z.ai provider errors (wrapped in generic "OpenAI" messages) const errorText = e instanceof APIError @@ -562,6 +690,12 @@ export function getRetryStatusMessage( ): string { if (!errorDetail) return DEFAULT_RETRY_MESSAGE; + const cloudflareInfo = parseCloudflareEdgeError(errorDetail); + if (cloudflareInfo) { + const codeSegment = cloudflareInfo.code ? ` ${cloudflareInfo.code}` : ""; + return `Cloudflare${codeSegment} upstream outage, retrying...`; + } + if (checkZaiError(errorDetail)) return "Z.ai API error, retrying..."; if (errorDetail.includes("Anthropic API is overloaded")) diff --git a/src/tests/cli/errorFormatter.test.ts b/src/tests/cli/errorFormatter.test.ts index aae3610..0483af7 100644 --- a/src/tests/cli/errorFormatter.test.ts +++ b/src/tests/cli/errorFormatter.test.ts @@ -6,6 +6,7 @@ import { } from "../../cli/helpers/errorContext"; import { checkChatGptUsageLimitError, + checkCloudflareEdgeError, formatErrorDetails, } from "../../cli/helpers/errorFormatter"; @@ -341,4 +342,75 @@ describe("formatErrorDetails", () => { expect(message).toContain("High concurrency usage exceeds limits"); expect(message).not.toContain("OpenAI"); }); + + describe("Cloudflare HTML 52x errors", () => { + const cloudflare521Html = `521 + + +api.letta.com | 521: Web server is down + + +Web server is down +cloudflare.com +Cloudflare Ray ID: 9d431b5f6f656c08 + +`; + + test("formats Cloudflare HTML into a concise friendly message", () => { + const result = checkCloudflareEdgeError(cloudflare521Html); + + expect(result).toBeDefined(); + expect(result).toContain("Cloudflare 521"); + expect(result).toContain("Web server is down"); + expect(result).toContain("api.letta.com"); + expect(result).toContain("Ray ID: 9d431b5f6f656c08"); + expect(result).toContain("retry"); + expect(result).not.toContain(""); + }); + + test("formats via formatErrorDetails for run metadata nested detail", () => { + const errorObject = { + error: { + error: { + detail: cloudflare521Html, + }, + }, + }; + + const result = formatErrorDetails(errorObject); + + expect(result).toContain("Cloudflare 521"); + expect(result).toContain("Web server is down"); + expect(result).not.toContain(" { + const result = checkCloudflareEdgeError( + "Examplehello", + ); + expect(result).toBeUndefined(); + }); + + test("formats Cloudflare 502 bad gateway pages", () => { + const cloudflare502Html = `502 + + +letta.com | 502: Bad gateway + + +Error code 502 +Cloudflare Ray ID: 9d43b2d6dab269e2 +cloudflare.com + +`; + + const result = checkCloudflareEdgeError(cloudflare502Html); + + expect(result).toBeDefined(); + expect(result).toContain("Cloudflare 502"); + expect(result).toContain("Bad gateway"); + expect(result).toContain("api.letta.com"); + expect(result).toContain("Ray ID: 9d43b2d6dab269e2"); + }); + }); }); diff --git a/src/tests/turn-recovery-policy.test.ts b/src/tests/turn-recovery-policy.test.ts index 67a967e..e9a0101 100644 --- a/src/tests/turn-recovery-policy.test.ts +++ b/src/tests/turn-recovery-policy.test.ts @@ -221,6 +221,16 @@ describe("provider detail retry helpers", () => { ); }); + test("Cloudflare 521 HTML is retryable", () => { + const detail = + "521 api.letta.com | 521: Web server is downCloudflare Ray ID: 9d431b5f6f656c08"; + + expect(shouldRetryRunMetadataError("llm_error", detail)).toBe(true); + expect( + shouldRetryPreStreamTransientError({ status: undefined, detail }), + ).toBe(true); + }); + test("pre-stream transient classifier handles status and detail", () => { expect( shouldRetryPreStreamTransientError({