fix(core): improve error handling for upstream LLM provider errors (#9423)

Handle HTML error responses from ALB/load balancers in the OpenAI client and add explicit InternalServerError handling for Anthropic upstream issues.

🐛 Generated with [Letta Code](https://letta.com)

Co-authored-by: Letta <noreply@letta.com>
2026-02-11 10:53:28 -08:00
parent 6d4e320cc3
commit 411bb63990
2 changed files with 39 additions and 7 deletions
--- a/letta/llm_api/anthropic_client.py
+++ b/letta/llm_api/anthropic_client.py
@@ -1059,9 +1059,35 @@ class AnthropicClient(LLMClientBase):
                details={"is_byok": is_byok},
            )

+        if isinstance(e, anthropic.InternalServerError):
+            error_str = str(e).lower()
+            if "overflow" in error_str or "upstream connect error" in error_str:
+                logger.warning(f"[Anthropic] Upstream infrastructure error (transient): {str(e)}")
+                return LLMServerError(
+                    message=f"Anthropic upstream infrastructure error (transient, may resolve on retry): {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={
+                        "status_code": e.status_code if hasattr(e, "status_code") else None,
+                        "transient": True,
+                    },
+                )
+            if "overloaded" in error_str:
+                return LLMProviderOverloaded(
+                    message=f"Anthropic API is overloaded: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                )
+            logger.warning(f"[Anthropic] Internal server error: {str(e)}")
+            return LLMServerError(
+                message=f"Anthropic internal server error: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={
+                    "status_code": e.status_code if hasattr(e, "status_code") else None,
+                    "response": str(e.response) if hasattr(e, "response") else None,
+                },
+            )
+
        if isinstance(e, anthropic.APIStatusError):
            logger.warning(f"[Anthropic] API status error: {str(e)}")
-            # Handle 413 Request Entity Too Large - request payload exceeds size limits
            if hasattr(e, "status_code") and e.status_code == 413:
                logger.warning(f"[Anthropic] Request too large (413): {str(e)}")
                return ContextWindowExceededError(
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -1074,24 +1074,30 @@ class OpenAIClient(LLMClientBase):

        if isinstance(e, openai.BadRequestError):
            logger.warning(f"[OpenAI] Bad request (400): {str(e)}")
-            # BadRequestError can signify different issues (e.g., invalid args, context length)
-            # Check for context_length_exceeded error code in the error body
+            error_str = str(e)
+
+            if "<html" in error_str.lower() or (e.body and isinstance(e.body, str) and "<html" in e.body.lower()):
+                logger.warning("[OpenAI] Received HTML error response from upstream endpoint (likely ALB or reverse proxy)")
+                return LLMBadRequestError(
+                    message="Upstream endpoint returned HTML error (400 Bad Request). This usually indicates the configured API endpoint is not an OpenAI-compatible API or the request was rejected by a load balancer.",
+                    code=ErrorCode.INVALID_ARGUMENT,
+                    details={"raw_body_preview": error_str[:500]},
+                )
+
            error_code = None
            if e.body and isinstance(e.body, dict):
                error_details = e.body.get("error", {})
                if isinstance(error_details, dict):
                    error_code = error_details.get("code")

-            # Check both the error code and message content for context length issues
-            if error_code == "context_length_exceeded" or is_context_window_overflow_message(str(e)):
+            if error_code == "context_length_exceeded" or is_context_window_overflow_message(error_str):
                return ContextWindowExceededError(
-                    message=f"Bad request to OpenAI (context window exceeded): {str(e)}",
+                    message=f"Bad request to OpenAI (context window exceeded): {error_str}",
                    details={"is_byok": is_byok},
                )
            else:
                body_details = e.body if isinstance(e.body, dict) else {"body": e.body}
                return LLMBadRequestError(
-                    message=f"Bad request to OpenAI: {str(e)}",
                    code=ErrorCode.INVALID_ARGUMENT,
                    details={**body_details, "is_byok": is_byok},
                )