fix(core): improve error handling for upstream LLM provider errors (#9423)

Handle HTML error responses from ALB/load balancers in OpenAI client and
add explicit InternalServerError handling for Anthropic upstream issues.

🐛 Generated with [Letta Code](https://letta.com)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Kian Jones
2026-02-11 10:53:28 -08:00
committed by Caren Thomas
parent 6d4e320cc3
commit 411bb63990
2 changed files with 39 additions and 7 deletions

View File

@@ -1059,9 +1059,35 @@ class AnthropicClient(LLMClientBase):
details={"is_byok": is_byok},
)
if isinstance(e, anthropic.InternalServerError):
error_str = str(e).lower()
if "overflow" in error_str or "upstream connect error" in error_str:
logger.warning(f"[Anthropic] Upstream infrastructure error (transient): {str(e)}")
return LLMServerError(
message=f"Anthropic upstream infrastructure error (transient, may resolve on retry): {str(e)}",
code=ErrorCode.INTERNAL_SERVER_ERROR,
details={
"status_code": e.status_code if hasattr(e, "status_code") else None,
"transient": True,
},
)
if "overloaded" in error_str:
return LLMProviderOverloaded(
message=f"Anthropic API is overloaded: {str(e)}",
code=ErrorCode.INTERNAL_SERVER_ERROR,
)
logger.warning(f"[Anthropic] Internal server error: {str(e)}")
return LLMServerError(
message=f"Anthropic internal server error: {str(e)}",
code=ErrorCode.INTERNAL_SERVER_ERROR,
details={
"status_code": e.status_code if hasattr(e, "status_code") else None,
"response": str(e.response) if hasattr(e, "response") else None,
},
)
if isinstance(e, anthropic.APIStatusError):
logger.warning(f"[Anthropic] API status error: {str(e)}")
# Handle 413 Request Entity Too Large - request payload exceeds size limits
if hasattr(e, "status_code") and e.status_code == 413:
logger.warning(f"[Anthropic] Request too large (413): {str(e)}")
return ContextWindowExceededError(

View File

@@ -1074,24 +1074,30 @@ class OpenAIClient(LLMClientBase):
if isinstance(e, openai.BadRequestError):
logger.warning(f"[OpenAI] Bad request (400): {str(e)}")
# BadRequestError can signify different issues (e.g., invalid args, context length)
# Check for context_length_exceeded error code in the error body
error_str = str(e)
if "<html" in error_str.lower() or (e.body and isinstance(e.body, str) and "<html" in e.body.lower()):
logger.warning("[OpenAI] Received HTML error response from upstream endpoint (likely ALB or reverse proxy)")
return LLMBadRequestError(
message="Upstream endpoint returned HTML error (400 Bad Request). This usually indicates the configured API endpoint is not an OpenAI-compatible API or the request was rejected by a load balancer.",
code=ErrorCode.INVALID_ARGUMENT,
details={"raw_body_preview": error_str[:500]},
)
error_code = None
if e.body and isinstance(e.body, dict):
error_details = e.body.get("error", {})
if isinstance(error_details, dict):
error_code = error_details.get("code")
# Check both the error code and message content for context length issues
if error_code == "context_length_exceeded" or is_context_window_overflow_message(str(e)):
if error_code == "context_length_exceeded" or is_context_window_overflow_message(error_str):
return ContextWindowExceededError(
message=f"Bad request to OpenAI (context window exceeded): {str(e)}",
message=f"Bad request to OpenAI (context window exceeded): {error_str}",
details={"is_byok": is_byok},
)
else:
body_details = e.body if isinstance(e.body, dict) else {"body": e.body}
return LLMBadRequestError(
message=f"Bad request to OpenAI: {str(e)}",
code=ErrorCode.INVALID_ARGUMENT,
details={**body_details, "is_byok": is_byok},
)