fix(core): improve error handling for upstream LLM provider errors (#9423)
Handle HTML error responses from ALBs/load balancers in the OpenAI client, and add explicit InternalServerError handling for Anthropic upstream issues.

🐛 Generated with [Letta Code](https://letta.com)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
@@ -1059,9 +1059,35 @@ class AnthropicClient(LLMClientBase):
|
||||
details={"is_byok": is_byok},
|
||||
)
|
||||
|
||||
if isinstance(e, anthropic.InternalServerError):
|
||||
error_str = str(e).lower()
|
||||
if "overflow" in error_str or "upstream connect error" in error_str:
|
||||
logger.warning(f"[Anthropic] Upstream infrastructure error (transient): {str(e)}")
|
||||
return LLMServerError(
|
||||
message=f"Anthropic upstream infrastructure error (transient, may resolve on retry): {str(e)}",
|
||||
code=ErrorCode.INTERNAL_SERVER_ERROR,
|
||||
details={
|
||||
"status_code": e.status_code if hasattr(e, "status_code") else None,
|
||||
"transient": True,
|
||||
},
|
||||
)
|
||||
if "overloaded" in error_str:
|
||||
return LLMProviderOverloaded(
|
||||
message=f"Anthropic API is overloaded: {str(e)}",
|
||||
code=ErrorCode.INTERNAL_SERVER_ERROR,
|
||||
)
|
||||
logger.warning(f"[Anthropic] Internal server error: {str(e)}")
|
||||
return LLMServerError(
|
||||
message=f"Anthropic internal server error: {str(e)}",
|
||||
code=ErrorCode.INTERNAL_SERVER_ERROR,
|
||||
details={
|
||||
"status_code": e.status_code if hasattr(e, "status_code") else None,
|
||||
"response": str(e.response) if hasattr(e, "response") else None,
|
||||
},
|
||||
)
|
||||
|
||||
if isinstance(e, anthropic.APIStatusError):
|
||||
logger.warning(f"[Anthropic] API status error: {str(e)}")
|
||||
# Handle 413 Request Entity Too Large - request payload exceeds size limits
|
||||
if hasattr(e, "status_code") and e.status_code == 413:
|
||||
logger.warning(f"[Anthropic] Request too large (413): {str(e)}")
|
||||
return ContextWindowExceededError(
|
||||
|
||||
@@ -1074,24 +1074,30 @@ class OpenAIClient(LLMClientBase):
|
||||
|
||||
if isinstance(e, openai.BadRequestError):
|
||||
logger.warning(f"[OpenAI] Bad request (400): {str(e)}")
|
||||
# BadRequestError can signify different issues (e.g., invalid args, context length)
|
||||
# Check for context_length_exceeded error code in the error body
|
||||
error_str = str(e)
|
||||
|
||||
if "<html" in error_str.lower() or (e.body and isinstance(e.body, str) and "<html" in e.body.lower()):
|
||||
logger.warning("[OpenAI] Received HTML error response from upstream endpoint (likely ALB or reverse proxy)")
|
||||
return LLMBadRequestError(
|
||||
message="Upstream endpoint returned HTML error (400 Bad Request). This usually indicates the configured API endpoint is not an OpenAI-compatible API or the request was rejected by a load balancer.",
|
||||
code=ErrorCode.INVALID_ARGUMENT,
|
||||
details={"raw_body_preview": error_str[:500]},
|
||||
)
|
||||
|
||||
error_code = None
|
||||
if e.body and isinstance(e.body, dict):
|
||||
error_details = e.body.get("error", {})
|
||||
if isinstance(error_details, dict):
|
||||
error_code = error_details.get("code")
|
||||
|
||||
# Check both the error code and message content for context length issues
|
||||
if error_code == "context_length_exceeded" or is_context_window_overflow_message(str(e)):
|
||||
if error_code == "context_length_exceeded" or is_context_window_overflow_message(error_str):
|
||||
return ContextWindowExceededError(
|
||||
message=f"Bad request to OpenAI (context window exceeded): {str(e)}",
|
||||
message=f"Bad request to OpenAI (context window exceeded): {error_str}",
|
||||
details={"is_byok": is_byok},
|
||||
)
|
||||
else:
|
||||
body_details = e.body if isinstance(e.body, dict) else {"body": e.body}
|
||||
return LLMBadRequestError(
|
||||
message=f"Bad request to OpenAI: {str(e)}",
|
||||
code=ErrorCode.INVALID_ARGUMENT,
|
||||
details={**body_details, "is_byok": is_byok},
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user