From fbc0bb60d9ef0dc4984f56af9581e58f7d8fda72 Mon Sep 17 00:00:00 2001
From: jnjpng
Date: Mon, 16 Feb 2026 12:43:27 -0800
Subject: [PATCH] fix: retry ChatGPT 502 and upstream connection errors with
 exponential backoff (#9495)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

502s and upstream connection errors (envoy proxy failures) from ChatGPT
were not being retried. This classifies them as LLMConnectionError
(retryable) in both the streaming and non-streaming paths, and adds
retry handling in the non-streaming HTTPStatusError handler so 502s get
the same exponential backoff treatment as transport-level connection
drops.

🐾 Generated with [Letta Code](https://letta.com)

Co-authored-by: Letta
---
 letta/llm_api/chatgpt_oauth_client.py | 30 +++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/letta/llm_api/chatgpt_oauth_client.py b/letta/llm_api/chatgpt_oauth_client.py
index 35ba3887..8a3a0f21 100644
--- a/letta/llm_api/chatgpt_oauth_client.py
+++ b/letta/llm_api/chatgpt_oauth_client.py
@@ -105,7 +105,7 @@ class ChatGPTOAuthClient(LLMClientBase):
     MAX_RETRIES = 3
 
     # Transient httpx errors that are safe to retry (connection drops, transport-level failures)
-    _RETRYABLE_ERRORS = (httpx.ReadError, httpx.WriteError, httpx.ConnectError, httpx.RemoteProtocolError)
+    _RETRYABLE_ERRORS = (httpx.ReadError, httpx.WriteError, httpx.ConnectError, httpx.RemoteProtocolError, LLMConnectionError)
 
     @trace_method
     async def _get_provider_and_credentials_async(self, llm_config: LLMConfig) -> tuple[ChatGPTOAuthProvider, ChatGPTOAuthCredentials]:
@@ -392,7 +392,16 @@ class ChatGPTOAuthClient(LLMClientBase):
                     return await self._accumulate_sse_response(response)
             except httpx.HTTPStatusError as e:
-                raise self._handle_http_error(e)
+                mapped = self._handle_http_error(e)
+                if isinstance(mapped, tuple(self._RETRYABLE_ERRORS)) and attempt < self.MAX_RETRIES - 1:
+                    wait = 2**attempt
+                    logger.warning(
+                        f"[ChatGPT] Retryable HTTP error on request (attempt {attempt + 1}/{self.MAX_RETRIES}), "
+                        f"retrying in {wait}s: {type(mapped).__name__}: {mapped}"
+                    )
+                    await asyncio.sleep(wait)
+                    continue
+                raise mapped
             except httpx.TimeoutException:
                 raise LLMTimeoutError(
                     message="ChatGPT backend request timed out",
@@ -1019,6 +1028,12 @@ class ChatGPTOAuthClient(LLMClientBase):
                 logger.warning(f"Unhandled ChatGPT SSE event type: {event_type}")
         return None
 
+    @staticmethod
+    def _is_upstream_connection_error(error_body: str) -> bool:
+        """Check if an error body indicates an upstream connection/proxy failure."""
+        lower = error_body.lower()
+        return "upstream connect error" in lower or "reset before headers" in lower or "connection termination" in lower
+
     def _handle_http_error_from_status(self, status_code: int, error_body: str) -> Exception:
         """Create appropriate exception from HTTP status code.
         elif status_code == 429:
             return LLMRateLimitError(
                 message=f"ChatGPT rate limit exceeded: {error_body}",
                 code=ErrorCode.RATE_LIMIT_EXCEEDED,
             )
+        elif status_code == 502 or (status_code >= 500 and self._is_upstream_connection_error(error_body)):
+            return LLMConnectionError(
+                message=f"ChatGPT upstream connection error: {error_body}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+            )
         elif status_code >= 500:
             return LLMServerError(
                 message=f"ChatGPT server error: {error_body}",
@@ -1136,6 +1156,12 @@ class ChatGPTOAuthClient(LLMClientBase):
                 code=ErrorCode.INVALID_ARGUMENT,
                 details={"is_byok": is_byok},
             )
+        elif status_code == 502 or (status_code >= 500 and self._is_upstream_connection_error(error_message)):
+            return LLMConnectionError(
+                message=f"ChatGPT upstream connection error: {error_message}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={"is_byok": is_byok},
+            )
         elif status_code >= 500:
             return LLMServerError(
                 message=f"ChatGPT server error: {error_message}",