From 2c0cddf9f55d50f1bcd5f671d56fd3560b2d89cb Mon Sep 17 00:00:00 2001
From: Kian Jones <11655409+kianjones9@users.noreply.github.com>
Date: Fri, 6 Feb 2026 17:25:35 -0800
Subject: [PATCH] fix(core): handle Google 499 CANCELLED as client disconnect, not server error (#9363)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The google.genai.errors.ClientError with code 499 (CANCELLED) indicates
the client disconnected, not a server-side failure. Previously this fell
through to the generic ClientError handler and was classified as
LLMServerError, causing false 500s in Datadog error tracking.

- Add explicit 499 handling in handle_llm_error: log at info level,
  return LLMConnectionError instead of LLMServerError
- Catch 499 during stream iteration in stream_async and end gracefully
  instead of propagating the error

Datadog: https://us5.datadoghq.com/error-tracking/issue/c8453aaa-d559-11f0-81c6-da7ad0900000

🤖 Generated with [Letta Code](https://letta.com)

Co-authored-by: Letta
---
 letta/llm_api/google_vertex_client.py | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py
index ccbee0ca..18a1a1b1 100644
--- a/letta/llm_api/google_vertex_client.py
+++ b/letta/llm_api/google_vertex_client.py
@@ -204,8 +204,16 @@ class GoogleVertexClient(LLMClientBase):
             raise e
         # Direct yield - keeps response alive in generator's local scope throughout iteration
         # This is required because the SDK's connection lifecycle is tied to the response object
-        async for chunk in response:
-            yield chunk
+        try:
+            async for chunk in response:
+                yield chunk
+        except errors.ClientError as e:
+            if e.code == 499:
+                logger.info(f"{self._provider_prefix()} Stream cancelled by client (499): {e}")
+                return
+            raise self.handle_llm_error(e)
+        except errors.APIError as e:
+            raise self.handle_llm_error(e)
 
     @staticmethod
     def add_dummy_model_messages(messages: List[dict]) -> List[dict]:
@@ -801,6 +809,14 @@ class GoogleVertexClient(LLMClientBase):
     def handle_llm_error(self, e: Exception) -> Exception:
         # Handle Google GenAI specific errors
         if isinstance(e, errors.ClientError):
+            if e.code == 499:
+                logger.info(f"{self._provider_prefix()} Request cancelled by client (499): {e}")
+                return LLMConnectionError(
+                    message=f"Request to {self._provider_name()} was cancelled (client disconnected): {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={"status_code": 499, "cause": "client_cancelled"},
+                )
+
             logger.warning(f"{self._provider_prefix()} Client error ({e.code}): {e}")
 
             # Handle specific error codes