From 2c0cddf9f55d50f1bcd5f671d56fd3560b2d89cb Mon Sep 17 00:00:00 2001
From: Kian Jones <11655409+kianjones9@users.noreply.github.com>
Date: Fri, 6 Feb 2026 17:25:35 -0800
Subject: [PATCH] fix(core): handle Google 499 CANCELLED as client disconnect,
 not server error (#9363)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The google.genai.errors.ClientError with code 499 (CANCELLED) indicates the
client disconnected, not a server-side failure. Previously this fell through
to the generic ClientError handler and was classified as LLMServerError,
causing false 500s in Datadog error tracking.

- Add explicit 499 handling in handle_llm_error: log at info level, return
  LLMConnectionError instead of LLMServerError
- Catch 499 during stream iteration in stream_async and end gracefully
  instead of propagating the error

Datadog: https://us5.datadoghq.com/error-tracking/issue/c8453aaa-d559-11f0-81c6-da7ad0900000

🤖 Generated with [Letta Code](https://letta.com)

Co-authored-by: Letta <noreply@letta.com>
---
 letta/llm_api/google_vertex_client.py | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py
index ccbee0ca..18a1a1b1 100644
--- a/letta/llm_api/google_vertex_client.py
+++ b/letta/llm_api/google_vertex_client.py
@@ -204,8 +204,16 @@ class GoogleVertexClient(LLMClientBase):
             raise e
         # Direct yield - keeps response alive in generator's local scope throughout iteration
         # This is required because the SDK's connection lifecycle is tied to the response object
-        async for chunk in response:
-            yield chunk
+        try:
+            async for chunk in response:
+                yield chunk
+        except errors.ClientError as e:
+            if e.code == 499:
+                logger.info(f"{self._provider_prefix()} Stream cancelled by client (499): {e}")
+                return
+            raise self.handle_llm_error(e)
+        except errors.APIError as e:
+            raise self.handle_llm_error(e)
 
     @staticmethod
     def add_dummy_model_messages(messages: List[dict]) -> List[dict]:
@@ -801,6 +809,14 @@ class GoogleVertexClient(LLMClientBase):
     def handle_llm_error(self, e: Exception) -> Exception:
         # Handle Google GenAI specific errors
         if isinstance(e, errors.ClientError):
+            if e.code == 499:
+                logger.info(f"{self._provider_prefix()} Request cancelled by client (499): {e}")
+                return LLMConnectionError(
+                    message=f"Request to {self._provider_name()} was cancelled (client disconnected): {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={"status_code": 499, "cause": "client_cancelled"},
+                )
+
             logger.warning(f"{self._provider_prefix()} Client error ({e.code}): {e}")
 
             # Handle specific error codes