diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py
index 49fc64a8..488494a0 100644
--- a/letta/llm_api/google_vertex_client.py
+++ b/letta/llm_api/google_vertex_client.py
@@ -200,6 +200,8 @@ class GoogleVertexClient(LLMClientBase):
                 f"Please check your tool definitions. Error: {str(e)}",
                 code=ErrorCode.INTERNAL_SERVER_ERROR,
             )
+        except errors.APIError as e:
+            raise self.handle_llm_error(e)
         except Exception as e:
             logger.error(f"Error streaming {self._provider_name()} request: {e} with request data: {json.dumps(request_data)}")
             raise e
@@ -878,7 +880,7 @@ class GoogleVertexClient(LLMClientBase):
                 else:
                     return LLMBadRequestError(
                         message=f"Bad request to {self._provider_name()}: {str(e)}",
-                        code=ErrorCode.INTERNAL_SERVER_ERROR,
+                        code=ErrorCode.INVALID_ARGUMENT,
                         details={"is_byok": is_byok},
                     )
             elif e.code == 401:
diff --git a/letta/server/rest_api/app.py b/letta/server/rest_api/app.py
index 656ab33c..183bf407 100644
--- a/letta/server/rest_api/app.py
+++ b/letta/server/rest_api/app.py
@@ -54,6 +54,7 @@ from letta.errors import (
     LettaUnsupportedFileUploadError,
     LettaUserNotFoundError,
     LLMAuthenticationError,
+    LLMBadRequestError,
     LLMError,
     LLMProviderOverloaded,
     LLMRateLimitError,
@@ -729,6 +730,19 @@ def create_application() -> "FastAPI":
             },
         )
 
+    @app.exception_handler(LLMBadRequestError)
+    async def llm_bad_request_error_handler(request: Request, exc: LLMBadRequestError):
+        """Answer an LLMBadRequestError with HTTP 400 and a structured JSON error body."""
+        # `request` is not used; it is part of the handler signature.
+        error_body = {
+            "type": "llm_bad_request",
+            "message": "The request to the LLM model provider was invalid.",
+            "detail": str(exc),  # stringified exception
+        }
+        # The payload is nested under a single top-level "error" key.
+        response_content = {"error": error_body}
+        return JSONResponse(status_code=400, content=response_content)
+
     @app.exception_handler(LLMError)
     async def llm_error_handler(request: Request, exc: LLMError):
         return JSONResponse(