fix(core): catch bare openai.APIError in handle_llm_error (#9468)

* fix(core): catch bare openai.APIError in handle_llm_error fallthrough

openai.APIError raised during streaming (e.g. OpenRouter credit
exhaustion) is not an APIStatusError, so it skipped the catch-all
at the end and fell through to LLMError("Unhandled"). Now bare
APIErrors that aren't context window overflows are mapped to
LLMBadRequestError.

Datadog: https://us5.datadoghq.com/error-tracking/issue/7a2c356c-0849-11f1-be66-da7ad0900000

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* feat(core): add LLMInsufficientCreditsError for BYOK credit exhaustion

Adds dedicated error type for insufficient credits/quota across all
providers (OpenAI, Anthropic, Google). Returns HTTP 402 with
BYOK-aware messaging instead of generic 400.

- New LLMInsufficientCreditsError class and PAYMENT_REQUIRED ErrorCode
- is_insufficient_credits_message() helper detecting credit/quota strings
- All 3 provider clients detect 402 status + credit keywords
- FastAPI handler returns 402 with "your API key" vs generic messaging
- 5 new parametrized tests covering OpenRouter, OpenAI, and negative case

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

---------

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Kian Jones
2026-02-12 15:49:21 -08:00
committed by Caren Thomas
parent cfd2ca3102
commit 80f34f134d
7 changed files with 144 additions and 3 deletions

View File

@@ -19,6 +19,7 @@ from letta.errors import (
LLMAuthenticationError,
LLMBadRequestError,
LLMConnectionError,
LLMInsufficientCreditsError,
LLMNotFoundError,
LLMPermissionDeniedError,
LLMProviderOverloaded,
@@ -31,6 +32,7 @@ from letta.helpers.datetime_helpers import get_utc_time_int
from letta.helpers.decorators import deprecated
from letta.helpers.json_helpers import sanitize_unicode_surrogates
from letta.llm_api.anthropic_constants import ANTHROPIC_MAX_STRICT_TOOLS, ANTHROPIC_STRICT_MODE_ALLOWLIST
from letta.llm_api.error_utils import is_insufficient_credits_message
from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
from letta.llm_api.llm_client_base import LLMClientBase
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
@@ -1088,6 +1090,13 @@ class AnthropicClient(LLMClientBase):
if isinstance(e, anthropic.APIStatusError):
logger.warning(f"[Anthropic] API status error: {str(e)}")
if hasattr(e, "status_code") and e.status_code == 402 or is_insufficient_credits_message(str(e)):
msg = str(e)
return LLMInsufficientCreditsError(
message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
code=ErrorCode.PAYMENT_REQUIRED,
details={"status_code": getattr(e, "status_code", None), "is_byok": is_byok},
)
if hasattr(e, "status_code") and e.status_code == 413:
logger.warning(f"[Anthropic] Request too large (413): {str(e)}")
return ContextWindowExceededError(

View File

@@ -20,3 +20,21 @@ def is_context_window_overflow_message(msg: str) -> bool:
or "context_length_exceeded" in msg
or "Input tokens exceed the configured limit" in msg
)
def is_insufficient_credits_message(msg: str) -> bool:
    """Heuristically decide whether an error message indicates exhausted credits/quota.

    Providers such as OpenRouter and OpenAI phrase billing failures differently
    (mid-stream or pre-flight), so we scan for the known phrasings case-insensitively.
    Matching messages are surfaced as 402 Payment Required rather than 400/500.
    """
    text = msg.lower()
    # Single-phrase markers seen across providers.
    credit_phrases = (
        "insufficient credits",
        "requires more credits",
        "add more credits",
        "exceeded your current quota",
        "you've exceeded your budget",
        "can only afford",
    )
    if any(phrase in text for phrase in credit_phrases):
        return True
    # "billing" alone is too generic; require the "hard limit" qualifier too.
    return "billing" in text and "hard limit" in text

View File

@@ -23,6 +23,7 @@ from letta.errors import (
LLMAuthenticationError,
LLMBadRequestError,
LLMConnectionError,
LLMInsufficientCreditsError,
LLMNotFoundError,
LLMPermissionDeniedError,
LLMRateLimitError,
@@ -32,6 +33,7 @@ from letta.errors import (
)
from letta.helpers.datetime_helpers import get_utc_time_int
from letta.helpers.json_helpers import json_dumps, json_loads, sanitize_unicode_surrogates
from letta.llm_api.error_utils import is_insufficient_credits_message
from letta.llm_api.llm_client_base import LLMClientBase
from letta.local_llm.json_parser import clean_json_string_extra_backslash
from letta.log import get_logger
@@ -932,6 +934,13 @@ class GoogleVertexClient(LLMClientBase):
code=ErrorCode.TIMEOUT,
details={"cause": str(e.__cause__) if e.__cause__ else None, "is_byok": is_byok},
)
elif e.code == 402 or is_insufficient_credits_message(str(e)):
msg = str(e)
return LLMInsufficientCreditsError(
message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
code=ErrorCode.PAYMENT_REQUIRED,
details={"status_code": e.code, "is_byok": is_byok},
)
elif e.code == 422:
return LLMUnprocessableEntityError(
message=f"Invalid request content for {self._provider_name()}: {str(e)}",

View File

@@ -20,6 +20,7 @@ from letta.errors import (
LLMAuthenticationError,
LLMBadRequestError,
LLMConnectionError,
LLMInsufficientCreditsError,
LLMNotFoundError,
LLMPermissionDeniedError,
LLMRateLimitError,
@@ -28,7 +29,7 @@ from letta.errors import (
LLMUnprocessableEntityError,
)
from letta.helpers.json_helpers import sanitize_unicode_surrogates
from letta.llm_api.error_utils import is_context_window_overflow_message
from letta.llm_api.error_utils import is_context_window_overflow_message, is_insufficient_credits_message
from letta.llm_api.helpers import (
add_inner_thoughts_to_functions,
convert_response_format_to_responses_api,
@@ -1110,7 +1111,7 @@ class OpenAIClient(LLMClientBase):
#
# Example message:
# "Your input exceeds the context window of this model. Please adjust your input and try again."
if isinstance(e, openai.APIError):
if isinstance(e, openai.APIError) and not isinstance(e, openai.APIStatusError):
msg = str(e)
if is_context_window_overflow_message(msg):
return ContextWindowExceededError(
@@ -1121,6 +1122,25 @@ class OpenAIClient(LLMClientBase):
"is_byok": is_byok,
},
)
if is_insufficient_credits_message(msg):
return LLMInsufficientCreditsError(
message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
code=ErrorCode.PAYMENT_REQUIRED,
details={
"provider_exception_type": type(e).__name__,
"body": getattr(e, "body", None),
"is_byok": is_byok,
},
)
return LLMBadRequestError(
message=f"OpenAI API error: {msg}",
code=ErrorCode.INVALID_ARGUMENT,
details={
"provider_exception_type": type(e).__name__,
"body": getattr(e, "body", None),
"is_byok": is_byok,
},
)
if isinstance(e, openai.AuthenticationError):
logger.error(f"[OpenAI] Authentication error (401): {str(e)}") # More severe log level
@@ -1168,6 +1188,14 @@ class OpenAIClient(LLMClientBase):
message=f"Request too large for OpenAI (413): {str(e)}",
details={"is_byok": is_byok},
)
# Handle 402 Payment Required or credit-related messages
if e.status_code == 402 or is_insufficient_credits_message(str(e)):
msg = str(e)
return LLMInsufficientCreditsError(
message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
code=ErrorCode.PAYMENT_REQUIRED,
details={"status_code": e.status_code, "body": e.body, "is_byok": is_byok},
)
# Map based on status code potentially
if e.status_code >= 500:
error_cls = LLMServerError