fix(core): catch bare openai.APIError in handle_llm_error (#9468)

* fix(core): catch bare openai.APIError in handle_llm_error fallthrough

An openai.APIError raised during streaming (e.g. OpenRouter credit
exhaustion) is not an APIStatusError, so it previously skipped the
catch-all at the end and fell through to LLMError("Unhandled"). Now
bare APIErrors that aren't context window overflows are mapped to
LLMBadRequestError.

Datadog: https://us5.datadoghq.com/error-tracking/issue/7a2c356c-0849-11f1-be66-da7ad0900000

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* feat(core): add LLMInsufficientCreditsError for BYOK credit exhaustion

Adds a dedicated error type for insufficient credits/quota across all
providers (OpenAI, Anthropic, Google). Returns HTTP 402 with
BYOK-aware messaging instead of a generic 400.

- New LLMInsufficientCreditsError class and PAYMENT_REQUIRED ErrorCode
- is_insufficient_credits_message() helper detecting credit/quota strings
- All 3 provider clients detect 402 status + credit keywords
- FastAPI handler returns 402 with "your API key" vs generic messaging
- 5 new parametrized tests covering OpenRouter, OpenAI, and negative case

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

---------

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Kian Jones
2026-02-12 15:49:21 -08:00
committed by Caren Thomas
parent cfd2ca3102
commit 80f34f134d
7 changed files with 144 additions and 3 deletions

View File

@@ -20,6 +20,7 @@ from letta.errors import (
LLMAuthenticationError,
LLMBadRequestError,
LLMConnectionError,
LLMInsufficientCreditsError,
LLMNotFoundError,
LLMPermissionDeniedError,
LLMRateLimitError,
@@ -28,7 +29,7 @@ from letta.errors import (
LLMUnprocessableEntityError,
)
from letta.helpers.json_helpers import sanitize_unicode_surrogates
from letta.llm_api.error_utils import is_context_window_overflow_message
from letta.llm_api.error_utils import is_context_window_overflow_message, is_insufficient_credits_message
from letta.llm_api.helpers import (
add_inner_thoughts_to_functions,
convert_response_format_to_responses_api,
@@ -1110,7 +1111,7 @@ class OpenAIClient(LLMClientBase):
#
# Example message:
# "Your input exceeds the context window of this model. Please adjust your input and try again."
if isinstance(e, openai.APIError):
if isinstance(e, openai.APIError) and not isinstance(e, openai.APIStatusError):
msg = str(e)
if is_context_window_overflow_message(msg):
return ContextWindowExceededError(
@@ -1121,6 +1122,25 @@ class OpenAIClient(LLMClientBase):
"is_byok": is_byok,
},
)
if is_insufficient_credits_message(msg):
return LLMInsufficientCreditsError(
message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
code=ErrorCode.PAYMENT_REQUIRED,
details={
"provider_exception_type": type(e).__name__,
"body": getattr(e, "body", None),
"is_byok": is_byok,
},
)
return LLMBadRequestError(
message=f"OpenAI API error: {msg}",
code=ErrorCode.INVALID_ARGUMENT,
details={
"provider_exception_type": type(e).__name__,
"body": getattr(e, "body", None),
"is_byok": is_byok,
},
)
if isinstance(e, openai.AuthenticationError):
logger.error(f"[OpenAI] Authentication error (401): {str(e)}") # More severe log level
@@ -1168,6 +1188,14 @@ class OpenAIClient(LLMClientBase):
message=f"Request too large for OpenAI (413): {str(e)}",
details={"is_byok": is_byok},
)
# Handle 402 Payment Required or credit-related messages
if e.status_code == 402 or is_insufficient_credits_message(str(e)):
msg = str(e)
return LLMInsufficientCreditsError(
message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
code=ErrorCode.PAYMENT_REQUIRED,
details={"status_code": e.status_code, "body": e.body, "is_byok": is_byok},
)
# Map based on status code potentially
if e.status_code >= 500:
error_cls = LLMServerError