fix(core): catch bare openai.APIError in handle_llm_error (#9468)

* fix(core): catch bare openai.APIError in handle_llm_error fallthrough

openai.APIError raised during streaming (e.g. OpenRouter credit
exhaustion) is not an APIStatusError, so it skipped the catch-all
at the end and fell through to LLMError("Unhandled"). Now bare
APIErrors that aren't context window overflows are mapped to
LLMBadRequestError.

Datadog: https://us5.datadoghq.com/error-tracking/issue/7a2c356c-0849-11f1-be66-da7ad0900000

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* feat(core): add LLMInsufficientCreditsError for BYOK credit exhaustion

Adds dedicated error type for insufficient credits/quota across all
providers (OpenAI, Anthropic, Google). Returns HTTP 402 with
BYOK-aware messaging instead of generic 400.

- New LLMInsufficientCreditsError class and PAYMENT_REQUIRED ErrorCode
- is_insufficient_credits_message() helper detecting credit/quota strings
- All 3 provider clients detect 402 status + credit keywords
- FastAPI handler returns 402 with "your API key" vs generic messaging
- 5 new parametrized tests covering OpenRouter, OpenAI, and negative case

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

---------

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Kian Jones
2026-02-12 15:49:21 -08:00
committed by Caren Thomas
parent cfd2ca3102
commit 80f34f134d
7 changed files with 144 additions and 3 deletions

View File

@@ -19,6 +19,7 @@ from letta.errors import (
LLMAuthenticationError,
LLMBadRequestError,
LLMConnectionError,
LLMInsufficientCreditsError,
LLMNotFoundError,
LLMPermissionDeniedError,
LLMProviderOverloaded,
@@ -31,6 +32,7 @@ from letta.helpers.datetime_helpers import get_utc_time_int
from letta.helpers.decorators import deprecated
from letta.helpers.json_helpers import sanitize_unicode_surrogates
from letta.llm_api.anthropic_constants import ANTHROPIC_MAX_STRICT_TOOLS, ANTHROPIC_STRICT_MODE_ALLOWLIST
from letta.llm_api.error_utils import is_insufficient_credits_message
from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
from letta.llm_api.llm_client_base import LLMClientBase
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
@@ -1088,6 +1090,13 @@ class AnthropicClient(LLMClientBase):
if isinstance(e, anthropic.APIStatusError):
logger.warning(f"[Anthropic] API status error: {str(e)}")
if hasattr(e, "status_code") and e.status_code == 402 or is_insufficient_credits_message(str(e)):
msg = str(e)
return LLMInsufficientCreditsError(
message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
code=ErrorCode.PAYMENT_REQUIRED,
details={"status_code": getattr(e, "status_code", None), "is_byok": is_byok},
)
if hasattr(e, "status_code") and e.status_code == 413:
logger.warning(f"[Anthropic] Request too large (413): {str(e)}")
return ContextWindowExceededError(

View File

@@ -20,3 +20,21 @@ def is_context_window_overflow_message(msg: str) -> bool:
or "context_length_exceeded" in msg
or "Input tokens exceed the configured limit" in msg
)
def is_insufficient_credits_message(msg: str) -> bool:
    """Heuristically decide whether an error message indicates exhausted credits/quota.

    Providers such as OpenRouter and OpenAI phrase billing failures differently
    (mid-stream or pre-flight), so we scan for the known phrasings case-insensitively.
    Matching messages are surfaced as 402 Payment Required rather than 400/500.
    """
    text = msg.lower()
    # Single-phrase markers seen across providers.
    credit_phrases = (
        "insufficient credits",
        "requires more credits",
        "add more credits",
        "exceeded your current quota",
        "you've exceeded your budget",
        "can only afford",
    )
    if any(phrase in text for phrase in credit_phrases):
        return True
    # "billing" alone is too generic; require the "hard limit" qualifier too.
    return "billing" in text and "hard limit" in text

View File

@@ -23,6 +23,7 @@ from letta.errors import (
LLMAuthenticationError,
LLMBadRequestError,
LLMConnectionError,
LLMInsufficientCreditsError,
LLMNotFoundError,
LLMPermissionDeniedError,
LLMRateLimitError,
@@ -32,6 +33,7 @@ from letta.errors import (
)
from letta.helpers.datetime_helpers import get_utc_time_int
from letta.helpers.json_helpers import json_dumps, json_loads, sanitize_unicode_surrogates
from letta.llm_api.error_utils import is_insufficient_credits_message
from letta.llm_api.llm_client_base import LLMClientBase
from letta.local_llm.json_parser import clean_json_string_extra_backslash
from letta.log import get_logger
@@ -932,6 +934,13 @@ class GoogleVertexClient(LLMClientBase):
code=ErrorCode.TIMEOUT,
details={"cause": str(e.__cause__) if e.__cause__ else None, "is_byok": is_byok},
)
elif e.code == 402 or is_insufficient_credits_message(str(e)):
msg = str(e)
return LLMInsufficientCreditsError(
message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
code=ErrorCode.PAYMENT_REQUIRED,
details={"status_code": e.code, "is_byok": is_byok},
)
elif e.code == 422:
return LLMUnprocessableEntityError(
message=f"Invalid request content for {self._provider_name()}: {str(e)}",

View File

@@ -20,6 +20,7 @@ from letta.errors import (
LLMAuthenticationError,
LLMBadRequestError,
LLMConnectionError,
LLMInsufficientCreditsError,
LLMNotFoundError,
LLMPermissionDeniedError,
LLMRateLimitError,
@@ -28,7 +29,7 @@ from letta.errors import (
LLMUnprocessableEntityError,
)
from letta.helpers.json_helpers import sanitize_unicode_surrogates
from letta.llm_api.error_utils import is_context_window_overflow_message
from letta.llm_api.error_utils import is_context_window_overflow_message, is_insufficient_credits_message
from letta.llm_api.helpers import (
add_inner_thoughts_to_functions,
convert_response_format_to_responses_api,
@@ -1110,7 +1111,7 @@ class OpenAIClient(LLMClientBase):
#
# Example message:
# "Your input exceeds the context window of this model. Please adjust your input and try again."
if isinstance(e, openai.APIError):
if isinstance(e, openai.APIError) and not isinstance(e, openai.APIStatusError):
msg = str(e)
if is_context_window_overflow_message(msg):
return ContextWindowExceededError(
@@ -1121,6 +1122,25 @@ class OpenAIClient(LLMClientBase):
"is_byok": is_byok,
},
)
if is_insufficient_credits_message(msg):
return LLMInsufficientCreditsError(
message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
code=ErrorCode.PAYMENT_REQUIRED,
details={
"provider_exception_type": type(e).__name__,
"body": getattr(e, "body", None),
"is_byok": is_byok,
},
)
return LLMBadRequestError(
message=f"OpenAI API error: {msg}",
code=ErrorCode.INVALID_ARGUMENT,
details={
"provider_exception_type": type(e).__name__,
"body": getattr(e, "body", None),
"is_byok": is_byok,
},
)
if isinstance(e, openai.AuthenticationError):
logger.error(f"[OpenAI] Authentication error (401): {str(e)}") # More severe log level
@@ -1168,6 +1188,14 @@ class OpenAIClient(LLMClientBase):
message=f"Request too large for OpenAI (413): {str(e)}",
details={"is_byok": is_byok},
)
# Handle 402 Payment Required or credit-related messages
if e.status_code == 402 or is_insufficient_credits_message(str(e)):
msg = str(e)
return LLMInsufficientCreditsError(
message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
code=ErrorCode.PAYMENT_REQUIRED,
details={"status_code": e.status_code, "body": e.body, "is_byok": is_byok},
)
# Map based on status code potentially
if e.status_code >= 500:
error_cls = LLMServerError