fix(core): catch bare openai.APIError in handle_llm_error (#9468)
* fix(core): catch bare openai.APIError in handle_llm_error fallthrough
openai.APIError raised during streaming (e.g. OpenRouter credit
exhaustion) is not an APIStatusError, so it skipped the catch-all
at the end and fell through to LLMError("Unhandled"). Now bare
APIErrors that aren't context window overflows are mapped to
LLMBadRequestError.
Datadog: https://us5.datadoghq.com/error-tracking/issue/7a2c356c-0849-11f1-be66-da7ad0900000
🐾 Generated with [Letta Code](https://letta.com)
Co-Authored-By: Letta <noreply@letta.com>
* feat(core): add LLMInsufficientCreditsError for BYOK credit exhaustion
Adds dedicated error type for insufficient credits/quota across all
providers (OpenAI, Anthropic, Google). Returns HTTP 402 with
BYOK-aware messaging instead of generic 400.
- New LLMInsufficientCreditsError class and PAYMENT_REQUIRED ErrorCode
- is_insufficient_credits_message() helper detecting credit/quota strings
- All 3 provider clients detect 402 status + credit keywords
- FastAPI handler returns 402 with "your API key" vs generic messaging
- 5 new parametrized tests covering OpenRouter, OpenAI, and negative case
🐾 Generated with [Letta Code](https://letta.com)
Co-Authored-By: Letta <noreply@letta.com>
---------
Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
@@ -19,6 +19,7 @@ from letta.errors import (
|
||||
LLMAuthenticationError,
|
||||
LLMBadRequestError,
|
||||
LLMConnectionError,
|
||||
LLMInsufficientCreditsError,
|
||||
LLMNotFoundError,
|
||||
LLMPermissionDeniedError,
|
||||
LLMProviderOverloaded,
|
||||
@@ -31,6 +32,7 @@ from letta.helpers.datetime_helpers import get_utc_time_int
|
||||
from letta.helpers.decorators import deprecated
|
||||
from letta.helpers.json_helpers import sanitize_unicode_surrogates
|
||||
from letta.llm_api.anthropic_constants import ANTHROPIC_MAX_STRICT_TOOLS, ANTHROPIC_STRICT_MODE_ALLOWLIST
|
||||
from letta.llm_api.error_utils import is_insufficient_credits_message
|
||||
from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
|
||||
from letta.llm_api.llm_client_base import LLMClientBase
|
||||
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
|
||||
@@ -1088,6 +1090,13 @@ class AnthropicClient(LLMClientBase):
|
||||
|
||||
if isinstance(e, anthropic.APIStatusError):
|
||||
logger.warning(f"[Anthropic] API status error: {str(e)}")
|
||||
if hasattr(e, "status_code") and e.status_code == 402 or is_insufficient_credits_message(str(e)):
|
||||
msg = str(e)
|
||||
return LLMInsufficientCreditsError(
|
||||
message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
|
||||
code=ErrorCode.PAYMENT_REQUIRED,
|
||||
details={"status_code": getattr(e, "status_code", None), "is_byok": is_byok},
|
||||
)
|
||||
if hasattr(e, "status_code") and e.status_code == 413:
|
||||
logger.warning(f"[Anthropic] Request too large (413): {str(e)}")
|
||||
return ContextWindowExceededError(
|
||||
|
||||
@@ -20,3 +20,21 @@ def is_context_window_overflow_message(msg: str) -> bool:
|
||||
or "context_length_exceeded" in msg
|
||||
or "Input tokens exceed the configured limit" in msg
|
||||
)
|
||||
|
||||
|
||||
def is_insufficient_credits_message(msg: str) -> bool:
    """Best-effort detection for insufficient credits/quota/billing errors.

    BYOK users on OpenRouter, OpenAI, etc. may exhaust their credits mid-stream
    or get rejected pre-flight. We detect these so they map to 402 instead of 400/500.

    Args:
        msg: The stringified provider error (message body and/or error payload).

    Returns:
        True if the text looks like a credits/quota/billing exhaustion error.
    """
    lower = msg.lower()
    return (
        "insufficient credits" in lower
        # OpenAI's machine-readable error code for exhausted quota; present in
        # stringified 429 payloads even when the prose phrasing differs.
        or "insufficient_quota" in lower
        or "requires more credits" in lower
        or "add more credits" in lower
        or "exceeded your current quota" in lower
        or "you've exceeded your budget" in lower
        # Both words must appear to avoid matching benign billing mentions.
        or ("billing" in lower and "hard limit" in lower)
        or "can only afford" in lower
    )
|
||||
|
||||
@@ -23,6 +23,7 @@ from letta.errors import (
|
||||
LLMAuthenticationError,
|
||||
LLMBadRequestError,
|
||||
LLMConnectionError,
|
||||
LLMInsufficientCreditsError,
|
||||
LLMNotFoundError,
|
||||
LLMPermissionDeniedError,
|
||||
LLMRateLimitError,
|
||||
@@ -32,6 +33,7 @@ from letta.errors import (
|
||||
)
|
||||
from letta.helpers.datetime_helpers import get_utc_time_int
|
||||
from letta.helpers.json_helpers import json_dumps, json_loads, sanitize_unicode_surrogates
|
||||
from letta.llm_api.error_utils import is_insufficient_credits_message
|
||||
from letta.llm_api.llm_client_base import LLMClientBase
|
||||
from letta.local_llm.json_parser import clean_json_string_extra_backslash
|
||||
from letta.log import get_logger
|
||||
@@ -932,6 +934,13 @@ class GoogleVertexClient(LLMClientBase):
|
||||
code=ErrorCode.TIMEOUT,
|
||||
details={"cause": str(e.__cause__) if e.__cause__ else None, "is_byok": is_byok},
|
||||
)
|
||||
elif e.code == 402 or is_insufficient_credits_message(str(e)):
|
||||
msg = str(e)
|
||||
return LLMInsufficientCreditsError(
|
||||
message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
|
||||
code=ErrorCode.PAYMENT_REQUIRED,
|
||||
details={"status_code": e.code, "is_byok": is_byok},
|
||||
)
|
||||
elif e.code == 422:
|
||||
return LLMUnprocessableEntityError(
|
||||
message=f"Invalid request content for {self._provider_name()}: {str(e)}",
|
||||
|
||||
@@ -20,6 +20,7 @@ from letta.errors import (
|
||||
LLMAuthenticationError,
|
||||
LLMBadRequestError,
|
||||
LLMConnectionError,
|
||||
LLMInsufficientCreditsError,
|
||||
LLMNotFoundError,
|
||||
LLMPermissionDeniedError,
|
||||
LLMRateLimitError,
|
||||
@@ -28,7 +29,7 @@ from letta.errors import (
|
||||
LLMUnprocessableEntityError,
|
||||
)
|
||||
from letta.helpers.json_helpers import sanitize_unicode_surrogates
|
||||
from letta.llm_api.error_utils import is_context_window_overflow_message
|
||||
from letta.llm_api.error_utils import is_context_window_overflow_message, is_insufficient_credits_message
|
||||
from letta.llm_api.helpers import (
|
||||
add_inner_thoughts_to_functions,
|
||||
convert_response_format_to_responses_api,
|
||||
@@ -1110,7 +1111,7 @@ class OpenAIClient(LLMClientBase):
|
||||
#
|
||||
# Example message:
|
||||
# "Your input exceeds the context window of this model. Please adjust your input and try again."
|
||||
if isinstance(e, openai.APIError):
|
||||
if isinstance(e, openai.APIError) and not isinstance(e, openai.APIStatusError):
|
||||
msg = str(e)
|
||||
if is_context_window_overflow_message(msg):
|
||||
return ContextWindowExceededError(
|
||||
@@ -1121,6 +1122,25 @@ class OpenAIClient(LLMClientBase):
|
||||
"is_byok": is_byok,
|
||||
},
|
||||
)
|
||||
if is_insufficient_credits_message(msg):
|
||||
return LLMInsufficientCreditsError(
|
||||
message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
|
||||
code=ErrorCode.PAYMENT_REQUIRED,
|
||||
details={
|
||||
"provider_exception_type": type(e).__name__,
|
||||
"body": getattr(e, "body", None),
|
||||
"is_byok": is_byok,
|
||||
},
|
||||
)
|
||||
return LLMBadRequestError(
|
||||
message=f"OpenAI API error: {msg}",
|
||||
code=ErrorCode.INVALID_ARGUMENT,
|
||||
details={
|
||||
"provider_exception_type": type(e).__name__,
|
||||
"body": getattr(e, "body", None),
|
||||
"is_byok": is_byok,
|
||||
},
|
||||
)
|
||||
|
||||
if isinstance(e, openai.AuthenticationError):
|
||||
logger.error(f"[OpenAI] Authentication error (401): {str(e)}") # More severe log level
|
||||
@@ -1168,6 +1188,14 @@ class OpenAIClient(LLMClientBase):
|
||||
message=f"Request too large for OpenAI (413): {str(e)}",
|
||||
details={"is_byok": is_byok},
|
||||
)
|
||||
# Handle 402 Payment Required or credit-related messages
|
||||
if e.status_code == 402 or is_insufficient_credits_message(str(e)):
|
||||
msg = str(e)
|
||||
return LLMInsufficientCreditsError(
|
||||
message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
|
||||
code=ErrorCode.PAYMENT_REQUIRED,
|
||||
details={"status_code": e.status_code, "body": e.body, "is_byok": is_byok},
|
||||
)
|
||||
# Map based on status code potentially
|
||||
if e.status_code >= 500:
|
||||
error_cls = LLMServerError
|
||||
|
||||
Reference in New Issue
Block a user