fix(core): catch bare openai.APIError in handle_llm_error (#9468)

* fix(core): catch bare openai.APIError in handle_llm_error fallthrough

An openai.APIError raised during streaming (e.g. OpenRouter credit
exhaustion) is not an APIStatusError, so it previously skipped the
catch-all at the end and fell through to LLMError("Unhandled"). Now
bare APIErrors that aren't context window overflows are mapped to
LLMBadRequestError.

Datadog: https://us5.datadoghq.com/error-tracking/issue/7a2c356c-0849-11f1-be66-da7ad0900000

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* feat(core): add LLMInsufficientCreditsError for BYOK credit exhaustion

Adds a dedicated error type for insufficient credits/quota across all
providers (OpenAI, Anthropic, Google). Returns HTTP 402 with
BYOK-aware messaging instead of a generic 400.

- New LLMInsufficientCreditsError class and PAYMENT_REQUIRED ErrorCode
- is_insufficient_credits_message() helper detecting credit/quota strings
- All 3 provider clients detect 402 status + credit keywords
- FastAPI handler returns 402 with "your API key" vs generic messaging
- 5 new parametrized tests covering OpenRouter, OpenAI, and negative case

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

---------

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Kian Jones
2026-02-12 15:49:21 -08:00
committed by Caren Thomas
parent cfd2ca3102
commit 80f34f134d
7 changed files with 144 additions and 3 deletions

View File

@@ -20,6 +20,7 @@ from letta.errors import (
LLMAuthenticationError,
LLMBadRequestError,
LLMConnectionError,
LLMInsufficientCreditsError,
LLMNotFoundError,
LLMPermissionDeniedError,
LLMRateLimitError,
@@ -28,7 +29,7 @@ from letta.errors import (
LLMUnprocessableEntityError,
)
from letta.helpers.json_helpers import sanitize_unicode_surrogates
from letta.llm_api.error_utils import is_context_window_overflow_message
from letta.llm_api.error_utils import is_context_window_overflow_message, is_insufficient_credits_message
from letta.llm_api.helpers import (
add_inner_thoughts_to_functions,
convert_response_format_to_responses_api,
@@ -1110,7 +1111,7 @@ class OpenAIClient(LLMClientBase):
#
# Example message:
# "Your input exceeds the context window of this model. Please adjust your input and try again."
if isinstance(e, openai.APIError):
if isinstance(e, openai.APIError) and not isinstance(e, openai.APIStatusError):
msg = str(e)
if is_context_window_overflow_message(msg):
return ContextWindowExceededError(
@@ -1121,6 +1122,25 @@ class OpenAIClient(LLMClientBase):
"is_byok": is_byok,
},
)
if is_insufficient_credits_message(msg):
return LLMInsufficientCreditsError(
message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
code=ErrorCode.PAYMENT_REQUIRED,
details={
"provider_exception_type": type(e).__name__,
"body": getattr(e, "body", None),
"is_byok": is_byok,
},
)
return LLMBadRequestError(
message=f"OpenAI API error: {msg}",
code=ErrorCode.INVALID_ARGUMENT,
details={
"provider_exception_type": type(e).__name__,
"body": getattr(e, "body", None),
"is_byok": is_byok,
},
)
if isinstance(e, openai.AuthenticationError):
logger.error(f"[OpenAI] Authentication error (401): {str(e)}") # More severe log level
@@ -1168,6 +1188,14 @@ class OpenAIClient(LLMClientBase):
message=f"Request too large for OpenAI (413): {str(e)}",
details={"is_byok": is_byok},
)
# Handle 402 Payment Required or credit-related messages
if e.status_code == 402 or is_insufficient_credits_message(str(e)):
msg = str(e)
return LLMInsufficientCreditsError(
message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
code=ErrorCode.PAYMENT_REQUIRED,
details={"status_code": e.status_code, "body": e.body, "is_byok": is_byok},
)
# Map based on status code potentially
if e.status_code >= 500:
error_cls = LLMServerError