diff --git a/letta/errors.py b/letta/errors.py
index e21413a2..ccf98125 100644
--- a/letta/errors.py
+++ b/letta/errors.py
@@ -20,6 +20,7 @@ class ErrorCode(Enum):
     TIMEOUT = "TIMEOUT"
     CONFLICT = "CONFLICT"
     EXPIRED = "EXPIRED"
+    PAYMENT_REQUIRED = "PAYMENT_REQUIRED"
 
 
 class LettaError(Exception):
@@ -256,6 +257,10 @@ class LLMBadRequestError(LLMError):
     """Error when LLM service cannot process request"""
 
 
+class LLMInsufficientCreditsError(LLMError):
+    """Error when LLM provider reports insufficient credits or quota"""
+
+
 class LLMAuthenticationError(LLMError):
     """Error when authentication fails with LLM service"""
 
diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py
index c84004a6..c6516663 100644
--- a/letta/llm_api/anthropic_client.py
+++ b/letta/llm_api/anthropic_client.py
@@ -19,6 +19,7 @@ from letta.errors import (
     LLMAuthenticationError,
     LLMBadRequestError,
     LLMConnectionError,
+    LLMInsufficientCreditsError,
     LLMNotFoundError,
     LLMPermissionDeniedError,
     LLMProviderOverloaded,
@@ -31,6 +32,7 @@ from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.decorators import deprecated
 from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.anthropic_constants import ANTHROPIC_MAX_STRICT_TOOLS, ANTHROPIC_STRICT_MODE_ALLOWLIST
+from letta.llm_api.error_utils import is_insufficient_credits_message
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
@@ -1088,6 +1090,13 @@ class AnthropicClient(LLMClientBase):
         if isinstance(e, anthropic.APIStatusError):
             logger.warning(f"[Anthropic] API status error: {str(e)}")
 
+            if (hasattr(e, "status_code") and e.status_code == 402) or is_insufficient_credits_message(str(e)):
+                msg = str(e)
+                return LLMInsufficientCreditsError(
+                    message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
+                    code=ErrorCode.PAYMENT_REQUIRED,
+                    details={"status_code": getattr(e, "status_code", None), "is_byok": is_byok},
+                )
             if hasattr(e, "status_code") and e.status_code == 413:
                 logger.warning(f"[Anthropic] Request too large (413): {str(e)}")
                 return ContextWindowExceededError(
diff --git a/letta/llm_api/error_utils.py b/letta/llm_api/error_utils.py
index b1d6e356..ad28d036 100644
--- a/letta/llm_api/error_utils.py
+++ b/letta/llm_api/error_utils.py
@@ -20,3 +20,21 @@ def is_context_window_overflow_message(msg: str) -> bool:
         or "context_length_exceeded" in msg
         or "Input tokens exceed the configured limit" in msg
     )
+
+
+def is_insufficient_credits_message(msg: str) -> bool:
+    """Best-effort detection for insufficient credits/quota/billing errors.
+
+    BYOK users on OpenRouter, OpenAI, etc. may exhaust their credits mid-stream
+    or get rejected pre-flight. We detect these so they map to 402 instead of 400/500.
+    """
+    lower = msg.lower()
+    return (
+        "insufficient credits" in lower
+        or "requires more credits" in lower
+        or "add more credits" in lower
+        or "exceeded your current quota" in lower
+        or "you've exceeded your budget" in lower
+        or ("billing" in lower and "hard limit" in lower)
+        or "can only afford" in lower
+    )
diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py
index 4eaa6bb2..5dac8cf4 100644
--- a/letta/llm_api/google_vertex_client.py
+++ b/letta/llm_api/google_vertex_client.py
@@ -23,6 +23,7 @@ from letta.errors import (
     LLMAuthenticationError,
     LLMBadRequestError,
     LLMConnectionError,
+    LLMInsufficientCreditsError,
     LLMNotFoundError,
     LLMPermissionDeniedError,
     LLMRateLimitError,
@@ -32,6 +33,7 @@ from letta.errors import (
 )
 from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps, json_loads, sanitize_unicode_surrogates
+from letta.llm_api.error_utils import is_insufficient_credits_message
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.log import get_logger
@@ -932,6 +934,13 @@ class GoogleVertexClient(LLMClientBase):
                 code=ErrorCode.TIMEOUT,
                 details={"cause": str(e.__cause__) if e.__cause__ else None, "is_byok": is_byok},
             )
+        elif e.code == 402 or is_insufficient_credits_message(str(e)):
+            msg = str(e)
+            return LLMInsufficientCreditsError(
+                message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
+                code=ErrorCode.PAYMENT_REQUIRED,
+                details={"status_code": e.code, "is_byok": is_byok},
+            )
         elif e.code == 422:
             return LLMUnprocessableEntityError(
                 message=f"Invalid request content for {self._provider_name()}: {str(e)}",
diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index 674088ba..e4b78736 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -20,6 +20,7 @@ from letta.errors import (
     LLMAuthenticationError,
     LLMBadRequestError,
     LLMConnectionError,
+    LLMInsufficientCreditsError,
     LLMNotFoundError,
     LLMPermissionDeniedError,
     LLMRateLimitError,
@@ -28,7 +29,7 @@ from letta.errors import (
     LLMUnprocessableEntityError,
 )
 from letta.helpers.json_helpers import sanitize_unicode_surrogates
-from letta.llm_api.error_utils import is_context_window_overflow_message
+from letta.llm_api.error_utils import is_context_window_overflow_message, is_insufficient_credits_message
 from letta.llm_api.helpers import (
     add_inner_thoughts_to_functions,
     convert_response_format_to_responses_api,
@@ -1110,7 +1111,7 @@ class OpenAIClient(LLMClientBase):
         #
         # Example message:
         # "Your input exceeds the context window of this model. Please adjust your input and try again."
-        if isinstance(e, openai.APIError):
+        if isinstance(e, openai.APIError) and not isinstance(e, (openai.APIStatusError, openai.APIConnectionError)):
             msg = str(e)
             if is_context_window_overflow_message(msg):
                 return ContextWindowExceededError(
@@ -1121,6 +1122,25 @@ class OpenAIClient(LLMClientBase):
                         "is_byok": is_byok,
                     },
                 )
+            if is_insufficient_credits_message(msg):
+                return LLMInsufficientCreditsError(
+                    message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
+                    code=ErrorCode.PAYMENT_REQUIRED,
+                    details={
+                        "provider_exception_type": type(e).__name__,
+                        "body": getattr(e, "body", None),
+                        "is_byok": is_byok,
+                    },
+                )
+            return LLMBadRequestError(
+                message=f"OpenAI API error: {msg}",
+                code=ErrorCode.INVALID_ARGUMENT,
+                details={
+                    "provider_exception_type": type(e).__name__,
+                    "body": getattr(e, "body", None),
+                    "is_byok": is_byok,
+                },
+            )
 
         if isinstance(e, openai.AuthenticationError):
             logger.error(f"[OpenAI] Authentication error (401): {str(e)}")  # More severe log level
@@ -1168,6 +1188,14 @@ class OpenAIClient(LLMClientBase):
                 message=f"Request too large for OpenAI (413): {str(e)}",
                 details={"is_byok": is_byok},
             )
+            # Handle 402 Payment Required or credit-related messages
+            if e.status_code == 402 or is_insufficient_credits_message(str(e)):
+                msg = str(e)
+                return LLMInsufficientCreditsError(
+                    message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
+                    code=ErrorCode.PAYMENT_REQUIRED,
+                    details={"status_code": e.status_code, "body": e.body, "is_byok": is_byok},
+                )
             # Map based on status code potentially
             if e.status_code >= 500:
                 error_cls = LLMServerError
diff --git a/letta/server/rest_api/app.py b/letta/server/rest_api/app.py
index d59f6b14..c464590b 100644
--- a/letta/server/rest_api/app.py
+++ b/letta/server/rest_api/app.py
@@ -54,6 +54,7 @@ from letta.errors import (
     LLMAuthenticationError,
     LLMBadRequestError,
     LLMError,
+    LLMInsufficientCreditsError,
     LLMProviderOverloaded,
     LLMRateLimitError,
     LLMTimeoutError,
@@ -705,6 +706,24 @@ def create_application() -> "FastAPI":
             },
         )
 
+    @app.exception_handler(LLMInsufficientCreditsError)
+    async def llm_insufficient_credits_handler(request: Request, exc: LLMInsufficientCreditsError):
+        is_byok = exc.details.get("is_byok") if isinstance(exc.details, dict) else None
+        if is_byok:
+            message = "Insufficient credits on your API key. Please add credits with your LLM provider."
+        else:
+            message = "Insufficient credits for LLM request. Please check your account."
+        return JSONResponse(
+            status_code=402,
+            content={
+                "error": {
+                    "type": "llm_insufficient_credits",
+                    "message": message,
+                    "detail": str(exc),
+                }
+            },
+        )
+
     @app.exception_handler(LLMAuthenticationError)
     async def llm_auth_error_handler(request: Request, exc: LLMAuthenticationError):
         return JSONResponse(
diff --git a/tests/adapters/test_letta_llm_stream_adapter_error_handling.py b/tests/adapters/test_letta_llm_stream_adapter_error_handling.py
index 4d3842fc..fcdf562d 100644
--- a/tests/adapters/test_letta_llm_stream_adapter_error_handling.py
+++ b/tests/adapters/test_letta_llm_stream_adapter_error_handling.py
@@ -1,10 +1,18 @@
 import anthropic
 import httpx
+import openai
 import pytest
 from google.genai import errors as google_errors
 
 from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
-from letta.errors import ContextWindowExceededError, LLMBadRequestError, LLMConnectionError, LLMError, LLMServerError
+from letta.errors import (
+    ContextWindowExceededError,
+    LLMBadRequestError,
+    LLMConnectionError,
+    LLMError,
+    LLMInsufficientCreditsError,
+    LLMServerError,
+)
 from letta.llm_api.anthropic_client import AnthropicClient
 from letta.llm_api.google_vertex_client import GoogleVertexClient
 from letta.schemas.enums import LLMCallType
@@ -235,3 +243,48 @@ def test_google_client_handle_llm_error_generic_400_returns_bad_request():
     result = client.handle_llm_error(error)
     assert isinstance(result, LLMBadRequestError)
     assert not isinstance(result, ContextWindowExceededError)
+
+
+@pytest.mark.parametrize(
+    "error_message",
+    [
+        "Insufficient credits. Add more using https://openrouter.ai/settings/credits",
+        "This request requires more credits, or fewer max_tokens. You requested up to 65536 tokens, but can only afford 2679.",
+        "You exceeded your current quota, please check your plan and billing details.",
+    ],
+    ids=["openrouter-402", "openrouter-streaming-afford", "openai-quota-exceeded"],
+)
+def test_openai_client_handle_llm_error_insufficient_credits(error_message):
+    """Credit/quota errors should map to LLMInsufficientCreditsError."""
+    from letta.llm_api.openai_client import OpenAIClient
+
+    client = OpenAIClient()
+    request = httpx.Request("POST", "https://api.openai.com/v1/chat/completions")
+    error = openai.APIError(message=error_message, request=request, body=None)
+    result = client.handle_llm_error(error)
+    assert isinstance(result, LLMInsufficientCreditsError)
+
+
+def test_openai_client_handle_llm_error_402_status_code():
+    """402 APIStatusError should map to LLMInsufficientCreditsError."""
+    from letta.llm_api.openai_client import OpenAIClient
+
+    client = OpenAIClient()
+    request = httpx.Request("POST", "https://openrouter.ai/api/v1/chat/completions")
+    response = httpx.Response(status_code=402, request=request)
+    body = {"error": {"message": "Insufficient credits", "code": 402}}
+    error = openai.APIStatusError("Insufficient credits", response=response, body=body)
+    result = client.handle_llm_error(error)
+    assert isinstance(result, LLMInsufficientCreditsError)
+
+
+def test_openai_client_handle_llm_error_non_credit_api_error():
+    """Non-credit bare APIError should map to LLMBadRequestError, not LLMInsufficientCreditsError."""
+    from letta.llm_api.openai_client import OpenAIClient
+
+    client = OpenAIClient()
+    request = httpx.Request("POST", "https://api.openai.com/v1/chat/completions")
+    error = openai.APIError(message="Some other API error occurred", request=request, body=None)
+    result = client.handle_llm_error(error)
+    assert isinstance(result, LLMBadRequestError)
+    assert not isinstance(result, LLMInsufficientCreditsError)