diff --git a/letta/errors.py b/letta/errors.py
index e21413a2..ccf98125 100644
--- a/letta/errors.py
+++ b/letta/errors.py
@@ -20,6 +20,7 @@ class ErrorCode(Enum):
     TIMEOUT = "TIMEOUT"
     CONFLICT = "CONFLICT"
     EXPIRED = "EXPIRED"
+    PAYMENT_REQUIRED = "PAYMENT_REQUIRED"
 
 
 class LettaError(Exception):
@@ -256,6 +257,10 @@ class LLMBadRequestError(LLMError):
     """Error when LLM service cannot process request"""
 
 
+class LLMInsufficientCreditsError(LLMError):
+    """Error when LLM provider reports insufficient credits or quota"""
+
+
 class LLMAuthenticationError(LLMError):
     """Error when authentication fails with LLM service"""
 
diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py
index c84004a6..c6516663 100644
--- a/letta/llm_api/anthropic_client.py
+++ b/letta/llm_api/anthropic_client.py
@@ -19,6 +19,7 @@ from letta.errors import (
     LLMAuthenticationError,
     LLMBadRequestError,
     LLMConnectionError,
+    LLMInsufficientCreditsError,
     LLMNotFoundError,
     LLMPermissionDeniedError,
     LLMProviderOverloaded,
@@ -31,6 +32,7 @@ from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.decorators import deprecated
 from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.anthropic_constants import ANTHROPIC_MAX_STRICT_TOOLS, ANTHROPIC_STRICT_MODE_ALLOWLIST
+from letta.llm_api.error_utils import is_insufficient_credits_message
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
@@ -1088,6 +1090,13 @@ class AnthropicClient(LLMClientBase):
         if isinstance(e, anthropic.APIStatusError):
             logger.warning(f"[Anthropic] API status error: {str(e)}")
 
+            if (hasattr(e, "status_code") and e.status_code == 402) or is_insufficient_credits_message(str(e)):
+                msg = str(e)
+                return LLMInsufficientCreditsError(
+                    message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
+                    code=ErrorCode.PAYMENT_REQUIRED,
+                    details={"status_code": getattr(e, "status_code", None), "is_byok": is_byok},
+                )
             if hasattr(e, "status_code") and e.status_code == 413:
                 logger.warning(f"[Anthropic] Request too large (413): {str(e)}")
                 return ContextWindowExceededError(
diff --git a/letta/llm_api/error_utils.py b/letta/llm_api/error_utils.py
index b1d6e356..ad28d036 100644
--- a/letta/llm_api/error_utils.py
+++ b/letta/llm_api/error_utils.py
@@ -20,3 +20,21 @@ def is_context_window_overflow_message(msg: str) -> bool:
         or "context_length_exceeded" in msg
         or "Input tokens exceed the configured limit" in msg
     )
+
+
+def is_insufficient_credits_message(msg: str) -> bool:
+    """Best-effort detection for insufficient credits/quota/billing errors.
+
+    BYOK users on OpenRouter, OpenAI, etc. may exhaust their credits mid-stream
+    or get rejected pre-flight. We detect these so they map to 402 instead of 400/500.
+    """
+    lower = msg.lower()
+    return (
+        "insufficient credits" in lower
+        or "requires more credits" in lower
+        or "add more credits" in lower
+        or "exceeded your current quota" in lower
+        or "you've exceeded your budget" in lower
+        or ("billing" in lower and "hard limit" in lower)
+        or "can only afford" in lower
+    )
diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py
index 4eaa6bb2..5dac8cf4 100644
--- a/letta/llm_api/google_vertex_client.py
+++ b/letta/llm_api/google_vertex_client.py
@@ -23,6 +23,7 @@ from letta.errors import (
     LLMAuthenticationError,
     LLMBadRequestError,
     LLMConnectionError,
+    LLMInsufficientCreditsError,
     LLMNotFoundError,
     LLMPermissionDeniedError,
     LLMRateLimitError,
@@ -32,6 +33,7 @@ from letta.errors import (
 )
 from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps, json_loads, sanitize_unicode_surrogates
+from letta.llm_api.error_utils import is_insufficient_credits_message
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.log import get_logger
@@ -932,6 +934,13 @@ class GoogleVertexClient(LLMClientBase):
                 code=ErrorCode.TIMEOUT,
                 details={"cause": str(e.__cause__) if e.__cause__ else None, "is_byok": is_byok},
             )
+        elif e.code == 402 or is_insufficient_credits_message(str(e)):
+            msg = str(e)
+            return LLMInsufficientCreditsError(
+                message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
+                code=ErrorCode.PAYMENT_REQUIRED,
+                details={"status_code": e.code, "is_byok": is_byok},
+            )
         elif e.code == 422:
             return LLMUnprocessableEntityError(
                 message=f"Invalid request content for {self._provider_name()}: {str(e)}",
diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index 674088ba..e4b78736 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -20,6 +20,7 @@ from letta.errors import (
     LLMAuthenticationError,
     LLMBadRequestError,
     LLMConnectionError,
+    LLMInsufficientCreditsError,
     LLMNotFoundError,
     LLMPermissionDeniedError,
     LLMRateLimitError,
@@ -28,7 +29,7 @@ from letta.errors import (
     LLMUnprocessableEntityError,
 )
 from letta.helpers.json_helpers import sanitize_unicode_surrogates
-from letta.llm_api.error_utils import is_context_window_overflow_message
+from letta.llm_api.error_utils import is_context_window_overflow_message, is_insufficient_credits_message
 from letta.llm_api.helpers import (
     add_inner_thoughts_to_functions,
     convert_response_format_to_responses_api,
@@ -1110,7 +1111,7 @@ class OpenAIClient(LLMClientBase):
         #
         # Example message:
         # "Your input exceeds the context window of this model. Please adjust your input and try again."
-        if isinstance(e, openai.APIError):
+        if isinstance(e, openai.APIError) and not isinstance(e, (openai.APIStatusError, openai.APIConnectionError)):
             msg = str(e)
             if is_context_window_overflow_message(msg):
                 return ContextWindowExceededError(
@@ -1121,6 +1122,25 @@ class OpenAIClient(LLMClientBase):
                         "is_byok": is_byok,
                     },
                 )
+            if is_insufficient_credits_message(msg):
+                return LLMInsufficientCreditsError(
+                    message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
+                    code=ErrorCode.PAYMENT_REQUIRED,
+                    details={
+                        "provider_exception_type": type(e).__name__,
+                        "body": getattr(e, "body", None),
+                        "is_byok": is_byok,
+                    },
+                )
+            return LLMBadRequestError(
+                message=f"OpenAI API error: {msg}",
+                code=ErrorCode.INVALID_ARGUMENT,
+                details={
+                    "provider_exception_type": type(e).__name__,
+                    "body": getattr(e, "body", None),
+                    "is_byok": is_byok,
+                },
+            )
 
         if isinstance(e, openai.AuthenticationError):
             logger.error(f"[OpenAI] Authentication error (401): {str(e)}")  # More severe log level
@@ -1168,6 +1188,14 @@ class OpenAIClient(LLMClientBase):
                 message=f"Request too large for OpenAI (413): {str(e)}",
                 details={"is_byok": is_byok},
             )
+            # Handle 402 Payment Required or credit-related messages
+            if e.status_code == 402 or is_insufficient_credits_message(str(e)):
+                msg = str(e)
+                return LLMInsufficientCreditsError(
+                    message=f"Insufficient credits (BYOK): {msg}" if is_byok else f"Insufficient credits: {msg}",
+                    code=ErrorCode.PAYMENT_REQUIRED,
+                    details={"status_code": e.status_code, "body": e.body, "is_byok": is_byok},
+                )
             # Map based on status code potentially
             if e.status_code >= 500:
                 error_cls = LLMServerError
diff --git a/letta/server/rest_api/app.py b/letta/server/rest_api/app.py
index d59f6b14..c464590b 100644
--- a/letta/server/rest_api/app.py
+++ b/letta/server/rest_api/app.py
@@ -54,6 +54,7 @@ from letta.errors import (
     LLMAuthenticationError,
     LLMBadRequestError,
     LLMError,
+    LLMInsufficientCreditsError,
     LLMProviderOverloaded,
     LLMRateLimitError,
     LLMTimeoutError,
@@ -705,6 +706,24 @@ def create_application() -> "FastAPI":
             },
         )
 
+    @app.exception_handler(LLMInsufficientCreditsError)
+    async def llm_insufficient_credits_handler(request: Request, exc: LLMInsufficientCreditsError):
+        is_byok = exc.details.get("is_byok") if isinstance(exc.details, dict) else None
+        if is_byok:
+            message = "Insufficient credits on your API key. Please add credits with your LLM provider."
+        else:
+            message = "Insufficient credits for LLM request. Please check your account."
+        return JSONResponse(
+            status_code=402,
+            content={
+                "error": {
+                    "type": "llm_insufficient_credits",
+                    "message": message,
+                    "detail": str(exc),
+                }
+            },
+        )
+
     @app.exception_handler(LLMAuthenticationError)
     async def llm_auth_error_handler(request: Request, exc: LLMAuthenticationError):
         return JSONResponse(
diff --git a/tests/adapters/test_letta_llm_stream_adapter_error_handling.py b/tests/adapters/test_letta_llm_stream_adapter_error_handling.py
index 4d3842fc..fcdf562d 100644
--- a/tests/adapters/test_letta_llm_stream_adapter_error_handling.py
+++ b/tests/adapters/test_letta_llm_stream_adapter_error_handling.py
@@ -1,10 +1,18 @@
 import anthropic
 import httpx
+import openai
 import pytest
 from google.genai import errors as google_errors
 
 from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
-from letta.errors import ContextWindowExceededError, LLMBadRequestError, LLMConnectionError, LLMError, LLMServerError
+from letta.errors import (
+    ContextWindowExceededError,
+    LLMBadRequestError,
+    LLMConnectionError,
+    LLMError,
+    LLMInsufficientCreditsError,
+    LLMServerError,
+)
 from letta.llm_api.anthropic_client import AnthropicClient
 from letta.llm_api.google_vertex_client import GoogleVertexClient
 from letta.schemas.enums import LLMCallType
@@ -235,3 +243,48 @@ def test_google_client_handle_llm_error_generic_400_returns_bad_request():
     result = client.handle_llm_error(error)
     assert isinstance(result, LLMBadRequestError)
     assert not isinstance(result, ContextWindowExceededError)
+
+
+@pytest.mark.parametrize(
+    "error_message",
+    [
+        "Insufficient credits. Add more using https://openrouter.ai/settings/credits",
+        "This request requires more credits, or fewer max_tokens. You requested up to 65536 tokens, but can only afford 2679.",
+        "You exceeded your current quota, please check your plan and billing details.",
+    ],
+    ids=["openrouter-402", "openrouter-streaming-afford", "openai-quota-exceeded"],
+)
+def test_openai_client_handle_llm_error_insufficient_credits(error_message):
+    """Credit/quota errors should map to LLMInsufficientCreditsError."""
+    from letta.llm_api.openai_client import OpenAIClient
+
+    client = OpenAIClient()
+    request = httpx.Request("POST", "https://api.openai.com/v1/chat/completions")
+    error = openai.APIError(message=error_message, request=request, body=None)
+    result = client.handle_llm_error(error)
+    assert isinstance(result, LLMInsufficientCreditsError)
+
+
+def test_openai_client_handle_llm_error_402_status_code():
+    """402 APIStatusError should map to LLMInsufficientCreditsError."""
+    from letta.llm_api.openai_client import OpenAIClient
+
+    client = OpenAIClient()
+    request = httpx.Request("POST", "https://openrouter.ai/api/v1/chat/completions")
+    response = httpx.Response(status_code=402, request=request)
+    body = {"error": {"message": "Insufficient credits", "code": 402}}
+    error = openai.APIStatusError("Insufficient credits", response=response, body=body)
+    result = client.handle_llm_error(error)
+    assert isinstance(result, LLMInsufficientCreditsError)
+
+
+def test_openai_client_handle_llm_error_non_credit_api_error():
+    """Non-credit bare APIError should map to LLMBadRequestError, not LLMInsufficientCreditsError."""
+    from letta.llm_api.openai_client import OpenAIClient
+
+    client = OpenAIClient()
+    request = httpx.Request("POST", "https://api.openai.com/v1/chat/completions")
+    error = openai.APIError(message="Some other API error occurred", request=request, body=None)
+    result = client.handle_llm_error(error)
+    assert isinstance(result, LLMBadRequestError)
+    assert not isinstance(result, LLMInsufficientCreditsError)