fix: handle Anthropic 413 request_too_large as ContextWindowExceededError (#8424)
The Anthropic API returns a 413 status code with error type `request_too_large` when the request payload exceeds the maximum allowed size. This error should be converted to `ContextWindowExceededError` so the system can handle it appropriately (e.g., by summarizing the conversation to reduce context size).

Changes:
- Added `request_too_large` and `request exceeds the maximum size` to the early string-based error detection in `handle_llm_error`
- Added specific handling for HTTP 413 status code in the `APIStatusError` handler
- Added tests to verify the new error handling behavior

Fixes: #8422

🤖 Generated with [Letta Code](https://letta.com)

Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com>
Co-authored-by: Letta <noreply@letta.com>
Co-authored-by: datadog-official[bot] <datadog-official[bot]@users.noreply.github.com>
Co-authored-by: Kian Jones <11655409+kianjones9@users.noreply.github.com>
This commit is contained in:
committed by
Caren Thomas
parent
adbc47ddc9
commit
05ec02e384
@@ -728,6 +728,8 @@ class AnthropicClient(LLMClientBase):
|
||||
or "exceeds context" in error_str
|
||||
or "too many total text bytes" in error_str
|
||||
or "total text bytes" in error_str
|
||||
or "request_too_large" in error_str
|
||||
or "request exceeds the maximum size" in error_str
|
||||
):
|
||||
logger.warning(f"[Anthropic] Context window exceeded: {str(e)}")
|
||||
return ContextWindowExceededError(
|
||||
@@ -820,6 +822,12 @@ class AnthropicClient(LLMClientBase):
|
||||
|
||||
if isinstance(e, anthropic.APIStatusError):
|
||||
logger.warning(f"[Anthropic] API status error: {str(e)}")
|
||||
# Handle 413 Request Entity Too Large - request payload exceeds size limits
|
||||
if hasattr(e, "status_code") and e.status_code == 413:
|
||||
logger.warning(f"[Anthropic] Request too large (413): {str(e)}")
|
||||
return ContextWindowExceededError(
|
||||
message=f"Request too large for Anthropic (413): {str(e)}",
|
||||
)
|
||||
if "overloaded" in str(e).lower():
|
||||
return LLMProviderOverloaded(
|
||||
message=f"Anthropic API is overloaded: {str(e)}",
|
||||
|
||||
@@ -3,7 +3,7 @@ import httpx
|
||||
import pytest
|
||||
|
||||
from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
|
||||
from letta.errors import LLMServerError
|
||||
from letta.errors import ContextWindowExceededError, LLMServerError
|
||||
from letta.llm_api.anthropic_client import AnthropicClient
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
|
||||
@@ -48,3 +48,74 @@ async def test_letta_llm_stream_adapter_converts_anthropic_streaming_api_status_
|
||||
with pytest.raises(LLMServerError):
|
||||
async for _ in gen:
|
||||
pass
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_letta_llm_stream_adapter_converts_anthropic_413_request_too_large(monkeypatch):
    """Regression: 413 request_too_large errors should be converted to ContextWindowExceededError."""

    http_request = httpx.Request("POST", "https://api.anthropic.com/v1/messages")
    http_response = httpx.Response(status_code=413, request=http_request)
    error_body = {
        "type": "error",
        "error": {"type": "request_too_large", "message": "Request exceeds the maximum size"},
    }

    class _StubAnthropicStream:
        """Minimal stand-in for anthropic.AsyncStream: supports the async
        context-manager and async-iterator protocols the streaming interface
        relies on, and raises a 413 APIStatusError on the first iteration."""

        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            return None

        def __aiter__(self):
            return self

        async def __anext__(self):
            # Simulate the Anthropic SDK surfacing a 413 mid-stream.
            raise anthropic.APIStatusError("REQUEST_TOO_LARGE", response=http_response, body=error_body)

    async def _patched_stream_async(self, request_data: dict, llm_config: LLMConfig):
        # Replacement for AnthropicClient.stream_async that hands back the stub stream.
        return _StubAnthropicStream()

    monkeypatch.setattr(AnthropicClient, "stream_async", _patched_stream_async, raising=True)

    client = AnthropicClient()
    config = LLMConfig(model="claude-sonnet-4-5-20250929", model_endpoint_type="anthropic", context_window=200000)
    adapter = LettaLLMStreamAdapter(llm_client=client, llm_config=config)

    event_stream = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True)
    # Draining the stream must surface the converted error, not the raw APIStatusError.
    with pytest.raises(ContextWindowExceededError):
        async for _ in event_stream:
            pass
|
||||
|
||||
|
||||
def test_anthropic_client_handle_llm_error_413_status_code():
    """handle_llm_error maps an APIStatusError with status 413 to ContextWindowExceededError."""
    http_request = httpx.Request("POST", "https://api.anthropic.com/v1/messages")
    http_response = httpx.Response(status_code=413, request=http_request)
    error_body = {
        "type": "error",
        "error": {"type": "request_too_large", "message": "Request exceeds the maximum size"},
    }
    api_error = anthropic.APIStatusError("REQUEST_TOO_LARGE", response=http_response, body=error_body)

    converted = AnthropicClient().handle_llm_error(api_error)

    assert isinstance(converted, ContextWindowExceededError)
    # The converted message should identify the cause via the status code or the error type.
    assert "413" in converted.message or "request_too_large" in converted.message.lower()
|
||||
|
||||
|
||||
def test_anthropic_client_handle_llm_error_request_too_large_string():
    """handle_llm_error's string-based detection catches 'request_too_large' in a generic exception."""
    # A plain Exception carrying the raw API error text — no anthropic type involved.
    raw_error = Exception("Error code: 413 - {'error': {'type': 'request_too_large', 'message': 'Request exceeds the maximum size'}}")

    converted = AnthropicClient().handle_llm_error(raw_error)

    assert isinstance(converted, ContextWindowExceededError)
    lowered = converted.message.lower()
    assert "request_too_large" in lowered or "context window exceeded" in lowered
|
||||
|
||||
Reference in New Issue
Block a user