From 05ec02e38437173c0af0c3e19e82b309d18ddd46 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 8 Jan 2026 18:11:21 +0000
Subject: [PATCH] fix: handle Anthropic 413 request_too_large as
 ContextWindowExceededError (#8424)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Anthropic API returns a 413 status code with error type `request_too_large`
when the request payload exceeds the maximum allowed size. This error should
be converted to `ContextWindowExceededError` so the system can handle it
appropriately (e.g., by summarizing the conversation to reduce context size).

Changes:
- Added `request_too_large` and `request exceeds the maximum size` to the
  early string-based error detection in `handle_llm_error`
- Added specific handling for HTTP 413 status code in the `APIStatusError`
  handler
- Added tests to verify the new error handling behavior

Fixes: #8422

🤖 Generated with [Letta Code](https://letta.com)

Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com>
Co-authored-by: Letta <noreply@letta.com>
Co-authored-by: datadog-official[bot] <datadog-official[bot]@users.noreply.github.com>
Co-authored-by: Kian Jones <11655409+kianjones9@users.noreply.github.com>
---
 letta/llm_api/anthropic_client.py             |  8 ++
 ...letta_llm_stream_adapter_error_handling.py | 73 ++++++++++++++++++-
 2 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py
index 025d20c6..cc7b4f0b 100644
--- a/letta/llm_api/anthropic_client.py
+++ b/letta/llm_api/anthropic_client.py
@@ -728,6 +728,8 @@ class AnthropicClient(LLMClientBase):
             or "exceeds context" in error_str
             or "too many total text bytes" in error_str
             or "total text bytes" in error_str
+            or "request_too_large" in error_str
+            or "request exceeds the maximum size" in error_str
         ):
             logger.warning(f"[Anthropic] Context window exceeded: {str(e)}")
             return ContextWindowExceededError(
@@ -820,6 +822,12 @@ class AnthropicClient(LLMClientBase):
 
         if isinstance(e, anthropic.APIStatusError):
             logger.warning(f"[Anthropic] API status error: {str(e)}")
+            # Handle 413 Request Entity Too Large - request payload exceeds size limits
+            if hasattr(e, "status_code") and e.status_code == 413:
+                logger.warning(f"[Anthropic] Request too large (413): {str(e)}")
+                return ContextWindowExceededError(
+                    message=f"Request too large for Anthropic (413): {str(e)}",
+                )
             if "overloaded" in str(e).lower():
                 return LLMProviderOverloaded(
                     message=f"Anthropic API is overloaded: {str(e)}",
diff --git a/tests/adapters/test_letta_llm_stream_adapter_error_handling.py b/tests/adapters/test_letta_llm_stream_adapter_error_handling.py
index 30cffb2c..a0276757 100644
--- a/tests/adapters/test_letta_llm_stream_adapter_error_handling.py
+++ b/tests/adapters/test_letta_llm_stream_adapter_error_handling.py
@@ -3,7 +3,7 @@ import httpx
 import pytest
 
 from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
-from letta.errors import LLMServerError
+from letta.errors import ContextWindowExceededError, LLMServerError
 from letta.llm_api.anthropic_client import AnthropicClient
 from letta.schemas.llm_config import LLMConfig
 
@@ -48,3 +48,74 @@ async def test_letta_llm_stream_adapter_converts_anthropic_streaming_api_status_
     with pytest.raises(LLMServerError):
         async for _ in gen:
             pass
+
+
+@pytest.mark.asyncio
+async def test_letta_llm_stream_adapter_converts_anthropic_413_request_too_large(monkeypatch):
+    """Regression: a 413 request_too_large APIStatusError raised while iterating the stream must surface as ContextWindowExceededError."""
+
+    request = httpx.Request("POST", "https://api.anthropic.com/v1/messages")
+    response = httpx.Response(status_code=413, request=request)
+    body = {
+        "type": "error",
+        "error": {"type": "request_too_large", "message": "Request exceeds the maximum size"},
+    }
+
+    class FakeAsyncStream:
+        """Mimics anthropic.AsyncStream enough for AnthropicStreamingInterface (async cm + async iterator)."""
+
+        async def __aenter__(self):
+            return self
+
+        async def __aexit__(self, exc_type, exc, tb):
+            return None  # a falsy return never suppresses the exception raised inside the `async with`
+
+        def __aiter__(self):
+            return self
+        # Always fail on the first chunk, so no stream event is ever produced.
+        async def __anext__(self):
+            raise anthropic.APIStatusError("REQUEST_TOO_LARGE", response=response, body=body)
+
+    async def fake_stream_async(self, request_data: dict, llm_config: LLMConfig):
+        return FakeAsyncStream()
+    # raising=True makes the patch itself fail if AnthropicClient has no `stream_async` attribute.
+    monkeypatch.setattr(AnthropicClient, "stream_async", fake_stream_async, raising=True)
+
+    llm_client = AnthropicClient()
+    llm_config = LLMConfig(model="claude-sonnet-4-5-20250929", model_endpoint_type="anthropic", context_window=200000)
+    adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config)
+
+    gen = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True)
+    with pytest.raises(ContextWindowExceededError):
+        async for _ in gen:
+            pass
+
+
+def test_anthropic_client_handle_llm_error_413_status_code():
+    """A 413 APIStatusError must map to ContextWindowExceededError through the status-code branch itself."""
+    client = AnthropicClient()
+
+    request = httpx.Request("POST", "https://api.anthropic.com/v1/messages")
+    response = httpx.Response(status_code=413, request=request)
+    body = {
+        "type": "error",
+        "error": {"type": "request_too_large", "message": "Request exceeds the maximum size"},
+    }
+    # Neutral message (assumes str(e) is just the message): the substring checks cannot match, so only the 413 branch can.
+    error = anthropic.APIStatusError("Payload Too Large", response=response, body=body)
+    result = client.handle_llm_error(error)
+
+    assert isinstance(result, ContextWindowExceededError)
+    assert "413" in result.message
+
+
+def test_anthropic_client_handle_llm_error_request_too_large_string():
+    """A plain Exception whose text contains request_too_large must map to ContextWindowExceededError."""
+    anthropic_client = AnthropicClient()
+
+    # Plain Exception (not an anthropic error class) whose text carries the 413 request_too_large payload.
+    raw_error = Exception("Error code: 413 - {'error': {'type': 'request_too_large', 'message': 'Request exceeds the maximum size'}}")
+    converted = anthropic_client.handle_llm_error(raw_error)
+
+    assert isinstance(converted, ContextWindowExceededError)
+    assert any(marker in converted.message.lower() for marker in ("request_too_large", "context window exceeded"))