From b42e9fa7f3389d1c90eef4f15037a14cea5cfb32 Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 3 Nov 2025 16:10:01 -0800
Subject: [PATCH] fix(core): properly map bytes overflow to context overflow
 error (#5930)

---
 letta/llm_api/anthropic_client.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py
index b9808e3b..7990f778 100644
--- a/letta/llm_api/anthropic_client.py
+++ b/letta/llm_api/anthropic_client.py
@@ -478,7 +478,13 @@ class AnthropicClient(LLMClientBase):
     def handle_llm_error(self, e: Exception) -> Exception:
         # make sure to check for overflow errors, regardless of error type
         error_str = str(e).lower()
-        if "prompt is too long" in error_str or "exceed context limit" in error_str or "exceeds context" in error_str:
+        if (
+            "prompt is too long" in error_str
+            or "exceed context limit" in error_str
+            or "exceeds context" in error_str
+            or "too many total text bytes" in error_str
+            or "total text bytes" in error_str
+        ):
             logger.warning(f"[Anthropic] Context window exceeded: {str(e)}")
             return ContextWindowExceededError(
                 message=f"Context window exceeded for Anthropic: {str(e)}",
@@ -510,7 +516,13 @@ class AnthropicClient(LLMClientBase):
         if isinstance(e, anthropic.BadRequestError):
             logger.warning(f"[Anthropic] Bad request: {str(e)}")
             error_str = str(e).lower()
-            if "prompt is too long" in error_str or "exceed context limit" in error_str:
+            if (
+                "prompt is too long" in error_str
+                or "exceed context limit" in error_str
+                or "exceeds context" in error_str
+                or "too many total text bytes" in error_str
+                or "total text bytes" in error_str
+            ):
                 # If the context window is too large, we expect to receive either:
                 # 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'prompt is too long: 200758 tokens > 200000 maximum'}}
                 # 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'input length and `max_tokens` exceed context limit: 173298 + 32000 > 200000, decrease input length or `max_tokens` and try again'}}