From b42e9fa7f3389d1c90eef4f15037a14cea5cfb32 Mon Sep 17 00:00:00 2001 From: Charles Packer Date: Mon, 3 Nov 2025 16:10:01 -0800 Subject: [PATCH] fix(core): properly map bytes overflow to context overflow error (#5930) --- letta/llm_api/anthropic_client.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index b9808e3b..7990f778 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -478,7 +478,13 @@ class AnthropicClient(LLMClientBase): def handle_llm_error(self, e: Exception) -> Exception: # make sure to check for overflow errors, regardless of error type error_str = str(e).lower() - if "prompt is too long" in error_str or "exceed context limit" in error_str or "exceeds context" in error_str: + if ( + "prompt is too long" in error_str + or "exceed context limit" in error_str + or "exceeds context" in error_str + or "too many total text bytes" in error_str + or "total text bytes" in error_str + ): logger.warning(f"[Anthropic] Context window exceeded: {str(e)}") return ContextWindowExceededError( message=f"Context window exceeded for Anthropic: {str(e)}", @@ -510,7 +516,13 @@ class AnthropicClient(LLMClientBase): if isinstance(e, anthropic.BadRequestError): logger.warning(f"[Anthropic] Bad request: {str(e)}") error_str = str(e).lower() - if "prompt is too long" in error_str or "exceed context limit" in error_str: + if ( + "prompt is too long" in error_str + or "exceed context limit" in error_str + or "exceeds context" in error_str + or "too many total text bytes" in error_str + or "total text bytes" in error_str + ): # If the context window is too large, we expect to receive either: # 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'prompt is too long: 200758 tokens > 200000 maximum'}} # 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'input length and `max_tokens` exceed context limit: 173298 + 32000 > 200000, decrease input length or `max_tokens` and try again'}}