From 8729a037b9035e71180098d807b800fce182184b Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Mon, 15 Dec 2025 20:29:44 -0800 Subject: [PATCH] fix: handle new openai overflow error format (#7110) --- .../interfaces/openai_streaming_interface.py | 9 +++++++ letta/llm_api/error_utils.py | 22 ++++++++++++++++ letta/llm_api/openai_client.py | 26 +++++++++++++++---- 3 files changed, 52 insertions(+), 5 deletions(-) create mode 100644 letta/llm_api/error_utils.py diff --git a/letta/interfaces/openai_streaming_interface.py b/letta/interfaces/openai_streaming_interface.py index aa7dac9a..36e3dfa6 100644 --- a/letta/interfaces/openai_streaming_interface.py +++ b/letta/interfaces/openai_streaming_interface.py @@ -30,6 +30,7 @@ from openai.types.responses import ( from openai.types.responses.response_stream_event import ResponseStreamEvent from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG +from letta.llm_api.error_utils import is_context_window_overflow_message from letta.llm_api.openai_client import is_openai_reasoning_model from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages from letta.log import get_logger @@ -746,6 +747,14 @@ class SimpleOpenAIStreamingInterface: except Exception as e: import traceback + # IMPORTANT: If this is a context window overflow, we should propagate the + # exception upward so the agent loop can compact/summarize + retry. + # Yielding an error stop reason here would prematurely terminate the user's + # stream even though a retry path exists. + msg = str(e) + if is_context_window_overflow_message(msg): + raise + logger.exception("Error processing stream: %s", e) if ttft_span: ttft_span.add_event( diff --git a/letta/llm_api/error_utils.py b/letta/llm_api/error_utils.py new file mode 100644 index 00000000..b1d6e356 --- /dev/null +++ b/letta/llm_api/error_utils.py @@ -0,0 +1,22 @@ +"""Shared helpers for provider error detection/mapping. + +Keep these utilities free of heavy imports to avoid circular dependencies between +LLM clients (provider-specific) and streaming interfaces. +""" + + +def is_context_window_overflow_message(msg: str) -> bool: + """Best-effort detection for context window overflow errors. + + Different providers (and even different API surfaces within the same provider) + may phrase context-window errors differently. We centralize the heuristic so + all layers (clients, streaming interfaces, agent loops) behave consistently. + """ + + return ( + "exceeds the context window" in msg + or "This model's maximum context length is" in msg + or "maximum context length" in msg + or "context_length_exceeded" in msg + or "Input tokens exceed the configured limit" in msg + ) diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py index 5bb5b08c..26dd493a 100644 --- a/letta/llm_api/openai_client.py +++ b/letta/llm_api/openai_client.py @@ -26,6 +26,7 @@ from letta.errors import ( LLMTimeoutError, LLMUnprocessableEntityError, ) +from letta.llm_api.error_utils import is_context_window_overflow_message from letta.llm_api.helpers import ( add_inner_thoughts_to_functions, convert_response_format_to_responses_api, @@ -978,11 +979,7 @@ class OpenAIClient(LLMClientBase): error_code = error_details.get("code") # Check both the error code and message content for context length issues - if ( - error_code == "context_length_exceeded" - or "This model's maximum context length is" in str(e) - or "Input tokens exceed the configured limit" in str(e) - ): + if error_code == "context_length_exceeded" or is_context_window_overflow_message(str(e)): return ContextWindowExceededError( message=f"Bad request to OpenAI (context window exceeded): {str(e)}", ) @@ -993,6 +990,25 @@ class OpenAIClient(LLMClientBase): details=e.body, ) + # NOTE: The OpenAI Python SDK may raise a generic `openai.APIError` while *iterating* + # over a stream (e.g. Responses API streaming). In this case we don't necessarily + # get a `BadRequestError` with a structured error body, but we still want to + # trigger Letta's context window compaction / retry logic. + # + # Example message: + # "Your input exceeds the context window of this model. Please adjust your input and try again." + if isinstance(e, openai.APIError): + msg = str(e) + if is_context_window_overflow_message(msg): + return ContextWindowExceededError( + message=f"OpenAI request exceeded the context window: {msg}", + details={ + "provider_exception_type": type(e).__name__, + # Best-effort extraction (may not exist on APIError) + "body": getattr(e, "body", None), + }, + ) + if isinstance(e, openai.AuthenticationError): logger.error(f"[OpenAI] Authentication error (401): {str(e)}") # More severe log level return LLMAuthenticationError(