fix: handle new openai overflow error format (#7110)
This commit is contained in:
committed by
Caren Thomas
parent
f1bd246e9b
commit
8729a037b9
@@ -30,6 +30,7 @@ from openai.types.responses import (
|
||||
from openai.types.responses.response_stream_event import ResponseStreamEvent
|
||||
|
||||
from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
|
||||
from letta.llm_api.error_utils import is_context_window_overflow_message
|
||||
from letta.llm_api.openai_client import is_openai_reasoning_model
|
||||
from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
|
||||
from letta.log import get_logger
|
||||
@@ -746,6 +747,14 @@ class SimpleOpenAIStreamingInterface:
|
||||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
# IMPORTANT: If this is a context window overflow, we should propagate the
|
||||
# exception upward so the agent loop can compact/summarize + retry.
|
||||
# Yielding an error stop reason here would prematurely terminate the user's
|
||||
# stream even though a retry path exists.
|
||||
msg = str(e)
|
||||
if is_context_window_overflow_message(msg):
|
||||
raise
|
||||
|
||||
logger.exception("Error processing stream: %s", e)
|
||||
if ttft_span:
|
||||
ttft_span.add_event(
|
||||
|
||||
22
letta/llm_api/error_utils.py
Normal file
22
letta/llm_api/error_utils.py
Normal file
@@ -0,0 +1,22 @@
|
||||
"""Shared helpers for provider error detection/mapping.
|
||||
|
||||
Keep these utilities free of heavy imports to avoid circular dependencies between
|
||||
LLM clients (provider-specific) and streaming interfaces.
|
||||
"""
|
||||
|
||||
|
||||
def is_context_window_overflow_message(msg: str) -> bool:
    """Best-effort detection for context window overflow errors.

    Different providers (and even different API surfaces within the same provider)
    may phrase context-window errors differently. We centralize the heuristic so
    all layers (clients, streaming interfaces, agent loops) behave consistently.

    Args:
        msg: Stringified provider exception/error message to inspect.

    Returns:
        True if the message matches any known context-window-overflow phrasing,
        False otherwise (including for the empty string).
    """
    # Known phrasings across providers / API surfaces.
    # NOTE: "maximum context length" subsumes the older OpenAI phrasing
    # "This model's maximum context length is ...", so only the shorter
    # marker is needed. Matching is case-sensitive substring containment.
    overflow_markers = (
        "exceeds the context window",
        "maximum context length",
        "context_length_exceeded",
        "Input tokens exceed the configured limit",
    )
    return any(marker in msg for marker in overflow_markers)
|
||||
@@ -26,6 +26,7 @@ from letta.errors import (
|
||||
LLMTimeoutError,
|
||||
LLMUnprocessableEntityError,
|
||||
)
|
||||
from letta.llm_api.error_utils import is_context_window_overflow_message
|
||||
from letta.llm_api.helpers import (
|
||||
add_inner_thoughts_to_functions,
|
||||
convert_response_format_to_responses_api,
|
||||
@@ -978,11 +979,7 @@ class OpenAIClient(LLMClientBase):
|
||||
error_code = error_details.get("code")
|
||||
|
||||
# Check both the error code and message content for context length issues
|
||||
if (
|
||||
error_code == "context_length_exceeded"
|
||||
or "This model's maximum context length is" in str(e)
|
||||
or "Input tokens exceed the configured limit" in str(e)
|
||||
):
|
||||
if error_code == "context_length_exceeded" or is_context_window_overflow_message(str(e)):
|
||||
return ContextWindowExceededError(
|
||||
message=f"Bad request to OpenAI (context window exceeded): {str(e)}",
|
||||
)
|
||||
@@ -993,6 +990,25 @@ class OpenAIClient(LLMClientBase):
|
||||
details=e.body,
|
||||
)
|
||||
|
||||
# NOTE: The OpenAI Python SDK may raise a generic `openai.APIError` while *iterating*
|
||||
# over a stream (e.g. Responses API streaming). In this case we don't necessarily
|
||||
# get a `BadRequestError` with a structured error body, but we still want to
|
||||
# trigger Letta's context window compaction / retry logic.
|
||||
#
|
||||
# Example message:
|
||||
# "Your input exceeds the context window of this model. Please adjust your input and try again."
|
||||
if isinstance(e, openai.APIError):
|
||||
msg = str(e)
|
||||
if is_context_window_overflow_message(msg):
|
||||
return ContextWindowExceededError(
|
||||
message=f"OpenAI request exceeded the context window: {msg}",
|
||||
details={
|
||||
"provider_exception_type": type(e).__name__,
|
||||
# Best-effort extraction (may not exist on APIError)
|
||||
"body": getattr(e, "body", None),
|
||||
},
|
||||
)
|
||||
|
||||
if isinstance(e, openai.AuthenticationError):
|
||||
logger.error(f"[OpenAI] Authentication error (401): {str(e)}") # More severe log level
|
||||
return LLMAuthenticationError(
|
||||
|
||||
Reference in New Issue
Block a user