From 3d781efd2195c8e99a6ccd3543cd193ae9752c52 Mon Sep 17 00:00:00 2001
From: cthomas
Date: Tue, 24 Feb 2026 19:57:23 -0800
Subject: [PATCH] fix(core): raise LLMEmptyResponseError for empty Anthropic responses (#9624)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(core): raise LLMEmptyResponseError for empty Anthropic responses

Fixes LET-7679: Opus 4.6 occasionally returns empty responses (no content
and no tool calls), causing silent failures with stop_reason=end_turn.

Changes:
- Add LLMEmptyResponseError class (subclass of LLMServerError)
- Raise error in anthropic_client for empty non-streaming responses
- Raise error in anthropic_streaming_interface for empty streaming responses
- Pass through LLMError instances in handle_llm_error to preserve specific types
- Add test for empty streaming response detection

This allows clients (letta-code) to catch this specific error and implement
retry logic with cache-busting modifications.

🤖 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta

* fix(core): set invalid_llm_response stop reason for empty responses

Catch LLMEmptyResponseError specifically and set stop_reason to
invalid_llm_response instead of llm_api_error. This allows clients to
distinguish empty responses from transient API errors.

🤖 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta --------- Co-authored-by: Letta --- letta/agents/letta_agent_v3.py | 5 +- letta/errors.py | 9 +++ .../anthropic_streaming_interface.py | 25 ++++++- letta/llm_api/anthropic_client.py | 9 ++- ...letta_llm_stream_adapter_error_handling.py | 74 +++++++++++++++++++ 5 files changed, 116 insertions(+), 6 deletions(-) diff --git a/letta/agents/letta_agent_v3.py b/letta/agents/letta_agent_v3.py index c53910b0..47bfc038 100644 --- a/letta/agents/letta_agent_v3.py +++ b/letta/agents/letta_agent_v3.py @@ -21,7 +21,7 @@ from letta.agents.helpers import ( ) from letta.agents.letta_agent_v2 import LettaAgentV2 from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX, REQUEST_HEARTBEAT_PARAM -from letta.errors import ContextWindowExceededError, LLMError, SystemPromptTokenExceededError +from letta.errors import ContextWindowExceededError, LLMEmptyResponseError, LLMError, SystemPromptTokenExceededError from letta.helpers import ToolRulesSolver from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns from letta.helpers.tool_execution_helper import enable_strict_mode @@ -990,6 +990,9 @@ class LettaAgentV3(LettaAgentV2): except ValueError as e: self.stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value) raise e + except LLMEmptyResponseError as e: + self.stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value) + raise e except LLMError as e: self.stop_reason = LettaStopReason(stop_reason=StopReasonType.llm_api_error.value) raise e diff --git a/letta/errors.py b/letta/errors.py index f725b2b5..195dd02e 100644 --- a/letta/errors.py +++ b/letta/errors.py @@ -283,6 +283,15 @@ class LLMServerError(LLMError): while processing the request.""" +class LLMEmptyResponseError(LLMServerError): + """Error when LLM returns an empty response (no content and no tool calls). 
+ + This is a subclass of LLMServerError to maintain retry behavior, but allows + specific handling for empty response cases which may benefit from request + modification before retry. + """ + + class LLMTimeoutError(LLMError): """Error when LLM request times out""" diff --git a/letta/interfaces/anthropic_streaming_interface.py b/letta/interfaces/anthropic_streaming_interface.py index dcb8d4e7..f64adcff 100644 --- a/letta/interfaces/anthropic_streaming_interface.py +++ b/letta/interfaces/anthropic_streaming_interface.py @@ -30,6 +30,7 @@ from anthropic.types.beta import ( ) from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG +from letta.errors import LLMEmptyResponseError from letta.local_llm.constants import INNER_THOUGHTS_KWARG from letta.log import get_logger from letta.schemas.letta_message import ( @@ -104,6 +105,10 @@ class AnthropicStreamingInterface: self.inner_thoughts_complete = False self.put_inner_thoughts_in_kwarg = put_inner_thoughts_in_kwarg + # Track whether any content was produced (text or tool calls) + # Used to detect empty responses from models like Opus 4.6 + self.has_content = False + # Buffer to handle partial XML tags across chunks self.partial_tag_buffer = "" @@ -298,9 +303,11 @@ class AnthropicStreamingInterface: if isinstance(content, BetaTextBlock): self.anthropic_mode = EventMode.TEXT + self.has_content = True # Track that we received text content # TODO: Can capture citations, etc. elif isinstance(content, BetaToolUseBlock): self.anthropic_mode = EventMode.TOOL_USE + self.has_content = True # Track that we received tool use content self.tool_call_id = content.id self.tool_call_name = content.name self.inner_thoughts_complete = False @@ -589,8 +596,12 @@ class AnthropicStreamingInterface: # message_delta event are *cumulative*." So we assign, not accumulate. self.output_tokens = event.usage.output_tokens elif isinstance(event, BetaRawMessageStopEvent): - # Don't do anything here! 
We don't want to stop the stream. - pass + # Check if any content was produced during the stream + # Empty responses (no text and no tool calls) should raise an error + if not self.has_content: + raise LLMEmptyResponseError( + message=f"LLM provider returned empty content in streaming response (model: {self.model}, message_id: {self.message_id})" + ) elif isinstance(event, BetaRawContentBlockStopEvent): # If we're exiting a tool use block and there are still buffered messages, # we should flush them now. @@ -837,10 +848,12 @@ class SimpleAnthropicStreamingInterface: if isinstance(content, BetaTextBlock): self.anthropic_mode = EventMode.TEXT + self.has_content = True # Track that we received text content # TODO: Can capture citations, etc. elif isinstance(content, BetaToolUseBlock): self.anthropic_mode = EventMode.TOOL_USE + self.has_content = True # Track that we received tool use content self.tool_call_id = content.id self.tool_call_name = content.name @@ -1014,8 +1027,12 @@ class SimpleAnthropicStreamingInterface: self.output_tokens = event.usage.output_tokens elif isinstance(event, BetaRawMessageStopEvent): - # Don't do anything here! We don't want to stop the stream. 
- pass + # Check if any content was produced during the stream + # Empty responses (no text and no tool calls) should raise an error + if not self.has_content: + raise LLMEmptyResponseError( + message=f"LLM provider returned empty content in streaming response (model: {self.model}, message_id: {self.message_id})" + ) elif isinstance(event, BetaRawContentBlockStopEvent): self.anthropic_mode = None diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index 08b06f2c..dcefc609 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -19,6 +19,8 @@ from letta.errors import ( LLMAuthenticationError, LLMBadRequestError, LLMConnectionError, + LLMEmptyResponseError, + LLMError, LLMInsufficientCreditsError, LLMNotFoundError, LLMPermissionDeniedError, @@ -957,6 +959,11 @@ class AnthropicClient(LLMClientBase): @trace_method def handle_llm_error(self, e: Exception, llm_config: Optional[LLMConfig] = None) -> Exception: + # Pass through errors that are already LLMError instances unchanged + # This preserves specific error types like LLMEmptyResponseError + if isinstance(e, LLMError): + return e + is_byok = (llm_config.provider_category == ProviderCategory.byok) if llm_config else None # make sure to check for overflow errors, regardless of error type @@ -1278,7 +1285,7 @@ class AnthropicClient(LLMClientBase): response.stop_reason, json.dumps(response_data), ) - raise LLMServerError( + raise LLMEmptyResponseError( message=f"LLM provider returned empty content in response (ID: {response.id}, model: {response.model}, stop_reason: {response.stop_reason})", code=ErrorCode.INTERNAL_SERVER_ERROR, details={ diff --git a/tests/adapters/test_letta_llm_stream_adapter_error_handling.py b/tests/adapters/test_letta_llm_stream_adapter_error_handling.py index a7d7e7e5..6f25ea5a 100644 --- a/tests/adapters/test_letta_llm_stream_adapter_error_handling.py +++ b/tests/adapters/test_letta_llm_stream_adapter_error_handling.py @@ -2,6 +2,12 
@@ import anthropic import httpx import openai import pytest +from anthropic.types.beta import ( + BetaMessage, + BetaRawMessageStartEvent, + BetaRawMessageStopEvent, + BetaUsage, +) from google.genai import errors as google_errors from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter @@ -9,6 +15,7 @@ from letta.errors import ( ContextWindowExceededError, LLMBadRequestError, LLMConnectionError, + LLMEmptyResponseError, LLMInsufficientCreditsError, LLMServerError, ) @@ -287,3 +294,70 @@ def test_openai_client_handle_llm_error_non_credit_api_error(): result = client.handle_llm_error(error) assert isinstance(result, LLMBadRequestError) assert not isinstance(result, LLMInsufficientCreditsError) + + +@pytest.mark.asyncio +async def test_letta_llm_stream_adapter_raises_empty_response_error_for_anthropic(monkeypatch): + """LET-7679: Empty streaming responses (no content blocks) should raise LLMEmptyResponseError. + + This tests the case where Opus 4.6 returns a response with: + - BetaRawMessageStartEvent (with usage tokens) + - BetaRawMessageStopEvent (end_turn) + - NO content blocks in between + + This should raise LLMEmptyResponseError, not complete successfully with stop_reason=end_turn. 
+ """ + + class FakeAsyncStream: + """Mimics anthropic.AsyncStream that returns empty content (no content blocks).""" + + def __init__(self): + self.events = [ + # Message start with some usage info + BetaRawMessageStartEvent( + type="message_start", + message=BetaMessage( + id="msg_test_empty", + type="message", + role="assistant", + content=[], # Empty content + model="claude-opus-4-6", + stop_reason="end_turn", + stop_sequence=None, + usage=BetaUsage(input_tokens=1000, output_tokens=26, cache_creation_input_tokens=0, cache_read_input_tokens=0), + ), + ), + # Message stop immediately after start - no content blocks + BetaRawMessageStopEvent(type="message_stop"), + ] + self.index = 0 + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + return None + + def __aiter__(self): + return self + + async def __anext__(self): + if self.index >= len(self.events): + raise StopAsyncIteration + event = self.events[self.index] + self.index += 1 + return event + + async def fake_stream_async(self, request_data: dict, llm_config): + return FakeAsyncStream() + + monkeypatch.setattr(AnthropicClient, "stream_async", fake_stream_async, raising=True) + + llm_client = AnthropicClient() + llm_config = LLMConfig(model="claude-opus-4-6", model_endpoint_type="anthropic", context_window=200000) + adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config, call_type=LLMCallType.agent_step) + + gen = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True) + with pytest.raises(LLMEmptyResponseError): + async for _ in gen: + pass