From 3d781efd2195c8e99a6ccd3543cd193ae9752c52 Mon Sep 17 00:00:00 2001
From: cthomas <caren@letta.com>
Date: Tue, 24 Feb 2026 19:57:23 -0800
Subject: [PATCH] fix(core): raise LLMEmptyResponseError for empty Anthropic
 responses (#9624)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(core): raise LLMEmptyResponseError for empty Anthropic responses

Fixes LET-7679: Opus 4.6 occasionally returns empty responses (no content
and no tool calls), causing silent failures with stop_reason=end_turn.

Changes:
- Add LLMEmptyResponseError class (subclass of LLMServerError)
- Raise error in anthropic_client for empty non-streaming responses
- Raise error in anthropic_streaming_interface for empty streaming responses
- Pass through LLMError instances in handle_llm_error to preserve specific types
- Add test for empty streaming response detection

This allows clients (letta-code) to catch this specific error and implement
retry logic with cache-busting modifications.

🤖 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix(core): set invalid_llm_response stop reason for empty responses

Catch LLMEmptyResponseError specifically and set stop_reason to
invalid_llm_response instead of llm_api_error. This allows clients
to distinguish empty responses from transient API errors.

🤖 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

---------

Co-authored-by: Letta <noreply@letta.com>
---
 letta/agents/letta_agent_v3.py                |  5 +-
 letta/errors.py                               |  9 +++
 .../anthropic_streaming_interface.py          | 25 ++++++-
 letta/llm_api/anthropic_client.py             |  9 ++-
 ...letta_llm_stream_adapter_error_handling.py | 74 +++++++++++++++++++
 5 files changed, 116 insertions(+), 6 deletions(-)

diff --git a/letta/agents/letta_agent_v3.py b/letta/agents/letta_agent_v3.py
index c53910b0..47bfc038 100644
--- a/letta/agents/letta_agent_v3.py
+++ b/letta/agents/letta_agent_v3.py
@@ -21,7 +21,7 @@ from letta.agents.helpers import (
 )
 from letta.agents.letta_agent_v2 import LettaAgentV2
 from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX, REQUEST_HEARTBEAT_PARAM
-from letta.errors import ContextWindowExceededError, LLMError, SystemPromptTokenExceededError
+from letta.errors import ContextWindowExceededError, LLMEmptyResponseError, LLMError, SystemPromptTokenExceededError
 from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns
 from letta.helpers.tool_execution_helper import enable_strict_mode
@@ -990,6 +990,9 @@ class LettaAgentV3(LettaAgentV2):
                     except ValueError as e:
                         self.stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
                         raise e
+                    except LLMEmptyResponseError as e:
+                        self.stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
+                        raise e
                     except LLMError as e:
                         self.stop_reason = LettaStopReason(stop_reason=StopReasonType.llm_api_error.value)
                         raise e
diff --git a/letta/errors.py b/letta/errors.py
index f725b2b5..195dd02e 100644
--- a/letta/errors.py
+++ b/letta/errors.py
@@ -283,6 +283,15 @@ class LLMServerError(LLMError):
     while processing the request."""
 
 
+class LLMEmptyResponseError(LLMServerError):
+    """Error when LLM returns an empty response (no content and no tool calls).
+
+    Subclasses LLMServerError so handlers written for server errors still catch
+    it, while callers that need to can single out the empty-response case (for
+    example, to modify the request before retrying).
+    """
+
+
 class LLMTimeoutError(LLMError):
     """Error when LLM request times out"""
 
diff --git a/letta/interfaces/anthropic_streaming_interface.py b/letta/interfaces/anthropic_streaming_interface.py
index dcb8d4e7..f64adcff 100644
--- a/letta/interfaces/anthropic_streaming_interface.py
+++ b/letta/interfaces/anthropic_streaming_interface.py
@@ -30,6 +30,7 @@ from anthropic.types.beta import (
 )
 
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
+from letta.errors import LLMEmptyResponseError
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.log import get_logger
 from letta.schemas.letta_message import (
@@ -104,6 +105,10 @@ class AnthropicStreamingInterface:
         self.inner_thoughts_complete = False
         self.put_inner_thoughts_in_kwarg = put_inner_thoughts_in_kwarg
 
+        # Track whether any content was produced (text or tool calls)
+        # Used to detect empty responses from models like Opus 4.6
+        self.has_content = False
+
         # Buffer to handle partial XML tags across chunks
         self.partial_tag_buffer = ""
 
@@ -298,9 +303,11 @@ class AnthropicStreamingInterface:
 
             if isinstance(content, BetaTextBlock):
                 self.anthropic_mode = EventMode.TEXT
+                self.has_content = True  # Track that we received text content
                 # TODO: Can capture citations, etc.
             elif isinstance(content, BetaToolUseBlock):
                 self.anthropic_mode = EventMode.TOOL_USE
+                self.has_content = True  # Track that we received tool use content
                 self.tool_call_id = content.id
                 self.tool_call_name = content.name
                 self.inner_thoughts_complete = False
@@ -589,8 +596,12 @@ class AnthropicStreamingInterface:
             # message_delta event are *cumulative*." So we assign, not accumulate.
             self.output_tokens = event.usage.output_tokens
         elif isinstance(event, BetaRawMessageStopEvent):
-            # Don't do anything here! We don't want to stop the stream.
-            pass
+            # Check if any content was produced during the stream
+            # Empty responses (no text and no tool calls) should raise an error
+            if not self.has_content:
+                raise LLMEmptyResponseError(
+                    message=f"LLM provider returned empty content in streaming response (model: {self.model}, message_id: {self.message_id})"
+                )
         elif isinstance(event, BetaRawContentBlockStopEvent):
             # If we're exiting a tool use block and there are still buffered messages,
             # we should flush them now.
@@ -837,10 +848,12 @@ class SimpleAnthropicStreamingInterface:
 
             if isinstance(content, BetaTextBlock):
                 self.anthropic_mode = EventMode.TEXT
+                self.has_content = True  # Track that we received text content
                 # TODO: Can capture citations, etc.
 
             elif isinstance(content, BetaToolUseBlock):
                 self.anthropic_mode = EventMode.TOOL_USE
+                self.has_content = True  # Track that we received tool use content
                 self.tool_call_id = content.id
                 self.tool_call_name = content.name
 
@@ -1014,8 +1027,12 @@ class SimpleAnthropicStreamingInterface:
             self.output_tokens = event.usage.output_tokens
 
         elif isinstance(event, BetaRawMessageStopEvent):
-            # Don't do anything here! We don't want to stop the stream.
-            pass
+            # A stream that ends without any text or tool-use block is an empty response.
+            # getattr: this patch only assigns has_content on this class once a content block starts.
+            if not getattr(self, "has_content", False):
+                raise LLMEmptyResponseError(
+                    message=f"LLM provider returned empty content in streaming response (model: {self.model}, message_id: {self.message_id})"
+                )
 
         elif isinstance(event, BetaRawContentBlockStopEvent):
             self.anthropic_mode = None
diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py
index 08b06f2c..dcefc609 100644
--- a/letta/llm_api/anthropic_client.py
+++ b/letta/llm_api/anthropic_client.py
@@ -19,6 +19,8 @@ from letta.errors import (
     LLMAuthenticationError,
     LLMBadRequestError,
     LLMConnectionError,
+    LLMEmptyResponseError,
+    LLMError,
     LLMInsufficientCreditsError,
     LLMNotFoundError,
     LLMPermissionDeniedError,
@@ -957,6 +959,11 @@ class AnthropicClient(LLMClientBase):
 
     @trace_method
     def handle_llm_error(self, e: Exception, llm_config: Optional[LLMConfig] = None) -> Exception:
+        # Pass through errors that are already LLMError instances unchanged
+        # This preserves specific error types like LLMEmptyResponseError
+        if isinstance(e, LLMError):
+            return e
+
         is_byok = (llm_config.provider_category == ProviderCategory.byok) if llm_config else None
 
         # make sure to check for overflow errors, regardless of error type
@@ -1278,7 +1285,7 @@ class AnthropicClient(LLMClientBase):
                 response.stop_reason,
                 json.dumps(response_data),
             )
-            raise LLMServerError(
+            raise LLMEmptyResponseError(
                 message=f"LLM provider returned empty content in response (ID: {response.id}, model: {response.model}, stop_reason: {response.stop_reason})",
                 code=ErrorCode.INTERNAL_SERVER_ERROR,
                 details={
diff --git a/tests/adapters/test_letta_llm_stream_adapter_error_handling.py b/tests/adapters/test_letta_llm_stream_adapter_error_handling.py
index a7d7e7e5..6f25ea5a 100644
--- a/tests/adapters/test_letta_llm_stream_adapter_error_handling.py
+++ b/tests/adapters/test_letta_llm_stream_adapter_error_handling.py
@@ -2,6 +2,12 @@ import anthropic
 import httpx
 import openai
 import pytest
+from anthropic.types.beta import (
+    BetaMessage,
+    BetaRawMessageStartEvent,
+    BetaRawMessageStopEvent,
+    BetaUsage,
+)
 from google.genai import errors as google_errors
 
 from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
@@ -9,6 +15,7 @@ from letta.errors import (
     ContextWindowExceededError,
     LLMBadRequestError,
     LLMConnectionError,
+    LLMEmptyResponseError,
     LLMInsufficientCreditsError,
     LLMServerError,
 )
@@ -287,3 +294,70 @@ def test_openai_client_handle_llm_error_non_credit_api_error():
     result = client.handle_llm_error(error)
     assert isinstance(result, LLMBadRequestError)
     assert not isinstance(result, LLMInsufficientCreditsError)
+
+
+@pytest.mark.asyncio
+async def test_letta_llm_stream_adapter_raises_empty_response_error_for_anthropic(monkeypatch):
+    """LET-7679: Empty streaming responses (no content blocks) should raise LLMEmptyResponseError.
+
+    Simulates the Opus 4.6 case where the stream consists only of:
+    - BetaRawMessageStartEvent (with usage tokens)
+    - BetaRawMessageStopEvent (end_turn)
+    - NO content blocks in between
+
+    Draining the adapter must raise LLMEmptyResponseError instead of finishing normally.
+    """
+
+    class FakeAsyncStream:
+        """Async context manager + async iterator that replays a fixed, content-free event list."""
+
+        def __init__(self):
+            self.events = [
+                # Message start carrying usage info but an empty content list
+                BetaRawMessageStartEvent(
+                    type="message_start",
+                    message=BetaMessage(
+                        id="msg_test_empty",
+                        type="message",
+                        role="assistant",
+                        content=[],  # Empty content
+                        model="claude-opus-4-6",
+                        stop_reason="end_turn",
+                        stop_sequence=None,
+                        usage=BetaUsage(input_tokens=1000, output_tokens=26, cache_creation_input_tokens=0, cache_read_input_tokens=0),
+                    ),
+                ),
+                # Message stop immediately after start - no content block events at all
+                BetaRawMessageStopEvent(type="message_stop"),
+            ]
+            self.index = 0
+
+        async def __aenter__(self):
+            return self
+
+        async def __aexit__(self, exc_type, exc, tb):
+            return None  # falsy return: exceptions raised inside the context propagate
+
+        def __aiter__(self):
+            return self
+
+        async def __anext__(self):
+            if self.index >= len(self.events):
+                raise StopAsyncIteration
+            event = self.events[self.index]
+            self.index += 1
+            return event
+
+    async def fake_stream_async(self, request_data: dict, llm_config):
+        return FakeAsyncStream()
+
+    monkeypatch.setattr(AnthropicClient, "stream_async", fake_stream_async, raising=True)
+
+    llm_client = AnthropicClient()
+    llm_config = LLMConfig(model="claude-opus-4-6", model_endpoint_type="anthropic", context_window=200000)
+    adapter = LettaLLMStreamAdapter(llm_client=llm_client, llm_config=llm_config, call_type=LLMCallType.agent_step)
+
+    gen = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True)
+    with pytest.raises(LLMEmptyResponseError):
+        async for _ in gen:
+            pass