fix(core): raise LLMEmptyResponseError for empty Anthropic responses (#9624)
* fix(core): raise LLMEmptyResponseError for empty Anthropic responses Fixes LET-7679: Opus 4.6 occasionally returns empty responses (no content and no tool calls), causing silent failures with stop_reason=end_turn. Changes: - Add LLMEmptyResponseError class (subclass of LLMServerError) - Raise error in anthropic_client for empty non-streaming responses - Raise error in anthropic_streaming_interface for empty streaming responses - Pass through LLMError instances in handle_llm_error to preserve specific types - Add test for empty streaming response detection This allows clients (letta-code) to catch this specific error and implement retry logic with cache-busting modifications. 🤖 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * fix(core): set invalid_llm_response stop reason for empty responses Catch LLMEmptyResponseError specifically and set stop_reason to invalid_llm_response instead of llm_api_error. This allows clients to distinguish empty responses from transient API errors. 🤖 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> --------- Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
@@ -2,6 +2,12 @@ import anthropic
|
||||
import httpx
|
||||
import openai
|
||||
import pytest
|
||||
from anthropic.types.beta import (
|
||||
BetaMessage,
|
||||
BetaRawMessageStartEvent,
|
||||
BetaRawMessageStopEvent,
|
||||
BetaUsage,
|
||||
)
|
||||
from google.genai import errors as google_errors
|
||||
|
||||
from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
|
||||
@@ -9,6 +15,7 @@ from letta.errors import (
|
||||
ContextWindowExceededError,
|
||||
LLMBadRequestError,
|
||||
LLMConnectionError,
|
||||
LLMEmptyResponseError,
|
||||
LLMInsufficientCreditsError,
|
||||
LLMServerError,
|
||||
)
|
||||
@@ -287,3 +294,70 @@ def test_openai_client_handle_llm_error_non_credit_api_error():
|
||||
result = client.handle_llm_error(error)
|
||||
assert isinstance(result, LLMBadRequestError)
|
||||
assert not isinstance(result, LLMInsufficientCreditsError)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_letta_llm_stream_adapter_raises_empty_response_error_for_anthropic(monkeypatch):
    """LET-7679: Empty streaming responses (no content blocks) should raise LLMEmptyResponseError.

    This tests the case where Opus 4.6 returns a response with:
    - BetaRawMessageStartEvent (with usage tokens)
    - BetaRawMessageStopEvent (end_turn)
    - NO content blocks in between

    This should raise LLMEmptyResponseError, not complete successfully with stop_reason=end_turn.
    """

    class _EmptyContentStream:
        """Stand-in for anthropic.AsyncStream whose event sequence carries no content blocks."""

        def __init__(self):
            # A start event immediately followed by a stop event — nothing in between.
            start_event = BetaRawMessageStartEvent(
                type="message_start",
                message=BetaMessage(
                    id="msg_test_empty",
                    type="message",
                    role="assistant",
                    content=[],  # Empty content
                    model="claude-opus-4-6",
                    stop_reason="end_turn",
                    stop_sequence=None,
                    usage=BetaUsage(input_tokens=1000, output_tokens=26, cache_creation_input_tokens=0, cache_read_input_tokens=0),
                ),
            )
            stop_event = BetaRawMessageStopEvent(type="message_stop")
            self._events = iter([start_event, stop_event])

        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            return None

        def __aiter__(self):
            return self

        async def __anext__(self):
            # Delegate to the underlying sync iterator; translate exhaustion
            # into the async-iteration protocol's terminator.
            try:
                return next(self._events)
            except StopIteration:
                raise StopAsyncIteration from None

    async def fake_stream_async(self, request_data: dict, llm_config):
        return _EmptyContentStream()

    monkeypatch.setattr(AnthropicClient, "stream_async", fake_stream_async, raising=True)

    llm_config = LLMConfig(model="claude-opus-4-6", model_endpoint_type="anthropic", context_window=200000)
    adapter = LettaLLMStreamAdapter(
        llm_client=AnthropicClient(),
        llm_config=llm_config,
        call_type=LLMCallType.agent_step,
    )

    # Draining the stream must surface the empty response as a typed error.
    stream = adapter.invoke_llm(request_data={}, messages=[], tools=[], use_assistant_message=True)
    with pytest.raises(LLMEmptyResponseError):
        async for _ in stream:
            pass
|
||||
|
||||
Reference in New Issue
Block a user