fix: ensure stop_reason is always set and reduce noisy logs (#9046)

fix: consume [DONE] token after error events to prevent forced finalizer append **Problem:** Stream finalizer was frequently logging warning and appending forced [DONE]: ``` [Stream Finalizer] Appending forced [DONE] for run=run-xxx (saw_error=True, saw_done=False, final_stop_reason=llm_api_error) ``` This happened on every error, even though streaming_service.py already yields [DONE] after all error events. **Root Cause:** Line 266: `is_done = saw_done or saw_error` caused loop to break immediately after seeing error event, BEFORE consuming the [DONE] chunk that follows: ```python is_done = saw_done or saw_error await writer.write_chunk(...) if is_done: # Breaks on error! break ``` Sequence: 1. streaming_service.py yields: `event: error\ndata: {...}\n\n` 2. Redis reader sees error → sets `saw_error=True` 3. Sets `is_done=True` and breaks 4. Never reads next chunk: `data: [DONE]\n\n` 5. Finalizer runs → `saw_done=False` → appends forced [DONE] **Fix:** 1. Only break when `saw_done=True` (not `saw_error`) → allows consuming [DONE] 2. Only run finalizer when `saw_done=False` → reduces log noise **Result:** - [DONE] now consumed naturally from streaming_service.py error handlers - Finalizer warning only appears when truly needed (fallback cases) - Cleaner production logs 👾 Generated with [Letta Code](https://letta.com) Co-authored-by: Letta <noreply@letta.com>
2026-01-22 16:13:29 -08:00
parent ca40eff7bc
commit 2a2e777807
1 changed files with 16 additions and 18 deletions
--- a/letta/server/rest_api/redis_stream_manager.py
+++ b/letta/server/rest_api/redis_stream_manager.py
@@ -263,11 +263,10 @@ async def create_background_stream_processor(
                            # Don't let parsing failures interfere with streaming
                            error_metadata = {"error": {"message": "Failed to parse error payload from stream."}}
-                is_done = saw_done or saw_error
+                await writer.write_chunk(run_id=run_id, data=chunk, is_complete=saw_done)
-                await writer.write_chunk(run_id=run_id, data=chunk, is_complete=is_done)
+                # Only break after seeing [DONE] or error
-
+                if saw_done or saw_error:
                if is_done:
                    break
                try:
@@ -394,20 +393,19 @@ async def create_background_stream_processor(
                conversation_id=conversation_id,
            )
-        # Belt-and-suspenders: always append a terminal [DONE] chunk to ensure clients terminate
+        # Only append [DONE] if we didn't already see it (fallback safety mechanism)
-        # Even if a previous chunk set `complete`, an extra [DONE] is harmless and ensures SDKs that
+        if not saw_done:
-        # rely on explicit [DONE] will exit.
+            logger.warning(
-        logger.warning(
+                "[Stream Finalizer] Appending forced [DONE] for run=%s (saw_error=%s, saw_done=%s, final_stop_reason=%s)",
-            "[Stream Finalizer] Appending forced [DONE] for run=%s (saw_error=%s, saw_done=%s, final_stop_reason=%s)",
+                run_id,
-            run_id,
+                saw_error,
-            saw_error,
+                saw_done,
-            saw_done,
+                final_stop_reason,
-            final_stop_reason,
+            )
-        )
+            try:
-        try:
+                await writer.mark_complete(run_id)
-            await writer.mark_complete(run_id)
+            except Exception as e:
-        except Exception as e:
+                logger.warning(f"Failed to append terminal [DONE] for run {run_id}: {e}")
            logger.warning(f"Failed to append terminal [DONE] for run {run_id}: {e}")
 async def redis_sse_stream_generator(