refactor: extract DisplayPipeline from processMessage stream loop (#550)
Extracts a DisplayPipeline async generator that wraps the raw SDK stream and yields clean DisplayEvent types. Refactors processMessage() to consume pipeline events instead of raw StreamMsg objects. - Locks foreground on first substantive event (reasoning/tool_call/etc), eliminating buffering delay for real-time display - Filters pre-foreground error/retry events to prevent false approval recovery - Re-throws 429 in rejectApproval to prevent rate-limit loops - Gates reasoning log on display config - 12 pipeline unit tests + updated integration tests (56 total) - Net -224 lines from bot.ts Written by Cameron ◯ Letta Code "The purpose of abstraction is not to be vague, but to create a new semantic level in which one can be absolutely precise." -- Edsger Dijkstra
This commit is contained in:
@@ -82,18 +82,9 @@ describe('stream logging levels', () => {
|
||||
const infoMessages = loggerSpies.info.mock.calls.map(([message]) => String(message));
|
||||
const traceMessages = loggerSpies.trace.mock.calls.map(([message]) => String(message));
|
||||
|
||||
expect(debugMessages.some((m) => m.includes('Buffering run-scoped pre-foreground display event'))).toBe(false);
|
||||
expect(debugMessages.some((m) => m.includes('Deferring run-scoped pre-foreground event'))).toBe(false);
|
||||
expect(debugMessages.some((m) => m.includes('Skipping non-foreground stream event'))).toBe(false);
|
||||
|
||||
expect(infoMessages.some((m) => m.includes('type=tool_call'))).toBe(false);
|
||||
expect(infoMessages.some((m) => m.includes('type=tool_result'))).toBe(false);
|
||||
|
||||
expect(traceMessages.some((m) => m.includes('Buffering run-scoped pre-foreground display event'))).toBe(true);
|
||||
expect(traceMessages.some((m) => m.includes('Skipping non-foreground stream event'))).toBe(true);
|
||||
expect(traceMessages.some((m) => m.includes('type=tool_call'))).toBe(true);
|
||||
expect(traceMessages.some((m) => m.includes('type=tool_result'))).toBe(true);
|
||||
|
||||
// Run ID filtering now handled by DisplayPipeline; verify summary log is emitted at info level
|
||||
expect(infoMessages.some((m) => m.includes('Filtered') && m.includes('non-foreground event(s)'))).toBe(true);
|
||||
// Foreground run locking is logged at info level
|
||||
expect(infoMessages.some((m) => m.includes('Foreground run locked'))).toBe(true);
|
||||
});
|
||||
});
|
||||
896
src/core/bot.ts
896
src/core/bot.ts
File diff suppressed because it is too large
Load Diff
211
src/core/display-pipeline.test.ts
Normal file
211
src/core/display-pipeline.test.ts
Normal file
@@ -0,0 +1,211 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { createDisplayPipeline, type DisplayEvent } from './display-pipeline.js';
|
||||
import type { StreamMsg } from './types.js';
|
||||
|
||||
/** Helper: collect all DisplayEvents from a pipeline fed with the given messages. */
|
||||
async function collect(
|
||||
messages: StreamMsg[],
|
||||
convKey = 'test',
|
||||
): Promise<DisplayEvent[]> {
|
||||
async function* feed(): AsyncIterable<StreamMsg> {
|
||||
for (const msg of messages) yield msg;
|
||||
}
|
||||
const events: DisplayEvent[] = [];
|
||||
for await (const evt of createDisplayPipeline(feed(), {
|
||||
convKey,
|
||||
resultFingerprints: new Map(),
|
||||
})) {
|
||||
events.push(evt);
|
||||
}
|
||||
return events;
|
||||
}
|
||||
|
||||
// Unit tests for the DisplayPipeline generator (src/core/display-pipeline.ts).
// Each test feeds a scripted StreamMsg sequence through `collect` and asserts
// on the clean DisplayEvents that come out the other side.
describe('createDisplayPipeline', () => {
  it('locks foreground on first reasoning event and yields immediately', async () => {
    const events = await collect([
      { type: 'reasoning', content: 'thinking...', runId: 'run-1' },
      { type: 'assistant', content: 'reply', runId: 'run-1' },
      { type: 'result', success: true, result: 'reply', runIds: ['run-1'] },
    ]);

    const types = events.map(e => e.type);
    // Reasoning should appear BEFORE text -- no buffering
    expect(types[0]).toBe('reasoning');
    expect(types[1]).toBe('text');
    expect(types[2]).toBe('complete');
  });

  it('locks foreground on first tool_call event', async () => {
    const events = await collect([
      { type: 'tool_call', toolCallId: 'tc-1', toolName: 'Bash', toolInput: { command: 'echo hi' }, runId: 'run-1' },
      { type: 'assistant', content: 'done', runId: 'run-1' },
      { type: 'result', success: true, result: 'done', runIds: ['run-1'] },
    ]);

    // tool_call is a lock type: it must pass through untouched, not be filtered.
    expect(events[0].type).toBe('tool_call');
    expect(events[1].type).toBe('text');
    expect(events[2].type).toBe('complete');
  });

  it('filters pre-foreground error events to prevent false retry triggers', async () => {
    const events = await collect([
      { type: 'error', runId: 'run-bg', message: 'conflict waiting for approval', stopReason: 'error' },
      { type: 'result', success: false, error: 'error', runIds: ['run-main'] },
    ]);

    // Pre-foreground error is filtered (not yielded). Only the result passes through.
    const errorEvt = events.find(e => e.type === 'error');
    const completeEvt = events.find(e => e.type === 'complete');
    expect(errorEvt).toBeUndefined();
    expect(completeEvt).toBeDefined();
    if (completeEvt?.type === 'complete') {
      expect(completeEvt.runIds).toContain('run-main');
    }
  });

  it('rebinds foreground on assistant event with new run ID', async () => {
    const events = await collect([
      { type: 'assistant', content: 'before tool ', runId: 'run-1' },
      { type: 'tool_call', toolCallId: 'tc-1', toolName: 'Bash', toolInput: {}, runId: 'run-1' },
      { type: 'assistant', content: 'after tool', runId: 'run-2' },
      { type: 'result', success: true, result: 'before tool after tool', runIds: ['run-2'] },
    ]);

    const textEvents = events.filter(e => e.type === 'text');
    // Both assistant events should pass through (rebind on run-2)
    expect(textEvents.length).toBe(2);
    expect(events.find(e => e.type === 'complete')).toBeDefined();
  });

  it('filters non-foreground events after lock', async () => {
    const events = await collect([
      { type: 'reasoning', content: 'foreground thinking', runId: 'run-1' },
      { type: 'reasoning', content: 'background noise', runId: 'run-2' },
      { type: 'assistant', content: 'reply', runId: 'run-1' },
      { type: 'result', success: true, result: 'reply', runIds: ['run-1'] },
    ]);

    const reasoningEvents = events.filter(e => e.type === 'reasoning');
    // Only foreground reasoning should appear (run-2 filtered after lock to run-1)
    expect(reasoningEvents.length).toBe(1);
    if (reasoningEvents[0].type === 'reasoning') {
      expect(reasoningEvents[0].content).toBe('foreground thinking');
    }
  });

  it('accumulates reasoning chunks and flushes on type change', async () => {
    const events = await collect([
      { type: 'reasoning', content: 'part 1 ', runId: 'run-1' },
      { type: 'reasoning', content: 'part 2', runId: 'run-1' },
      { type: 'assistant', content: 'reply', runId: 'run-1' },
      { type: 'result', success: true, result: 'reply', runIds: ['run-1'] },
    ]);

    const reasoningEvents = events.filter(e => e.type === 'reasoning');
    // Multiple reasoning chunks should be accumulated into one event
    expect(reasoningEvents.length).toBe(1);
    if (reasoningEvents[0].type === 'reasoning') {
      expect(reasoningEvents[0].content).toBe('part 1 part 2');
    }
  });

  it('prefers streamed text over result field on divergence', async () => {
    const events = await collect([
      { type: 'assistant', content: 'streamed reply', runId: 'run-1' },
      { type: 'result', success: true, result: 'result field reply', runIds: ['run-1'] },
    ]);

    const complete = events.find(e => e.type === 'complete');
    expect(complete).toBeDefined();
    if (complete?.type === 'complete') {
      // When streamed text and result field diverge, streamed text wins.
      expect(complete.text).toBe('streamed reply');
    }
  });

  it('falls back to result field when no streamed text', async () => {
    const events = await collect([
      { type: 'result', success: true, result: 'result only', runIds: ['run-1'] },
    ]);

    const complete = events.find(e => e.type === 'complete');
    expect(complete).toBeDefined();
    if (complete?.type === 'complete') {
      expect(complete.text).toBe('result only');
    }
  });

  it('detects stale duplicate results by run fingerprint', async () => {
    // Shared fingerprint map simulates two successive turns on one conversation.
    const fingerprints = new Map<string, string>();

    // First call -- fresh
    const events1 = await (async () => {
      async function* feed(): AsyncIterable<StreamMsg> {
        yield { type: 'result', success: true, result: 'first', runIds: ['run-1'] };
      }
      const events: DisplayEvent[] = [];
      for await (const evt of createDisplayPipeline(feed(), { convKey: 'test', resultFingerprints: fingerprints })) {
        events.push(evt);
      }
      return events;
    })();

    // Second call with same runIds -- stale
    const events2 = await (async () => {
      async function* feed(): AsyncIterable<StreamMsg> {
        yield { type: 'result', success: true, result: 'second', runIds: ['run-1'] };
      }
      const events: DisplayEvent[] = [];
      for await (const evt of createDisplayPipeline(feed(), { convKey: 'test', resultFingerprints: fingerprints })) {
        events.push(evt);
      }
      return events;
    })();

    const c1 = events1.find(e => e.type === 'complete');
    const c2 = events2.find(e => e.type === 'complete');
    expect(c1?.type === 'complete' && c1.stale).toBe(false);
    expect(c2?.type === 'complete' && c2.stale).toBe(true);
  });

  it('marks cancelled results', async () => {
    const events = await collect([
      { type: 'result', success: true, result: '', stopReason: 'cancelled', runIds: ['run-1'] },
    ]);

    const complete = events.find(e => e.type === 'complete');
    expect(complete).toBeDefined();
    if (complete?.type === 'complete') {
      expect(complete.cancelled).toBe(true);
    }
  });

  it('skips stream_event types', async () => {
    const events = await collect([
      { type: 'stream_event', content: 'partial delta' },
      { type: 'assistant', content: 'reply', runId: 'run-1' },
      { type: 'result', success: true, result: 'reply', runIds: ['run-1'] },
    ]);

    // stream_event never reaches the output -- only text + complete
    expect(events.length).toBe(2);
    expect(events[0].type).toBe('text');
    expect(events[1].type).toBe('complete');
  });

  it('yields tool_result events', async () => {
    const events = await collect([
      { type: 'tool_call', toolCallId: 'tc-1', toolName: 'Bash', toolInput: {}, runId: 'run-1' },
      { type: 'tool_result', toolCallId: 'tc-1', content: 'ok', isError: false, runId: 'run-1' },
      { type: 'assistant', content: 'done', runId: 'run-1' },
      { type: 'result', success: true, result: 'done', runIds: ['run-1'] },
    ]);

    const toolResult = events.find(e => e.type === 'tool_result');
    expect(toolResult).toBeDefined();
    if (toolResult?.type === 'tool_result') {
      expect(toolResult.toolCallId).toBe('tc-1');
      expect(toolResult.content).toBe('ok');
      expect(toolResult.isError).toBe(false);
    }
  });
});
|
||||
360
src/core/display-pipeline.ts
Normal file
360
src/core/display-pipeline.ts
Normal file
@@ -0,0 +1,360 @@
|
||||
/**
|
||||
* DisplayPipeline — transforms raw SDK stream events into clean,
|
||||
* high-level display events for channel delivery.
|
||||
*
|
||||
* Encapsulates:
|
||||
* - Run ID filtering (foreground tracking, rebinding)
|
||||
* - Reasoning chunk accumulation (flushed on type transitions)
|
||||
* - stream_event skipping
|
||||
* - Type transition tracking
|
||||
* - Result text selection (streamed vs result field)
|
||||
* - Stale/cancelled result classification
|
||||
*/
|
||||
|
||||
import type { StreamMsg } from './types.js';
|
||||
import { createLogger } from '../logger.js';
|
||||
|
||||
const log = createLogger('DisplayPipeline');
|
||||
|
||||
// ─── Display event types ────────────────────────────────────────────────────
|
||||
|
||||
/** A complete block of model reasoning, accumulated across raw stream chunks. */
export interface ReasoningEvent {
  type: 'reasoning';
  /** Complete accumulated reasoning block. */
  content: string;
}

/** A tool invocation observed on the foreground run. */
export interface ToolCallEvent {
  type: 'tool_call';
  /** Tool name as reported by the SDK (pipeline substitutes 'unknown' when absent). */
  name: string;
  /** Tool arguments; {} when the raw input was absent or not an object. */
  args: Record<string, unknown>;
  /** Correlation ID linking this call to its ToolResultEvent ('' when absent). */
  id: string;
  /** The raw StreamMsg for consumers that need extra fields. */
  raw: StreamMsg;
}

/** The outcome of a previously yielded ToolCallEvent. */
export interface ToolResultEvent {
  type: 'tool_result';
  /** Matches the `id` of the corresponding ToolCallEvent. */
  toolCallId: string;
  /** Result payload; '' when neither `content` nor `result` was a string. */
  content: string;
  isError: boolean;
  /** The raw StreamMsg for consumers that need extra fields. */
  raw: StreamMsg;
}

/** An assistant text update. Emitted once per assistant chunk, carrying the running total. */
export interface TextEvent {
  type: 'text';
  /** Full accumulated assistant text for this turn. */
  content: string;
  /** Just this chunk's addition. */
  delta: string;
  /** Assistant message UUID (changes on multi-turn responses). */
  uuid: string;
}

/** Terminal event for the turn, derived from the SDK's `result` message. */
export interface CompleteEvent {
  type: 'complete';
  /** Final response text (after streamed-vs-result selection). */
  text: string;
  success: boolean;
  error?: string;
  stopReason?: string;
  conversationId?: string;
  runIds: string[];
  durationMs?: number;
  /** True if this is a stale duplicate result (same run fingerprint as last time). */
  stale: boolean;
  /** True if this result came from a cancelled run (should be discarded + retried). */
  cancelled: boolean;
  /** Whether any assistant text was accumulated during streaming. */
  hadStreamedText: boolean;
  /** The raw StreamMsg for consumers that need extra fields. */
  raw: StreamMsg;
}

/**
 * A stream error. Note: run-scoped errors arriving before the foreground run
 * is locked are filtered by the pipeline and never surface as this event.
 */
export interface ErrorEvent {
  type: 'error';
  message: string;
  stopReason?: string;
  apiError?: Record<string, unknown>;
  runId?: string;
}

/** A retry notification forwarded from the SDK stream. */
export interface RetryEvent {
  type: 'retry';
  /** 1-based attempt counter (0 when the SDK omitted it). */
  attempt: number;
  maxAttempts: number;
  reason: string;
  delayMs?: number;
}

/** Union of every event the pipeline can yield. Discriminated on `type`. */
export type DisplayEvent =
  | ReasoningEvent
  | ToolCallEvent
  | ToolResultEvent
  | TextEvent
  | CompleteEvent
  | ErrorEvent
  | RetryEvent;
|
||||
|
||||
// ─── Run fingerprinting (stale detection) ───────────────────────────────────
|
||||
|
||||
function classifyResult(
|
||||
convKey: string,
|
||||
runIds: string[],
|
||||
fingerprints: Map<string, string>,
|
||||
): 'fresh' | 'stale' | 'unknown' {
|
||||
if (runIds.length === 0) return 'unknown';
|
||||
const fingerprint = [...new Set(runIds)].sort().join(',');
|
||||
const previous = fingerprints.get(convKey);
|
||||
if (previous === fingerprint) {
|
||||
log.warn(`Stale duplicate result detected (key=${convKey}, runIds=${fingerprint})`);
|
||||
return 'stale';
|
||||
}
|
||||
fingerprints.set(convKey, fingerprint);
|
||||
return 'fresh';
|
||||
}
|
||||
|
||||
// ─── Helpers ────────────────────────────────────────────────────────────────
|
||||
|
||||
function extractRunIds(msg: StreamMsg): string[] {
|
||||
const ids: string[] = [];
|
||||
const rawId = (msg as StreamMsg & { runId?: unknown; run_id?: unknown }).runId
|
||||
?? (msg as StreamMsg & { run_id?: unknown }).run_id;
|
||||
if (typeof rawId === 'string' && rawId.trim()) ids.push(rawId.trim());
|
||||
|
||||
const rawIds = (msg as StreamMsg & { runIds?: unknown; run_ids?: unknown }).runIds
|
||||
?? (msg as StreamMsg & { run_ids?: unknown }).run_ids;
|
||||
if (Array.isArray(rawIds)) {
|
||||
for (const id of rawIds) {
|
||||
if (typeof id === 'string' && id.trim()) ids.push(id.trim());
|
||||
}
|
||||
}
|
||||
return ids.length > 0 ? [...new Set(ids)] : [];
|
||||
}
|
||||
|
||||
// ─── Pipeline ───────────────────────────────────────────────────────────────
|
||||
|
||||
/** Configuration for {@link createDisplayPipeline}. */
export interface DisplayPipelineOptions {
  /** Conversation key for stale-result detection. */
  convKey: string;
  /** Shared fingerprint map for stale-result detection (instance-level, not module-level). */
  resultFingerprints: Map<string, string>;
}
|
||||
|
||||
/**
 * Wraps an SDK stream (already deduped by session-manager) and yields
 * clean DisplayEvents. All run-ID filtering, reasoning accumulation,
 * and result classification happens inside.
 *
 * State is per-invocation; only `opts.resultFingerprints` outlives the
 * generator (it carries stale-result detection across turns).
 */
export async function* createDisplayPipeline(
  stream: AsyncIterable<StreamMsg>,
  opts: DisplayPipelineOptions,
): AsyncGenerator<DisplayEvent> {
  const { convKey, resultFingerprints } = opts;

  // ── Foreground run tracking ──
  // null until the first substantive run-scoped event locks the foreground.
  let foregroundRunId: string | null = null;
  // Event type that performed the lock/rebind; used only for logging.
  let foregroundSource: string | null = null;

  // ── Reasoning accumulation ──
  let reasoningBuffer = '';

  // ── Assistant text accumulation ──
  let assistantText = '';
  let lastAssistantUuid: string | null = null;
  let lastSemanticType: string | null = null;

  // ── All run IDs seen (for result) ──
  // Fallback run-ID set for a `result` message that carries none of its own.
  const allRunIds = new Set<string>();

  // ── Stats ──
  let filteredCount = 0;

  // ── Helpers ──
  // Emits the accumulated reasoning as one event and resets the buffer.
  // Whitespace-only buffers are kept (not yielded, not cleared).
  function* flushReasoning(): Generator<DisplayEvent> {
    if (reasoningBuffer.trim()) {
      yield { type: 'reasoning', content: reasoningBuffer };
      reasoningBuffer = '';
    }
  }

  // ── Main loop ──
  for await (const msg of stream) {
    const eventRunIds = extractRunIds(msg);
    for (const id of eventRunIds) allRunIds.add(id);

    // Skip stream_event (low-level deltas, not semantic)
    if (msg.type === 'stream_event') continue;

    log.trace(`raw: type=${msg.type} runIds=${eventRunIds.join(',') || 'none'} fg=${foregroundRunId || 'unlocked'}`);

    // ── Run ID filtering ──
    // Lock types: substantive events that prove this run is the foreground turn.
    // Error/retry are excluded -- they're transient signals that could come
    // from a failed run before the real foreground starts.
    // Events carrying NO run IDs match none of these branches and always pass.
    const isLockType = msg.type === 'reasoning' || msg.type === 'tool_call'
      || msg.type === 'tool_result' || msg.type === 'assistant' || msg.type === 'result';

    if (foregroundRunId === null && eventRunIds.length > 0 && isLockType) {
      // Lock foreground on the first substantive event with a run ID.
      // Background Tasks use separate sessions and cannot produce events in
      // this stream, so the first run-scoped event is always from the current
      // turn's run. This eliminates buffering delay -- reasoning and tool calls
      // display immediately instead of waiting for the first assistant event.
      foregroundRunId = eventRunIds[0];
      foregroundSource = msg.type;
      log.info(`Foreground run locked: ${foregroundRunId} (source=${foregroundSource})`);
      // Fall through to type transitions and dispatch for immediate processing.
    } else if (foregroundRunId === null && eventRunIds.length > 0 && !isLockType) {
      // Pre-foreground error/retry events are filtered. If passed through,
      // they set lastErrorDetail in the consumer and can spuriously trigger
      // approval recovery or suppress legitimate retries.
      filteredCount++;
      continue;
    } else if (foregroundRunId && eventRunIds.length > 0 && !eventRunIds.includes(foregroundRunId)) {
      // Event from a different run. Rebind on assistant events only
      // (background Tasks don't produce assistant events in the foreground stream).
      if (msg.type === 'assistant') {
        const newRunId = eventRunIds[0];
        log.info(`Foreground run rebind: ${foregroundRunId} -> ${newRunId}`);
        foregroundRunId = newRunId;
        foregroundSource = 'assistant';
      } else {
        // Non-assistant events from other runs (including competing results)
        // are dropped after the lock.
        filteredCount++;
        continue;
      }
    }

    // ── Type transitions ──
    // (stream_event is already `continue`d above, so all events here are semantic.)
    // Filtered events never reach this point, so they do not disturb
    // lastSemanticType or trigger a premature reasoning flush.
    if (lastSemanticType && lastSemanticType !== msg.type) {
      if (lastSemanticType === 'reasoning') {
        yield* flushReasoning();
      }
    }
    lastSemanticType = msg.type;

    // ── Dispatch by type ──
    switch (msg.type) {
      case 'reasoning': {
        // Accumulate only; the combined block is yielded on the next
        // type transition (or at result/stream end).
        reasoningBuffer += msg.content || '';
        break;
      }

      case 'tool_call': {
        yield {
          type: 'tool_call',
          name: msg.toolName || 'unknown',
          args: (msg.toolInput && typeof msg.toolInput === 'object' ? msg.toolInput : {}) as Record<string, unknown>,
          id: msg.toolCallId || '',
          raw: msg,
        };
        break;
      }

      case 'tool_result': {
        yield {
          type: 'tool_result',
          toolCallId: msg.toolCallId || '',
          // Payload may arrive under `content` or `result` depending on the
          // SDK message shape; prefer `content`, fall back to `result`.
          content: typeof (msg as any).content === 'string'
            ? (msg as any).content
            : typeof (msg as any).result === 'string'
              ? (msg as any).result
              : '',
          isError: !!msg.isError,
          raw: msg,
        };
        break;
      }

      case 'assistant': {
        const delta = msg.content || '';
        const uuid = msg.uuid || '';
        // Keep the last non-empty UUID so chunks without one still correlate.
        lastAssistantUuid = uuid || lastAssistantUuid;

        assistantText += delta;
        yield {
          type: 'text',
          content: assistantText,
          delta,
          uuid: lastAssistantUuid || '',
        };
        break;
      }

      case 'result': {
        // Flush any remaining reasoning
        yield* flushReasoning();

        const resultText = typeof msg.result === 'string' ? msg.result : '';
        const streamedTrimmed = assistantText.trim();
        const resultTrimmed = resultText.trim();
        const runIds = extractRunIds(msg);

        // Result text selection: prefer streamed text over result field
        let finalText = assistantText;
        if (streamedTrimmed.length > 0 && resultTrimmed !== streamedTrimmed) {
          // Diverged — prefer streamed (avoid n-1 desync)
          log.warn(`Result diverges from streamed (resultLen=${resultText.length}, streamLen=${assistantText.length}), preferring streamed`);
        } else if (streamedTrimmed.length === 0 && msg.success !== false && !msg.error) {
          // No streamed text — use result as fallback (only on clean success;
          // error results must not surface their result field as the reply)
          finalText = resultText;
        }

        // Classify
        const cancelled = (msg as any).stopReason === 'cancelled';
        // Use the result's own run IDs when present; otherwise fall back to
        // every run ID seen on the stream so fingerprinting still works.
        const staleState = classifyResult(convKey, runIds.length > 0 ? runIds : [...allRunIds], resultFingerprints);
        const stale = staleState === 'stale';

        if (filteredCount > 0) {
          log.info(`Filtered ${filteredCount} non-foreground event(s) (key=${convKey})`);
        }

        yield {
          type: 'complete',
          text: finalText,
          success: msg.success !== false,
          error: typeof msg.error === 'string' ? msg.error : undefined,
          stopReason: typeof (msg as any).stopReason === 'string' ? (msg as any).stopReason : undefined,
          conversationId: typeof (msg as any).conversationId === 'string' ? (msg as any).conversationId : undefined,
          runIds: runIds.length > 0 ? runIds : [...allRunIds],
          durationMs: typeof (msg as any).durationMs === 'number' ? (msg as any).durationMs : undefined,
          stale,
          cancelled,
          hadStreamedText: streamedTrimmed.length > 0,
          raw: msg,
        };
        break;
      }

      case 'error': {
        // Only reaches here post-lock (or when run-agnostic); see filter above.
        yield {
          type: 'error',
          message: (msg as any).message || 'unknown',
          stopReason: (msg as any).stopReason,
          apiError: (msg as any).apiError,
          runId: (msg as any).runId,
        };
        break;
      }

      case 'retry': {
        yield {
          type: 'retry',
          attempt: (msg as any).attempt ?? 0,
          maxAttempts: (msg as any).maxAttempts ?? 0,
          reason: (msg as any).reason || 'unknown',
          delayMs: (msg as any).delayMs,
        };
        break;
      }

      default:
        // Unhandled event types — skip
        break;
    }
  }

  // Flush any trailing reasoning that wasn't followed by a type change
  yield* flushReasoning();
}
|
||||
@@ -154,6 +154,61 @@ describe('result divergence guard', () => {
|
||||
expect(sentTexts.some(text => text.includes('repeated CLI command failures'))).toBe(true);
|
||||
});
|
||||
|
||||
it('stops consuming stream and avoids retry after explicit tool-loop abort', async () => {
|
||||
const bot = new LettaBot({
|
||||
workingDir: workDir,
|
||||
allowedTools: [],
|
||||
maxToolCalls: 1,
|
||||
});
|
||||
|
||||
const adapter = {
|
||||
id: 'mock',
|
||||
name: 'Mock',
|
||||
start: vi.fn(async () => {}),
|
||||
stop: vi.fn(async () => {}),
|
||||
isRunning: vi.fn(() => true),
|
||||
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
|
||||
editMessage: vi.fn(async () => {}),
|
||||
sendTypingIndicator: vi.fn(async () => {}),
|
||||
stopTypingIndicator: vi.fn(async () => {}),
|
||||
supportsEditing: vi.fn(() => false),
|
||||
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
|
||||
};
|
||||
|
||||
const runSession = vi.fn();
|
||||
runSession.mockResolvedValueOnce({
|
||||
session: { abort: vi.fn(async () => {}) },
|
||||
stream: async function* () {
|
||||
yield { type: 'tool_call', toolCallId: 'tc-1', toolName: 'Bash', toolInput: { command: 'echo hi' } };
|
||||
// These trailing events should be ignored because the run was already aborted.
|
||||
yield { type: 'assistant', content: 'late assistant text' };
|
||||
yield { type: 'result', success: false, error: 'error', stopReason: 'cancelled', result: '' };
|
||||
},
|
||||
});
|
||||
runSession.mockResolvedValueOnce({
|
||||
session: { abort: vi.fn(async () => {}) },
|
||||
stream: async function* () {
|
||||
yield { type: 'assistant', content: 'retried response' };
|
||||
yield { type: 'result', success: true, result: 'retried response' };
|
||||
},
|
||||
});
|
||||
(bot as any).sessionManager.runSession = runSession;
|
||||
|
||||
const msg: InboundMessage = {
|
||||
channel: 'discord',
|
||||
chatId: 'chat-1',
|
||||
userId: 'user-1',
|
||||
text: 'hello',
|
||||
timestamp: new Date(),
|
||||
};
|
||||
|
||||
await (bot as any).processMessage(msg, adapter);
|
||||
|
||||
expect(runSession).toHaveBeenCalledTimes(1);
|
||||
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
|
||||
expect(sentTexts).toEqual(['(Agent got stuck in a tool loop and was stopped. Try sending your message again.)']);
|
||||
});
|
||||
|
||||
it('does not deliver reasoning text from error results as the response', async () => {
|
||||
const bot = new LettaBot({
|
||||
workingDir: workDir,
|
||||
@@ -255,7 +310,7 @@ describe('result divergence guard', () => {
|
||||
expect(sentTexts).toEqual(['Before tool. ', 'After tool.']);
|
||||
});
|
||||
|
||||
it('buffers pre-foreground run-scoped display events and drops non-foreground buffers', async () => {
|
||||
it('locks foreground on first event with run ID and displays immediately', async () => {
|
||||
const bot = new LettaBot({
|
||||
workingDir: workDir,
|
||||
allowedTools: [],
|
||||
@@ -276,11 +331,14 @@ describe('result divergence guard', () => {
|
||||
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
|
||||
};
|
||||
|
||||
// Reasoning and tool_call arrive before any assistant event. The pipeline
|
||||
// locks foreground on the first event with a run ID (the reasoning event)
|
||||
// and processes everything immediately -- no buffering.
|
||||
(bot as any).sessionManager.runSession = vi.fn(async () => ({
|
||||
session: { abort: vi.fn(async () => {}) },
|
||||
stream: async function* () {
|
||||
yield { type: 'reasoning', content: 'background-thinking', runId: 'run-bg' };
|
||||
yield { type: 'tool_call', toolCallId: 'tc-bg', toolName: 'Bash', toolInput: { command: 'echo leak' }, runId: 'run-bg' };
|
||||
yield { type: 'reasoning', content: 'pre-tool thinking', runId: 'run-tool' };
|
||||
yield { type: 'tool_call', toolCallId: 'tc-1', toolName: 'Bash', toolInput: { command: 'echo hi' }, runId: 'run-tool' };
|
||||
yield { type: 'assistant', content: 'main reply', runId: 'run-main' };
|
||||
yield { type: 'result', success: true, result: 'main reply', runIds: ['run-main'] };
|
||||
},
|
||||
@@ -297,7 +355,9 @@ describe('result divergence guard', () => {
|
||||
await (bot as any).processMessage(msg, adapter);
|
||||
|
||||
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
|
||||
expect(sentTexts).toEqual(['main reply']);
|
||||
// Reasoning display + tool call display + main reply -- all immediate, no buffering
|
||||
expect(sentTexts.length).toBe(3);
|
||||
expect(sentTexts[2]).toBe('main reply');
|
||||
});
|
||||
|
||||
it('retries once when a competing result arrives before any foreground terminal result', async () => {
|
||||
|
||||
@@ -1100,6 +1100,81 @@ describe('SDK session contract', () => {
|
||||
expect(sentTexts).toContain('after retry');
|
||||
});
|
||||
|
||||
it('filters pre-foreground errors so they do not trigger false approval recovery', async () => {
|
||||
const bot = new LettaBot({
|
||||
workingDir: join(dataDir, 'working'),
|
||||
allowedTools: [],
|
||||
});
|
||||
|
||||
let runCall = 0;
|
||||
(bot as any).sessionManager.runSession = vi.fn(async () => ({
|
||||
session: { abort: vi.fn(async () => undefined) },
|
||||
stream: async function* () {
|
||||
if (runCall++ === 0) {
|
||||
// Pre-foreground error is filtered by the pipeline -- it never
|
||||
// reaches processMessage, so lastErrorDetail stays null and
|
||||
// isApprovalConflict cannot fire.
|
||||
yield {
|
||||
type: 'error',
|
||||
runId: 'run-bg',
|
||||
message: 'CONFLICT: Cannot send a new message: waiting for approval',
|
||||
stopReason: 'error',
|
||||
};
|
||||
yield { type: 'result', success: false, error: 'error', conversationId: 'conv-approval', runIds: ['run-main'] };
|
||||
return;
|
||||
}
|
||||
// Retry succeeds
|
||||
yield { type: 'assistant', content: 'after retry' };
|
||||
yield { type: 'result', success: true, result: 'after retry', conversationId: 'conv-approval', runIds: ['run-main-2'] };
|
||||
},
|
||||
}));
|
||||
|
||||
vi.mocked(recoverOrphanedConversationApproval).mockResolvedValueOnce({
|
||||
recovered: false,
|
||||
details: 'No unresolved approval requests found',
|
||||
});
|
||||
|
||||
const adapter = {
|
||||
id: 'mock',
|
||||
name: 'Mock',
|
||||
start: vi.fn(async () => {}),
|
||||
stop: vi.fn(async () => {}),
|
||||
isRunning: vi.fn(() => true),
|
||||
sendMessage: vi.fn(async (_payload: unknown) => ({ messageId: 'msg-1' })),
|
||||
editMessage: vi.fn(async () => {}),
|
||||
sendTypingIndicator: vi.fn(async () => {}),
|
||||
stopTypingIndicator: vi.fn(async () => {}),
|
||||
supportsEditing: vi.fn(() => false),
|
||||
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
|
||||
};
|
||||
|
||||
const msg = {
|
||||
channel: 'discord',
|
||||
chatId: 'chat-1',
|
||||
userId: 'user-1',
|
||||
text: 'hello',
|
||||
timestamp: new Date(),
|
||||
};
|
||||
|
||||
await (bot as any).processMessage(msg, adapter);
|
||||
|
||||
// The pre-foreground error is filtered, so lastErrorDetail is null.
|
||||
// The result (success=false, nothing delivered) triggers shouldRetryForErrorResult,
|
||||
// NOT isApprovalConflict. The retry goes through the error-result path with
|
||||
// orphaned approval recovery, then retries and succeeds.
|
||||
expect((bot as any).sessionManager.runSession).toHaveBeenCalledTimes(2);
|
||||
// Approval recovery should have been attempted via the error-result path
|
||||
expect(recoverOrphanedConversationApproval).toHaveBeenCalledWith(
|
||||
'agent-contract-test',
|
||||
'conv-approval',
|
||||
);
|
||||
const sentTexts = adapter.sendMessage.mock.calls.map((call) => {
|
||||
const payload = call[0] as { text?: string };
|
||||
return payload.text;
|
||||
});
|
||||
expect(sentTexts).toContain('after retry');
|
||||
});
|
||||
|
||||
it('uses agent-level recovery for default conversation alias on terminal approval conflict', async () => {
|
||||
const bot = new LettaBot({
|
||||
workingDir: join(dataDir, 'working'),
|
||||
|
||||
@@ -542,6 +542,12 @@ export async function rejectApproval(
|
||||
log.warn(`Approval already resolved for tool call ${approval.toolCallId}`);
|
||||
return true;
|
||||
}
|
||||
// Re-throw rate limit errors so callers can bail out early instead of
|
||||
// hammering the API in a tight loop.
|
||||
if (err?.status === 429) {
|
||||
log.error('Failed to reject approval:', e);
|
||||
throw e;
|
||||
}
|
||||
log.error('Failed to reject approval:', e);
|
||||
return false;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user