fix(listen): preserve interrupt error status through next-turn persistence (#1294)

2026-03-05 22:29:08 -08:00
parent cc6f754ca3
commit 52f2cc9924
6 changed files with 918 additions and 58 deletions
--- a/src/tests/agent/approval-result-normalization.test.ts
+++ b/src/tests/agent/approval-result-normalization.test.ts
@@ -0,0 +1,103 @@
+import { describe, expect, test } from "bun:test";
+import type { ApprovalCreate } from "@letta-ai/letta-client/resources/agents/messages";
+import type { ApprovalResult } from "../../agent/approval-execution";
+import {
+  normalizeApprovalResultsForPersistence,
+  normalizeOutgoingApprovalMessages,
+} from "../../agent/approval-result-normalization";
+import { INTERRUPTED_BY_USER } from "../../constants";
+
+describe("normalizeApprovalResultsForPersistence", () => { // status normalization applied to approval results before they are persisted
+  test("forces status=error for structured interrupted tool_call_ids", () => {
+    const approvals: ApprovalResult[] = [
+      {
+        type: "tool",
+        tool_call_id: "call-1",
+        tool_return: "some return",
+        status: "success", // starts as success; expected to be overridden below
+      } as ApprovalResult,
+    ];
+
+    const normalized = normalizeApprovalResultsForPersistence(approvals, {
+      interruptedToolCallIds: ["call-1"], // structured match: same ID as the result above
+    });
+
+    expect(normalized[0]).toMatchObject({
+      type: "tool",
+      tool_call_id: "call-1",
+      status: "error", // forced to error because the ID was listed as interrupted
+    });
+  });
+
+  test("does not modify non-interrupted tool results", () => {
+    const approvals: ApprovalResult[] = [
+      {
+        type: "tool",
+        tool_call_id: "call-2",
+        tool_return: "ok",
+        status: "success",
+      } as ApprovalResult,
+    ];
+
+    const normalized = normalizeApprovalResultsForPersistence(approvals, {
+      interruptedToolCallIds: ["other-id"], // deliberately does not match call-2
+    });
+
+    expect(normalized[0]).toMatchObject({
+      type: "tool",
+      tool_call_id: "call-2",
+      status: "success", // left as-is for an ID that was not interrupted
+    });
+  });
+
+  test("supports legacy fallback on interrupt text when explicitly enabled", () => {
+    const approvals: ApprovalResult[] = [
+      {
+        type: "tool",
+        tool_call_id: "call-3",
+        tool_return: [{ type: "text", text: INTERRUPTED_BY_USER }], // content-part array carrying the shared interrupt marker constant
+        status: "success",
+      } as ApprovalResult,
+    ];
+
+    const normalized = normalizeApprovalResultsForPersistence(approvals, {
+      allowInterruptTextFallback: true, // opt-in flag; no interruptedToolCallIds are supplied in this case
+    });
+
+    expect(normalized[0]).toMatchObject({
+      type: "tool",
+      tool_call_id: "call-3",
+      status: "error", // the return text alone is enough to flip the status here
+    });
+  });
+});
+
+describe("normalizeOutgoingApprovalMessages", () => { // message-list wrapper around approval result normalization
+  test("normalizes approvals and preserves non-approval messages", () => {
+    const approvalMessage: ApprovalCreate = {
+      type: "approval",
+      approvals: [
+        {
+          type: "tool",
+          tool_call_id: "call-7",
+          tool_return: "foo",
+          status: "success", // starts as success; expected to be overridden below
+        } as ApprovalResult,
+      ],
+    };
+
+    const messages = normalizeOutgoingApprovalMessages(
+      [{ role: "user", content: "hello" }, approvalMessage], // mixed list: a plain user message followed by the approval message
+      { interruptedToolCallIds: ["call-7"] }, // marks call-7 as interrupted
+    );
+
+    expect(messages[0]).toMatchObject({ role: "user", content: "hello" }); // user message keeps its position and fields
+    const normalizedApproval = messages[1] as ApprovalCreate;
+    const approvals = normalizedApproval.approvals ?? []; // falls back to an empty list if approvals is null/undefined
+    expect(approvals[0]).toMatchObject({
+      type: "tool",
+      tool_call_id: "call-7",
+      status: "error", // interrupted ID is reported as an error in the outgoing message
+    });
+  });
+});
--- a/src/tests/websocket/listen-client-protocol.test.ts
+++ b/src/tests/websocket/listen-client-protocol.test.ts
@@ -1,5 +1,8 @@
 import { describe, expect, test } from "bun:test";
+import type { ApprovalCreate } from "@letta-ai/letta-client/resources/agents/messages";
 import WebSocket from "ws";
+import { buildConversationMessagesCreateRequestBody } from "../../agent/message";
+import { INTERRUPTED_BY_USER } from "../../constants";
 import type { ControlRequest, ControlResponseBody } from "../../types/protocol";
 import {
  __listenClientTestUtils,
@@ -640,3 +643,159 @@ describe("listen-client post-stop approval recovery policy", () => {
    expect(shouldRecover).toBe(false);
  });
 });
+
+describe("listen-client interrupt persistence normalization", () => { // covers normalizeExecutionResultsForInterruptParity with and without a cancel in progress
+  test("forces interrupted in-flight tool results to status=error when cancelRequested", () => {
+    const runtime = __listenClientTestUtils.createRuntime();
+    runtime.cancelRequested = true; // put the runtime into the cancel flow
+
+    const normalized =
+      __listenClientTestUtils.normalizeExecutionResultsForInterruptParity(
+        runtime,
+        [
+          {
+            type: "tool",
+            tool_call_id: "tool-1",
+            tool_return: "Interrupted by user",
+            status: "success", // tool reported success despite the interrupt text
+          },
+        ],
+        ["tool-1"], // IDs treated as in-flight per the test title — NOTE(review): parameter name is not visible in this diff
+      );
+
+    expect(normalized).toEqual([
+      {
+        type: "tool",
+        tool_call_id: "tool-1",
+        tool_return: "Interrupted by user",
+        status: "error", // only the status differs from the input
+      },
+    ]);
+  });
+
+  test("leaves tool status unchanged when not in cancel flow", () => {
+    const runtime = __listenClientTestUtils.createRuntime();
+    runtime.cancelRequested = false; // no cancel in progress
+
+    const normalized =
+      __listenClientTestUtils.normalizeExecutionResultsForInterruptParity(
+        runtime,
+        [
+          {
+            type: "tool",
+            tool_call_id: "tool-1",
+            tool_return: "Interrupted by user",
+            status: "success",
+          },
+        ],
+        ["tool-1"], // same ID list as the cancel case above
+      );
+
+    expect(normalized).toEqual([
+      {
+        type: "tool",
+        tool_call_id: "tool-1",
+        tool_return: "Interrupted by user",
+        status: "success", // unchanged even though the ID is listed and the text matches
+      },
+    ]);
+  });
+});
+
+describe("listen-client interrupt persistence request body", () => { // end-to-end: populate queue -> consume -> build next-turn request body
+  test("post-interrupt next-turn payload keeps interrupted tool returns as status=error", () => {
+    const runtime = __listenClientTestUtils.createRuntime();
+    const consumedAgentId = "agent-1";
+    const consumedConversationId = "default";
+
+    __listenClientTestUtils.populateInterruptQueue(runtime, {
+      lastExecutionResults: null, // no execution results were captured before the interrupt
+      lastExecutingToolCallIds: ["call-running-1"], // one tool call was still executing
+      lastNeedsUserInputToolCallIds: [],
+      agentId: consumedAgentId,
+      conversationId: consumedConversationId,
+    });
+
+    const consumed = __listenClientTestUtils.consumeInterruptQueue(
+      runtime,
+      consumedAgentId, // same agent/conversation pair used when populating
+      consumedConversationId,
+    );
+
+    expect(consumed).not.toBeNull();
+    if (!consumed) { // narrows `consumed` to non-null for the code below
+      throw new Error("Expected queued interrupt approvals to be consumed");
+    }
+
+    const requestBody = buildConversationMessagesCreateRequestBody(
+      consumedConversationId,
+      [
+        consumed.approvalMessage, // queued approval message goes first
+        {
+          type: "message",
+          role: "user",
+          content: "next user message after interrupt",
+        },
+      ],
+      {
+        agentId: consumedAgentId,
+        streamTokens: true,
+        background: true,
+        approvalNormalization: {
+          interruptedToolCallIds: consumed.interruptedToolCallIds, // forwards the IDs returned by consumeInterruptQueue
+        },
+      },
+      [], // NOTE(review): meaning of this fourth argument is not visible in this diff
+    );
+
+    const approvalMessage = requestBody.messages[0] as ApprovalCreate; // approval message is expected at index 0
+    expect(approvalMessage.type).toBe("approval");
+    expect(approvalMessage.approvals?.[0]).toMatchObject({
+      type: "tool",
+      tool_call_id: "call-running-1",
+      tool_return: INTERRUPTED_BY_USER, // return text expected for the still-running call
+      status: "error", // error status must survive into the request body
+    });
+  });
+});
+
+describe("listen-client tool_return wire normalization", () => { // covers normalizeToolReturnWireMessage
+  test("normalizes legacy top-level tool return fields to canonical tool_returns[]", () => {
+    const normalized = __listenClientTestUtils.normalizeToolReturnWireMessage({
+      message_type: "tool_return_message",
+      id: "message-1",
+      run_id: "run-1",
+      tool_call_id: "call-1", // legacy shape: tool_call_id/status/tool_return sit at the top level
+      status: "error",
+      tool_return: [{ type: "text", text: "Interrupted by user" }], // content-part array form
+    });
+
+    expect(normalized).toEqual({
+      message_type: "tool_return_message",
+      id: "message-1",
+      run_id: "run-1",
+      tool_returns: [ // canonical shape: a single-entry tool_returns array
+        {
+          tool_call_id: "call-1",
+          status: "error",
+          tool_return: "Interrupted by user", // text part is expected as a plain string
+        },
+      ],
+    });
+    expect(normalized).not.toHaveProperty("tool_call_id"); // legacy top-level fields must be gone
+    expect(normalized).not.toHaveProperty("status");
+    expect(normalized).not.toHaveProperty("tool_return");
+  });
+
+  test("returns null for tool_return_message when no canonical status is available", () => {
+    const normalized = __listenClientTestUtils.normalizeToolReturnWireMessage({
+      message_type: "tool_return_message",
+      id: "message-2",
+      run_id: "run-2",
+      tool_call_id: "call-2", // note: this input carries no status field
+      tool_return: "maybe done",
+    });
+
+    expect(normalized).toBeNull(); // the message is dropped rather than given a guessed status
+  });
+});
--- a/src/tests/websocket/listen-interrupt-queue.test.ts
+++ b/src/tests/websocket/listen-interrupt-queue.test.ts
@@ -62,12 +62,14 @@ describe("ListenerRuntime interrupt queue fields", () => {
    const runtime = createRuntime();
    expect(runtime.pendingInterruptedResults).toBeNull();
    expect(runtime.pendingInterruptedContext).toBeNull();
+    expect(runtime.pendingInterruptedToolCallIds).toBeNull();
+    expect(runtime.activeExecutingToolCallIds).toEqual([]);
    expect(runtime.continuationEpoch).toBe(0);
  });
 });

 describe("stopRuntime teardown", () => {
-  test("clears pendingInterruptedResults, context, and batch map", () => {
+  test("clears pendingInterruptedResults, context, ids, and batch map", () => {
    const runtime = createRuntime();
    runtime.socket = new MockSocket(WebSocket.OPEN) as unknown as WebSocket;

@@ -84,12 +86,16 @@ describe("stopRuntime teardown", () => {
      conversationId: "conv-1",
      continuationEpoch: 0,
    };
+    runtime.pendingInterruptedToolCallIds = ["call-1"];
+    runtime.activeExecutingToolCallIds = ["call-1"];
    runtime.pendingApprovalBatchByToolCallId.set("call-1", "batch-1");

    stopRuntime(runtime, true);

    expect(runtime.pendingInterruptedResults).toBeNull();
    expect(runtime.pendingInterruptedContext).toBeNull();
+    expect(runtime.pendingInterruptedToolCallIds).toBeNull();
+    expect(runtime.activeExecutingToolCallIds).toEqual([]);
    expect(runtime.pendingApprovalBatchByToolCallId.size).toBe(0);
  });

@@ -178,6 +184,29 @@ describe("extractInterruptToolReturns", () => {
    ]);
  });

+  test("converts multimodal tool_return content into displayable text", () => { // extractInterruptToolReturns with a mixed text/image return
+    const results: ApprovalResult[] = [
+      {
+        type: "tool",
+        tool_call_id: "call-multimodal",
+        status: "error",
+        tool_return: [
+          { type: "text", text: "Interrupted by user" },
+          { type: "image", image_url: "https://example.com/image.png" }, // non-text part; absent from the expected string below
+        ],
+      } as ApprovalResult,
+    ];
+
+    const mapped = extractInterruptToolReturns(results);
+    expect(mapped).toEqual([
+      {
+        tool_call_id: "call-multimodal",
+        status: "error",
+        tool_return: "Interrupted by user", // only the text part's content is expected
+      },
+    ]);
+  });
+
  test("emitInterruptToolReturnMessage emits deterministic per-tool terminal messages", () => {
    const runtime = createRuntime();
    const socket = new MockSocket(WebSocket.OPEN) as unknown as WebSocket;
@@ -208,16 +237,26 @@ describe("extractInterruptToolReturns", () => {
    expect(toolReturnFrames).toHaveLength(2);
    expect(toolReturnFrames[0]).toMatchObject({
      run_id: "run-1",
-      tool_call_id: "call-a",
-      status: "success",
-      tool_returns: [{ tool_call_id: "call-a", status: "success" }],
+      tool_returns: [
+        { tool_call_id: "call-a", status: "success", tool_return: "704" },
+      ],
    });
    expect(toolReturnFrames[1]).toMatchObject({
      run_id: "run-1",
-      tool_call_id: "call-b",
-      status: "error",
-      tool_returns: [{ tool_call_id: "call-b", status: "error" }],
+      tool_returns: [
+        {
+          tool_call_id: "call-b",
+          status: "error",
+          tool_return: "User interrupted the stream",
+        },
+      ],
    });
+    expect(toolReturnFrames[0]).not.toHaveProperty("tool_call_id");
+    expect(toolReturnFrames[0]).not.toHaveProperty("status");
+    expect(toolReturnFrames[0]).not.toHaveProperty("tool_return");
+    expect(toolReturnFrames[1]).not.toHaveProperty("tool_call_id");
+    expect(toolReturnFrames[1]).not.toHaveProperty("status");
+    expect(toolReturnFrames[1]).not.toHaveProperty("tool_return");
  });
 });

@@ -305,13 +344,14 @@ describe("Path A: cancel during tool execution → next turn consumes actual res
    // Cancel fires: populateInterruptQueue (Path A — has execution results)
    const populated = populateInterruptQueue(runtime, {
      lastExecutionResults: executionResults,
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: ["call-1", "call-2"],
      agentId,
      conversationId,
    });

    expect(populated).toBe(true);
-    expect(runtime.pendingInterruptedResults).toBe(executionResults);
+    expect(runtime.pendingInterruptedResults).toEqual(executionResults);
    expect(runtime.pendingInterruptedContext).toMatchObject({
      agentId,
      conversationId,
@@ -322,9 +362,10 @@ describe("Path A: cancel during tool execution → next turn consumes actual res
    const consumed = consumeInterruptQueue(runtime, agentId, conversationId);

    expect(consumed).not.toBeNull();
-    expect(consumed?.type).toBe("approval");
-    expect(consumed?.approvals).toBe(executionResults);
-    expect(consumed?.approvals).toHaveLength(2);
+    expect(consumed?.approvalMessage.type).toBe("approval");
+    expect(consumed?.approvalMessage.approvals).toEqual(executionResults);
+    expect(consumed?.approvalMessage.approvals).toHaveLength(2);
+    expect(consumed?.interruptedToolCallIds).toEqual([]);

    // Queue is atomically cleared after consumption
    expect(runtime.pendingInterruptedResults).toBeNull();
@@ -342,6 +383,7 @@ describe("Path A: cancel during tool execution → next turn consumes actual res

    const populated = populateInterruptQueue(runtime, {
      lastExecutionResults: executionResults,
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: ["call-1"],
      agentId: "agent-1",
      conversationId: "conv-1",
@@ -353,9 +395,87 @@ describe("Path A: cancel during tool execution → next turn consumes actual res
      approve: true, // Path A preserves actual approval state
    });
  });
+
+  test("normalizes interrupted tool results to error via structured tool_call_id", () => { // ID-based normalization inside populateInterruptQueue
+    const runtime = createRuntime();
+    const executionResults: ApprovalResult[] = [
+      {
+        type: "tool",
+        tool_call_id: "call-1",
+        status: "success", // starts as success; expected to be overridden below
+        tool_return: "result text does not matter when ID is interrupted",
+      } as unknown as ApprovalResult,
+    ];
+
+    const populated = populateInterruptQueue(runtime, {
+      lastExecutionResults: executionResults,
+      lastExecutingToolCallIds: ["call-1"], // call-1 is reported as still executing
+      lastNeedsUserInputToolCallIds: [],
+      agentId: "agent-1",
+      conversationId: "conv-1",
+    });
+
+    expect(populated).toBe(true);
+    expect(runtime.pendingInterruptedResults?.[0]).toMatchObject({
+      type: "tool",
+      tool_call_id: "call-1",
+      status: "error", // queued result carries error, not the original success
+    });
+    expect(runtime.pendingInterruptedToolCallIds).toEqual(["call-1"]); // interrupted IDs are stored on the runtime as well
+  });
+
+  test("keeps legacy text fallback for interrupted tool return normalization", () => { // text-based normalization inside populateInterruptQueue
+    const runtime = createRuntime();
+    const executionResults: ApprovalResult[] = [
+      {
+        type: "tool",
+        tool_call_id: "call-legacy",
+        status: "success",
+        tool_return: [{ type: "text", text: "Interrupted by user" }], // interrupt text in content-part array form
+      } as unknown as ApprovalResult,
+    ];
+
+    const populated = populateInterruptQueue(runtime, {
+      lastExecutionResults: executionResults,
+      lastExecutingToolCallIds: [], // no structured IDs are supplied in this case
+      lastNeedsUserInputToolCallIds: [],
+      agentId: "agent-1",
+      conversationId: "conv-1",
+    });
+
+    expect(populated).toBe(true);
+    expect(runtime.pendingInterruptedResults?.[0]).toMatchObject({
+      type: "tool",
+      tool_call_id: "call-legacy",
+      status: "error", // flipped to error with an empty executing-ID list
+    });
+  });
 });

 describe("Path B: cancel during approval wait → next turn consumes synthesized denials", () => {
+  test("prefers synthesized tool-error results when execution was already in-flight", () => { // same ID is both executing and awaiting user input
+    const runtime = createRuntime();
+
+    const populated = populateInterruptQueue(runtime, {
+      lastExecutionResults: null, // no execution results were captured
+      lastExecutingToolCallIds: ["call-running-1"],
+      lastNeedsUserInputToolCallIds: ["call-running-1"], // same ID appears in both lists
+      agentId: "agent-1",
+      conversationId: "conv-1",
+    });
+
+    expect(populated).toBe(true);
+    expect(runtime.pendingInterruptedResults).toEqual([ // exactly one entry is expected for the duplicated ID
+      {
+        type: "tool", // a tool-type result, not an approval-type entry
+        tool_call_id: "call-running-1",
+        tool_return: "Interrupted by user",
+        status: "error",
+      },
+    ]);
+    expect(runtime.pendingInterruptedToolCallIds).toEqual(["call-running-1"]);
+  });
+
  test("full sequence: populate from batch map IDs → consume synthesized denials", () => {
    const runtime = createRuntime();
    const agentId = "agent-abc";
@@ -371,6 +491,7 @@ describe("Path B: cancel during approval wait → next turn consumes synthesized
    // Cancel fires during approval wait: no execution results
    const populated = populateInterruptQueue(runtime, {
      lastExecutionResults: null,
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: [],
      agentId,
      conversationId,
@@ -397,7 +518,7 @@ describe("Path B: cancel during approval wait → next turn consumes synthesized
    // Next user message: consume
    const consumed = consumeInterruptQueue(runtime, agentId, conversationId);
    expect(consumed).not.toBeNull();
-    expect(consumed?.approvals).toHaveLength(2);
+    expect(consumed?.approvalMessage.approvals).toHaveLength(2);

    // Queue cleared
    expect(runtime.pendingInterruptedResults).toBeNull();
@@ -409,6 +530,7 @@ describe("Path B: cancel during approval wait → next turn consumes synthesized
    // No batch map entries, but we have the snapshot IDs
    const populated = populateInterruptQueue(runtime, {
      lastExecutionResults: null,
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: ["call-a", "call-b"],
      agentId: "agent-1",
      conversationId: "conv-1",
@@ -427,6 +549,7 @@ describe("Path B: cancel during approval wait → next turn consumes synthesized

    const populated = populateInterruptQueue(runtime, {
      lastExecutionResults: null,
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: [],
      agentId: "agent-1",
      conversationId: "conv-1",
@@ -453,6 +576,7 @@ describe("post-cancel next turn: queue consumed exactly once (no error loop)", (
          reason: "cancelled",
        },
      ],
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: [],
      agentId,
      conversationId: convId,
@@ -476,6 +600,7 @@ describe("post-cancel next turn: queue consumed exactly once (no error loop)", (
      lastExecutionResults: [
        { type: "approval", tool_call_id: "call-1", approve: true },
      ],
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: [],
      agentId,
      conversationId: convId,
@@ -496,6 +621,7 @@ describe("idempotency: first cancel populates, second is no-op", () => {
      lastExecutionResults: [
        { type: "approval", tool_call_id: "call-first", approve: true },
      ],
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: [],
      agentId: "agent-1",
      conversationId: "conv-1",
@@ -511,6 +637,7 @@ describe("idempotency: first cancel populates, second is no-op", () => {
          reason: "x",
        },
      ],
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: [],
      agentId: "agent-1",
      conversationId: "conv-1",
@@ -530,6 +657,7 @@ describe("idempotency: first cancel populates, second is no-op", () => {
      lastExecutionResults: [
        { type: "approval", tool_call_id: "call-1", approve: true },
      ],
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: [],
      agentId: "agent-1",
      conversationId: "conv-1",
@@ -543,6 +671,7 @@ describe("idempotency: first cancel populates, second is no-op", () => {
      lastExecutionResults: [
        { type: "approval", tool_call_id: "call-2", approve: true },
      ],
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: [],
      agentId: "agent-1",
      conversationId: "conv-1",
@@ -564,6 +693,7 @@ describe("epoch guard: stale context discarded on consume", () => {
      lastExecutionResults: [
        { type: "approval", tool_call_id: "call-1", approve: true },
      ],
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: [],
      agentId: "agent-1",
      conversationId: "conv-1",
@@ -587,6 +717,7 @@ describe("epoch guard: stale context discarded on consume", () => {
      lastExecutionResults: [
        { type: "approval", tool_call_id: "call-1", approve: true },
      ],
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: [],
      agentId: "agent-old",
      conversationId: "conv-1",
@@ -605,6 +736,7 @@ describe("epoch guard: stale context discarded on consume", () => {
      lastExecutionResults: [
        { type: "approval", tool_call_id: "call-1", approve: true },
      ],
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: [],
      agentId: "agent-1",
      conversationId: "conv-old",
@@ -623,6 +755,7 @@ describe("stale Path-B IDs: clearing after successful send prevents re-denial",
    // Also batch map should be cleared by clearPendingApprovalBatchIds
    const populated = populateInterruptQueue(runtime, {
      lastExecutionResults: null,
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: [], // cleared after send
      agentId: "agent-1",
      conversationId: "conv-1",
@@ -640,6 +773,7 @@ describe("stale Path-B IDs: clearing after successful send prevents re-denial",

    const populated = populateInterruptQueue(runtime, {
      lastExecutionResults: null,
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: [], // cleared from previous send
      agentId: "agent-1",
      conversationId: "conv-1",
@@ -716,6 +850,7 @@ describe("consume clears pendingApprovalBatchByToolCallId", () => {
      lastExecutionResults: [
        { type: "approval", tool_call_id: "call-1", approve: true },
      ],
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: [],
      agentId: "agent-1",
      conversationId: "conv-1",
@@ -734,6 +869,7 @@ describe("consume clears pendingApprovalBatchByToolCallId", () => {
      lastExecutionResults: [
        { type: "approval", tool_call_id: "call-1", approve: true },
      ],
+      lastExecutingToolCallIds: [],
      lastNeedsUserInputToolCallIds: [],
      agentId: "agent-old",
      conversationId: "conv-old",