fix(listen): preserve interrupt error status through next-turn persistence (#1294)

This commit is contained in:
Charles Packer
2026-03-05 22:29:08 -08:00
committed by GitHub
parent cc6f754ca3
commit 52f2cc9924
6 changed files with 918 additions and 58 deletions

View File

@@ -0,0 +1,103 @@
import { describe, expect, test } from "bun:test";
import type { ApprovalCreate } from "@letta-ai/letta-client/resources/agents/messages";
import type { ApprovalResult } from "../../agent/approval-execution";
import {
normalizeApprovalResultsForPersistence,
normalizeOutgoingApprovalMessages,
} from "../../agent/approval-result-normalization";
import { INTERRUPTED_BY_USER } from "../../constants";
// Covers normalizeApprovalResultsForPersistence: how a tool result's `status`
// is rewritten (or left alone) depending on the normalization options passed in.
describe("normalizeApprovalResultsForPersistence", () => {
  test("forces status=error for structured interrupted tool_call_ids", () => {
    // Reports success, but its tool_call_id is listed as interrupted below.
    const interruptedResult = {
      type: "tool",
      tool_call_id: "call-1",
      tool_return: "some return",
      status: "success",
    } as ApprovalResult;

    const [first] = normalizeApprovalResultsForPersistence(
      [interruptedResult],
      { interruptedToolCallIds: ["call-1"] },
    );

    expect(first).toMatchObject({
      type: "tool",
      tool_call_id: "call-1",
      status: "error",
    });
  });

  test("does not modify non-interrupted tool results", () => {
    // The interrupted ID list names a different call, so this one must pass through.
    const untouchedResult = {
      type: "tool",
      tool_call_id: "call-2",
      tool_return: "ok",
      status: "success",
    } as ApprovalResult;

    const [first] = normalizeApprovalResultsForPersistence(
      [untouchedResult],
      { interruptedToolCallIds: ["other-id"] },
    );

    expect(first).toMatchObject({
      type: "tool",
      tool_call_id: "call-2",
      status: "success",
    });
  });

  test("supports legacy fallback on interrupt text when explicitly enabled", () => {
    // No interrupted IDs are supplied; only the return text carries the marker.
    const legacyResult = {
      type: "tool",
      tool_call_id: "call-3",
      tool_return: [{ type: "text", text: INTERRUPTED_BY_USER }],
      status: "success",
    } as ApprovalResult;

    const [first] = normalizeApprovalResultsForPersistence([legacyResult], {
      allowInterruptTextFallback: true,
    });

    expect(first).toMatchObject({
      type: "tool",
      tool_call_id: "call-3",
      status: "error",
    });
  });
});
// Covers normalizeOutgoingApprovalMessages: approval entries in a mixed message
// list are normalized while the other messages are returned as they were given.
describe("normalizeOutgoingApprovalMessages", () => {
  test("normalizes approvals and preserves non-approval messages", () => {
    const outgoingApproval: ApprovalCreate = {
      type: "approval",
      approvals: [
        {
          type: "tool",
          tool_call_id: "call-7",
          tool_return: "foo",
          status: "success",
        } as ApprovalResult,
      ],
    };

    const [userMessage, approvalEntry] = normalizeOutgoingApprovalMessages(
      [{ role: "user", content: "hello" }, outgoingApproval],
      { interruptedToolCallIds: ["call-7"] },
    );

    // The plain user message keeps its role and content.
    expect(userMessage).toMatchObject({ role: "user", content: "hello" });

    // The approval for the interrupted call is flipped to an error.
    const [firstApproval] = (approvalEntry as ApprovalCreate).approvals ?? [];
    expect(firstApproval).toMatchObject({
      type: "tool",
      tool_call_id: "call-7",
      status: "error",
    });
  });
});

View File

@@ -1,5 +1,8 @@
import { describe, expect, test } from "bun:test";
import type { ApprovalCreate } from "@letta-ai/letta-client/resources/agents/messages";
import WebSocket from "ws";
import { buildConversationMessagesCreateRequestBody } from "../../agent/message";
import { INTERRUPTED_BY_USER } from "../../constants";
import type { ControlRequest, ControlResponseBody } from "../../types/protocol";
import {
__listenClientTestUtils,
@@ -640,3 +643,159 @@ describe("listen-client post-stop approval recovery policy", () => {
expect(shouldRecover).toBe(false);
});
});
// Covers normalizeExecutionResultsForInterruptParity: the same in-flight tool
// result is fed through with `runtime.cancelRequested` on and off, and only the
// resulting `status` is expected to differ.
describe("listen-client interrupt persistence normalization", () => {
  const scenarios = [
    {
      title:
        "forces interrupted in-flight tool results to status=error when cancelRequested",
      cancelRequested: true,
      expectedStatus: "error",
    },
    {
      title: "leaves tool status unchanged when not in cancel flow",
      cancelRequested: false,
      expectedStatus: "success",
    },
  ];

  for (const { title, cancelRequested, expectedStatus } of scenarios) {
    test(title, () => {
      const runtime = __listenClientTestUtils.createRuntime();
      runtime.cancelRequested = cancelRequested;

      const result =
        __listenClientTestUtils.normalizeExecutionResultsForInterruptParity(
          runtime,
          [
            {
              type: "tool",
              tool_call_id: "tool-1",
              tool_return: "Interrupted by user",
              status: "success",
            },
          ],
          ["tool-1"],
        );

      // Everything except `status` is expected to come back unchanged.
      expect(result).toEqual([
        {
          type: "tool",
          tool_call_id: "tool-1",
          tool_return: "Interrupted by user",
          status: expectedStatus,
        },
      ]);
    });
  }
});
// End-to-end check across the interrupt queue and the request-body builder:
// an interrupt queued with only an executing tool call ID must surface in the
// next turn's payload as a tool return with status=error.
describe("listen-client interrupt persistence request body", () => {
  test("post-interrupt next-turn payload keeps interrupted tool returns as status=error", () => {
    const agentId = "agent-1";
    const conversationId = "default";
    const runtime = __listenClientTestUtils.createRuntime();

    // Queue an interrupt with no execution results, only an in-flight call ID.
    __listenClientTestUtils.populateInterruptQueue(runtime, {
      lastExecutionResults: null,
      lastExecutingToolCallIds: ["call-running-1"],
      lastNeedsUserInputToolCallIds: [],
      agentId,
      conversationId,
    });

    const consumed = __listenClientTestUtils.consumeInterruptQueue(
      runtime,
      agentId,
      conversationId,
    );
    expect(consumed).not.toBeNull();
    if (!consumed) {
      // Narrows `consumed` for the type checker; the expect above already failed.
      throw new Error("Expected queued interrupt approvals to be consumed");
    }
    const { approvalMessage: queuedApproval, interruptedToolCallIds } =
      consumed;

    // Build the next turn: queued approval first, then the new user message.
    const requestBody = buildConversationMessagesCreateRequestBody(
      conversationId,
      [
        queuedApproval,
        {
          type: "message",
          role: "user",
          content: "next user message after interrupt",
        },
      ],
      {
        agentId,
        streamTokens: true,
        background: true,
        approvalNormalization: { interruptedToolCallIds },
      },
      [],
    );

    const [firstMessage] = requestBody.messages;
    const sentApproval = firstMessage as ApprovalCreate;
    expect(sentApproval.type).toBe("approval");
    expect(sentApproval.approvals?.[0]).toMatchObject({
      type: "tool",
      tool_call_id: "call-running-1",
      tool_return: INTERRUPTED_BY_USER,
      status: "error",
    });
  });
});
// Covers normalizeToolReturnWireMessage: legacy top-level tool return fields are
// folded into a `tool_returns` array, and a message without a status yields null.
describe("listen-client tool_return wire normalization", () => {
  test("normalizes legacy top-level tool return fields to canonical tool_returns[]", () => {
    const canonical = __listenClientTestUtils.normalizeToolReturnWireMessage({
      message_type: "tool_return_message",
      id: "message-1",
      run_id: "run-1",
      tool_call_id: "call-1",
      status: "error",
      tool_return: [{ type: "text", text: "Interrupted by user" }],
    });

    // The array-of-text return is expected to come back as a plain string.
    expect(canonical).toEqual({
      message_type: "tool_return_message",
      id: "message-1",
      run_id: "run-1",
      tool_returns: [
        {
          tool_call_id: "call-1",
          status: "error",
          tool_return: "Interrupted by user",
        },
      ],
    });

    // None of the legacy top-level fields may remain on the message itself.
    for (const legacyField of ["tool_call_id", "status", "tool_return"]) {
      expect(canonical).not.toHaveProperty(legacyField);
    }
  });

  test("returns null for tool_return_message when no canonical status is available", () => {
    // Same legacy shape as above, but with no `status` field at all.
    const canonical = __listenClientTestUtils.normalizeToolReturnWireMessage({
      message_type: "tool_return_message",
      id: "message-2",
      run_id: "run-2",
      tool_call_id: "call-2",
      tool_return: "maybe done",
    });

    expect(canonical).toBeNull();
  });
});

View File

@@ -62,12 +62,14 @@ describe("ListenerRuntime interrupt queue fields", () => {
const runtime = createRuntime();
expect(runtime.pendingInterruptedResults).toBeNull();
expect(runtime.pendingInterruptedContext).toBeNull();
expect(runtime.pendingInterruptedToolCallIds).toBeNull();
expect(runtime.activeExecutingToolCallIds).toEqual([]);
expect(runtime.continuationEpoch).toBe(0);
});
});
describe("stopRuntime teardown", () => {
test("clears pendingInterruptedResults, context, and batch map", () => {
test("clears pendingInterruptedResults, context, ids, and batch map", () => {
const runtime = createRuntime();
runtime.socket = new MockSocket(WebSocket.OPEN) as unknown as WebSocket;
@@ -84,12 +86,16 @@ describe("stopRuntime teardown", () => {
conversationId: "conv-1",
continuationEpoch: 0,
};
runtime.pendingInterruptedToolCallIds = ["call-1"];
runtime.activeExecutingToolCallIds = ["call-1"];
runtime.pendingApprovalBatchByToolCallId.set("call-1", "batch-1");
stopRuntime(runtime, true);
expect(runtime.pendingInterruptedResults).toBeNull();
expect(runtime.pendingInterruptedContext).toBeNull();
expect(runtime.pendingInterruptedToolCallIds).toBeNull();
expect(runtime.activeExecutingToolCallIds).toEqual([]);
expect(runtime.pendingApprovalBatchByToolCallId.size).toBe(0);
});
@@ -178,6 +184,29 @@ describe("extractInterruptToolReturns", () => {
]);
});
test("converts multimodal tool_return content into displayable text", () => {
  // A tool return carrying both a text part and an image part.
  const multimodalResult = {
    type: "tool",
    tool_call_id: "call-multimodal",
    status: "error",
    tool_return: [
      { type: "text", text: "Interrupted by user" },
      { type: "image", image_url: "https://example.com/image.png" },
    ],
  } as ApprovalResult;

  const toolReturns = extractInterruptToolReturns([multimodalResult]);

  // Only the text part is expected in the extracted, string-valued tool_return.
  expect(toolReturns).toEqual([
    {
      tool_call_id: "call-multimodal",
      status: "error",
      tool_return: "Interrupted by user",
    },
  ]);
});
test("emitInterruptToolReturnMessage emits deterministic per-tool terminal messages", () => {
const runtime = createRuntime();
const socket = new MockSocket(WebSocket.OPEN) as unknown as WebSocket;
@@ -208,16 +237,26 @@ describe("extractInterruptToolReturns", () => {
expect(toolReturnFrames).toHaveLength(2);
expect(toolReturnFrames[0]).toMatchObject({
run_id: "run-1",
tool_call_id: "call-a",
status: "success",
tool_returns: [{ tool_call_id: "call-a", status: "success" }],
tool_returns: [
{ tool_call_id: "call-a", status: "success", tool_return: "704" },
],
});
expect(toolReturnFrames[1]).toMatchObject({
run_id: "run-1",
tool_call_id: "call-b",
status: "error",
tool_returns: [{ tool_call_id: "call-b", status: "error" }],
tool_returns: [
{
tool_call_id: "call-b",
status: "error",
tool_return: "User interrupted the stream",
},
],
});
expect(toolReturnFrames[0]).not.toHaveProperty("tool_call_id");
expect(toolReturnFrames[0]).not.toHaveProperty("status");
expect(toolReturnFrames[0]).not.toHaveProperty("tool_return");
expect(toolReturnFrames[1]).not.toHaveProperty("tool_call_id");
expect(toolReturnFrames[1]).not.toHaveProperty("status");
expect(toolReturnFrames[1]).not.toHaveProperty("tool_return");
});
});
@@ -305,13 +344,14 @@ describe("Path A: cancel during tool execution → next turn consumes actual res
// Cancel fires: populateInterruptQueue (Path A — has execution results)
const populated = populateInterruptQueue(runtime, {
lastExecutionResults: executionResults,
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: ["call-1", "call-2"],
agentId,
conversationId,
});
expect(populated).toBe(true);
expect(runtime.pendingInterruptedResults).toBe(executionResults);
expect(runtime.pendingInterruptedResults).toEqual(executionResults);
expect(runtime.pendingInterruptedContext).toMatchObject({
agentId,
conversationId,
@@ -322,9 +362,10 @@ describe("Path A: cancel during tool execution → next turn consumes actual res
const consumed = consumeInterruptQueue(runtime, agentId, conversationId);
expect(consumed).not.toBeNull();
expect(consumed?.type).toBe("approval");
expect(consumed?.approvals).toBe(executionResults);
expect(consumed?.approvals).toHaveLength(2);
expect(consumed?.approvalMessage.type).toBe("approval");
expect(consumed?.approvalMessage.approvals).toEqual(executionResults);
expect(consumed?.approvalMessage.approvals).toHaveLength(2);
expect(consumed?.interruptedToolCallIds).toEqual([]);
// Queue is atomically cleared after consumption
expect(runtime.pendingInterruptedResults).toBeNull();
@@ -342,6 +383,7 @@ describe("Path A: cancel during tool execution → next turn consumes actual res
const populated = populateInterruptQueue(runtime, {
lastExecutionResults: executionResults,
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: ["call-1"],
agentId: "agent-1",
conversationId: "conv-1",
@@ -353,9 +395,87 @@ describe("Path A: cancel during tool execution → next turn consumes actual res
approve: true, // Path A preserves actual approval state
});
});
test("normalizes interrupted tool results to error via structured tool_call_id", () => {
  const runtime = createRuntime();
  // The tool reports success, but its ID is passed below as still executing
  // when the interrupt queue is populated.
  // A single `as ApprovalResult` assertion (as the sibling fixtures in this
  // file use) keeps the compiler's overlap check that `as unknown as` bypasses.
  const executionResults: ApprovalResult[] = [
    {
      type: "tool",
      tool_call_id: "call-1",
      status: "success",
      tool_return: "result text does not matter when ID is interrupted",
    } as ApprovalResult,
  ];

  const populated = populateInterruptQueue(runtime, {
    lastExecutionResults: executionResults,
    lastExecutingToolCallIds: ["call-1"],
    lastNeedsUserInputToolCallIds: [],
    agentId: "agent-1",
    conversationId: "conv-1",
  });

  expect(populated).toBe(true);
  // The queued copy must carry status=error regardless of the return text.
  expect(runtime.pendingInterruptedResults?.[0]).toMatchObject({
    type: "tool",
    tool_call_id: "call-1",
    status: "error",
  });
  // The interrupted IDs are recorded on the runtime alongside the results.
  expect(runtime.pendingInterruptedToolCallIds).toEqual(["call-1"]);
});
test("keeps legacy text fallback for interrupted tool return normalization", () => {
  const runtime = createRuntime();
  // No executing tool call IDs are supplied below; the only interrupt signal is
  // the "Interrupted by user" text inside the tool return.
  // A single `as ApprovalResult` assertion (as the sibling fixtures in this
  // file use) keeps the compiler's overlap check that `as unknown as` bypasses.
  const executionResults: ApprovalResult[] = [
    {
      type: "tool",
      tool_call_id: "call-legacy",
      status: "success",
      tool_return: [{ type: "text", text: "Interrupted by user" }],
    } as ApprovalResult,
  ];

  const populated = populateInterruptQueue(runtime, {
    lastExecutionResults: executionResults,
    lastExecutingToolCallIds: [],
    lastNeedsUserInputToolCallIds: [],
    agentId: "agent-1",
    conversationId: "conv-1",
  });

  expect(populated).toBe(true);
  // The queued result is still expected to be rewritten to status=error.
  expect(runtime.pendingInterruptedResults?.[0]).toMatchObject({
    type: "tool",
    tool_call_id: "call-legacy",
    status: "error",
  });
});
});
describe("Path B: cancel during approval wait → next turn consumes synthesized denials", () => {
test("prefers synthesized tool-error results when execution was already in-flight", () => {
  const runtime = createRuntime();
  // The same call ID is listed both as executing and as awaiting user input,
  // and there are no execution results to reuse.
  const inFlightCallId = "call-running-1";

  const wasQueued = populateInterruptQueue(runtime, {
    lastExecutionResults: null,
    lastExecutingToolCallIds: [inFlightCallId],
    lastNeedsUserInputToolCallIds: [inFlightCallId],
    agentId: "agent-1",
    conversationId: "conv-1",
  });
  expect(wasQueued).toBe(true);

  // A tool-type error result is expected to be queued for the call.
  expect(runtime.pendingInterruptedResults).toEqual([
    {
      type: "tool",
      tool_call_id: "call-running-1",
      tool_return: "Interrupted by user",
      status: "error",
    },
  ]);
  expect(runtime.pendingInterruptedToolCallIds).toEqual(["call-running-1"]);
});
test("full sequence: populate from batch map IDs → consume synthesized denials", () => {
const runtime = createRuntime();
const agentId = "agent-abc";
@@ -371,6 +491,7 @@ describe("Path B: cancel during approval wait → next turn consumes synthesized
// Cancel fires during approval wait: no execution results
const populated = populateInterruptQueue(runtime, {
lastExecutionResults: null,
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: [],
agentId,
conversationId,
@@ -397,7 +518,7 @@ describe("Path B: cancel during approval wait → next turn consumes synthesized
// Next user message: consume
const consumed = consumeInterruptQueue(runtime, agentId, conversationId);
expect(consumed).not.toBeNull();
expect(consumed?.approvals).toHaveLength(2);
expect(consumed?.approvalMessage.approvals).toHaveLength(2);
// Queue cleared
expect(runtime.pendingInterruptedResults).toBeNull();
@@ -409,6 +530,7 @@ describe("Path B: cancel during approval wait → next turn consumes synthesized
// No batch map entries, but we have the snapshot IDs
const populated = populateInterruptQueue(runtime, {
lastExecutionResults: null,
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: ["call-a", "call-b"],
agentId: "agent-1",
conversationId: "conv-1",
@@ -427,6 +549,7 @@ describe("Path B: cancel during approval wait → next turn consumes synthesized
const populated = populateInterruptQueue(runtime, {
lastExecutionResults: null,
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: [],
agentId: "agent-1",
conversationId: "conv-1",
@@ -453,6 +576,7 @@ describe("post-cancel next turn: queue consumed exactly once (no error loop)", (
reason: "cancelled",
},
],
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: [],
agentId,
conversationId: convId,
@@ -476,6 +600,7 @@ describe("post-cancel next turn: queue consumed exactly once (no error loop)", (
lastExecutionResults: [
{ type: "approval", tool_call_id: "call-1", approve: true },
],
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: [],
agentId,
conversationId: convId,
@@ -496,6 +621,7 @@ describe("idempotency: first cancel populates, second is no-op", () => {
lastExecutionResults: [
{ type: "approval", tool_call_id: "call-first", approve: true },
],
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: [],
agentId: "agent-1",
conversationId: "conv-1",
@@ -511,6 +637,7 @@ describe("idempotency: first cancel populates, second is no-op", () => {
reason: "x",
},
],
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: [],
agentId: "agent-1",
conversationId: "conv-1",
@@ -530,6 +657,7 @@ describe("idempotency: first cancel populates, second is no-op", () => {
lastExecutionResults: [
{ type: "approval", tool_call_id: "call-1", approve: true },
],
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: [],
agentId: "agent-1",
conversationId: "conv-1",
@@ -543,6 +671,7 @@ describe("idempotency: first cancel populates, second is no-op", () => {
lastExecutionResults: [
{ type: "approval", tool_call_id: "call-2", approve: true },
],
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: [],
agentId: "agent-1",
conversationId: "conv-1",
@@ -564,6 +693,7 @@ describe("epoch guard: stale context discarded on consume", () => {
lastExecutionResults: [
{ type: "approval", tool_call_id: "call-1", approve: true },
],
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: [],
agentId: "agent-1",
conversationId: "conv-1",
@@ -587,6 +717,7 @@ describe("epoch guard: stale context discarded on consume", () => {
lastExecutionResults: [
{ type: "approval", tool_call_id: "call-1", approve: true },
],
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: [],
agentId: "agent-old",
conversationId: "conv-1",
@@ -605,6 +736,7 @@ describe("epoch guard: stale context discarded on consume", () => {
lastExecutionResults: [
{ type: "approval", tool_call_id: "call-1", approve: true },
],
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: [],
agentId: "agent-1",
conversationId: "conv-old",
@@ -623,6 +755,7 @@ describe("stale Path-B IDs: clearing after successful send prevents re-denial",
// Also batch map should be cleared by clearPendingApprovalBatchIds
const populated = populateInterruptQueue(runtime, {
lastExecutionResults: null,
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: [], // cleared after send
agentId: "agent-1",
conversationId: "conv-1",
@@ -640,6 +773,7 @@ describe("stale Path-B IDs: clearing after successful send prevents re-denial",
const populated = populateInterruptQueue(runtime, {
lastExecutionResults: null,
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: [], // cleared from previous send
agentId: "agent-1",
conversationId: "conv-1",
@@ -716,6 +850,7 @@ describe("consume clears pendingApprovalBatchByToolCallId", () => {
lastExecutionResults: [
{ type: "approval", tool_call_id: "call-1", approve: true },
],
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: [],
agentId: "agent-1",
conversationId: "conv-1",
@@ -734,6 +869,7 @@ describe("consume clears pendingApprovalBatchByToolCallId", () => {
lastExecutionResults: [
{ type: "approval", tool_call_id: "call-1", approve: true },
],
lastExecutingToolCallIds: [],
lastNeedsUserInputToolCallIds: [],
agentId: "agent-old",
conversationId: "conv-old",