Files
lettabot/src/core/result-guard.test.ts

525 lines
19 KiB
TypeScript

import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { mkdtempSync, rmSync } from 'node:fs';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { LettaBot } from './bot.js';
import type { InboundMessage, OutboundMessage } from './types.js';
vi.mock('../tools/letta-api.js', () => ({
getPendingApprovals: vi.fn(),
rejectApproval: vi.fn(),
cancelRuns: vi.fn(),
cancelConversation: vi.fn(),
recoverOrphanedConversationApproval: vi.fn().mockResolvedValue({ recovered: false }),
recoverPendingApprovalsForAgent: vi.fn(),
isRecoverableConversationId: vi.fn(() => false),
getLatestRunError: vi.fn().mockResolvedValue(null),
getAgentModel: vi.fn(),
updateAgentModel: vi.fn(),
}));
describe('result divergence guard', () => {
let workDir: string;
beforeEach(() => {
workDir = mkdtempSync(join(tmpdir(), 'lettabot-result-guard-'));
});
afterEach(() => {
rmSync(workDir, { recursive: true, force: true });
});
it('does not resend full result text when streamed content was already flushed', async () => {
const bot = new LettaBot({
workingDir: workDir,
allowedTools: [],
});
const adapter = {
id: 'mock',
name: 'Mock',
start: vi.fn(async () => {}),
stop: vi.fn(async () => {}),
isRunning: vi.fn(() => true),
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
editMessage: vi.fn(async () => {}),
sendTypingIndicator: vi.fn(async () => {}),
stopTypingIndicator: vi.fn(async () => {}),
supportsEditing: vi.fn(() => false),
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
};
(bot as any).sessionManager.runSession = vi.fn(async () => ({
session: { abort: vi.fn(async () => {}) },
stream: async function* () {
// Assistant text is flushed when tool_call arrives.
yield { type: 'assistant', content: 'first segment' };
yield { type: 'tool_call', toolCallId: 'tc-1', toolName: 'Bash', toolInput: { command: 'echo hi' } };
// Result repeats the same text; this must not cause a duplicate send.
yield { type: 'result', success: true, result: 'first segment' };
},
}));
const msg: InboundMessage = {
channel: 'discord',
chatId: 'chat-1',
userId: 'user-1',
text: 'hello',
timestamp: new Date(),
};
await (bot as any).processMessage(msg, adapter);
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
expect(sentTexts).toEqual(['first segment']);
});
it('prefers streamed assistant text when result text diverges after flush', async () => {
const bot = new LettaBot({
workingDir: workDir,
allowedTools: [],
});
const adapter = {
id: 'mock',
name: 'Mock',
start: vi.fn(async () => {}),
stop: vi.fn(async () => {}),
isRunning: vi.fn(() => true),
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
editMessage: vi.fn(async () => {}),
sendTypingIndicator: vi.fn(async () => {}),
stopTypingIndicator: vi.fn(async () => {}),
supportsEditing: vi.fn(() => false),
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
};
(bot as any).sessionManager.runSession = vi.fn(async () => ({
session: { abort: vi.fn(async () => {}) },
stream: async function* () {
yield { type: 'assistant', content: 'streamed-segment' };
yield { type: 'tool_call', toolCallId: 'tc-1', toolName: 'Bash', toolInput: { command: 'echo hi' } };
// Divergent stale result should not replace or resend streamed content.
yield { type: 'result', success: true, result: 'stale-result-segment' };
},
}));
const msg: InboundMessage = {
channel: 'discord',
chatId: 'chat-1',
userId: 'user-1',
text: 'hello',
timestamp: new Date(),
};
await (bot as any).processMessage(msg, adapter);
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
expect(sentTexts).toEqual(['streamed-segment']);
});
it('stops after repeated failing lettabot CLI bash calls', async () => {
const bot = new LettaBot({
workingDir: workDir,
allowedTools: [],
maxToolCalls: 100,
});
const writeTurn = vi.fn(async () => {});
(bot as any).turnLogger = { write: writeTurn };
const abort = vi.fn(async () => {});
const adapter = {
id: 'mock',
name: 'Mock',
start: vi.fn(async () => {}),
stop: vi.fn(async () => {}),
isRunning: vi.fn(() => true),
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
editMessage: vi.fn(async () => {}),
sendTypingIndicator: vi.fn(async () => {}),
stopTypingIndicator: vi.fn(async () => {}),
supportsEditing: vi.fn(() => false),
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
};
(bot as any).sessionManager.runSession = vi.fn(async () => ({
session: { abort },
stream: async function* () {
yield { type: 'tool_call', toolCallId: 'tc-1', toolName: 'Bash', toolInput: { command: 'lettabot bluesky post --text "hi" --agent Bot' } };
yield { type: 'tool_result', toolCallId: 'tc-1', isError: true, content: 'Unknown command: bluesky' };
yield { type: 'tool_call', toolCallId: 'tc-2', toolName: 'Bash', toolInput: { command: 'lettabot bluesky post --text "hi" --agent Bot' } };
yield { type: 'tool_result', toolCallId: 'tc-2', isError: true, content: 'Unknown command: bluesky' };
yield { type: 'tool_call', toolCallId: 'tc-3', toolName: 'Bash', toolInput: { command: 'lettabot bluesky post --text "hi" --agent Bot' } };
yield { type: 'tool_result', toolCallId: 'tc-3', isError: true, content: 'Unknown command: bluesky' };
},
}));
const msg: InboundMessage = {
channel: 'discord',
chatId: 'chat-1',
userId: 'user-1',
text: 'hello',
timestamp: new Date(),
};
await (bot as any).processMessage(msg, adapter);
expect(abort).toHaveBeenCalled();
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text as string);
expect(sentTexts.some(text => text.includes('repeated CLI command failures'))).toBe(true);
expect(writeTurn).toHaveBeenCalledTimes(1);
expect(writeTurn).toHaveBeenCalledWith(expect.objectContaining({
error: 'repeated Bash failure abort (3x): lettabot bluesky post --text "hi" --agent Bot',
}));
});
it('stops consuming stream and avoids retry after explicit tool-loop abort', async () => {
const bot = new LettaBot({
workingDir: workDir,
allowedTools: [],
maxToolCalls: 1,
});
const writeTurn = vi.fn(async () => {});
(bot as any).turnLogger = { write: writeTurn };
const adapter = {
id: 'mock',
name: 'Mock',
start: vi.fn(async () => {}),
stop: vi.fn(async () => {}),
isRunning: vi.fn(() => true),
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
editMessage: vi.fn(async () => {}),
sendTypingIndicator: vi.fn(async () => {}),
stopTypingIndicator: vi.fn(async () => {}),
supportsEditing: vi.fn(() => false),
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
};
const runSession = vi.fn();
runSession.mockResolvedValueOnce({
session: { abort: vi.fn(async () => {}) },
stream: async function* () {
yield { type: 'tool_call', toolCallId: 'tc-1', toolName: 'Bash', toolInput: { command: 'echo hi' } };
// These trailing events should be ignored because the run was already aborted.
yield { type: 'assistant', content: 'late assistant text' };
yield { type: 'result', success: false, error: 'error', stopReason: 'cancelled', result: '' };
},
});
runSession.mockResolvedValueOnce({
session: { abort: vi.fn(async () => {}) },
stream: async function* () {
yield { type: 'assistant', content: 'retried response' };
yield { type: 'result', success: true, result: 'retried response' };
},
});
(bot as any).sessionManager.runSession = runSession;
const msg: InboundMessage = {
channel: 'discord',
chatId: 'chat-1',
userId: 'user-1',
text: 'hello',
timestamp: new Date(),
};
await (bot as any).processMessage(msg, adapter);
expect(runSession).toHaveBeenCalledTimes(1);
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
expect(sentTexts).toEqual(['(Agent got stuck in a tool loop and was stopped. Try sending your message again.)']);
expect(writeTurn).toHaveBeenCalledTimes(1);
expect(writeTurn).toHaveBeenCalledWith(expect.objectContaining({
error: 'tool loop abort after 1 tool calls',
}));
});
it('does not deliver reasoning text from error results as the response', async () => {
const bot = new LettaBot({
workingDir: workDir,
allowedTools: [],
});
const adapter = {
id: 'mock',
name: 'Mock',
start: vi.fn(async () => {}),
stop: vi.fn(async () => {}),
isRunning: vi.fn(() => true),
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
editMessage: vi.fn(async () => {}),
sendTypingIndicator: vi.fn(async () => {}),
stopTypingIndicator: vi.fn(async () => {}),
supportsEditing: vi.fn(() => false),
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
};
(bot as any).sessionManager.runSession = vi.fn(async () => ({
session: { abort: vi.fn(async () => {}) },
stream: async function* () {
// Reproduce the exact bug path: reasoning tokens only, then an error
// result whose result field contains the leaked reasoning text.
yield { type: 'reasoning', content: '**Evaluating response protocol**\n\nI\'m trying to figure out how to respond...' };
yield {
type: 'result',
success: false,
error: 'error',
stopReason: 'llm_api_error',
result: '**Evaluating response protocol**\n\nI\'m trying to figure out how to respond...',
};
},
}));
const msg: InboundMessage = {
channel: 'discord',
chatId: 'chat-1',
userId: 'user-1',
text: 'hello',
timestamp: new Date(),
};
await (bot as any).processMessage(msg, adapter);
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
// Must show a formatted error message, never the raw reasoning text.
expect(sentTexts.length).toBeGreaterThanOrEqual(1);
const lastSent = sentTexts[sentTexts.length - 1];
expect(lastSent).not.toContain('Evaluating response protocol');
expect(lastSent).toMatch(/\(.*\)/); // Parenthesized system message
});
it('rebinds foreground run on post-tool-call assistant events with new run ID (#527)', async () => {
const bot = new LettaBot({
workingDir: workDir,
allowedTools: [],
});
const adapter = {
id: 'mock',
name: 'Mock',
start: vi.fn(async () => {}),
stop: vi.fn(async () => {}),
isRunning: vi.fn(() => true),
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
editMessage: vi.fn(async () => {}),
sendTypingIndicator: vi.fn(async () => {}),
stopTypingIndicator: vi.fn(async () => {}),
supportsEditing: vi.fn(() => false),
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
};
// Server assigns run-2 after tool call -- both runs are part of the same turn
(bot as any).sessionManager.runSession = vi.fn(async () => ({
session: { abort: vi.fn(async () => {}) },
stream: async function* () {
yield { type: 'assistant', content: 'Before tool. ', runId: 'run-1' };
yield { type: 'tool_call', toolCallId: 'tc-1', toolName: 'Bash', toolInput: { command: 'echo ok' }, runId: 'run-1' };
yield { type: 'tool_result', content: 'ok', isError: false, runId: 'run-1' };
yield { type: 'assistant', content: 'After tool.', runId: 'run-2' };
yield { type: 'result', success: true, result: 'Before tool. After tool.', runIds: ['run-2'] };
},
}));
const msg: InboundMessage = {
channel: 'discord',
chatId: 'chat-1',
userId: 'user-1',
text: 'run a command',
timestamp: new Date(),
};
await (bot as any).processMessage(msg, adapter);
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
// Pre-tool and post-tool text are separate messages (finalized on type change)
expect(sentTexts).toEqual(['Before tool. ', 'After tool.']);
});
it('locks foreground on first event with run ID and displays immediately', async () => {
const bot = new LettaBot({
workingDir: workDir,
allowedTools: [],
display: { showReasoning: true, showToolCalls: true },
});
const adapter = {
id: 'mock',
name: 'Mock',
start: vi.fn(async () => {}),
stop: vi.fn(async () => {}),
isRunning: vi.fn(() => true),
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
editMessage: vi.fn(async () => {}),
sendTypingIndicator: vi.fn(async () => {}),
stopTypingIndicator: vi.fn(async () => {}),
supportsEditing: vi.fn(() => false),
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
};
// Reasoning and tool_call arrive before any assistant event. The pipeline
// locks foreground on the first event with a run ID (the reasoning event)
// and processes everything immediately -- no buffering.
(bot as any).sessionManager.runSession = vi.fn(async () => ({
session: { abort: vi.fn(async () => {}) },
stream: async function* () {
yield { type: 'reasoning', content: 'pre-tool thinking', runId: 'run-tool' };
yield { type: 'tool_call', toolCallId: 'tc-1', toolName: 'Bash', toolInput: { command: 'echo hi' }, runId: 'run-tool' };
yield { type: 'assistant', content: 'main reply', runId: 'run-main' };
yield { type: 'result', success: true, result: 'main reply', runIds: ['run-main'] };
},
}));
const msg: InboundMessage = {
channel: 'discord',
chatId: 'chat-1',
userId: 'user-1',
text: 'hello',
timestamp: new Date(),
};
await (bot as any).processMessage(msg, adapter);
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
// Reasoning display + tool call display + main reply -- all immediate, no buffering
expect(sentTexts.length).toBe(3);
expect(sentTexts[2]).toBe('main reply');
});
it('retries once when a competing result arrives before any foreground terminal result', async () => {
const bot = new LettaBot({
workingDir: workDir,
allowedTools: [],
});
const adapter = {
id: 'mock',
name: 'Mock',
start: vi.fn(async () => {}),
stop: vi.fn(async () => {}),
isRunning: vi.fn(() => true),
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
editMessage: vi.fn(async () => {}),
sendTypingIndicator: vi.fn(async () => {}),
stopTypingIndicator: vi.fn(async () => {}),
supportsEditing: vi.fn(() => false),
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
};
const runSession = vi.fn();
runSession.mockResolvedValueOnce({
session: { abort: vi.fn(async () => {}) },
stream: async function* () {
yield { type: 'assistant', content: 'partial foreground', runId: 'run-main' };
yield { type: 'result', success: true, result: 'background final', runIds: ['run-bg'] };
},
});
runSession.mockResolvedValueOnce({
session: { abort: vi.fn(async () => {}) },
stream: async function* () {
yield { type: 'assistant', content: 'main reply', runId: 'run-main' };
yield { type: 'result', success: true, result: 'main reply', runIds: ['run-main'] };
},
});
(bot as any).sessionManager.runSession = runSession;
const msg: InboundMessage = {
channel: 'discord',
chatId: 'chat-1',
userId: 'user-1',
text: 'hello',
timestamp: new Date(),
};
await (bot as any).processMessage(msg, adapter);
expect(runSession).toHaveBeenCalledTimes(2);
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
expect(sentTexts).toEqual(['main reply']);
});
it('treats <no-reply/> as intentional silence and does not deliver a visible message', async () => {
const bot = new LettaBot({
workingDir: workDir,
allowedTools: [],
});
const adapter = {
id: 'mock',
name: 'Mock',
start: vi.fn(async () => {}),
stop: vi.fn(async () => {}),
isRunning: vi.fn(() => true),
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
editMessage: vi.fn(async () => {}),
sendTypingIndicator: vi.fn(async () => {}),
stopTypingIndicator: vi.fn(async () => {}),
supportsEditing: vi.fn(() => false),
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
};
(bot as any).sessionManager.runSession = vi.fn(async () => ({
session: { abort: vi.fn(async () => {}) },
stream: async function* () {
yield { type: 'assistant', content: '<no-reply/>' };
yield { type: 'result', success: true, result: '<no-reply/>' };
},
}));
const msg: InboundMessage = {
channel: 'discord',
chatId: 'chat-1',
userId: 'user-1',
text: 'hello',
timestamp: new Date(),
};
await (bot as any).processMessage(msg, adapter);
expect(adapter.sendMessage).not.toHaveBeenCalled();
expect(adapter.editMessage).not.toHaveBeenCalled();
});
it('skips all post-stream delivery when message processing is cancelled', async () => {
const bot = new LettaBot({
workingDir: workDir,
allowedTools: [],
});
const adapter = {
id: 'mock',
name: 'Mock',
start: vi.fn(async () => {}),
stop: vi.fn(async () => {}),
isRunning: vi.fn(() => true),
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
editMessage: vi.fn(async () => {}),
sendTypingIndicator: vi.fn(async () => {}),
stopTypingIndicator: vi.fn(async () => {}),
supportsEditing: vi.fn(() => false),
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
};
(bot as any).sessionManager.runSession = vi.fn(async () => ({
session: { abort: vi.fn(async () => {}) },
stream: async function* () {
yield { type: 'assistant', content: 'this should never be delivered' };
yield { type: 'result', success: true, result: 'this should never be delivered' };
},
}));
const msg: InboundMessage = {
channel: 'discord',
chatId: 'chat-1',
userId: 'user-1',
text: 'hello',
timestamp: new Date(),
};
(bot as any).cancelledKeys.add('shared');
await (bot as any).processMessage(msg, adapter);
expect(adapter.sendMessage).not.toHaveBeenCalled();
expect(adapter.editMessage).not.toHaveBeenCalled();
});
});