525 lines
19 KiB
TypeScript
525 lines
19 KiB
TypeScript
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
|
import { mkdtempSync, rmSync } from 'node:fs';
|
|
import { join } from 'node:path';
|
|
import { tmpdir } from 'node:os';
|
|
import { LettaBot } from './bot.js';
|
|
import type { InboundMessage, OutboundMessage } from './types.js';
|
|
|
|
vi.mock('../tools/letta-api.js', () => ({
|
|
getPendingApprovals: vi.fn(),
|
|
rejectApproval: vi.fn(),
|
|
cancelRuns: vi.fn(),
|
|
cancelConversation: vi.fn(),
|
|
recoverOrphanedConversationApproval: vi.fn().mockResolvedValue({ recovered: false }),
|
|
recoverPendingApprovalsForAgent: vi.fn(),
|
|
isRecoverableConversationId: vi.fn(() => false),
|
|
getLatestRunError: vi.fn().mockResolvedValue(null),
|
|
getAgentModel: vi.fn(),
|
|
updateAgentModel: vi.fn(),
|
|
}));
|
|
|
|
describe('result divergence guard', () => {
|
|
let workDir: string;
|
|
|
|
beforeEach(() => {
|
|
workDir = mkdtempSync(join(tmpdir(), 'lettabot-result-guard-'));
|
|
});
|
|
|
|
afterEach(() => {
|
|
rmSync(workDir, { recursive: true, force: true });
|
|
});
|
|
|
|
it('does not resend full result text when streamed content was already flushed', async () => {
|
|
const bot = new LettaBot({
|
|
workingDir: workDir,
|
|
allowedTools: [],
|
|
});
|
|
|
|
const adapter = {
|
|
id: 'mock',
|
|
name: 'Mock',
|
|
start: vi.fn(async () => {}),
|
|
stop: vi.fn(async () => {}),
|
|
isRunning: vi.fn(() => true),
|
|
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
|
|
editMessage: vi.fn(async () => {}),
|
|
sendTypingIndicator: vi.fn(async () => {}),
|
|
stopTypingIndicator: vi.fn(async () => {}),
|
|
supportsEditing: vi.fn(() => false),
|
|
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
|
|
};
|
|
|
|
(bot as any).sessionManager.runSession = vi.fn(async () => ({
|
|
session: { abort: vi.fn(async () => {}) },
|
|
stream: async function* () {
|
|
// Assistant text is flushed when tool_call arrives.
|
|
yield { type: 'assistant', content: 'first segment' };
|
|
yield { type: 'tool_call', toolCallId: 'tc-1', toolName: 'Bash', toolInput: { command: 'echo hi' } };
|
|
// Result repeats the same text; this must not cause a duplicate send.
|
|
yield { type: 'result', success: true, result: 'first segment' };
|
|
},
|
|
}));
|
|
|
|
const msg: InboundMessage = {
|
|
channel: 'discord',
|
|
chatId: 'chat-1',
|
|
userId: 'user-1',
|
|
text: 'hello',
|
|
timestamp: new Date(),
|
|
};
|
|
|
|
await (bot as any).processMessage(msg, adapter);
|
|
|
|
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
|
|
expect(sentTexts).toEqual(['first segment']);
|
|
});
|
|
|
|
it('prefers streamed assistant text when result text diverges after flush', async () => {
|
|
const bot = new LettaBot({
|
|
workingDir: workDir,
|
|
allowedTools: [],
|
|
});
|
|
|
|
const adapter = {
|
|
id: 'mock',
|
|
name: 'Mock',
|
|
start: vi.fn(async () => {}),
|
|
stop: vi.fn(async () => {}),
|
|
isRunning: vi.fn(() => true),
|
|
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
|
|
editMessage: vi.fn(async () => {}),
|
|
sendTypingIndicator: vi.fn(async () => {}),
|
|
stopTypingIndicator: vi.fn(async () => {}),
|
|
supportsEditing: vi.fn(() => false),
|
|
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
|
|
};
|
|
|
|
(bot as any).sessionManager.runSession = vi.fn(async () => ({
|
|
session: { abort: vi.fn(async () => {}) },
|
|
stream: async function* () {
|
|
yield { type: 'assistant', content: 'streamed-segment' };
|
|
yield { type: 'tool_call', toolCallId: 'tc-1', toolName: 'Bash', toolInput: { command: 'echo hi' } };
|
|
// Divergent stale result should not replace or resend streamed content.
|
|
yield { type: 'result', success: true, result: 'stale-result-segment' };
|
|
},
|
|
}));
|
|
|
|
const msg: InboundMessage = {
|
|
channel: 'discord',
|
|
chatId: 'chat-1',
|
|
userId: 'user-1',
|
|
text: 'hello',
|
|
timestamp: new Date(),
|
|
};
|
|
|
|
await (bot as any).processMessage(msg, adapter);
|
|
|
|
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
|
|
expect(sentTexts).toEqual(['streamed-segment']);
|
|
});
|
|
|
|
it('stops after repeated failing lettabot CLI bash calls', async () => {
|
|
const bot = new LettaBot({
|
|
workingDir: workDir,
|
|
allowedTools: [],
|
|
maxToolCalls: 100,
|
|
});
|
|
const writeTurn = vi.fn(async () => {});
|
|
(bot as any).turnLogger = { write: writeTurn };
|
|
|
|
const abort = vi.fn(async () => {});
|
|
const adapter = {
|
|
id: 'mock',
|
|
name: 'Mock',
|
|
start: vi.fn(async () => {}),
|
|
stop: vi.fn(async () => {}),
|
|
isRunning: vi.fn(() => true),
|
|
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
|
|
editMessage: vi.fn(async () => {}),
|
|
sendTypingIndicator: vi.fn(async () => {}),
|
|
stopTypingIndicator: vi.fn(async () => {}),
|
|
supportsEditing: vi.fn(() => false),
|
|
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
|
|
};
|
|
|
|
(bot as any).sessionManager.runSession = vi.fn(async () => ({
|
|
session: { abort },
|
|
stream: async function* () {
|
|
yield { type: 'tool_call', toolCallId: 'tc-1', toolName: 'Bash', toolInput: { command: 'lettabot bluesky post --text "hi" --agent Bot' } };
|
|
yield { type: 'tool_result', toolCallId: 'tc-1', isError: true, content: 'Unknown command: bluesky' };
|
|
yield { type: 'tool_call', toolCallId: 'tc-2', toolName: 'Bash', toolInput: { command: 'lettabot bluesky post --text "hi" --agent Bot' } };
|
|
yield { type: 'tool_result', toolCallId: 'tc-2', isError: true, content: 'Unknown command: bluesky' };
|
|
yield { type: 'tool_call', toolCallId: 'tc-3', toolName: 'Bash', toolInput: { command: 'lettabot bluesky post --text "hi" --agent Bot' } };
|
|
yield { type: 'tool_result', toolCallId: 'tc-3', isError: true, content: 'Unknown command: bluesky' };
|
|
},
|
|
}));
|
|
|
|
const msg: InboundMessage = {
|
|
channel: 'discord',
|
|
chatId: 'chat-1',
|
|
userId: 'user-1',
|
|
text: 'hello',
|
|
timestamp: new Date(),
|
|
};
|
|
|
|
await (bot as any).processMessage(msg, adapter);
|
|
|
|
expect(abort).toHaveBeenCalled();
|
|
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text as string);
|
|
expect(sentTexts.some(text => text.includes('repeated CLI command failures'))).toBe(true);
|
|
expect(writeTurn).toHaveBeenCalledTimes(1);
|
|
expect(writeTurn).toHaveBeenCalledWith(expect.objectContaining({
|
|
error: 'repeated Bash failure abort (3x): lettabot bluesky post --text "hi" --agent Bot',
|
|
}));
|
|
});
|
|
|
|
it('stops consuming stream and avoids retry after explicit tool-loop abort', async () => {
|
|
const bot = new LettaBot({
|
|
workingDir: workDir,
|
|
allowedTools: [],
|
|
maxToolCalls: 1,
|
|
});
|
|
const writeTurn = vi.fn(async () => {});
|
|
(bot as any).turnLogger = { write: writeTurn };
|
|
|
|
const adapter = {
|
|
id: 'mock',
|
|
name: 'Mock',
|
|
start: vi.fn(async () => {}),
|
|
stop: vi.fn(async () => {}),
|
|
isRunning: vi.fn(() => true),
|
|
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
|
|
editMessage: vi.fn(async () => {}),
|
|
sendTypingIndicator: vi.fn(async () => {}),
|
|
stopTypingIndicator: vi.fn(async () => {}),
|
|
supportsEditing: vi.fn(() => false),
|
|
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
|
|
};
|
|
|
|
const runSession = vi.fn();
|
|
runSession.mockResolvedValueOnce({
|
|
session: { abort: vi.fn(async () => {}) },
|
|
stream: async function* () {
|
|
yield { type: 'tool_call', toolCallId: 'tc-1', toolName: 'Bash', toolInput: { command: 'echo hi' } };
|
|
// These trailing events should be ignored because the run was already aborted.
|
|
yield { type: 'assistant', content: 'late assistant text' };
|
|
yield { type: 'result', success: false, error: 'error', stopReason: 'cancelled', result: '' };
|
|
},
|
|
});
|
|
runSession.mockResolvedValueOnce({
|
|
session: { abort: vi.fn(async () => {}) },
|
|
stream: async function* () {
|
|
yield { type: 'assistant', content: 'retried response' };
|
|
yield { type: 'result', success: true, result: 'retried response' };
|
|
},
|
|
});
|
|
(bot as any).sessionManager.runSession = runSession;
|
|
|
|
const msg: InboundMessage = {
|
|
channel: 'discord',
|
|
chatId: 'chat-1',
|
|
userId: 'user-1',
|
|
text: 'hello',
|
|
timestamp: new Date(),
|
|
};
|
|
|
|
await (bot as any).processMessage(msg, adapter);
|
|
|
|
expect(runSession).toHaveBeenCalledTimes(1);
|
|
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
|
|
expect(sentTexts).toEqual(['(Agent got stuck in a tool loop and was stopped. Try sending your message again.)']);
|
|
expect(writeTurn).toHaveBeenCalledTimes(1);
|
|
expect(writeTurn).toHaveBeenCalledWith(expect.objectContaining({
|
|
error: 'tool loop abort after 1 tool calls',
|
|
}));
|
|
});
|
|
|
|
it('does not deliver reasoning text from error results as the response', async () => {
|
|
const bot = new LettaBot({
|
|
workingDir: workDir,
|
|
allowedTools: [],
|
|
});
|
|
|
|
const adapter = {
|
|
id: 'mock',
|
|
name: 'Mock',
|
|
start: vi.fn(async () => {}),
|
|
stop: vi.fn(async () => {}),
|
|
isRunning: vi.fn(() => true),
|
|
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
|
|
editMessage: vi.fn(async () => {}),
|
|
sendTypingIndicator: vi.fn(async () => {}),
|
|
stopTypingIndicator: vi.fn(async () => {}),
|
|
supportsEditing: vi.fn(() => false),
|
|
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
|
|
};
|
|
|
|
(bot as any).sessionManager.runSession = vi.fn(async () => ({
|
|
session: { abort: vi.fn(async () => {}) },
|
|
stream: async function* () {
|
|
// Reproduce the exact bug path: reasoning tokens only, then an error
|
|
// result whose result field contains the leaked reasoning text.
|
|
yield { type: 'reasoning', content: '**Evaluating response protocol**\n\nI\'m trying to figure out how to respond...' };
|
|
yield {
|
|
type: 'result',
|
|
success: false,
|
|
error: 'error',
|
|
stopReason: 'llm_api_error',
|
|
result: '**Evaluating response protocol**\n\nI\'m trying to figure out how to respond...',
|
|
};
|
|
},
|
|
}));
|
|
|
|
const msg: InboundMessage = {
|
|
channel: 'discord',
|
|
chatId: 'chat-1',
|
|
userId: 'user-1',
|
|
text: 'hello',
|
|
timestamp: new Date(),
|
|
};
|
|
|
|
await (bot as any).processMessage(msg, adapter);
|
|
|
|
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
|
|
// Must show a formatted error message, never the raw reasoning text.
|
|
expect(sentTexts.length).toBeGreaterThanOrEqual(1);
|
|
const lastSent = sentTexts[sentTexts.length - 1];
|
|
expect(lastSent).not.toContain('Evaluating response protocol');
|
|
expect(lastSent).toMatch(/\(.*\)/); // Parenthesized system message
|
|
});
|
|
|
|
it('rebinds foreground run on post-tool-call assistant events with new run ID (#527)', async () => {
|
|
const bot = new LettaBot({
|
|
workingDir: workDir,
|
|
allowedTools: [],
|
|
});
|
|
|
|
const adapter = {
|
|
id: 'mock',
|
|
name: 'Mock',
|
|
start: vi.fn(async () => {}),
|
|
stop: vi.fn(async () => {}),
|
|
isRunning: vi.fn(() => true),
|
|
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
|
|
editMessage: vi.fn(async () => {}),
|
|
sendTypingIndicator: vi.fn(async () => {}),
|
|
stopTypingIndicator: vi.fn(async () => {}),
|
|
supportsEditing: vi.fn(() => false),
|
|
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
|
|
};
|
|
|
|
// Server assigns run-2 after tool call -- both runs are part of the same turn
|
|
(bot as any).sessionManager.runSession = vi.fn(async () => ({
|
|
session: { abort: vi.fn(async () => {}) },
|
|
stream: async function* () {
|
|
yield { type: 'assistant', content: 'Before tool. ', runId: 'run-1' };
|
|
yield { type: 'tool_call', toolCallId: 'tc-1', toolName: 'Bash', toolInput: { command: 'echo ok' }, runId: 'run-1' };
|
|
yield { type: 'tool_result', content: 'ok', isError: false, runId: 'run-1' };
|
|
yield { type: 'assistant', content: 'After tool.', runId: 'run-2' };
|
|
yield { type: 'result', success: true, result: 'Before tool. After tool.', runIds: ['run-2'] };
|
|
},
|
|
}));
|
|
|
|
const msg: InboundMessage = {
|
|
channel: 'discord',
|
|
chatId: 'chat-1',
|
|
userId: 'user-1',
|
|
text: 'run a command',
|
|
timestamp: new Date(),
|
|
};
|
|
|
|
await (bot as any).processMessage(msg, adapter);
|
|
|
|
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
|
|
// Pre-tool and post-tool text are separate messages (finalized on type change)
|
|
expect(sentTexts).toEqual(['Before tool. ', 'After tool.']);
|
|
});
|
|
|
|
it('locks foreground on first event with run ID and displays immediately', async () => {
|
|
const bot = new LettaBot({
|
|
workingDir: workDir,
|
|
allowedTools: [],
|
|
display: { showReasoning: true, showToolCalls: true },
|
|
});
|
|
|
|
const adapter = {
|
|
id: 'mock',
|
|
name: 'Mock',
|
|
start: vi.fn(async () => {}),
|
|
stop: vi.fn(async () => {}),
|
|
isRunning: vi.fn(() => true),
|
|
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
|
|
editMessage: vi.fn(async () => {}),
|
|
sendTypingIndicator: vi.fn(async () => {}),
|
|
stopTypingIndicator: vi.fn(async () => {}),
|
|
supportsEditing: vi.fn(() => false),
|
|
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
|
|
};
|
|
|
|
// Reasoning and tool_call arrive before any assistant event. The pipeline
|
|
// locks foreground on the first event with a run ID (the reasoning event)
|
|
// and processes everything immediately -- no buffering.
|
|
(bot as any).sessionManager.runSession = vi.fn(async () => ({
|
|
session: { abort: vi.fn(async () => {}) },
|
|
stream: async function* () {
|
|
yield { type: 'reasoning', content: 'pre-tool thinking', runId: 'run-tool' };
|
|
yield { type: 'tool_call', toolCallId: 'tc-1', toolName: 'Bash', toolInput: { command: 'echo hi' }, runId: 'run-tool' };
|
|
yield { type: 'assistant', content: 'main reply', runId: 'run-main' };
|
|
yield { type: 'result', success: true, result: 'main reply', runIds: ['run-main'] };
|
|
},
|
|
}));
|
|
|
|
const msg: InboundMessage = {
|
|
channel: 'discord',
|
|
chatId: 'chat-1',
|
|
userId: 'user-1',
|
|
text: 'hello',
|
|
timestamp: new Date(),
|
|
};
|
|
|
|
await (bot as any).processMessage(msg, adapter);
|
|
|
|
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
|
|
// Reasoning display + tool call display + main reply -- all immediate, no buffering
|
|
expect(sentTexts.length).toBe(3);
|
|
expect(sentTexts[2]).toBe('main reply');
|
|
});
|
|
|
|
it('retries once when a competing result arrives before any foreground terminal result', async () => {
|
|
const bot = new LettaBot({
|
|
workingDir: workDir,
|
|
allowedTools: [],
|
|
});
|
|
|
|
const adapter = {
|
|
id: 'mock',
|
|
name: 'Mock',
|
|
start: vi.fn(async () => {}),
|
|
stop: vi.fn(async () => {}),
|
|
isRunning: vi.fn(() => true),
|
|
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
|
|
editMessage: vi.fn(async () => {}),
|
|
sendTypingIndicator: vi.fn(async () => {}),
|
|
stopTypingIndicator: vi.fn(async () => {}),
|
|
supportsEditing: vi.fn(() => false),
|
|
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
|
|
};
|
|
|
|
const runSession = vi.fn();
|
|
runSession.mockResolvedValueOnce({
|
|
session: { abort: vi.fn(async () => {}) },
|
|
stream: async function* () {
|
|
yield { type: 'assistant', content: 'partial foreground', runId: 'run-main' };
|
|
yield { type: 'result', success: true, result: 'background final', runIds: ['run-bg'] };
|
|
},
|
|
});
|
|
runSession.mockResolvedValueOnce({
|
|
session: { abort: vi.fn(async () => {}) },
|
|
stream: async function* () {
|
|
yield { type: 'assistant', content: 'main reply', runId: 'run-main' };
|
|
yield { type: 'result', success: true, result: 'main reply', runIds: ['run-main'] };
|
|
},
|
|
});
|
|
(bot as any).sessionManager.runSession = runSession;
|
|
|
|
const msg: InboundMessage = {
|
|
channel: 'discord',
|
|
chatId: 'chat-1',
|
|
userId: 'user-1',
|
|
text: 'hello',
|
|
timestamp: new Date(),
|
|
};
|
|
|
|
await (bot as any).processMessage(msg, adapter);
|
|
|
|
expect(runSession).toHaveBeenCalledTimes(2);
|
|
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
|
|
expect(sentTexts).toEqual(['main reply']);
|
|
});
|
|
|
|
it('treats <no-reply/> as intentional silence and does not deliver a visible message', async () => {
|
|
const bot = new LettaBot({
|
|
workingDir: workDir,
|
|
allowedTools: [],
|
|
});
|
|
|
|
const adapter = {
|
|
id: 'mock',
|
|
name: 'Mock',
|
|
start: vi.fn(async () => {}),
|
|
stop: vi.fn(async () => {}),
|
|
isRunning: vi.fn(() => true),
|
|
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
|
|
editMessage: vi.fn(async () => {}),
|
|
sendTypingIndicator: vi.fn(async () => {}),
|
|
stopTypingIndicator: vi.fn(async () => {}),
|
|
supportsEditing: vi.fn(() => false),
|
|
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
|
|
};
|
|
|
|
(bot as any).sessionManager.runSession = vi.fn(async () => ({
|
|
session: { abort: vi.fn(async () => {}) },
|
|
stream: async function* () {
|
|
yield { type: 'assistant', content: '<no-reply/>' };
|
|
yield { type: 'result', success: true, result: '<no-reply/>' };
|
|
},
|
|
}));
|
|
|
|
const msg: InboundMessage = {
|
|
channel: 'discord',
|
|
chatId: 'chat-1',
|
|
userId: 'user-1',
|
|
text: 'hello',
|
|
timestamp: new Date(),
|
|
};
|
|
|
|
await (bot as any).processMessage(msg, adapter);
|
|
|
|
expect(adapter.sendMessage).not.toHaveBeenCalled();
|
|
expect(adapter.editMessage).not.toHaveBeenCalled();
|
|
});
|
|
|
|
it('skips all post-stream delivery when message processing is cancelled', async () => {
|
|
const bot = new LettaBot({
|
|
workingDir: workDir,
|
|
allowedTools: [],
|
|
});
|
|
|
|
const adapter = {
|
|
id: 'mock',
|
|
name: 'Mock',
|
|
start: vi.fn(async () => {}),
|
|
stop: vi.fn(async () => {}),
|
|
isRunning: vi.fn(() => true),
|
|
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
|
|
editMessage: vi.fn(async () => {}),
|
|
sendTypingIndicator: vi.fn(async () => {}),
|
|
stopTypingIndicator: vi.fn(async () => {}),
|
|
supportsEditing: vi.fn(() => false),
|
|
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
|
|
};
|
|
|
|
(bot as any).sessionManager.runSession = vi.fn(async () => ({
|
|
session: { abort: vi.fn(async () => {}) },
|
|
stream: async function* () {
|
|
yield { type: 'assistant', content: 'this should never be delivered' };
|
|
yield { type: 'result', success: true, result: 'this should never be delivered' };
|
|
},
|
|
}));
|
|
|
|
const msg: InboundMessage = {
|
|
channel: 'discord',
|
|
chatId: 'chat-1',
|
|
userId: 'user-1',
|
|
text: 'hello',
|
|
timestamp: new Date(),
|
|
};
|
|
|
|
(bot as any).cancelledKeys.add('shared');
|
|
await (bot as any).processMessage(msg, adapter);
|
|
|
|
expect(adapter.sendMessage).not.toHaveBeenCalled();
|
|
expect(adapter.editMessage).not.toHaveBeenCalled();
|
|
});
|
|
});
|