ci(test): split unit vs API integration tests (#797)
This commit is contained in:
@@ -1,491 +0,0 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import { spawn } from "node:child_process";
|
||||
import type {
|
||||
ControlResponse,
|
||||
ErrorMessage,
|
||||
ResultMessage,
|
||||
StreamEvent,
|
||||
SystemInitMessage,
|
||||
WireMessage,
|
||||
} from "../types/protocol";
|
||||
|
||||
/**
|
||||
* Tests for --input-format stream-json bidirectional communication.
|
||||
* These verify the CLI's wire format for bidirectional communication.
|
||||
*/
|
||||
|
||||
// Deliberately prescriptive wording: the model should answer in a single step
// and must not reach for any tool.
const FAST_PROMPT = [
  "This is a test.",
  "Do not call any tools.",
  "Just respond with the word OK and nothing else.",
].join(" ");
|
||||
|
||||
/**
 * Drives the CLI in bidirectional stream-json mode and collects its output.
 *
 * The helper is event-driven rather than timer-driven:
 * - nothing is written to stdin until the `system`/`init` message arrives;
 * - each later input is written shortly after the previous one was answered
 *   (a `result` or a `control_response`);
 * - stdin is closed once every input has been sent and answered, or as soon
 *   as an `error` message arrives when one of the inputs is not valid JSON.
 *
 * @param inputs - Lines to write to the child's stdin, in order. Each is
 *   normally a JSON-encoded wire message; a string that is not valid JSON is
 *   treated as a deliberate "invalid input" case.
 * @param extraArgs - Extra CLI flags appended after the fixed argument list.
 * @param timeoutMs - Hard limit after which the child is killed and the
 *   promise rejects.
 * @returns Every value parsed from the child's stdout, in arrival order.
 *   Rejects if the child cannot be spawned, if it exits non-zero before the
 *   expected responses were seen, or on timeout.
 */
async function runBidirectional(
  inputs: string[],
  extraArgs: string[] = [],
  timeoutMs = 180000, // 180s timeout - CI can be very slow
): Promise<object[]> {
  return new Promise((resolve, reject) => {
    const proc = spawn(
      "bun",
      [
        "run",
        "dev",
        "-p",
        "--input-format",
        "stream-json",
        "--output-format",
        "stream-json",
        "--new-agent",
        "-m",
        "haiku",
        "--yolo",
        ...extraArgs,
      ],
      {
        cwd: process.cwd(),
        // Mark as subagent to prevent polluting user's LRU settings
        env: { ...process.env, LETTA_CODE_AGENT_ROLE: "subagent" },
      },
    );

    // Decode at the stream level so a multi-byte UTF-8 character that is
    // split across two chunks is not corrupted.
    proc.stdout?.setEncoding("utf8");
    proc.stderr?.setEncoding("utf8");

    const objects: object[] = [];
    let buffer = "";
    let stderr = "";
    let inputIndex = 0;
    let initReceived = false;
    let closing = false;
    let userResultsReceived = 0;
    let controlResponsesReceived = 0;

    // Classify the inputs up front so we know how many answers to wait for.
    const inputTypes = inputs.map((raw): unknown => {
      try {
        return JSON.parse(raw).type;
      } catch {
        return "invalid"; // Invalid JSON
      }
    });
    const expectedUserResults = inputTypes.filter((t) => t === "user").length;
    const expectedControlResponses = inputTypes.filter(
      (t) => t === "control_request",
    ).length;
    const hasInvalidInput = inputTypes.includes("invalid");

    // Ends stdin after a short grace period; safe to call more than once.
    const closeStdinSoon = () => {
      if (closing) return;
      closing = true;
      setTimeout(() => proc.stdin?.end(), 500);
    };

    // Closes stdin once every input was sent and every expected answer seen.
    const maybeClose = () => {
      const allInputsSent = inputIndex >= inputs.length;
      const allUserResultsDone = userResultsReceived >= expectedUserResults;
      const allControlResponsesDone =
        controlResponsesReceived >= expectedControlResponses;
      if (allInputsSent && allUserResultsDone && allControlResponsesDone) {
        closeStdinSoon();
      }
    };

    const sendNextInput = () => {
      // Never write once shutdown has started: stdin may already be ended.
      if (closing || inputIndex >= inputs.length) return;
      proc.stdin?.write(`${inputs[inputIndex]}\n`);
      inputIndex++;
    };

    // Sends the next input after a brief delay, giving the CLI time to become
    // ready for it.
    const scheduleNextInput = () => {
      if (inputIndex < inputs.length) {
        setTimeout(sendNextInput, 200);
      }
    };

    const processLine = (line: string) => {
      if (!line.trim()) return;

      // Only the parse itself is guarded, so real handler bugs are not hidden.
      let parsed: unknown;
      try {
        parsed = JSON.parse(line);
      } catch {
        return; // Not valid JSON, ignore
      }
      objects.push(parsed as object);
      if (typeof parsed !== "object" || parsed === null) return;
      const msg = parsed as { type?: unknown; subtype?: unknown };

      // Init message is the signal to start sending inputs.
      if (msg.type === "system" && msg.subtype === "init" && !initReceived) {
        initReceived = true;
        sendNextInput();
      }

      // A control request was answered: move on to the next input, if any.
      // Without this, a sequence whose control_request is not the last input
      // would stall until the safety timeout.
      if (msg.type === "control_response") {
        controlResponsesReceived++;
        scheduleNextInput();
        maybeClose();
      }

      // A user turn finished.
      if (msg.type === "result") {
        userResultsReceived++;
        scheduleNextInput();
        maybeClose();
      }

      // For the invalid-JSON scenario the error message is the final answer.
      if (msg.type === "error" && hasInvalidInput) {
        closeStdinSoon();
      }
    };

    proc.stdout?.on("data", (chunk: string) => {
      buffer += chunk;
      const lines = buffer.split("\n");
      buffer = lines.pop() ?? ""; // Keep incomplete line in buffer
      for (const line of lines) {
        processLine(line);
      }
    });

    proc.stderr?.on("data", (chunk: string) => {
      stderr += chunk;
    });

    // Safety timeout
    const timeout = setTimeout(() => {
      proc.kill();
      reject(
        new Error(
          `Timeout after ${timeoutMs}ms. Received ${objects.length} objects, init: ${initReceived}, userResults: ${userResultsReceived}/${expectedUserResults}, controlResponses: ${controlResponsesReceived}/${expectedControlResponses}. stderr: ${stderr}`,
        ),
      );
    }, timeoutMs);

    // Spawn failures (e.g. the executable is missing) surface here rather
    // than as a "close" event.
    proc.on("error", (err) => {
      clearTimeout(timeout);
      reject(err);
    });

    proc.on("close", (code) => {
      clearTimeout(timeout);

      // Process any remaining buffer
      if (buffer.trim()) {
        processLine(buffer);
      }

      // Check if we got enough results
      const gotExpectedResults =
        userResultsReceived >= expectedUserResults &&
        controlResponsesReceived >= expectedControlResponses;

      if (objects.length === 0 && code !== 0) {
        reject(
          new Error(
            `Process exited with code ${code}, no output received. stderr: ${stderr}`,
          ),
        );
      } else if (!gotExpectedResults && code !== 0) {
        reject(
          new Error(
            `Process exited with code ${code} before all results received. ` +
              `Got ${userResultsReceived}/${expectedUserResults} user results, ` +
              `${controlResponsesReceived}/${expectedControlResponses} control responses. ` +
              `inputIndex: ${inputIndex}, initReceived: ${initReceived}. stderr: ${stderr}`,
          ),
        );
      } else {
        resolve(objects);
      }
    });
  });
}
|
||||
|
||||
describe("input-format stream-json", () => {
  // Per-test budgets; each sits above the timeout handed to runBidirectional.
  const SINGLE_TURN = { timeout: 200000 };
  const LONG_RUNNING = { timeout: 320000 };

  // --- stdin line builders -------------------------------------------------
  const userInput = (content: string): string =>
    JSON.stringify({ type: "user", message: { role: "user", content } });

  const controlInput = (request_id: string, subtype: string): string =>
    JSON.stringify({
      type: "control_request",
      request_id,
      request: { subtype },
    });

  // Runs the CLI and views its output as wire messages.
  const run = async (
    ...args: Parameters<typeof runBidirectional>
  ): Promise<WireMessage[]> =>
    (await runBidirectional(...args)) as WireMessage[];

  // --- type guards ---------------------------------------------------------
  const isInit = (o: WireMessage): o is SystemInitMessage =>
    o.type === "system" && "subtype" in o && o.subtype === "init";

  const isMessage = (o: WireMessage): o is WireMessage & { type: "message" } =>
    o.type === "message";

  const isResult = (o: WireMessage): o is ResultMessage => o.type === "result";

  const isStreamEvent = (o: WireMessage): o is StreamEvent =>
    o.type === "stream_event";

  const isError = (o: WireMessage): o is ErrorMessage => o.type === "error";

  const isControlResponse = (o: WireMessage): o is ControlResponse =>
    o.type === "control_response";

  const respondsTo =
    (requestId: string) =>
    (o: WireMessage): o is ControlResponse =>
      o.type === "control_response" && o.response?.request_id === requestId;

  test(
    "initialize control request returns session info",
    async () => {
      const output = await run([controlInput("init_1", "initialize")]);

      // Should have init event
      const init = output.find(isInit);
      expect(init).toBeDefined();
      expect(init?.agent_id).toBeDefined();
      expect(init?.session_id).toBeDefined();
      expect(init?.model).toBeDefined();
      expect(init?.tools).toBeInstanceOf(Array);

      // Should have control_response
      const reply = output.find(isControlResponse);
      expect(reply).toBeDefined();
      expect(reply?.response.subtype).toBe("success");
      expect(reply?.response.request_id).toBe("init_1");
      if (reply?.response.subtype === "success") {
        const payload = reply.response.response as
          | { agent_id?: string }
          | undefined;
        expect(payload?.agent_id).toBeDefined();
      }
    },
    SINGLE_TURN,
  );

  test(
    "user message returns assistant response and result",
    async () => {
      const output = await run([userInput(FAST_PROMPT)]);

      // Should have init event
      expect(output.find(isInit)).toBeDefined();

      // Should have message events
      const messages = output.filter(isMessage);
      expect(messages.length).toBeGreaterThan(0);

      // Every message carries a session_id. uuid is present on content
      // messages (reasoning, assistant) but not on meta messages
      // (stop_reason, usage_statistics).
      for (const message of messages) {
        expect(message.session_id).toBeDefined();
      }

      // Content messages should have uuid
      const contentMessages = messages.filter(
        (m) =>
          "message_type" in m &&
          (m.message_type === "reasoning_message" ||
            m.message_type === "assistant_message"),
      );
      for (const message of contentMessages) {
        expect(message.uuid).toBeDefined();
      }

      // Should have result
      const outcome = output.find(isResult);
      expect(outcome).toBeDefined();
      expect(outcome?.subtype).toBe("success");
      expect(outcome?.session_id).toBeDefined();
      expect(outcome?.agent_id).toBeDefined();
      expect(outcome?.duration_ms).toBeGreaterThan(0);
    },
    SINGLE_TURN,
  );

  test(
    "multi-turn conversation maintains context",
    async () => {
      // Two sequential LLM calls, so the helper gets a longer budget.
      const output = await run(
        [userInput("Say hello"), userInput("Say goodbye")],
        [], // no extra args
        300000, // 300s for 2 sequential LLM calls - CI can be very slow
      );

      // Should have at least two results (one per turn)
      const outcomes = output.filter(isResult);
      expect(outcomes.length).toBeGreaterThanOrEqual(2);

      // Every result should be successful
      for (const outcome of outcomes) {
        expect(outcome.subtype).toBe("success");
        expect(outcome.session_id).toBeDefined();
        expect(outcome.agent_id).toBeDefined();
      }

      // The session_id should be consistent across turns (same agent)
      const first = outcomes[0];
      const last = outcomes[outcomes.length - 1];
      expect(first).toBeDefined();
      expect(last).toBeDefined();
      if (first && last) {
        expect(first.session_id).toBe(last.session_id);
      }
    },
    LONG_RUNNING,
  );

  test(
    "interrupt control request is acknowledged",
    async () => {
      const output = await run([controlInput("int_1", "interrupt")]);

      // Should have control_response for interrupt
      const reply = output.find(respondsTo("int_1"));
      expect(reply).toBeDefined();
      expect(reply?.response.subtype).toBe("success");
    },
    SINGLE_TURN,
  );

  test(
    "--include-partial-messages emits stream_event in bidirectional mode",
    async () => {
      const output = await run(
        [userInput(FAST_PROMPT)],
        ["--include-partial-messages"],
      );

      // Should have stream_event messages (not just "message" type)
      const events = output.filter(isStreamEvent);
      expect(events.length).toBeGreaterThan(0);

      // Each stream_event has the event payload and a session_id. uuid is
      // present on content events but not on meta events (stop_reason,
      // usage_statistics).
      for (const streamEvent of events) {
        expect(streamEvent.event).toBeDefined();
        expect(streamEvent.session_id).toBeDefined();
      }

      // Content events should have uuid
      const contentEvents = events.filter(
        (e) =>
          "message_type" in e.event &&
          (e.event.message_type === "reasoning_message" ||
            e.event.message_type === "assistant_message"),
      );
      for (const streamEvent of contentEvents) {
        expect(streamEvent.uuid).toBeDefined();
      }

      // Should still have result
      const outcome = output.find(isResult);
      expect(outcome).toBeDefined();
      expect(outcome?.subtype).toBe("success");
    },
    SINGLE_TURN,
  );

  test(
    "unknown control request returns error",
    async () => {
      const output = await run([controlInput("unknown_1", "unknown_subtype")]);

      // Should have control_response with error
      const reply = output.find(respondsTo("unknown_1"));
      expect(reply).toBeDefined();
      expect(reply?.response.subtype).toBe("error");
    },
    SINGLE_TURN,
  );

  test(
    "invalid JSON input returns error message",
    async () => {
      // Use raw string instead of JSON
      const output = await run(["not valid json"]);

      // Should have error message
      const failure = output.find(isError);
      expect(failure).toBeDefined();
      expect(failure?.message).toContain("Invalid JSON");
    },
    SINGLE_TURN,
  );

  test(
    "Task tool with explore subagent works",
    async () => {
      // Prescriptive prompt to ensure Task tool is used
      const prompt =
        "You MUST use the Task tool with subagent_type='explore' to find TypeScript files (*.ts) in the src directory. " +
        "Return only the subagent's report, nothing else.";
      const output = await run(
        [userInput(prompt)],
        [],
        300000, // 5 min timeout - subagent spawn + execution can be slow
      );

      // Should have a successful result
      const outcome = output.find(isResult);
      expect(outcome).toBeDefined();
      expect(outcome?.subtype).toBe("success");

      // Should have auto_approval events (Task tool was auto-approved via --yolo)
      const autoApprovals = output.filter((o) => o.type === "auto_approval");
      expect(autoApprovals.length).toBeGreaterThan(0);
    },
    LONG_RUNNING,
  );
});
|
||||
@@ -1,223 +0,0 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import { spawn } from "node:child_process";
|
||||
import type {
|
||||
ResultMessage,
|
||||
StreamEvent,
|
||||
SystemInitMessage,
|
||||
} from "../types/protocol";
|
||||
|
||||
/**
|
||||
* Tests for stream-json output format.
|
||||
* These verify the message structure matches the wire format types.
|
||||
*/
|
||||
|
||||
/**
 * Runs the CLI once in headless mode with stream-json output and returns the
 * JSON lines it printed.
 *
 * @param prompt - Prompt passed to the CLI via `-p`.
 * @param extraArgs - Extra CLI flags appended after the fixed argument list.
 * @param timeoutMs - Hard limit after which the child is killed and the
 *   promise rejects.
 * @returns The non-empty stdout lines that parse as JSON, in output order and
 *   still in their raw string form.
 *   Rejects if the child cannot be spawned, on timeout, or if it exits
 *   non-zero without having printed a `"type":"result"` line.
 */
async function runHeadlessCommand(
  prompt: string,
  extraArgs: string[] = [],
  timeoutMs = 180000, // 180s timeout - CI can be very slow
): Promise<string[]> {
  const isJson = (line: string): boolean => {
    try {
      JSON.parse(line);
      return true;
    } catch {
      return false;
    }
  };

  return new Promise((resolve, reject) => {
    const proc = spawn(
      "bun",
      [
        "run",
        "dev",
        "--new-agent",
        "-p",
        prompt,
        "--output-format",
        "stream-json",
        "--yolo",
        "-m",
        "haiku",
        ...extraArgs,
      ],
      {
        cwd: process.cwd(),
        // Mark as subagent to prevent polluting user's LRU settings
        env: { ...process.env, LETTA_CODE_AGENT_ROLE: "subagent" },
      },
    );

    // Decode at the stream level so a multi-byte UTF-8 character that is
    // split across two chunks is not corrupted.
    proc.stdout.setEncoding("utf8");
    proc.stderr.setEncoding("utf8");

    let stdout = "";
    let stderr = "";

    proc.stdout.on("data", (chunk: string) => {
      stdout += chunk;
    });

    proc.stderr.on("data", (chunk: string) => {
      stderr += chunk;
    });

    // Safety timeout for CI
    const timeout = setTimeout(() => {
      proc.kill();
      reject(new Error(`Process timeout after ${timeoutMs}ms: ${stderr}`));
    }, timeoutMs);

    // Spawn failures (e.g. the executable is missing) surface here rather
    // than as a "close" event.
    proc.on("error", (err) => {
      clearTimeout(timeout);
      reject(err);
    });

    proc.on("close", (code) => {
      clearTimeout(timeout);

      // A non-zero exit is tolerated as long as a result line was printed.
      if (code !== 0 && !stdout.includes('"type":"result"')) {
        reject(new Error(`Process exited with code ${code}: ${stderr}`));
        return;
      }

      // Keep only the line-delimited JSON; anything else on stdout is noise.
      resolve(
        stdout.split("\n").filter((line) => line.trim() !== "" && isJson(line)),
      );
    });
  });
}
|
||||
|
||||
// Deliberately prescriptive wording: the model should answer in a single step
// and must not reach for any tool.
const FAST_PROMPT = [
  "This is a test.",
  "Do not call any tools.",
  "Just respond with the word OK and nothing else.",
].join(" ");
|
||||
|
||||
describe("stream-json format", () => {
  // Sits above runHeadlessCommand's default 180s timeout.
  const TEST_OPTIONS = { timeout: 200000 };

  // Runs the CLI with FAST_PROMPT and decodes every returned line once.
  // runHeadlessCommand only returns lines that already parsed as JSON, so the
  // second parse here is not expected to throw.
  const runAndParse = async (extraArgs: string[] = []) => {
    const lines = await runHeadlessCommand(FAST_PROMPT, extraArgs);
    return lines.map((line) => JSON.parse(line));
  };

  // Message types that carry content (as opposed to meta messages such as
  // stop_reason / usage_statistics).
  const isContentType = (messageType: unknown): boolean =>
    messageType === "reasoning_message" || messageType === "assistant_message";

  test(
    "init message has type 'system' with subtype 'init'",
    async () => {
      const output = await runAndParse();
      const init = output.find(
        (o) => o?.type === "system" && o?.subtype === "init",
      ) as SystemInitMessage | undefined;

      expect(init).toBeDefined();
      if (!init) throw new Error("initLine not found");

      expect(init.type).toBe("system");
      expect(init.subtype).toBe("init");
      expect(init.agent_id).toBeDefined();
      expect(init.session_id).toBe(init.agent_id); // session_id should equal agent_id
      expect(init.model).toBeDefined();
      expect(init.tools).toBeInstanceOf(Array);
      expect(init.cwd).toBeDefined();
      expect(init.uuid).toBe(`init-${init.agent_id}`);
    },
    TEST_OPTIONS,
  );

  test(
    "messages have session_id and uuid",
    async () => {
      const output = await runAndParse();
      const messages = output.filter((o) => o?.type === "message");

      // Prefer a content message for the uuid check, because meta messages
      // do not carry a uuid. Fall back to the first message so the check is
      // never weaker than looking at whichever message arrived first.
      const msg = (messages.find((m) => isContentType(m?.message_type)) ??
        messages[0]) as { session_id: string; uuid: string } | undefined;

      expect(msg).toBeDefined();
      if (!msg) throw new Error("messageLine not found");

      expect(msg.session_id).toBeDefined();
      // uuid should be otid or id from the Letta SDK chunk
      expect(msg.uuid).toBeTruthy();
    },
    TEST_OPTIONS,
  );

  test(
    "result message has correct format",
    async () => {
      const output = await runAndParse();
      const result = output.find((o) => o?.type === "result") as
        | (ResultMessage & { uuid: string })
        | undefined;

      expect(result).toBeDefined();
      if (!result) throw new Error("resultLine not found");

      expect(result.type).toBe("result");
      expect(result.subtype).toBe("success");
      expect(result.session_id).toBeDefined();
      expect(result.agent_id).toBeDefined();
      expect(result.session_id).toBe(result.agent_id);
      expect(result.duration_ms).toBeGreaterThan(0);
      expect(result.uuid).toContain("result-");
      expect(result.result).toBeDefined();
    },
    TEST_OPTIONS,
  );

  test(
    "--include-partial-messages wraps chunks in stream_event",
    async () => {
      const output = await runAndParse(["--include-partial-messages"]);
      const streamEvents = output.filter((o) => o?.type === "stream_event");

      // Prefer a content event for the uuid check (meta events have no
      // uuid); fall back to the first stream_event.
      const event = (streamEvents.find((e) =>
        isContentType(e?.event?.message_type),
      ) ?? streamEvents[0]) as StreamEvent | undefined;

      expect(event).toBeDefined();
      if (!event) throw new Error("streamEventLine not found");

      expect(event.type).toBe("stream_event");
      expect(event.event).toBeDefined();
      expect(event.session_id).toBeDefined();
      expect(event.uuid).toBeDefined();
      // The event should contain the original Letta SDK chunk
      expect("message_type" in event.event).toBe(true);
    },
    TEST_OPTIONS,
  );

  test(
    "without --include-partial-messages, messages are type 'message'",
    async () => {
      const output = await runAndParse();

      // Should have message lines, not stream_event
      const messageCount = output.filter((o) => o?.type === "message").length;
      const streamEventCount = output.filter(
        (o) => o?.type === "stream_event",
      ).length;

      // We should have some message lines (reasoning, assistant, stop_reason, etc.)
      // In rare cases with very fast responses, we might only get init + result
      // So check that IF we have content, it's "message" not "stream_event"
      if (messageCount > 0 || streamEventCount > 0) {
        expect(messageCount).toBeGreaterThan(0);
        expect(streamEventCount).toBe(0);
      }

      // Always should have a result
      expect(output.find((o) => o?.type === "result")).toBeDefined();
    },
    TEST_OPTIONS,
  );
});
|
||||
@@ -1,257 +0,0 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import { spawn } from "node:child_process";
|
||||
|
||||
/**
|
||||
* Integration test for lazy approval recovery (LET-7101).
|
||||
*
|
||||
* NOTE: The lazy approval recovery is primarily designed for TUI mode where:
|
||||
* 1. User has a session with pending approvals (e.g., from a previous run)
|
||||
* 2. User sends a new message before responding to the approval
|
||||
* 3. Server returns CONFLICT error
|
||||
* 4. CLI recovers by auto-denying stale approvals and retrying
|
||||
*
|
||||
* In bidirectional mode, messages sent during permission wait are dropped
|
||||
* (see headless.ts lines 1710-1714), so we can't directly test the CONFLICT
|
||||
* scenario here. This test validates that the flow doesn't crash when
|
||||
* messages are sent while approvals are pending.
|
||||
*
|
||||
* The RecoveryMessage emission can be tested by:
|
||||
* 1. Manual testing in TUI mode (start session with orphaned approval)
|
||||
* 2. Or by modifying headless mode to not drop messages during permission wait
|
||||
*/
|
||||
|
||||
// First user turn: asks for a Bash tool call, which requires approval.
const BASH_TRIGGER_PROMPT =
  "Run this exact bash command: echo test123. " +
  "Do not use any other tools.";

// Second user turn, written to stdin while that approval is still pending.
const INTERRUPT_MESSAGE =
  "Actually, just say OK instead. " + "Do not call any tools.";
|
||||
|
||||
/**
 * Loose shape of one JSON line read from the CLI's stdout. Only the fields
 * the test inspects are named; everything else is reachable through the
 * index signature.
 */
type StreamMessage = {
  type: string;
  subtype?: string;
  message_type?: string;
  stop_reason?: string;
  // biome-ignore lint/suspicious/noExplicitAny: index signature for arbitrary JSON fields
  [key: string]: any;
};
|
||||
|
||||
/**
 * Runs the CLI in bidirectional stream-json mode WITHOUT `--yolo` and plays a
 * fixed script against it:
 *
 * 1. after the `system`/`init` message, send BASH_TRIGGER_PROMPT;
 * 2. on the first `approval_request_message`, wait 500ms and send
 *    INTERRUPT_MESSAGE as a second user message (not an approval);
 * 3. finish after two `result` messages, or after one `result` if a
 *    `recovery` message of type `approval_pending` was seen.
 *
 * @param timeoutMs - Hard limit after which the child is killed and the
 *   promise rejects.
 * @returns `messages`: every value parsed from stdout, in arrival order.
 *   `success`: true when the script finished as described above; if the child
 *   exits on its own first, true only when at least one `result` was seen.
 *   `errorSeen`: despite its name, true when the `approval_pending` recovery
 *   message was observed.
 */
async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
  messages: StreamMessage[];
  success: boolean;
  errorSeen: boolean;
}> {
  return new Promise((resolve, reject) => {
    const proc = spawn(
      "bun",
      [
        "run",
        "dev",
        "-p",
        "--input-format",
        "stream-json",
        "--output-format",
        "stream-json",
        "--new-agent",
        "-m",
        "haiku",
        // NOTE: No --yolo flag - approvals are required
      ],
      {
        cwd: process.cwd(),
        // Mark as subagent to prevent polluting user's LRU settings
        env: { ...process.env, LETTA_CODE_AGENT_ROLE: "subagent" },
      },
    );

    // Decode at the stream level so a multi-byte UTF-8 character that is
    // split across two chunks is not corrupted.
    proc.stdout?.setEncoding("utf8");
    proc.stderr?.setEncoding("utf8");

    const messages: StreamMessage[] = [];
    let buffer = "";
    let stderr = "";
    let initReceived = false;
    let approvalSeen = false;
    let interruptSent = false;
    let errorSeen = false;
    let resultCount = 0;
    let closing = false;

    const timeout = setTimeout(() => {
      if (!closing) {
        proc.kill();
        // Include stderr so a timeout can actually be diagnosed.
        reject(
          new Error(`Test timed out after ${timeoutMs}ms. stderr: ${stderr}`),
        );
      }
    }, timeoutMs);

    // Marks the run as finished and shuts the child down after a short grace
    // period.
    const cleanup = () => {
      closing = true;
      clearTimeout(timeout);
      setTimeout(() => {
        proc.stdin?.end();
        proc.kill();
      }, 500);
    };

    const sendUserMessage = (content: string) => {
      const userMsg = JSON.stringify({
        type: "user",
        message: { role: "user", content },
      });
      proc.stdin?.write(`${userMsg}\n`);
    };

    const processLine = (line: string) => {
      if (!line.trim()) return;

      // Only the parse itself is guarded, so real handler bugs are not hidden.
      let parsed: unknown;
      try {
        parsed = JSON.parse(line);
      } catch {
        return; // Not valid JSON, ignore
      }
      const msg = parsed as StreamMessage;
      messages.push(msg);
      if (typeof parsed !== "object" || parsed === null) return;

      // Debug output
      if (process.env.DEBUG_TEST) {
        console.log("MSG:", JSON.stringify(msg, null, 2));
      }

      // Step 1: Wait for init, then send bash trigger prompt
      if (msg.type === "system" && msg.subtype === "init" && !initReceived) {
        initReceived = true;
        sendUserMessage(BASH_TRIGGER_PROMPT);
        return;
      }

      // Step 2: When we see approval request, send another user message instead
      if (
        msg.type === "message" &&
        msg.message_type === "approval_request_message" &&
        !approvalSeen
      ) {
        approvalSeen = true;
        // Wait a moment, then send interrupt message (NOT an approval)
        setTimeout(() => {
          // Skip the write if shutdown already started: stdin may be ended.
          if (!interruptSent && !closing) {
            interruptSent = true;
            sendUserMessage(INTERRUPT_MESSAGE);
          }
        }, 500);
        return;
      }

      // Track recovery messages - this is the key signal that lazy recovery worked
      if (
        msg.type === "recovery" &&
        msg.recovery_type === "approval_pending"
      ) {
        errorSeen = true; // reusing this flag to mean "recovery message seen"
      }

      // Also track raw errors (shouldn't see these if recovery works properly)
      if (
        msg.type === "error" ||
        (msg.type === "message" && msg.message_type === "error_message")
      ) {
        const detail = msg.detail || msg.message || "";
        // detail comes from arbitrary JSON, so make sure it is text before
        // calling string methods on it.
        if (
          typeof detail === "string" &&
          detail.toLowerCase().includes("cannot send a new message")
        ) {
          // Raw error leaked through - recovery may have failed
          console.log(
            "WARNING: Raw CONFLICT error seen (recovery may have failed)",
          );
        }
      }

      // Track results - we need 2 (one for each user message, though first may fail)
      if (msg.type === "result") {
        resultCount++;
        // After second result (or after seeing error + result), we're done
        if (resultCount >= 2 || (errorSeen && resultCount >= 1)) {
          cleanup();
          resolve({ messages, success: true, errorSeen });
        }
      }
    };

    proc.stdout?.on("data", (chunk: string) => {
      buffer += chunk;
      const lines = buffer.split("\n");
      buffer = lines.pop() ?? ""; // Keep incomplete line in buffer
      for (const line of lines) {
        processLine(line);
      }
    });

    proc.stderr?.on("data", (chunk: string) => {
      stderr += chunk;
    });

    proc.on("close", () => {
      clearTimeout(timeout);
      // Process any remaining buffer
      if (buffer.trim()) {
        processLine(buffer);
      }

      if (!closing) {
        // The child exited before the script finished: report what we have.
        resolve({
          messages,
          success: resultCount > 0,
          errorSeen,
        });
      }
    });

    proc.on("error", (err) => {
      clearTimeout(timeout);
      reject(err);
    });
  });
}
|
||||
|
||||
describe("lazy approval recovery", () => {
  test("handles concurrent message while approval is pending", async () => {
    const result = await runLazyRecoveryTest();

    // Log messages for debugging if test fails
    if (!result.success) {
      console.log("All messages received:");
      for (const msg of result.messages) {
        console.log(JSON.stringify(msg, null, 2));
      }
    }

    // We should have seen the approval request (proves tool requiring approval was called)
    const approvalRequest = result.messages.find(
      (m) => m.message_type === "approval_request_message",
    );
    expect(approvalRequest).toBeDefined();

    // The test should complete successfully
    expect(result.success).toBe(true);

    // Count results - we should get at least 1 (the second message should always complete)
    const resultCount = result.messages.filter(
      (m) => m.type === "result",
    ).length;
    expect(resultCount).toBeGreaterThanOrEqual(1);

    // KEY ASSERTION: Check if we saw the recovery message
    // This proves the lazy recovery mechanism was triggered
    const recoveryMessage = result.messages.find(
      (m) => m.type === "recovery" && m.recovery_type === "approval_pending",
    );
    if (recoveryMessage) {
      console.log("Recovery message detected - lazy recovery worked correctly");
      expect(result.errorSeen).toBe(true); // Should have been set when we saw recovery
    } else {
      // Recovery might not be triggered if approval was auto-handled before second message
      // This can happen due to timing - the test still validates the flow works
      console.log(
        "Note: No recovery message seen - approval may have been handled before conflict",
      );
    }
    // The runner's limit must exceed runLazyRecoveryTest's own 180s default,
    // otherwise the two timeouts race and the helper may never get to kill
    // the child and reject with its own error.
  }, 200000);
});
|
||||
@@ -2,22 +2,14 @@ import { describe, expect, test } from "bun:test";
|
||||
import { spawn } from "node:child_process";
|
||||
|
||||
/**
|
||||
* Integration tests for CLI startup flows.
|
||||
* Startup flow tests that validate flag conflict handling.
|
||||
*
|
||||
* These tests verify the boot flow decision tree:
|
||||
* - Flag conflict detection
|
||||
* - --conversation: derives agent from conversation
|
||||
* - --agent: uses specified agent
|
||||
* - --new-agent: creates new agent
|
||||
* - Error messages for invalid inputs
|
||||
*
|
||||
* Note: Tests that depend on settings files (.letta/) are harder to isolate
|
||||
* because the CLI uses process.cwd(). For now, we focus on flag-based tests.
|
||||
* These must remain runnable in fork PR CI (no secrets), so they should not
|
||||
* require a working Letta server or LETTA_API_KEY.
|
||||
*/
|
||||
|
||||
// Working directory of the test process, captured once when this module loads.
const projectRoot = process.cwd();
|
||||
|
||||
// Helper to run CLI and capture output
|
||||
async function runCli(
|
||||
args: string[],
|
||||
options: {
|
||||
@@ -74,10 +66,6 @@ async function runCli(
|
||||
});
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Flag Conflict Tests (fast, no API calls needed)
|
||||
// ============================================================================
|
||||
|
||||
describe("Startup Flow - Flag Conflicts", () => {
|
||||
test("--conversation conflicts with --agent", async () => {
|
||||
const result = await runCli(
|
||||
@@ -136,227 +124,3 @@ describe("Startup Flow - Flag Conflicts", () => {
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
// ============================================================================
|
||||
// Invalid Input Tests (require API calls but fail fast)
|
||||
// ============================================================================
|
||||
|
||||
describe("Startup Flow - Invalid Inputs", () => {
  // Flags that take an ID, paired with an ID that should never resolve.
  const missingIdCases = [
    { flag: "--agent", id: "agent-definitely-does-not-exist-12345" },
    {
      flag: "--conversation",
      id: "conversation-definitely-does-not-exist-12345",
    },
  ];

  // Each case expects the CLI to exit with code 1 and mention "not found" on stderr.
  for (const { flag, id } of missingIdCases) {
    test(
      `${flag} with nonexistent ID shows error`,
      async () => {
        const cliArgs = [flag, id, "-p", "test"];
        const { stderr } = await runCli(cliArgs, {
          expectExit: 1,
          timeoutMs: 60000,
        });
        expect(stderr).toContain("not found");
      },
      { timeout: 70000 },
    );
  }

  // Same expectation for an agent file path that does not exist; this test
  // uses the default timeouts.
  test("--from-af with nonexistent file shows error", async () => {
    const cliArgs = ["--from-af", "/nonexistent/path/agent.af", "-p", "test"];
    const { stderr } = await runCli(cliArgs, { expectExit: 1 });
    expect(stderr).toContain("not found");
  });
});
|
||||
|
||||
// ============================================================================
|
||||
// Integration Tests (require API access, create real agents)
|
||||
// ============================================================================
|
||||
|
||||
describe("Startup Flow - Integration", () => {
  // Agent ID created by the first test; later tests reuse it.
  let testAgentId: string | null = null;

  /**
   * Parses the JSON payload out of the CLI's stdout.
   *
   * stdout includes the bun invocation line before the JSON, so parsing starts
   * at the first "{". If there is no "{" at all, this throws a descriptive
   * error instead of letting `slice(-1)` feed a single trailing character to
   * JSON.parse (which produces a misleading SyntaxError).
   *
   * @param stdout - Raw stdout captured from the CLI process.
   * @returns The parsed JSON value.
   * @throws Error when stdout contains no "{"; SyntaxError when the text from
   *   the first "{" onward is not valid JSON.
   */
  const parseJsonOutput = (stdout: string) => {
    const jsonStart = stdout.indexOf("{");
    if (jsonStart === -1) {
      throw new Error(
        `No JSON object found in CLI stdout: ${JSON.stringify(stdout)}`,
      );
    }
    return JSON.parse(stdout.slice(jsonStart));
  };

  test(
    "--new-agent creates agent and responds",
    async () => {
      const result = await runCli(
        [
          "--new-agent",
          "-m",
          "haiku",
          "-p",
          "Say OK and nothing else",
          "--output-format",
          "json",
        ],
        { timeoutMs: 120000 },
      );

      expect(result.exitCode).toBe(0);
      const output = parseJsonOutput(result.stdout);
      expect(output.agent_id).toBeDefined();
      expect(output.result).toBeDefined();

      // Save for later tests
      testAgentId = output.agent_id;
    },
    { timeout: 130000 },
  );

  test(
    "--agent with valid ID uses that agent",
    async () => {
      // Skip if previous test didn't create an agent.
      // NOTE(review): returning here makes the test report as passed, not skipped.
      if (!testAgentId) {
        console.log("Skipping: no test agent available");
        return;
      }

      const result = await runCli(
        [
          "--agent",
          testAgentId,
          "-m",
          "haiku",
          "-p",
          "Say OK",
          "--output-format",
          "json",
        ],
        { timeoutMs: 120000 },
      );

      expect(result.exitCode).toBe(0);
      const output = parseJsonOutput(result.stdout);
      expect(output.agent_id).toBe(testAgentId);
    },
    { timeout: 130000 },
  );

  test(
    "--conversation with valid ID derives agent and uses conversation",
    async () => {
      // Skip if previous test didn't create an agent.
      // NOTE(review): returning here makes the test report as passed, not skipped.
      if (!testAgentId) {
        console.log("Skipping: no test agent available");
        return;
      }

      // First, create a real conversation with --new (since --new-agent uses "default")
      const createResult = await runCli(
        [
          "--agent",
          testAgentId,
          "--new",
          "-m",
          "haiku",
          "-p",
          "Say CREATED",
          "--output-format",
          "json",
        ],
        { timeoutMs: 120000 },
      );
      expect(createResult.exitCode).toBe(0);
      const createOutput = parseJsonOutput(createResult.stdout);
      const realConversationId = createOutput.conversation_id;
      expect(realConversationId).toBeDefined();
      expect(realConversationId).not.toBe("default");

      // Now test that --conversation can derive the agent from this conversation
      const result = await runCli(
        [
          "--conversation",
          realConversationId,
          "-m",
          "haiku",
          "-p",
          "Say OK",
          "--output-format",
          "json",
        ],
        { timeoutMs: 120000 },
      );

      expect(result.exitCode).toBe(0);
      const output = parseJsonOutput(result.stdout);
      // Should use the same agent that owns the conversation
      expect(output.agent_id).toBe(testAgentId);
      // Should use the specified conversation
      expect(output.conversation_id).toBe(realConversationId);
    },
    // NOTE(review): two CLI runs of up to 120s each can exceed this 180s budget.
    { timeout: 180000 },
  );

  test(
    "--new-agent with --init-blocks none creates minimal agent",
    async () => {
      const result = await runCli(
        [
          "--new-agent",
          "--init-blocks",
          "none",
          "-m",
          "haiku",
          "-p",
          "Say OK",
          "--output-format",
          "json",
        ],
        { timeoutMs: 120000 },
      );

      expect(result.exitCode).toBe(0);
      const output = parseJsonOutput(result.stdout);
      expect(output.agent_id).toBeDefined();
    },
    { timeout: 130000 },
  );
});
|
||||
|
||||
// ============================================================================
|
||||
// --continue Tests (depend on LRU state, harder to isolate)
|
||||
// ============================================================================
|
||||
|
||||
describe("Startup Flow - Continue Flag", () => {
  test(
    "--continue with no LRU shows error",
    async () => {
      // The outcome depends on whether the working directory already has
      // .letta/ settings with an LRU entry (the project's own .letta/ may
      // provide one), so both outcomes are tolerated.
      const cliArgs = ["--continue", "-p", "Say OK", "--output-format", "json"];
      const { exitCode, stderr } = await runCli(cliArgs, { timeoutMs: 60000 });

      // A zero exit means an LRU session existed; nothing further to verify.
      if (exitCode === 0) {
        return;
      }

      // Otherwise the failure must be the specific "no session" error.
      expect(stderr).toContain("No recent session found");
    },
    { timeout: 70000 },
  );
});
|
||||
|
||||
Reference in New Issue
Block a user