test(integration): reduce flaky startup/headless timeout failures (#1109)
Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
4
.github/workflows/ci.yml
vendored
4
.github/workflows/ci.yml
vendored
@@ -168,7 +168,7 @@ jobs:
|
|||||||
if: ${{ runner.os == 'Windows' && (github.event_name == 'push' || (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository)) }}
|
if: ${{ runner.os == 'Windows' && (github.event_name == 'push' || (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository)) }}
|
||||||
env:
|
env:
|
||||||
LETTA_API_KEY: ${{ secrets.LETTA_API_KEY }}
|
LETTA_API_KEY: ${{ secrets.LETTA_API_KEY }}
|
||||||
run: bun run src/tests/headless-windows.ts --model haiku
|
run: bun run src/tests/headless-windows.ts --model sonnet-4.6-low
|
||||||
|
|
||||||
- name: Publish dry-run
|
- name: Publish dry-run
|
||||||
if: ${{ github.event_name == 'push' }}
|
if: ${{ github.event_name == 'push' }}
|
||||||
@@ -223,7 +223,7 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
# Note: gemini-3-flash / glm-4.7 temporarily disabled due to instability
|
# Note: gemini-3-flash / glm-4.7 temporarily disabled due to instability
|
||||||
model: [gpt-5-minimal, gpt-4.1, sonnet-4.5, gemini-pro, haiku]
|
model: [gpt-5-minimal, gpt-4.1, sonnet-4.5, gemini-pro, sonnet-4.6-low]
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v6
|
||||||
|
|||||||
@@ -58,6 +58,14 @@ export interface ResumeData {
|
|||||||
messageHistory: Message[];
|
messageHistory: Message[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Optional settings accepted as the last argument of `getResumeData`.
 *
 * Every field is optional so callers may omit the object entirely.
 */
export interface GetResumeDataOptions {
  /**
   * Controls whether backfill message history should be fetched.
   * Defaults to true to preserve existing /resume behavior.
   */
  includeMessageHistory?: boolean;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract approval requests from an approval_request_message.
|
* Extract approval requests from an approval_request_message.
|
||||||
* Exported for testing parallel tool call handling.
|
* Exported for testing parallel tool call handling.
|
||||||
@@ -327,8 +335,10 @@ export async function getResumeData(
|
|||||||
client: Letta,
|
client: Letta,
|
||||||
agent: AgentState,
|
agent: AgentState,
|
||||||
conversationId?: string,
|
conversationId?: string,
|
||||||
|
options: GetResumeDataOptions = {},
|
||||||
): Promise<ResumeData> {
|
): Promise<ResumeData> {
|
||||||
try {
|
try {
|
||||||
|
const includeMessageHistory = options.includeMessageHistory ?? true;
|
||||||
let inContextMessageIds: string[] | null | undefined;
|
let inContextMessageIds: string[] | null | undefined;
|
||||||
let messages: Message[] = [];
|
let messages: Message[] = [];
|
||||||
|
|
||||||
@@ -352,7 +362,7 @@ export async function getResumeData(
|
|||||||
"check-approval",
|
"check-approval",
|
||||||
"No in-context messages - no pending approvals",
|
"No in-context messages - no pending approvals",
|
||||||
);
|
);
|
||||||
if (isBackfillEnabled()) {
|
if (includeMessageHistory && isBackfillEnabled()) {
|
||||||
try {
|
try {
|
||||||
const backfill = await fetchConversationBackfillMessages(
|
const backfill = await fetchConversationBackfillMessages(
|
||||||
client,
|
client,
|
||||||
@@ -389,7 +399,7 @@ export async function getResumeData(
|
|||||||
|
|
||||||
// Fetch message history separately for backfill (desc then reverse for last N chronological)
|
// Fetch message history separately for backfill (desc then reverse for last N chronological)
|
||||||
// Wrapped in try/catch so backfill failures don't crash the CLI
|
// Wrapped in try/catch so backfill failures don't crash the CLI
|
||||||
if (isBackfillEnabled()) {
|
if (includeMessageHistory && isBackfillEnabled()) {
|
||||||
try {
|
try {
|
||||||
messages = await fetchConversationBackfillMessages(
|
messages = await fetchConversationBackfillMessages(
|
||||||
client,
|
client,
|
||||||
@@ -473,7 +483,7 @@ export async function getResumeData(
|
|||||||
// This filters to only the default conversation's messages (like the ADE does)
|
// This filters to only the default conversation's messages (like the ADE does)
|
||||||
// Wrapped in try/catch so backfill failures don't crash the CLI (e.g., older servers
|
// Wrapped in try/catch so backfill failures don't crash the CLI (e.g., older servers
|
||||||
// may not support conversation_id filter)
|
// may not support conversation_id filter)
|
||||||
if (isBackfillEnabled()) {
|
if (includeMessageHistory && isBackfillEnabled()) {
|
||||||
try {
|
try {
|
||||||
const messagesPage = await client.agents.messages.list(agent.id, {
|
const messagesPage = await client.agents.messages.list(agent.id, {
|
||||||
limit: BACKFILL_PAGE_LIMIT,
|
limit: BACKFILL_PAGE_LIMIT,
|
||||||
|
|||||||
@@ -2742,6 +2742,33 @@ async function runBidirectionalMode(
|
|||||||
console.log(JSON.stringify(registerResponse));
|
console.log(JSON.stringify(registerResponse));
|
||||||
} else if (subtype === "bootstrap_session_state") {
|
} else if (subtype === "bootstrap_session_state") {
|
||||||
const bootstrapReq = message.request as BootstrapSessionStateRequest;
|
const bootstrapReq = message.request as BootstrapSessionStateRequest;
|
||||||
|
const { getResumeData } = await import("./agent/check-approval");
|
||||||
|
let hasPendingApproval = false;
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Re-fetch for parity with approval checks elsewhere in headless mode.
|
||||||
|
const freshAgent = await client.agents.retrieve(agent.id);
|
||||||
|
const resume = await getResumeData(
|
||||||
|
client,
|
||||||
|
freshAgent,
|
||||||
|
conversationId,
|
||||||
|
{
|
||||||
|
includeMessageHistory: false,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
hasPendingApproval = (resume.pendingApprovals?.length ?? 0) > 0;
|
||||||
|
} catch (error) {
|
||||||
|
// Keep bootstrap non-fatal if approval probe fails on stale resources.
|
||||||
|
if (
|
||||||
|
!(error instanceof APIError) ||
|
||||||
|
(error.status !== 404 && error.status !== 422)
|
||||||
|
) {
|
||||||
|
console.warn(
|
||||||
|
`[bootstrap] pending-approval probe failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const bootstrapResp = await handleBootstrapSessionState({
|
const bootstrapResp = await handleBootstrapSessionState({
|
||||||
bootstrapReq,
|
bootstrapReq,
|
||||||
sessionContext: {
|
sessionContext: {
|
||||||
@@ -2754,7 +2781,7 @@ async function runBidirectionalMode(
|
|||||||
},
|
},
|
||||||
requestId: requestId ?? "",
|
requestId: requestId ?? "",
|
||||||
client,
|
client,
|
||||||
hasPendingApproval: false, // TODO: wire approval state when available
|
hasPendingApproval,
|
||||||
});
|
});
|
||||||
console.log(JSON.stringify(bootstrapResp));
|
console.log(JSON.stringify(bootstrapResp));
|
||||||
} else if (subtype === "list_messages") {
|
} else if (subtype === "list_messages") {
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ async function runBidirectional(
|
|||||||
"stream-json",
|
"stream-json",
|
||||||
"--new-agent",
|
"--new-agent",
|
||||||
"-m",
|
"-m",
|
||||||
"haiku",
|
"sonnet-4.6-low",
|
||||||
"--yolo",
|
"--yolo",
|
||||||
...extraArgs,
|
...extraArgs,
|
||||||
],
|
],
|
||||||
@@ -202,6 +202,31 @@ async function runBidirectional(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Call `runBidirectional` and re-run it when an attempt fails with a timeout.
 *
 * An error counts as a timeout only when it is an `Error` whose message
 * contains the text "Timeout after". Any other failure is rethrown
 * immediately without retrying.
 *
 * @param inputs - Forwarded unchanged to `runBidirectional` on every attempt.
 * @param extraArgs - Forwarded unchanged to `runBidirectional` on every attempt.
 * @param timeoutMs - Forwarded unchanged to `runBidirectional` on every attempt.
 * @param retryOnTimeouts - How many extra attempts are allowed after a timeout.
 *   `NaN` is treated as 0 (no retries).
 * @returns The value resolved by the first attempt that succeeds.
 * @throws The error of the last attempt once the retry budget is used up, or
 *   the first non-timeout error.
 */
async function runBidirectionalWithRetry(
  inputs: string[],
  extraArgs: string[] = [],
  timeoutMs = 180000,
  retryOnTimeouts = 1,
): Promise<object[]> {
  // `attempt >= NaN` is always false, which would retry forever on a
  // persistent timeout; collapse NaN to "no retries" instead.
  const retryLimit = Number.isNaN(retryOnTimeouts) ? 0 : retryOnTimeouts;

  let attempt = 0;
  while (true) {
    try {
      return await runBidirectional(inputs, extraArgs, timeoutMs);
    } catch (error) {
      const isTimeoutError =
        error instanceof Error && error.message.includes("Timeout after");
      if (!isTimeoutError || attempt >= retryLimit) {
        throw error;
      }
      attempt += 1;
      // CI API latency can cause occasional long-tail timeouts.
      console.warn(
        `[headless-input-format] retrying after timeout (${attempt}/${retryLimit})`,
      );
    }
  }
}
|
||||||
|
|
||||||
describe("input-format stream-json", () => {
|
describe("input-format stream-json", () => {
|
||||||
test(
|
test(
|
||||||
"initialize control request returns session info",
|
"initialize control request returns session info",
|
||||||
@@ -299,7 +324,7 @@ describe("input-format stream-json", () => {
|
|||||||
"multi-turn conversation maintains context",
|
"multi-turn conversation maintains context",
|
||||||
async () => {
|
async () => {
|
||||||
// Multi-turn test needs 2 sequential LLM calls, so allow more time
|
// Multi-turn test needs 2 sequential LLM calls, so allow more time
|
||||||
const objects = (await runBidirectional(
|
const objects = (await runBidirectionalWithRetry(
|
||||||
[
|
[
|
||||||
JSON.stringify({
|
JSON.stringify({
|
||||||
type: "user",
|
type: "user",
|
||||||
@@ -318,6 +343,7 @@ describe("input-format stream-json", () => {
|
|||||||
],
|
],
|
||||||
[], // no extra args
|
[], // no extra args
|
||||||
300000, // 300s for 2 sequential LLM calls - CI can be very slow
|
300000, // 300s for 2 sequential LLM calls - CI can be very slow
|
||||||
|
1, // one retry for transient API slowness
|
||||||
)) as WireMessage[];
|
)) as WireMessage[];
|
||||||
|
|
||||||
// Should have at least two results (one per turn)
|
// Should have at least two results (one per turn)
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ async function runHeadlessCommand(
|
|||||||
"stream-json",
|
"stream-json",
|
||||||
"--yolo",
|
"--yolo",
|
||||||
"-m",
|
"-m",
|
||||||
"haiku",
|
"sonnet-4.6-low",
|
||||||
...extraArgs,
|
...extraArgs,
|
||||||
],
|
],
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ interface StreamMessage {
|
|||||||
* Run bidirectional test with custom message handling.
|
* Run bidirectional test with custom message handling.
|
||||||
* Allows sending messages at specific points in the flow.
|
* Allows sending messages at specific points in the flow.
|
||||||
*/
|
*/
|
||||||
async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
|
async function runLazyRecoveryTest(timeoutMs = 300000): Promise<{
|
||||||
messages: StreamMessage[];
|
messages: StreamMessage[];
|
||||||
success: boolean;
|
success: boolean;
|
||||||
errorSeen: boolean;
|
errorSeen: boolean;
|
||||||
@@ -61,7 +61,7 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
|
|||||||
"stream-json",
|
"stream-json",
|
||||||
"--new-agent",
|
"--new-agent",
|
||||||
"-m",
|
"-m",
|
||||||
"haiku",
|
"sonnet-4.6-low",
|
||||||
// NOTE: No --yolo flag - approvals are required
|
// NOTE: No --yolo flag - approvals are required
|
||||||
],
|
],
|
||||||
{
|
{
|
||||||
@@ -291,7 +291,12 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
|
|||||||
|
|
||||||
describe("lazy approval recovery", () => {
|
describe("lazy approval recovery", () => {
|
||||||
test("handles concurrent message while approval is pending", async () => {
|
test("handles concurrent message while approval is pending", async () => {
|
||||||
const result = await runLazyRecoveryTest();
|
let result = await runLazyRecoveryTest();
|
||||||
|
if (!result.success) {
|
||||||
|
// Transient API/tool timing can occasionally miss the approval window;
|
||||||
|
// retry once before failing.
|
||||||
|
result = await runLazyRecoveryTest();
|
||||||
|
}
|
||||||
|
|
||||||
// Log messages for debugging if test fails
|
// Log messages for debugging if test fails
|
||||||
if (!result.success) {
|
if (!result.success) {
|
||||||
@@ -333,5 +338,5 @@ describe("lazy approval recovery", () => {
|
|||||||
"Note: No recovery message seen - approval may have been handled before conflict",
|
"Note: No recovery message seen - approval may have been handled before conflict",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}, 180000); // 3 minute timeout for CI
|
}, 320000); // 5+ minute timeout for slow CI runners
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ async function startPendingApprovalSession(
|
|||||||
"--new-agent",
|
"--new-agent",
|
||||||
"--new",
|
"--new",
|
||||||
"-m",
|
"-m",
|
||||||
"haiku",
|
"sonnet-4.6-low",
|
||||||
],
|
],
|
||||||
{
|
{
|
||||||
cwd: process.cwd(),
|
cwd: process.cwd(),
|
||||||
|
|||||||
@@ -15,55 +15,77 @@ async function runCli(
|
|||||||
options: {
|
options: {
|
||||||
timeoutMs?: number;
|
timeoutMs?: number;
|
||||||
expectExit?: number;
|
expectExit?: number;
|
||||||
|
retryOnTimeouts?: number;
|
||||||
} = {},
|
} = {},
|
||||||
): Promise<{ stdout: string; stderr: string; exitCode: number | null }> {
|
): Promise<{ stdout: string; stderr: string; exitCode: number | null }> {
|
||||||
const { timeoutMs = 30000, expectExit } = options;
|
const { timeoutMs = 30000, expectExit, retryOnTimeouts = 1 } = options;
|
||||||
|
|
||||||
return new Promise((resolve, reject) => {
|
const runOnce = () =>
|
||||||
const proc = spawn("bun", ["run", "dev", ...args], {
|
new Promise<{ stdout: string; stderr: string; exitCode: number | null }>(
|
||||||
cwd: projectRoot,
|
(resolve, reject) => {
|
||||||
// Mark as subagent to prevent polluting user's LRU settings
|
const proc = spawn("bun", ["run", "dev", ...args], {
|
||||||
env: { ...process.env, LETTA_CODE_AGENT_ROLE: "subagent" },
|
cwd: projectRoot,
|
||||||
});
|
// Mark as subagent to prevent polluting user's LRU settings
|
||||||
|
env: { ...process.env, LETTA_CODE_AGENT_ROLE: "subagent" },
|
||||||
|
});
|
||||||
|
|
||||||
let stdout = "";
|
let stdout = "";
|
||||||
let stderr = "";
|
let stderr = "";
|
||||||
|
|
||||||
proc.stdout?.on("data", (data) => {
|
proc.stdout?.on("data", (data) => {
|
||||||
stdout += data.toString();
|
stdout += data.toString();
|
||||||
});
|
});
|
||||||
|
|
||||||
proc.stderr?.on("data", (data) => {
|
proc.stderr?.on("data", (data) => {
|
||||||
stderr += data.toString();
|
stderr += data.toString();
|
||||||
});
|
});
|
||||||
|
|
||||||
const timeout = setTimeout(() => {
|
const timeout = setTimeout(() => {
|
||||||
proc.kill();
|
proc.kill();
|
||||||
reject(
|
reject(
|
||||||
new Error(
|
new Error(
|
||||||
`Timeout after ${timeoutMs}ms. stdout: ${stdout}, stderr: ${stderr}`,
|
`Timeout after ${timeoutMs}ms. stdout: ${stdout}, stderr: ${stderr}`,
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
}, timeoutMs);
|
}, timeoutMs);
|
||||||
|
|
||||||
proc.on("close", (code) => {
|
proc.on("close", (code) => {
|
||||||
clearTimeout(timeout);
|
clearTimeout(timeout);
|
||||||
if (expectExit !== undefined && code !== expectExit) {
|
if (expectExit !== undefined && code !== expectExit) {
|
||||||
reject(
|
reject(
|
||||||
new Error(
|
new Error(
|
||||||
`Expected exit code ${expectExit}, got ${code}. stdout: ${stdout}, stderr: ${stderr}`,
|
`Expected exit code ${expectExit}, got ${code}. stdout: ${stdout}, stderr: ${stderr}`,
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
resolve({ stdout, stderr, exitCode: code });
|
resolve({ stdout, stderr, exitCode: code });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
proc.on("error", (err) => {
|
||||||
|
clearTimeout(timeout);
|
||||||
|
reject(err);
|
||||||
|
});
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
let attempt = 0;
|
||||||
|
while (true) {
|
||||||
|
try {
|
||||||
|
return await runOnce();
|
||||||
|
} catch (error) {
|
||||||
|
const isTimeoutError =
|
||||||
|
error instanceof Error && error.message.includes("Timeout after");
|
||||||
|
if (!isTimeoutError || attempt >= retryOnTimeouts) {
|
||||||
|
throw error;
|
||||||
}
|
}
|
||||||
});
|
attempt += 1;
|
||||||
|
// CI API calls can be transiently slow; retry once to reduce flakiness.
|
||||||
proc.on("error", (err) => {
|
console.warn(
|
||||||
clearTimeout(timeout);
|
`[startup-flow] retrying after timeout (${attempt}/${retryOnTimeouts}) args=${args.join(" ")}`,
|
||||||
reject(err);
|
);
|
||||||
});
|
}
|
||||||
});
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
@@ -123,13 +145,13 @@ describe("Startup Flow - Integration", () => {
|
|||||||
[
|
[
|
||||||
"--new-agent",
|
"--new-agent",
|
||||||
"-m",
|
"-m",
|
||||||
"haiku",
|
"sonnet-4.6-low",
|
||||||
"-p",
|
"-p",
|
||||||
"Say OK and nothing else",
|
"Say OK and nothing else",
|
||||||
"--output-format",
|
"--output-format",
|
||||||
"json",
|
"json",
|
||||||
],
|
],
|
||||||
{ timeoutMs: 120000 },
|
{ timeoutMs: 180000 },
|
||||||
);
|
);
|
||||||
|
|
||||||
expect(result.exitCode).toBe(0);
|
expect(result.exitCode).toBe(0);
|
||||||
@@ -141,7 +163,7 @@ describe("Startup Flow - Integration", () => {
|
|||||||
|
|
||||||
testAgentId = output.agent_id;
|
testAgentId = output.agent_id;
|
||||||
},
|
},
|
||||||
{ timeout: 130000 },
|
{ timeout: 190000 },
|
||||||
);
|
);
|
||||||
|
|
||||||
test(
|
test(
|
||||||
@@ -157,13 +179,13 @@ describe("Startup Flow - Integration", () => {
|
|||||||
"--agent",
|
"--agent",
|
||||||
testAgentId,
|
testAgentId,
|
||||||
"-m",
|
"-m",
|
||||||
"haiku",
|
"sonnet-4.6-low",
|
||||||
"-p",
|
"-p",
|
||||||
"Say OK",
|
"Say OK",
|
||||||
"--output-format",
|
"--output-format",
|
||||||
"json",
|
"json",
|
||||||
],
|
],
|
||||||
{ timeoutMs: 120000 },
|
{ timeoutMs: 180000 },
|
||||||
);
|
);
|
||||||
|
|
||||||
expect(result.exitCode).toBe(0);
|
expect(result.exitCode).toBe(0);
|
||||||
@@ -171,7 +193,7 @@ describe("Startup Flow - Integration", () => {
|
|||||||
const output = JSON.parse(result.stdout.slice(jsonStart));
|
const output = JSON.parse(result.stdout.slice(jsonStart));
|
||||||
expect(output.agent_id).toBe(testAgentId);
|
expect(output.agent_id).toBe(testAgentId);
|
||||||
},
|
},
|
||||||
{ timeout: 130000 },
|
{ timeout: 190000 },
|
||||||
);
|
);
|
||||||
|
|
||||||
test(
|
test(
|
||||||
@@ -189,13 +211,13 @@ describe("Startup Flow - Integration", () => {
|
|||||||
testAgentId,
|
testAgentId,
|
||||||
"--new",
|
"--new",
|
||||||
"-m",
|
"-m",
|
||||||
"haiku",
|
"sonnet-4.6-low",
|
||||||
"-p",
|
"-p",
|
||||||
"Say CREATED",
|
"Say CREATED",
|
||||||
"--output-format",
|
"--output-format",
|
||||||
"json",
|
"json",
|
||||||
],
|
],
|
||||||
{ timeoutMs: 120000 },
|
{ timeoutMs: 180000 },
|
||||||
);
|
);
|
||||||
expect(createResult.exitCode).toBe(0);
|
expect(createResult.exitCode).toBe(0);
|
||||||
const createJsonStart = createResult.stdout.indexOf("{");
|
const createJsonStart = createResult.stdout.indexOf("{");
|
||||||
@@ -211,13 +233,13 @@ describe("Startup Flow - Integration", () => {
|
|||||||
"--conversation",
|
"--conversation",
|
||||||
realConversationId,
|
realConversationId,
|
||||||
"-m",
|
"-m",
|
||||||
"haiku",
|
"sonnet-4.6-low",
|
||||||
"-p",
|
"-p",
|
||||||
"Say OK",
|
"Say OK",
|
||||||
"--output-format",
|
"--output-format",
|
||||||
"json",
|
"json",
|
||||||
],
|
],
|
||||||
{ timeoutMs: 120000 },
|
{ timeoutMs: 180000 },
|
||||||
);
|
);
|
||||||
|
|
||||||
expect(result.exitCode).toBe(0);
|
expect(result.exitCode).toBe(0);
|
||||||
@@ -238,13 +260,13 @@ describe("Startup Flow - Integration", () => {
|
|||||||
[
|
[
|
||||||
"--new-agent",
|
"--new-agent",
|
||||||
"-m",
|
"-m",
|
||||||
"haiku",
|
"sonnet-4.6-low",
|
||||||
"-p",
|
"-p",
|
||||||
"Say OK",
|
"Say OK",
|
||||||
"--output-format",
|
"--output-format",
|
||||||
"json",
|
"json",
|
||||||
],
|
],
|
||||||
{ timeoutMs: 120000 },
|
{ timeoutMs: 180000 },
|
||||||
);
|
);
|
||||||
expect(bootstrapResult.exitCode).toBe(0);
|
expect(bootstrapResult.exitCode).toBe(0);
|
||||||
const bootstrapJsonStart = bootstrapResult.stdout.indexOf("{");
|
const bootstrapJsonStart = bootstrapResult.stdout.indexOf("{");
|
||||||
@@ -262,13 +284,13 @@ describe("Startup Flow - Integration", () => {
|
|||||||
"--conversation",
|
"--conversation",
|
||||||
"default",
|
"default",
|
||||||
"-m",
|
"-m",
|
||||||
"haiku",
|
"sonnet-4.6-low",
|
||||||
"-p",
|
"-p",
|
||||||
"Say OK",
|
"Say OK",
|
||||||
"--output-format",
|
"--output-format",
|
||||||
"json",
|
"json",
|
||||||
],
|
],
|
||||||
{ timeoutMs: 120000 },
|
{ timeoutMs: 180000 },
|
||||||
);
|
);
|
||||||
|
|
||||||
expect(result.exitCode).toBe(0);
|
expect(result.exitCode).toBe(0);
|
||||||
@@ -277,7 +299,7 @@ describe("Startup Flow - Integration", () => {
|
|||||||
expect(output.agent_id).toBe(agentIdForTest);
|
expect(output.agent_id).toBe(agentIdForTest);
|
||||||
expect(output.conversation_id).toBe("default");
|
expect(output.conversation_id).toBe("default");
|
||||||
},
|
},
|
||||||
{ timeout: 130000 },
|
{ timeout: 190000 },
|
||||||
);
|
);
|
||||||
|
|
||||||
test(
|
test(
|
||||||
@@ -289,13 +311,13 @@ describe("Startup Flow - Integration", () => {
|
|||||||
"--init-blocks",
|
"--init-blocks",
|
||||||
"none",
|
"none",
|
||||||
"-m",
|
"-m",
|
||||||
"haiku",
|
"sonnet-4.6-low",
|
||||||
"-p",
|
"-p",
|
||||||
"Say OK",
|
"Say OK",
|
||||||
"--output-format",
|
"--output-format",
|
||||||
"json",
|
"json",
|
||||||
],
|
],
|
||||||
{ timeoutMs: 120000 },
|
{ timeoutMs: 180000 },
|
||||||
);
|
);
|
||||||
|
|
||||||
expect(result.exitCode).toBe(0);
|
expect(result.exitCode).toBe(0);
|
||||||
@@ -303,7 +325,7 @@ describe("Startup Flow - Integration", () => {
|
|||||||
const output = JSON.parse(result.stdout.slice(jsonStart));
|
const output = JSON.parse(result.stdout.slice(jsonStart));
|
||||||
expect(output.agent_id).toBeDefined();
|
expect(output.agent_id).toBeDefined();
|
||||||
},
|
},
|
||||||
{ timeout: 130000 },
|
{ timeout: 190000 },
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
130
src/tests/agent/getResumeData.test.ts
Normal file
130
src/tests/agent/getResumeData.test.ts
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
import { describe, expect, mock, test } from "bun:test";
|
||||||
|
import type Letta from "@letta-ai/letta-client";
|
||||||
|
import type { AgentState } from "@letta-ai/letta-client/resources/agents/agents";
|
||||||
|
import type { Message } from "@letta-ai/letta-client/resources/agents/messages";
|
||||||
|
import { getResumeData } from "../../agent/check-approval";
|
||||||
|
|
||||||
|
/**
 * Build a minimal agent fixture for these tests.
 *
 * Only `id` and `message_ids` are populated; the object is asserted to
 * `AgentState`, so any other field is absent at runtime.
 *
 * @param overrides - Fields spread over the defaults (later keys win).
 * @returns A fresh fixture object on every call.
 */
function makeAgent(overrides: Partial<AgentState> = {}): AgentState {
  return {
    id: "agent-test",
    message_ids: ["msg-last"],
    ...overrides,
  } as AgentState;
}
|
||||||
|
|
||||||
|
/**
 * Build an `approval_request_message` fixture carrying a single `Bash`
 * tool call (`tool_call_id` "tool-1").
 *
 * The literal is force-cast to `Message`; only the fields written below
 * exist at runtime.
 *
 * @param id - Message id; defaults to "msg-last".
 * @returns A new fixture whose `date` is the ISO timestamp of the call.
 */
function makeApprovalMessage(id = "msg-last"): Message {
  return {
    id,
    date: new Date().toISOString(),
    message_type: "approval_request_message",
    tool_calls: [
      {
        tool_call_id: "tool-1",
        name: "Bash",
        arguments: '{"command":"echo hi"}',
      },
    ],
  } as unknown as Message;
}
|
||||||
|
|
||||||
|
/**
 * Build a bare `user_message` fixture.
 *
 * Only `id`, `date` and `message_type` are set; the literal is asserted to
 * `Message`.
 *
 * @param id - Message id; defaults to "msg-last".
 * @returns A new fixture whose `date` is the ISO timestamp of the call.
 */
function makeUserMessage(id = "msg-last"): Message {
  return {
    id,
    date: new Date().toISOString(),
    message_type: "user_message",
  } as Message;
}
|
||||||
|
|
||||||
|
/**
 * Build a hand-rolled stand-in for the Letta client that exposes only the
 * four endpoints these tests observe, each wrapped in a `mock` so call
 * counts can be asserted.
 *
 * @param lastMessage - The single message returned by `messages.retrieve`.
 * @param agentMessages - Items returned by `agents.messages.list`.
 * @returns The client double plus the individual mocks.
 */
function makeMockClient(lastMessage: Message, agentMessages: Message[] = []) {
  const conversationsRetrieve = mock(async () => ({
    in_context_message_ids: ["msg-last"],
  }));
  const conversationsList = mock(async () => ({
    getPaginatedItems: () => [],
  }));
  const agentsList = mock(async () => ({ items: agentMessages }));
  const messagesRetrieve = mock(async () => [lastMessage]);

  const client = {
    conversations: {
      retrieve: conversationsRetrieve,
      messages: { list: conversationsList },
    },
    agents: { messages: { list: agentsList } },
    messages: { retrieve: messagesRetrieve },
  } as unknown as Letta;

  return {
    client,
    conversationsRetrieve,
    conversationsList,
    agentsList,
    messagesRetrieve,
  };
}

describe("getResumeData", () => {
  test("includeMessageHistory=false still computes pending approvals without backfill (conversation path)", async () => {
    const { client, conversationsRetrieve, conversationsList, messagesRetrieve } =
      makeMockClient(makeApprovalMessage());

    const resume = await getResumeData(client, makeAgent(), "conv-abc", {
      includeMessageHistory: false,
    });

    // The approval probe still runs...
    expect(conversationsRetrieve).toHaveBeenCalledTimes(1);
    expect(messagesRetrieve).toHaveBeenCalledTimes(1);
    // ...but the conversation message listing is never requested.
    expect(conversationsList).toHaveBeenCalledTimes(0);
    expect(resume.pendingApprovals).toHaveLength(1);
    expect(resume.pendingApprovals[0]?.toolName).toBe("Bash");
    expect(resume.messageHistory).toEqual([]);
  });

  test("includeMessageHistory=false skips default-conversation backfill calls", async () => {
    const { client, agentsList, messagesRetrieve } = makeMockClient(
      makeApprovalMessage(),
    );

    const resume = await getResumeData(
      client,
      makeAgent({ message_ids: ["msg-last"] }),
      "default",
      { includeMessageHistory: false },
    );

    expect(messagesRetrieve).toHaveBeenCalledTimes(1);
    expect(agentsList).toHaveBeenCalledTimes(0);
    expect(resume.pendingApprovals).toHaveLength(1);
    expect(resume.messageHistory).toEqual([]);
  });

  test("default behavior keeps backfill enabled when options are omitted", async () => {
    const { client, agentsList, messagesRetrieve } = makeMockClient(
      makeUserMessage(),
      [makeUserMessage("msg-a"), makeUserMessage("msg-b")],
    );

    const resume = await getResumeData(client, makeAgent(), "default");

    expect(messagesRetrieve).toHaveBeenCalledTimes(1);
    expect(agentsList).toHaveBeenCalledTimes(1);
    expect(resume.pendingApprovals).toHaveLength(0);
    expect(resume.messageHistory.length).toBeGreaterThan(0);
  });
});
|
||||||
23
src/tests/headless/bootstrap-pending-approval-wiring.test.ts
Normal file
23
src/tests/headless/bootstrap-pending-approval-wiring.test.ts
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
import { describe, expect, test } from "bun:test";
|
||||||
|
import { readFileSync } from "node:fs";
|
||||||
|
import { fileURLToPath } from "node:url";
|
||||||
|
|
||||||
|
/**
 * Source-level wiring check: reads `headless.ts` as text and asserts that the
 * bootstrap handler contains the expected pending-approval statements and no
 * longer contains the old hard-coded placeholder.
 *
 * NOTE(review): the assertions match exact source strings, so reformatting
 * `headless.ts` can fail this test without any behavior change.
 */
describe("bootstrap pending-approval wiring", () => {
  test("bootstrap_session_state probes approvals via getResumeData without backfill", () => {
    const headlessPath = fileURLToPath(
      new URL("../../headless.ts", import.meta.url),
    );
    const source = readFileSync(headlessPath, "utf-8");

    const requiredSnippets = [
      'const { getResumeData } = await import("./agent/check-approval");',
      "includeMessageHistory: false",
      "hasPendingApproval = (resume.pendingApprovals?.length ?? 0) > 0;",
    ];
    for (const snippet of requiredSnippets) {
      expect(source).toContain(snippet);
    }

    // The former placeholder must be gone.
    expect(source).not.toContain(
      "hasPendingApproval: false, // TODO: wire approval state when available",
    );
  });
});
|
||||||
Reference in New Issue
Block a user