feat: add background task notification system (#827)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Charles Packer
2026-02-04 22:45:16 -08:00
committed by GitHub
parent 84e9a6d744
commit 48ccd8f220
44 changed files with 2244 additions and 234 deletions

View File

@@ -1,7 +1,9 @@
import { describe, expect, test } from "bun:test";
import * as fs from "node:fs";
import { bash } from "../../tools/impl/Bash";
import { bash_output } from "../../tools/impl/BashOutput";
import { kill_bash } from "../../tools/impl/KillBash";
import { backgroundProcesses } from "../../tools/impl/process_manager";
const isWindows = process.platform === "win32";
@@ -68,4 +70,41 @@ describe.skipIf(isWindows)("Bash background tools", () => {
expect(result.killed).toBe(false);
});
// A command launched in the background should tell the caller where its
// output is being written, and that location should be a .log file.
test("background process returns output file path", async () => {
  const { content } = await bash({
    command: "echo 'test'",
    description: "Test output file",
    run_in_background: true,
  });

  // The first content entry carries the human-readable start message.
  const startMessage = content[0]?.text;
  expect(startMessage).toContain("Output file:");
  expect(startMessage).toMatch(/\.log$/);
});
// Verifies that the output of a backgrounded command ends up in the output
// file recorded on its entry in `backgroundProcesses`.
test("background process writes to output file", async () => {
  const startResult = await bash({
    command: "echo 'file output test'",
    description: "Test file writing",
    run_in_background: true,
  });

  // Extract the bash ID from the start message. String.prototype.match()
  // returns null (not undefined) on a miss, so `toBeDefined()` can never fail
  // here; guard explicitly so a missing ID fails the test with a clear message.
  const startText = startResult.content[0]?.text;
  const match = startText?.match(/bash_(\d+)/);
  if (!match) {
    throw new Error(`No bash ID found in start message: ${String(startText)}`);
  }
  const bashId = `bash_${match[1]}`;

  // Wait for command to complete
  await new Promise((resolve) => setTimeout(resolve, 300));

  // Get the output file path from the background process; fail loudly rather
  // than casting a possibly-undefined path to string.
  const outputFile = backgroundProcesses.get(bashId)?.outputFile;
  if (!outputFile) {
    throw new Error(`No output file recorded for ${bashId}`);
  }

  // Read the file and verify content
  const fileContent = fs.readFileSync(outputFile, "utf-8");
  expect(fileContent).toContain("file output test");
});
});

View File

@@ -0,0 +1,394 @@
import { afterEach, describe, expect, test } from "bun:test";
import * as fs from "node:fs";
import {
appendToOutputFile,
type BackgroundTask,
backgroundTasks,
createBackgroundOutputFile,
getNextTaskId,
} from "../../tools/impl/process_manager";
import { task_output } from "../../tools/impl/TaskOutput";
import { task_stop } from "../../tools/impl/TaskStop";
/**
* Tests for Task background execution infrastructure.
*
* Since the full task() function requires subagent infrastructure,
* these tests verify the background task tracking, output file handling,
* and integration with TaskOutput/TaskStop tools.
*/
// Exercises the low-level helpers exported by process_manager: task ID
// generation, output file creation/appending, and the backgroundTasks map.
describe("Task background infrastructure", () => {
  // Output files created by the current test. They are removed in afterEach
  // so cleanup still happens when an assertion fails partway through a test.
  const createdFiles: string[] = [];

  /** Creates the output file for `taskId` and registers it for cleanup. */
  const trackOutputFile = (taskId: string): string => {
    const file = createBackgroundOutputFile(taskId);
    createdFiles.push(file);
    return file;
  };

  // Clean up after each test
  afterEach(() => {
    // Clear all background tasks
    backgroundTasks.clear();
    // Remove every file the test created; `force` ignores already-missing files.
    for (const file of createdFiles.splice(0)) {
      fs.rmSync(file, { force: true });
    }
  });

  test("getNextTaskId generates sequential IDs", () => {
    const id1 = getNextTaskId();
    const id2 = getNextTaskId();
    const id3 = getNextTaskId();

    expect(id1).toMatch(/^task_\d+$/);
    expect(id2).toMatch(/^task_\d+$/);
    expect(id3).toMatch(/^task_\d+$/);

    // Extract numbers and verify they're sequential
    const num1 = parseInt(id1.replace("task_", ""), 10);
    const num2 = parseInt(id2.replace("task_", ""), 10);
    const num3 = parseInt(id3.replace("task_", ""), 10);
    expect(num2).toBe(num1 + 1);
    expect(num3).toBe(num2 + 1);
  });

  test("createBackgroundOutputFile creates file and returns path", () => {
    const taskId = getNextTaskId();
    const outputFile = trackOutputFile(taskId);

    expect(outputFile).toContain(taskId);
    expect(outputFile).toMatch(/\.log$/);
    expect(fs.existsSync(outputFile)).toBe(true);
  });

  test("appendToOutputFile writes content to file", () => {
    const taskId = getNextTaskId();
    const outputFile = trackOutputFile(taskId);

    appendToOutputFile(outputFile, "First line\n");
    appendToOutputFile(outputFile, "Second line\n");

    // Successive appends must accumulate in order, with nothing added between.
    const content = fs.readFileSync(outputFile, "utf-8");
    expect(content).toBe("First line\nSecond line\n");
  });

  test("backgroundTasks map stores and retrieves tasks", () => {
    const taskId = "task_test_1";
    const outputFile = trackOutputFile(taskId);
    const bgTask: BackgroundTask = {
      description: "Test task",
      subagentType: "explore",
      subagentId: "subagent_1",
      status: "running",
      output: [],
      startTime: new Date(),
      outputFile,
      abortController: new AbortController(),
    };
    backgroundTasks.set(taskId, bgTask);

    expect(backgroundTasks.has(taskId)).toBe(true);
    expect(backgroundTasks.get(taskId)?.description).toBe("Test task");
    expect(backgroundTasks.get(taskId)?.status).toBe("running");
  });
});
// Drives task_output against hand-built BackgroundTask entries placed directly
// in the backgroundTasks map (no real subagent is started).
describe("TaskOutput with background tasks", () => {
  // Output files created by the current test. They are removed in afterEach
  // so cleanup still happens when an assertion fails partway through a test.
  const createdFiles: string[] = [];

  /** Creates the output file for `taskId` and registers it for cleanup. */
  const trackOutputFile = (taskId: string): string => {
    const file = createBackgroundOutputFile(taskId);
    createdFiles.push(file);
    return file;
  };

  afterEach(() => {
    backgroundTasks.clear();
    // `force` makes removal a no-op for files that are already gone.
    for (const file of createdFiles.splice(0)) {
      fs.rmSync(file, { force: true });
    }
  });

  test("TaskOutput retrieves output from background task", async () => {
    const taskId = "task_output_test_1";
    const outputFile = trackOutputFile(taskId);
    const bgTask: BackgroundTask = {
      description: "Test retrieval",
      subagentType: "explore",
      subagentId: "subagent_2",
      status: "completed",
      output: ["Task completed successfully", "Found 5 files"],
      startTime: new Date(),
      outputFile,
    };
    backgroundTasks.set(taskId, bgTask);

    const result = await task_output({
      task_id: taskId,
      block: false,
      timeout: 1000,
    });

    expect(result.message).toContain("Task completed successfully");
    expect(result.message).toContain("Found 5 files");
    expect(result.status).toBe("completed");
  });

  test("TaskOutput includes error in output", async () => {
    const taskId = "task_error_test";
    const outputFile = trackOutputFile(taskId);
    const bgTask: BackgroundTask = {
      description: "Test error",
      subagentType: "general-purpose",
      subagentId: "subagent_3",
      status: "failed",
      output: ["Started processing"],
      error: "Connection timeout",
      startTime: new Date(),
      outputFile,
    };
    backgroundTasks.set(taskId, bgTask);

    const result = await task_output({
      task_id: taskId,
      block: false,
      timeout: 1000,
    });

    // Both the captured output and the error text are expected in the message.
    expect(result.message).toContain("Started processing");
    expect(result.message).toContain("Connection timeout");
    expect(result.status).toBe("failed");
  });

  test("TaskOutput with block=true waits for task completion", async () => {
    const taskId = "task_block_test";
    const outputFile = trackOutputFile(taskId);
    const bgTask: BackgroundTask = {
      description: "Test blocking",
      subagentType: "explore",
      subagentId: "subagent_4",
      status: "running",
      output: [],
      startTime: new Date(),
      outputFile,
    };
    backgroundTasks.set(taskId, bgTask);

    // Simulate task completing after 200ms
    setTimeout(() => {
      bgTask.status = "completed";
      bgTask.output.push("Task finished");
    }, 200);

    const startTime = Date.now();
    const result = await task_output({
      task_id: taskId,
      block: true,
      timeout: 5000,
    });
    const elapsed = Date.now() - startTime;

    // Should have waited for the task to complete (150ms leaves timer slack)
    expect(elapsed).toBeGreaterThanOrEqual(150);
    expect(result.status).toBe("completed");
    expect(result.message).toContain("Task finished");
  });

  test("TaskOutput respects timeout when blocking", async () => {
    const taskId = "task_timeout_test";
    const outputFile = trackOutputFile(taskId);
    const bgTask: BackgroundTask = {
      description: "Test timeout",
      subagentType: "explore",
      subagentId: "subagent_5",
      status: "running",
      output: ["Still running..."],
      startTime: new Date(),
      outputFile,
    };
    backgroundTasks.set(taskId, bgTask);

    const startTime = Date.now();
    const result = await task_output({
      task_id: taskId,
      block: true,
      timeout: 300, // Short timeout
    });
    const elapsed = Date.now() - startTime;

    // Should have timed out around 300ms
    expect(elapsed).toBeGreaterThanOrEqual(250);
    expect(elapsed).toBeLessThan(1000);
    expect(result.status).toBe("running"); // Still running after timeout
  });

  test("TaskOutput handles non-existent task_id", async () => {
    const result = await task_output({
      task_id: "nonexistent_task",
      block: false,
      timeout: 1000,
    });

    expect(result.message).toContain("No background process found");
  });
});
// Drives task_stop against hand-built BackgroundTask entries placed directly
// in the backgroundTasks map (no real subagent is started).
describe("TaskStop with background tasks", () => {
  // Output files created by the current test. They are removed in afterEach
  // so cleanup still happens when an assertion fails partway through a test.
  const createdFiles: string[] = [];

  /** Creates the output file for `taskId` and registers it for cleanup. */
  const trackOutputFile = (taskId: string): string => {
    const file = createBackgroundOutputFile(taskId);
    createdFiles.push(file);
    return file;
  };

  afterEach(() => {
    backgroundTasks.clear();
    // `force` makes removal a no-op for files that are already gone.
    for (const file of createdFiles.splice(0)) {
      fs.rmSync(file, { force: true });
    }
  });

  test("TaskStop aborts running task", async () => {
    const taskId = "task_stop_test";
    const outputFile = trackOutputFile(taskId);
    const abortController = new AbortController();
    const bgTask: BackgroundTask = {
      description: "Test abort",
      subagentType: "general-purpose",
      subagentId: "subagent_6",
      status: "running",
      output: [],
      startTime: new Date(),
      outputFile,
      abortController,
    };
    backgroundTasks.set(taskId, bgTask);

    // Verify task is running
    expect(bgTask.status).toBe("running");
    expect(abortController.signal.aborted).toBe(false);

    // Stop the task
    const result = await task_stop({ task_id: taskId });

    // The stop is expected to flag the task as failed, record the abort
    // reason, and trigger the task's abort signal.
    expect(result.killed).toBe(true);
    expect(bgTask.status).toBe("failed");
    expect(bgTask.error).toBe("Aborted by user");
    expect(abortController.signal.aborted).toBe(true);
  });

  test("TaskStop returns false for completed task", async () => {
    const taskId = "task_stop_completed";
    const outputFile = trackOutputFile(taskId);
    const bgTask: BackgroundTask = {
      description: "Completed task",
      subagentType: "explore",
      subagentId: "subagent_7",
      status: "completed",
      output: ["Done"],
      startTime: new Date(),
      outputFile,
    };
    backgroundTasks.set(taskId, bgTask);

    // Try to stop completed task
    const result = await task_stop({ task_id: taskId });

    expect(result.killed).toBe(false);
    expect(bgTask.status).toBe("completed"); // Status unchanged
  });

  test("TaskStop returns false for task without abortController", async () => {
    const taskId = "task_stop_no_abort";
    const outputFile = trackOutputFile(taskId);
    const bgTask: BackgroundTask = {
      description: "Task without abort",
      subagentType: "explore",
      subagentId: "subagent_8",
      status: "running",
      output: [],
      startTime: new Date(),
      outputFile,
      // No abortController
    };
    backgroundTasks.set(taskId, bgTask);

    const result = await task_stop({ task_id: taskId });

    expect(result.killed).toBe(false);
  });

  test("TaskStop handles non-existent task_id", async () => {
    const result = await task_stop({ task_id: "nonexistent_task" });

    expect(result.killed).toBe(false);
  });
});
// Checks that text written through appendToOutputFile can be read back from
// the output file, using lines shaped like the ones a task run would emit.
describe("Output file integration", () => {
  // Output files created by the current test. They are removed in afterEach
  // so cleanup still happens when an assertion fails partway through a test.
  const createdFiles: string[] = [];

  /** Creates the output file for `taskId` and registers it for cleanup. */
  const trackOutputFile = (taskId: string): string => {
    const file = createBackgroundOutputFile(taskId);
    createdFiles.push(file);
    return file;
  };

  afterEach(() => {
    backgroundTasks.clear();
    // `force` makes removal a no-op for files that are already gone.
    for (const file of createdFiles.splice(0)) {
      fs.rmSync(file, { force: true });
    }
  });

  test("Output file contains task progress", () => {
    const taskId = "task_file_test";
    const outputFile = trackOutputFile(taskId);

    // Simulate the output that Task.ts writes
    appendToOutputFile(outputFile, `[Task started: Find auth code]\n`);
    appendToOutputFile(outputFile, `[subagent_type: explore]\n\n`);
    appendToOutputFile(
      outputFile,
      `subagent_type=explore agent_id=agent-123\n\n`,
    );
    appendToOutputFile(outputFile, `Found authentication code in src/auth/\n`);
    appendToOutputFile(outputFile, `\n[Task completed]\n`);

    const content = fs.readFileSync(outputFile, "utf-8");
    expect(content).toContain("[Task started: Find auth code]");
    expect(content).toContain("[subagent_type: explore]");
    expect(content).toContain("agent_id=agent-123");
    expect(content).toContain("Found authentication code");
    expect(content).toContain("[Task completed]");
  });

  test("Output file contains error information", () => {
    const taskId = "task_file_error";
    const outputFile = trackOutputFile(taskId);

    // Simulate error output
    appendToOutputFile(outputFile, `[Task started: Complex analysis]\n`);
    appendToOutputFile(outputFile, `[subagent_type: general-purpose]\n\n`);
    appendToOutputFile(outputFile, `[error] Model rate limit exceeded\n`);
    appendToOutputFile(outputFile, `\n[Task failed]\n`);

    const content = fs.readFileSync(outputFile, "utf-8");
    expect(content).toContain("[Task started: Complex analysis]");
    expect(content).toContain("[error] Model rate limit exceeded");
    expect(content).toContain("[Task failed]");
  });
});

View File

@@ -0,0 +1,167 @@
import { describe, expect, test } from "bun:test";
import { bash } from "../../tools/impl/Bash";
import { backgroundProcesses } from "../../tools/impl/process_manager";
import { task_output } from "../../tools/impl/TaskOutput";
import { task_stop } from "../../tools/impl/TaskStop";
const isWindows = process.platform === "win32";
// Integration tests that run TaskOutput/TaskStop against real background
// shell processes started through the Bash tool. Skipped when the platform
// is win32.
describe.skipIf(isWindows)("TaskOutput and TaskStop", () => {
  /**
   * Starts `command` in the background via the Bash tool and returns the
   * `bash_<n>` ID parsed from the start message.
   *
   * String.prototype.match() returns null (not undefined) on a miss, so an
   * `expect(match).toBeDefined()` check can never fail; throwing here prevents
   * tests from silently continuing with the bogus ID "bash_undefined".
   *
   * @throws Error when the start message contains no `bash_<n>` ID.
   */
  const startBackground = async (
    command: string,
    description: string,
  ): Promise<string> => {
    const startResult = await bash({
      command,
      description,
      run_in_background: true,
    });
    const startText = startResult.content[0]?.text;
    const match = startText?.match(/bash_(\d+)/);
    if (!match) {
      throw new Error(
        `No bash ID found in start message: ${String(startText)}`,
      );
    }
    return `bash_${match[1]}`;
  };

  test("TaskOutput with block=false returns immediately without waiting", async () => {
    // Start a slow background process
    const taskId = await startBackground(
      "sleep 2 && echo 'done'",
      "Slow process",
    );

    try {
      // Non-blocking call should return immediately
      const startTime = Date.now();
      const result = await task_output({
        task_id: taskId,
        block: false,
        timeout: 30000,
      });
      const elapsed = Date.now() - startTime;

      // Should return in less than 500ms (not waiting for 2s sleep)
      expect(elapsed).toBeLessThan(500);
      expect(result.status).toBe("running");
    } finally {
      // Cleanup runs even when an assertion above fails, so the sleeping
      // process is not left behind.
      await task_stop({ task_id: taskId });
    }
  });

  test("TaskOutput with block=true waits for completion", async () => {
    // Start a quick background process
    const taskId = await startBackground(
      "sleep 0.3 && echo 'completed'",
      "Quick process",
    );

    // Blocking call should wait for completion
    const result = await task_output({
      task_id: taskId,
      block: true,
      timeout: 5000,
    });

    // Should have waited and gotten the output
    expect(result.message).toContain("completed");
    expect(result.status).toBe("completed");
  });

  test("TaskOutput respects timeout when blocking", async () => {
    // Start a long-running process
    const taskId = await startBackground("sleep 10", "Long process");

    try {
      // Block with short timeout
      const startTime = Date.now();
      const result = await task_output({
        task_id: taskId,
        block: true,
        timeout: 300, // 300ms timeout
      });
      const elapsed = Date.now() - startTime;

      // Should have timed out around 300ms, not waited for 10s
      expect(elapsed).toBeLessThan(1000);
      expect(elapsed).toBeGreaterThanOrEqual(250); // Allow some tolerance
      expect(result.status).toBe("running"); // Still running after timeout
    } finally {
      // Cleanup runs even when an assertion above fails.
      await task_stop({ task_id: taskId });
    }
  });

  test("TaskOutput handles non-existent task_id", async () => {
    const result = await task_output({
      task_id: "nonexistent_task",
      block: false,
      timeout: 1000,
    });

    expect(result.message).toContain("No background process found");
  });

  test("TaskStop terminates process using task_id", async () => {
    // Start long-running process
    const taskId = await startBackground("sleep 10", "Process to kill");

    // Kill using task_id
    const killResult = await task_stop({ task_id: taskId });
    expect(killResult.killed).toBe(true);

    // Verify process is gone
    expect(backgroundProcesses.has(taskId)).toBe(false);
  });

  test("TaskStop supports deprecated shell_id parameter", async () => {
    // Start long-running process
    const shellId = await startBackground("sleep 10", "Process to kill");

    // Kill using deprecated shell_id
    const killResult = await task_stop({ shell_id: shellId });
    expect(killResult.killed).toBe(true);
  });

  test("TaskStop handles non-existent task_id", async () => {
    const result = await task_stop({ task_id: "nonexistent" });
    expect(result.killed).toBe(false);
  });

  test("TaskOutput defaults to block=true", async () => {
    // Start a quick background process
    const taskId = await startBackground(
      "sleep 0.2 && echo 'default-block-test'",
      "Default block test",
    );

    // Call without specifying block - should default to true
    const result = await task_output({
      task_id: taskId,
      timeout: 5000,
    });

    // Should have waited and gotten the output
    expect(result.message).toContain("default-block-test");
    expect(result.status).toBe("completed");
  });
});