From 8e5bc3956fa782f568e7be7d7486e4d09985e2e5 Mon Sep 17 00:00:00 2001 From: Charles Packer Date: Mon, 9 Feb 2026 14:49:38 -0800 Subject: [PATCH] fix: propagate max-step failures and unify task transcripts (#874) Co-authored-by: Letta --- src/agent/export.ts | 8 +- src/agent/subagents/manager.ts | 5 +- .../lazy-approval-recovery.test.ts | 70 +++++++++-- .../subagent-max-steps-propagation.test.ts | 20 +++ .../tools/task-foreground-transcript.test.ts | 29 +++++ src/tools/impl/Task.ts | 114 ++++++++++++------ 6 files changed, 198 insertions(+), 48 deletions(-) create mode 100644 src/tests/agent/subagent-max-steps-propagation.test.ts create mode 100644 src/tests/tools/task-foreground-transcript.test.ts diff --git a/src/agent/export.ts b/src/agent/export.ts index 30650f3..dadbf88 100644 --- a/src/agent/export.ts +++ b/src/agent/export.ts @@ -53,8 +53,12 @@ export async function packageSkills( continue; } - // Check if skill exists in known repos (prefer source_url over embedding) - const sourceUrl = await findSkillSourceUrl(entry.name); + // Check if skill exists in known repos (prefer source_url over embedding). + // When an explicit skillsDir is provided, skip network lookup and always + // package local files for deterministic behavior (especially in tests). + const sourceUrl = skillsDir + ? null + : await findSkillSourceUrl(entry.name); const skill: SkillSchema = { name: entry.name }; diff --git a/src/agent/subagents/manager.ts b/src/agent/subagents/manager.ts index 9dd13b4..5879180 100644 --- a/src/agent/subagents/manager.ts +++ b/src/agent/subagents/manager.ts @@ -685,12 +685,15 @@ async function executeSubagent( } } + const propagatedError = state.finalError?.trim(); + const fallbackError = stderr || `Subagent exited with code ${exitCode}`; + return { agentId: state.agentId || "", conversationId: state.conversationId || undefined, report: "", success: false, - error: stderr || `Subagent exited with code ${exitCode}`, + error: propagatedError || fallbackError, }; } diff --git a/src/integration-tests/lazy-approval-recovery.test.ts b/src/integration-tests/lazy-approval-recovery.test.ts index 8efe01e..9bf2145 100644 --- a/src/integration-tests/lazy-approval-recovery.test.ts +++ b/src/integration-tests/lazy-approval-recovery.test.ts @@ -33,6 +33,8 @@ interface StreamMessage { subtype?: string; message_type?: string; stop_reason?: string; + request_id?: string; + request?: { subtype?: string }; // biome-ignore lint/suspicious/noExplicitAny: index signature for arbitrary JSON fields [key: string]: any; } @@ -77,6 +79,8 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{ let errorSeen = false; let resultCount = 0; let closing = false; + let waitingForApprovalControlRequest = false; + let pendingToolCallId: string | undefined; const timeout = setTimeout(() => { if (!closing) { @@ -105,6 +109,39 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{ console.log("MSG:", JSON.stringify(msg, null, 2)); } + // Handle permission control requests from headless. + // We only deny the can_use_tool request after seeing an approval_request_message + // so the test still exercises the pending-approval flow. + if ( + msg.type === "control_request" && + msg.request?.subtype === "can_use_tool" && + waitingForApprovalControlRequest + ) { + const requestId = msg.request_id; + if (requestId) { + if (pendingToolCallId && !requestId.endsWith(pendingToolCallId)) { + console.log( + `Note: control_request id ${requestId} did not match expected tool id ${pendingToolCallId}`, + ); + } + + const denyApproval = JSON.stringify({ + type: "control_response", + response: { + request_id: requestId, + response: { + behavior: "deny", + message: + "Denied by integration test to simulate stale approval", + }, + }, + }); + proc.stdin?.write(`${denyApproval}\n`); + waitingForApprovalControlRequest = false; + } + return; + } + // Step 1: Wait for init, then send bash trigger prompt if (msg.type === "system" && msg.subtype === "init" && !initReceived) { initReceived = true; @@ -116,23 +153,36 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{ return; } - // Step 2: When we see approval request, send another user message instead + // Step 2: When we see approval request, send another user message instead, + // then explicitly deny the pending approval so the flow can complete in + // headless stream-json mode (which waits for approval responses). if ( msg.type === "message" && msg.message_type === "approval_request_message" && !approvalSeen ) { approvalSeen = true; - // Wait a moment, then send interrupt message (NOT an approval) + + const toolCall = Array.isArray(msg.tool_call) + ? msg.tool_call[0] + : msg.tool_call; + pendingToolCallId = + toolCall && typeof toolCall === "object" + ? (toolCall as { tool_call_id?: string }).tool_call_id + : undefined; + waitingForApprovalControlRequest = true; + + // Wait a moment, then send interrupt message (approval deny will be sent + // only after we see the corresponding control_request from headless). setTimeout(() => { - if (!interruptSent) { - interruptSent = true; - const userMsg = JSON.stringify({ - type: "user", - message: { role: "user", content: INTERRUPT_MESSAGE }, - }); - proc.stdin?.write(`${userMsg}\n`); - } + if (interruptSent) return; + interruptSent = true; + + const userMsg = JSON.stringify({ + type: "user", + message: { role: "user", content: INTERRUPT_MESSAGE }, + }); + proc.stdin?.write(`${userMsg}\n`); }, 500); return; } diff --git a/src/tests/agent/subagent-max-steps-propagation.test.ts b/src/tests/agent/subagent-max-steps-propagation.test.ts new file mode 100644 index 0000000..48b3d31 --- /dev/null +++ b/src/tests/agent/subagent-max-steps-propagation.test.ts @@ -0,0 +1,20 @@ +import { describe, expect, test } from "bun:test"; +import { readFileSync } from "node:fs"; +import { fileURLToPath } from "node:url"; + +describe("subagent max_steps error propagation", () => { + test("non-zero exit path prefers parsed finalError over generic exit code", () => { + const managerPath = fileURLToPath( + new URL("../../agent/subagents/manager.ts", import.meta.url), + ); + const source = readFileSync(managerPath, "utf-8"); + + expect(source).toContain( + "const propagatedError = state.finalError?.trim();", + ); + expect(source).toContain( + `const fallbackError = stderr || \`Subagent exited with code \${exitCode}\`;`, + ); + expect(source).toContain("error: propagatedError || fallbackError"); + }); +}); diff --git a/src/tests/tools/task-foreground-transcript.test.ts b/src/tests/tools/task-foreground-transcript.test.ts new file mode 100644 index 0000000..b0d2fd2 --- /dev/null +++ b/src/tests/tools/task-foreground-transcript.test.ts @@ -0,0 +1,29 @@ +import { describe, expect, test } from "bun:test"; +import { readFileSync } from "node:fs"; +import { fileURLToPath } from "node:url"; + +describe("Task foreground transcript wiring", () => { + test("writes foreground transcript start and result output", () => { + const taskPath = fileURLToPath( + new URL("../../tools/impl/Task.ts", import.meta.url), + ); + const source = readFileSync(taskPath, "utf-8"); + + expect(source).toContain("const foregroundTaskId = getNextTaskId();"); + expect(source).toContain( + "const outputFile = createBackgroundOutputFile(foregroundTaskId);", + ); + expect(source).toContain( + "writeTaskTranscriptStart(outputFile, description, subagent_type);", + ); + expect(source).toContain( + "writeTaskTranscriptResult(outputFile, result, header);", + ); + expect(source).toContain( + `return \`\${truncatedOutput}\\nOutput file: \${outputFile}\`;`, + ); + expect(source).toContain( + `return \`Error: \${errorMessage}\\nOutput file: \${outputFile}\`;`, + ); + }); +}); diff --git a/src/tools/impl/Task.ts b/src/tools/impl/Task.ts index e35c82a..2a71a24 100644 --- a/src/tools/impl/Task.ts +++ b/src/tools/impl/Task.ts @@ -47,6 +47,60 @@ interface TaskArgs { // Valid subagent_types when deploying an existing agent const VALID_DEPLOY_TYPES = new Set(["explore", "general-purpose"]); +type TaskRunResult = { + agentId: string; + conversationId?: string; + report: string; + success: boolean; + error?: string; + totalTokens?: number; +}; + +function buildTaskResultHeader( + subagentType: string, + result: Pick, +): string { + return [ + `subagent_type=${subagentType}`, + result.agentId ? `agent_id=${result.agentId}` : undefined, + result.conversationId + ? `conversation_id=${result.conversationId}` + : undefined, + ] + .filter(Boolean) + .join(" "); +} + +function writeTaskTranscriptStart( + outputFile: string, + description: string, + subagentType: string, +): void { + appendToOutputFile( + outputFile, + `[Task started: ${description}]\n[subagent_type: ${subagentType}]\n\n`, + ); +} + +function writeTaskTranscriptResult( + outputFile: string, + result: TaskRunResult, + header: string, +): void { + if (result.success) { + appendToOutputFile( + outputFile, + `${header}\n\n${result.report}\n\n[Task completed]\n`, + ); + return; + } + + appendToOutputFile( + outputFile, + `[error] ${result.error || "Subagent execution failed"}\n\n[Task failed]\n`, + ); +} + /** * Task tool - Launch a specialized subagent to handle complex tasks */ @@ -151,10 +205,7 @@ export async function task(args: TaskArgs): Promise { backgroundTasks.set(taskId, bgTask); // Write initial status to output file - appendToOutputFile( - outputFile, - `[Task started: ${description}]\n[subagent_type: ${subagent_type}]\n\n`, - ); + writeTaskTranscriptStart(outputFile, description, subagent_type); // Fire-and-forget: run subagent without awaiting spawnSubagent( @@ -175,30 +226,13 @@ export async function task(args: TaskArgs): Promise { } // Build output header - const header = [ - `subagent_type=${subagent_type}`, - result.agentId ? `agent_id=${result.agentId}` : undefined, - result.conversationId - ? `conversation_id=${result.conversationId}` - : undefined, - ] - .filter(Boolean) - .join(" "); + const header = buildTaskResultHeader(subagent_type, result); // Write result to output file + writeTaskTranscriptResult(outputFile, result, header); if (result.success) { - appendToOutputFile(outputFile, `${header}\n\n${result.report}\n`); bgTask.output.push(result.report || ""); - } else { - appendToOutputFile( - outputFile, - `[error] ${result.error || "Subagent execution failed"}\n`, - ); } - appendToOutputFile( - outputFile, - `\n[Task ${result.success ? "completed" : "failed"}]\n`, - ); // Mark subagent as completed in state store completeSubagent(subagentId, { @@ -299,6 +333,12 @@ export async function task(args: TaskArgs): Promise { return `Task running in background with ID: ${taskId}\nOutput file: ${outputFile}`; } + // Foreground tasks now also write transcripts so users can inspect full output + // even when inline content is truncated. + const foregroundTaskId = getNextTaskId(); + const outputFile = createBackgroundOutputFile(foregroundTaskId); + writeTaskTranscriptStart(outputFile, description, subagent_type); + try { const result = await spawnSubagent( subagent_type, @@ -331,22 +371,22 @@ export async function task(args: TaskArgs): Promise { }); if (!result.success) { - return `Error: ${result.error || "Subagent execution failed"}`; + const errorMessage = result.error || "Subagent execution failed"; + const failedResult: TaskRunResult = { + ...result, + error: errorMessage, + }; + writeTaskTranscriptResult(outputFile, failedResult, ""); + return `Error: ${errorMessage}\nOutput file: ${outputFile}`; } // Include stable subagent metadata so orchestrators can attribute results. // Keep the tool return type as a string for compatibility. - const header = [ - `subagent_type=${subagent_type}`, - result.agentId ? `agent_id=${result.agentId}` : undefined, - result.conversationId - ? `conversation_id=${result.conversationId}` - : undefined, - ] - .filter(Boolean) - .join(" "); + const header = buildTaskResultHeader(subagent_type, result); const fullOutput = `${header}\n\n${result.report}`; + writeTaskTranscriptResult(outputFile, result, header); + const userCwd = process.env.USER_CWD || process.cwd(); // Apply truncation to prevent excessive token usage (same pattern as Bash tool) @@ -357,7 +397,7 @@ export async function task(args: TaskArgs): Promise { { workingDirectory: userCwd, toolName: "Task" }, ); - return truncatedOutput; + return `${truncatedOutput}\nOutput file: ${outputFile}`; } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); completeSubagent(subagentId, { success: false, error: errorMessage }); @@ -374,6 +414,10 @@ export async function task(args: TaskArgs): Promise { // Silently ignore hook errors }); - return `Error: ${errorMessage}`; + appendToOutputFile( + outputFile, + `[error] ${errorMessage}\n\n[Task failed]\n`, + ); + return `Error: ${errorMessage}\nOutput file: ${outputFile}`; } }