From 8e5bc3956fa782f568e7be7d7486e4d09985e2e5 Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 9 Feb 2026 14:49:38 -0800
Subject: [PATCH] fix: propagate max-step failures and unify task transcripts
 (#874)

Co-authored-by: Letta <noreply@letta.com>
---
 src/agent/export.ts                           |   8 +-
 src/agent/subagents/manager.ts                |   5 +-
 .../lazy-approval-recovery.test.ts            |  70 +++++++++--
 .../subagent-max-steps-propagation.test.ts    |  20 +++
 .../tools/task-foreground-transcript.test.ts  |  29 +++++
 src/tools/impl/Task.ts                        | 114 ++++++++++++------
 6 files changed, 198 insertions(+), 48 deletions(-)
 create mode 100644 src/tests/agent/subagent-max-steps-propagation.test.ts
 create mode 100644 src/tests/tools/task-foreground-transcript.test.ts

diff --git a/src/agent/export.ts b/src/agent/export.ts
index 30650f3..dadbf88 100644
--- a/src/agent/export.ts
+++ b/src/agent/export.ts
@@ -53,8 +53,12 @@ export async function packageSkills(
           continue;
         }
 
-        // Check if skill exists in known repos (prefer source_url over embedding)
-        const sourceUrl = await findSkillSourceUrl(entry.name);
+        // Check if skill exists in known repos (prefer source_url over embedding).
+        // When an explicit skillsDir is provided, skip network lookup and always
+        // package local files for deterministic behavior (especially in tests).
+        const sourceUrl = skillsDir
+          ? null
+          : await findSkillSourceUrl(entry.name);
 
         const skill: SkillSchema = { name: entry.name };
 
diff --git a/src/agent/subagents/manager.ts b/src/agent/subagents/manager.ts
index 9dd13b4..5879180 100644
--- a/src/agent/subagents/manager.ts
+++ b/src/agent/subagents/manager.ts
@@ -685,12 +685,15 @@ async function executeSubagent(
         }
       }
 
+      const propagatedError = state.finalError?.trim();
+      const fallbackError = stderr || `Subagent exited with code ${exitCode}`;
+
       return {
         agentId: state.agentId || "",
         conversationId: state.conversationId || undefined,
         report: "",
         success: false,
-        error: stderr || `Subagent exited with code ${exitCode}`,
+        error: propagatedError || fallbackError,
       };
     }
 
diff --git a/src/integration-tests/lazy-approval-recovery.test.ts b/src/integration-tests/lazy-approval-recovery.test.ts
index 8efe01e..9bf2145 100644
--- a/src/integration-tests/lazy-approval-recovery.test.ts
+++ b/src/integration-tests/lazy-approval-recovery.test.ts
@@ -33,6 +33,8 @@ interface StreamMessage {
   subtype?: string;
   message_type?: string;
   stop_reason?: string;
+  request_id?: string;
+  request?: { subtype?: string };
   // biome-ignore lint/suspicious/noExplicitAny: index signature for arbitrary JSON fields
   [key: string]: any;
 }
@@ -77,6 +79,8 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
     let errorSeen = false;
     let resultCount = 0;
     let closing = false;
+    let waitingForApprovalControlRequest = false;
+    let pendingToolCallId: string | undefined;
 
     const timeout = setTimeout(() => {
       if (!closing) {
@@ -105,6 +109,39 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
           console.log("MSG:", JSON.stringify(msg, null, 2));
         }
 
+        // Handle permission control requests from headless.
+        // We only deny the can_use_tool request after seeing an approval_request_message
+        // so the test still exercises the pending-approval flow.
+        if (
+          msg.type === "control_request" &&
+          msg.request?.subtype === "can_use_tool" &&
+          waitingForApprovalControlRequest
+        ) {
+          const requestId = msg.request_id;
+          if (requestId) {
+            if (pendingToolCallId && !requestId.endsWith(pendingToolCallId)) {
+              console.log(
+                `Note: control_request id ${requestId} did not match expected tool id ${pendingToolCallId}`,
+              );
+            }
+
+            const denyApproval = JSON.stringify({
+              type: "control_response",
+              response: {
+                request_id: requestId,
+                response: {
+                  behavior: "deny",
+                  message:
+                    "Denied by integration test to simulate stale approval",
+                },
+              },
+            });
+            proc.stdin?.write(`${denyApproval}\n`);
+            waitingForApprovalControlRequest = false;
+          }
+          return;
+        }
+
         // Step 1: Wait for init, then send bash trigger prompt
         if (msg.type === "system" && msg.subtype === "init" && !initReceived) {
           initReceived = true;
@@ -116,23 +153,36 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
           return;
         }
 
-        // Step 2: When we see approval request, send another user message instead
+        // Step 2: When we see approval request, send another user message instead,
+        // then explicitly deny the pending approval so the flow can complete in
+        // headless stream-json mode (which waits for approval responses).
         if (
           msg.type === "message" &&
           msg.message_type === "approval_request_message" &&
           !approvalSeen
         ) {
           approvalSeen = true;
-          // Wait a moment, then send interrupt message (NOT an approval)
+
+          const toolCall = Array.isArray(msg.tool_call)
+            ? msg.tool_call[0]
+            : msg.tool_call;
+          pendingToolCallId =
+            toolCall && typeof toolCall === "object"
+              ? (toolCall as { tool_call_id?: string }).tool_call_id
+              : undefined;
+          waitingForApprovalControlRequest = true;
+
+          // Wait a moment, then send interrupt message (approval deny will be sent
+          // only after we see the corresponding control_request from headless).
           setTimeout(() => {
-            if (!interruptSent) {
-              interruptSent = true;
-              const userMsg = JSON.stringify({
-                type: "user",
-                message: { role: "user", content: INTERRUPT_MESSAGE },
-              });
-              proc.stdin?.write(`${userMsg}\n`);
-            }
+            if (interruptSent) return;
+            interruptSent = true;
+
+            const userMsg = JSON.stringify({
+              type: "user",
+              message: { role: "user", content: INTERRUPT_MESSAGE },
+            });
+            proc.stdin?.write(`${userMsg}\n`);
           }, 500);
           return;
         }
diff --git a/src/tests/agent/subagent-max-steps-propagation.test.ts b/src/tests/agent/subagent-max-steps-propagation.test.ts
new file mode 100644
index 0000000..48b3d31
--- /dev/null
+++ b/src/tests/agent/subagent-max-steps-propagation.test.ts
@@ -0,0 +1,20 @@
+import { describe, expect, test } from "bun:test";
+import { readFileSync } from "node:fs";
+import { fileURLToPath } from "node:url";
+
+describe("subagent max_steps error propagation", () => {
+  test("non-zero exit path prefers parsed finalError over generic exit code", () => {
+    const managerPath = fileURLToPath(
+      new URL("../../agent/subagents/manager.ts", import.meta.url),
+    );
+    const source = readFileSync(managerPath, "utf-8");
+
+    expect(source).toContain(
+      "const propagatedError = state.finalError?.trim();",
+    );
+    expect(source).toContain(
+      `const fallbackError = stderr || \`Subagent exited with code \${exitCode}\`;`,
+    );
+    expect(source).toContain("error: propagatedError || fallbackError");
+  });
+});
diff --git a/src/tests/tools/task-foreground-transcript.test.ts b/src/tests/tools/task-foreground-transcript.test.ts
new file mode 100644
index 0000000..b0d2fd2
--- /dev/null
+++ b/src/tests/tools/task-foreground-transcript.test.ts
@@ -0,0 +1,29 @@
+import { describe, expect, test } from "bun:test";
+import { readFileSync } from "node:fs";
+import { fileURLToPath } from "node:url";
+
+describe("Task foreground transcript wiring", () => {
+  test("writes foreground transcript start and result output", () => {
+    const taskPath = fileURLToPath(
+      new URL("../../tools/impl/Task.ts", import.meta.url),
+    );
+    const source = readFileSync(taskPath, "utf-8");
+
+    expect(source).toContain("const foregroundTaskId = getNextTaskId();");
+    expect(source).toContain(
+      "const outputFile = createBackgroundOutputFile(foregroundTaskId);",
+    );
+    expect(source).toContain(
+      "writeTaskTranscriptStart(outputFile, description, subagent_type);",
+    );
+    expect(source).toContain(
+      "writeTaskTranscriptResult(outputFile, result, header);",
+    );
+    expect(source).toContain(
+      `return \`\${truncatedOutput}\\nOutput file: \${outputFile}\`;`,
+    );
+    expect(source).toContain(
+      `return \`Error: \${errorMessage}\\nOutput file: \${outputFile}\`;`,
+    );
+  });
+});
diff --git a/src/tools/impl/Task.ts b/src/tools/impl/Task.ts
index e35c82a..2a71a24 100644
--- a/src/tools/impl/Task.ts
+++ b/src/tools/impl/Task.ts
@@ -47,6 +47,60 @@ interface TaskArgs {
 // Valid subagent_types when deploying an existing agent
 const VALID_DEPLOY_TYPES = new Set(["explore", "general-purpose"]);
 
+type TaskRunResult = {
+  agentId: string;
+  conversationId?: string;
+  report: string;
+  success: boolean;
+  error?: string;
+  totalTokens?: number;
+};
+
+function buildTaskResultHeader(
+  subagentType: string,
+  result: Pick<TaskRunResult, "agentId" | "conversationId">,
+): string {
+  return [
+    `subagent_type=${subagentType}`,
+    result.agentId ? `agent_id=${result.agentId}` : undefined,
+    result.conversationId
+      ? `conversation_id=${result.conversationId}`
+      : undefined,
+  ]
+    .filter(Boolean)
+    .join(" ");
+}
+
+function writeTaskTranscriptStart(
+  outputFile: string,
+  description: string,
+  subagentType: string,
+): void {
+  appendToOutputFile(
+    outputFile,
+    `[Task started: ${description}]\n[subagent_type: ${subagentType}]\n\n`,
+  );
+}
+
+function writeTaskTranscriptResult(
+  outputFile: string,
+  result: TaskRunResult,
+  header: string,
+): void {
+  if (result.success) {
+    appendToOutputFile(
+      outputFile,
+      `${header}\n\n${result.report}\n\n[Task completed]\n`,
+    );
+    return;
+  }
+
+  appendToOutputFile(
+    outputFile,
+    `[error] ${result.error || "Subagent execution failed"}\n\n[Task failed]\n`,
+  );
+}
+
 /**
  * Task tool - Launch a specialized subagent to handle complex tasks
  */
@@ -151,10 +205,7 @@ export async function task(args: TaskArgs): Promise<string> {
     backgroundTasks.set(taskId, bgTask);
 
     // Write initial status to output file
-    appendToOutputFile(
-      outputFile,
-      `[Task started: ${description}]\n[subagent_type: ${subagent_type}]\n\n`,
-    );
+    writeTaskTranscriptStart(outputFile, description, subagent_type);
 
     // Fire-and-forget: run subagent without awaiting
     spawnSubagent(
@@ -175,30 +226,13 @@ export async function task(args: TaskArgs): Promise<string> {
         }
 
         // Build output header
-        const header = [
-          `subagent_type=${subagent_type}`,
-          result.agentId ? `agent_id=${result.agentId}` : undefined,
-          result.conversationId
-            ? `conversation_id=${result.conversationId}`
-            : undefined,
-        ]
-          .filter(Boolean)
-          .join(" ");
+        const header = buildTaskResultHeader(subagent_type, result);
 
         // Write result to output file
+        writeTaskTranscriptResult(outputFile, result, header);
         if (result.success) {
-          appendToOutputFile(outputFile, `${header}\n\n${result.report}\n`);
           bgTask.output.push(result.report || "");
-        } else {
-          appendToOutputFile(
-            outputFile,
-            `[error] ${result.error || "Subagent execution failed"}\n`,
-          );
         }
-        appendToOutputFile(
-          outputFile,
-          `\n[Task ${result.success ? "completed" : "failed"}]\n`,
-        );
 
         // Mark subagent as completed in state store
         completeSubagent(subagentId, {
@@ -299,6 +333,12 @@ export async function task(args: TaskArgs): Promise<string> {
     return `Task running in background with ID: ${taskId}\nOutput file: ${outputFile}`;
   }
 
+  // Foreground tasks now also write transcripts so users can inspect full output
+  // even when inline content is truncated.
+  const foregroundTaskId = getNextTaskId();
+  const outputFile = createBackgroundOutputFile(foregroundTaskId);
+  writeTaskTranscriptStart(outputFile, description, subagent_type);
+
   try {
     const result = await spawnSubagent(
       subagent_type,
@@ -331,22 +371,22 @@ export async function task(args: TaskArgs): Promise<string> {
     });
 
     if (!result.success) {
-      return `Error: ${result.error || "Subagent execution failed"}`;
+      const errorMessage = result.error || "Subagent execution failed";
+      const failedResult: TaskRunResult = {
+        ...result,
+        error: errorMessage,
+      };
+      writeTaskTranscriptResult(outputFile, failedResult, "");
+      return `Error: ${errorMessage}\nOutput file: ${outputFile}`;
     }
 
     // Include stable subagent metadata so orchestrators can attribute results.
     // Keep the tool return type as a string for compatibility.
-    const header = [
-      `subagent_type=${subagent_type}`,
-      result.agentId ? `agent_id=${result.agentId}` : undefined,
-      result.conversationId
-        ? `conversation_id=${result.conversationId}`
-        : undefined,
-    ]
-      .filter(Boolean)
-      .join(" ");
+    const header = buildTaskResultHeader(subagent_type, result);
 
     const fullOutput = `${header}\n\n${result.report}`;
+    writeTaskTranscriptResult(outputFile, result, header);
+
     const userCwd = process.env.USER_CWD || process.cwd();
 
     // Apply truncation to prevent excessive token usage (same pattern as Bash tool)
@@ -357,7 +397,7 @@ export async function task(args: TaskArgs): Promise<string> {
       { workingDirectory: userCwd, toolName: "Task" },
     );
 
-    return truncatedOutput;
+    return `${truncatedOutput}\nOutput file: ${outputFile}`;
   } catch (error) {
     const errorMessage = error instanceof Error ? error.message : String(error);
     completeSubagent(subagentId, { success: false, error: errorMessage });
@@ -374,6 +414,10 @@ export async function task(args: TaskArgs): Promise<string> {
       // Silently ignore hook errors
     });
 
-    return `Error: ${errorMessage}`;
+    appendToOutputFile(
+      outputFile,
+      `[error] ${errorMessage}\n\n[Task failed]\n`,
+    );
+    return `Error: ${errorMessage}\nOutput file: ${outputFile}`;
   }
 }