fix: propagate max-step failures and unify task transcripts (#874)

Co-authored-by: Letta <noreply@letta.com>
2026-02-09 14:49:38 -08:00
parent 101fc6f874
commit 8e5bc3956f
6 changed files with 198 additions and 48 deletions
--- a/src/agent/export.ts
+++ b/src/agent/export.ts
@@ -53,8 +53,12 @@ export async function packageSkills(
          continue;
        }

-        // Check if skill exists in known repos (prefer source_url over embedding)
-        const sourceUrl = await findSkillSourceUrl(entry.name);
+        // Check if skill exists in known repos (prefer source_url over embedding).
+        // When an explicit skillsDir is provided, skip network lookup and always
+        // package local files for deterministic behavior (especially in tests).
+        const sourceUrl = skillsDir
+          ? null
+          : await findSkillSourceUrl(entry.name);

        const skill: SkillSchema = { name: entry.name };

--- a/src/agent/subagents/manager.ts
+++ b/src/agent/subagents/manager.ts
@@ -685,12 +685,15 @@ async function executeSubagent(
        }
      }

+      const propagatedError = state.finalError?.trim();
+      const fallbackError = stderr || `Subagent exited with code ${exitCode}`;
+
      return {
        agentId: state.agentId || "",
        conversationId: state.conversationId || undefined,
        report: "",
        success: false,
-        error: stderr || `Subagent exited with code ${exitCode}`,
+        error: propagatedError || fallbackError,
      };
    }

--- a/src/integration-tests/lazy-approval-recovery.test.ts
+++ b/src/integration-tests/lazy-approval-recovery.test.ts
@@ -33,6 +33,8 @@ interface StreamMessage {
  subtype?: string;
  message_type?: string;
  stop_reason?: string;
+  request_id?: string;
+  request?: { subtype?: string };
  // biome-ignore lint/suspicious/noExplicitAny: index signature for arbitrary JSON fields
  [key: string]: any;
 }
@@ -77,6 +79,8 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
    let errorSeen = false;
    let resultCount = 0;
    let closing = false;
+    let waitingForApprovalControlRequest = false;
+    let pendingToolCallId: string | undefined;

    const timeout = setTimeout(() => {
      if (!closing) {
@@ -105,6 +109,39 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
          console.log("MSG:", JSON.stringify(msg, null, 2));
        }

+        // Handle permission control requests from headless.
+        // We only deny the can_use_tool request after seeing an approval_request_message
+        // so the test still exercises the pending-approval flow.
+        if (
+          msg.type === "control_request" &&
+          msg.request?.subtype === "can_use_tool" &&
+          waitingForApprovalControlRequest
+        ) {
+          const requestId = msg.request_id;
+          if (requestId) {
+            if (pendingToolCallId && !requestId.endsWith(pendingToolCallId)) {
+              console.log(
+                `Note: control_request id ${requestId} did not match expected tool id ${pendingToolCallId}`,
+              );
+            }
+
+            const denyApproval = JSON.stringify({
+              type: "control_response",
+              response: {
+                request_id: requestId,
+                response: {
+                  behavior: "deny",
+                  message:
+                    "Denied by integration test to simulate stale approval",
+                },
+              },
+            });
+            proc.stdin?.write(`${denyApproval}\n`);
+            waitingForApprovalControlRequest = false;
+          }
+          return;
+        }
+
        // Step 1: Wait for init, then send bash trigger prompt
        if (msg.type === "system" && msg.subtype === "init" && !initReceived) {
          initReceived = true;
@@ -116,23 +153,36 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
          return;
        }

-        // Step 2: When we see approval request, send another user message instead
+        // Step 2: When we see approval request, send another user message instead,
+        // then explicitly deny the pending approval so the flow can complete in
+        // headless stream-json mode (which waits for approval responses).
        if (
          msg.type === "message" &&
          msg.message_type === "approval_request_message" &&
          !approvalSeen
        ) {
          approvalSeen = true;
-          // Wait a moment, then send interrupt message (NOT an approval)
+
+          const toolCall = Array.isArray(msg.tool_call)
+            ? msg.tool_call[0]
+            : msg.tool_call;
+          pendingToolCallId =
+            toolCall && typeof toolCall === "object"
+              ? (toolCall as { tool_call_id?: string }).tool_call_id
+              : undefined;
+          waitingForApprovalControlRequest = true;
+
+          // Wait a moment, then send interrupt message (approval deny will be sent
+          // only after we see the corresponding control_request from headless).
          setTimeout(() => {
-            if (!interruptSent) {
-              interruptSent = true;
-              const userMsg = JSON.stringify({
-                type: "user",
-                message: { role: "user", content: INTERRUPT_MESSAGE },
-              });
-              proc.stdin?.write(`${userMsg}\n`);
-            }
+            if (interruptSent) return;
+            interruptSent = true;
+
+            const userMsg = JSON.stringify({
+              type: "user",
+              message: { role: "user", content: INTERRUPT_MESSAGE },
+            });
+            proc.stdin?.write(`${userMsg}\n`);
          }, 500);
          return;
        }
--- a/src/tests/agent/subagent-max-steps-propagation.test.ts
+++ b/src/tests/agent/subagent-max-steps-propagation.test.ts
@@ -0,0 +1,20 @@
+import { describe, expect, test } from "bun:test";
+import { readFileSync } from "node:fs";
+import { fileURLToPath } from "node:url";
+
+describe("subagent max_steps error propagation", () => {
+  test("non-zero exit path prefers parsed finalError over generic exit code", () => {
+    const managerPath = fileURLToPath(
+      new URL("../../agent/subagents/manager.ts", import.meta.url),
+    );
+    const source = readFileSync(managerPath, "utf-8");
+
+    expect(source).toContain(
+      "const propagatedError = state.finalError?.trim();",
+    );
+    expect(source).toContain(
+      `const fallbackError = stderr || \`Subagent exited with code \${exitCode}\`;`,
+    );
+    expect(source).toContain("error: propagatedError || fallbackError");
+  });
+});
--- a/src/tests/tools/task-foreground-transcript.test.ts
+++ b/src/tests/tools/task-foreground-transcript.test.ts
@@ -0,0 +1,29 @@
+import { describe, expect, test } from "bun:test";
+import { readFileSync } from "node:fs";
+import { fileURLToPath } from "node:url";
+
+describe("Task foreground transcript wiring", () => {
+  test("writes foreground transcript start and result output", () => {
+    const taskPath = fileURLToPath(
+      new URL("../../tools/impl/Task.ts", import.meta.url),
+    );
+    const source = readFileSync(taskPath, "utf-8");
+
+    expect(source).toContain("const foregroundTaskId = getNextTaskId();");
+    expect(source).toContain(
+      "const outputFile = createBackgroundOutputFile(foregroundTaskId);",
+    );
+    expect(source).toContain(
+      "writeTaskTranscriptStart(outputFile, description, subagent_type);",
+    );
+    expect(source).toContain(
+      "writeTaskTranscriptResult(outputFile, result, header);",
+    );
+    expect(source).toContain(
+      `return \`\${truncatedOutput}\\nOutput file: \${outputFile}\`;`,
+    );
+    expect(source).toContain(
+      `return \`Error: \${errorMessage}\\nOutput file: \${outputFile}\`;`,
+    );
+  });
+});
--- a/src/tools/impl/Task.ts
+++ b/src/tools/impl/Task.ts
@@ -47,6 +47,60 @@ interface TaskArgs {
 // Valid subagent_types when deploying an existing agent
 const VALID_DEPLOY_TYPES = new Set(["explore", "general-purpose"]);

+type TaskRunResult = {
+  agentId: string;
+  conversationId?: string;
+  report: string;
+  success: boolean;
+  error?: string;
+  totalTokens?: number;
+};
+
+function buildTaskResultHeader(
+  subagentType: string,
+  result: Pick<TaskRunResult, "agentId" | "conversationId">,
+): string {
+  return [
+    `subagent_type=${subagentType}`,
+    result.agentId ? `agent_id=${result.agentId}` : undefined,
+    result.conversationId
+      ? `conversation_id=${result.conversationId}`
+      : undefined,
+  ]
+    .filter(Boolean)
+    .join(" ");
+}
+
+function writeTaskTranscriptStart(
+  outputFile: string,
+  description: string,
+  subagentType: string,
+): void {
+  appendToOutputFile(
+    outputFile,
+    `[Task started: ${description}]\n[subagent_type: ${subagentType}]\n\n`,
+  );
+}
+
+function writeTaskTranscriptResult(
+  outputFile: string,
+  result: TaskRunResult,
+  header: string,
+): void {
+  if (result.success) {
+    appendToOutputFile(
+      outputFile,
+      `${header}\n\n${result.report}\n\n[Task completed]\n`,
+    );
+    return;
+  }
+
+  appendToOutputFile(
+    outputFile,
+    `[error] ${result.error || "Subagent execution failed"}\n\n[Task failed]\n`,
+  );
+}
+
 /**
 * Task tool - Launch a specialized subagent to handle complex tasks
 */
@@ -151,10 +205,7 @@ export async function task(args: TaskArgs): Promise<string> {
    backgroundTasks.set(taskId, bgTask);

    // Write initial status to output file
-    appendToOutputFile(
-      outputFile,
-      `[Task started: ${description}]\n[subagent_type: ${subagent_type}]\n\n`,
-    );
+    writeTaskTranscriptStart(outputFile, description, subagent_type);

    // Fire-and-forget: run subagent without awaiting
    spawnSubagent(
@@ -175,30 +226,13 @@ export async function task(args: TaskArgs): Promise<string> {
        }

        // Build output header
-        const header = [
-          `subagent_type=${subagent_type}`,
-          result.agentId ? `agent_id=${result.agentId}` : undefined,
-          result.conversationId
-            ? `conversation_id=${result.conversationId}`
-            : undefined,
-        ]
-          .filter(Boolean)
-          .join(" ");
+        const header = buildTaskResultHeader(subagent_type, result);

        // Write result to output file
+        writeTaskTranscriptResult(outputFile, result, header);
        if (result.success) {
-          appendToOutputFile(outputFile, `${header}\n\n${result.report}\n`);
          bgTask.output.push(result.report || "");
-        } else {
-          appendToOutputFile(
-            outputFile,
-            `[error] ${result.error || "Subagent execution failed"}\n`,
-          );
        }
-        appendToOutputFile(
-          outputFile,
-          `\n[Task ${result.success ? "completed" : "failed"}]\n`,
-        );

        // Mark subagent as completed in state store
        completeSubagent(subagentId, {
@@ -299,6 +333,12 @@ export async function task(args: TaskArgs): Promise<string> {
    return `Task running in background with ID: ${taskId}\nOutput file: ${outputFile}`;
  }

+  // Foreground tasks now also write transcripts so users can inspect full output
+  // even when inline content is truncated.
+  const foregroundTaskId = getNextTaskId();
+  const outputFile = createBackgroundOutputFile(foregroundTaskId);
+  writeTaskTranscriptStart(outputFile, description, subagent_type);
+
  try {
    const result = await spawnSubagent(
      subagent_type,
@@ -331,22 +371,22 @@ export async function task(args: TaskArgs): Promise<string> {
    });

    if (!result.success) {
-      return `Error: ${result.error || "Subagent execution failed"}`;
+      const errorMessage = result.error || "Subagent execution failed";
+      const failedResult: TaskRunResult = {
+        ...result,
+        error: errorMessage,
+      };
+      writeTaskTranscriptResult(outputFile, failedResult, "");
+      return `Error: ${errorMessage}\nOutput file: ${outputFile}`;
    }

    // Include stable subagent metadata so orchestrators can attribute results.
    // Keep the tool return type as a string for compatibility.
-    const header = [
-      `subagent_type=${subagent_type}`,
-      result.agentId ? `agent_id=${result.agentId}` : undefined,
-      result.conversationId
-        ? `conversation_id=${result.conversationId}`
-        : undefined,
-    ]
-      .filter(Boolean)
-      .join(" ");
+    const header = buildTaskResultHeader(subagent_type, result);

    const fullOutput = `${header}\n\n${result.report}`;
+    writeTaskTranscriptResult(outputFile, result, header);
+
    const userCwd = process.env.USER_CWD || process.cwd();

    // Apply truncation to prevent excessive token usage (same pattern as Bash tool)
@@ -357,7 +397,7 @@ export async function task(args: TaskArgs): Promise<string> {
      { workingDirectory: userCwd, toolName: "Task" },
    );

-    return truncatedOutput;
+    return `${truncatedOutput}\nOutput file: ${outputFile}`;
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    completeSubagent(subagentId, { success: false, error: errorMessage });
@@ -374,6 +414,10 @@ export async function task(args: TaskArgs): Promise<string> {
      // Silently ignore hook errors
    });

-    return `Error: ${errorMessage}`;
+    appendToOutputFile(
+      outputFile,
+      `[error] ${errorMessage}\n\n[Task failed]\n`,
+    );
+    return `Error: ${errorMessage}\nOutput file: ${outputFile}`;
  }
 }