fix: propagate max-step failures and unify task transcripts (#874)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Charles Packer
2026-02-09 14:49:38 -08:00
committed by GitHub
parent 101fc6f874
commit 8e5bc3956f
6 changed files with 198 additions and 48 deletions

View File

@@ -53,8 +53,12 @@ export async function packageSkills(
continue;
}
// Check if skill exists in known repos (prefer source_url over embedding)
const sourceUrl = await findSkillSourceUrl(entry.name);
// Check if skill exists in known repos (prefer source_url over embedding).
// When an explicit skillsDir is provided, skip network lookup and always
// package local files for deterministic behavior (especially in tests).
const sourceUrl = skillsDir
? null
: await findSkillSourceUrl(entry.name);
const skill: SkillSchema = { name: entry.name };

View File

@@ -685,12 +685,15 @@ async function executeSubagent(
}
}
const propagatedError = state.finalError?.trim();
const fallbackError = stderr || `Subagent exited with code ${exitCode}`;
return {
agentId: state.agentId || "",
conversationId: state.conversationId || undefined,
report: "",
success: false,
error: stderr || `Subagent exited with code ${exitCode}`,
error: propagatedError || fallbackError,
};
}

View File

@@ -33,6 +33,8 @@ interface StreamMessage {
subtype?: string;
message_type?: string;
stop_reason?: string;
request_id?: string;
request?: { subtype?: string };
// biome-ignore lint/suspicious/noExplicitAny: index signature for arbitrary JSON fields
[key: string]: any;
}
@@ -77,6 +79,8 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
let errorSeen = false;
let resultCount = 0;
let closing = false;
let waitingForApprovalControlRequest = false;
let pendingToolCallId: string | undefined;
const timeout = setTimeout(() => {
if (!closing) {
@@ -105,6 +109,39 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
console.log("MSG:", JSON.stringify(msg, null, 2));
}
// Handle permission control requests from headless.
// We only deny the can_use_tool request after seeing an approval_request_message
// so the test still exercises the pending-approval flow.
if (
msg.type === "control_request" &&
msg.request?.subtype === "can_use_tool" &&
waitingForApprovalControlRequest
) {
const requestId = msg.request_id;
if (requestId) {
if (pendingToolCallId && !requestId.endsWith(pendingToolCallId)) {
console.log(
`Note: control_request id ${requestId} did not match expected tool id ${pendingToolCallId}`,
);
}
const denyApproval = JSON.stringify({
type: "control_response",
response: {
request_id: requestId,
response: {
behavior: "deny",
message:
"Denied by integration test to simulate stale approval",
},
},
});
proc.stdin?.write(`${denyApproval}\n`);
waitingForApprovalControlRequest = false;
}
return;
}
// Step 1: Wait for init, then send bash trigger prompt
if (msg.type === "system" && msg.subtype === "init" && !initReceived) {
initReceived = true;
@@ -116,23 +153,36 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
return;
}
// Step 2: When we see approval request, send another user message instead
// Step 2: When we see approval request, send another user message instead,
// then explicitly deny the pending approval so the flow can complete in
// headless stream-json mode (which waits for approval responses).
if (
msg.type === "message" &&
msg.message_type === "approval_request_message" &&
!approvalSeen
) {
approvalSeen = true;
// Wait a moment, then send interrupt message (NOT an approval)
const toolCall = Array.isArray(msg.tool_call)
? msg.tool_call[0]
: msg.tool_call;
pendingToolCallId =
toolCall && typeof toolCall === "object"
? (toolCall as { tool_call_id?: string }).tool_call_id
: undefined;
waitingForApprovalControlRequest = true;
// Wait a moment, then send interrupt message (approval deny will be sent
// only after we see the corresponding control_request from headless).
setTimeout(() => {
if (!interruptSent) {
interruptSent = true;
const userMsg = JSON.stringify({
type: "user",
message: { role: "user", content: INTERRUPT_MESSAGE },
});
proc.stdin?.write(`${userMsg}\n`);
}
if (interruptSent) return;
interruptSent = true;
const userMsg = JSON.stringify({
type: "user",
message: { role: "user", content: INTERRUPT_MESSAGE },
});
proc.stdin?.write(`${userMsg}\n`);
}, 500);
return;
}

View File

@@ -0,0 +1,20 @@
import { describe, expect, test } from "bun:test";
import { readFileSync } from "node:fs";
import { fileURLToPath } from "node:url";
describe("subagent max_steps error propagation", () => {
test("non-zero exit path prefers parsed finalError over generic exit code", () => {
const managerPath = fileURLToPath(
new URL("../../agent/subagents/manager.ts", import.meta.url),
);
const source = readFileSync(managerPath, "utf-8");
expect(source).toContain(
"const propagatedError = state.finalError?.trim();",
);
expect(source).toContain(
`const fallbackError = stderr || \`Subagent exited with code \${exitCode}\`;`,
);
expect(source).toContain("error: propagatedError || fallbackError");
});
});

View File

@@ -0,0 +1,29 @@
import { describe, expect, test } from "bun:test";
import { readFileSync } from "node:fs";
import { fileURLToPath } from "node:url";
describe("Task foreground transcript wiring", () => {
test("writes foreground transcript start and result output", () => {
const taskPath = fileURLToPath(
new URL("../../tools/impl/Task.ts", import.meta.url),
);
const source = readFileSync(taskPath, "utf-8");
expect(source).toContain("const foregroundTaskId = getNextTaskId();");
expect(source).toContain(
"const outputFile = createBackgroundOutputFile(foregroundTaskId);",
);
expect(source).toContain(
"writeTaskTranscriptStart(outputFile, description, subagent_type);",
);
expect(source).toContain(
"writeTaskTranscriptResult(outputFile, result, header);",
);
expect(source).toContain(
`return \`\${truncatedOutput}\\nOutput file: \${outputFile}\`;`,
);
expect(source).toContain(
`return \`Error: \${errorMessage}\\nOutput file: \${outputFile}\`;`,
);
});
});

View File

@@ -47,6 +47,60 @@ interface TaskArgs {
// Valid subagent_types when deploying an existing agent
const VALID_DEPLOY_TYPES = new Set(["explore", "general-purpose"]);
type TaskRunResult = {
agentId: string;
conversationId?: string;
report: string;
success: boolean;
error?: string;
totalTokens?: number;
};
function buildTaskResultHeader(
subagentType: string,
result: Pick<TaskRunResult, "agentId" | "conversationId">,
): string {
return [
`subagent_type=${subagentType}`,
result.agentId ? `agent_id=${result.agentId}` : undefined,
result.conversationId
? `conversation_id=${result.conversationId}`
: undefined,
]
.filter(Boolean)
.join(" ");
}
function writeTaskTranscriptStart(
outputFile: string,
description: string,
subagentType: string,
): void {
appendToOutputFile(
outputFile,
`[Task started: ${description}]\n[subagent_type: ${subagentType}]\n\n`,
);
}
function writeTaskTranscriptResult(
outputFile: string,
result: TaskRunResult,
header: string,
): void {
if (result.success) {
appendToOutputFile(
outputFile,
`${header}\n\n${result.report}\n\n[Task completed]\n`,
);
return;
}
appendToOutputFile(
outputFile,
`[error] ${result.error || "Subagent execution failed"}\n\n[Task failed]\n`,
);
}
/**
* Task tool - Launch a specialized subagent to handle complex tasks
*/
@@ -151,10 +205,7 @@ export async function task(args: TaskArgs): Promise<string> {
backgroundTasks.set(taskId, bgTask);
// Write initial status to output file
appendToOutputFile(
outputFile,
`[Task started: ${description}]\n[subagent_type: ${subagent_type}]\n\n`,
);
writeTaskTranscriptStart(outputFile, description, subagent_type);
// Fire-and-forget: run subagent without awaiting
spawnSubagent(
@@ -175,30 +226,13 @@ export async function task(args: TaskArgs): Promise<string> {
}
// Build output header
const header = [
`subagent_type=${subagent_type}`,
result.agentId ? `agent_id=${result.agentId}` : undefined,
result.conversationId
? `conversation_id=${result.conversationId}`
: undefined,
]
.filter(Boolean)
.join(" ");
const header = buildTaskResultHeader(subagent_type, result);
// Write result to output file
writeTaskTranscriptResult(outputFile, result, header);
if (result.success) {
appendToOutputFile(outputFile, `${header}\n\n${result.report}\n`);
bgTask.output.push(result.report || "");
} else {
appendToOutputFile(
outputFile,
`[error] ${result.error || "Subagent execution failed"}\n`,
);
}
appendToOutputFile(
outputFile,
`\n[Task ${result.success ? "completed" : "failed"}]\n`,
);
// Mark subagent as completed in state store
completeSubagent(subagentId, {
@@ -299,6 +333,12 @@ export async function task(args: TaskArgs): Promise<string> {
return `Task running in background with ID: ${taskId}\nOutput file: ${outputFile}`;
}
// Foreground tasks now also write transcripts so users can inspect full output
// even when inline content is truncated.
const foregroundTaskId = getNextTaskId();
const outputFile = createBackgroundOutputFile(foregroundTaskId);
writeTaskTranscriptStart(outputFile, description, subagent_type);
try {
const result = await spawnSubagent(
subagent_type,
@@ -331,22 +371,22 @@ export async function task(args: TaskArgs): Promise<string> {
});
if (!result.success) {
return `Error: ${result.error || "Subagent execution failed"}`;
const errorMessage = result.error || "Subagent execution failed";
const failedResult: TaskRunResult = {
...result,
error: errorMessage,
};
writeTaskTranscriptResult(outputFile, failedResult, "");
return `Error: ${errorMessage}\nOutput file: ${outputFile}`;
}
// Include stable subagent metadata so orchestrators can attribute results.
// Keep the tool return type as a string for compatibility.
const header = [
`subagent_type=${subagent_type}`,
result.agentId ? `agent_id=${result.agentId}` : undefined,
result.conversationId
? `conversation_id=${result.conversationId}`
: undefined,
]
.filter(Boolean)
.join(" ");
const header = buildTaskResultHeader(subagent_type, result);
const fullOutput = `${header}\n\n${result.report}`;
writeTaskTranscriptResult(outputFile, result, header);
const userCwd = process.env.USER_CWD || process.cwd();
// Apply truncation to prevent excessive token usage (same pattern as Bash tool)
@@ -357,7 +397,7 @@ export async function task(args: TaskArgs): Promise<string> {
{ workingDirectory: userCwd, toolName: "Task" },
);
return truncatedOutput;
return `${truncatedOutput}\nOutput file: ${outputFile}`;
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
completeSubagent(subagentId, { success: false, error: errorMessage });
@@ -374,6 +414,10 @@ export async function task(args: TaskArgs): Promise<string> {
// Silently ignore hook errors
});
return `Error: ${errorMessage}`;
appendToOutputFile(
outputFile,
`[error] ${errorMessage}\n\n[Task failed]\n`,
);
return `Error: ${errorMessage}\nOutput file: ${outputFile}`;
}
}