fix: propagate max-step failures and unify task transcripts (#874)
Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
@@ -53,8 +53,12 @@ export async function packageSkills(
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if skill exists in known repos (prefer source_url over embedding)
|
||||
const sourceUrl = await findSkillSourceUrl(entry.name);
|
||||
// Check if skill exists in known repos (prefer source_url over embedding).
|
||||
// When an explicit skillsDir is provided, skip network lookup and always
|
||||
// package local files for deterministic behavior (especially in tests).
|
||||
const sourceUrl = skillsDir
|
||||
? null
|
||||
: await findSkillSourceUrl(entry.name);
|
||||
|
||||
const skill: SkillSchema = { name: entry.name };
|
||||
|
||||
|
||||
@@ -685,12 +685,15 @@ async function executeSubagent(
|
||||
}
|
||||
}
|
||||
|
||||
const propagatedError = state.finalError?.trim();
|
||||
const fallbackError = stderr || `Subagent exited with code ${exitCode}`;
|
||||
|
||||
return {
|
||||
agentId: state.agentId || "",
|
||||
conversationId: state.conversationId || undefined,
|
||||
report: "",
|
||||
success: false,
|
||||
error: stderr || `Subagent exited with code ${exitCode}`,
|
||||
error: propagatedError || fallbackError,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -33,6 +33,8 @@ interface StreamMessage {
|
||||
subtype?: string;
|
||||
message_type?: string;
|
||||
stop_reason?: string;
|
||||
request_id?: string;
|
||||
request?: { subtype?: string };
|
||||
// biome-ignore lint/suspicious/noExplicitAny: index signature for arbitrary JSON fields
|
||||
[key: string]: any;
|
||||
}
|
||||
@@ -77,6 +79,8 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
|
||||
let errorSeen = false;
|
||||
let resultCount = 0;
|
||||
let closing = false;
|
||||
let waitingForApprovalControlRequest = false;
|
||||
let pendingToolCallId: string | undefined;
|
||||
|
||||
const timeout = setTimeout(() => {
|
||||
if (!closing) {
|
||||
@@ -105,6 +109,39 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
|
||||
console.log("MSG:", JSON.stringify(msg, null, 2));
|
||||
}
|
||||
|
||||
// Handle permission control requests from headless.
|
||||
// We only deny the can_use_tool request after seeing an approval_request_message
|
||||
// so the test still exercises the pending-approval flow.
|
||||
if (
|
||||
msg.type === "control_request" &&
|
||||
msg.request?.subtype === "can_use_tool" &&
|
||||
waitingForApprovalControlRequest
|
||||
) {
|
||||
const requestId = msg.request_id;
|
||||
if (requestId) {
|
||||
if (pendingToolCallId && !requestId.endsWith(pendingToolCallId)) {
|
||||
console.log(
|
||||
`Note: control_request id ${requestId} did not match expected tool id ${pendingToolCallId}`,
|
||||
);
|
||||
}
|
||||
|
||||
const denyApproval = JSON.stringify({
|
||||
type: "control_response",
|
||||
response: {
|
||||
request_id: requestId,
|
||||
response: {
|
||||
behavior: "deny",
|
||||
message:
|
||||
"Denied by integration test to simulate stale approval",
|
||||
},
|
||||
},
|
||||
});
|
||||
proc.stdin?.write(`${denyApproval}\n`);
|
||||
waitingForApprovalControlRequest = false;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Step 1: Wait for init, then send bash trigger prompt
|
||||
if (msg.type === "system" && msg.subtype === "init" && !initReceived) {
|
||||
initReceived = true;
|
||||
@@ -116,23 +153,36 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
|
||||
return;
|
||||
}
|
||||
|
||||
// Step 2: When we see approval request, send another user message instead
|
||||
// Step 2: When we see approval request, send another user message instead,
|
||||
// then explicitly deny the pending approval so the flow can complete in
|
||||
// headless stream-json mode (which waits for approval responses).
|
||||
if (
|
||||
msg.type === "message" &&
|
||||
msg.message_type === "approval_request_message" &&
|
||||
!approvalSeen
|
||||
) {
|
||||
approvalSeen = true;
|
||||
// Wait a moment, then send interrupt message (NOT an approval)
|
||||
|
||||
const toolCall = Array.isArray(msg.tool_call)
|
||||
? msg.tool_call[0]
|
||||
: msg.tool_call;
|
||||
pendingToolCallId =
|
||||
toolCall && typeof toolCall === "object"
|
||||
? (toolCall as { tool_call_id?: string }).tool_call_id
|
||||
: undefined;
|
||||
waitingForApprovalControlRequest = true;
|
||||
|
||||
// Wait a moment, then send interrupt message (approval deny will be sent
|
||||
// only after we see the corresponding control_request from headless).
|
||||
setTimeout(() => {
|
||||
if (!interruptSent) {
|
||||
interruptSent = true;
|
||||
const userMsg = JSON.stringify({
|
||||
type: "user",
|
||||
message: { role: "user", content: INTERRUPT_MESSAGE },
|
||||
});
|
||||
proc.stdin?.write(`${userMsg}\n`);
|
||||
}
|
||||
if (interruptSent) return;
|
||||
interruptSent = true;
|
||||
|
||||
const userMsg = JSON.stringify({
|
||||
type: "user",
|
||||
message: { role: "user", content: INTERRUPT_MESSAGE },
|
||||
});
|
||||
proc.stdin?.write(`${userMsg}\n`);
|
||||
}, 500);
|
||||
return;
|
||||
}
|
||||
|
||||
20
src/tests/agent/subagent-max-steps-propagation.test.ts
Normal file
20
src/tests/agent/subagent-max-steps-propagation.test.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
describe("subagent max_steps error propagation", () => {
|
||||
test("non-zero exit path prefers parsed finalError over generic exit code", () => {
|
||||
const managerPath = fileURLToPath(
|
||||
new URL("../../agent/subagents/manager.ts", import.meta.url),
|
||||
);
|
||||
const source = readFileSync(managerPath, "utf-8");
|
||||
|
||||
expect(source).toContain(
|
||||
"const propagatedError = state.finalError?.trim();",
|
||||
);
|
||||
expect(source).toContain(
|
||||
`const fallbackError = stderr || \`Subagent exited with code \${exitCode}\`;`,
|
||||
);
|
||||
expect(source).toContain("error: propagatedError || fallbackError");
|
||||
});
|
||||
});
|
||||
29
src/tests/tools/task-foreground-transcript.test.ts
Normal file
29
src/tests/tools/task-foreground-transcript.test.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
describe("Task foreground transcript wiring", () => {
|
||||
test("writes foreground transcript start and result output", () => {
|
||||
const taskPath = fileURLToPath(
|
||||
new URL("../../tools/impl/Task.ts", import.meta.url),
|
||||
);
|
||||
const source = readFileSync(taskPath, "utf-8");
|
||||
|
||||
expect(source).toContain("const foregroundTaskId = getNextTaskId();");
|
||||
expect(source).toContain(
|
||||
"const outputFile = createBackgroundOutputFile(foregroundTaskId);",
|
||||
);
|
||||
expect(source).toContain(
|
||||
"writeTaskTranscriptStart(outputFile, description, subagent_type);",
|
||||
);
|
||||
expect(source).toContain(
|
||||
"writeTaskTranscriptResult(outputFile, result, header);",
|
||||
);
|
||||
expect(source).toContain(
|
||||
`return \`\${truncatedOutput}\\nOutput file: \${outputFile}\`;`,
|
||||
);
|
||||
expect(source).toContain(
|
||||
`return \`Error: \${errorMessage}\\nOutput file: \${outputFile}\`;`,
|
||||
);
|
||||
});
|
||||
});
|
||||
@@ -47,6 +47,60 @@ interface TaskArgs {
|
||||
// Valid subagent_types when deploying an existing agent
|
||||
const VALID_DEPLOY_TYPES = new Set(["explore", "general-purpose"]);
|
||||
|
||||
type TaskRunResult = {
|
||||
agentId: string;
|
||||
conversationId?: string;
|
||||
report: string;
|
||||
success: boolean;
|
||||
error?: string;
|
||||
totalTokens?: number;
|
||||
};
|
||||
|
||||
function buildTaskResultHeader(
|
||||
subagentType: string,
|
||||
result: Pick<TaskRunResult, "agentId" | "conversationId">,
|
||||
): string {
|
||||
return [
|
||||
`subagent_type=${subagentType}`,
|
||||
result.agentId ? `agent_id=${result.agentId}` : undefined,
|
||||
result.conversationId
|
||||
? `conversation_id=${result.conversationId}`
|
||||
: undefined,
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join(" ");
|
||||
}
|
||||
|
||||
function writeTaskTranscriptStart(
|
||||
outputFile: string,
|
||||
description: string,
|
||||
subagentType: string,
|
||||
): void {
|
||||
appendToOutputFile(
|
||||
outputFile,
|
||||
`[Task started: ${description}]\n[subagent_type: ${subagentType}]\n\n`,
|
||||
);
|
||||
}
|
||||
|
||||
function writeTaskTranscriptResult(
|
||||
outputFile: string,
|
||||
result: TaskRunResult,
|
||||
header: string,
|
||||
): void {
|
||||
if (result.success) {
|
||||
appendToOutputFile(
|
||||
outputFile,
|
||||
`${header}\n\n${result.report}\n\n[Task completed]\n`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
appendToOutputFile(
|
||||
outputFile,
|
||||
`[error] ${result.error || "Subagent execution failed"}\n\n[Task failed]\n`,
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Task tool - Launch a specialized subagent to handle complex tasks
|
||||
*/
|
||||
@@ -151,10 +205,7 @@ export async function task(args: TaskArgs): Promise<string> {
|
||||
backgroundTasks.set(taskId, bgTask);
|
||||
|
||||
// Write initial status to output file
|
||||
appendToOutputFile(
|
||||
outputFile,
|
||||
`[Task started: ${description}]\n[subagent_type: ${subagent_type}]\n\n`,
|
||||
);
|
||||
writeTaskTranscriptStart(outputFile, description, subagent_type);
|
||||
|
||||
// Fire-and-forget: run subagent without awaiting
|
||||
spawnSubagent(
|
||||
@@ -175,30 +226,13 @@ export async function task(args: TaskArgs): Promise<string> {
|
||||
}
|
||||
|
||||
// Build output header
|
||||
const header = [
|
||||
`subagent_type=${subagent_type}`,
|
||||
result.agentId ? `agent_id=${result.agentId}` : undefined,
|
||||
result.conversationId
|
||||
? `conversation_id=${result.conversationId}`
|
||||
: undefined,
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join(" ");
|
||||
const header = buildTaskResultHeader(subagent_type, result);
|
||||
|
||||
// Write result to output file
|
||||
writeTaskTranscriptResult(outputFile, result, header);
|
||||
if (result.success) {
|
||||
appendToOutputFile(outputFile, `${header}\n\n${result.report}\n`);
|
||||
bgTask.output.push(result.report || "");
|
||||
} else {
|
||||
appendToOutputFile(
|
||||
outputFile,
|
||||
`[error] ${result.error || "Subagent execution failed"}\n`,
|
||||
);
|
||||
}
|
||||
appendToOutputFile(
|
||||
outputFile,
|
||||
`\n[Task ${result.success ? "completed" : "failed"}]\n`,
|
||||
);
|
||||
|
||||
// Mark subagent as completed in state store
|
||||
completeSubagent(subagentId, {
|
||||
@@ -299,6 +333,12 @@ export async function task(args: TaskArgs): Promise<string> {
|
||||
return `Task running in background with ID: ${taskId}\nOutput file: ${outputFile}`;
|
||||
}
|
||||
|
||||
// Foreground tasks now also write transcripts so users can inspect full output
|
||||
// even when inline content is truncated.
|
||||
const foregroundTaskId = getNextTaskId();
|
||||
const outputFile = createBackgroundOutputFile(foregroundTaskId);
|
||||
writeTaskTranscriptStart(outputFile, description, subagent_type);
|
||||
|
||||
try {
|
||||
const result = await spawnSubagent(
|
||||
subagent_type,
|
||||
@@ -331,22 +371,22 @@ export async function task(args: TaskArgs): Promise<string> {
|
||||
});
|
||||
|
||||
if (!result.success) {
|
||||
return `Error: ${result.error || "Subagent execution failed"}`;
|
||||
const errorMessage = result.error || "Subagent execution failed";
|
||||
const failedResult: TaskRunResult = {
|
||||
...result,
|
||||
error: errorMessage,
|
||||
};
|
||||
writeTaskTranscriptResult(outputFile, failedResult, "");
|
||||
return `Error: ${errorMessage}\nOutput file: ${outputFile}`;
|
||||
}
|
||||
|
||||
// Include stable subagent metadata so orchestrators can attribute results.
|
||||
// Keep the tool return type as a string for compatibility.
|
||||
const header = [
|
||||
`subagent_type=${subagent_type}`,
|
||||
result.agentId ? `agent_id=${result.agentId}` : undefined,
|
||||
result.conversationId
|
||||
? `conversation_id=${result.conversationId}`
|
||||
: undefined,
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join(" ");
|
||||
const header = buildTaskResultHeader(subagent_type, result);
|
||||
|
||||
const fullOutput = `${header}\n\n${result.report}`;
|
||||
writeTaskTranscriptResult(outputFile, result, header);
|
||||
|
||||
const userCwd = process.env.USER_CWD || process.cwd();
|
||||
|
||||
// Apply truncation to prevent excessive token usage (same pattern as Bash tool)
|
||||
@@ -357,7 +397,7 @@ export async function task(args: TaskArgs): Promise<string> {
|
||||
{ workingDirectory: userCwd, toolName: "Task" },
|
||||
);
|
||||
|
||||
return truncatedOutput;
|
||||
return `${truncatedOutput}\nOutput file: ${outputFile}`;
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
completeSubagent(subagentId, { success: false, error: errorMessage });
|
||||
@@ -374,6 +414,10 @@ export async function task(args: TaskArgs): Promise<string> {
|
||||
// Silently ignore hook errors
|
||||
});
|
||||
|
||||
return `Error: ${errorMessage}`;
|
||||
appendToOutputFile(
|
||||
outputFile,
|
||||
`[error] ${errorMessage}\n\n[Task failed]\n`,
|
||||
);
|
||||
return `Error: ${errorMessage}\nOutput file: ${outputFile}`;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user