feat: reduce time-to-boot, remove default eager approval checks on inputs, auto-cancel stale approvals (#579)
Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
341
scripts/latency-benchmark.ts
Normal file
341
scripts/latency-benchmark.ts
Normal file
@@ -0,0 +1,341 @@
|
|||||||
|
#!/usr/bin/env bun
|
||||||
|
/**
|
||||||
|
* Latency Benchmark Script for Letta Code CLI
|
||||||
|
*
|
||||||
|
* Runs headless mode with LETTA_DEBUG_TIMINGS=1 and parses the output
|
||||||
|
* to measure latency breakdown at different stages.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* bun scripts/latency-benchmark.ts
|
||||||
|
* bun scripts/latency-benchmark.ts --scenario fresh-agent
|
||||||
|
* bun scripts/latency-benchmark.ts --iterations 5
|
||||||
|
*
|
||||||
|
* Requires: LETTA_API_KEY environment variable
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { spawn } from "node:child_process";
|
||||||
|
|
||||||
|
interface ApiCall {
|
||||||
|
method: string;
|
||||||
|
path: string;
|
||||||
|
durationMs: number;
|
||||||
|
status?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface Milestone {
|
||||||
|
name: string;
|
||||||
|
offsetMs: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface BenchmarkResult {
|
||||||
|
scenario: string;
|
||||||
|
totalMs: number;
|
||||||
|
milestones: Milestone[];
|
||||||
|
apiCalls: ApiCall[];
|
||||||
|
exitCode: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface ScenarioConfig {
|
||||||
|
name: string;
|
||||||
|
description: string;
|
||||||
|
args: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Define benchmark scenarios
|
||||||
|
const SCENARIOS: ScenarioConfig[] = [
|
||||||
|
{
|
||||||
|
name: "fresh-agent",
|
||||||
|
description: "Create new agent and send simple prompt",
|
||||||
|
args: [
|
||||||
|
"-p",
|
||||||
|
"What is 2+2? Reply with just the number.",
|
||||||
|
"--new-agent",
|
||||||
|
"--yolo",
|
||||||
|
"--output-format",
|
||||||
|
"json",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "resume-agent",
|
||||||
|
description: "Resume last agent and send simple prompt",
|
||||||
|
args: [
|
||||||
|
"-p",
|
||||||
|
"What is 3+3? Reply with just the number.",
|
||||||
|
"--continue",
|
||||||
|
"--yolo",
|
||||||
|
"--output-format",
|
||||||
|
"json",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "minimal-math",
|
||||||
|
description: "Simple math question (no tool calls)",
|
||||||
|
args: [
|
||||||
|
"-p",
|
||||||
|
"What is 5+5? Reply with just the number.",
|
||||||
|
"--continue",
|
||||||
|
"--yolo",
|
||||||
|
"--output-format",
|
||||||
|
"json",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse timing logs from stderr output
|
||||||
|
*/
|
||||||
|
function parseTimingLogs(stderr: string): {
|
||||||
|
milestones: Milestone[];
|
||||||
|
apiCalls: ApiCall[];
|
||||||
|
} {
|
||||||
|
const milestones: Milestone[] = [];
|
||||||
|
const apiCalls: ApiCall[] = [];
|
||||||
|
|
||||||
|
const lines = stderr.split("\n");
|
||||||
|
|
||||||
|
for (const line of lines) {
|
||||||
|
// Parse milestones: [timing] MILESTONE CLI_START at +0ms (12:34:56.789)
|
||||||
|
const milestoneMatch = line.match(
|
||||||
|
/\[timing\] MILESTONE (\S+) at \+(\d+(?:\.\d+)?)(ms|s)/,
|
||||||
|
);
|
||||||
|
if (milestoneMatch) {
|
||||||
|
const name = milestoneMatch[1]!;
|
||||||
|
let offsetMs = parseFloat(milestoneMatch[2]!);
|
||||||
|
if (milestoneMatch[3] === "s") {
|
||||||
|
offsetMs *= 1000;
|
||||||
|
}
|
||||||
|
milestones.push({ name, offsetMs });
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse API calls: [timing] GET /v1/agents/... -> 245ms (status: 200)
|
||||||
|
const apiMatch = line.match(
|
||||||
|
/\[timing\] (GET|POST|PUT|DELETE|PATCH) (\S+) -> (\d+(?:\.\d+)?)(ms|s)(?: \(status: (\d+)\))?/,
|
||||||
|
);
|
||||||
|
if (apiMatch) {
|
||||||
|
const method = apiMatch[1]!;
|
||||||
|
const path = apiMatch[2]!;
|
||||||
|
let durationMs = parseFloat(apiMatch[3]!);
|
||||||
|
if (apiMatch[4] === "s") {
|
||||||
|
durationMs *= 1000;
|
||||||
|
}
|
||||||
|
const status = apiMatch[5] ? parseInt(apiMatch[5], 10) : undefined;
|
||||||
|
apiCalls.push({ method, path, durationMs, status });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { milestones, apiCalls };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run a single benchmark scenario
|
||||||
|
*/
|
||||||
|
async function runBenchmark(scenario: ScenarioConfig): Promise<BenchmarkResult> {
|
||||||
|
const start = performance.now();
|
||||||
|
|
||||||
|
return new Promise((resolve) => {
|
||||||
|
const proc = spawn("bun", ["run", "dev", ...scenario.args], {
|
||||||
|
env: { ...process.env, LETTA_DEBUG_TIMINGS: "1" },
|
||||||
|
stdio: ["pipe", "pipe", "pipe"],
|
||||||
|
});
|
||||||
|
|
||||||
|
let stdout = "";
|
||||||
|
let stderr = "";
|
||||||
|
|
||||||
|
proc.stdout.on("data", (data) => {
|
||||||
|
stdout += data.toString();
|
||||||
|
});
|
||||||
|
|
||||||
|
proc.stderr.on("data", (data) => {
|
||||||
|
stderr += data.toString();
|
||||||
|
});
|
||||||
|
|
||||||
|
proc.on("close", (code) => {
|
||||||
|
const totalMs = performance.now() - start;
|
||||||
|
const { milestones, apiCalls } = parseTimingLogs(stderr);
|
||||||
|
|
||||||
|
resolve({
|
||||||
|
scenario: scenario.name,
|
||||||
|
totalMs,
|
||||||
|
milestones,
|
||||||
|
apiCalls,
|
||||||
|
exitCode: code ?? 1,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// Timeout after 2 minutes
|
||||||
|
setTimeout(() => {
|
||||||
|
proc.kill("SIGTERM");
|
||||||
|
}, 120000);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Format duration for display
|
||||||
|
*/
|
||||||
|
function formatMs(ms: number): string {
|
||||||
|
if (ms < 1000) return `${Math.round(ms)}ms`;
|
||||||
|
return `${(ms / 1000).toFixed(2)}s`;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Print benchmark results
|
||||||
|
*/
|
||||||
|
function printResults(results: BenchmarkResult[]): void {
|
||||||
|
console.log("\n" + "=".repeat(70));
|
||||||
|
console.log("LATENCY BENCHMARK RESULTS");
|
||||||
|
console.log("=".repeat(70) + "\n");
|
||||||
|
|
||||||
|
for (const result of results) {
|
||||||
|
const scenario = SCENARIOS.find((s) => s.name === result.scenario);
|
||||||
|
console.log(`Scenario: ${result.scenario}`);
|
||||||
|
console.log(` ${scenario?.description || ""}`);
|
||||||
|
console.log(` Exit code: ${result.exitCode}`);
|
||||||
|
console.log(` Total wall time: ${formatMs(result.totalMs)}`);
|
||||||
|
console.log("");
|
||||||
|
|
||||||
|
// Print milestones
|
||||||
|
if (result.milestones.length > 0) {
|
||||||
|
console.log(" Milestones:");
|
||||||
|
let prevMs = 0;
|
||||||
|
for (const milestone of result.milestones) {
|
||||||
|
const delta = milestone.offsetMs - prevMs;
|
||||||
|
const deltaStr = prevMs === 0 ? "" : ` (+${formatMs(delta)})`;
|
||||||
|
console.log(
|
||||||
|
` +${formatMs(milestone.offsetMs).padStart(8)} ${milestone.name}${deltaStr}`,
|
||||||
|
);
|
||||||
|
prevMs = milestone.offsetMs;
|
||||||
|
}
|
||||||
|
console.log("");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print API calls summary
|
||||||
|
if (result.apiCalls.length > 0) {
|
||||||
|
console.log(" API Calls:");
|
||||||
|
const totalApiMs = result.apiCalls.reduce((sum, c) => sum + c.durationMs, 0);
|
||||||
|
|
||||||
|
// Group by path pattern
|
||||||
|
const grouped: Record<string, { count: number; totalMs: number }> = {};
|
||||||
|
for (const call of result.apiCalls) {
|
||||||
|
// Normalize paths (remove UUIDs)
|
||||||
|
const normalizedPath = call.path.replace(
|
||||||
|
/[a-f0-9-]{36}/g,
|
||||||
|
"{id}",
|
||||||
|
);
|
||||||
|
const key = `${call.method} ${normalizedPath}`;
|
||||||
|
if (!grouped[key]) {
|
||||||
|
grouped[key] = { count: 0, totalMs: 0 };
|
||||||
|
}
|
||||||
|
grouped[key].count++;
|
||||||
|
grouped[key].totalMs += call.durationMs;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort by total time
|
||||||
|
const sorted = Object.entries(grouped).sort(
|
||||||
|
(a, b) => b[1].totalMs - a[1].totalMs,
|
||||||
|
);
|
||||||
|
|
||||||
|
for (const [endpoint, stats] of sorted) {
|
||||||
|
const countStr = stats.count > 1 ? ` (x${stats.count})` : "";
|
||||||
|
console.log(
|
||||||
|
` ${formatMs(stats.totalMs).padStart(8)} ${endpoint}${countStr}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(` ${"─".repeat(50)}`);
|
||||||
|
console.log(` ${formatMs(totalApiMs).padStart(8)} Total API time`);
|
||||||
|
console.log(
|
||||||
|
` ${formatMs(result.totalMs - totalApiMs).padStart(8)} CLI overhead (non-API)`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log("\n" + "-".repeat(70) + "\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Summary table
|
||||||
|
console.log("SUMMARY");
|
||||||
|
console.log("-".repeat(70));
|
||||||
|
console.log(
|
||||||
|
"Scenario".padEnd(20) +
|
||||||
|
"Total".padStart(12) +
|
||||||
|
"API Time".padStart(12) +
|
||||||
|
"CLI Overhead".padStart(14),
|
||||||
|
);
|
||||||
|
console.log("-".repeat(70));
|
||||||
|
|
||||||
|
for (const result of results) {
|
||||||
|
const totalApiMs = result.apiCalls.reduce((sum, c) => sum + c.durationMs, 0);
|
||||||
|
const cliOverhead = result.totalMs - totalApiMs;
|
||||||
|
console.log(
|
||||||
|
result.scenario.padEnd(20) +
|
||||||
|
formatMs(result.totalMs).padStart(12) +
|
||||||
|
formatMs(totalApiMs).padStart(12) +
|
||||||
|
formatMs(cliOverhead).padStart(14),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
console.log("-".repeat(70));
|
||||||
|
}
|
||||||
|
|
||||||
|
async function main(): Promise<void> {
|
||||||
|
// Parse args
|
||||||
|
const args = process.argv.slice(2);
|
||||||
|
let scenarioFilter: string | null = null;
|
||||||
|
let iterations = 1;
|
||||||
|
|
||||||
|
for (let i = 0; i < args.length; i++) {
|
||||||
|
if (args[i] === "--scenario" && args[i + 1]) {
|
||||||
|
scenarioFilter = args[++i]!;
|
||||||
|
} else if (args[i] === "--iterations" && args[i + 1]) {
|
||||||
|
iterations = parseInt(args[++i]!, 10);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check prereqs
|
||||||
|
if (!process.env.LETTA_API_KEY) {
|
||||||
|
console.error("Error: LETTA_API_KEY environment variable is required");
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filter scenarios
|
||||||
|
const scenariosToRun = scenarioFilter
|
||||||
|
? SCENARIOS.filter((s) => s.name === scenarioFilter)
|
||||||
|
: SCENARIOS;
|
||||||
|
|
||||||
|
if (scenariosToRun.length === 0) {
|
||||||
|
console.error(`Error: Unknown scenario "${scenarioFilter}"`);
|
||||||
|
console.error(`Available scenarios: ${SCENARIOS.map((s) => s.name).join(", ")}`);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log("Running latency benchmarks...");
|
||||||
|
console.log(`Scenarios: ${scenariosToRun.map((s) => s.name).join(", ")}`);
|
||||||
|
console.log(`Iterations: ${iterations}`);
|
||||||
|
console.log("");
|
||||||
|
|
||||||
|
const allResults: BenchmarkResult[] = [];
|
||||||
|
|
||||||
|
for (let iter = 0; iter < iterations; iter++) {
|
||||||
|
if (iterations > 1) {
|
||||||
|
console.log(`\n--- Iteration ${iter + 1} of ${iterations} ---`);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const scenario of scenariosToRun) {
|
||||||
|
console.log(`Running: ${scenario.name}...`);
|
||||||
|
const result = await runBenchmark(scenario);
|
||||||
|
allResults.push(result);
|
||||||
|
|
||||||
|
if (result.exitCode !== 0) {
|
||||||
|
console.warn(` Warning: ${scenario.name} exited with code ${result.exitCode}`);
|
||||||
|
} else {
|
||||||
|
console.log(` Completed in ${formatMs(result.totalMs)}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printResults(allResults);
|
||||||
|
}
|
||||||
|
|
||||||
|
main().catch((err) => {
|
||||||
|
console.error(err);
|
||||||
|
process.exit(1);
|
||||||
|
});
|
||||||
@@ -2,9 +2,14 @@ import type { MessageCreate } from "@letta-ai/letta-client/resources/agents/agen
|
|||||||
import { getClient } from "./client";
|
import { getClient } from "./client";
|
||||||
import { APPROVAL_RECOVERY_PROMPT } from "./promptAssets";
|
import { APPROVAL_RECOVERY_PROMPT } from "./promptAssets";
|
||||||
|
|
||||||
|
// Error when trying to SEND approval but server has no pending approval
|
||||||
const APPROVAL_RECOVERY_DETAIL_FRAGMENT =
|
const APPROVAL_RECOVERY_DETAIL_FRAGMENT =
|
||||||
"no tool call is currently awaiting approval";
|
"no tool call is currently awaiting approval";
|
||||||
|
|
||||||
|
// Error when trying to SEND message but server has pending approval waiting
|
||||||
|
// This is the CONFLICT error - opposite of desync
|
||||||
|
const APPROVAL_PENDING_DETAIL_FRAGMENT = "cannot send a new message";
|
||||||
|
|
||||||
type RunErrorMetadata =
|
type RunErrorMetadata =
|
||||||
| {
|
| {
|
||||||
error_type?: string;
|
error_type?: string;
|
||||||
@@ -20,6 +25,19 @@ export function isApprovalStateDesyncError(detail: unknown): boolean {
|
|||||||
return detail.toLowerCase().includes(APPROVAL_RECOVERY_DETAIL_FRAGMENT);
|
return detail.toLowerCase().includes(APPROVAL_RECOVERY_DETAIL_FRAGMENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if error indicates there's a pending approval blocking new messages.
|
||||||
|
* This is the CONFLICT error from the backend when trying to send a user message
|
||||||
|
* while the agent is waiting for approval on a tool call.
|
||||||
|
*
|
||||||
|
* Error format:
|
||||||
|
* { detail: "CONFLICT: Cannot send a new message: The agent is waiting for approval..." }
|
||||||
|
*/
|
||||||
|
export function isApprovalPendingError(detail: unknown): boolean {
|
||||||
|
if (typeof detail !== "string") return false;
|
||||||
|
return detail.toLowerCase().includes(APPROVAL_PENDING_DETAIL_FRAGMENT);
|
||||||
|
}
|
||||||
|
|
||||||
export async function fetchRunErrorDetail(
|
export async function fetchRunErrorDetail(
|
||||||
runId: string | null | undefined,
|
runId: string | null | undefined,
|
||||||
): Promise<string | null> {
|
): Promise<string | null> {
|
||||||
|
|||||||
@@ -29,8 +29,9 @@ export interface ResumeData {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract approval requests from an approval_request_message.
|
* Extract approval requests from an approval_request_message.
|
||||||
|
* Exported for testing parallel tool call handling.
|
||||||
*/
|
*/
|
||||||
function extractApprovals(messageToCheck: Message): {
|
export function extractApprovals(messageToCheck: Message): {
|
||||||
pendingApproval: ApprovalRequest | null;
|
pendingApproval: ApprovalRequest | null;
|
||||||
pendingApprovals: ApprovalRequest[];
|
pendingApprovals: ApprovalRequest[];
|
||||||
} {
|
} {
|
||||||
|
|||||||
@@ -271,26 +271,14 @@ export async function createAgent(
|
|||||||
// Track provenance: which blocks were created
|
// Track provenance: which blocks were created
|
||||||
// Note: We no longer reuse shared blocks - each agent gets fresh blocks
|
// Note: We no longer reuse shared blocks - each agent gets fresh blocks
|
||||||
const blockProvenance: BlockProvenance[] = [];
|
const blockProvenance: BlockProvenance[] = [];
|
||||||
const blockIds: string[] = [];
|
|
||||||
|
|
||||||
// Create all blocks fresh for the new agent
|
// Mark new blocks for provenance tracking (actual creation happens in agents.create)
|
||||||
for (const block of filteredMemoryBlocks) {
|
for (const block of filteredMemoryBlocks) {
|
||||||
try {
|
blockProvenance.push({ label: block.label, source: "new" });
|
||||||
const createdBlock = await client.blocks.create(block);
|
|
||||||
if (!createdBlock.id) {
|
|
||||||
throw new Error(`Created block ${block.label} has no ID`);
|
|
||||||
}
|
|
||||||
blockIds.push(createdBlock.id);
|
|
||||||
blockProvenance.push({ label: block.label, source: "new" });
|
|
||||||
} catch (error) {
|
|
||||||
console.error(`Failed to create block ${block.label}:`, error);
|
|
||||||
throw error;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add any referenced block IDs (existing blocks to attach)
|
// Mark referenced blocks for provenance tracking
|
||||||
for (const blockId of referencedBlockIds) {
|
for (const blockId of referencedBlockIds) {
|
||||||
blockIds.push(blockId);
|
|
||||||
blockProvenance.push({ label: blockId, source: "shared" });
|
blockProvenance.push({ label: blockId, source: "shared" });
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -314,7 +302,9 @@ export async function createAgent(
|
|||||||
systemPromptContent = `${systemPromptContent}\n\n${options.systemPromptAppend}`;
|
systemPromptContent = `${systemPromptContent}\n\n${options.systemPromptAppend}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create agent with all block IDs (existing + newly created)
|
// Create agent with inline memory blocks (LET-7101: single API call instead of N+1)
|
||||||
|
// - memory_blocks: new blocks to create inline
|
||||||
|
// - block_ids: references to existing blocks (for shared memory)
|
||||||
const tags = ["origin:letta-code"];
|
const tags = ["origin:letta-code"];
|
||||||
if (process.env.LETTA_CODE_AGENT_ROLE === "subagent") {
|
if (process.env.LETTA_CODE_AGENT_ROLE === "subagent") {
|
||||||
tags.push("role:subagent");
|
tags.push("role:subagent");
|
||||||
@@ -332,7 +322,11 @@ export async function createAgent(
|
|||||||
model: modelHandle,
|
model: modelHandle,
|
||||||
context_window_limit: contextWindow,
|
context_window_limit: contextWindow,
|
||||||
tools: toolNames,
|
tools: toolNames,
|
||||||
block_ids: blockIds,
|
// New blocks created inline with agent (saves ~2s of sequential API calls)
|
||||||
|
memory_blocks:
|
||||||
|
filteredMemoryBlocks.length > 0 ? filteredMemoryBlocks : undefined,
|
||||||
|
// Referenced block IDs (existing blocks to attach)
|
||||||
|
block_ids: referencedBlockIds.length > 0 ? referencedBlockIds : undefined,
|
||||||
tags,
|
tags,
|
||||||
// should be default off, but just in case
|
// should be default off, but just in case
|
||||||
include_base_tools: false,
|
include_base_tools: false,
|
||||||
|
|||||||
@@ -486,3 +486,125 @@ export function formatSkillsForMemory(
|
|||||||
// Otherwise fall back to compact tree format
|
// Otherwise fall back to compact tree format
|
||||||
return formatSkillsAsTree(skills, skillsDirectory);
|
return formatSkillsAsTree(skills, skillsDirectory);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Skills Sync with Hash-Based Caching (Phase 2.5 - LET-7101)
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
import { createHash } from "node:crypto";
|
||||||
|
import { mkdir, writeFile } from "node:fs/promises";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the project-local skills hash file path.
|
||||||
|
* Uses .letta/skills-hash.json in the current working directory
|
||||||
|
* because the skills block content depends on the project's .skills/ folder.
|
||||||
|
*/
|
||||||
|
function getSkillsHashFilePath(): string {
|
||||||
|
return join(process.cwd(), ".letta", "skills-hash.json");
|
||||||
|
}
|
||||||
|
|
||||||
|
interface SkillsHashCache {
|
||||||
|
hash: string;
|
||||||
|
timestamp: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute a hash of the formatted skills content
|
||||||
|
*/
|
||||||
|
function computeSkillsHash(content: string): string {
|
||||||
|
return createHash("sha256").update(content).digest("hex").slice(0, 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the cached skills hash (if any)
|
||||||
|
*/
|
||||||
|
async function getCachedSkillsHash(): Promise<string | null> {
|
||||||
|
try {
|
||||||
|
const hashFile = getSkillsHashFilePath();
|
||||||
|
const data = await readFile(hashFile, "utf-8");
|
||||||
|
const cache: SkillsHashCache = JSON.parse(data);
|
||||||
|
return cache.hash;
|
||||||
|
} catch {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the cached skills hash
|
||||||
|
*/
|
||||||
|
async function setCachedSkillsHash(hash: string): Promise<void> {
|
||||||
|
try {
|
||||||
|
const hashFile = getSkillsHashFilePath();
|
||||||
|
// Ensure project .letta directory exists
|
||||||
|
const lettaDir = join(process.cwd(), ".letta");
|
||||||
|
await mkdir(lettaDir, { recursive: true });
|
||||||
|
|
||||||
|
const cache: SkillsHashCache = {
|
||||||
|
hash,
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
};
|
||||||
|
await writeFile(hashFile, JSON.stringify(cache, null, 2));
|
||||||
|
} catch {
|
||||||
|
// Ignore cache write failures - not critical
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sync skills to an agent's memory block.
|
||||||
|
* Discovers skills from filesystem and updates the skills block.
|
||||||
|
*
|
||||||
|
* @param client - Letta client
|
||||||
|
* @param agentId - Agent ID to update
|
||||||
|
* @param skillsDirectory - Path to project skills directory
|
||||||
|
* @param options - Optional settings
|
||||||
|
* @returns Object indicating if sync occurred and discovered skills
|
||||||
|
*/
|
||||||
|
export async function syncSkillsToAgent(
|
||||||
|
client: import("@letta-ai/letta-client").default,
|
||||||
|
agentId: string,
|
||||||
|
skillsDirectory: string,
|
||||||
|
options?: { skipIfUnchanged?: boolean },
|
||||||
|
): Promise<{ synced: boolean; skills: Skill[] }> {
|
||||||
|
// Discover skills from filesystem
|
||||||
|
const { skills, errors } = await discoverSkills(skillsDirectory);
|
||||||
|
|
||||||
|
if (errors.length > 0) {
|
||||||
|
for (const error of errors) {
|
||||||
|
console.warn(`[skills] Discovery error: ${error.path}: ${error.message}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format skills for memory block
|
||||||
|
const formattedSkills = formatSkillsForMemory(skills, skillsDirectory);
|
||||||
|
|
||||||
|
// Check if we can skip the update
|
||||||
|
if (options?.skipIfUnchanged) {
|
||||||
|
const newHash = computeSkillsHash(formattedSkills);
|
||||||
|
const cachedHash = await getCachedSkillsHash();
|
||||||
|
|
||||||
|
if (newHash === cachedHash) {
|
||||||
|
return { synced: false, skills };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the block and cache the new hash
|
||||||
|
await client.agents.blocks.update("skills", {
|
||||||
|
agent_id: agentId,
|
||||||
|
value: formattedSkills,
|
||||||
|
});
|
||||||
|
await setCachedSkillsHash(newHash);
|
||||||
|
|
||||||
|
return { synced: true, skills };
|
||||||
|
}
|
||||||
|
|
||||||
|
// No skip option - always update
|
||||||
|
await client.agents.blocks.update("skills", {
|
||||||
|
agent_id: agentId,
|
||||||
|
value: formattedSkills,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Update hash cache for future runs
|
||||||
|
const newHash = computeSkillsHash(formattedSkills);
|
||||||
|
await setCachedSkillsHash(newHash);
|
||||||
|
|
||||||
|
return { synced: true, skills };
|
||||||
|
}
|
||||||
|
|||||||
160
src/cli/App.tsx
160
src/cli/App.tsx
@@ -30,6 +30,7 @@ import {
|
|||||||
import {
|
import {
|
||||||
buildApprovalRecoveryMessage,
|
buildApprovalRecoveryMessage,
|
||||||
fetchRunErrorDetail,
|
fetchRunErrorDetail,
|
||||||
|
isApprovalPendingError,
|
||||||
isApprovalStateDesyncError,
|
isApprovalStateDesyncError,
|
||||||
} from "../agent/approval-recovery";
|
} from "../agent/approval-recovery";
|
||||||
import { prefetchAvailableModelHandles } from "../agent/available-models";
|
import { prefetchAvailableModelHandles } from "../agent/available-models";
|
||||||
@@ -175,10 +176,10 @@ import { useTerminalRows, useTerminalWidth } from "./hooks/useTerminalWidth";
|
|||||||
const CLEAR_SCREEN_AND_HOME = "\u001B[2J\u001B[H";
|
const CLEAR_SCREEN_AND_HOME = "\u001B[2J\u001B[H";
|
||||||
const MIN_RESIZE_DELTA = 2;
|
const MIN_RESIZE_DELTA = 2;
|
||||||
|
|
||||||
// Feature flag: Check for pending approvals before sending messages
|
// Eager approval checking is now CONDITIONAL (LET-7101):
|
||||||
// This prevents infinite thinking state when there's an orphaned approval
|
// - Enabled when resuming a session (--resume, --continue, or startupApprovals exist)
|
||||||
// Can be disabled if the latency check adds too much overhead
|
// - Disabled for normal messages (lazy recovery handles edge cases)
|
||||||
const CHECK_PENDING_APPROVALS_BEFORE_SEND = true;
|
// This saves ~2s latency per message in the common case.
|
||||||
|
|
||||||
// Feature flag: Eagerly cancel streams client-side when user presses ESC
|
// Feature flag: Eagerly cancel streams client-side when user presses ESC
|
||||||
// When true (default), immediately abort the stream after calling .cancel()
|
// When true (default), immediately abort the stream after calling .cancel()
|
||||||
@@ -678,6 +679,12 @@ export default function App({
|
|||||||
>(null);
|
>(null);
|
||||||
const toolAbortControllerRef = useRef<AbortController | null>(null);
|
const toolAbortControllerRef = useRef<AbortController | null>(null);
|
||||||
|
|
||||||
|
// Eager approval checking: only enabled when resuming a session (LET-7101)
|
||||||
|
// After first successful message, we disable it since any new approvals are from our own turn
|
||||||
|
const [needsEagerApprovalCheck, setNeedsEagerApprovalCheck] = useState(
|
||||||
|
() => resumedExistingConversation || startupApprovals.length > 0,
|
||||||
|
);
|
||||||
|
|
||||||
// Track auto-handled results to combine with user decisions
|
// Track auto-handled results to combine with user decisions
|
||||||
const [autoHandledResults, setAutoHandledResults] = useState<
|
const [autoHandledResults, setAutoHandledResults] = useState<
|
||||||
Array<{
|
Array<{
|
||||||
@@ -1941,6 +1948,12 @@ export default function App({
|
|||||||
setStreaming(false);
|
setStreaming(false);
|
||||||
llmApiErrorRetriesRef.current = 0; // Reset retry counter on success
|
llmApiErrorRetriesRef.current = 0; // Reset retry counter on success
|
||||||
|
|
||||||
|
// Disable eager approval check after first successful message (LET-7101)
|
||||||
|
// Any new approvals from here on are from our own turn, not orphaned
|
||||||
|
if (needsEagerApprovalCheck) {
|
||||||
|
setNeedsEagerApprovalCheck(false);
|
||||||
|
}
|
||||||
|
|
||||||
// Send desktop notification when turn completes
|
// Send desktop notification when turn completes
|
||||||
// and we're not about to auto-send another queued message
|
// and we're not about to auto-send another queued message
|
||||||
if (!waitingForQueueCancelRef.current) {
|
if (!waitingForQueueCancelRef.current) {
|
||||||
@@ -2552,6 +2565,112 @@ export default function App({
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for approval pending error (sent user message while approval waiting)
|
||||||
|
// This is the lazy recovery path for when needsEagerApprovalCheck is false
|
||||||
|
const approvalPendingDetected =
|
||||||
|
isApprovalPendingError(detailFromRun) ||
|
||||||
|
isApprovalPendingError(latestErrorText);
|
||||||
|
|
||||||
|
if (
|
||||||
|
!hasApprovalInPayload &&
|
||||||
|
approvalPendingDetected &&
|
||||||
|
llmApiErrorRetriesRef.current < LLM_API_ERROR_MAX_RETRIES
|
||||||
|
) {
|
||||||
|
llmApiErrorRetriesRef.current += 1;
|
||||||
|
|
||||||
|
// Log for debugging (visible in transcripts)
|
||||||
|
const statusId = uid("status");
|
||||||
|
buffersRef.current.byId.set(statusId, {
|
||||||
|
kind: "status",
|
||||||
|
id: statusId,
|
||||||
|
lines: [
|
||||||
|
"[LAZY RECOVERY] Detected CONFLICT: server has pending approval",
|
||||||
|
"[LAZY RECOVERY] Fetching stale approvals to auto-deny...",
|
||||||
|
],
|
||||||
|
});
|
||||||
|
buffersRef.current.order.push(statusId);
|
||||||
|
refreshDerived();
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Fetch pending approvals and auto-deny them
|
||||||
|
const client = await getClient();
|
||||||
|
const agent = await client.agents.retrieve(agentIdRef.current);
|
||||||
|
const { pendingApprovals: existingApprovals } =
|
||||||
|
await getResumeData(client, agent, conversationIdRef.current);
|
||||||
|
|
||||||
|
if (existingApprovals && existingApprovals.length > 0) {
|
||||||
|
// Update status with details
|
||||||
|
buffersRef.current.byId.set(statusId, {
|
||||||
|
kind: "status",
|
||||||
|
id: statusId,
|
||||||
|
lines: [
|
||||||
|
"[LAZY RECOVERY] Detected CONFLICT: server has pending approval",
|
||||||
|
`[LAZY RECOVERY] Found ${existingApprovals.length} stale approval(s):`,
|
||||||
|
...existingApprovals.map(
|
||||||
|
(a) =>
|
||||||
|
` - ${a.toolName} (${a.toolCallId.slice(0, 8)}...)`,
|
||||||
|
),
|
||||||
|
"[LAZY RECOVERY] Auto-denying and batching with user message...",
|
||||||
|
],
|
||||||
|
});
|
||||||
|
refreshDerived();
|
||||||
|
|
||||||
|
// Create denial results for all stale approvals
|
||||||
|
// Use the same format as handleCancelApprovals (lines 6390-6395)
|
||||||
|
const denialResults = existingApprovals.map((approval) => ({
|
||||||
|
type: "approval" as const,
|
||||||
|
tool_call_id: approval.toolCallId,
|
||||||
|
approve: false,
|
||||||
|
reason:
|
||||||
|
"Auto-denied: stale approval from interrupted session",
|
||||||
|
}));
|
||||||
|
|
||||||
|
// Prepend approval denials to the current input (keeps user message)
|
||||||
|
const approvalPayload: ApprovalCreate = {
|
||||||
|
type: "approval",
|
||||||
|
approvals: denialResults,
|
||||||
|
};
|
||||||
|
currentInput.unshift(approvalPayload);
|
||||||
|
} else {
|
||||||
|
// No approvals found - server state may have cleared
|
||||||
|
buffersRef.current.byId.set(statusId, {
|
||||||
|
kind: "status",
|
||||||
|
id: statusId,
|
||||||
|
lines: [
|
||||||
|
"[LAZY RECOVERY] Detected CONFLICT but no pending approvals found",
|
||||||
|
"[LAZY RECOVERY] Retrying message...",
|
||||||
|
],
|
||||||
|
});
|
||||||
|
refreshDerived();
|
||||||
|
}
|
||||||
|
} catch (_recoveryError) {
|
||||||
|
// If we can't fetch approvals, just retry the original message
|
||||||
|
buffersRef.current.byId.set(statusId, {
|
||||||
|
kind: "status",
|
||||||
|
id: statusId,
|
||||||
|
lines: [
|
||||||
|
"[LAZY RECOVERY] Failed to fetch pending approvals",
|
||||||
|
"[LAZY RECOVERY] Retrying message anyway...",
|
||||||
|
],
|
||||||
|
});
|
||||||
|
refreshDerived();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Brief pause so user can see the status
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 500));
|
||||||
|
|
||||||
|
// Remove the transient status
|
||||||
|
buffersRef.current.byId.delete(statusId);
|
||||||
|
buffersRef.current.order = buffersRef.current.order.filter(
|
||||||
|
(id) => id !== statusId,
|
||||||
|
);
|
||||||
|
refreshDerived();
|
||||||
|
|
||||||
|
// Reset interrupted flag so retry stream chunks are processed
|
||||||
|
buffersRef.current.interrupted = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Check if this is a retriable error (transient LLM API error)
|
// Check if this is a retriable error (transient LLM API error)
|
||||||
const retriable = await isRetriableError(
|
const retriable = await isRetriableError(
|
||||||
stopReasonToHandle,
|
stopReasonToHandle,
|
||||||
@@ -2757,6 +2876,7 @@ export default function App({
|
|||||||
setStreaming,
|
setStreaming,
|
||||||
currentModelId,
|
currentModelId,
|
||||||
updateStreamingOutput,
|
updateStreamingOutput,
|
||||||
|
needsEagerApprovalCheck,
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -3278,7 +3398,8 @@ export default function App({
|
|||||||
const checkPendingApprovalsForSlashCommand = useCallback(async (): Promise<
|
const checkPendingApprovalsForSlashCommand = useCallback(async (): Promise<
|
||||||
{ blocked: true } | { blocked: false }
|
{ blocked: true } | { blocked: false }
|
||||||
> => {
|
> => {
|
||||||
if (!CHECK_PENDING_APPROVALS_BEFORE_SEND) {
|
// Only check eagerly when resuming a session (LET-7101)
|
||||||
|
if (!needsEagerApprovalCheck) {
|
||||||
return { blocked: false };
|
return { blocked: false };
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3425,7 +3546,13 @@ export default function App({
|
|||||||
// If check fails, proceed anyway (don't block user)
|
// If check fails, proceed anyway (don't block user)
|
||||||
return { blocked: false };
|
return { blocked: false };
|
||||||
}
|
}
|
||||||
}, [agentId, processConversation, refreshDerived, updateStreamingOutput]);
|
}, [
|
||||||
|
agentId,
|
||||||
|
processConversation,
|
||||||
|
refreshDerived,
|
||||||
|
updateStreamingOutput,
|
||||||
|
needsEagerApprovalCheck,
|
||||||
|
]);
|
||||||
|
|
||||||
// biome-ignore lint/correctness/useExhaustiveDependencies: refs read .current dynamically, complex callback with intentional deps
|
// biome-ignore lint/correctness/useExhaustiveDependencies: refs read .current dynamically, complex callback with intentional deps
|
||||||
const onSubmit = useCallback(
|
const onSubmit = useCallback(
|
||||||
@@ -5372,7 +5499,20 @@ DO NOT respond to these messages or otherwise consider them in your response unl
|
|||||||
|
|
||||||
// Check for pending approvals before sending message (skip if we already have
|
// Check for pending approvals before sending message (skip if we already have
|
||||||
// a queued approval response to send first).
|
// a queued approval response to send first).
|
||||||
if (CHECK_PENDING_APPROVALS_BEFORE_SEND && !queuedApprovalResults) {
|
// Only do eager check when resuming a session (LET-7101) - otherwise lazy recovery handles it
|
||||||
|
if (needsEagerApprovalCheck && !queuedApprovalResults) {
|
||||||
|
// Log for debugging
|
||||||
|
const eagerStatusId = uid("status");
|
||||||
|
buffersRef.current.byId.set(eagerStatusId, {
|
||||||
|
kind: "status",
|
||||||
|
id: eagerStatusId,
|
||||||
|
lines: [
|
||||||
|
"[EAGER CHECK] Checking for pending approvals (resume mode)...",
|
||||||
|
],
|
||||||
|
});
|
||||||
|
buffersRef.current.order.push(eagerStatusId);
|
||||||
|
refreshDerived();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const client = await getClient();
|
const client = await getClient();
|
||||||
// Fetch fresh agent state to check for pending approvals with accurate in-context messages
|
// Fetch fresh agent state to check for pending approvals with accurate in-context messages
|
||||||
@@ -5383,6 +5523,12 @@ DO NOT respond to these messages or otherwise consider them in your response unl
|
|||||||
conversationIdRef.current,
|
conversationIdRef.current,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Remove eager check status
|
||||||
|
buffersRef.current.byId.delete(eagerStatusId);
|
||||||
|
buffersRef.current.order = buffersRef.current.order.filter(
|
||||||
|
(id) => id !== eagerStatusId,
|
||||||
|
);
|
||||||
|
|
||||||
// Check if user cancelled while we were fetching approval state
|
// Check if user cancelled while we were fetching approval state
|
||||||
if (
|
if (
|
||||||
userCancelledRef.current ||
|
userCancelledRef.current ||
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import type { ApprovalResult } from "./agent/approval-execution";
|
|||||||
import {
|
import {
|
||||||
buildApprovalRecoveryMessage,
|
buildApprovalRecoveryMessage,
|
||||||
fetchRunErrorDetail,
|
fetchRunErrorDetail,
|
||||||
|
isApprovalPendingError,
|
||||||
isApprovalStateDesyncError,
|
isApprovalStateDesyncError,
|
||||||
} from "./agent/approval-recovery";
|
} from "./agent/approval-recovery";
|
||||||
import { getClient } from "./agent/client";
|
import { getClient } from "./agent/client";
|
||||||
@@ -40,11 +41,17 @@ import type {
|
|||||||
ControlResponse,
|
ControlResponse,
|
||||||
ErrorMessage,
|
ErrorMessage,
|
||||||
MessageWire,
|
MessageWire,
|
||||||
|
RecoveryMessage,
|
||||||
ResultMessage,
|
ResultMessage,
|
||||||
RetryMessage,
|
RetryMessage,
|
||||||
StreamEvent,
|
StreamEvent,
|
||||||
SystemInitMessage,
|
SystemInitMessage,
|
||||||
} from "./types/protocol";
|
} from "./types/protocol";
|
||||||
|
import {
|
||||||
|
markMilestone,
|
||||||
|
measureSinceMilestone,
|
||||||
|
reportAllMilestones,
|
||||||
|
} from "./utils/timing";
|
||||||
|
|
||||||
// Maximum number of times to retry a turn when the backend
|
// Maximum number of times to retry a turn when the backend
|
||||||
// reports an `llm_api_error` stop reason. This helps smooth
|
// reports an `llm_api_error` stop reason. This helps smooth
|
||||||
@@ -169,6 +176,7 @@ export async function handleHeadlessCommand(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const client = await getClient();
|
const client = await getClient();
|
||||||
|
markMilestone("HEADLESS_CLIENT_READY");
|
||||||
|
|
||||||
// Check for --resume flag (interactive only)
|
// Check for --resume flag (interactive only)
|
||||||
if (values.resume) {
|
if (values.resume) {
|
||||||
@@ -462,6 +470,7 @@ export async function handleHeadlessCommand(
|
|||||||
console.error("No agent found. Use --new-agent to create a new agent.");
|
console.error("No agent found. Use --new-agent to create a new agent.");
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
markMilestone("HEADLESS_AGENT_RESOLVED");
|
||||||
|
|
||||||
// Check if we're resuming an existing agent (not creating a new one)
|
// Check if we're resuming an existing agent (not creating a new one)
|
||||||
const isResumingAgent = !!(
|
const isResumingAgent = !!(
|
||||||
@@ -567,6 +576,7 @@ export async function handleHeadlessCommand(
|
|||||||
});
|
});
|
||||||
conversationId = conversation.id;
|
conversationId = conversation.id;
|
||||||
}
|
}
|
||||||
|
markMilestone("HEADLESS_CONVERSATION_READY");
|
||||||
|
|
||||||
// Save session (agent + conversation) to both project and global settings
|
// Save session (agent + conversation) to both project and global settings
|
||||||
// Skip for subagents - they shouldn't pollute the LRU settings
|
// Skip for subagents - they shouldn't pollute the LRU settings
|
||||||
@@ -591,41 +601,33 @@ export async function handleHeadlessCommand(
|
|||||||
|
|
||||||
// Set agent context for tools that need it (e.g., Skill tool, Task tool)
|
// Set agent context for tools that need it (e.g., Skill tool, Task tool)
|
||||||
setAgentContext(agent.id, skillsDirectory);
|
setAgentContext(agent.id, skillsDirectory);
|
||||||
await initializeLoadedSkillsFlag();
|
|
||||||
|
|
||||||
// Re-discover skills and update the skills memory block
|
// Fire-and-forget: Initialize loaded skills flag (LET-7101)
|
||||||
|
// Don't await - this is just for the skill unload reminder
|
||||||
|
initializeLoadedSkillsFlag().catch(() => {
|
||||||
|
// Ignore errors - not critical
|
||||||
|
});
|
||||||
|
|
||||||
|
// Fire-and-forget: Sync skills in background (LET-7101)
|
||||||
// This ensures new skills added after agent creation are available
|
// This ensures new skills added after agent creation are available
|
||||||
try {
|
// Don't await - proceed to message sending immediately
|
||||||
const { discoverSkills, formatSkillsForMemory, SKILLS_DIR } = await import(
|
(async () => {
|
||||||
"./agent/skills"
|
try {
|
||||||
);
|
const { syncSkillsToAgent, SKILLS_DIR } = await import("./agent/skills");
|
||||||
const { join } = await import("node:path");
|
const { join } = await import("node:path");
|
||||||
|
|
||||||
const resolvedSkillsDirectory =
|
const resolvedSkillsDirectory =
|
||||||
skillsDirectory || join(process.cwd(), SKILLS_DIR);
|
skillsDirectory || join(process.cwd(), SKILLS_DIR);
|
||||||
const { skills, errors } = await discoverSkills(resolvedSkillsDirectory);
|
|
||||||
|
|
||||||
if (errors.length > 0) {
|
await syncSkillsToAgent(client, agent.id, resolvedSkillsDirectory, {
|
||||||
console.warn("Errors encountered during skill discovery:");
|
skipIfUnchanged: true,
|
||||||
for (const error of errors) {
|
});
|
||||||
console.warn(` ${error.path}: ${error.message}`);
|
} catch (error) {
|
||||||
}
|
console.warn(
|
||||||
|
`[skills] Background sync failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
})();
|
||||||
// Update the skills memory block with freshly discovered skills
|
|
||||||
const formattedSkills = formatSkillsForMemory(
|
|
||||||
skills,
|
|
||||||
resolvedSkillsDirectory,
|
|
||||||
);
|
|
||||||
await client.agents.blocks.update("skills", {
|
|
||||||
agent_id: agent.id,
|
|
||||||
value: formattedSkills,
|
|
||||||
});
|
|
||||||
} catch (error) {
|
|
||||||
console.warn(
|
|
||||||
`Failed to update skills: ${error instanceof Error ? error.message : String(error)}`,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Validate output format
|
// Validate output format
|
||||||
const outputFormat =
|
const outputFormat =
|
||||||
@@ -830,8 +832,11 @@ export async function handleHeadlessCommand(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Clear any pending approvals before starting a new turn
|
// Clear any pending approvals before starting a new turn - ONLY when resuming (LET-7101)
|
||||||
await resolveAllPendingApprovals();
|
// For new agents/conversations, lazy recovery handles any edge cases
|
||||||
|
if (isResumingAgent) {
|
||||||
|
await resolveAllPendingApprovals();
|
||||||
|
}
|
||||||
|
|
||||||
// Build message content with reminders (plan mode first, then skill unload)
|
// Build message content with reminders (plan mode first, then skill unload)
|
||||||
const { permissionMode } = await import("./permissions/mode");
|
const { permissionMode } = await import("./permissions/mode");
|
||||||
@@ -865,6 +870,9 @@ export async function handleHeadlessCommand(
|
|||||||
let lastKnownRunId: string | null = null;
|
let lastKnownRunId: string | null = null;
|
||||||
let llmApiErrorRetries = 0;
|
let llmApiErrorRetries = 0;
|
||||||
|
|
||||||
|
markMilestone("HEADLESS_FIRST_STREAM_START");
|
||||||
|
measureSinceMilestone("headless-setup-total", "HEADLESS_CLIENT_READY");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
while (true) {
|
while (true) {
|
||||||
const stream = await sendMessageStream(conversationId, currentInput);
|
const stream = await sendMessageStream(conversationId, currentInput);
|
||||||
@@ -922,8 +930,25 @@ export async function handleHeadlessCommand(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Detect server conflict due to pending approval; handle it and retry
|
// Detect server conflict due to pending approval; handle it and retry
|
||||||
if (errorInfo?.message?.includes("Cannot send a new message")) {
|
// Check both detail and message fields since error formats vary
|
||||||
// Don't emit this error; clear approvals and retry outer loop
|
if (
|
||||||
|
isApprovalPendingError(errorInfo?.detail) ||
|
||||||
|
isApprovalPendingError(errorInfo?.message)
|
||||||
|
) {
|
||||||
|
// Emit recovery message for stream-json mode (enables testing)
|
||||||
|
if (outputFormat === "stream-json") {
|
||||||
|
const recoveryMsg: RecoveryMessage = {
|
||||||
|
type: "recovery",
|
||||||
|
recovery_type: "approval_pending",
|
||||||
|
message:
|
||||||
|
"Detected pending approval conflict; auto-denying stale approval and retrying",
|
||||||
|
run_id: lastRunId ?? undefined,
|
||||||
|
session_id: sessionId,
|
||||||
|
uuid: `recovery-${lastRunId || crypto.randomUUID()}`,
|
||||||
|
};
|
||||||
|
console.log(JSON.stringify(recoveryMsg));
|
||||||
|
}
|
||||||
|
// Clear approvals and retry outer loop
|
||||||
await resolveAllPendingApprovals();
|
await resolveAllPendingApprovals();
|
||||||
// Reset state and restart turn
|
// Reset state and restart turn
|
||||||
stopReason = "error" as StopReasonType;
|
stopReason = "error" as StopReasonType;
|
||||||
@@ -1541,6 +1566,10 @@ export async function handleHeadlessCommand(
|
|||||||
}
|
}
|
||||||
console.log(resultText);
|
console.log(resultText);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Report all milestones at the end for latency audit
|
||||||
|
markMilestone("HEADLESS_COMPLETE");
|
||||||
|
reportAllMilestones();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
66
src/index.ts
66
src/index.ts
@@ -17,6 +17,7 @@ import { permissionMode } from "./permissions/mode";
|
|||||||
import { settingsManager } from "./settings-manager";
|
import { settingsManager } from "./settings-manager";
|
||||||
import { telemetry } from "./telemetry";
|
import { telemetry } from "./telemetry";
|
||||||
import { loadTools } from "./tools/manager";
|
import { loadTools } from "./tools/manager";
|
||||||
|
import { markMilestone } from "./utils/timing";
|
||||||
|
|
||||||
// Stable empty array constants to prevent new references on every render
|
// Stable empty array constants to prevent new references on every render
|
||||||
// These are used as fallbacks when resumeData is null, avoiding the React
|
// These are used as fallbacks when resumeData is null, avoiding the React
|
||||||
@@ -335,9 +336,12 @@ async function getPinnedAgentNames(): Promise<{ id: string; name: string }[]> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function main(): Promise<void> {
|
async function main(): Promise<void> {
|
||||||
|
markMilestone("CLI_START");
|
||||||
|
|
||||||
// Initialize settings manager (loads settings once into memory)
|
// Initialize settings manager (loads settings once into memory)
|
||||||
await settingsManager.initialize();
|
await settingsManager.initialize();
|
||||||
const settings = await settingsManager.getSettingsWithSecureTokens();
|
const settings = await settingsManager.getSettingsWithSecureTokens();
|
||||||
|
markMilestone("SETTINGS_LOADED");
|
||||||
|
|
||||||
// Initialize LSP infrastructure for type checking
|
// Initialize LSP infrastructure for type checking
|
||||||
if (process.env.LETTA_ENABLE_LSP) {
|
if (process.env.LETTA_ENABLE_LSP) {
|
||||||
@@ -728,6 +732,7 @@ async function main(): Promise<void> {
|
|||||||
// Validate credentials by checking health endpoint
|
// Validate credentials by checking health endpoint
|
||||||
const { validateCredentials } = await import("./auth/oauth");
|
const { validateCredentials } = await import("./auth/oauth");
|
||||||
const isValid = await validateCredentials(baseURL, apiKey ?? "");
|
const isValid = await validateCredentials(baseURL, apiKey ?? "");
|
||||||
|
markMilestone("CREDENTIALS_VALIDATED");
|
||||||
|
|
||||||
if (!isValid) {
|
if (!isValid) {
|
||||||
// For headless mode, error out with helpful message
|
// For headless mode, error out with helpful message
|
||||||
@@ -829,18 +834,22 @@ async function main(): Promise<void> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (isHeadless) {
|
if (isHeadless) {
|
||||||
|
markMilestone("HEADLESS_MODE_START");
|
||||||
// For headless mode, load tools synchronously (respecting model/toolset when provided)
|
// For headless mode, load tools synchronously (respecting model/toolset when provided)
|
||||||
const modelForTools = getModelForToolLoading(
|
const modelForTools = getModelForToolLoading(
|
||||||
specifiedModel,
|
specifiedModel,
|
||||||
specifiedToolset as "codex" | "default" | undefined,
|
specifiedToolset as "codex" | "default" | undefined,
|
||||||
);
|
);
|
||||||
await loadTools(modelForTools);
|
await loadTools(modelForTools);
|
||||||
|
markMilestone("TOOLS_LOADED");
|
||||||
|
|
||||||
const { handleHeadlessCommand } = await import("./headless");
|
const { handleHeadlessCommand } = await import("./headless");
|
||||||
await handleHeadlessCommand(process.argv, specifiedModel, skillsDirectory);
|
await handleHeadlessCommand(process.argv, specifiedModel, skillsDirectory);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
markMilestone("TUI_MODE_START");
|
||||||
|
|
||||||
// Enable enhanced key reporting (Shift+Enter, etc.) BEFORE Ink initializes.
|
// Enable enhanced key reporting (Shift+Enter, etc.) BEFORE Ink initializes.
|
||||||
// In VS Code/xterm.js this typically requires a short handshake (query + enable).
|
// In VS Code/xterm.js this typically requires a short handshake (query + enable).
|
||||||
try {
|
try {
|
||||||
@@ -853,6 +862,7 @@ async function main(): Promise<void> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Interactive: lazy-load React/Ink + App
|
// Interactive: lazy-load React/Ink + App
|
||||||
|
markMilestone("REACT_IMPORT_START");
|
||||||
const React = await import("react");
|
const React = await import("react");
|
||||||
const { render } = await import("ink");
|
const { render } = await import("ink");
|
||||||
const { useState, useEffect } = React;
|
const { useState, useEffect } = React;
|
||||||
@@ -1441,42 +1451,35 @@ async function main(): Promise<void> {
|
|||||||
|
|
||||||
// Set agent context for tools that need it (e.g., Skill tool)
|
// Set agent context for tools that need it (e.g., Skill tool)
|
||||||
setAgentContext(agent.id, skillsDirectory);
|
setAgentContext(agent.id, skillsDirectory);
|
||||||
await initializeLoadedSkillsFlag();
|
|
||||||
|
|
||||||
// Re-discover skills and update the skills memory block
|
// Fire-and-forget: Initialize loaded skills flag (LET-7101)
|
||||||
|
// Don't await - this is just for the skill unload reminder
|
||||||
|
initializeLoadedSkillsFlag().catch(() => {
|
||||||
|
// Ignore errors - not critical
|
||||||
|
});
|
||||||
|
|
||||||
|
// Fire-and-forget: Sync skills in background (LET-7101)
|
||||||
// This ensures new skills added after agent creation are available
|
// This ensures new skills added after agent creation are available
|
||||||
try {
|
// Don't await - user can start typing immediately
|
||||||
const { discoverSkills, formatSkillsForMemory, SKILLS_DIR } =
|
(async () => {
|
||||||
await import("./agent/skills");
|
try {
|
||||||
const { join } = await import("node:path");
|
const { syncSkillsToAgent, SKILLS_DIR } = await import(
|
||||||
|
"./agent/skills"
|
||||||
|
);
|
||||||
|
const { join } = await import("node:path");
|
||||||
|
|
||||||
const resolvedSkillsDirectory =
|
const resolvedSkillsDirectory =
|
||||||
skillsDirectory || join(process.cwd(), SKILLS_DIR);
|
skillsDirectory || join(process.cwd(), SKILLS_DIR);
|
||||||
const { skills, errors } = await discoverSkills(
|
|
||||||
resolvedSkillsDirectory,
|
|
||||||
);
|
|
||||||
|
|
||||||
if (errors.length > 0) {
|
await syncSkillsToAgent(client, agent.id, resolvedSkillsDirectory, {
|
||||||
console.warn("Errors encountered during skill discovery:");
|
skipIfUnchanged: true,
|
||||||
for (const error of errors) {
|
});
|
||||||
console.warn(` ${error.path}: ${error.message}`);
|
} catch (error) {
|
||||||
}
|
console.warn(
|
||||||
|
`[skills] Background sync failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
})();
|
||||||
// Update the skills memory block with freshly discovered skills
|
|
||||||
const formattedSkills = formatSkillsForMemory(
|
|
||||||
skills,
|
|
||||||
resolvedSkillsDirectory,
|
|
||||||
);
|
|
||||||
await client.agents.blocks.update("skills", {
|
|
||||||
agent_id: agent.id,
|
|
||||||
value: formattedSkills,
|
|
||||||
});
|
|
||||||
} catch (error) {
|
|
||||||
console.warn(
|
|
||||||
`Failed to update skills: ${error instanceof Error ? error.message : String(error)}`,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if we're resuming an existing agent
|
// Check if we're resuming an existing agent
|
||||||
// We're resuming if:
|
// We're resuming if:
|
||||||
@@ -1797,6 +1800,7 @@ async function main(): Promise<void> {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
markMilestone("REACT_RENDER_START");
|
||||||
render(
|
render(
|
||||||
React.createElement(LoadingApp, {
|
React.createElement(LoadingApp, {
|
||||||
continueSession: shouldContinue,
|
continueSession: shouldContinue,
|
||||||
|
|||||||
249
src/tests/approval-recovery.test.ts
Normal file
249
src/tests/approval-recovery.test.ts
Normal file
@@ -0,0 +1,249 @@
|
|||||||
|
import { describe, expect, test } from "bun:test";
|
||||||
|
import type { Message } from "@letta-ai/letta-client/resources/agents/messages";
|
||||||
|
import {
|
||||||
|
isApprovalPendingError,
|
||||||
|
isApprovalStateDesyncError,
|
||||||
|
} from "../agent/approval-recovery";
|
||||||
|
import { extractApprovals } from "../agent/check-approval";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests for approval error detection helpers (LET-7101).
|
||||||
|
*
|
||||||
|
* These functions detect two opposite error conditions:
|
||||||
|
* 1. isApprovalStateDesyncError: Sent approval, but server has no pending approval
|
||||||
|
* 2. isApprovalPendingError: Sent user message, but server has pending approval waiting
|
||||||
|
*/
|
||||||
|
|
||||||
|
describe("isApprovalStateDesyncError", () => {
|
||||||
|
test("detects desync error in detail string", () => {
|
||||||
|
const detail = "No tool call is currently awaiting approval";
|
||||||
|
expect(isApprovalStateDesyncError(detail)).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("detects desync error case-insensitively", () => {
|
||||||
|
const detail = "NO TOOL CALL IS CURRENTLY AWAITING APPROVAL";
|
||||||
|
expect(isApprovalStateDesyncError(detail)).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("detects desync error in longer message", () => {
|
||||||
|
const detail =
|
||||||
|
"Error: No tool call is currently awaiting approval. The approval request may have expired.";
|
||||||
|
expect(isApprovalStateDesyncError(detail)).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("returns false for unrelated errors", () => {
|
||||||
|
expect(isApprovalStateDesyncError("Connection timeout")).toBe(false);
|
||||||
|
expect(isApprovalStateDesyncError("Internal server error")).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("returns false for non-string input", () => {
|
||||||
|
expect(isApprovalStateDesyncError(null)).toBe(false);
|
||||||
|
expect(isApprovalStateDesyncError(undefined)).toBe(false);
|
||||||
|
expect(isApprovalStateDesyncError(123)).toBe(false);
|
||||||
|
expect(isApprovalStateDesyncError({ error: "test" })).toBe(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("isApprovalPendingError", () => {
|
||||||
|
// This is the actual error format from the Letta backend (screenshot from LET-7101)
|
||||||
|
const REAL_ERROR_DETAIL =
|
||||||
|
"CONFLICT: Cannot send a new message: The agent is waiting for approval on a tool call. Please approve or deny the pending request before continuing.";
|
||||||
|
|
||||||
|
test("detects approval pending error in real error format", () => {
|
||||||
|
expect(isApprovalPendingError(REAL_ERROR_DETAIL)).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("detects approval pending error case-insensitively", () => {
|
||||||
|
expect(isApprovalPendingError("CANNOT SEND A NEW MESSAGE")).toBe(true);
|
||||||
|
expect(isApprovalPendingError("cannot send a new message")).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("detects partial match in longer message", () => {
|
||||||
|
const detail = "Error occurred: Cannot send a new message while processing";
|
||||||
|
expect(isApprovalPendingError(detail)).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("returns false for desync errors (opposite case)", () => {
|
||||||
|
// These are the OPPOSITE error - when we send approval but there's nothing pending
|
||||||
|
expect(
|
||||||
|
isApprovalPendingError("No tool call is currently awaiting approval"),
|
||||||
|
).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("returns false for unrelated errors", () => {
|
||||||
|
expect(isApprovalPendingError("Connection timeout")).toBe(false);
|
||||||
|
expect(isApprovalPendingError("Rate limit exceeded")).toBe(false);
|
||||||
|
expect(isApprovalPendingError("Invalid API key")).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("returns false for non-string input", () => {
|
||||||
|
expect(isApprovalPendingError(null)).toBe(false);
|
||||||
|
expect(isApprovalPendingError(undefined)).toBe(false);
|
||||||
|
expect(isApprovalPendingError(123)).toBe(false);
|
||||||
|
expect(isApprovalPendingError({ detail: REAL_ERROR_DETAIL })).toBe(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests for parallel tool call approval extraction.
|
||||||
|
* Ensures lazy recovery handles multiple simultaneous tool calls correctly.
|
||||||
|
*/
|
||||||
|
describe("extractApprovals", () => {
|
||||||
|
// Helper to create a minimal Message-like object for testing
|
||||||
|
// We use 'as Message' cast because the real Message type is complex
|
||||||
|
const createMessage = (overrides: {
|
||||||
|
tool_calls?: Array<{
|
||||||
|
tool_call_id?: string;
|
||||||
|
name?: string;
|
||||||
|
arguments?: string;
|
||||||
|
}>;
|
||||||
|
tool_call?: {
|
||||||
|
tool_call_id?: string;
|
||||||
|
name?: string;
|
||||||
|
arguments?: string;
|
||||||
|
};
|
||||||
|
}): Message =>
|
||||||
|
({
|
||||||
|
id: "test-msg-id",
|
||||||
|
date: new Date().toISOString(),
|
||||||
|
message_type: "approval_request_message",
|
||||||
|
...overrides,
|
||||||
|
}) as unknown as Message;
|
||||||
|
|
||||||
|
test("extracts single tool call from tool_calls array", () => {
|
||||||
|
const msg = createMessage({
|
||||||
|
tool_calls: [
|
||||||
|
{
|
||||||
|
tool_call_id: "call-1",
|
||||||
|
name: "Bash",
|
||||||
|
arguments: '{"command": "echo hello"}',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = extractApprovals(msg);
|
||||||
|
|
||||||
|
expect(result.pendingApprovals).toHaveLength(1);
|
||||||
|
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-1");
|
||||||
|
expect(result.pendingApprovals[0]!.toolName).toBe("Bash");
|
||||||
|
expect(result.pendingApproval?.toolCallId).toBe("call-1");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("extracts multiple parallel tool calls", () => {
|
||||||
|
const msg = createMessage({
|
||||||
|
tool_calls: [
|
||||||
|
{
|
||||||
|
tool_call_id: "call-1",
|
||||||
|
name: "Bash",
|
||||||
|
arguments: '{"command": "echo hello"}',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
tool_call_id: "call-2",
|
||||||
|
name: "web_search",
|
||||||
|
arguments: '{"query": "test"}',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
tool_call_id: "call-3",
|
||||||
|
name: "Read",
|
||||||
|
arguments: '{"file_path": "/tmp/test.txt"}',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = extractApprovals(msg);
|
||||||
|
|
||||||
|
expect(result.pendingApprovals).toHaveLength(3);
|
||||||
|
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-1");
|
||||||
|
expect(result.pendingApprovals[0]!.toolName).toBe("Bash");
|
||||||
|
expect(result.pendingApprovals[1]!.toolCallId).toBe("call-2");
|
||||||
|
expect(result.pendingApprovals[1]!.toolName).toBe("web_search");
|
||||||
|
expect(result.pendingApprovals[2]!.toolCallId).toBe("call-3");
|
||||||
|
expect(result.pendingApprovals[2]!.toolName).toBe("Read");
|
||||||
|
// pendingApproval is deprecated, should be first item
|
||||||
|
expect(result.pendingApproval?.toolCallId).toBe("call-1");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("handles deprecated single tool_call field", () => {
|
||||||
|
const msg = createMessage({
|
||||||
|
tool_call: {
|
||||||
|
tool_call_id: "call-legacy",
|
||||||
|
name: "Write",
|
||||||
|
arguments: '{"file_path": "/tmp/out.txt"}',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = extractApprovals(msg);
|
||||||
|
|
||||||
|
expect(result.pendingApprovals).toHaveLength(1);
|
||||||
|
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-legacy");
|
||||||
|
expect(result.pendingApprovals[0]!.toolName).toBe("Write");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("prefers tool_calls array over deprecated tool_call", () => {
|
||||||
|
const msg = createMessage({
|
||||||
|
tool_calls: [{ tool_call_id: "call-new", name: "Bash", arguments: "{}" }],
|
||||||
|
tool_call: {
|
||||||
|
tool_call_id: "call-old",
|
||||||
|
name: "Write",
|
||||||
|
arguments: "{}",
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = extractApprovals(msg);
|
||||||
|
|
||||||
|
// Should use tool_calls, not tool_call
|
||||||
|
expect(result.pendingApprovals).toHaveLength(1);
|
||||||
|
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-new");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("filters out tool calls without tool_call_id", () => {
|
||||||
|
const msg = createMessage({
|
||||||
|
tool_calls: [
|
||||||
|
{ tool_call_id: "call-valid", name: "Bash", arguments: "{}" },
|
||||||
|
{ name: "Invalid", arguments: "{}" }, // Missing tool_call_id
|
||||||
|
{ tool_call_id: "", name: "Empty", arguments: "{}" }, // Empty tool_call_id
|
||||||
|
{ tool_call_id: "call-valid-2", name: "Read", arguments: "{}" },
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = extractApprovals(msg);
|
||||||
|
|
||||||
|
// Should only include entries with valid tool_call_id
|
||||||
|
expect(result.pendingApprovals).toHaveLength(2);
|
||||||
|
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-valid");
|
||||||
|
expect(result.pendingApprovals[1]!.toolCallId).toBe("call-valid-2");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("returns empty array when no tool calls present", () => {
|
||||||
|
const msg = createMessage({});
|
||||||
|
|
||||||
|
const result = extractApprovals(msg);
|
||||||
|
|
||||||
|
expect(result.pendingApprovals).toHaveLength(0);
|
||||||
|
expect(result.pendingApproval).toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
|
test("handles missing name and arguments gracefully", () => {
|
||||||
|
const msg = createMessage({
|
||||||
|
tool_calls: [{ tool_call_id: "call-minimal" }],
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = extractApprovals(msg);
|
||||||
|
|
||||||
|
expect(result.pendingApprovals).toHaveLength(1);
|
||||||
|
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-minimal");
|
||||||
|
expect(result.pendingApprovals[0]!.toolName).toBe("");
|
||||||
|
expect(result.pendingApprovals[0]!.toolArgs).toBe("");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Note: Full integration testing of lazy approval recovery requires:
|
||||||
|
* 1. Starting CLI without --yolo
|
||||||
|
* 2. Sending a prompt that triggers a tool call requiring approval
|
||||||
|
* 3. Instead of approving, sending another user message
|
||||||
|
* 4. Verifying the CONFLICT error is detected and recovery happens
|
||||||
|
*
|
||||||
|
* This is complex to automate reliably in unit tests.
|
||||||
|
* Manual testing or a dedicated integration test suite is recommended.
|
||||||
|
*/
|
||||||
256
src/tests/lazy-approval-recovery.test.ts
Normal file
256
src/tests/lazy-approval-recovery.test.ts
Normal file
@@ -0,0 +1,256 @@
|
|||||||
|
import { describe, expect, test } from "bun:test";
|
||||||
|
import { spawn } from "node:child_process";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Integration test for lazy approval recovery (LET-7101).
|
||||||
|
*
|
||||||
|
* NOTE: The lazy approval recovery is primarily designed for TUI mode where:
|
||||||
|
* 1. User has a session with pending approvals (e.g., from a previous run)
|
||||||
|
* 2. User sends a new message before responding to the approval
|
||||||
|
* 3. Server returns CONFLICT error
|
||||||
|
* 4. CLI recovers by auto-denying stale approvals and retrying
|
||||||
|
*
|
||||||
|
* In bidirectional mode, messages sent during permission wait are dropped
|
||||||
|
* (see headless.ts line 1710-1714), so we can't directly test the CONFLICT
|
||||||
|
* scenario here. This test validates that the flow doesn't crash when
|
||||||
|
* messages are sent while approvals are pending.
|
||||||
|
*
|
||||||
|
* The RecoveryMessage emission can be tested by:
|
||||||
|
* 1. Manual testing in TUI mode (start session with orphaned approval)
|
||||||
|
* 2. Or by modifying headless mode to not drop messages during permission wait
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Prompt that will trigger a Bash tool call requiring approval
|
||||||
|
const BASH_TRIGGER_PROMPT =
|
||||||
|
"Run this exact bash command: echo test123. Do not use any other tools.";
|
||||||
|
|
||||||
|
// Second message to send while approval is pending
|
||||||
|
const INTERRUPT_MESSAGE =
|
||||||
|
"Actually, just say OK instead. Do not call any tools.";
|
||||||
|
|
||||||
|
interface StreamMessage {
|
||||||
|
type: string;
|
||||||
|
subtype?: string;
|
||||||
|
message_type?: string;
|
||||||
|
stop_reason?: string;
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
|
[key: string]: any;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run bidirectional test with custom message handling.
|
||||||
|
* Allows sending messages at specific points in the flow.
|
||||||
|
*/
|
||||||
|
async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
|
||||||
|
messages: StreamMessage[];
|
||||||
|
success: boolean;
|
||||||
|
errorSeen: boolean;
|
||||||
|
}> {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
const proc = spawn(
|
||||||
|
"bun",
|
||||||
|
[
|
||||||
|
"run",
|
||||||
|
"dev",
|
||||||
|
"-p",
|
||||||
|
"--input-format",
|
||||||
|
"stream-json",
|
||||||
|
"--output-format",
|
||||||
|
"stream-json",
|
||||||
|
"--new-agent",
|
||||||
|
"-m",
|
||||||
|
"haiku",
|
||||||
|
// NOTE: No --yolo flag - approvals are required
|
||||||
|
],
|
||||||
|
{
|
||||||
|
cwd: process.cwd(),
|
||||||
|
env: { ...process.env },
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
const messages: StreamMessage[] = [];
|
||||||
|
let buffer = "";
|
||||||
|
let initReceived = false;
|
||||||
|
let approvalSeen = false;
|
||||||
|
let interruptSent = false;
|
||||||
|
let errorSeen = false;
|
||||||
|
let resultCount = 0;
|
||||||
|
let closing = false;
|
||||||
|
|
||||||
|
const timeout = setTimeout(() => {
|
||||||
|
if (!closing) {
|
||||||
|
proc.kill();
|
||||||
|
reject(new Error(`Test timed out after ${timeoutMs}ms`));
|
||||||
|
}
|
||||||
|
}, timeoutMs);
|
||||||
|
|
||||||
|
const cleanup = () => {
|
||||||
|
closing = true;
|
||||||
|
clearTimeout(timeout);
|
||||||
|
setTimeout(() => {
|
||||||
|
proc.stdin?.end();
|
||||||
|
proc.kill();
|
||||||
|
}, 500);
|
||||||
|
};
|
||||||
|
|
||||||
|
const processLine = (line: string) => {
|
||||||
|
if (!line.trim()) return;
|
||||||
|
try {
|
||||||
|
const msg: StreamMessage = JSON.parse(line);
|
||||||
|
messages.push(msg);
|
||||||
|
|
||||||
|
// Debug output
|
||||||
|
if (process.env.DEBUG_TEST) {
|
||||||
|
console.log("MSG:", JSON.stringify(msg, null, 2));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 1: Wait for init, then send bash trigger prompt
|
||||||
|
if (msg.type === "system" && msg.subtype === "init" && !initReceived) {
|
||||||
|
initReceived = true;
|
||||||
|
const userMsg = JSON.stringify({
|
||||||
|
type: "user",
|
||||||
|
message: { role: "user", content: BASH_TRIGGER_PROMPT },
|
||||||
|
});
|
||||||
|
proc.stdin?.write(`${userMsg}\n`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 2: When we see approval request, send another user message instead
|
||||||
|
if (
|
||||||
|
msg.type === "message" &&
|
||||||
|
msg.message_type === "approval_request_message" &&
|
||||||
|
!approvalSeen
|
||||||
|
) {
|
||||||
|
approvalSeen = true;
|
||||||
|
// Wait a moment, then send interrupt message (NOT an approval)
|
||||||
|
setTimeout(() => {
|
||||||
|
if (!interruptSent) {
|
||||||
|
interruptSent = true;
|
||||||
|
const userMsg = JSON.stringify({
|
||||||
|
type: "user",
|
||||||
|
message: { role: "user", content: INTERRUPT_MESSAGE },
|
||||||
|
});
|
||||||
|
proc.stdin?.write(`${userMsg}\n`);
|
||||||
|
}
|
||||||
|
}, 500);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Track recovery messages - this is the key signal that lazy recovery worked
|
||||||
|
if (
|
||||||
|
msg.type === "recovery" &&
|
||||||
|
msg.recovery_type === "approval_pending"
|
||||||
|
) {
|
||||||
|
errorSeen = true; // reusing this flag to mean "recovery message seen"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Also track raw errors (shouldn't see these if recovery works properly)
|
||||||
|
if (
|
||||||
|
msg.type === "error" ||
|
||||||
|
(msg.type === "message" && msg.message_type === "error_message")
|
||||||
|
) {
|
||||||
|
const detail = msg.detail || msg.message || "";
|
||||||
|
if (detail.toLowerCase().includes("cannot send a new message")) {
|
||||||
|
// Raw error leaked through - recovery may have failed
|
||||||
|
console.log(
|
||||||
|
"WARNING: Raw CONFLICT error seen (recovery may have failed)",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Track results - we need 2 (one for each user message, though first may fail)
|
||||||
|
if (msg.type === "result") {
|
||||||
|
resultCount++;
|
||||||
|
// After second result (or after seeing error + result), we're done
|
||||||
|
if (resultCount >= 2 || (errorSeen && resultCount >= 1)) {
|
||||||
|
cleanup();
|
||||||
|
resolve({ messages, success: true, errorSeen });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Not valid JSON, ignore
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
proc.stdout?.on("data", (data) => {
|
||||||
|
buffer += data.toString();
|
||||||
|
const lines = buffer.split("\n");
|
||||||
|
buffer = lines.pop() || "";
|
||||||
|
for (const line of lines) {
|
||||||
|
processLine(line);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let _stderr = "";
|
||||||
|
proc.stderr?.on("data", (data) => {
|
||||||
|
_stderr += data.toString();
|
||||||
|
});
|
||||||
|
|
||||||
|
proc.on("close", (_code) => {
|
||||||
|
clearTimeout(timeout);
|
||||||
|
// Process any remaining buffer
|
||||||
|
if (buffer.trim()) {
|
||||||
|
processLine(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!closing) {
|
||||||
|
// If we got here without resolving, check what we have
|
||||||
|
resolve({
|
||||||
|
messages,
|
||||||
|
success: resultCount > 0,
|
||||||
|
errorSeen,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
proc.on("error", (err) => {
|
||||||
|
clearTimeout(timeout);
|
||||||
|
reject(err);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
describe("lazy approval recovery", () => {
|
||||||
|
test("handles concurrent message while approval is pending", async () => {
|
||||||
|
const result = await runLazyRecoveryTest();
|
||||||
|
|
||||||
|
// Log messages for debugging if test fails
|
||||||
|
if (!result.success) {
|
||||||
|
console.log("All messages received:");
|
||||||
|
for (const msg of result.messages) {
|
||||||
|
console.log(JSON.stringify(msg, null, 2));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We should have seen the approval request (proves tool requiring approval was called)
|
||||||
|
const approvalRequest = result.messages.find(
|
||||||
|
(m) => m.message_type === "approval_request_message",
|
||||||
|
);
|
||||||
|
expect(approvalRequest).toBeDefined();
|
||||||
|
|
||||||
|
// The test should complete successfully
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
|
||||||
|
// Count results - we should get at least 1 (the second message should always complete)
|
||||||
|
const resultCount = result.messages.filter(
|
||||||
|
(m) => m.type === "result",
|
||||||
|
).length;
|
||||||
|
expect(resultCount).toBeGreaterThanOrEqual(1);
|
||||||
|
|
||||||
|
// KEY ASSERTION: Check if we saw the recovery message
|
||||||
|
// This proves the lazy recovery mechanism was triggered
|
||||||
|
const recoveryMessage = result.messages.find(
|
||||||
|
(m) => m.type === "recovery" && m.recovery_type === "approval_pending",
|
||||||
|
);
|
||||||
|
if (recoveryMessage) {
|
||||||
|
console.log("Recovery message detected - lazy recovery worked correctly");
|
||||||
|
expect(result.errorSeen).toBe(true); // Should have been set when we saw recovery
|
||||||
|
} else {
|
||||||
|
// Recovery might not be triggered if approval was auto-handled before second message
|
||||||
|
// This can happen due to timing - the test still validates the flow works
|
||||||
|
console.log(
|
||||||
|
"Note: No recovery message seen - approval may have been handled before conflict",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}, 180000); // 3 minute timeout for CI
|
||||||
|
});
|
||||||
@@ -149,7 +149,7 @@ async function readSkillContent(
|
|||||||
try {
|
try {
|
||||||
const content = await readFile(projectSkillPath, "utf-8");
|
const content = await readFile(projectSkillPath, "utf-8");
|
||||||
return { content, path: projectSkillPath };
|
return { content, path: projectSkillPath };
|
||||||
} catch (primaryError) {
|
} catch {
|
||||||
// Fallback: check for bundled skills in a repo-level skills directory (legacy)
|
// Fallback: check for bundled skills in a repo-level skills directory (legacy)
|
||||||
try {
|
try {
|
||||||
const bundledSkillsDir = join(process.cwd(), "skills", "skills");
|
const bundledSkillsDir = join(process.cwd(), "skills", "skills");
|
||||||
@@ -157,8 +157,11 @@ async function readSkillContent(
|
|||||||
const content = await readFile(bundledSkillPath, "utf-8");
|
const content = await readFile(bundledSkillPath, "utf-8");
|
||||||
return { content, path: bundledSkillPath };
|
return { content, path: bundledSkillPath };
|
||||||
} catch {
|
} catch {
|
||||||
// If all fallbacks fail, rethrow the original error
|
// If all fallbacks fail, throw a helpful error message (LET-7101)
|
||||||
throw primaryError;
|
// Suggest refresh in case skills sync is still running in background
|
||||||
|
throw new Error(
|
||||||
|
`Skill "${skillId}" not found. If you recently added this skill, try Skill({ command: "refresh" }) to re-scan the skills directory.`,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -198,6 +198,19 @@ export interface RetryMessage extends MessageEnvelope {
|
|||||||
run_id?: string;
|
run_id?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Recovery message emitted when the CLI detects and recovers from errors.
|
||||||
|
* Used for approval state conflicts and other recoverable errors.
|
||||||
|
*/
|
||||||
|
export interface RecoveryMessage extends MessageEnvelope {
|
||||||
|
type: "recovery";
|
||||||
|
/** Type of recovery performed */
|
||||||
|
recovery_type: "approval_pending" | "approval_desync";
|
||||||
|
/** Human-readable description of what happened */
|
||||||
|
message: string;
|
||||||
|
run_id?: string;
|
||||||
|
}
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════
|
||||||
// RESULT
|
// RESULT
|
||||||
// ═══════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════
|
||||||
@@ -329,6 +342,7 @@ export type WireMessage =
|
|||||||
| AutoApprovalMessage
|
| AutoApprovalMessage
|
||||||
| ErrorMessage
|
| ErrorMessage
|
||||||
| RetryMessage
|
| RetryMessage
|
||||||
|
| RecoveryMessage
|
||||||
| ResultMessage
|
| ResultMessage
|
||||||
| ControlResponse
|
| ControlResponse
|
||||||
| ControlRequest; // CLI → SDK control requests (e.g., can_use_tool)
|
| ControlRequest; // CLI → SDK control requests (e.g., can_use_tool)
|
||||||
|
|||||||
@@ -34,6 +34,112 @@ export function logTiming(message: string): void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Milestone tracking for latency audits
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Store milestones with their timestamps (ms since process start via performance.now())
|
||||||
|
const milestones: Map<string, number> = new Map();
|
||||||
|
|
||||||
|
// Reference time for relative measurements (set on first milestone)
|
||||||
|
let firstMilestoneTime: number | null = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mark a named milestone in the boot/execution sequence.
|
||||||
|
* Call this at key points to track where time is spent.
|
||||||
|
*
|
||||||
|
* @param name - Descriptive name like "SETTINGS_LOADED" or "AGENT_RESOLVED"
|
||||||
|
*/
|
||||||
|
export function markMilestone(name: string): void {
|
||||||
|
const now = performance.now();
|
||||||
|
milestones.set(name, now);
|
||||||
|
|
||||||
|
if (firstMilestoneTime === null) {
|
||||||
|
firstMilestoneTime = now;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isTimingsEnabled()) {
|
||||||
|
const relative = now - firstMilestoneTime;
|
||||||
|
console.error(
|
||||||
|
`[timing] MILESTONE ${name} at +${formatDuration(relative)} (${formatTimestamp(new Date())})`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Measure time elapsed since a previous milestone.
|
||||||
|
*
|
||||||
|
* @param label - Description of what we're measuring (e.g., "tool loading")
|
||||||
|
* @param fromMilestone - Name of the starting milestone
|
||||||
|
*/
|
||||||
|
export function measureSinceMilestone(
|
||||||
|
label: string,
|
||||||
|
fromMilestone: string,
|
||||||
|
): void {
|
||||||
|
if (!isTimingsEnabled()) return;
|
||||||
|
|
||||||
|
const startTime = milestones.get(fromMilestone);
|
||||||
|
if (startTime === undefined) {
|
||||||
|
console.error(
|
||||||
|
`[timing] WARNING: milestone "${fromMilestone}" not found for measurement "${label}"`,
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const duration = performance.now() - startTime;
|
||||||
|
console.error(`[timing] ${label}: ${formatDuration(duration)}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the duration between two milestones in milliseconds.
|
||||||
|
* Returns null if either milestone doesn't exist.
|
||||||
|
*/
|
||||||
|
export function getMilestoneDuration(
|
||||||
|
fromMilestone: string,
|
||||||
|
toMilestone: string,
|
||||||
|
): number | null {
|
||||||
|
const start = milestones.get(fromMilestone);
|
||||||
|
const end = milestones.get(toMilestone);
|
||||||
|
if (start === undefined || end === undefined) return null;
|
||||||
|
return end - start;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Print a summary of all milestones with relative timestamps.
|
||||||
|
* Useful at the end of a benchmark run.
|
||||||
|
*/
|
||||||
|
export function reportAllMilestones(): void {
|
||||||
|
if (!isTimingsEnabled() || milestones.size === 0) return;
|
||||||
|
|
||||||
|
const first = firstMilestoneTime ?? 0;
|
||||||
|
|
||||||
|
console.error(`[timing] ======== MILESTONE SUMMARY ========`);
|
||||||
|
|
||||||
|
// Sort by timestamp
|
||||||
|
const sorted = [...milestones.entries()].sort((a, b) => a[1] - b[1]);
|
||||||
|
|
||||||
|
let prevTime = first;
|
||||||
|
for (const [name, time] of sorted) {
|
||||||
|
const relativeToStart = time - first;
|
||||||
|
const delta = time - prevTime;
|
||||||
|
const deltaStr = prevTime === first ? "" : ` (+${formatDuration(delta)})`;
|
||||||
|
console.error(
|
||||||
|
`[timing] +${formatDuration(relativeToStart).padStart(8)} ${name}${deltaStr}`,
|
||||||
|
);
|
||||||
|
prevTime = time;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.error(`[timing] =====================================`);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear all milestones (useful for running multiple benchmarks in sequence).
|
||||||
|
*/
|
||||||
|
export function clearMilestones(): void {
|
||||||
|
milestones.clear();
|
||||||
|
firstMilestoneTime = null;
|
||||||
|
}
|
||||||
|
|
||||||
// Simple fetch type that matches the SDK's expected signature
|
// Simple fetch type that matches the SDK's expected signature
|
||||||
type SimpleFetch = (
|
type SimpleFetch = (
|
||||||
input: string | URL | Request,
|
input: string | URL | Request,
|
||||||
|
|||||||
Reference in New Issue
Block a user