feat: reduce time-to-boot, remove default eager approval checks on inputs, auto-cancel stale approvals (#579)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Charles Packer
2026-01-17 16:19:30 -08:00
committed by GitHub
parent f4eb921af7
commit 5f5c0df18e
13 changed files with 1376 additions and 93 deletions

View File

@@ -0,0 +1,341 @@
#!/usr/bin/env bun
/**
* Latency Benchmark Script for Letta Code CLI
*
* Runs headless mode with LETTA_DEBUG_TIMINGS=1 and parses the output
* to measure latency breakdown at different stages.
*
* Usage:
* bun scripts/latency-benchmark.ts
* bun scripts/latency-benchmark.ts --scenario fresh-agent
* bun scripts/latency-benchmark.ts --iterations 5
*
* Requires: LETTA_API_KEY environment variable
*/
import { spawn } from "node:child_process";
/** One API request timing entry parsed from the `[timing]` debug logs. */
interface ApiCall {
  /** HTTP method, e.g. "GET" or "POST". */
  method: string;
  /** Request path exactly as logged (may still contain resource UUIDs). */
  path: string;
  /** Wall-clock duration of the call, in milliseconds. */
  durationMs: number;
  /** HTTP status code, when present in the log line. */
  status?: number;
}
/** A named progress marker with its offset from the CLI's timing origin. */
interface Milestone {
  name: string;
  /** Milliseconds elapsed at this milestone (parsed from "+Nms"/"+Ns"). */
  offsetMs: number;
}
/** Aggregate outcome of one benchmark scenario run. */
interface BenchmarkResult {
  scenario: string;
  /** Total wall time of the child process, in milliseconds. */
  totalMs: number;
  milestones: Milestone[];
  apiCalls: ApiCall[];
  /** Child process exit code (1 when the process exited without a code). */
  exitCode: number;
}
/** Declarative description of one benchmark scenario. */
interface ScenarioConfig {
  name: string;
  /** Human-readable summary printed alongside the results. */
  description: string;
  /** CLI arguments passed to `bun run dev`. */
  args: string[];
}
// Define benchmark scenarios.
// Each scenario runs the CLI headless (-p) with --yolo (auto-approval) and
// JSON output so runs complete without any interaction. The prompts are
// trivial math questions to avoid tool calls dominating the measurement.
const SCENARIOS: ScenarioConfig[] = [
  {
    // Includes agent-creation cost in the measurement.
    name: "fresh-agent",
    description: "Create new agent and send simple prompt",
    args: [
      "-p",
      "What is 2+2? Reply with just the number.",
      "--new-agent",
      "--yolo",
      "--output-format",
      "json",
    ],
  },
  {
    // Reuses the last agent via --continue (resume path).
    name: "resume-agent",
    description: "Resume last agent and send simple prompt",
    args: [
      "-p",
      "What is 3+3? Reply with just the number.",
      "--continue",
      "--yolo",
      "--output-format",
      "json",
    ],
  },
  {
    // Baseline: minimal turn on an existing agent.
    name: "minimal-math",
    description: "Simple math question (no tool calls)",
    args: [
      "-p",
      "What is 5+5? Reply with just the number.",
      "--continue",
      "--yolo",
      "--output-format",
      "json",
    ],
  },
];
/**
 * Extract milestone and API-call timing entries from captured stderr.
 *
 * Recognized line shapes:
 *   [timing] MILESTONE CLI_START at +0ms (12:34:56.789)
 *   [timing] GET /v1/agents/... -> 245ms (status: 200)
 * Durations suffixed with "s" are converted to milliseconds; all other
 * lines are ignored.
 */
function parseTimingLogs(stderr: string): {
  milestones: Milestone[];
  apiCalls: ApiCall[];
} {
  const milestonePattern =
    /\[timing\] MILESTONE (\S+) at \+(\d+(?:\.\d+)?)(ms|s)/;
  const apiPattern =
    /\[timing\] (GET|POST|PUT|DELETE|PATCH) (\S+) -> (\d+(?:\.\d+)?)(ms|s)(?: \(status: (\d+)\))?/;
  // Normalize a "<value><unit>" pair to milliseconds.
  const toMs = (value: string, unit: string): number =>
    unit === "s" ? parseFloat(value) * 1000 : parseFloat(value);

  const milestones: Milestone[] = [];
  const apiCalls: ApiCall[] = [];

  for (const line of stderr.split("\n")) {
    const milestone = line.match(milestonePattern);
    if (milestone) {
      milestones.push({
        name: milestone[1]!,
        offsetMs: toMs(milestone[2]!, milestone[3]!),
      });
      continue;
    }
    const api = line.match(apiPattern);
    if (api) {
      apiCalls.push({
        method: api[1]!,
        path: api[2]!,
        durationMs: toMs(api[3]!, api[4]!),
        status: api[5] ? parseInt(api[5], 10) : undefined,
      });
    }
  }
  return { milestones, apiCalls };
}
/**
 * Run a single benchmark scenario via `bun run dev` with timing logs enabled.
 *
 * Resolves (never rejects) once the child process finishes, with the parsed
 * timing breakdown; the child's exit code is surfaced in the result so the
 * caller can warn on failures.
 */
async function runBenchmark(scenario: ScenarioConfig): Promise<BenchmarkResult> {
  const start = performance.now();
  return new Promise((resolve) => {
    const proc = spawn("bun", ["run", "dev", ...scenario.args], {
      env: { ...process.env, LETTA_DEBUG_TIMINGS: "1" },
      stdio: ["pipe", "pipe", "pipe"],
    });

    // Drain stdout so the child can never block on a full pipe; only stderr
    // (where the [timing] logs are written) is captured for parsing.
    proc.stdout.resume();

    let stderr = "";
    proc.stderr.on("data", (data) => {
      stderr += data.toString();
    });

    // Kill runaway runs after 2 minutes. Keep the handle so it can be
    // cleared on exit — previously the timer was never cleared, so every
    // scenario left a live timer keeping the event loop alive (and the
    // script hanging) for up to 2 minutes after all benchmarks finished.
    const killTimer = setTimeout(() => {
      proc.kill("SIGTERM");
    }, 120000);

    const finish = (exitCode: number) => {
      clearTimeout(killTimer);
      const totalMs = performance.now() - start;
      const { milestones, apiCalls } = parseTimingLogs(stderr);
      resolve({
        scenario: scenario.name,
        totalMs,
        milestones,
        apiCalls,
        exitCode,
      });
    };

    proc.on("close", (code) => finish(code ?? 1));
    // If spawning fails outright (e.g. `bun` not on PATH), "close" may never
    // fire; resolve with a failure exit code instead of hanging forever.
    // (resolve() is a no-op if finish() already ran.)
    proc.on("error", () => finish(1));
  });
}
/**
 * Render a millisecond duration for display: whole milliseconds below one
 * second ("245ms"), otherwise seconds with two decimals ("1.50s").
 */
function formatMs(ms: number): string {
  return ms < 1000 ? `${Math.round(ms)}ms` : `${(ms / 1000).toFixed(2)}s`;
}
/**
 * Pretty-print all benchmark results: per-scenario milestones and grouped
 * API-call timings, followed by a summary table of total / API / overhead
 * time for each scenario.
 */
function printResults(results: BenchmarkResult[]): void {
  const rule = (ch: string): string => ch.repeat(70);
  const sumApiMs = (r: BenchmarkResult): number =>
    r.apiCalls.reduce((sum, c) => sum + c.durationMs, 0);

  console.log("\n" + rule("="));
  console.log("LATENCY BENCHMARK RESULTS");
  console.log(rule("=") + "\n");

  for (const result of results) {
    const scenario = SCENARIOS.find((s) => s.name === result.scenario);
    console.log(`Scenario: ${result.scenario}`);
    console.log(` ${scenario?.description || ""}`);
    console.log(` Exit code: ${result.exitCode}`);
    console.log(` Total wall time: ${formatMs(result.totalMs)}`);
    console.log("");

    // Milestones, each with the delta from the previous one.
    if (result.milestones.length > 0) {
      console.log(" Milestones:");
      let previousOffset = 0;
      for (const { name, offsetMs } of result.milestones) {
        const delta = offsetMs - previousOffset;
        const deltaStr = previousOffset === 0 ? "" : ` (+${formatMs(delta)})`;
        console.log(` +${formatMs(offsetMs).padStart(8)} ${name}${deltaStr}`);
        previousOffset = offsetMs;
      }
      console.log("");
    }

    // API calls grouped by "METHOD normalized-path", sorted by total time.
    if (result.apiCalls.length > 0) {
      console.log(" API Calls:");
      const totalApiMs = sumApiMs(result);
      const grouped = new Map<string, { count: number; totalMs: number }>();
      for (const call of result.apiCalls) {
        // Collapse UUIDs so repeated calls to the same endpoint group together.
        const key = `${call.method} ${call.path.replace(/[a-f0-9-]{36}/g, "{id}")}`;
        const entry = grouped.get(key) ?? { count: 0, totalMs: 0 };
        entry.count += 1;
        entry.totalMs += call.durationMs;
        grouped.set(key, entry);
      }
      const byTotalTime = [...grouped.entries()].sort(
        (a, b) => b[1].totalMs - a[1].totalMs,
      );
      for (const [endpoint, stats] of byTotalTime) {
        const countStr = stats.count > 1 ? ` (x${stats.count})` : "";
        console.log(` ${formatMs(stats.totalMs).padStart(8)} ${endpoint}${countStr}`);
      }
      console.log(` ${"─".repeat(50)}`);
      console.log(` ${formatMs(totalApiMs).padStart(8)} Total API time`);
      console.log(` ${formatMs(result.totalMs - totalApiMs).padStart(8)} CLI overhead (non-API)`);
    }
    console.log("\n" + "-".repeat(70) + "\n");
  }

  // Final comparison table across scenarios.
  console.log("SUMMARY");
  console.log(rule("-"));
  console.log(
    "Scenario".padEnd(20) +
      "Total".padStart(12) +
      "API Time".padStart(12) +
      "CLI Overhead".padStart(14),
  );
  console.log(rule("-"));
  for (const result of results) {
    const apiMs = sumApiMs(result);
    console.log(
      result.scenario.padEnd(20) +
        formatMs(result.totalMs).padStart(12) +
        formatMs(apiMs).padStart(12) +
        formatMs(result.totalMs - apiMs).padStart(14),
    );
  }
  console.log(rule("-"));
}
/**
 * Entry point: parse CLI flags, validate prerequisites, run the selected
 * scenarios for the requested number of iterations, then print a report.
 *
 * Flags:
 *   --scenario <name>  run only the named scenario
 *   --iterations <n>   repeat all scenarios n times (default 1)
 */
async function main(): Promise<void> {
  // Parse args
  const args = process.argv.slice(2);
  let scenarioFilter: string | null = null;
  let iterations = 1;
  for (let i = 0; i < args.length; i++) {
    if (args[i] === "--scenario" && args[i + 1]) {
      scenarioFilter = args[++i]!;
    } else if (args[i] === "--iterations" && args[i + 1]) {
      iterations = parseInt(args[++i]!, 10);
    }
  }
  // Reject bad --iterations values: parseInt can yield NaN (or a non-positive
  // number), which would silently run zero iterations and print an empty report.
  if (!Number.isInteger(iterations) || iterations < 1) {
    console.error("Error: --iterations must be a positive integer");
    process.exit(1);
  }
  // Check prereqs: benchmarks hit the real API, so credentials are mandatory.
  if (!process.env.LETTA_API_KEY) {
    console.error("Error: LETTA_API_KEY environment variable is required");
    process.exit(1);
  }
  // Filter scenarios
  const scenariosToRun = scenarioFilter
    ? SCENARIOS.filter((s) => s.name === scenarioFilter)
    : SCENARIOS;
  if (scenariosToRun.length === 0) {
    console.error(`Error: Unknown scenario "${scenarioFilter}"`);
    console.error(`Available scenarios: ${SCENARIOS.map((s) => s.name).join(", ")}`);
    process.exit(1);
  }
  console.log("Running latency benchmarks...");
  console.log(`Scenarios: ${scenariosToRun.map((s) => s.name).join(", ")}`);
  console.log(`Iterations: ${iterations}`);
  console.log("");
  // Run scenarios sequentially (parallel runs would skew timings).
  const allResults: BenchmarkResult[] = [];
  for (let iter = 0; iter < iterations; iter++) {
    if (iterations > 1) {
      console.log(`\n--- Iteration ${iter + 1} of ${iterations} ---`);
    }
    for (const scenario of scenariosToRun) {
      console.log(`Running: ${scenario.name}...`);
      const result = await runBenchmark(scenario);
      allResults.push(result);
      if (result.exitCode !== 0) {
        console.warn(` Warning: ${scenario.name} exited with code ${result.exitCode}`);
      } else {
        console.log(` Completed in ${formatMs(result.totalMs)}`);
      }
    }
  }
  printResults(allResults);
}
main().catch((err) => {
  console.error(err);
  process.exit(1);
});

View File

@@ -2,9 +2,14 @@ import type { MessageCreate } from "@letta-ai/letta-client/resources/agents/agen
import { getClient } from "./client";
import { APPROVAL_RECOVERY_PROMPT } from "./promptAssets";
// Error when trying to SEND approval but server has no pending approval
// (the "desync" case). Matched case-insensitively against the error detail.
const APPROVAL_RECOVERY_DETAIL_FRAGMENT =
  "no tool call is currently awaiting approval";
// Error when trying to SEND message but server has pending approval waiting
// This is the CONFLICT error - opposite of desync
const APPROVAL_PENDING_DETAIL_FRAGMENT = "cannot send a new message";
type RunErrorMetadata =
| {
error_type?: string;
@@ -20,6 +25,19 @@ export function isApprovalStateDesyncError(detail: unknown): boolean {
return detail.toLowerCase().includes(APPROVAL_RECOVERY_DETAIL_FRAGMENT);
}
/**
* Check if error indicates there's a pending approval blocking new messages.
* This is the CONFLICT error from the backend when trying to send a user message
* while the agent is waiting for approval on a tool call.
*
* Error format:
* { detail: "CONFLICT: Cannot send a new message: The agent is waiting for approval..." }
*/
export function isApprovalPendingError(detail: unknown): boolean {
if (typeof detail !== "string") return false;
return detail.toLowerCase().includes(APPROVAL_PENDING_DETAIL_FRAGMENT);
}
export async function fetchRunErrorDetail(
runId: string | null | undefined,
): Promise<string | null> {

View File

@@ -29,8 +29,9 @@ export interface ResumeData {
/**
* Extract approval requests from an approval_request_message.
* Exported for testing parallel tool call handling.
*/
function extractApprovals(messageToCheck: Message): {
export function extractApprovals(messageToCheck: Message): {
pendingApproval: ApprovalRequest | null;
pendingApprovals: ApprovalRequest[];
} {

View File

@@ -271,26 +271,14 @@ export async function createAgent(
// Track provenance: which blocks were created
// Note: We no longer reuse shared blocks - each agent gets fresh blocks
const blockProvenance: BlockProvenance[] = [];
const blockIds: string[] = [];
// Create all blocks fresh for the new agent
// Mark new blocks for provenance tracking (actual creation happens in agents.create)
for (const block of filteredMemoryBlocks) {
try {
const createdBlock = await client.blocks.create(block);
if (!createdBlock.id) {
throw new Error(`Created block ${block.label} has no ID`);
}
blockIds.push(createdBlock.id);
blockProvenance.push({ label: block.label, source: "new" });
} catch (error) {
console.error(`Failed to create block ${block.label}:`, error);
throw error;
}
blockProvenance.push({ label: block.label, source: "new" });
}
// Add any referenced block IDs (existing blocks to attach)
// Mark referenced blocks for provenance tracking
for (const blockId of referencedBlockIds) {
blockIds.push(blockId);
blockProvenance.push({ label: blockId, source: "shared" });
}
@@ -314,7 +302,9 @@ export async function createAgent(
systemPromptContent = `${systemPromptContent}\n\n${options.systemPromptAppend}`;
}
// Create agent with all block IDs (existing + newly created)
// Create agent with inline memory blocks (LET-7101: single API call instead of N+1)
// - memory_blocks: new blocks to create inline
// - block_ids: references to existing blocks (for shared memory)
const tags = ["origin:letta-code"];
if (process.env.LETTA_CODE_AGENT_ROLE === "subagent") {
tags.push("role:subagent");
@@ -332,7 +322,11 @@ export async function createAgent(
model: modelHandle,
context_window_limit: contextWindow,
tools: toolNames,
block_ids: blockIds,
// New blocks created inline with agent (saves ~2s of sequential API calls)
memory_blocks:
filteredMemoryBlocks.length > 0 ? filteredMemoryBlocks : undefined,
// Referenced block IDs (existing blocks to attach)
block_ids: referencedBlockIds.length > 0 ? referencedBlockIds : undefined,
tags,
// should be default off, but just in case
include_base_tools: false,

View File

@@ -486,3 +486,125 @@ export function formatSkillsForMemory(
// Otherwise fall back to compact tree format
return formatSkillsAsTree(skills, skillsDirectory);
}
// ============================================================================
// Skills Sync with Hash-Based Caching (Phase 2.5 - LET-7101)
// ============================================================================
import { createHash } from "node:crypto";
import { mkdir, writeFile } from "node:fs/promises";
/**
 * Path of the project-local skills hash cache file.
 * Lives under .letta/ in the current working directory because the skills
 * block content depends on the project's .skills/ folder.
 */
function getSkillsHashFilePath(): string {
  const projectLettaDir = join(process.cwd(), ".letta");
  return join(projectLettaDir, "skills-hash.json");
}
/** Shape of the JSON persisted in the skills hash cache file. */
interface SkillsHashCache {
  // Truncated SHA-256 of the formatted skills content (see computeSkillsHash).
  hash: string;
  // ISO-8601 write time; informational only, never read back.
  timestamp: string;
}
/**
 * Fingerprint the formatted skills content: the first 16 hex characters of
 * its SHA-256 digest. Used to skip redundant skills-block updates.
 */
function computeSkillsHash(content: string): string {
  const fullDigest = createHash("sha256").update(content).digest("hex");
  return fullDigest.slice(0, 16);
}
/**
 * Read the previously cached skills hash.
 * Returns null when the cache file is missing, unreadable, or not valid
 * JSON — all treated as "no cache".
 */
async function getCachedSkillsHash(): Promise<string | null> {
  try {
    const raw = await readFile(getSkillsHashFilePath(), "utf-8");
    const cache: SkillsHashCache = JSON.parse(raw);
    return cache.hash;
  } catch {
    return null;
  }
}
/**
 * Persist the skills hash to the project-local cache file.
 * Write failures are swallowed: the cache is an optimization, not a
 * requirement, so a read-only filesystem must not break the sync.
 */
async function setCachedSkillsHash(hash: string): Promise<void> {
  try {
    // The project .letta directory may not exist yet on first run.
    await mkdir(join(process.cwd(), ".letta"), { recursive: true });
    const payload: SkillsHashCache = {
      hash,
      timestamp: new Date().toISOString(),
    };
    await writeFile(getSkillsHashFilePath(), JSON.stringify(payload, null, 2));
  } catch {
    // Ignore cache write failures - not critical
  }
}
/**
 * Sync skills to an agent's memory block.
 * Discovers skills from filesystem and updates the skills block.
 *
 * The block-update + hash-caching logic was previously duplicated across the
 * skipIfUnchanged and unconditional paths; it is unified here (behavior is
 * unchanged — the hash computation is pure, so computing it up front is
 * equivalent).
 *
 * @param client - Letta client
 * @param agentId - Agent ID to update
 * @param skillsDirectory - Path to project skills directory
 * @param options - Optional settings; skipIfUnchanged skips the network
 *   round-trip when the formatted content matches the cached hash
 * @returns Object indicating if sync occurred and discovered skills
 */
export async function syncSkillsToAgent(
  client: import("@letta-ai/letta-client").default,
  agentId: string,
  skillsDirectory: string,
  options?: { skipIfUnchanged?: boolean },
): Promise<{ synced: boolean; skills: Skill[] }> {
  // Discover skills from filesystem; discovery errors are logged, not fatal.
  const { skills, errors } = await discoverSkills(skillsDirectory);
  for (const error of errors) {
    console.warn(`[skills] Discovery error: ${error.path}: ${error.message}`);
  }

  // Format skills for the memory block and fingerprint the result.
  const formattedSkills = formatSkillsForMemory(skills, skillsDirectory);
  const newHash = computeSkillsHash(formattedSkills);

  // Skip the update when the content hasn't changed since the last sync.
  if (options?.skipIfUnchanged && newHash === (await getCachedSkillsHash())) {
    return { synced: false, skills };
  }

  // Update the block, then cache the new hash for future runs.
  await client.agents.blocks.update("skills", {
    agent_id: agentId,
    value: formattedSkills,
  });
  await setCachedSkillsHash(newHash);
  return { synced: true, skills };
}

View File

@@ -30,6 +30,7 @@ import {
import {
buildApprovalRecoveryMessage,
fetchRunErrorDetail,
isApprovalPendingError,
isApprovalStateDesyncError,
} from "../agent/approval-recovery";
import { prefetchAvailableModelHandles } from "../agent/available-models";
@@ -175,10 +176,10 @@ import { useTerminalRows, useTerminalWidth } from "./hooks/useTerminalWidth";
const CLEAR_SCREEN_AND_HOME = "\u001B[2J\u001B[H";
const MIN_RESIZE_DELTA = 2;
// Feature flag: Check for pending approvals before sending messages
// This prevents infinite thinking state when there's an orphaned approval
// Can be disabled if the latency check adds too much overhead
const CHECK_PENDING_APPROVALS_BEFORE_SEND = true;
// Eager approval checking is now CONDITIONAL (LET-7101):
// - Enabled when resuming a session (--resume, --continue, or startupApprovals exist)
// - Disabled for normal messages (lazy recovery handles edge cases)
// This saves ~2s latency per message in the common case.
// Feature flag: Eagerly cancel streams client-side when user presses ESC
// When true (default), immediately abort the stream after calling .cancel()
@@ -678,6 +679,12 @@ export default function App({
>(null);
const toolAbortControllerRef = useRef<AbortController | null>(null);
// Eager approval checking: only enabled when resuming a session (LET-7101)
// After first successful message, we disable it since any new approvals are from our own turn
const [needsEagerApprovalCheck, setNeedsEagerApprovalCheck] = useState(
() => resumedExistingConversation || startupApprovals.length > 0,
);
// Track auto-handled results to combine with user decisions
const [autoHandledResults, setAutoHandledResults] = useState<
Array<{
@@ -1941,6 +1948,12 @@ export default function App({
setStreaming(false);
llmApiErrorRetriesRef.current = 0; // Reset retry counter on success
// Disable eager approval check after first successful message (LET-7101)
// Any new approvals from here on are from our own turn, not orphaned
if (needsEagerApprovalCheck) {
setNeedsEagerApprovalCheck(false);
}
// Send desktop notification when turn completes
// and we're not about to auto-send another queued message
if (!waitingForQueueCancelRef.current) {
@@ -2552,6 +2565,112 @@ export default function App({
return;
}
// Check for approval pending error (sent user message while approval waiting)
// This is the lazy recovery path for when needsEagerApprovalCheck is false
const approvalPendingDetected =
isApprovalPendingError(detailFromRun) ||
isApprovalPendingError(latestErrorText);
if (
!hasApprovalInPayload &&
approvalPendingDetected &&
llmApiErrorRetriesRef.current < LLM_API_ERROR_MAX_RETRIES
) {
llmApiErrorRetriesRef.current += 1;
// Log for debugging (visible in transcripts)
const statusId = uid("status");
buffersRef.current.byId.set(statusId, {
kind: "status",
id: statusId,
lines: [
"[LAZY RECOVERY] Detected CONFLICT: server has pending approval",
"[LAZY RECOVERY] Fetching stale approvals to auto-deny...",
],
});
buffersRef.current.order.push(statusId);
refreshDerived();
try {
// Fetch pending approvals and auto-deny them
const client = await getClient();
const agent = await client.agents.retrieve(agentIdRef.current);
const { pendingApprovals: existingApprovals } =
await getResumeData(client, agent, conversationIdRef.current);
if (existingApprovals && existingApprovals.length > 0) {
// Update status with details
buffersRef.current.byId.set(statusId, {
kind: "status",
id: statusId,
lines: [
"[LAZY RECOVERY] Detected CONFLICT: server has pending approval",
`[LAZY RECOVERY] Found ${existingApprovals.length} stale approval(s):`,
...existingApprovals.map(
(a) =>
` - ${a.toolName} (${a.toolCallId.slice(0, 8)}...)`,
),
"[LAZY RECOVERY] Auto-denying and batching with user message...",
],
});
refreshDerived();
// Create denial results for all stale approvals
// Use the same format as handleCancelApprovals (lines 6390-6395)
const denialResults = existingApprovals.map((approval) => ({
type: "approval" as const,
tool_call_id: approval.toolCallId,
approve: false,
reason:
"Auto-denied: stale approval from interrupted session",
}));
// Prepend approval denials to the current input (keeps user message)
const approvalPayload: ApprovalCreate = {
type: "approval",
approvals: denialResults,
};
currentInput.unshift(approvalPayload);
} else {
// No approvals found - server state may have cleared
buffersRef.current.byId.set(statusId, {
kind: "status",
id: statusId,
lines: [
"[LAZY RECOVERY] Detected CONFLICT but no pending approvals found",
"[LAZY RECOVERY] Retrying message...",
],
});
refreshDerived();
}
} catch (_recoveryError) {
// If we can't fetch approvals, just retry the original message
buffersRef.current.byId.set(statusId, {
kind: "status",
id: statusId,
lines: [
"[LAZY RECOVERY] Failed to fetch pending approvals",
"[LAZY RECOVERY] Retrying message anyway...",
],
});
refreshDerived();
}
// Brief pause so user can see the status
await new Promise((resolve) => setTimeout(resolve, 500));
// Remove the transient status
buffersRef.current.byId.delete(statusId);
buffersRef.current.order = buffersRef.current.order.filter(
(id) => id !== statusId,
);
refreshDerived();
// Reset interrupted flag so retry stream chunks are processed
buffersRef.current.interrupted = false;
continue;
}
// Check if this is a retriable error (transient LLM API error)
const retriable = await isRetriableError(
stopReasonToHandle,
@@ -2757,6 +2876,7 @@ export default function App({
setStreaming,
currentModelId,
updateStreamingOutput,
needsEagerApprovalCheck,
],
);
@@ -3278,7 +3398,8 @@ export default function App({
const checkPendingApprovalsForSlashCommand = useCallback(async (): Promise<
{ blocked: true } | { blocked: false }
> => {
if (!CHECK_PENDING_APPROVALS_BEFORE_SEND) {
// Only check eagerly when resuming a session (LET-7101)
if (!needsEagerApprovalCheck) {
return { blocked: false };
}
@@ -3425,7 +3546,13 @@ export default function App({
// If check fails, proceed anyway (don't block user)
return { blocked: false };
}
}, [agentId, processConversation, refreshDerived, updateStreamingOutput]);
}, [
agentId,
processConversation,
refreshDerived,
updateStreamingOutput,
needsEagerApprovalCheck,
]);
// biome-ignore lint/correctness/useExhaustiveDependencies: refs read .current dynamically, complex callback with intentional deps
const onSubmit = useCallback(
@@ -5372,7 +5499,20 @@ DO NOT respond to these messages or otherwise consider them in your response unl
// Check for pending approvals before sending message (skip if we already have
// a queued approval response to send first).
if (CHECK_PENDING_APPROVALS_BEFORE_SEND && !queuedApprovalResults) {
// Only do eager check when resuming a session (LET-7101) - otherwise lazy recovery handles it
if (needsEagerApprovalCheck && !queuedApprovalResults) {
// Log for debugging
const eagerStatusId = uid("status");
buffersRef.current.byId.set(eagerStatusId, {
kind: "status",
id: eagerStatusId,
lines: [
"[EAGER CHECK] Checking for pending approvals (resume mode)...",
],
});
buffersRef.current.order.push(eagerStatusId);
refreshDerived();
try {
const client = await getClient();
// Fetch fresh agent state to check for pending approvals with accurate in-context messages
@@ -5383,6 +5523,12 @@ DO NOT respond to these messages or otherwise consider them in your response unl
conversationIdRef.current,
);
// Remove eager check status
buffersRef.current.byId.delete(eagerStatusId);
buffersRef.current.order = buffersRef.current.order.filter(
(id) => id !== eagerStatusId,
);
// Check if user cancelled while we were fetching approval state
if (
userCancelledRef.current ||

View File

@@ -11,6 +11,7 @@ import type { ApprovalResult } from "./agent/approval-execution";
import {
buildApprovalRecoveryMessage,
fetchRunErrorDetail,
isApprovalPendingError,
isApprovalStateDesyncError,
} from "./agent/approval-recovery";
import { getClient } from "./agent/client";
@@ -40,11 +41,17 @@ import type {
ControlResponse,
ErrorMessage,
MessageWire,
RecoveryMessage,
ResultMessage,
RetryMessage,
StreamEvent,
SystemInitMessage,
} from "./types/protocol";
import {
markMilestone,
measureSinceMilestone,
reportAllMilestones,
} from "./utils/timing";
// Maximum number of times to retry a turn when the backend
// reports an `llm_api_error` stop reason. This helps smooth
@@ -169,6 +176,7 @@ export async function handleHeadlessCommand(
}
const client = await getClient();
markMilestone("HEADLESS_CLIENT_READY");
// Check for --resume flag (interactive only)
if (values.resume) {
@@ -462,6 +470,7 @@ export async function handleHeadlessCommand(
console.error("No agent found. Use --new-agent to create a new agent.");
process.exit(1);
}
markMilestone("HEADLESS_AGENT_RESOLVED");
// Check if we're resuming an existing agent (not creating a new one)
const isResumingAgent = !!(
@@ -567,6 +576,7 @@ export async function handleHeadlessCommand(
});
conversationId = conversation.id;
}
markMilestone("HEADLESS_CONVERSATION_READY");
// Save session (agent + conversation) to both project and global settings
// Skip for subagents - they shouldn't pollute the LRU settings
@@ -591,41 +601,33 @@ export async function handleHeadlessCommand(
// Set agent context for tools that need it (e.g., Skill tool, Task tool)
setAgentContext(agent.id, skillsDirectory);
await initializeLoadedSkillsFlag();
// Re-discover skills and update the skills memory block
// Fire-and-forget: Initialize loaded skills flag (LET-7101)
// Don't await - this is just for the skill unload reminder
initializeLoadedSkillsFlag().catch(() => {
// Ignore errors - not critical
});
// Fire-and-forget: Sync skills in background (LET-7101)
// This ensures new skills added after agent creation are available
try {
const { discoverSkills, formatSkillsForMemory, SKILLS_DIR } = await import(
"./agent/skills"
);
const { join } = await import("node:path");
// Don't await - proceed to message sending immediately
(async () => {
try {
const { syncSkillsToAgent, SKILLS_DIR } = await import("./agent/skills");
const { join } = await import("node:path");
const resolvedSkillsDirectory =
skillsDirectory || join(process.cwd(), SKILLS_DIR);
const { skills, errors } = await discoverSkills(resolvedSkillsDirectory);
const resolvedSkillsDirectory =
skillsDirectory || join(process.cwd(), SKILLS_DIR);
if (errors.length > 0) {
console.warn("Errors encountered during skill discovery:");
for (const error of errors) {
console.warn(` ${error.path}: ${error.message}`);
}
await syncSkillsToAgent(client, agent.id, resolvedSkillsDirectory, {
skipIfUnchanged: true,
});
} catch (error) {
console.warn(
`[skills] Background sync failed: ${error instanceof Error ? error.message : String(error)}`,
);
}
// Update the skills memory block with freshly discovered skills
const formattedSkills = formatSkillsForMemory(
skills,
resolvedSkillsDirectory,
);
await client.agents.blocks.update("skills", {
agent_id: agent.id,
value: formattedSkills,
});
} catch (error) {
console.warn(
`Failed to update skills: ${error instanceof Error ? error.message : String(error)}`,
);
}
})();
// Validate output format
const outputFormat =
@@ -830,8 +832,11 @@ export async function handleHeadlessCommand(
}
};
// Clear any pending approvals before starting a new turn
await resolveAllPendingApprovals();
// Clear any pending approvals before starting a new turn - ONLY when resuming (LET-7101)
// For new agents/conversations, lazy recovery handles any edge cases
if (isResumingAgent) {
await resolveAllPendingApprovals();
}
// Build message content with reminders (plan mode first, then skill unload)
const { permissionMode } = await import("./permissions/mode");
@@ -865,6 +870,9 @@ export async function handleHeadlessCommand(
let lastKnownRunId: string | null = null;
let llmApiErrorRetries = 0;
markMilestone("HEADLESS_FIRST_STREAM_START");
measureSinceMilestone("headless-setup-total", "HEADLESS_CLIENT_READY");
try {
while (true) {
const stream = await sendMessageStream(conversationId, currentInput);
@@ -922,8 +930,25 @@ export async function handleHeadlessCommand(
}
// Detect server conflict due to pending approval; handle it and retry
if (errorInfo?.message?.includes("Cannot send a new message")) {
// Don't emit this error; clear approvals and retry outer loop
// Check both detail and message fields since error formats vary
if (
isApprovalPendingError(errorInfo?.detail) ||
isApprovalPendingError(errorInfo?.message)
) {
// Emit recovery message for stream-json mode (enables testing)
if (outputFormat === "stream-json") {
const recoveryMsg: RecoveryMessage = {
type: "recovery",
recovery_type: "approval_pending",
message:
"Detected pending approval conflict; auto-denying stale approval and retrying",
run_id: lastRunId ?? undefined,
session_id: sessionId,
uuid: `recovery-${lastRunId || crypto.randomUUID()}`,
};
console.log(JSON.stringify(recoveryMsg));
}
// Clear approvals and retry outer loop
await resolveAllPendingApprovals();
// Reset state and restart turn
stopReason = "error" as StopReasonType;
@@ -1541,6 +1566,10 @@ export async function handleHeadlessCommand(
}
console.log(resultText);
}
// Report all milestones at the end for latency audit
markMilestone("HEADLESS_COMPLETE");
reportAllMilestones();
}
/**

View File

@@ -17,6 +17,7 @@ import { permissionMode } from "./permissions/mode";
import { settingsManager } from "./settings-manager";
import { telemetry } from "./telemetry";
import { loadTools } from "./tools/manager";
import { markMilestone } from "./utils/timing";
// Stable empty array constants to prevent new references on every render
// These are used as fallbacks when resumeData is null, avoiding the React
@@ -335,9 +336,12 @@ async function getPinnedAgentNames(): Promise<{ id: string; name: string }[]> {
}
async function main(): Promise<void> {
markMilestone("CLI_START");
// Initialize settings manager (loads settings once into memory)
await settingsManager.initialize();
const settings = await settingsManager.getSettingsWithSecureTokens();
markMilestone("SETTINGS_LOADED");
// Initialize LSP infrastructure for type checking
if (process.env.LETTA_ENABLE_LSP) {
@@ -728,6 +732,7 @@ async function main(): Promise<void> {
// Validate credentials by checking health endpoint
const { validateCredentials } = await import("./auth/oauth");
const isValid = await validateCredentials(baseURL, apiKey ?? "");
markMilestone("CREDENTIALS_VALIDATED");
if (!isValid) {
// For headless mode, error out with helpful message
@@ -829,18 +834,22 @@ async function main(): Promise<void> {
}
if (isHeadless) {
markMilestone("HEADLESS_MODE_START");
// For headless mode, load tools synchronously (respecting model/toolset when provided)
const modelForTools = getModelForToolLoading(
specifiedModel,
specifiedToolset as "codex" | "default" | undefined,
);
await loadTools(modelForTools);
markMilestone("TOOLS_LOADED");
const { handleHeadlessCommand } = await import("./headless");
await handleHeadlessCommand(process.argv, specifiedModel, skillsDirectory);
return;
}
markMilestone("TUI_MODE_START");
// Enable enhanced key reporting (Shift+Enter, etc.) BEFORE Ink initializes.
// In VS Code/xterm.js this typically requires a short handshake (query + enable).
try {
@@ -853,6 +862,7 @@ async function main(): Promise<void> {
}
// Interactive: lazy-load React/Ink + App
markMilestone("REACT_IMPORT_START");
const React = await import("react");
const { render } = await import("ink");
const { useState, useEffect } = React;
@@ -1441,42 +1451,35 @@ async function main(): Promise<void> {
// Set agent context for tools that need it (e.g., Skill tool)
setAgentContext(agent.id, skillsDirectory);
await initializeLoadedSkillsFlag();
// Re-discover skills and update the skills memory block
// Fire-and-forget: Initialize loaded skills flag (LET-7101)
// Don't await - this is just for the skill unload reminder
initializeLoadedSkillsFlag().catch(() => {
// Ignore errors - not critical
});
// Fire-and-forget: Sync skills in background (LET-7101)
// This ensures new skills added after agent creation are available
try {
const { discoverSkills, formatSkillsForMemory, SKILLS_DIR } =
await import("./agent/skills");
const { join } = await import("node:path");
// Don't await - user can start typing immediately
(async () => {
try {
const { syncSkillsToAgent, SKILLS_DIR } = await import(
"./agent/skills"
);
const { join } = await import("node:path");
const resolvedSkillsDirectory =
skillsDirectory || join(process.cwd(), SKILLS_DIR);
const { skills, errors } = await discoverSkills(
resolvedSkillsDirectory,
);
const resolvedSkillsDirectory =
skillsDirectory || join(process.cwd(), SKILLS_DIR);
if (errors.length > 0) {
console.warn("Errors encountered during skill discovery:");
for (const error of errors) {
console.warn(` ${error.path}: ${error.message}`);
}
await syncSkillsToAgent(client, agent.id, resolvedSkillsDirectory, {
skipIfUnchanged: true,
});
} catch (error) {
console.warn(
`[skills] Background sync failed: ${error instanceof Error ? error.message : String(error)}`,
);
}
// Update the skills memory block with freshly discovered skills
const formattedSkills = formatSkillsForMemory(
skills,
resolvedSkillsDirectory,
);
await client.agents.blocks.update("skills", {
agent_id: agent.id,
value: formattedSkills,
});
} catch (error) {
console.warn(
`Failed to update skills: ${error instanceof Error ? error.message : String(error)}`,
);
}
})();
// Check if we're resuming an existing agent
// We're resuming if:
@@ -1797,6 +1800,7 @@ async function main(): Promise<void> {
});
}
markMilestone("REACT_RENDER_START");
render(
React.createElement(LoadingApp, {
continueSession: shouldContinue,

View File

@@ -0,0 +1,249 @@
import { describe, expect, test } from "bun:test";
import type { Message } from "@letta-ai/letta-client/resources/agents/messages";
import {
isApprovalPendingError,
isApprovalStateDesyncError,
} from "../agent/approval-recovery";
import { extractApprovals } from "../agent/check-approval";
/**
* Tests for approval error detection helpers (LET-7101).
*
* These functions detect two opposite error conditions:
* 1. isApprovalStateDesyncError: Sent approval, but server has no pending approval
* 2. isApprovalPendingError: Sent user message, but server has pending approval waiting
*/
describe("isApprovalStateDesyncError", () => {
  // This is the server's wording when an approval response arrives but no
  // tool call is actually pending on the backend.
  test("detects desync error in detail string", () => {
    expect(
      isApprovalStateDesyncError("No tool call is currently awaiting approval"),
    ).toBe(true);
  });

  test("detects desync error case-insensitively", () => {
    expect(
      isApprovalStateDesyncError("NO TOOL CALL IS CURRENTLY AWAITING APPROVAL"),
    ).toBe(true);
  });

  test("detects desync error in longer message", () => {
    const wrapped =
      "Error: No tool call is currently awaiting approval. The approval request may have expired.";
    expect(isApprovalStateDesyncError(wrapped)).toBe(true);
  });

  test("returns false for unrelated errors", () => {
    for (const detail of ["Connection timeout", "Internal server error"]) {
      expect(isApprovalStateDesyncError(detail)).toBe(false);
    }
  });

  test("returns false for non-string input", () => {
    for (const value of [null, undefined, 123, { error: "test" }]) {
      expect(isApprovalStateDesyncError(value)).toBe(false);
    }
  });
});
describe("isApprovalPendingError", () => {
  // This is the actual error format from the Letta backend (screenshot from LET-7101)
  const REAL_ERROR_DETAIL =
    "CONFLICT: Cannot send a new message: The agent is waiting for approval on a tool call. Please approve or deny the pending request before continuing.";

  test("detects approval pending error in real error format", () => {
    expect(isApprovalPendingError(REAL_ERROR_DETAIL)).toBe(true);
  });

  test("detects approval pending error case-insensitively", () => {
    for (const variant of [
      "CANNOT SEND A NEW MESSAGE",
      "cannot send a new message",
    ]) {
      expect(isApprovalPendingError(variant)).toBe(true);
    }
  });

  test("detects partial match in longer message", () => {
    expect(
      isApprovalPendingError(
        "Error occurred: Cannot send a new message while processing",
      ),
    ).toBe(true);
  });

  test("returns false for desync errors (opposite case)", () => {
    // The OPPOSITE failure mode: approval was sent but nothing is pending.
    expect(
      isApprovalPendingError("No tool call is currently awaiting approval"),
    ).toBe(false);
  });

  test("returns false for unrelated errors", () => {
    for (const detail of [
      "Connection timeout",
      "Rate limit exceeded",
      "Invalid API key",
    ]) {
      expect(isApprovalPendingError(detail)).toBe(false);
    }
  });

  test("returns false for non-string input", () => {
    for (const value of [null, undefined, 123, { detail: REAL_ERROR_DETAIL }]) {
      expect(isApprovalPendingError(value)).toBe(false);
    }
  });
});
/**
 * Tests for parallel tool call approval extraction.
 * Ensures lazy recovery handles multiple simultaneous tool calls correctly.
 */
describe("extractApprovals", () => {
  // Subset of an approval_request_message tool call that these tests populate.
  interface ToolCallStub {
    tool_call_id?: string;
    name?: string;
    arguments?: string;
  }

  // Build a minimal Message-like object for testing. The real Message type is
  // complex, so we cast through unknown.
  const approvalMessage = (fields: {
    tool_calls?: ToolCallStub[];
    tool_call?: ToolCallStub;
  }): Message =>
    ({
      id: "test-msg-id",
      date: new Date().toISOString(),
      message_type: "approval_request_message",
      ...fields,
    }) as unknown as Message;

  test("extracts single tool call from tool_calls array", () => {
    const { pendingApprovals, pendingApproval } = extractApprovals(
      approvalMessage({
        tool_calls: [
          {
            tool_call_id: "call-1",
            name: "Bash",
            arguments: '{"command": "echo hello"}',
          },
        ],
      }),
    );
    expect(pendingApprovals).toHaveLength(1);
    expect(pendingApprovals[0]!.toolCallId).toBe("call-1");
    expect(pendingApprovals[0]!.toolName).toBe("Bash");
    expect(pendingApproval?.toolCallId).toBe("call-1");
  });

  test("extracts multiple parallel tool calls", () => {
    const { pendingApprovals, pendingApproval } = extractApprovals(
      approvalMessage({
        tool_calls: [
          {
            tool_call_id: "call-1",
            name: "Bash",
            arguments: '{"command": "echo hello"}',
          },
          {
            tool_call_id: "call-2",
            name: "web_search",
            arguments: '{"query": "test"}',
          },
          {
            tool_call_id: "call-3",
            name: "Read",
            arguments: '{"file_path": "/tmp/test.txt"}',
          },
        ],
      }),
    );
    expect(pendingApprovals).toHaveLength(3);
    const expected: Array<[string, string]> = [
      ["call-1", "Bash"],
      ["call-2", "web_search"],
      ["call-3", "Read"],
    ];
    expected.forEach(([id, toolName], i) => {
      expect(pendingApprovals[i]!.toolCallId).toBe(id);
      expect(pendingApprovals[i]!.toolName).toBe(toolName);
    });
    // pendingApproval is deprecated and mirrors the first entry.
    expect(pendingApproval?.toolCallId).toBe("call-1");
  });

  test("handles deprecated single tool_call field", () => {
    const { pendingApprovals } = extractApprovals(
      approvalMessage({
        tool_call: {
          tool_call_id: "call-legacy",
          name: "Write",
          arguments: '{"file_path": "/tmp/out.txt"}',
        },
      }),
    );
    expect(pendingApprovals).toHaveLength(1);
    expect(pendingApprovals[0]!.toolCallId).toBe("call-legacy");
    expect(pendingApprovals[0]!.toolName).toBe("Write");
  });

  test("prefers tool_calls array over deprecated tool_call", () => {
    const { pendingApprovals } = extractApprovals(
      approvalMessage({
        tool_calls: [
          { tool_call_id: "call-new", name: "Bash", arguments: "{}" },
        ],
        tool_call: {
          tool_call_id: "call-old",
          name: "Write",
          arguments: "{}",
        },
      }),
    );
    // The tool_calls array wins over the legacy field.
    expect(pendingApprovals).toHaveLength(1);
    expect(pendingApprovals[0]!.toolCallId).toBe("call-new");
  });

  test("filters out tool calls without tool_call_id", () => {
    const { pendingApprovals } = extractApprovals(
      approvalMessage({
        tool_calls: [
          { tool_call_id: "call-valid", name: "Bash", arguments: "{}" },
          { name: "Invalid", arguments: "{}" }, // Missing tool_call_id
          { tool_call_id: "", name: "Empty", arguments: "{}" }, // Empty tool_call_id
          { tool_call_id: "call-valid-2", name: "Read", arguments: "{}" },
        ],
      }),
    );
    // Only entries carrying a non-empty tool_call_id survive.
    expect(pendingApprovals).toHaveLength(2);
    expect(pendingApprovals[0]!.toolCallId).toBe("call-valid");
    expect(pendingApprovals[1]!.toolCallId).toBe("call-valid-2");
  });

  test("returns empty array when no tool calls present", () => {
    const { pendingApprovals, pendingApproval } = extractApprovals(
      approvalMessage({}),
    );
    expect(pendingApprovals).toHaveLength(0);
    expect(pendingApproval).toBeNull();
  });

  test("handles missing name and arguments gracefully", () => {
    const { pendingApprovals } = extractApprovals(
      approvalMessage({ tool_calls: [{ tool_call_id: "call-minimal" }] }),
    );
    expect(pendingApprovals).toHaveLength(1);
    expect(pendingApprovals[0]!.toolCallId).toBe("call-minimal");
    expect(pendingApprovals[0]!.toolName).toBe("");
    expect(pendingApprovals[0]!.toolArgs).toBe("");
  });
});
/**
* Note: Full integration testing of lazy approval recovery requires:
* 1. Starting CLI without --yolo
* 2. Sending a prompt that triggers a tool call requiring approval
* 3. Instead of approving, sending another user message
* 4. Verifying the CONFLICT error is detected and recovery happens
*
* This is complex to automate reliably in unit tests.
* Manual testing or a dedicated integration test suite is recommended.
*/

View File

@@ -0,0 +1,256 @@
import { describe, expect, test } from "bun:test";
import { spawn } from "node:child_process";
/**
* Integration test for lazy approval recovery (LET-7101).
*
* NOTE: The lazy approval recovery is primarily designed for TUI mode where:
* 1. User has a session with pending approvals (e.g., from a previous run)
* 2. User sends a new message before responding to the approval
* 3. Server returns CONFLICT error
* 4. CLI recovers by auto-denying stale approvals and retrying
*
* In bidirectional mode, messages sent during permission wait are dropped
* (see headless.ts line 1710-1714), so we can't directly test the CONFLICT
* scenario here. This test validates that the flow doesn't crash when
* messages are sent while approvals are pending.
*
* The RecoveryMessage emission can be tested by:
* 1. Manual testing in TUI mode (start session with orphaned approval)
* 2. Or by modifying headless mode to not drop messages during permission wait
*/
// Prompt that will trigger a Bash tool call requiring approval
const BASH_TRIGGER_PROMPT =
  "Run this exact bash command: echo test123. Do not use any other tools.";
// Second message to send while approval is pending
const INTERRUPT_MESSAGE =
  "Actually, just say OK instead. Do not call any tools.";
// Loosely-typed view of one NDJSON line from the CLI's stream-json output.
// Only the fields this test inspects are declared explicitly; everything
// else passes through via the index signature.
interface StreamMessage {
  type: string;
  subtype?: string;
  message_type?: string;
  stop_reason?: string;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  [key: string]: any;
}
/**
 * Run bidirectional test with custom message handling.
 * Allows sending messages at specific points in the flow.
 *
 * Sequence driven here:
 *   1. Spawn the CLI in bidirectional stream-json mode (no --yolo, so tool
 *      calls require approval).
 *   2. On the init event, send BASH_TRIGGER_PROMPT to provoke a tool call.
 *   3. On the approval_request_message, send INTERRUPT_MESSAGE instead of an
 *      approval, and watch for a "recovery" wire message.
 *
 * @param timeoutMs - Hard deadline for the whole run (default 3 minutes).
 * @returns messages: every parsed NDJSON line; success: whether the run
 *          completed with at least one result; errorSeen: whether a
 *          recovery (approval_pending) message was observed.
 */
async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
  messages: StreamMessage[];
  success: boolean;
  errorSeen: boolean;
}> {
  return new Promise((resolve, reject) => {
    const proc = spawn(
      "bun",
      [
        "run",
        "dev",
        "-p",
        "--input-format",
        "stream-json",
        "--output-format",
        "stream-json",
        "--new-agent",
        "-m",
        "haiku",
        // NOTE: No --yolo flag - approvals are required
      ],
      {
        cwd: process.cwd(),
        env: { ...process.env },
      },
    );
    // Transcript of every parsed NDJSON line, returned to the caller.
    const messages: StreamMessage[] = [];
    // Partial line carried between stdout chunks (NDJSON line buffering).
    let buffer = "";
    let initReceived = false;
    let approvalSeen = false;
    let interruptSent = false;
    let errorSeen = false;
    let resultCount = 0;
    // Set once we have decided to tear down; suppresses timeout/close races.
    let closing = false;
    const timeout = setTimeout(() => {
      if (!closing) {
        proc.kill();
        reject(new Error(`Test timed out after ${timeoutMs}ms`));
      }
    }, timeoutMs);
    // Graceful teardown: stop the watchdog, then give the child 500ms to
    // flush before ending stdin and killing it.
    const cleanup = () => {
      closing = true;
      clearTimeout(timeout);
      setTimeout(() => {
        proc.stdin?.end();
        proc.kill();
      }, 500);
    };
    // Parse one NDJSON line and advance the scripted conversation.
    const processLine = (line: string) => {
      if (!line.trim()) return;
      try {
        const msg: StreamMessage = JSON.parse(line);
        messages.push(msg);
        // Debug output
        if (process.env.DEBUG_TEST) {
          console.log("MSG:", JSON.stringify(msg, null, 2));
        }
        // Step 1: Wait for init, then send bash trigger prompt
        if (msg.type === "system" && msg.subtype === "init" && !initReceived) {
          initReceived = true;
          const userMsg = JSON.stringify({
            type: "user",
            message: { role: "user", content: BASH_TRIGGER_PROMPT },
          });
          proc.stdin?.write(`${userMsg}\n`);
          return;
        }
        // Step 2: When we see approval request, send another user message instead
        if (
          msg.type === "message" &&
          msg.message_type === "approval_request_message" &&
          !approvalSeen
        ) {
          approvalSeen = true;
          // Wait a moment, then send interrupt message (NOT an approval)
          setTimeout(() => {
            if (!interruptSent) {
              interruptSent = true;
              const userMsg = JSON.stringify({
                type: "user",
                message: { role: "user", content: INTERRUPT_MESSAGE },
              });
              proc.stdin?.write(`${userMsg}\n`);
            }
          }, 500);
          return;
        }
        // Track recovery messages - this is the key signal that lazy recovery worked
        if (
          msg.type === "recovery" &&
          msg.recovery_type === "approval_pending"
        ) {
          errorSeen = true; // reusing this flag to mean "recovery message seen"
        }
        // Also track raw errors (shouldn't see these if recovery works properly)
        if (
          msg.type === "error" ||
          (msg.type === "message" && msg.message_type === "error_message")
        ) {
          // NOTE(review): assumes msg.detail / msg.message is a string here;
          // if it were an object, toLowerCase would throw and be swallowed by
          // the outer catch below — confirm against the error wire format.
          const detail = msg.detail || msg.message || "";
          if (detail.toLowerCase().includes("cannot send a new message")) {
            // Raw error leaked through - recovery may have failed
            console.log(
              "WARNING: Raw CONFLICT error seen (recovery may have failed)",
            );
          }
        }
        // Track results - we need 2 (one for each user message, though first may fail)
        if (msg.type === "result") {
          resultCount++;
          // After second result (or after seeing error + result), we're done
          if (resultCount >= 2 || (errorSeen && resultCount >= 1)) {
            cleanup();
            resolve({ messages, success: true, errorSeen });
          }
        }
      } catch {
        // Not valid JSON, ignore
      }
    };
    // Split stdout chunks into complete lines; keep the trailing partial.
    proc.stdout?.on("data", (data) => {
      buffer += data.toString();
      const lines = buffer.split("\n");
      buffer = lines.pop() || "";
      for (const line of lines) {
        processLine(line);
      }
    });
    // Drain stderr so the child never blocks on a full pipe; content unused.
    let _stderr = "";
    proc.stderr?.on("data", (data) => {
      _stderr += data.toString();
    });
    proc.on("close", (_code) => {
      clearTimeout(timeout);
      // Process any remaining buffer
      if (buffer.trim()) {
        processLine(buffer);
      }
      if (!closing) {
        // If we got here without resolving, check what we have
        resolve({
          messages,
          success: resultCount > 0,
          errorSeen,
        });
      }
    });
    proc.on("error", (err) => {
      clearTimeout(timeout);
      reject(err);
    });
  });
}
describe("lazy approval recovery", () => {
  test("handles concurrent message while approval is pending", async () => {
    const outcome = await runLazyRecoveryTest();

    // Dump the full transcript to aid debugging when the run failed.
    if (!outcome.success) {
      console.log("All messages received:");
      for (const msg of outcome.messages) {
        console.log(JSON.stringify(msg, null, 2));
      }
    }

    // Seeing an approval request proves a tool requiring approval was called.
    const approvalRequest = outcome.messages.find(
      (m) => m.message_type === "approval_request_message",
    );
    expect(approvalRequest).toBeDefined();

    // The overall flow must have completed.
    expect(outcome.success).toBe(true);

    // At least one result must arrive (the second message should always complete).
    const resultTotal = outcome.messages.filter(
      (m) => m.type === "result",
    ).length;
    expect(resultTotal).toBeGreaterThanOrEqual(1);

    // KEY ASSERTION: a recovery message proves the lazy recovery mechanism fired.
    const recoveryMessage = outcome.messages.find(
      (m) => m.type === "recovery" && m.recovery_type === "approval_pending",
    );
    if (recoveryMessage) {
      console.log("Recovery message detected - lazy recovery worked correctly");
      expect(outcome.errorSeen).toBe(true); // Should have been set when we saw recovery
    } else {
      // Recovery might not be triggered if approval was auto-handled before second message
      // This can happen due to timing - the test still validates the flow works
      console.log(
        "Note: No recovery message seen - approval may have been handled before conflict",
      );
    }
  }, 180000); // 3 minute timeout for CI
});

View File

@@ -149,7 +149,7 @@ async function readSkillContent(
try {
const content = await readFile(projectSkillPath, "utf-8");
return { content, path: projectSkillPath };
} catch (primaryError) {
} catch {
// Fallback: check for bundled skills in a repo-level skills directory (legacy)
try {
const bundledSkillsDir = join(process.cwd(), "skills", "skills");
@@ -157,8 +157,11 @@ async function readSkillContent(
const content = await readFile(bundledSkillPath, "utf-8");
return { content, path: bundledSkillPath };
} catch {
// If all fallbacks fail, rethrow the original error
throw primaryError;
// If all fallbacks fail, throw a helpful error message (LET-7101)
// Suggest refresh in case skills sync is still running in background
throw new Error(
`Skill "${skillId}" not found. If you recently added this skill, try Skill({ command: "refresh" }) to re-scan the skills directory.`,
);
}
}
}

View File

@@ -198,6 +198,19 @@ export interface RetryMessage extends MessageEnvelope {
run_id?: string;
}
/**
 * Recovery message emitted when the CLI detects and recovers from errors.
 * Used for approval state conflicts and other recoverable errors.
 */
export interface RecoveryMessage extends MessageEnvelope {
  /** Discriminant for the WireMessage union. */
  type: "recovery";
  /** Type of recovery performed */
  recovery_type: "approval_pending" | "approval_desync";
  /** Human-readable description of what happened */
  message: string;
  /** Run the recovery applies to, when known. */
  run_id?: string;
}
// ═══════════════════════════════════════════════════════════════
// RESULT
// ═══════════════════════════════════════════════════════════════
@@ -329,6 +342,7 @@ export type WireMessage =
| AutoApprovalMessage
| ErrorMessage
| RetryMessage
| RecoveryMessage
| ResultMessage
| ControlResponse
| ControlRequest; // CLI → SDK control requests (e.g., can_use_tool)

View File

@@ -34,6 +34,112 @@ export function logTiming(message: string): void {
}
}
// ============================================================================
// Milestone tracking for latency audits
// ============================================================================
// Store milestones with their timestamps (ms since process start via performance.now())
// Keyed by milestone name; re-marking the same name overwrites the old entry.
const milestones: Map<string, number> = new Map();
// Reference time for relative measurements (set on first milestone)
let firstMilestoneTime: number | null = null;
/**
 * Record a named milestone in the boot/execution sequence.
 * Call this at key points to track where time is spent.
 *
 * The first milestone marked becomes the zero point for all relative
 * offsets. When timings are enabled, the offset is logged to stderr.
 *
 * @param name - Descriptive name like "SETTINGS_LOADED" or "AGENT_RESOLVED"
 */
export function markMilestone(name: string): void {
  const timestamp = performance.now();
  milestones.set(name, timestamp);
  // First milestone anchors relative measurements.
  if (firstMilestoneTime === null) {
    firstMilestoneTime = timestamp;
  }
  if (!isTimingsEnabled()) {
    return;
  }
  const relative = timestamp - firstMilestoneTime;
  console.error(
    `[timing] MILESTONE ${name} at +${formatDuration(relative)} (${formatTimestamp(new Date())})`,
  );
}
/**
 * Log the time elapsed since a previously recorded milestone.
 * No-op unless timings are enabled; warns if the milestone was never marked.
 *
 * @param label - Description of what we're measuring (e.g., "tool loading")
 * @param fromMilestone - Name of the starting milestone
 */
export function measureSinceMilestone(
  label: string,
  fromMilestone: string,
): void {
  if (!isTimingsEnabled()) return;
  const origin = milestones.get(fromMilestone);
  if (origin === undefined) {
    console.error(
      `[timing] WARNING: milestone "${fromMilestone}" not found for measurement "${label}"`,
    );
    return;
  }
  const elapsed = performance.now() - origin;
  console.error(`[timing] ${label}: ${formatDuration(elapsed)}`);
}
/**
 * Compute the elapsed time between two recorded milestones, in milliseconds.
 *
 * @param fromMilestone - Name of the earlier milestone
 * @param toMilestone - Name of the later milestone
 * @returns The duration, or null if either milestone was never marked.
 */
export function getMilestoneDuration(
  fromMilestone: string,
  toMilestone: string,
): number | null {
  const begin = milestones.get(fromMilestone);
  const finish = milestones.get(toMilestone);
  return begin === undefined || finish === undefined ? null : finish - begin;
}
/**
 * Print a chronological summary of all recorded milestones to stderr.
 * Each line shows the offset from the first milestone, plus the delta from
 * the preceding one (omitted for the first entry). No-op unless timings are
 * enabled and at least one milestone exists.
 */
export function reportAllMilestones(): void {
  if (!isTimingsEnabled() || milestones.size === 0) return;
  const origin = firstMilestoneTime ?? 0;
  console.error(`[timing] ======== MILESTONE SUMMARY ========`);
  // Chronological order.
  const ordered = [...milestones.entries()].sort(([, a], [, b]) => a - b);
  let previous = origin;
  for (const [name, when] of ordered) {
    const sinceStart = when - origin;
    const sincePrev = when - previous;
    const deltaStr =
      previous === origin ? "" : ` (+${formatDuration(sincePrev)})`;
    console.error(
      `[timing] +${formatDuration(sinceStart).padStart(8)} ${name}${deltaStr}`,
    );
    previous = when;
  }
  console.error(`[timing] =====================================`);
}
/**
 * Reset all milestone state (useful between sequential benchmark runs).
 */
export function clearMilestones(): void {
  firstMilestoneTime = null;
  milestones.clear();
}
// Simple fetch type that matches the SDK's expected signature
type SimpleFetch = (
input: string | URL | Request,