feat: reduce time-to-boot, remove default eager approval checks on inputs, auto-cancel stale approvals (#579)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Charles Packer
2026-01-17 16:19:30 -08:00
committed by GitHub
parent f4eb921af7
commit 5f5c0df18e
13 changed files with 1376 additions and 93 deletions

View File

@@ -0,0 +1,341 @@
#!/usr/bin/env bun
/**
* Latency Benchmark Script for Letta Code CLI
*
* Runs headless mode with LETTA_DEBUG_TIMINGS=1 and parses the output
* to measure latency breakdown at different stages.
*
* Usage:
* bun scripts/latency-benchmark.ts
* bun scripts/latency-benchmark.ts --scenario fresh-agent
* bun scripts/latency-benchmark.ts --iterations 5
*
* Requires: LETTA_API_KEY environment variable
*/
import { spawn } from "node:child_process";
/** One API request timing entry parsed from the `[timing]` debug logs. */
interface ApiCall {
  /** HTTP method, e.g. "GET" or "POST". */
  method: string;
  /** Request path exactly as logged (may still contain resource UUIDs). */
  path: string;
  /** Wall-clock duration of the call, in milliseconds. */
  durationMs: number;
  /** HTTP status code, when present in the log line. */
  status?: number;
}
/** A named progress marker with its offset from the CLI's timing origin. */
interface Milestone {
  name: string;
  /** Milliseconds elapsed at this milestone (parsed from "+Nms"/"+Ns"). */
  offsetMs: number;
}
/** Aggregate outcome of one benchmark scenario run. */
interface BenchmarkResult {
  scenario: string;
  /** Total wall time of the child process, in milliseconds. */
  totalMs: number;
  milestones: Milestone[];
  apiCalls: ApiCall[];
  /** Child process exit code (1 when the process exited without a code). */
  exitCode: number;
}
/** Declarative description of one benchmark scenario. */
interface ScenarioConfig {
  name: string;
  /** Human-readable summary printed alongside the results. */
  description: string;
  /** CLI arguments passed to `bun run dev`. */
  args: string[];
}
// Define benchmark scenarios.
// Each scenario runs the CLI headless (-p) with --yolo (auto-approval) and
// JSON output so runs complete without any interaction. The prompts are
// trivial math questions to avoid tool calls dominating the measurement.
const SCENARIOS: ScenarioConfig[] = [
  {
    // Includes agent-creation cost in the measurement.
    name: "fresh-agent",
    description: "Create new agent and send simple prompt",
    args: [
      "-p",
      "What is 2+2? Reply with just the number.",
      "--new-agent",
      "--yolo",
      "--output-format",
      "json",
    ],
  },
  {
    // Reuses the last agent via --continue (resume path).
    name: "resume-agent",
    description: "Resume last agent and send simple prompt",
    args: [
      "-p",
      "What is 3+3? Reply with just the number.",
      "--continue",
      "--yolo",
      "--output-format",
      "json",
    ],
  },
  {
    // Baseline: minimal turn on an existing agent.
    name: "minimal-math",
    description: "Simple math question (no tool calls)",
    args: [
      "-p",
      "What is 5+5? Reply with just the number.",
      "--continue",
      "--yolo",
      "--output-format",
      "json",
    ],
  },
];
/**
 * Extract milestone and API-call timing entries from captured stderr.
 *
 * Recognized line shapes:
 *   [timing] MILESTONE CLI_START at +0ms (12:34:56.789)
 *   [timing] GET /v1/agents/... -> 245ms (status: 200)
 * Durations suffixed with "s" are converted to milliseconds; all other
 * lines are ignored.
 */
function parseTimingLogs(stderr: string): {
  milestones: Milestone[];
  apiCalls: ApiCall[];
} {
  const milestonePattern =
    /\[timing\] MILESTONE (\S+) at \+(\d+(?:\.\d+)?)(ms|s)/;
  const apiPattern =
    /\[timing\] (GET|POST|PUT|DELETE|PATCH) (\S+) -> (\d+(?:\.\d+)?)(ms|s)(?: \(status: (\d+)\))?/;
  // Normalize a "<value><unit>" pair to milliseconds.
  const toMs = (value: string, unit: string): number =>
    unit === "s" ? parseFloat(value) * 1000 : parseFloat(value);

  const milestones: Milestone[] = [];
  const apiCalls: ApiCall[] = [];

  for (const line of stderr.split("\n")) {
    const milestone = line.match(milestonePattern);
    if (milestone) {
      milestones.push({
        name: milestone[1]!,
        offsetMs: toMs(milestone[2]!, milestone[3]!),
      });
      continue;
    }
    const api = line.match(apiPattern);
    if (api) {
      apiCalls.push({
        method: api[1]!,
        path: api[2]!,
        durationMs: toMs(api[3]!, api[4]!),
        status: api[5] ? parseInt(api[5], 10) : undefined,
      });
    }
  }
  return { milestones, apiCalls };
}
/**
 * Run a single benchmark scenario via `bun run dev` with timing logs enabled.
 *
 * Resolves (never rejects) once the child process finishes, with the parsed
 * timing breakdown; the child's exit code is surfaced in the result so the
 * caller can warn on failures.
 */
async function runBenchmark(scenario: ScenarioConfig): Promise<BenchmarkResult> {
  const start = performance.now();
  return new Promise((resolve) => {
    const proc = spawn("bun", ["run", "dev", ...scenario.args], {
      env: { ...process.env, LETTA_DEBUG_TIMINGS: "1" },
      stdio: ["pipe", "pipe", "pipe"],
    });

    // Drain stdout so the child can never block on a full pipe; only stderr
    // (where the [timing] logs are written) is captured for parsing.
    proc.stdout.resume();

    let stderr = "";
    proc.stderr.on("data", (data) => {
      stderr += data.toString();
    });

    // Kill runaway runs after 2 minutes. Keep the handle so it can be
    // cleared on exit — previously the timer was never cleared, so every
    // scenario left a live timer keeping the event loop alive (and the
    // script hanging) for up to 2 minutes after all benchmarks finished.
    const killTimer = setTimeout(() => {
      proc.kill("SIGTERM");
    }, 120000);

    const finish = (exitCode: number) => {
      clearTimeout(killTimer);
      const totalMs = performance.now() - start;
      const { milestones, apiCalls } = parseTimingLogs(stderr);
      resolve({
        scenario: scenario.name,
        totalMs,
        milestones,
        apiCalls,
        exitCode,
      });
    };

    proc.on("close", (code) => finish(code ?? 1));
    // If spawning fails outright (e.g. `bun` not on PATH), "close" may never
    // fire; resolve with a failure exit code instead of hanging forever.
    // (resolve() is a no-op if finish() already ran.)
    proc.on("error", () => finish(1));
  });
}
/**
 * Render a millisecond duration for display: whole milliseconds below one
 * second ("245ms"), otherwise seconds with two decimals ("1.50s").
 */
function formatMs(ms: number): string {
  return ms < 1000 ? `${Math.round(ms)}ms` : `${(ms / 1000).toFixed(2)}s`;
}
/**
 * Pretty-print all benchmark results: per-scenario milestones and grouped
 * API-call timings, followed by a summary table of total / API / overhead
 * time for each scenario.
 */
function printResults(results: BenchmarkResult[]): void {
  const rule = (ch: string): string => ch.repeat(70);
  const sumApiMs = (r: BenchmarkResult): number =>
    r.apiCalls.reduce((sum, c) => sum + c.durationMs, 0);

  console.log("\n" + rule("="));
  console.log("LATENCY BENCHMARK RESULTS");
  console.log(rule("=") + "\n");

  for (const result of results) {
    const scenario = SCENARIOS.find((s) => s.name === result.scenario);
    console.log(`Scenario: ${result.scenario}`);
    console.log(` ${scenario?.description || ""}`);
    console.log(` Exit code: ${result.exitCode}`);
    console.log(` Total wall time: ${formatMs(result.totalMs)}`);
    console.log("");

    // Milestones, each with the delta from the previous one.
    if (result.milestones.length > 0) {
      console.log(" Milestones:");
      let previousOffset = 0;
      for (const { name, offsetMs } of result.milestones) {
        const delta = offsetMs - previousOffset;
        const deltaStr = previousOffset === 0 ? "" : ` (+${formatMs(delta)})`;
        console.log(` +${formatMs(offsetMs).padStart(8)} ${name}${deltaStr}`);
        previousOffset = offsetMs;
      }
      console.log("");
    }

    // API calls grouped by "METHOD normalized-path", sorted by total time.
    if (result.apiCalls.length > 0) {
      console.log(" API Calls:");
      const totalApiMs = sumApiMs(result);
      const grouped = new Map<string, { count: number; totalMs: number }>();
      for (const call of result.apiCalls) {
        // Collapse UUIDs so repeated calls to the same endpoint group together.
        const key = `${call.method} ${call.path.replace(/[a-f0-9-]{36}/g, "{id}")}`;
        const entry = grouped.get(key) ?? { count: 0, totalMs: 0 };
        entry.count += 1;
        entry.totalMs += call.durationMs;
        grouped.set(key, entry);
      }
      const byTotalTime = [...grouped.entries()].sort(
        (a, b) => b[1].totalMs - a[1].totalMs,
      );
      for (const [endpoint, stats] of byTotalTime) {
        const countStr = stats.count > 1 ? ` (x${stats.count})` : "";
        console.log(` ${formatMs(stats.totalMs).padStart(8)} ${endpoint}${countStr}`);
      }
      console.log(` ${"─".repeat(50)}`);
      console.log(` ${formatMs(totalApiMs).padStart(8)} Total API time`);
      console.log(` ${formatMs(result.totalMs - totalApiMs).padStart(8)} CLI overhead (non-API)`);
    }
    console.log("\n" + "-".repeat(70) + "\n");
  }

  // Final comparison table across scenarios.
  console.log("SUMMARY");
  console.log(rule("-"));
  console.log(
    "Scenario".padEnd(20) +
      "Total".padStart(12) +
      "API Time".padStart(12) +
      "CLI Overhead".padStart(14),
  );
  console.log(rule("-"));
  for (const result of results) {
    const apiMs = sumApiMs(result);
    console.log(
      result.scenario.padEnd(20) +
        formatMs(result.totalMs).padStart(12) +
        formatMs(apiMs).padStart(12) +
        formatMs(result.totalMs - apiMs).padStart(14),
    );
  }
  console.log(rule("-"));
}
/**
 * Entry point: parse CLI flags, validate prerequisites, run the selected
 * scenarios for the requested number of iterations, then print a report.
 *
 * Flags:
 *   --scenario <name>  run only the named scenario
 *   --iterations <n>   repeat all scenarios n times (default 1)
 */
async function main(): Promise<void> {
  // Parse args
  const args = process.argv.slice(2);
  let scenarioFilter: string | null = null;
  let iterations = 1;
  for (let i = 0; i < args.length; i++) {
    if (args[i] === "--scenario" && args[i + 1]) {
      scenarioFilter = args[++i]!;
    } else if (args[i] === "--iterations" && args[i + 1]) {
      iterations = parseInt(args[++i]!, 10);
    }
  }
  // Reject bad --iterations values: parseInt can yield NaN (or a non-positive
  // number), which would silently run zero iterations and print an empty report.
  if (!Number.isInteger(iterations) || iterations < 1) {
    console.error("Error: --iterations must be a positive integer");
    process.exit(1);
  }
  // Check prereqs: benchmarks hit the real API, so credentials are mandatory.
  if (!process.env.LETTA_API_KEY) {
    console.error("Error: LETTA_API_KEY environment variable is required");
    process.exit(1);
  }
  // Filter scenarios
  const scenariosToRun = scenarioFilter
    ? SCENARIOS.filter((s) => s.name === scenarioFilter)
    : SCENARIOS;
  if (scenariosToRun.length === 0) {
    console.error(`Error: Unknown scenario "${scenarioFilter}"`);
    console.error(`Available scenarios: ${SCENARIOS.map((s) => s.name).join(", ")}`);
    process.exit(1);
  }
  console.log("Running latency benchmarks...");
  console.log(`Scenarios: ${scenariosToRun.map((s) => s.name).join(", ")}`);
  console.log(`Iterations: ${iterations}`);
  console.log("");
  // Run scenarios sequentially (parallel runs would skew timings).
  const allResults: BenchmarkResult[] = [];
  for (let iter = 0; iter < iterations; iter++) {
    if (iterations > 1) {
      console.log(`\n--- Iteration ${iter + 1} of ${iterations} ---`);
    }
    for (const scenario of scenariosToRun) {
      console.log(`Running: ${scenario.name}...`);
      const result = await runBenchmark(scenario);
      allResults.push(result);
      if (result.exitCode !== 0) {
        console.warn(` Warning: ${scenario.name} exited with code ${result.exitCode}`);
      } else {
        console.log(` Completed in ${formatMs(result.totalMs)}`);
      }
    }
  }
  printResults(allResults);
}
main().catch((err) => {
  console.error(err);
  process.exit(1);
});

View File

@@ -2,9 +2,14 @@ import type { MessageCreate } from "@letta-ai/letta-client/resources/agents/agen
import { getClient } from "./client";
import { APPROVAL_RECOVERY_PROMPT } from "./promptAssets";
// Error when trying to SEND approval but server has no pending approval
// (the "desync" case). Matched case-insensitively against the error detail.
const APPROVAL_RECOVERY_DETAIL_FRAGMENT =
  "no tool call is currently awaiting approval";
// Error when trying to SEND message but server has pending approval waiting
// This is the CONFLICT error - opposite of desync
const APPROVAL_PENDING_DETAIL_FRAGMENT = "cannot send a new message";
type RunErrorMetadata =
| {
error_type?: string;
@@ -20,6 +25,19 @@ export function isApprovalStateDesyncError(detail: unknown): boolean {
return detail.toLowerCase().includes(APPROVAL_RECOVERY_DETAIL_FRAGMENT);
}
/**
* Check if error indicates there's a pending approval blocking new messages.
* This is the CONFLICT error from the backend when trying to send a user message
* while the agent is waiting for approval on a tool call.
*
* Error format:
* { detail: "CONFLICT: Cannot send a new message: The agent is waiting for approval..." }
*/
export function isApprovalPendingError(detail: unknown): boolean {
if (typeof detail !== "string") return false;
return detail.toLowerCase().includes(APPROVAL_PENDING_DETAIL_FRAGMENT);
}
export async function fetchRunErrorDetail(
runId: string | null | undefined,
): Promise<string | null> {

View File

@@ -29,8 +29,9 @@ export interface ResumeData {
/**
* Extract approval requests from an approval_request_message.
* Exported for testing parallel tool call handling.
*/
function extractApprovals(messageToCheck: Message): {
export function extractApprovals(messageToCheck: Message): {
pendingApproval: ApprovalRequest | null;
pendingApprovals: ApprovalRequest[];
} {

View File

@@ -271,26 +271,14 @@ export async function createAgent(
// Track provenance: which blocks were created
// Note: We no longer reuse shared blocks - each agent gets fresh blocks
const blockProvenance: BlockProvenance[] = [];
const blockIds: string[] = [];
// Create all blocks fresh for the new agent
// Mark new blocks for provenance tracking (actual creation happens in agents.create)
for (const block of filteredMemoryBlocks) {
try {
const createdBlock = await client.blocks.create(block);
if (!createdBlock.id) {
throw new Error(`Created block ${block.label} has no ID`);
}
blockIds.push(createdBlock.id);
blockProvenance.push({ label: block.label, source: "new" });
} catch (error) {
console.error(`Failed to create block ${block.label}:`, error);
throw error;
}
blockProvenance.push({ label: block.label, source: "new" });
}
// Add any referenced block IDs (existing blocks to attach)
// Mark referenced blocks for provenance tracking
for (const blockId of referencedBlockIds) {
blockIds.push(blockId);
blockProvenance.push({ label: blockId, source: "shared" });
}
@@ -314,7 +302,9 @@ export async function createAgent(
systemPromptContent = `${systemPromptContent}\n\n${options.systemPromptAppend}`;
}
// Create agent with all block IDs (existing + newly created)
// Create agent with inline memory blocks (LET-7101: single API call instead of N+1)
// - memory_blocks: new blocks to create inline
// - block_ids: references to existing blocks (for shared memory)
const tags = ["origin:letta-code"];
if (process.env.LETTA_CODE_AGENT_ROLE === "subagent") {
tags.push("role:subagent");
@@ -332,7 +322,11 @@ export async function createAgent(
model: modelHandle,
context_window_limit: contextWindow,
tools: toolNames,
block_ids: blockIds,
// New blocks created inline with agent (saves ~2s of sequential API calls)
memory_blocks:
filteredMemoryBlocks.length > 0 ? filteredMemoryBlocks : undefined,
// Referenced block IDs (existing blocks to attach)
block_ids: referencedBlockIds.length > 0 ? referencedBlockIds : undefined,
tags,
// should be default off, but just in case
include_base_tools: false,

View File

@@ -486,3 +486,125 @@ export function formatSkillsForMemory(
// Otherwise fall back to compact tree format
return formatSkillsAsTree(skills, skillsDirectory);
}
// ============================================================================
// Skills Sync with Hash-Based Caching (Phase 2.5 - LET-7101)
// ============================================================================
import { createHash } from "node:crypto";
import { mkdir, writeFile } from "node:fs/promises";
/**
 * Path of the project-local skills hash cache file.
 * Lives under .letta/ in the current working directory because the skills
 * block content depends on the project's .skills/ folder.
 */
function getSkillsHashFilePath(): string {
  const projectLettaDir = join(process.cwd(), ".letta");
  return join(projectLettaDir, "skills-hash.json");
}
/** Shape of the JSON persisted in the skills hash cache file. */
interface SkillsHashCache {
  // Truncated SHA-256 of the formatted skills content (see computeSkillsHash).
  hash: string;
  // ISO-8601 write time; informational only, never read back.
  timestamp: string;
}
/**
 * Fingerprint the formatted skills content: the first 16 hex characters of
 * its SHA-256 digest. Used to skip redundant skills-block updates.
 */
function computeSkillsHash(content: string): string {
  const fullDigest = createHash("sha256").update(content).digest("hex");
  return fullDigest.slice(0, 16);
}
/**
 * Read the previously cached skills hash.
 * Returns null when the cache file is missing, unreadable, or not valid
 * JSON — all treated as "no cache".
 */
async function getCachedSkillsHash(): Promise<string | null> {
  try {
    const raw = await readFile(getSkillsHashFilePath(), "utf-8");
    const cache: SkillsHashCache = JSON.parse(raw);
    return cache.hash;
  } catch {
    return null;
  }
}
/**
 * Persist the skills hash to the project-local cache file.
 * Write failures are swallowed: the cache is an optimization, not a
 * requirement, so a read-only filesystem must not break the sync.
 */
async function setCachedSkillsHash(hash: string): Promise<void> {
  try {
    // The project .letta directory may not exist yet on first run.
    await mkdir(join(process.cwd(), ".letta"), { recursive: true });
    const payload: SkillsHashCache = {
      hash,
      timestamp: new Date().toISOString(),
    };
    await writeFile(getSkillsHashFilePath(), JSON.stringify(payload, null, 2));
  } catch {
    // Ignore cache write failures - not critical
  }
}
/**
 * Sync skills to an agent's memory block.
 * Discovers skills from filesystem and updates the skills block.
 *
 * The block-update + hash-caching logic was previously duplicated across the
 * skipIfUnchanged and unconditional paths; it is unified here (behavior is
 * unchanged — the hash computation is pure, so computing it up front is
 * equivalent).
 *
 * @param client - Letta client
 * @param agentId - Agent ID to update
 * @param skillsDirectory - Path to project skills directory
 * @param options - Optional settings; skipIfUnchanged skips the network
 *   round-trip when the formatted content matches the cached hash
 * @returns Object indicating if sync occurred and discovered skills
 */
export async function syncSkillsToAgent(
  client: import("@letta-ai/letta-client").default,
  agentId: string,
  skillsDirectory: string,
  options?: { skipIfUnchanged?: boolean },
): Promise<{ synced: boolean; skills: Skill[] }> {
  // Discover skills from filesystem; discovery errors are logged, not fatal.
  const { skills, errors } = await discoverSkills(skillsDirectory);
  for (const error of errors) {
    console.warn(`[skills] Discovery error: ${error.path}: ${error.message}`);
  }

  // Format skills for the memory block and fingerprint the result.
  const formattedSkills = formatSkillsForMemory(skills, skillsDirectory);
  const newHash = computeSkillsHash(formattedSkills);

  // Skip the update when the content hasn't changed since the last sync.
  if (options?.skipIfUnchanged && newHash === (await getCachedSkillsHash())) {
    return { synced: false, skills };
  }

  // Update the block, then cache the new hash for future runs.
  await client.agents.blocks.update("skills", {
    agent_id: agentId,
    value: formattedSkills,
  });
  await setCachedSkillsHash(newHash);
  return { synced: true, skills };
}

View File

@@ -30,6 +30,7 @@ import {
import {
buildApprovalRecoveryMessage,
fetchRunErrorDetail,
isApprovalPendingError,
isApprovalStateDesyncError,
} from "../agent/approval-recovery";
import { prefetchAvailableModelHandles } from "../agent/available-models";
@@ -175,10 +176,10 @@ import { useTerminalRows, useTerminalWidth } from "./hooks/useTerminalWidth";
const CLEAR_SCREEN_AND_HOME = "\u001B[2J\u001B[H";
const MIN_RESIZE_DELTA = 2;
// Feature flag: Check for pending approvals before sending messages
// This prevents infinite thinking state when there's an orphaned approval
// Can be disabled if the latency check adds too much overhead
const CHECK_PENDING_APPROVALS_BEFORE_SEND = true;
// Eager approval checking is now CONDITIONAL (LET-7101):
// - Enabled when resuming a session (--resume, --continue, or startupApprovals exist)
// - Disabled for normal messages (lazy recovery handles edge cases)
// This saves ~2s latency per message in the common case.
// Feature flag: Eagerly cancel streams client-side when user presses ESC
// When true (default), immediately abort the stream after calling .cancel()
@@ -678,6 +679,12 @@ export default function App({
>(null);
const toolAbortControllerRef = useRef<AbortController | null>(null);
// Eager approval checking: only enabled when resuming a session (LET-7101)
// After first successful message, we disable it since any new approvals are from our own turn
const [needsEagerApprovalCheck, setNeedsEagerApprovalCheck] = useState(
() => resumedExistingConversation || startupApprovals.length > 0,
);
// Track auto-handled results to combine with user decisions
const [autoHandledResults, setAutoHandledResults] = useState<
Array<{
@@ -1941,6 +1948,12 @@ export default function App({
setStreaming(false);
llmApiErrorRetriesRef.current = 0; // Reset retry counter on success
// Disable eager approval check after first successful message (LET-7101)
// Any new approvals from here on are from our own turn, not orphaned
if (needsEagerApprovalCheck) {
setNeedsEagerApprovalCheck(false);
}
// Send desktop notification when turn completes
// and we're not about to auto-send another queued message
if (!waitingForQueueCancelRef.current) {
@@ -2552,6 +2565,112 @@ export default function App({
return;
}
// Check for approval pending error (sent user message while approval waiting)
// This is the lazy recovery path for when needsEagerApprovalCheck is false
const approvalPendingDetected =
isApprovalPendingError(detailFromRun) ||
isApprovalPendingError(latestErrorText);
if (
!hasApprovalInPayload &&
approvalPendingDetected &&
llmApiErrorRetriesRef.current < LLM_API_ERROR_MAX_RETRIES
) {
llmApiErrorRetriesRef.current += 1;
// Log for debugging (visible in transcripts)
const statusId = uid("status");
buffersRef.current.byId.set(statusId, {
kind: "status",
id: statusId,
lines: [
"[LAZY RECOVERY] Detected CONFLICT: server has pending approval",
"[LAZY RECOVERY] Fetching stale approvals to auto-deny...",
],
});
buffersRef.current.order.push(statusId);
refreshDerived();
try {
// Fetch pending approvals and auto-deny them
const client = await getClient();
const agent = await client.agents.retrieve(agentIdRef.current);
const { pendingApprovals: existingApprovals } =
await getResumeData(client, agent, conversationIdRef.current);
if (existingApprovals && existingApprovals.length > 0) {
// Update status with details
buffersRef.current.byId.set(statusId, {
kind: "status",
id: statusId,
lines: [
"[LAZY RECOVERY] Detected CONFLICT: server has pending approval",
`[LAZY RECOVERY] Found ${existingApprovals.length} stale approval(s):`,
...existingApprovals.map(
(a) =>
` - ${a.toolName} (${a.toolCallId.slice(0, 8)}...)`,
),
"[LAZY RECOVERY] Auto-denying and batching with user message...",
],
});
refreshDerived();
// Create denial results for all stale approvals
// Use the same format as handleCancelApprovals (lines 6390-6395)
const denialResults = existingApprovals.map((approval) => ({
type: "approval" as const,
tool_call_id: approval.toolCallId,
approve: false,
reason:
"Auto-denied: stale approval from interrupted session",
}));
// Prepend approval denials to the current input (keeps user message)
const approvalPayload: ApprovalCreate = {
type: "approval",
approvals: denialResults,
};
currentInput.unshift(approvalPayload);
} else {
// No approvals found - server state may have cleared
buffersRef.current.byId.set(statusId, {
kind: "status",
id: statusId,
lines: [
"[LAZY RECOVERY] Detected CONFLICT but no pending approvals found",
"[LAZY RECOVERY] Retrying message...",
],
});
refreshDerived();
}
} catch (_recoveryError) {
// If we can't fetch approvals, just retry the original message
buffersRef.current.byId.set(statusId, {
kind: "status",
id: statusId,
lines: [
"[LAZY RECOVERY] Failed to fetch pending approvals",
"[LAZY RECOVERY] Retrying message anyway...",
],
});
refreshDerived();
}
// Brief pause so user can see the status
await new Promise((resolve) => setTimeout(resolve, 500));
// Remove the transient status
buffersRef.current.byId.delete(statusId);
buffersRef.current.order = buffersRef.current.order.filter(
(id) => id !== statusId,
);
refreshDerived();
// Reset interrupted flag so retry stream chunks are processed
buffersRef.current.interrupted = false;
continue;
}
// Check if this is a retriable error (transient LLM API error)
const retriable = await isRetriableError(
stopReasonToHandle,
@@ -2757,6 +2876,7 @@ export default function App({
setStreaming,
currentModelId,
updateStreamingOutput,
needsEagerApprovalCheck,
],
);
@@ -3278,7 +3398,8 @@ export default function App({
const checkPendingApprovalsForSlashCommand = useCallback(async (): Promise<
{ blocked: true } | { blocked: false }
> => {
if (!CHECK_PENDING_APPROVALS_BEFORE_SEND) {
// Only check eagerly when resuming a session (LET-7101)
if (!needsEagerApprovalCheck) {
return { blocked: false };
}
@@ -3425,7 +3546,13 @@ export default function App({
// If check fails, proceed anyway (don't block user)
return { blocked: false };
}
}, [agentId, processConversation, refreshDerived, updateStreamingOutput]);
}, [
agentId,
processConversation,
refreshDerived,
updateStreamingOutput,
needsEagerApprovalCheck,
]);
// biome-ignore lint/correctness/useExhaustiveDependencies: refs read .current dynamically, complex callback with intentional deps
const onSubmit = useCallback(
@@ -5372,7 +5499,20 @@ DO NOT respond to these messages or otherwise consider them in your response unl
// Check for pending approvals before sending message (skip if we already have
// a queued approval response to send first).
if (CHECK_PENDING_APPROVALS_BEFORE_SEND && !queuedApprovalResults) {
// Only do eager check when resuming a session (LET-7101) - otherwise lazy recovery handles it
if (needsEagerApprovalCheck && !queuedApprovalResults) {
// Log for debugging
const eagerStatusId = uid("status");
buffersRef.current.byId.set(eagerStatusId, {
kind: "status",
id: eagerStatusId,
lines: [
"[EAGER CHECK] Checking for pending approvals (resume mode)...",
],
});
buffersRef.current.order.push(eagerStatusId);
refreshDerived();
try {
const client = await getClient();
// Fetch fresh agent state to check for pending approvals with accurate in-context messages
@@ -5383,6 +5523,12 @@ DO NOT respond to these messages or otherwise consider them in your response unl
conversationIdRef.current,
);
// Remove eager check status
buffersRef.current.byId.delete(eagerStatusId);
buffersRef.current.order = buffersRef.current.order.filter(
(id) => id !== eagerStatusId,
);
// Check if user cancelled while we were fetching approval state
if (
userCancelledRef.current ||

View File

@@ -11,6 +11,7 @@ import type { ApprovalResult } from "./agent/approval-execution";
import {
buildApprovalRecoveryMessage,
fetchRunErrorDetail,
isApprovalPendingError,
isApprovalStateDesyncError,
} from "./agent/approval-recovery";
import { getClient } from "./agent/client";
@@ -40,11 +41,17 @@ import type {
ControlResponse,
ErrorMessage,
MessageWire,
RecoveryMessage,
ResultMessage,
RetryMessage,
StreamEvent,
SystemInitMessage,
} from "./types/protocol";
import {
markMilestone,
measureSinceMilestone,
reportAllMilestones,
} from "./utils/timing";
// Maximum number of times to retry a turn when the backend
// reports an `llm_api_error` stop reason. This helps smooth
@@ -169,6 +176,7 @@ export async function handleHeadlessCommand(
}
const client = await getClient();
markMilestone("HEADLESS_CLIENT_READY");
// Check for --resume flag (interactive only)
if (values.resume) {
@@ -462,6 +470,7 @@ export async function handleHeadlessCommand(
console.error("No agent found. Use --new-agent to create a new agent.");
process.exit(1);
}
markMilestone("HEADLESS_AGENT_RESOLVED");
// Check if we're resuming an existing agent (not creating a new one)
const isResumingAgent = !!(
@@ -567,6 +576,7 @@ export async function handleHeadlessCommand(
});
conversationId = conversation.id;
}
markMilestone("HEADLESS_CONVERSATION_READY");
// Save session (agent + conversation) to both project and global settings
// Skip for subagents - they shouldn't pollute the LRU settings
@@ -591,41 +601,33 @@ export async function handleHeadlessCommand(
// Set agent context for tools that need it (e.g., Skill tool, Task tool)
setAgentContext(agent.id, skillsDirectory);
await initializeLoadedSkillsFlag();
// Re-discover skills and update the skills memory block
// Fire-and-forget: Initialize loaded skills flag (LET-7101)
// Don't await - this is just for the skill unload reminder
initializeLoadedSkillsFlag().catch(() => {
// Ignore errors - not critical
});
// Fire-and-forget: Sync skills in background (LET-7101)
// This ensures new skills added after agent creation are available
try {
const { discoverSkills, formatSkillsForMemory, SKILLS_DIR } = await import(
"./agent/skills"
);
const { join } = await import("node:path");
// Don't await - proceed to message sending immediately
(async () => {
try {
const { syncSkillsToAgent, SKILLS_DIR } = await import("./agent/skills");
const { join } = await import("node:path");
const resolvedSkillsDirectory =
skillsDirectory || join(process.cwd(), SKILLS_DIR);
const { skills, errors } = await discoverSkills(resolvedSkillsDirectory);
const resolvedSkillsDirectory =
skillsDirectory || join(process.cwd(), SKILLS_DIR);
if (errors.length > 0) {
console.warn("Errors encountered during skill discovery:");
for (const error of errors) {
console.warn(` ${error.path}: ${error.message}`);
}
await syncSkillsToAgent(client, agent.id, resolvedSkillsDirectory, {
skipIfUnchanged: true,
});
} catch (error) {
console.warn(
`[skills] Background sync failed: ${error instanceof Error ? error.message : String(error)}`,
);
}
// Update the skills memory block with freshly discovered skills
const formattedSkills = formatSkillsForMemory(
skills,
resolvedSkillsDirectory,
);
await client.agents.blocks.update("skills", {
agent_id: agent.id,
value: formattedSkills,
});
} catch (error) {
console.warn(
`Failed to update skills: ${error instanceof Error ? error.message : String(error)}`,
);
}
})();
// Validate output format
const outputFormat =
@@ -830,8 +832,11 @@ export async function handleHeadlessCommand(
}
};
// Clear any pending approvals before starting a new turn
await resolveAllPendingApprovals();
// Clear any pending approvals before starting a new turn - ONLY when resuming (LET-7101)
// For new agents/conversations, lazy recovery handles any edge cases
if (isResumingAgent) {
await resolveAllPendingApprovals();
}
// Build message content with reminders (plan mode first, then skill unload)
const { permissionMode } = await import("./permissions/mode");
@@ -865,6 +870,9 @@ export async function handleHeadlessCommand(
let lastKnownRunId: string | null = null;
let llmApiErrorRetries = 0;
markMilestone("HEADLESS_FIRST_STREAM_START");
measureSinceMilestone("headless-setup-total", "HEADLESS_CLIENT_READY");
try {
while (true) {
const stream = await sendMessageStream(conversationId, currentInput);
@@ -922,8 +930,25 @@ export async function handleHeadlessCommand(
}
// Detect server conflict due to pending approval; handle it and retry
if (errorInfo?.message?.includes("Cannot send a new message")) {
// Don't emit this error; clear approvals and retry outer loop
// Check both detail and message fields since error formats vary
if (
isApprovalPendingError(errorInfo?.detail) ||
isApprovalPendingError(errorInfo?.message)
) {
// Emit recovery message for stream-json mode (enables testing)
if (outputFormat === "stream-json") {
const recoveryMsg: RecoveryMessage = {
type: "recovery",
recovery_type: "approval_pending",
message:
"Detected pending approval conflict; auto-denying stale approval and retrying",
run_id: lastRunId ?? undefined,
session_id: sessionId,
uuid: `recovery-${lastRunId || crypto.randomUUID()}`,
};
console.log(JSON.stringify(recoveryMsg));
}
// Clear approvals and retry outer loop
await resolveAllPendingApprovals();
// Reset state and restart turn
stopReason = "error" as StopReasonType;
@@ -1541,6 +1566,10 @@ export async function handleHeadlessCommand(
}
console.log(resultText);
}
// Report all milestones at the end for latency audit
markMilestone("HEADLESS_COMPLETE");
reportAllMilestones();
}
/**

View File

@@ -17,6 +17,7 @@ import { permissionMode } from "./permissions/mode";
import { settingsManager } from "./settings-manager";
import { telemetry } from "./telemetry";
import { loadTools } from "./tools/manager";
import { markMilestone } from "./utils/timing";
// Stable empty array constants to prevent new references on every render
// These are used as fallbacks when resumeData is null, avoiding the React
@@ -335,9 +336,12 @@ async function getPinnedAgentNames(): Promise<{ id: string; name: string }[]> {
}
async function main(): Promise<void> {
markMilestone("CLI_START");
// Initialize settings manager (loads settings once into memory)
await settingsManager.initialize();
const settings = await settingsManager.getSettingsWithSecureTokens();
markMilestone("SETTINGS_LOADED");
// Initialize LSP infrastructure for type checking
if (process.env.LETTA_ENABLE_LSP) {
@@ -728,6 +732,7 @@ async function main(): Promise<void> {
// Validate credentials by checking health endpoint
const { validateCredentials } = await import("./auth/oauth");
const isValid = await validateCredentials(baseURL, apiKey ?? "");
markMilestone("CREDENTIALS_VALIDATED");
if (!isValid) {
// For headless mode, error out with helpful message
@@ -829,18 +834,22 @@ async function main(): Promise<void> {
}
if (isHeadless) {
markMilestone("HEADLESS_MODE_START");
// For headless mode, load tools synchronously (respecting model/toolset when provided)
const modelForTools = getModelForToolLoading(
specifiedModel,
specifiedToolset as "codex" | "default" | undefined,
);
await loadTools(modelForTools);
markMilestone("TOOLS_LOADED");
const { handleHeadlessCommand } = await import("./headless");
await handleHeadlessCommand(process.argv, specifiedModel, skillsDirectory);
return;
}
markMilestone("TUI_MODE_START");
// Enable enhanced key reporting (Shift+Enter, etc.) BEFORE Ink initializes.
// In VS Code/xterm.js this typically requires a short handshake (query + enable).
try {
@@ -853,6 +862,7 @@ async function main(): Promise<void> {
}
// Interactive: lazy-load React/Ink + App
markMilestone("REACT_IMPORT_START");
const React = await import("react");
const { render } = await import("ink");
const { useState, useEffect } = React;
@@ -1441,42 +1451,35 @@ async function main(): Promise<void> {
// Set agent context for tools that need it (e.g., Skill tool)
setAgentContext(agent.id, skillsDirectory);
await initializeLoadedSkillsFlag();
// Re-discover skills and update the skills memory block
// Fire-and-forget: Initialize loaded skills flag (LET-7101)
// Don't await - this is just for the skill unload reminder
initializeLoadedSkillsFlag().catch(() => {
// Ignore errors - not critical
});
// Fire-and-forget: Sync skills in background (LET-7101)
// This ensures new skills added after agent creation are available
try {
const { discoverSkills, formatSkillsForMemory, SKILLS_DIR } =
await import("./agent/skills");
const { join } = await import("node:path");
// Don't await - user can start typing immediately
(async () => {
try {
const { syncSkillsToAgent, SKILLS_DIR } = await import(
"./agent/skills"
);
const { join } = await import("node:path");
const resolvedSkillsDirectory =
skillsDirectory || join(process.cwd(), SKILLS_DIR);
const { skills, errors } = await discoverSkills(
resolvedSkillsDirectory,
);
const resolvedSkillsDirectory =
skillsDirectory || join(process.cwd(), SKILLS_DIR);
if (errors.length > 0) {
console.warn("Errors encountered during skill discovery:");
for (const error of errors) {
console.warn(` ${error.path}: ${error.message}`);
}
await syncSkillsToAgent(client, agent.id, resolvedSkillsDirectory, {
skipIfUnchanged: true,
});
} catch (error) {
console.warn(
`[skills] Background sync failed: ${error instanceof Error ? error.message : String(error)}`,
);
}
// Update the skills memory block with freshly discovered skills
const formattedSkills = formatSkillsForMemory(
skills,
resolvedSkillsDirectory,
);
await client.agents.blocks.update("skills", {
agent_id: agent.id,
value: formattedSkills,
});
} catch (error) {
console.warn(
`Failed to update skills: ${error instanceof Error ? error.message : String(error)}`,
);
}
})();
// Check if we're resuming an existing agent
// We're resuming if:
@@ -1797,6 +1800,7 @@ async function main(): Promise<void> {
});
}
markMilestone("REACT_RENDER_START");
render(
React.createElement(LoadingApp, {
continueSession: shouldContinue,

View File

@@ -0,0 +1,249 @@
import { describe, expect, test } from "bun:test";
import type { Message } from "@letta-ai/letta-client/resources/agents/messages";
import {
isApprovalPendingError,
isApprovalStateDesyncError,
} from "../agent/approval-recovery";
import { extractApprovals } from "../agent/check-approval";
/**
* Tests for approval error detection helpers (LET-7101).
*
* These functions detect two opposite error conditions:
* 1. isApprovalStateDesyncError: Sent approval, but server has no pending approval
* 2. isApprovalPendingError: Sent user message, but server has pending approval waiting
*/
describe("isApprovalStateDesyncError", () => {
  // This is the server's wording when an approval response arrives but no
  // tool call is actually pending on the backend.
  test("detects desync error in detail string", () => {
    expect(
      isApprovalStateDesyncError("No tool call is currently awaiting approval"),
    ).toBe(true);
  });

  test("detects desync error case-insensitively", () => {
    expect(
      isApprovalStateDesyncError("NO TOOL CALL IS CURRENTLY AWAITING APPROVAL"),
    ).toBe(true);
  });

  test("detects desync error in longer message", () => {
    const wrapped =
      "Error: No tool call is currently awaiting approval. The approval request may have expired.";
    expect(isApprovalStateDesyncError(wrapped)).toBe(true);
  });

  test("returns false for unrelated errors", () => {
    for (const detail of ["Connection timeout", "Internal server error"]) {
      expect(isApprovalStateDesyncError(detail)).toBe(false);
    }
  });

  test("returns false for non-string input", () => {
    for (const value of [null, undefined, 123, { error: "test" }]) {
      expect(isApprovalStateDesyncError(value)).toBe(false);
    }
  });
});
describe("isApprovalPendingError", () => {
  // This is the actual error format from the Letta backend (screenshot from LET-7101)
  const REAL_ERROR_DETAIL =
    "CONFLICT: Cannot send a new message: The agent is waiting for approval on a tool call. Please approve or deny the pending request before continuing.";

  test("detects approval pending error in real error format", () => {
    expect(isApprovalPendingError(REAL_ERROR_DETAIL)).toBe(true);
  });

  test("detects approval pending error case-insensitively", () => {
    for (const variant of [
      "CANNOT SEND A NEW MESSAGE",
      "cannot send a new message",
    ]) {
      expect(isApprovalPendingError(variant)).toBe(true);
    }
  });

  test("detects partial match in longer message", () => {
    expect(
      isApprovalPendingError(
        "Error occurred: Cannot send a new message while processing",
      ),
    ).toBe(true);
  });

  test("returns false for desync errors (opposite case)", () => {
    // The OPPOSITE failure mode: approval was sent but nothing is pending.
    expect(
      isApprovalPendingError("No tool call is currently awaiting approval"),
    ).toBe(false);
  });

  test("returns false for unrelated errors", () => {
    for (const detail of [
      "Connection timeout",
      "Rate limit exceeded",
      "Invalid API key",
    ]) {
      expect(isApprovalPendingError(detail)).toBe(false);
    }
  });

  test("returns false for non-string input", () => {
    for (const value of [null, undefined, 123, { detail: REAL_ERROR_DETAIL }]) {
      expect(isApprovalPendingError(value)).toBe(false);
    }
  });
});
/**
 * Tests for parallel tool call approval extraction.
 * Ensures lazy recovery handles multiple simultaneous tool calls correctly.
 */
describe("extractApprovals", () => {
  // Subset of an approval_request_message tool call that these tests populate.
  interface ToolCallStub {
    tool_call_id?: string;
    name?: string;
    arguments?: string;
  }

  // Build a minimal Message-like object for testing. The real Message type is
  // complex, so we cast through unknown.
  const approvalMessage = (fields: {
    tool_calls?: ToolCallStub[];
    tool_call?: ToolCallStub;
  }): Message =>
    ({
      id: "test-msg-id",
      date: new Date().toISOString(),
      message_type: "approval_request_message",
      ...fields,
    }) as unknown as Message;

  test("extracts single tool call from tool_calls array", () => {
    const { pendingApprovals, pendingApproval } = extractApprovals(
      approvalMessage({
        tool_calls: [
          {
            tool_call_id: "call-1",
            name: "Bash",
            arguments: '{"command": "echo hello"}',
          },
        ],
      }),
    );
    expect(pendingApprovals).toHaveLength(1);
    expect(pendingApprovals[0]!.toolCallId).toBe("call-1");
    expect(pendingApprovals[0]!.toolName).toBe("Bash");
    expect(pendingApproval?.toolCallId).toBe("call-1");
  });

  test("extracts multiple parallel tool calls", () => {
    const { pendingApprovals, pendingApproval } = extractApprovals(
      approvalMessage({
        tool_calls: [
          {
            tool_call_id: "call-1",
            name: "Bash",
            arguments: '{"command": "echo hello"}',
          },
          {
            tool_call_id: "call-2",
            name: "web_search",
            arguments: '{"query": "test"}',
          },
          {
            tool_call_id: "call-3",
            name: "Read",
            arguments: '{"file_path": "/tmp/test.txt"}',
          },
        ],
      }),
    );
    expect(pendingApprovals).toHaveLength(3);
    const expected: Array<[string, string]> = [
      ["call-1", "Bash"],
      ["call-2", "web_search"],
      ["call-3", "Read"],
    ];
    expected.forEach(([id, toolName], i) => {
      expect(pendingApprovals[i]!.toolCallId).toBe(id);
      expect(pendingApprovals[i]!.toolName).toBe(toolName);
    });
    // pendingApproval is deprecated and mirrors the first entry.
    expect(pendingApproval?.toolCallId).toBe("call-1");
  });

  test("handles deprecated single tool_call field", () => {
    const { pendingApprovals } = extractApprovals(
      approvalMessage({
        tool_call: {
          tool_call_id: "call-legacy",
          name: "Write",
          arguments: '{"file_path": "/tmp/out.txt"}',
        },
      }),
    );
    expect(pendingApprovals).toHaveLength(1);
    expect(pendingApprovals[0]!.toolCallId).toBe("call-legacy");
    expect(pendingApprovals[0]!.toolName).toBe("Write");
  });

  test("prefers tool_calls array over deprecated tool_call", () => {
    const { pendingApprovals } = extractApprovals(
      approvalMessage({
        tool_calls: [
          { tool_call_id: "call-new", name: "Bash", arguments: "{}" },
        ],
        tool_call: {
          tool_call_id: "call-old",
          name: "Write",
          arguments: "{}",
        },
      }),
    );
    // The tool_calls array wins over the legacy field.
    expect(pendingApprovals).toHaveLength(1);
    expect(pendingApprovals[0]!.toolCallId).toBe("call-new");
  });

  test("filters out tool calls without tool_call_id", () => {
    const { pendingApprovals } = extractApprovals(
      approvalMessage({
        tool_calls: [
          { tool_call_id: "call-valid", name: "Bash", arguments: "{}" },
          { name: "Invalid", arguments: "{}" }, // Missing tool_call_id
          { tool_call_id: "", name: "Empty", arguments: "{}" }, // Empty tool_call_id
          { tool_call_id: "call-valid-2", name: "Read", arguments: "{}" },
        ],
      }),
    );
    // Only entries carrying a non-empty tool_call_id survive.
    expect(pendingApprovals).toHaveLength(2);
    expect(pendingApprovals[0]!.toolCallId).toBe("call-valid");
    expect(pendingApprovals[1]!.toolCallId).toBe("call-valid-2");
  });

  test("returns empty array when no tool calls present", () => {
    const { pendingApprovals, pendingApproval } = extractApprovals(
      approvalMessage({}),
    );
    expect(pendingApprovals).toHaveLength(0);
    expect(pendingApproval).toBeNull();
  });

  test("handles missing name and arguments gracefully", () => {
    const { pendingApprovals } = extractApprovals(
      approvalMessage({ tool_calls: [{ tool_call_id: "call-minimal" }] }),
    );
    expect(pendingApprovals).toHaveLength(1);
    expect(pendingApprovals[0]!.toolCallId).toBe("call-minimal");
    expect(pendingApprovals[0]!.toolName).toBe("");
    expect(pendingApprovals[0]!.toolArgs).toBe("");
  });
});
/**
* Note: Full integration testing of lazy approval recovery requires:
* 1. Starting CLI without --yolo
* 2. Sending a prompt that triggers a tool call requiring approval
* 3. Instead of approving, sending another user message
* 4. Verifying the CONFLICT error is detected and recovery happens
*
* This is complex to automate reliably in unit tests.
* Manual testing or a dedicated integration test suite is recommended.
*/

View File

@@ -0,0 +1,256 @@
import { describe, expect, test } from "bun:test";
import { spawn } from "node:child_process";
/**
* Integration test for lazy approval recovery (LET-7101).
*
* NOTE: The lazy approval recovery is primarily designed for TUI mode where:
* 1. User has a session with pending approvals (e.g., from a previous run)
* 2. User sends a new message before responding to the approval
* 3. Server returns CONFLICT error
* 4. CLI recovers by auto-denying stale approvals and retrying
*
* In bidirectional mode, messages sent during permission wait are dropped
* (see headless.ts line 1710-1714), so we can't directly test the CONFLICT
* scenario here. This test validates that the flow doesn't crash when
* messages are sent while approvals are pending.
*
* The RecoveryMessage emission can be tested by:
* 1. Manual testing in TUI mode (start session with orphaned approval)
* 2. Or by modifying headless mode to not drop messages during permission wait
*/
// Prompt that will trigger a Bash tool call requiring approval
const BASH_TRIGGER_PROMPT =
  "Run this exact bash command: echo test123. Do not use any other tools.";
// Second message to send while approval is pending
const INTERRUPT_MESSAGE =
  "Actually, just say OK instead. Do not call any tools.";
// Loosely-typed view of one NDJSON line from the CLI's stream-json output.
// Only the fields this test inspects are declared explicitly; everything
// else passes through via the index signature.
interface StreamMessage {
  type: string;
  subtype?: string;
  message_type?: string;
  stop_reason?: string;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  [key: string]: any;
}
/**
 * Run bidirectional test with custom message handling.
 * Allows sending messages at specific points in the flow.
 *
 * Sequence driven here:
 *   1. Spawn the CLI in bidirectional stream-json mode (no --yolo, so tool
 *      calls require approval).
 *   2. On the init event, send BASH_TRIGGER_PROMPT to provoke a tool call.
 *   3. On the approval_request_message, send INTERRUPT_MESSAGE instead of an
 *      approval, and watch for a "recovery" wire message.
 *
 * @param timeoutMs - Hard deadline for the whole run (default 3 minutes).
 * @returns messages: every parsed NDJSON line; success: whether the run
 *          completed with at least one result; errorSeen: whether a
 *          recovery (approval_pending) message was observed.
 */
async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
  messages: StreamMessage[];
  success: boolean;
  errorSeen: boolean;
}> {
  return new Promise((resolve, reject) => {
    const proc = spawn(
      "bun",
      [
        "run",
        "dev",
        "-p",
        "--input-format",
        "stream-json",
        "--output-format",
        "stream-json",
        "--new-agent",
        "-m",
        "haiku",
        // NOTE: No --yolo flag - approvals are required
      ],
      {
        cwd: process.cwd(),
        env: { ...process.env },
      },
    );
    // Transcript of every parsed NDJSON line, returned to the caller.
    const messages: StreamMessage[] = [];
    // Partial line carried between stdout chunks (NDJSON line buffering).
    let buffer = "";
    let initReceived = false;
    let approvalSeen = false;
    let interruptSent = false;
    let errorSeen = false;
    let resultCount = 0;
    // Set once we have decided to tear down; suppresses timeout/close races.
    let closing = false;
    const timeout = setTimeout(() => {
      if (!closing) {
        proc.kill();
        reject(new Error(`Test timed out after ${timeoutMs}ms`));
      }
    }, timeoutMs);
    // Graceful teardown: stop the watchdog, then give the child 500ms to
    // flush before ending stdin and killing it.
    const cleanup = () => {
      closing = true;
      clearTimeout(timeout);
      setTimeout(() => {
        proc.stdin?.end();
        proc.kill();
      }, 500);
    };
    // Parse one NDJSON line and advance the scripted conversation.
    const processLine = (line: string) => {
      if (!line.trim()) return;
      try {
        const msg: StreamMessage = JSON.parse(line);
        messages.push(msg);
        // Debug output
        if (process.env.DEBUG_TEST) {
          console.log("MSG:", JSON.stringify(msg, null, 2));
        }
        // Step 1: Wait for init, then send bash trigger prompt
        if (msg.type === "system" && msg.subtype === "init" && !initReceived) {
          initReceived = true;
          const userMsg = JSON.stringify({
            type: "user",
            message: { role: "user", content: BASH_TRIGGER_PROMPT },
          });
          proc.stdin?.write(`${userMsg}\n`);
          return;
        }
        // Step 2: When we see approval request, send another user message instead
        if (
          msg.type === "message" &&
          msg.message_type === "approval_request_message" &&
          !approvalSeen
        ) {
          approvalSeen = true;
          // Wait a moment, then send interrupt message (NOT an approval)
          setTimeout(() => {
            if (!interruptSent) {
              interruptSent = true;
              const userMsg = JSON.stringify({
                type: "user",
                message: { role: "user", content: INTERRUPT_MESSAGE },
              });
              proc.stdin?.write(`${userMsg}\n`);
            }
          }, 500);
          return;
        }
        // Track recovery messages - this is the key signal that lazy recovery worked
        if (
          msg.type === "recovery" &&
          msg.recovery_type === "approval_pending"
        ) {
          errorSeen = true; // reusing this flag to mean "recovery message seen"
        }
        // Also track raw errors (shouldn't see these if recovery works properly)
        if (
          msg.type === "error" ||
          (msg.type === "message" && msg.message_type === "error_message")
        ) {
          // NOTE(review): assumes msg.detail / msg.message is a string here;
          // if it were an object, toLowerCase would throw and be swallowed by
          // the outer catch below — confirm against the error wire format.
          const detail = msg.detail || msg.message || "";
          if (detail.toLowerCase().includes("cannot send a new message")) {
            // Raw error leaked through - recovery may have failed
            console.log(
              "WARNING: Raw CONFLICT error seen (recovery may have failed)",
            );
          }
        }
        // Track results - we need 2 (one for each user message, though first may fail)
        if (msg.type === "result") {
          resultCount++;
          // After second result (or after seeing error + result), we're done
          if (resultCount >= 2 || (errorSeen && resultCount >= 1)) {
            cleanup();
            resolve({ messages, success: true, errorSeen });
          }
        }
      } catch {
        // Not valid JSON, ignore
      }
    };
    // Split stdout chunks into complete lines; keep the trailing partial.
    proc.stdout?.on("data", (data) => {
      buffer += data.toString();
      const lines = buffer.split("\n");
      buffer = lines.pop() || "";
      for (const line of lines) {
        processLine(line);
      }
    });
    // Drain stderr so the child never blocks on a full pipe; content unused.
    let _stderr = "";
    proc.stderr?.on("data", (data) => {
      _stderr += data.toString();
    });
    proc.on("close", (_code) => {
      clearTimeout(timeout);
      // Process any remaining buffer
      if (buffer.trim()) {
        processLine(buffer);
      }
      if (!closing) {
        // If we got here without resolving, check what we have
        resolve({
          messages,
          success: resultCount > 0,
          errorSeen,
        });
      }
    });
    proc.on("error", (err) => {
      clearTimeout(timeout);
      reject(err);
    });
  });
}
describe("lazy approval recovery", () => {
  test("handles concurrent message while approval is pending", async () => {
    const outcome = await runLazyRecoveryTest();

    // Dump the full transcript to aid debugging when the run failed.
    if (!outcome.success) {
      console.log("All messages received:");
      for (const msg of outcome.messages) {
        console.log(JSON.stringify(msg, null, 2));
      }
    }

    // Seeing an approval request proves a tool requiring approval was called.
    const approvalRequest = outcome.messages.find(
      (m) => m.message_type === "approval_request_message",
    );
    expect(approvalRequest).toBeDefined();

    // The overall flow must have completed.
    expect(outcome.success).toBe(true);

    // At least one result must arrive (the second message should always complete).
    const resultTotal = outcome.messages.filter(
      (m) => m.type === "result",
    ).length;
    expect(resultTotal).toBeGreaterThanOrEqual(1);

    // KEY ASSERTION: a recovery message proves the lazy recovery mechanism fired.
    const recoveryMessage = outcome.messages.find(
      (m) => m.type === "recovery" && m.recovery_type === "approval_pending",
    );
    if (recoveryMessage) {
      console.log("Recovery message detected - lazy recovery worked correctly");
      expect(outcome.errorSeen).toBe(true); // Should have been set when we saw recovery
    } else {
      // Recovery might not be triggered if approval was auto-handled before second message
      // This can happen due to timing - the test still validates the flow works
      console.log(
        "Note: No recovery message seen - approval may have been handled before conflict",
      );
    }
  }, 180000); // 3 minute timeout for CI
});

View File

@@ -149,7 +149,7 @@ async function readSkillContent(
try {
const content = await readFile(projectSkillPath, "utf-8");
return { content, path: projectSkillPath };
} catch (primaryError) {
} catch {
// Fallback: check for bundled skills in a repo-level skills directory (legacy)
try {
const bundledSkillsDir = join(process.cwd(), "skills", "skills");
@@ -157,8 +157,11 @@ async function readSkillContent(
const content = await readFile(bundledSkillPath, "utf-8");
return { content, path: bundledSkillPath };
} catch {
// If all fallbacks fail, rethrow the original error
throw primaryError;
// If all fallbacks fail, throw a helpful error message (LET-7101)
// Suggest refresh in case skills sync is still running in background
throw new Error(
`Skill "${skillId}" not found. If you recently added this skill, try Skill({ command: "refresh" }) to re-scan the skills directory.`,
);
}
}
}

View File

@@ -198,6 +198,19 @@ export interface RetryMessage extends MessageEnvelope {
run_id?: string;
}
/**
 * Recovery message emitted when the CLI detects and recovers from errors.
 * Used for approval state conflicts and other recoverable errors.
 */
export interface RecoveryMessage extends MessageEnvelope {
  /** Discriminant for the WireMessage union. */
  type: "recovery";
  /** Type of recovery performed */
  recovery_type: "approval_pending" | "approval_desync";
  /** Human-readable description of what happened */
  message: string;
  /** Run the recovery applies to, when known. */
  run_id?: string;
}
// ═══════════════════════════════════════════════════════════════
// RESULT
// ═══════════════════════════════════════════════════════════════
@@ -329,6 +342,7 @@ export type WireMessage =
| AutoApprovalMessage
| ErrorMessage
| RetryMessage
| RecoveryMessage
| ResultMessage
| ControlResponse
| ControlRequest; // CLI → SDK control requests (e.g., can_use_tool)

View File

@@ -34,6 +34,112 @@ export function logTiming(message: string): void {
}
}
// ============================================================================
// Milestone tracking for latency audits
// ============================================================================
// Store milestones with their timestamps (ms since process start via performance.now())
// Keyed by milestone name; re-marking the same name overwrites the old entry.
const milestones: Map<string, number> = new Map();
// Reference time for relative measurements (set on first milestone)
let firstMilestoneTime: number | null = null;
/**
 * Record a named milestone in the boot/execution sequence.
 * Call this at key points to track where time is spent.
 *
 * The first milestone marked becomes the zero point for all relative
 * offsets. When timings are enabled, the offset is logged to stderr.
 *
 * @param name - Descriptive name like "SETTINGS_LOADED" or "AGENT_RESOLVED"
 */
export function markMilestone(name: string): void {
  const timestamp = performance.now();
  milestones.set(name, timestamp);
  // First milestone anchors relative measurements.
  if (firstMilestoneTime === null) {
    firstMilestoneTime = timestamp;
  }
  if (!isTimingsEnabled()) {
    return;
  }
  const relative = timestamp - firstMilestoneTime;
  console.error(
    `[timing] MILESTONE ${name} at +${formatDuration(relative)} (${formatTimestamp(new Date())})`,
  );
}
/**
 * Log the time elapsed since a previously recorded milestone.
 * No-op unless timings are enabled; warns if the milestone was never marked.
 *
 * @param label - Description of what we're measuring (e.g., "tool loading")
 * @param fromMilestone - Name of the starting milestone
 */
export function measureSinceMilestone(
  label: string,
  fromMilestone: string,
): void {
  if (!isTimingsEnabled()) return;
  const origin = milestones.get(fromMilestone);
  if (origin === undefined) {
    console.error(
      `[timing] WARNING: milestone "${fromMilestone}" not found for measurement "${label}"`,
    );
    return;
  }
  const elapsed = performance.now() - origin;
  console.error(`[timing] ${label}: ${formatDuration(elapsed)}`);
}
/**
 * Compute the elapsed time between two recorded milestones, in milliseconds.
 *
 * @param fromMilestone - Name of the earlier milestone
 * @param toMilestone - Name of the later milestone
 * @returns The duration, or null if either milestone was never marked.
 */
export function getMilestoneDuration(
  fromMilestone: string,
  toMilestone: string,
): number | null {
  const begin = milestones.get(fromMilestone);
  const finish = milestones.get(toMilestone);
  return begin === undefined || finish === undefined ? null : finish - begin;
}
/**
 * Print a chronological summary of all recorded milestones to stderr.
 * Each line shows the offset from the first milestone, plus the delta from
 * the preceding one (omitted for the first entry). No-op unless timings are
 * enabled and at least one milestone exists.
 */
export function reportAllMilestones(): void {
  if (!isTimingsEnabled() || milestones.size === 0) return;
  const origin = firstMilestoneTime ?? 0;
  console.error(`[timing] ======== MILESTONE SUMMARY ========`);
  // Chronological order.
  const ordered = [...milestones.entries()].sort(([, a], [, b]) => a - b);
  let previous = origin;
  for (const [name, when] of ordered) {
    const sinceStart = when - origin;
    const sincePrev = when - previous;
    const deltaStr =
      previous === origin ? "" : ` (+${formatDuration(sincePrev)})`;
    console.error(
      `[timing] +${formatDuration(sinceStart).padStart(8)} ${name}${deltaStr}`,
    );
    previous = when;
  }
  console.error(`[timing] =====================================`);
}
/**
 * Reset all milestone state (useful between sequential benchmark runs).
 */
export function clearMilestones(): void {
  firstMilestoneTime = null;
  milestones.clear();
}
// Simple fetch type that matches the SDK's expected signature
type SimpleFetch = (
input: string | URL | Request,