feat: reduce time-to-boot, remove default eager approval checks on inputs, auto-cancel stale approvals (#579)
Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
341
scripts/latency-benchmark.ts
Normal file
341
scripts/latency-benchmark.ts
Normal file
@@ -0,0 +1,341 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Latency Benchmark Script for Letta Code CLI
|
||||
*
|
||||
* Runs headless mode with LETTA_DEBUG_TIMINGS=1 and parses the output
|
||||
* to measure latency breakdown at different stages.
|
||||
*
|
||||
* Usage:
|
||||
* bun scripts/latency-benchmark.ts
|
||||
* bun scripts/latency-benchmark.ts --scenario fresh-agent
|
||||
* bun scripts/latency-benchmark.ts --iterations 5
|
||||
*
|
||||
* Requires: LETTA_API_KEY environment variable
|
||||
*/
|
||||
|
||||
import { spawn } from "node:child_process";
|
||||
|
||||
/** One `[timing]` API-call log line parsed from stderr. */
interface ApiCall {
  method: string;      // HTTP verb (GET/POST/PUT/DELETE/PATCH)
  path: string;        // request path as logged (may still contain UUIDs)
  durationMs: number;  // duration normalized to milliseconds
  status?: number;     // HTTP status code, when the log line included one
}

/** A named startup milestone and its offset from process start. */
interface Milestone {
  name: string;      // milestone identifier, e.g. CLI_START
  offsetMs: number;  // offset from t=0, normalized to milliseconds
}

/** Aggregated outcome of one benchmark run (one child process). */
interface BenchmarkResult {
  scenario: string;        // name of the ScenarioConfig that was run
  totalMs: number;         // wall-clock time of the whole child process
  milestones: Milestone[]; // parsed MILESTONE lines, in emission order
  apiCalls: ApiCall[];     // parsed API-call timing lines, in emission order
  exitCode: number;        // child exit code (1 when unknown/killed)
}

/** A benchmark scenario: a named set of CLI arguments to run headlessly. */
interface ScenarioConfig {
  name: string;         // unique id, selectable via --scenario
  description: string;  // human-readable summary shown in the report
  args: string[];       // CLI args appended to `bun run dev`
}
|
||||
|
||||
// Define benchmark scenarios.
// Each one runs the CLI headlessly (-p) with --yolo and JSON output so no
// interactive approval prompt or TUI rendering affects the measured timings.
const SCENARIOS: ScenarioConfig[] = [
  {
    name: "fresh-agent",
    description: "Create new agent and send simple prompt",
    args: [
      "-p",
      "What is 2+2? Reply with just the number.",
      "--new-agent",
      "--yolo",
      "--output-format",
      "json",
    ],
  },
  {
    name: "resume-agent",
    description: "Resume last agent and send simple prompt",
    args: [
      "-p",
      "What is 3+3? Reply with just the number.",
      "--continue",
      "--yolo",
      "--output-format",
      "json",
    ],
  },
  {
    name: "minimal-math",
    description: "Simple math question (no tool calls)",
    args: [
      "-p",
      "What is 5+5? Reply with just the number.",
      "--continue",
      "--yolo",
      "--output-format",
      "json",
    ],
  },
];
|
||||
|
||||
/**
|
||||
* Parse timing logs from stderr output
|
||||
*/
|
||||
function parseTimingLogs(stderr: string): {
|
||||
milestones: Milestone[];
|
||||
apiCalls: ApiCall[];
|
||||
} {
|
||||
const milestones: Milestone[] = [];
|
||||
const apiCalls: ApiCall[] = [];
|
||||
|
||||
const lines = stderr.split("\n");
|
||||
|
||||
for (const line of lines) {
|
||||
// Parse milestones: [timing] MILESTONE CLI_START at +0ms (12:34:56.789)
|
||||
const milestoneMatch = line.match(
|
||||
/\[timing\] MILESTONE (\S+) at \+(\d+(?:\.\d+)?)(ms|s)/,
|
||||
);
|
||||
if (milestoneMatch) {
|
||||
const name = milestoneMatch[1]!;
|
||||
let offsetMs = parseFloat(milestoneMatch[2]!);
|
||||
if (milestoneMatch[3] === "s") {
|
||||
offsetMs *= 1000;
|
||||
}
|
||||
milestones.push({ name, offsetMs });
|
||||
continue;
|
||||
}
|
||||
|
||||
// Parse API calls: [timing] GET /v1/agents/... -> 245ms (status: 200)
|
||||
const apiMatch = line.match(
|
||||
/\[timing\] (GET|POST|PUT|DELETE|PATCH) (\S+) -> (\d+(?:\.\d+)?)(ms|s)(?: \(status: (\d+)\))?/,
|
||||
);
|
||||
if (apiMatch) {
|
||||
const method = apiMatch[1]!;
|
||||
const path = apiMatch[2]!;
|
||||
let durationMs = parseFloat(apiMatch[3]!);
|
||||
if (apiMatch[4] === "s") {
|
||||
durationMs *= 1000;
|
||||
}
|
||||
const status = apiMatch[5] ? parseInt(apiMatch[5], 10) : undefined;
|
||||
apiCalls.push({ method, path, durationMs, status });
|
||||
}
|
||||
}
|
||||
|
||||
return { milestones, apiCalls };
|
||||
}
|
||||
|
||||
/**
|
||||
* Run a single benchmark scenario
|
||||
*/
|
||||
async function runBenchmark(scenario: ScenarioConfig): Promise<BenchmarkResult> {
|
||||
const start = performance.now();
|
||||
|
||||
return new Promise((resolve) => {
|
||||
const proc = spawn("bun", ["run", "dev", ...scenario.args], {
|
||||
env: { ...process.env, LETTA_DEBUG_TIMINGS: "1" },
|
||||
stdio: ["pipe", "pipe", "pipe"],
|
||||
});
|
||||
|
||||
let stdout = "";
|
||||
let stderr = "";
|
||||
|
||||
proc.stdout.on("data", (data) => {
|
||||
stdout += data.toString();
|
||||
});
|
||||
|
||||
proc.stderr.on("data", (data) => {
|
||||
stderr += data.toString();
|
||||
});
|
||||
|
||||
proc.on("close", (code) => {
|
||||
const totalMs = performance.now() - start;
|
||||
const { milestones, apiCalls } = parseTimingLogs(stderr);
|
||||
|
||||
resolve({
|
||||
scenario: scenario.name,
|
||||
totalMs,
|
||||
milestones,
|
||||
apiCalls,
|
||||
exitCode: code ?? 1,
|
||||
});
|
||||
});
|
||||
|
||||
// Timeout after 2 minutes
|
||||
setTimeout(() => {
|
||||
proc.kill("SIGTERM");
|
||||
}, 120000);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Format duration for display
|
||||
*/
|
||||
function formatMs(ms: number): string {
|
||||
if (ms < 1000) return `${Math.round(ms)}ms`;
|
||||
return `${(ms / 1000).toFixed(2)}s`;
|
||||
}
|
||||
|
||||
/**
 * Print benchmark results: a per-scenario breakdown (milestones with
 * step-to-step deltas, API calls grouped by endpoint) followed by a
 * summary table across all results.
 */
function printResults(results: BenchmarkResult[]): void {
  console.log("\n" + "=".repeat(70));
  console.log("LATENCY BENCHMARK RESULTS");
  console.log("=".repeat(70) + "\n");

  for (const result of results) {
    // Look up the scenario definition for its human-readable description
    const scenario = SCENARIOS.find((s) => s.name === result.scenario);
    console.log(`Scenario: ${result.scenario}`);
    console.log(` ${scenario?.description || ""}`);
    console.log(` Exit code: ${result.exitCode}`);
    console.log(` Total wall time: ${formatMs(result.totalMs)}`);
    console.log("");

    // Print milestones
    if (result.milestones.length > 0) {
      console.log(" Milestones:");
      let prevMs = 0;
      for (const milestone of result.milestones) {
        // Delta since the previous milestone; first one gets no suffix
        // (its delta would just duplicate its absolute offset)
        const delta = milestone.offsetMs - prevMs;
        const deltaStr = prevMs === 0 ? "" : ` (+${formatMs(delta)})`;
        console.log(
          ` +${formatMs(milestone.offsetMs).padStart(8)} ${milestone.name}${deltaStr}`,
        );
        prevMs = milestone.offsetMs;
      }
      console.log("");
    }

    // Print API calls summary
    if (result.apiCalls.length > 0) {
      console.log(" API Calls:");
      const totalApiMs = result.apiCalls.reduce((sum, c) => sum + c.durationMs, 0);

      // Group by path pattern
      const grouped: Record<string, { count: number; totalMs: number }> = {};
      for (const call of result.apiCalls) {
        // Normalize paths (remove UUIDs) so repeated calls to the same
        // endpoint aggregate under one key
        const normalizedPath = call.path.replace(
          /[a-f0-9-]{36}/g,
          "{id}",
        );
        const key = `${call.method} ${normalizedPath}`;
        if (!grouped[key]) {
          grouped[key] = { count: 0, totalMs: 0 };
        }
        grouped[key].count++;
        grouped[key].totalMs += call.durationMs;
      }

      // Sort by total time (descending) so the most expensive endpoints lead
      const sorted = Object.entries(grouped).sort(
        (a, b) => b[1].totalMs - a[1].totalMs,
      );

      for (const [endpoint, stats] of sorted) {
        const countStr = stats.count > 1 ? ` (x${stats.count})` : "";
        console.log(
          ` ${formatMs(stats.totalMs).padStart(8)} ${endpoint}${countStr}`,
        );
      }

      console.log(` ${"─".repeat(50)}`);
      console.log(` ${formatMs(totalApiMs).padStart(8)} Total API time`);
      // NOTE(review): wall time minus summed API time approximates CLI-side
      // work; if any API calls overlap in time this underestimates overlap
      // and can even go negative — confirm calls are sequential.
      console.log(
        ` ${formatMs(result.totalMs - totalApiMs).padStart(8)} CLI overhead (non-API)`,
      );
    }

    console.log("\n" + "-".repeat(70) + "\n");
  }

  // Summary table — one row per result (multiple iterations of the same
  // scenario appear as repeated rows, not averaged)
  console.log("SUMMARY");
  console.log("-".repeat(70));
  console.log(
    "Scenario".padEnd(20) +
      "Total".padStart(12) +
      "API Time".padStart(12) +
      "CLI Overhead".padStart(14),
  );
  console.log("-".repeat(70));

  for (const result of results) {
    const totalApiMs = result.apiCalls.reduce((sum, c) => sum + c.durationMs, 0);
    const cliOverhead = result.totalMs - totalApiMs;
    console.log(
      result.scenario.padEnd(20) +
        formatMs(result.totalMs).padStart(12) +
        formatMs(totalApiMs).padStart(12) +
        formatMs(cliOverhead).padStart(14),
    );
  }
  console.log("-".repeat(70));
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
// Parse args
|
||||
const args = process.argv.slice(2);
|
||||
let scenarioFilter: string | null = null;
|
||||
let iterations = 1;
|
||||
|
||||
for (let i = 0; i < args.length; i++) {
|
||||
if (args[i] === "--scenario" && args[i + 1]) {
|
||||
scenarioFilter = args[++i]!;
|
||||
} else if (args[i] === "--iterations" && args[i + 1]) {
|
||||
iterations = parseInt(args[++i]!, 10);
|
||||
}
|
||||
}
|
||||
|
||||
// Check prereqs
|
||||
if (!process.env.LETTA_API_KEY) {
|
||||
console.error("Error: LETTA_API_KEY environment variable is required");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Filter scenarios
|
||||
const scenariosToRun = scenarioFilter
|
||||
? SCENARIOS.filter((s) => s.name === scenarioFilter)
|
||||
: SCENARIOS;
|
||||
|
||||
if (scenariosToRun.length === 0) {
|
||||
console.error(`Error: Unknown scenario "${scenarioFilter}"`);
|
||||
console.error(`Available scenarios: ${SCENARIOS.map((s) => s.name).join(", ")}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log("Running latency benchmarks...");
|
||||
console.log(`Scenarios: ${scenariosToRun.map((s) => s.name).join(", ")}`);
|
||||
console.log(`Iterations: ${iterations}`);
|
||||
console.log("");
|
||||
|
||||
const allResults: BenchmarkResult[] = [];
|
||||
|
||||
for (let iter = 0; iter < iterations; iter++) {
|
||||
if (iterations > 1) {
|
||||
console.log(`\n--- Iteration ${iter + 1} of ${iterations} ---`);
|
||||
}
|
||||
|
||||
for (const scenario of scenariosToRun) {
|
||||
console.log(`Running: ${scenario.name}...`);
|
||||
const result = await runBenchmark(scenario);
|
||||
allResults.push(result);
|
||||
|
||||
if (result.exitCode !== 0) {
|
||||
console.warn(` Warning: ${scenario.name} exited with code ${result.exitCode}`);
|
||||
} else {
|
||||
console.log(` Completed in ${formatMs(result.totalMs)}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
printResults(allResults);
|
||||
}
|
||||
|
||||
// Top-level error trap: print any unhandled failure and exit non-zero so
// shell callers / CI can detect a failed benchmark run.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|
||||
@@ -2,9 +2,14 @@ import type { MessageCreate } from "@letta-ai/letta-client/resources/agents/agen
|
||||
import { getClient } from "./client";
|
||||
import { APPROVAL_RECOVERY_PROMPT } from "./promptAssets";
|
||||
|
||||
// Error when trying to SEND approval but server has no pending approval
|
||||
const APPROVAL_RECOVERY_DETAIL_FRAGMENT =
|
||||
"no tool call is currently awaiting approval";
|
||||
|
||||
// Error when trying to SEND message but server has pending approval waiting
|
||||
// This is the CONFLICT error - opposite of desync
|
||||
const APPROVAL_PENDING_DETAIL_FRAGMENT = "cannot send a new message";
|
||||
|
||||
type RunErrorMetadata =
|
||||
| {
|
||||
error_type?: string;
|
||||
@@ -20,6 +25,19 @@ export function isApprovalStateDesyncError(detail: unknown): boolean {
|
||||
return detail.toLowerCase().includes(APPROVAL_RECOVERY_DETAIL_FRAGMENT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if error indicates there's a pending approval blocking new messages.
|
||||
* This is the CONFLICT error from the backend when trying to send a user message
|
||||
* while the agent is waiting for approval on a tool call.
|
||||
*
|
||||
* Error format:
|
||||
* { detail: "CONFLICT: Cannot send a new message: The agent is waiting for approval..." }
|
||||
*/
|
||||
export function isApprovalPendingError(detail: unknown): boolean {
|
||||
if (typeof detail !== "string") return false;
|
||||
return detail.toLowerCase().includes(APPROVAL_PENDING_DETAIL_FRAGMENT);
|
||||
}
|
||||
|
||||
export async function fetchRunErrorDetail(
|
||||
runId: string | null | undefined,
|
||||
): Promise<string | null> {
|
||||
|
||||
@@ -29,8 +29,9 @@ export interface ResumeData {
|
||||
|
||||
/**
|
||||
* Extract approval requests from an approval_request_message.
|
||||
* Exported for testing parallel tool call handling.
|
||||
*/
|
||||
function extractApprovals(messageToCheck: Message): {
|
||||
export function extractApprovals(messageToCheck: Message): {
|
||||
pendingApproval: ApprovalRequest | null;
|
||||
pendingApprovals: ApprovalRequest[];
|
||||
} {
|
||||
|
||||
@@ -271,26 +271,14 @@ export async function createAgent(
|
||||
// Track provenance: which blocks were created
|
||||
// Note: We no longer reuse shared blocks - each agent gets fresh blocks
|
||||
const blockProvenance: BlockProvenance[] = [];
|
||||
const blockIds: string[] = [];
|
||||
|
||||
// Create all blocks fresh for the new agent
|
||||
// Mark new blocks for provenance tracking (actual creation happens in agents.create)
|
||||
for (const block of filteredMemoryBlocks) {
|
||||
try {
|
||||
const createdBlock = await client.blocks.create(block);
|
||||
if (!createdBlock.id) {
|
||||
throw new Error(`Created block ${block.label} has no ID`);
|
||||
}
|
||||
blockIds.push(createdBlock.id);
|
||||
blockProvenance.push({ label: block.label, source: "new" });
|
||||
} catch (error) {
|
||||
console.error(`Failed to create block ${block.label}:`, error);
|
||||
throw error;
|
||||
}
|
||||
blockProvenance.push({ label: block.label, source: "new" });
|
||||
}
|
||||
|
||||
// Add any referenced block IDs (existing blocks to attach)
|
||||
// Mark referenced blocks for provenance tracking
|
||||
for (const blockId of referencedBlockIds) {
|
||||
blockIds.push(blockId);
|
||||
blockProvenance.push({ label: blockId, source: "shared" });
|
||||
}
|
||||
|
||||
@@ -314,7 +302,9 @@ export async function createAgent(
|
||||
systemPromptContent = `${systemPromptContent}\n\n${options.systemPromptAppend}`;
|
||||
}
|
||||
|
||||
// Create agent with all block IDs (existing + newly created)
|
||||
// Create agent with inline memory blocks (LET-7101: single API call instead of N+1)
|
||||
// - memory_blocks: new blocks to create inline
|
||||
// - block_ids: references to existing blocks (for shared memory)
|
||||
const tags = ["origin:letta-code"];
|
||||
if (process.env.LETTA_CODE_AGENT_ROLE === "subagent") {
|
||||
tags.push("role:subagent");
|
||||
@@ -332,7 +322,11 @@ export async function createAgent(
|
||||
model: modelHandle,
|
||||
context_window_limit: contextWindow,
|
||||
tools: toolNames,
|
||||
block_ids: blockIds,
|
||||
// New blocks created inline with agent (saves ~2s of sequential API calls)
|
||||
memory_blocks:
|
||||
filteredMemoryBlocks.length > 0 ? filteredMemoryBlocks : undefined,
|
||||
// Referenced block IDs (existing blocks to attach)
|
||||
block_ids: referencedBlockIds.length > 0 ? referencedBlockIds : undefined,
|
||||
tags,
|
||||
// should be default off, but just in case
|
||||
include_base_tools: false,
|
||||
|
||||
@@ -486,3 +486,125 @@ export function formatSkillsForMemory(
|
||||
// Otherwise fall back to compact tree format
|
||||
return formatSkillsAsTree(skills, skillsDirectory);
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Skills Sync with Hash-Based Caching (Phase 2.5 - LET-7101)
|
||||
// ============================================================================
|
||||
|
||||
import { createHash } from "node:crypto";
|
||||
import { mkdir, writeFile } from "node:fs/promises";
|
||||
|
||||
/**
|
||||
* Get the project-local skills hash file path.
|
||||
* Uses .letta/skills-hash.json in the current working directory
|
||||
* because the skills block content depends on the project's .skills/ folder.
|
||||
*/
|
||||
function getSkillsHashFilePath(): string {
|
||||
return join(process.cwd(), ".letta", "skills-hash.json");
|
||||
}
|
||||
|
||||
/** Shape of the JSON persisted in .letta/skills-hash.json. */
interface SkillsHashCache {
  hash: string;       // truncated sha256 of the formatted skills content
  timestamp: string;  // ISO timestamp of when the hash was last written
}
|
||||
|
||||
/**
|
||||
* Compute a hash of the formatted skills content
|
||||
*/
|
||||
function computeSkillsHash(content: string): string {
|
||||
return createHash("sha256").update(content).digest("hex").slice(0, 16);
|
||||
}
|
||||
|
||||
/**
 * Read the previously cached skills hash from disk.
 * Returns null on any failure (missing file, unreadable, malformed JSON)
 * so callers simply treat it as "no cache" and perform a full sync.
 */
async function getCachedSkillsHash(): Promise<string | null> {
  try {
    const hashFile = getSkillsHashFilePath();
    const data = await readFile(hashFile, "utf-8");
    const cache: SkillsHashCache = JSON.parse(data);
    return cache.hash;
  } catch {
    // Deliberate best-effort: a missing/corrupt cache is not an error,
    // it just means the next sync cannot be skipped.
    return null;
  }
}
|
||||
|
||||
/**
 * Persist the skills hash to the project-local cache file, creating the
 * .letta/ directory if needed.
 * Failures are swallowed deliberately: the cache is only an optimization,
 * and a failed write merely means the next run re-syncs the skills block.
 */
async function setCachedSkillsHash(hash: string): Promise<void> {
  try {
    const hashFile = getSkillsHashFilePath();
    // Ensure project .letta directory exists
    const lettaDir = join(process.cwd(), ".letta");
    await mkdir(lettaDir, { recursive: true });

    const cache: SkillsHashCache = {
      hash,
      timestamp: new Date().toISOString(),
    };
    // Pretty-printed so the cache file is human-inspectable
    await writeFile(hashFile, JSON.stringify(cache, null, 2));
  } catch {
    // Ignore cache write failures - not critical
  }
}
|
||||
|
||||
/**
 * Sync skills to an agent's memory block.
 * Discovers skills from filesystem and updates the skills block.
 *
 * @param client - Letta client
 * @param agentId - Agent ID to update
 * @param skillsDirectory - Path to project skills directory
 * @param options - Optional settings; `skipIfUnchanged` short-circuits the
 *   block update when the content hash matches the project-local cache
 * @returns Object indicating if sync occurred and discovered skills
 */
export async function syncSkillsToAgent(
  client: import("@letta-ai/letta-client").default,
  agentId: string,
  skillsDirectory: string,
  options?: { skipIfUnchanged?: boolean },
): Promise<{ synced: boolean; skills: Skill[] }> {
  // Discover skills from filesystem
  const { skills, errors } = await discoverSkills(skillsDirectory);

  // Discovery errors are non-fatal: warn and continue with what was found
  if (errors.length > 0) {
    for (const error of errors) {
      console.warn(`[skills] Discovery error: ${error.path}: ${error.message}`);
    }
  }

  // Format skills for memory block
  const formattedSkills = formatSkillsForMemory(skills, skillsDirectory);

  // Check if we can skip the update (hash-based caching)
  if (options?.skipIfUnchanged) {
    const newHash = computeSkillsHash(formattedSkills);
    const cachedHash = await getCachedSkillsHash();

    // Content unchanged since last sync - skip the API call entirely
    if (newHash === cachedHash) {
      return { synced: false, skills };
    }

    // Update the block and cache the new hash
    // NOTE(review): the cache is keyed by cwd, not by agentId — this
    // assumes one agent per project directory; a different agent in the
    // same directory could wrongly hit the skip path. Confirm.
    await client.agents.blocks.update("skills", {
      agent_id: agentId,
      value: formattedSkills,
    });
    await setCachedSkillsHash(newHash);

    return { synced: true, skills };
  }

  // No skip option - always update
  await client.agents.blocks.update("skills", {
    agent_id: agentId,
    value: formattedSkills,
  });

  // Update hash cache for future runs
  const newHash = computeSkillsHash(formattedSkills);
  await setCachedSkillsHash(newHash);

  return { synced: true, skills };
}
|
||||
|
||||
160
src/cli/App.tsx
160
src/cli/App.tsx
@@ -30,6 +30,7 @@ import {
|
||||
import {
|
||||
buildApprovalRecoveryMessage,
|
||||
fetchRunErrorDetail,
|
||||
isApprovalPendingError,
|
||||
isApprovalStateDesyncError,
|
||||
} from "../agent/approval-recovery";
|
||||
import { prefetchAvailableModelHandles } from "../agent/available-models";
|
||||
@@ -175,10 +176,10 @@ import { useTerminalRows, useTerminalWidth } from "./hooks/useTerminalWidth";
|
||||
const CLEAR_SCREEN_AND_HOME = "\u001B[2J\u001B[H";
|
||||
const MIN_RESIZE_DELTA = 2;
|
||||
|
||||
// Feature flag: Check for pending approvals before sending messages
|
||||
// This prevents infinite thinking state when there's an orphaned approval
|
||||
// Can be disabled if the latency check adds too much overhead
|
||||
const CHECK_PENDING_APPROVALS_BEFORE_SEND = true;
|
||||
// Eager approval checking is now CONDITIONAL (LET-7101):
|
||||
// - Enabled when resuming a session (--resume, --continue, or startupApprovals exist)
|
||||
// - Disabled for normal messages (lazy recovery handles edge cases)
|
||||
// This saves ~2s latency per message in the common case.
|
||||
|
||||
// Feature flag: Eagerly cancel streams client-side when user presses ESC
|
||||
// When true (default), immediately abort the stream after calling .cancel()
|
||||
@@ -678,6 +679,12 @@ export default function App({
|
||||
>(null);
|
||||
const toolAbortControllerRef = useRef<AbortController | null>(null);
|
||||
|
||||
// Eager approval checking: only enabled when resuming a session (LET-7101)
|
||||
// After first successful message, we disable it since any new approvals are from our own turn
|
||||
const [needsEagerApprovalCheck, setNeedsEagerApprovalCheck] = useState(
|
||||
() => resumedExistingConversation || startupApprovals.length > 0,
|
||||
);
|
||||
|
||||
// Track auto-handled results to combine with user decisions
|
||||
const [autoHandledResults, setAutoHandledResults] = useState<
|
||||
Array<{
|
||||
@@ -1941,6 +1948,12 @@ export default function App({
|
||||
setStreaming(false);
|
||||
llmApiErrorRetriesRef.current = 0; // Reset retry counter on success
|
||||
|
||||
// Disable eager approval check after first successful message (LET-7101)
|
||||
// Any new approvals from here on are from our own turn, not orphaned
|
||||
if (needsEagerApprovalCheck) {
|
||||
setNeedsEagerApprovalCheck(false);
|
||||
}
|
||||
|
||||
// Send desktop notification when turn completes
|
||||
// and we're not about to auto-send another queued message
|
||||
if (!waitingForQueueCancelRef.current) {
|
||||
@@ -2552,6 +2565,112 @@ export default function App({
|
||||
return;
|
||||
}
|
||||
|
||||
// Check for approval pending error (sent user message while approval waiting)
|
||||
// This is the lazy recovery path for when needsEagerApprovalCheck is false
|
||||
const approvalPendingDetected =
|
||||
isApprovalPendingError(detailFromRun) ||
|
||||
isApprovalPendingError(latestErrorText);
|
||||
|
||||
if (
|
||||
!hasApprovalInPayload &&
|
||||
approvalPendingDetected &&
|
||||
llmApiErrorRetriesRef.current < LLM_API_ERROR_MAX_RETRIES
|
||||
) {
|
||||
llmApiErrorRetriesRef.current += 1;
|
||||
|
||||
// Log for debugging (visible in transcripts)
|
||||
const statusId = uid("status");
|
||||
buffersRef.current.byId.set(statusId, {
|
||||
kind: "status",
|
||||
id: statusId,
|
||||
lines: [
|
||||
"[LAZY RECOVERY] Detected CONFLICT: server has pending approval",
|
||||
"[LAZY RECOVERY] Fetching stale approvals to auto-deny...",
|
||||
],
|
||||
});
|
||||
buffersRef.current.order.push(statusId);
|
||||
refreshDerived();
|
||||
|
||||
try {
|
||||
// Fetch pending approvals and auto-deny them
|
||||
const client = await getClient();
|
||||
const agent = await client.agents.retrieve(agentIdRef.current);
|
||||
const { pendingApprovals: existingApprovals } =
|
||||
await getResumeData(client, agent, conversationIdRef.current);
|
||||
|
||||
if (existingApprovals && existingApprovals.length > 0) {
|
||||
// Update status with details
|
||||
buffersRef.current.byId.set(statusId, {
|
||||
kind: "status",
|
||||
id: statusId,
|
||||
lines: [
|
||||
"[LAZY RECOVERY] Detected CONFLICT: server has pending approval",
|
||||
`[LAZY RECOVERY] Found ${existingApprovals.length} stale approval(s):`,
|
||||
...existingApprovals.map(
|
||||
(a) =>
|
||||
` - ${a.toolName} (${a.toolCallId.slice(0, 8)}...)`,
|
||||
),
|
||||
"[LAZY RECOVERY] Auto-denying and batching with user message...",
|
||||
],
|
||||
});
|
||||
refreshDerived();
|
||||
|
||||
// Create denial results for all stale approvals
|
||||
// Use the same format as handleCancelApprovals (lines 6390-6395)
|
||||
const denialResults = existingApprovals.map((approval) => ({
|
||||
type: "approval" as const,
|
||||
tool_call_id: approval.toolCallId,
|
||||
approve: false,
|
||||
reason:
|
||||
"Auto-denied: stale approval from interrupted session",
|
||||
}));
|
||||
|
||||
// Prepend approval denials to the current input (keeps user message)
|
||||
const approvalPayload: ApprovalCreate = {
|
||||
type: "approval",
|
||||
approvals: denialResults,
|
||||
};
|
||||
currentInput.unshift(approvalPayload);
|
||||
} else {
|
||||
// No approvals found - server state may have cleared
|
||||
buffersRef.current.byId.set(statusId, {
|
||||
kind: "status",
|
||||
id: statusId,
|
||||
lines: [
|
||||
"[LAZY RECOVERY] Detected CONFLICT but no pending approvals found",
|
||||
"[LAZY RECOVERY] Retrying message...",
|
||||
],
|
||||
});
|
||||
refreshDerived();
|
||||
}
|
||||
} catch (_recoveryError) {
|
||||
// If we can't fetch approvals, just retry the original message
|
||||
buffersRef.current.byId.set(statusId, {
|
||||
kind: "status",
|
||||
id: statusId,
|
||||
lines: [
|
||||
"[LAZY RECOVERY] Failed to fetch pending approvals",
|
||||
"[LAZY RECOVERY] Retrying message anyway...",
|
||||
],
|
||||
});
|
||||
refreshDerived();
|
||||
}
|
||||
|
||||
// Brief pause so user can see the status
|
||||
await new Promise((resolve) => setTimeout(resolve, 500));
|
||||
|
||||
// Remove the transient status
|
||||
buffersRef.current.byId.delete(statusId);
|
||||
buffersRef.current.order = buffersRef.current.order.filter(
|
||||
(id) => id !== statusId,
|
||||
);
|
||||
refreshDerived();
|
||||
|
||||
// Reset interrupted flag so retry stream chunks are processed
|
||||
buffersRef.current.interrupted = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if this is a retriable error (transient LLM API error)
|
||||
const retriable = await isRetriableError(
|
||||
stopReasonToHandle,
|
||||
@@ -2757,6 +2876,7 @@ export default function App({
|
||||
setStreaming,
|
||||
currentModelId,
|
||||
updateStreamingOutput,
|
||||
needsEagerApprovalCheck,
|
||||
],
|
||||
);
|
||||
|
||||
@@ -3278,7 +3398,8 @@ export default function App({
|
||||
const checkPendingApprovalsForSlashCommand = useCallback(async (): Promise<
|
||||
{ blocked: true } | { blocked: false }
|
||||
> => {
|
||||
if (!CHECK_PENDING_APPROVALS_BEFORE_SEND) {
|
||||
// Only check eagerly when resuming a session (LET-7101)
|
||||
if (!needsEagerApprovalCheck) {
|
||||
return { blocked: false };
|
||||
}
|
||||
|
||||
@@ -3425,7 +3546,13 @@ export default function App({
|
||||
// If check fails, proceed anyway (don't block user)
|
||||
return { blocked: false };
|
||||
}
|
||||
}, [agentId, processConversation, refreshDerived, updateStreamingOutput]);
|
||||
}, [
|
||||
agentId,
|
||||
processConversation,
|
||||
refreshDerived,
|
||||
updateStreamingOutput,
|
||||
needsEagerApprovalCheck,
|
||||
]);
|
||||
|
||||
// biome-ignore lint/correctness/useExhaustiveDependencies: refs read .current dynamically, complex callback with intentional deps
|
||||
const onSubmit = useCallback(
|
||||
@@ -5372,7 +5499,20 @@ DO NOT respond to these messages or otherwise consider them in your response unl
|
||||
|
||||
// Check for pending approvals before sending message (skip if we already have
|
||||
// a queued approval response to send first).
|
||||
if (CHECK_PENDING_APPROVALS_BEFORE_SEND && !queuedApprovalResults) {
|
||||
// Only do eager check when resuming a session (LET-7101) - otherwise lazy recovery handles it
|
||||
if (needsEagerApprovalCheck && !queuedApprovalResults) {
|
||||
// Log for debugging
|
||||
const eagerStatusId = uid("status");
|
||||
buffersRef.current.byId.set(eagerStatusId, {
|
||||
kind: "status",
|
||||
id: eagerStatusId,
|
||||
lines: [
|
||||
"[EAGER CHECK] Checking for pending approvals (resume mode)...",
|
||||
],
|
||||
});
|
||||
buffersRef.current.order.push(eagerStatusId);
|
||||
refreshDerived();
|
||||
|
||||
try {
|
||||
const client = await getClient();
|
||||
// Fetch fresh agent state to check for pending approvals with accurate in-context messages
|
||||
@@ -5383,6 +5523,12 @@ DO NOT respond to these messages or otherwise consider them in your response unl
|
||||
conversationIdRef.current,
|
||||
);
|
||||
|
||||
// Remove eager check status
|
||||
buffersRef.current.byId.delete(eagerStatusId);
|
||||
buffersRef.current.order = buffersRef.current.order.filter(
|
||||
(id) => id !== eagerStatusId,
|
||||
);
|
||||
|
||||
// Check if user cancelled while we were fetching approval state
|
||||
if (
|
||||
userCancelledRef.current ||
|
||||
|
||||
@@ -11,6 +11,7 @@ import type { ApprovalResult } from "./agent/approval-execution";
|
||||
import {
|
||||
buildApprovalRecoveryMessage,
|
||||
fetchRunErrorDetail,
|
||||
isApprovalPendingError,
|
||||
isApprovalStateDesyncError,
|
||||
} from "./agent/approval-recovery";
|
||||
import { getClient } from "./agent/client";
|
||||
@@ -40,11 +41,17 @@ import type {
|
||||
ControlResponse,
|
||||
ErrorMessage,
|
||||
MessageWire,
|
||||
RecoveryMessage,
|
||||
ResultMessage,
|
||||
RetryMessage,
|
||||
StreamEvent,
|
||||
SystemInitMessage,
|
||||
} from "./types/protocol";
|
||||
import {
|
||||
markMilestone,
|
||||
measureSinceMilestone,
|
||||
reportAllMilestones,
|
||||
} from "./utils/timing";
|
||||
|
||||
// Maximum number of times to retry a turn when the backend
|
||||
// reports an `llm_api_error` stop reason. This helps smooth
|
||||
@@ -169,6 +176,7 @@ export async function handleHeadlessCommand(
|
||||
}
|
||||
|
||||
const client = await getClient();
|
||||
markMilestone("HEADLESS_CLIENT_READY");
|
||||
|
||||
// Check for --resume flag (interactive only)
|
||||
if (values.resume) {
|
||||
@@ -462,6 +470,7 @@ export async function handleHeadlessCommand(
|
||||
console.error("No agent found. Use --new-agent to create a new agent.");
|
||||
process.exit(1);
|
||||
}
|
||||
markMilestone("HEADLESS_AGENT_RESOLVED");
|
||||
|
||||
// Check if we're resuming an existing agent (not creating a new one)
|
||||
const isResumingAgent = !!(
|
||||
@@ -567,6 +576,7 @@ export async function handleHeadlessCommand(
|
||||
});
|
||||
conversationId = conversation.id;
|
||||
}
|
||||
markMilestone("HEADLESS_CONVERSATION_READY");
|
||||
|
||||
// Save session (agent + conversation) to both project and global settings
|
||||
// Skip for subagents - they shouldn't pollute the LRU settings
|
||||
@@ -591,41 +601,33 @@ export async function handleHeadlessCommand(
|
||||
|
||||
// Set agent context for tools that need it (e.g., Skill tool, Task tool)
|
||||
setAgentContext(agent.id, skillsDirectory);
|
||||
await initializeLoadedSkillsFlag();
|
||||
|
||||
// Re-discover skills and update the skills memory block
|
||||
// Fire-and-forget: Initialize loaded skills flag (LET-7101)
|
||||
// Don't await - this is just for the skill unload reminder
|
||||
initializeLoadedSkillsFlag().catch(() => {
|
||||
// Ignore errors - not critical
|
||||
});
|
||||
|
||||
// Fire-and-forget: Sync skills in background (LET-7101)
|
||||
// This ensures new skills added after agent creation are available
|
||||
try {
|
||||
const { discoverSkills, formatSkillsForMemory, SKILLS_DIR } = await import(
|
||||
"./agent/skills"
|
||||
);
|
||||
const { join } = await import("node:path");
|
||||
// Don't await - proceed to message sending immediately
|
||||
(async () => {
|
||||
try {
|
||||
const { syncSkillsToAgent, SKILLS_DIR } = await import("./agent/skills");
|
||||
const { join } = await import("node:path");
|
||||
|
||||
const resolvedSkillsDirectory =
|
||||
skillsDirectory || join(process.cwd(), SKILLS_DIR);
|
||||
const { skills, errors } = await discoverSkills(resolvedSkillsDirectory);
|
||||
const resolvedSkillsDirectory =
|
||||
skillsDirectory || join(process.cwd(), SKILLS_DIR);
|
||||
|
||||
if (errors.length > 0) {
|
||||
console.warn("Errors encountered during skill discovery:");
|
||||
for (const error of errors) {
|
||||
console.warn(` ${error.path}: ${error.message}`);
|
||||
}
|
||||
await syncSkillsToAgent(client, agent.id, resolvedSkillsDirectory, {
|
||||
skipIfUnchanged: true,
|
||||
});
|
||||
} catch (error) {
|
||||
console.warn(
|
||||
`[skills] Background sync failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
}
|
||||
|
||||
// Update the skills memory block with freshly discovered skills
|
||||
const formattedSkills = formatSkillsForMemory(
|
||||
skills,
|
||||
resolvedSkillsDirectory,
|
||||
);
|
||||
await client.agents.blocks.update("skills", {
|
||||
agent_id: agent.id,
|
||||
value: formattedSkills,
|
||||
});
|
||||
} catch (error) {
|
||||
console.warn(
|
||||
`Failed to update skills: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
}
|
||||
})();
|
||||
|
||||
// Validate output format
|
||||
const outputFormat =
|
||||
@@ -830,8 +832,11 @@ export async function handleHeadlessCommand(
|
||||
}
|
||||
};
|
||||
|
||||
// Clear any pending approvals before starting a new turn
|
||||
await resolveAllPendingApprovals();
|
||||
// Clear any pending approvals before starting a new turn - ONLY when resuming (LET-7101)
|
||||
// For new agents/conversations, lazy recovery handles any edge cases
|
||||
if (isResumingAgent) {
|
||||
await resolveAllPendingApprovals();
|
||||
}
|
||||
|
||||
// Build message content with reminders (plan mode first, then skill unload)
|
||||
const { permissionMode } = await import("./permissions/mode");
|
||||
@@ -865,6 +870,9 @@ export async function handleHeadlessCommand(
|
||||
let lastKnownRunId: string | null = null;
|
||||
let llmApiErrorRetries = 0;
|
||||
|
||||
markMilestone("HEADLESS_FIRST_STREAM_START");
|
||||
measureSinceMilestone("headless-setup-total", "HEADLESS_CLIENT_READY");
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
const stream = await sendMessageStream(conversationId, currentInput);
|
||||
@@ -922,8 +930,25 @@ export async function handleHeadlessCommand(
|
||||
}
|
||||
|
||||
// Detect server conflict due to pending approval; handle it and retry
|
||||
if (errorInfo?.message?.includes("Cannot send a new message")) {
|
||||
// Don't emit this error; clear approvals and retry outer loop
|
||||
// Check both detail and message fields since error formats vary
|
||||
if (
|
||||
isApprovalPendingError(errorInfo?.detail) ||
|
||||
isApprovalPendingError(errorInfo?.message)
|
||||
) {
|
||||
// Emit recovery message for stream-json mode (enables testing)
|
||||
if (outputFormat === "stream-json") {
|
||||
const recoveryMsg: RecoveryMessage = {
|
||||
type: "recovery",
|
||||
recovery_type: "approval_pending",
|
||||
message:
|
||||
"Detected pending approval conflict; auto-denying stale approval and retrying",
|
||||
run_id: lastRunId ?? undefined,
|
||||
session_id: sessionId,
|
||||
uuid: `recovery-${lastRunId || crypto.randomUUID()}`,
|
||||
};
|
||||
console.log(JSON.stringify(recoveryMsg));
|
||||
}
|
||||
// Clear approvals and retry outer loop
|
||||
await resolveAllPendingApprovals();
|
||||
// Reset state and restart turn
|
||||
stopReason = "error" as StopReasonType;
|
||||
@@ -1541,6 +1566,10 @@ export async function handleHeadlessCommand(
|
||||
}
|
||||
console.log(resultText);
|
||||
}
|
||||
|
||||
// Report all milestones at the end for latency audit
|
||||
markMilestone("HEADLESS_COMPLETE");
|
||||
reportAllMilestones();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
66
src/index.ts
66
src/index.ts
@@ -17,6 +17,7 @@ import { permissionMode } from "./permissions/mode";
|
||||
import { settingsManager } from "./settings-manager";
|
||||
import { telemetry } from "./telemetry";
|
||||
import { loadTools } from "./tools/manager";
|
||||
import { markMilestone } from "./utils/timing";
|
||||
|
||||
// Stable empty array constants to prevent new references on every render
|
||||
// These are used as fallbacks when resumeData is null, avoiding the React
|
||||
@@ -335,9 +336,12 @@ async function getPinnedAgentNames(): Promise<{ id: string; name: string }[]> {
|
||||
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
markMilestone("CLI_START");
|
||||
|
||||
// Initialize settings manager (loads settings once into memory)
|
||||
await settingsManager.initialize();
|
||||
const settings = await settingsManager.getSettingsWithSecureTokens();
|
||||
markMilestone("SETTINGS_LOADED");
|
||||
|
||||
// Initialize LSP infrastructure for type checking
|
||||
if (process.env.LETTA_ENABLE_LSP) {
|
||||
@@ -728,6 +732,7 @@ async function main(): Promise<void> {
|
||||
// Validate credentials by checking health endpoint
|
||||
const { validateCredentials } = await import("./auth/oauth");
|
||||
const isValid = await validateCredentials(baseURL, apiKey ?? "");
|
||||
markMilestone("CREDENTIALS_VALIDATED");
|
||||
|
||||
if (!isValid) {
|
||||
// For headless mode, error out with helpful message
|
||||
@@ -829,18 +834,22 @@ async function main(): Promise<void> {
|
||||
}
|
||||
|
||||
if (isHeadless) {
|
||||
markMilestone("HEADLESS_MODE_START");
|
||||
// For headless mode, load tools synchronously (respecting model/toolset when provided)
|
||||
const modelForTools = getModelForToolLoading(
|
||||
specifiedModel,
|
||||
specifiedToolset as "codex" | "default" | undefined,
|
||||
);
|
||||
await loadTools(modelForTools);
|
||||
markMilestone("TOOLS_LOADED");
|
||||
|
||||
const { handleHeadlessCommand } = await import("./headless");
|
||||
await handleHeadlessCommand(process.argv, specifiedModel, skillsDirectory);
|
||||
return;
|
||||
}
|
||||
|
||||
markMilestone("TUI_MODE_START");
|
||||
|
||||
// Enable enhanced key reporting (Shift+Enter, etc.) BEFORE Ink initializes.
|
||||
// In VS Code/xterm.js this typically requires a short handshake (query + enable).
|
||||
try {
|
||||
@@ -853,6 +862,7 @@ async function main(): Promise<void> {
|
||||
}
|
||||
|
||||
// Interactive: lazy-load React/Ink + App
|
||||
markMilestone("REACT_IMPORT_START");
|
||||
const React = await import("react");
|
||||
const { render } = await import("ink");
|
||||
const { useState, useEffect } = React;
|
||||
@@ -1441,42 +1451,35 @@ async function main(): Promise<void> {
|
||||
|
||||
// Set agent context for tools that need it (e.g., Skill tool)
|
||||
setAgentContext(agent.id, skillsDirectory);
|
||||
await initializeLoadedSkillsFlag();
|
||||
|
||||
// Re-discover skills and update the skills memory block
|
||||
// Fire-and-forget: Initialize loaded skills flag (LET-7101)
|
||||
// Don't await - this is just for the skill unload reminder
|
||||
initializeLoadedSkillsFlag().catch(() => {
|
||||
// Ignore errors - not critical
|
||||
});
|
||||
|
||||
// Fire-and-forget: Sync skills in background (LET-7101)
|
||||
// This ensures new skills added after agent creation are available
|
||||
try {
|
||||
const { discoverSkills, formatSkillsForMemory, SKILLS_DIR } =
|
||||
await import("./agent/skills");
|
||||
const { join } = await import("node:path");
|
||||
// Don't await - user can start typing immediately
|
||||
(async () => {
|
||||
try {
|
||||
const { syncSkillsToAgent, SKILLS_DIR } = await import(
|
||||
"./agent/skills"
|
||||
);
|
||||
const { join } = await import("node:path");
|
||||
|
||||
const resolvedSkillsDirectory =
|
||||
skillsDirectory || join(process.cwd(), SKILLS_DIR);
|
||||
const { skills, errors } = await discoverSkills(
|
||||
resolvedSkillsDirectory,
|
||||
);
|
||||
const resolvedSkillsDirectory =
|
||||
skillsDirectory || join(process.cwd(), SKILLS_DIR);
|
||||
|
||||
if (errors.length > 0) {
|
||||
console.warn("Errors encountered during skill discovery:");
|
||||
for (const error of errors) {
|
||||
console.warn(` ${error.path}: ${error.message}`);
|
||||
}
|
||||
await syncSkillsToAgent(client, agent.id, resolvedSkillsDirectory, {
|
||||
skipIfUnchanged: true,
|
||||
});
|
||||
} catch (error) {
|
||||
console.warn(
|
||||
`[skills] Background sync failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
}
|
||||
|
||||
// Update the skills memory block with freshly discovered skills
|
||||
const formattedSkills = formatSkillsForMemory(
|
||||
skills,
|
||||
resolvedSkillsDirectory,
|
||||
);
|
||||
await client.agents.blocks.update("skills", {
|
||||
agent_id: agent.id,
|
||||
value: formattedSkills,
|
||||
});
|
||||
} catch (error) {
|
||||
console.warn(
|
||||
`Failed to update skills: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
}
|
||||
})();
|
||||
|
||||
// Check if we're resuming an existing agent
|
||||
// We're resuming if:
|
||||
@@ -1797,6 +1800,7 @@ async function main(): Promise<void> {
|
||||
});
|
||||
}
|
||||
|
||||
markMilestone("REACT_RENDER_START");
|
||||
render(
|
||||
React.createElement(LoadingApp, {
|
||||
continueSession: shouldContinue,
|
||||
|
||||
249
src/tests/approval-recovery.test.ts
Normal file
249
src/tests/approval-recovery.test.ts
Normal file
@@ -0,0 +1,249 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import type { Message } from "@letta-ai/letta-client/resources/agents/messages";
|
||||
import {
|
||||
isApprovalPendingError,
|
||||
isApprovalStateDesyncError,
|
||||
} from "../agent/approval-recovery";
|
||||
import { extractApprovals } from "../agent/check-approval";
|
||||
|
||||
/**
|
||||
* Tests for approval error detection helpers (LET-7101).
|
||||
*
|
||||
* These functions detect two opposite error conditions:
|
||||
* 1. isApprovalStateDesyncError: Sent approval, but server has no pending approval
|
||||
* 2. isApprovalPendingError: Sent user message, but server has pending approval waiting
|
||||
*/
|
||||
|
||||
describe("isApprovalStateDesyncError", () => {
|
||||
test("detects desync error in detail string", () => {
|
||||
const detail = "No tool call is currently awaiting approval";
|
||||
expect(isApprovalStateDesyncError(detail)).toBe(true);
|
||||
});
|
||||
|
||||
test("detects desync error case-insensitively", () => {
|
||||
const detail = "NO TOOL CALL IS CURRENTLY AWAITING APPROVAL";
|
||||
expect(isApprovalStateDesyncError(detail)).toBe(true);
|
||||
});
|
||||
|
||||
test("detects desync error in longer message", () => {
|
||||
const detail =
|
||||
"Error: No tool call is currently awaiting approval. The approval request may have expired.";
|
||||
expect(isApprovalStateDesyncError(detail)).toBe(true);
|
||||
});
|
||||
|
||||
test("returns false for unrelated errors", () => {
|
||||
expect(isApprovalStateDesyncError("Connection timeout")).toBe(false);
|
||||
expect(isApprovalStateDesyncError("Internal server error")).toBe(false);
|
||||
});
|
||||
|
||||
test("returns false for non-string input", () => {
|
||||
expect(isApprovalStateDesyncError(null)).toBe(false);
|
||||
expect(isApprovalStateDesyncError(undefined)).toBe(false);
|
||||
expect(isApprovalStateDesyncError(123)).toBe(false);
|
||||
expect(isApprovalStateDesyncError({ error: "test" })).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("isApprovalPendingError", () => {
|
||||
// This is the actual error format from the Letta backend (screenshot from LET-7101)
|
||||
const REAL_ERROR_DETAIL =
|
||||
"CONFLICT: Cannot send a new message: The agent is waiting for approval on a tool call. Please approve or deny the pending request before continuing.";
|
||||
|
||||
test("detects approval pending error in real error format", () => {
|
||||
expect(isApprovalPendingError(REAL_ERROR_DETAIL)).toBe(true);
|
||||
});
|
||||
|
||||
test("detects approval pending error case-insensitively", () => {
|
||||
expect(isApprovalPendingError("CANNOT SEND A NEW MESSAGE")).toBe(true);
|
||||
expect(isApprovalPendingError("cannot send a new message")).toBe(true);
|
||||
});
|
||||
|
||||
test("detects partial match in longer message", () => {
|
||||
const detail = "Error occurred: Cannot send a new message while processing";
|
||||
expect(isApprovalPendingError(detail)).toBe(true);
|
||||
});
|
||||
|
||||
test("returns false for desync errors (opposite case)", () => {
|
||||
// These are the OPPOSITE error - when we send approval but there's nothing pending
|
||||
expect(
|
||||
isApprovalPendingError("No tool call is currently awaiting approval"),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
test("returns false for unrelated errors", () => {
|
||||
expect(isApprovalPendingError("Connection timeout")).toBe(false);
|
||||
expect(isApprovalPendingError("Rate limit exceeded")).toBe(false);
|
||||
expect(isApprovalPendingError("Invalid API key")).toBe(false);
|
||||
});
|
||||
|
||||
test("returns false for non-string input", () => {
|
||||
expect(isApprovalPendingError(null)).toBe(false);
|
||||
expect(isApprovalPendingError(undefined)).toBe(false);
|
||||
expect(isApprovalPendingError(123)).toBe(false);
|
||||
expect(isApprovalPendingError({ detail: REAL_ERROR_DETAIL })).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* Tests for parallel tool call approval extraction.
|
||||
* Ensures lazy recovery handles multiple simultaneous tool calls correctly.
|
||||
*/
|
||||
describe("extractApprovals", () => {
|
||||
// Helper to create a minimal Message-like object for testing
|
||||
// We use 'as Message' cast because the real Message type is complex
|
||||
const createMessage = (overrides: {
|
||||
tool_calls?: Array<{
|
||||
tool_call_id?: string;
|
||||
name?: string;
|
||||
arguments?: string;
|
||||
}>;
|
||||
tool_call?: {
|
||||
tool_call_id?: string;
|
||||
name?: string;
|
||||
arguments?: string;
|
||||
};
|
||||
}): Message =>
|
||||
({
|
||||
id: "test-msg-id",
|
||||
date: new Date().toISOString(),
|
||||
message_type: "approval_request_message",
|
||||
...overrides,
|
||||
}) as unknown as Message;
|
||||
|
||||
test("extracts single tool call from tool_calls array", () => {
|
||||
const msg = createMessage({
|
||||
tool_calls: [
|
||||
{
|
||||
tool_call_id: "call-1",
|
||||
name: "Bash",
|
||||
arguments: '{"command": "echo hello"}',
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const result = extractApprovals(msg);
|
||||
|
||||
expect(result.pendingApprovals).toHaveLength(1);
|
||||
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-1");
|
||||
expect(result.pendingApprovals[0]!.toolName).toBe("Bash");
|
||||
expect(result.pendingApproval?.toolCallId).toBe("call-1");
|
||||
});
|
||||
|
||||
test("extracts multiple parallel tool calls", () => {
|
||||
const msg = createMessage({
|
||||
tool_calls: [
|
||||
{
|
||||
tool_call_id: "call-1",
|
||||
name: "Bash",
|
||||
arguments: '{"command": "echo hello"}',
|
||||
},
|
||||
{
|
||||
tool_call_id: "call-2",
|
||||
name: "web_search",
|
||||
arguments: '{"query": "test"}',
|
||||
},
|
||||
{
|
||||
tool_call_id: "call-3",
|
||||
name: "Read",
|
||||
arguments: '{"file_path": "/tmp/test.txt"}',
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const result = extractApprovals(msg);
|
||||
|
||||
expect(result.pendingApprovals).toHaveLength(3);
|
||||
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-1");
|
||||
expect(result.pendingApprovals[0]!.toolName).toBe("Bash");
|
||||
expect(result.pendingApprovals[1]!.toolCallId).toBe("call-2");
|
||||
expect(result.pendingApprovals[1]!.toolName).toBe("web_search");
|
||||
expect(result.pendingApprovals[2]!.toolCallId).toBe("call-3");
|
||||
expect(result.pendingApprovals[2]!.toolName).toBe("Read");
|
||||
// pendingApproval is deprecated, should be first item
|
||||
expect(result.pendingApproval?.toolCallId).toBe("call-1");
|
||||
});
|
||||
|
||||
test("handles deprecated single tool_call field", () => {
|
||||
const msg = createMessage({
|
||||
tool_call: {
|
||||
tool_call_id: "call-legacy",
|
||||
name: "Write",
|
||||
arguments: '{"file_path": "/tmp/out.txt"}',
|
||||
},
|
||||
});
|
||||
|
||||
const result = extractApprovals(msg);
|
||||
|
||||
expect(result.pendingApprovals).toHaveLength(1);
|
||||
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-legacy");
|
||||
expect(result.pendingApprovals[0]!.toolName).toBe("Write");
|
||||
});
|
||||
|
||||
test("prefers tool_calls array over deprecated tool_call", () => {
|
||||
const msg = createMessage({
|
||||
tool_calls: [{ tool_call_id: "call-new", name: "Bash", arguments: "{}" }],
|
||||
tool_call: {
|
||||
tool_call_id: "call-old",
|
||||
name: "Write",
|
||||
arguments: "{}",
|
||||
},
|
||||
});
|
||||
|
||||
const result = extractApprovals(msg);
|
||||
|
||||
// Should use tool_calls, not tool_call
|
||||
expect(result.pendingApprovals).toHaveLength(1);
|
||||
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-new");
|
||||
});
|
||||
|
||||
test("filters out tool calls without tool_call_id", () => {
|
||||
const msg = createMessage({
|
||||
tool_calls: [
|
||||
{ tool_call_id: "call-valid", name: "Bash", arguments: "{}" },
|
||||
{ name: "Invalid", arguments: "{}" }, // Missing tool_call_id
|
||||
{ tool_call_id: "", name: "Empty", arguments: "{}" }, // Empty tool_call_id
|
||||
{ tool_call_id: "call-valid-2", name: "Read", arguments: "{}" },
|
||||
],
|
||||
});
|
||||
|
||||
const result = extractApprovals(msg);
|
||||
|
||||
// Should only include entries with valid tool_call_id
|
||||
expect(result.pendingApprovals).toHaveLength(2);
|
||||
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-valid");
|
||||
expect(result.pendingApprovals[1]!.toolCallId).toBe("call-valid-2");
|
||||
});
|
||||
|
||||
test("returns empty array when no tool calls present", () => {
|
||||
const msg = createMessage({});
|
||||
|
||||
const result = extractApprovals(msg);
|
||||
|
||||
expect(result.pendingApprovals).toHaveLength(0);
|
||||
expect(result.pendingApproval).toBeNull();
|
||||
});
|
||||
|
||||
test("handles missing name and arguments gracefully", () => {
|
||||
const msg = createMessage({
|
||||
tool_calls: [{ tool_call_id: "call-minimal" }],
|
||||
});
|
||||
|
||||
const result = extractApprovals(msg);
|
||||
|
||||
expect(result.pendingApprovals).toHaveLength(1);
|
||||
expect(result.pendingApprovals[0]!.toolCallId).toBe("call-minimal");
|
||||
expect(result.pendingApprovals[0]!.toolName).toBe("");
|
||||
expect(result.pendingApprovals[0]!.toolArgs).toBe("");
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* Note: Full integration testing of lazy approval recovery requires:
|
||||
* 1. Starting CLI without --yolo
|
||||
* 2. Sending a prompt that triggers a tool call requiring approval
|
||||
* 3. Instead of approving, sending another user message
|
||||
* 4. Verifying the CONFLICT error is detected and recovery happens
|
||||
*
|
||||
* This is complex to automate reliably in unit tests.
|
||||
* Manual testing or a dedicated integration test suite is recommended.
|
||||
*/
|
||||
256
src/tests/lazy-approval-recovery.test.ts
Normal file
256
src/tests/lazy-approval-recovery.test.ts
Normal file
@@ -0,0 +1,256 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import { spawn } from "node:child_process";
|
||||
|
||||
/**
|
||||
* Integration test for lazy approval recovery (LET-7101).
|
||||
*
|
||||
* NOTE: The lazy approval recovery is primarily designed for TUI mode where:
|
||||
* 1. User has a session with pending approvals (e.g., from a previous run)
|
||||
* 2. User sends a new message before responding to the approval
|
||||
* 3. Server returns CONFLICT error
|
||||
* 4. CLI recovers by auto-denying stale approvals and retrying
|
||||
*
|
||||
* In bidirectional mode, messages sent during permission wait are dropped
|
||||
* (see headless.ts line 1710-1714), so we can't directly test the CONFLICT
|
||||
* scenario here. This test validates that the flow doesn't crash when
|
||||
* messages are sent while approvals are pending.
|
||||
*
|
||||
* The RecoveryMessage emission can be tested by:
|
||||
* 1. Manual testing in TUI mode (start session with orphaned approval)
|
||||
* 2. Or by modifying headless mode to not drop messages during permission wait
|
||||
*/
|
||||
|
||||
// Prompt that will trigger a Bash tool call requiring approval
|
||||
const BASH_TRIGGER_PROMPT =
|
||||
"Run this exact bash command: echo test123. Do not use any other tools.";
|
||||
|
||||
// Second message to send while approval is pending
|
||||
const INTERRUPT_MESSAGE =
|
||||
"Actually, just say OK instead. Do not call any tools.";
|
||||
|
||||
interface StreamMessage {
|
||||
type: string;
|
||||
subtype?: string;
|
||||
message_type?: string;
|
||||
stop_reason?: string;
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
[key: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* Run bidirectional test with custom message handling.
|
||||
* Allows sending messages at specific points in the flow.
|
||||
*/
|
||||
async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{
|
||||
messages: StreamMessage[];
|
||||
success: boolean;
|
||||
errorSeen: boolean;
|
||||
}> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const proc = spawn(
|
||||
"bun",
|
||||
[
|
||||
"run",
|
||||
"dev",
|
||||
"-p",
|
||||
"--input-format",
|
||||
"stream-json",
|
||||
"--output-format",
|
||||
"stream-json",
|
||||
"--new-agent",
|
||||
"-m",
|
||||
"haiku",
|
||||
// NOTE: No --yolo flag - approvals are required
|
||||
],
|
||||
{
|
||||
cwd: process.cwd(),
|
||||
env: { ...process.env },
|
||||
},
|
||||
);
|
||||
|
||||
const messages: StreamMessage[] = [];
|
||||
let buffer = "";
|
||||
let initReceived = false;
|
||||
let approvalSeen = false;
|
||||
let interruptSent = false;
|
||||
let errorSeen = false;
|
||||
let resultCount = 0;
|
||||
let closing = false;
|
||||
|
||||
const timeout = setTimeout(() => {
|
||||
if (!closing) {
|
||||
proc.kill();
|
||||
reject(new Error(`Test timed out after ${timeoutMs}ms`));
|
||||
}
|
||||
}, timeoutMs);
|
||||
|
||||
const cleanup = () => {
|
||||
closing = true;
|
||||
clearTimeout(timeout);
|
||||
setTimeout(() => {
|
||||
proc.stdin?.end();
|
||||
proc.kill();
|
||||
}, 500);
|
||||
};
|
||||
|
||||
const processLine = (line: string) => {
|
||||
if (!line.trim()) return;
|
||||
try {
|
||||
const msg: StreamMessage = JSON.parse(line);
|
||||
messages.push(msg);
|
||||
|
||||
// Debug output
|
||||
if (process.env.DEBUG_TEST) {
|
||||
console.log("MSG:", JSON.stringify(msg, null, 2));
|
||||
}
|
||||
|
||||
// Step 1: Wait for init, then send bash trigger prompt
|
||||
if (msg.type === "system" && msg.subtype === "init" && !initReceived) {
|
||||
initReceived = true;
|
||||
const userMsg = JSON.stringify({
|
||||
type: "user",
|
||||
message: { role: "user", content: BASH_TRIGGER_PROMPT },
|
||||
});
|
||||
proc.stdin?.write(`${userMsg}\n`);
|
||||
return;
|
||||
}
|
||||
|
||||
// Step 2: When we see approval request, send another user message instead
|
||||
if (
|
||||
msg.type === "message" &&
|
||||
msg.message_type === "approval_request_message" &&
|
||||
!approvalSeen
|
||||
) {
|
||||
approvalSeen = true;
|
||||
// Wait a moment, then send interrupt message (NOT an approval)
|
||||
setTimeout(() => {
|
||||
if (!interruptSent) {
|
||||
interruptSent = true;
|
||||
const userMsg = JSON.stringify({
|
||||
type: "user",
|
||||
message: { role: "user", content: INTERRUPT_MESSAGE },
|
||||
});
|
||||
proc.stdin?.write(`${userMsg}\n`);
|
||||
}
|
||||
}, 500);
|
||||
return;
|
||||
}
|
||||
|
||||
// Track recovery messages - this is the key signal that lazy recovery worked
|
||||
if (
|
||||
msg.type === "recovery" &&
|
||||
msg.recovery_type === "approval_pending"
|
||||
) {
|
||||
errorSeen = true; // reusing this flag to mean "recovery message seen"
|
||||
}
|
||||
|
||||
// Also track raw errors (shouldn't see these if recovery works properly)
|
||||
if (
|
||||
msg.type === "error" ||
|
||||
(msg.type === "message" && msg.message_type === "error_message")
|
||||
) {
|
||||
const detail = msg.detail || msg.message || "";
|
||||
if (detail.toLowerCase().includes("cannot send a new message")) {
|
||||
// Raw error leaked through - recovery may have failed
|
||||
console.log(
|
||||
"WARNING: Raw CONFLICT error seen (recovery may have failed)",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Track results - we need 2 (one for each user message, though first may fail)
|
||||
if (msg.type === "result") {
|
||||
resultCount++;
|
||||
// After second result (or after seeing error + result), we're done
|
||||
if (resultCount >= 2 || (errorSeen && resultCount >= 1)) {
|
||||
cleanup();
|
||||
resolve({ messages, success: true, errorSeen });
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Not valid JSON, ignore
|
||||
}
|
||||
};
|
||||
|
||||
proc.stdout?.on("data", (data) => {
|
||||
buffer += data.toString();
|
||||
const lines = buffer.split("\n");
|
||||
buffer = lines.pop() || "";
|
||||
for (const line of lines) {
|
||||
processLine(line);
|
||||
}
|
||||
});
|
||||
|
||||
let _stderr = "";
|
||||
proc.stderr?.on("data", (data) => {
|
||||
_stderr += data.toString();
|
||||
});
|
||||
|
||||
proc.on("close", (_code) => {
|
||||
clearTimeout(timeout);
|
||||
// Process any remaining buffer
|
||||
if (buffer.trim()) {
|
||||
processLine(buffer);
|
||||
}
|
||||
|
||||
if (!closing) {
|
||||
// If we got here without resolving, check what we have
|
||||
resolve({
|
||||
messages,
|
||||
success: resultCount > 0,
|
||||
errorSeen,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
proc.on("error", (err) => {
|
||||
clearTimeout(timeout);
|
||||
reject(err);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
describe("lazy approval recovery", () => {
|
||||
test("handles concurrent message while approval is pending", async () => {
|
||||
const result = await runLazyRecoveryTest();
|
||||
|
||||
// Log messages for debugging if test fails
|
||||
if (!result.success) {
|
||||
console.log("All messages received:");
|
||||
for (const msg of result.messages) {
|
||||
console.log(JSON.stringify(msg, null, 2));
|
||||
}
|
||||
}
|
||||
|
||||
// We should have seen the approval request (proves tool requiring approval was called)
|
||||
const approvalRequest = result.messages.find(
|
||||
(m) => m.message_type === "approval_request_message",
|
||||
);
|
||||
expect(approvalRequest).toBeDefined();
|
||||
|
||||
// The test should complete successfully
|
||||
expect(result.success).toBe(true);
|
||||
|
||||
// Count results - we should get at least 1 (the second message should always complete)
|
||||
const resultCount = result.messages.filter(
|
||||
(m) => m.type === "result",
|
||||
).length;
|
||||
expect(resultCount).toBeGreaterThanOrEqual(1);
|
||||
|
||||
// KEY ASSERTION: Check if we saw the recovery message
|
||||
// This proves the lazy recovery mechanism was triggered
|
||||
const recoveryMessage = result.messages.find(
|
||||
(m) => m.type === "recovery" && m.recovery_type === "approval_pending",
|
||||
);
|
||||
if (recoveryMessage) {
|
||||
console.log("Recovery message detected - lazy recovery worked correctly");
|
||||
expect(result.errorSeen).toBe(true); // Should have been set when we saw recovery
|
||||
} else {
|
||||
// Recovery might not be triggered if approval was auto-handled before second message
|
||||
// This can happen due to timing - the test still validates the flow works
|
||||
console.log(
|
||||
"Note: No recovery message seen - approval may have been handled before conflict",
|
||||
);
|
||||
}
|
||||
}, 180000); // 3 minute timeout for CI
|
||||
});
|
||||
@@ -149,7 +149,7 @@ async function readSkillContent(
|
||||
try {
|
||||
const content = await readFile(projectSkillPath, "utf-8");
|
||||
return { content, path: projectSkillPath };
|
||||
} catch (primaryError) {
|
||||
} catch {
|
||||
// Fallback: check for bundled skills in a repo-level skills directory (legacy)
|
||||
try {
|
||||
const bundledSkillsDir = join(process.cwd(), "skills", "skills");
|
||||
@@ -157,8 +157,11 @@ async function readSkillContent(
|
||||
const content = await readFile(bundledSkillPath, "utf-8");
|
||||
return { content, path: bundledSkillPath };
|
||||
} catch {
|
||||
// If all fallbacks fail, rethrow the original error
|
||||
throw primaryError;
|
||||
// If all fallbacks fail, throw a helpful error message (LET-7101)
|
||||
// Suggest refresh in case skills sync is still running in background
|
||||
throw new Error(
|
||||
`Skill "${skillId}" not found. If you recently added this skill, try Skill({ command: "refresh" }) to re-scan the skills directory.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -198,6 +198,19 @@ export interface RetryMessage extends MessageEnvelope {
|
||||
run_id?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Recovery message emitted when the CLI detects and recovers from errors.
|
||||
* Used for approval state conflicts and other recoverable errors.
|
||||
*/
|
||||
export interface RecoveryMessage extends MessageEnvelope {
|
||||
type: "recovery";
|
||||
/** Type of recovery performed */
|
||||
recovery_type: "approval_pending" | "approval_desync";
|
||||
/** Human-readable description of what happened */
|
||||
message: string;
|
||||
run_id?: string;
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════
|
||||
// RESULT
|
||||
// ═══════════════════════════════════════════════════════════════
|
||||
@@ -329,6 +342,7 @@ export type WireMessage =
|
||||
| AutoApprovalMessage
|
||||
| ErrorMessage
|
||||
| RetryMessage
|
||||
| RecoveryMessage
|
||||
| ResultMessage
|
||||
| ControlResponse
|
||||
| ControlRequest; // CLI → SDK control requests (e.g., can_use_tool)
|
||||
|
||||
@@ -34,6 +34,112 @@ export function logTiming(message: string): void {
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Milestone tracking for latency audits
|
||||
// ============================================================================
|
||||
|
||||
// Store milestones with their timestamps (ms since process start via performance.now())
|
||||
const milestones: Map<string, number> = new Map();
|
||||
|
||||
// Reference time for relative measurements (set on first milestone)
|
||||
let firstMilestoneTime: number | null = null;
|
||||
|
||||
/**
|
||||
* Mark a named milestone in the boot/execution sequence.
|
||||
* Call this at key points to track where time is spent.
|
||||
*
|
||||
* @param name - Descriptive name like "SETTINGS_LOADED" or "AGENT_RESOLVED"
|
||||
*/
|
||||
export function markMilestone(name: string): void {
|
||||
const now = performance.now();
|
||||
milestones.set(name, now);
|
||||
|
||||
if (firstMilestoneTime === null) {
|
||||
firstMilestoneTime = now;
|
||||
}
|
||||
|
||||
if (isTimingsEnabled()) {
|
||||
const relative = now - firstMilestoneTime;
|
||||
console.error(
|
||||
`[timing] MILESTONE ${name} at +${formatDuration(relative)} (${formatTimestamp(new Date())})`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Measure time elapsed since a previous milestone.
|
||||
*
|
||||
* @param label - Description of what we're measuring (e.g., "tool loading")
|
||||
* @param fromMilestone - Name of the starting milestone
|
||||
*/
|
||||
export function measureSinceMilestone(
|
||||
label: string,
|
||||
fromMilestone: string,
|
||||
): void {
|
||||
if (!isTimingsEnabled()) return;
|
||||
|
||||
const startTime = milestones.get(fromMilestone);
|
||||
if (startTime === undefined) {
|
||||
console.error(
|
||||
`[timing] WARNING: milestone "${fromMilestone}" not found for measurement "${label}"`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
const duration = performance.now() - startTime;
|
||||
console.error(`[timing] ${label}: ${formatDuration(duration)}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the duration between two milestones in milliseconds.
|
||||
* Returns null if either milestone doesn't exist.
|
||||
*/
|
||||
export function getMilestoneDuration(
|
||||
fromMilestone: string,
|
||||
toMilestone: string,
|
||||
): number | null {
|
||||
const start = milestones.get(fromMilestone);
|
||||
const end = milestones.get(toMilestone);
|
||||
if (start === undefined || end === undefined) return null;
|
||||
return end - start;
|
||||
}
|
||||
|
||||
/**
|
||||
* Print a summary of all milestones with relative timestamps.
|
||||
* Useful at the end of a benchmark run.
|
||||
*/
|
||||
export function reportAllMilestones(): void {
|
||||
if (!isTimingsEnabled() || milestones.size === 0) return;
|
||||
|
||||
const first = firstMilestoneTime ?? 0;
|
||||
|
||||
console.error(`[timing] ======== MILESTONE SUMMARY ========`);
|
||||
|
||||
// Sort by timestamp
|
||||
const sorted = [...milestones.entries()].sort((a, b) => a[1] - b[1]);
|
||||
|
||||
let prevTime = first;
|
||||
for (const [name, time] of sorted) {
|
||||
const relativeToStart = time - first;
|
||||
const delta = time - prevTime;
|
||||
const deltaStr = prevTime === first ? "" : ` (+${formatDuration(delta)})`;
|
||||
console.error(
|
||||
`[timing] +${formatDuration(relativeToStart).padStart(8)} ${name}${deltaStr}`,
|
||||
);
|
||||
prevTime = time;
|
||||
}
|
||||
|
||||
console.error(`[timing] =====================================`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear all milestones (useful for running multiple benchmarks in sequence).
|
||||
*/
|
||||
export function clearMilestones(): void {
|
||||
milestones.clear();
|
||||
firstMilestoneTime = null;
|
||||
}
|
||||
|
||||
// Simple fetch type that matches the SDK's expected signature
|
||||
type SimpleFetch = (
|
||||
input: string | URL | Request,
|
||||
|
||||
Reference in New Issue
Block a user