letta-code/src/headless.ts

import { parseArgs } from "node:util";
import type { Letta } from "@letta-ai/letta-client";
import type {
  AgentState,
  MessageCreate,
} from "@letta-ai/letta-client/resources/agents/agents";
import type {
  ApprovalCreate,
  LettaStreamingResponse,
} from "@letta-ai/letta-client/resources/agents/messages";
import type { StopReasonType } from "@letta-ai/letta-client/resources/runs/runs";
import type { ApprovalResult } from "./agent/approval-execution";
import { getClient } from "./agent/client";
import { initializeLoadedSkillsFlag, setAgentContext } from "./agent/context";
import { createAgent } from "./agent/create";
import { sendMessageStream } from "./agent/message";
import { getModelUpdateArgs } from "./agent/model";
import { SessionStats } from "./agent/stats";
import {
  createBuffers,
  type Line,
  markIncompleteToolsAsCancelled,
  toLines,
} from "./cli/helpers/accumulator";
import { formatErrorDetails } from "./cli/helpers/errorFormatter";
import { safeJsonParseOr } from "./cli/helpers/safeJsonParse";
import { drainStreamWithResume } from "./cli/helpers/stream";
import { settingsManager } from "./settings-manager";
import {
  checkToolPermission,
  forceUpsertTools,
  isToolsNotFoundError,
} from "./tools/manager";
import type {
  AutoApprovalMessage,
  ControlResponse,
  ErrorMessage,
  MessageWire,
  ResultMessage,
  RetryMessage,
  StreamEvent,
  SystemInitMessage,
} from "./types/wire";

// Maximum number of times to retry a turn when the backend
// reports an `llm_api_error` stop reason. This helps smooth
// over transient LLM/backend issues without requiring the
// caller to manually resubmit the prompt.
const LLM_API_ERROR_MAX_RETRIES = 3;

export async function handleHeadlessCommand(
  argv: string[],
  model?: string,
  skillsDirectory?: string,
) {
  const settings = settingsManager.getSettings();

  // Parse CLI args
  // Include all flags from index.ts to prevent them from being treated as positionals
  const { values, positionals } = parseArgs({
    args: argv,
    options: {
      // Flags used in headless mode
      continue: { type: "boolean", short: "c" },
      new: { type: "boolean" },
      agent: { type: "string", short: "a" },
      model: { type: "string", short: "m" },
      system: { type: "string", short: "s" },
      toolset: { type: "string" },
      prompt: { type: "boolean", short: "p" },
      "output-format": { type: "string" },
      "input-format": { type: "string" },
      "include-partial-messages": { type: "boolean" },
      // Additional flags from index.ts that need to be filtered out
      help: { type: "boolean", short: "h" },
      version: { type: "boolean", short: "v" },
      run: { type: "boolean" },
      tools: { type: "string" },
      allowedTools: { type: "string" },
      disallowedTools: { type: "string" },
      "permission-mode": { type: "string" },
      yolo: { type: "boolean" },
      skills: { type: "string" },
      link: { type: "boolean" },
      unlink: { type: "boolean" },
      sleeptime: { type: "boolean" },
      "init-blocks": { type: "string" },
      "base-tools": { type: "string" },
      "from-af": { type: "string" },
    },
    strict: false,
    allowPositionals: true,
  });

  // Set tool filter if provided (controls which tools are loaded)
  if (values.tools !== undefined) {
    const { toolFilter } = await import("./tools/filter");
    toolFilter.setEnabledTools(values.tools as string);
  }

  // Check for input-format early - if stream-json, we don't need a prompt
  const inputFormat = values["input-format"] as string | undefined;
  const isBidirectionalMode = inputFormat === "stream-json";

  // Get prompt from either positional args or stdin (unless in bidirectional mode)
  let prompt = positionals.slice(2).join(" ");

  // If no prompt provided as args, try reading from stdin (unless in bidirectional mode)
  if (!prompt && !isBidirectionalMode) {
    // Check if stdin is available (piped input)
    if (!process.stdin.isTTY) {
      const chunks: Buffer[] = [];
      for await (const chunk of process.stdin) {
        chunks.push(chunk);
      }
      prompt = Buffer.concat(chunks).toString("utf-8").trim();
    }
  }

  if (!prompt && !isBidirectionalMode) {
    console.error("Error: No prompt provided");
    process.exit(1);
  }

  const client = await getClient();

  // Get base URL for tool upsert operations
  const baseURL =
    process.env.LETTA_BASE_URL ||
    settings.env?.LETTA_BASE_URL ||
    "https://api.letta.com";

  // Resolve agent (same logic as interactive mode)
  let agent: AgentState | null = null;
  const specifiedAgentId = values.agent as string | undefined;
  const shouldContinue = values.continue as boolean | undefined;
  const forceNew = values.new as boolean | undefined;
  const systemPromptId = values.system as string | undefined;
  const initBlocksRaw = values["init-blocks"] as string | undefined;
  const baseToolsRaw = values["base-tools"] as string | undefined;
  const sleeptimeFlag = (values.sleeptime as boolean | undefined) ?? undefined;
  const fromAfFile = values["from-af"] as string | undefined;

  // Validate --from-af flag
  if (fromAfFile) {
    if (specifiedAgentId) {
      console.error("Error: --from-af cannot be used with --agent");
      process.exit(1);
    }
    if (shouldContinue) {
      console.error("Error: --from-af cannot be used with --continue");
      process.exit(1);
    }
    if (forceNew) {
      console.error("Error: --from-af cannot be used with --new");
      process.exit(1);
    }
  }

  if (initBlocksRaw && !forceNew) {
    console.error(
      "Error: --init-blocks can only be used together with --new to control initial memory blocks.",
    );
    process.exit(1);
  }

  let initBlocks: string[] | undefined;
  if (initBlocksRaw !== undefined) {
    const trimmed = initBlocksRaw.trim();
    if (!trimmed || trimmed.toLowerCase() === "none") {
      initBlocks = [];
    } else {
      initBlocks = trimmed
        .split(",")
        .map((name) => name.trim())
        .filter((name) => name.length > 0);
    }
  }

  if (baseToolsRaw && !forceNew) {
    console.error(
      "Error: --base-tools can only be used together with --new to control initial base tools.",
    );
    process.exit(1);
  }

  let baseTools: string[] | undefined;
  if (baseToolsRaw !== undefined) {
    const trimmed = baseToolsRaw.trim();
    if (!trimmed || trimmed.toLowerCase() === "none") {
      baseTools = [];
    } else {
      baseTools = trimmed
        .split(",")
        .map((name) => name.trim())
        .filter((name) => name.length > 0);
    }
  }

  // Priority 1: Import from AgentFile template
  if (fromAfFile) {
    const { importAgentFromFile } = await import("./agent/import");
    const result = await importAgentFromFile({
      filePath: fromAfFile,
      modelOverride: model,
      stripMessages: true,
    });
    agent = result.agent;
  }

  // Priority 2: Try to use --agent specified ID
  if (!agent && specifiedAgentId) {
    try {
      agent = await client.agents.retrieve(specifiedAgentId);
    } catch (_error) {
      console.error(`Agent ${specifiedAgentId} not found, creating new one...`);
    }
  }

  // Priority 3: Check if --new flag was passed (skip all resume logic)
  if (!agent && forceNew) {
    const updateArgs = getModelUpdateArgs(model);
    try {
      const result = await createAgent(
        undefined,
        model,
        undefined,
        updateArgs,
        skillsDirectory,
        true, // parallelToolCalls always enabled
        sleeptimeFlag ?? settings.enableSleeptime,
        systemPromptId,
        initBlocks,
        baseTools,
      );
      agent = result.agent;
    } catch (err) {
      if (isToolsNotFoundError(err)) {
        console.warn("Tools missing on server, re-uploading and retrying...");
        await forceUpsertTools(client, baseURL);
        const result = await createAgent(
          undefined,
          model,
          undefined,
          updateArgs,
          skillsDirectory,
          true,
          sleeptimeFlag ?? settings.enableSleeptime,
          systemPromptId,
          initBlocks,
          baseTools,
        );
        agent = result.agent;
      } else {
        throw err;
      }
    }
  }

  // Priority 4: Try to resume from project settings (.letta/settings.local.json)
  if (!agent) {
    await settingsManager.loadLocalProjectSettings();
    const localProjectSettings = settingsManager.getLocalProjectSettings();
    if (localProjectSettings?.lastAgent) {
      try {
        agent = await client.agents.retrieve(localProjectSettings.lastAgent);
      } catch (_error) {
        console.error(
          `Project agent ${localProjectSettings.lastAgent} not found, creating new one...`,
        );
      }
    }
  }

  // Priority 5: Try to reuse global lastAgent if --continue flag is passed
  if (!agent && shouldContinue && settings.lastAgent) {
    try {
      agent = await client.agents.retrieve(settings.lastAgent);
    } catch (_error) {
      console.error(
        `Previous agent ${settings.lastAgent} not found, creating new one...`,
      );
    }
  }

  // Priority 6: Create a new agent
  if (!agent) {
    const updateArgs = getModelUpdateArgs(model);
    try {
      const result = await createAgent(
        undefined,
        model,
        undefined,
        updateArgs,
        skillsDirectory,
        true, // parallelToolCalls always enabled
        sleeptimeFlag ?? settings.enableSleeptime,
        systemPromptId,
        undefined,
        undefined,
      );
      agent = result.agent;
    } catch (err) {
      if (isToolsNotFoundError(err)) {
        console.warn("Tools missing on server, re-uploading and retrying...");
        await forceUpsertTools(client, baseURL);
        const result = await createAgent(
          undefined,
          model,
          undefined,
          updateArgs,
          skillsDirectory,
          true,
          sleeptimeFlag ?? settings.enableSleeptime,
          systemPromptId,
          undefined,
          undefined,
        );
        agent = result.agent;
      } else {
        throw err;
      }
    }
  }

  // Check if we're resuming an existing agent (not creating a new one)
  const isResumingAgent = !!(
    specifiedAgentId ||
    shouldContinue ||
    (!forceNew && !fromAfFile)
  );

  // If resuming and a model or system prompt was specified, apply those changes
  if (isResumingAgent && (model || systemPromptId)) {
    if (model) {
      const { resolveModel } = await import("./agent/model");
      const modelHandle = resolveModel(model);
      if (!modelHandle) {
        console.error(`Error: Invalid model "${model}"`);
        process.exit(1);
      }

      // Optimization: Skip update if agent is already using the specified model
      const currentModel = agent.llm_config?.model;
      const currentEndpointType = agent.llm_config?.model_endpoint_type;
      const currentHandle = `${currentEndpointType}/${currentModel}`;

      if (currentHandle !== modelHandle) {
        const { updateAgentLLMConfig } = await import("./agent/modify");
        const updateArgs = getModelUpdateArgs(model);
        await updateAgentLLMConfig(agent.id, modelHandle, updateArgs);
        // Refresh agent state after model update
        agent = await client.agents.retrieve(agent.id);
      }
    }

    if (systemPromptId) {
      const { updateAgentSystemPrompt } = await import("./agent/modify");
      const result = await updateAgentSystemPrompt(agent.id, systemPromptId);
      if (!result.success || !result.agent) {
        console.error(`Failed to update system prompt: ${result.message}`);
        process.exit(1);
      }
      agent = result.agent;
    }
  }

  // Save agent ID to both project and global settings
  await settingsManager.loadLocalProjectSettings();
  settingsManager.updateLocalProjectSettings({ lastAgent: agent.id });
  settingsManager.updateSettings({ lastAgent: agent.id });

  // Set agent context for tools that need it (e.g., Skill tool, Task tool)
  setAgentContext(agent.id, skillsDirectory);
  await initializeLoadedSkillsFlag();

  // Re-discover skills and update the skills memory block
  // This ensures new skills added after agent creation are available
  try {
    const { discoverSkills, formatSkillsForMemory, SKILLS_DIR } = await import(
      "./agent/skills"
    );
    const { join } = await import("node:path");

    const resolvedSkillsDirectory =
      skillsDirectory || join(process.cwd(), SKILLS_DIR);
    const { skills, errors } = await discoverSkills(resolvedSkillsDirectory);

    if (errors.length > 0) {
      console.warn("Errors encountered during skill discovery:");
      for (const error of errors) {
        console.warn(`  ${error.path}: ${error.message}`);
      }
    }

    // Update the skills memory block with freshly discovered skills
    const formattedSkills = formatSkillsForMemory(
      skills,
      resolvedSkillsDirectory,
    );
    await client.agents.blocks.update("skills", {
      agent_id: agent.id,
      value: formattedSkills,
    });
  } catch (error) {
    console.warn(
      `Failed to update skills: ${error instanceof Error ? error.message : String(error)}`,
    );
  }

  // Validate output format
  const outputFormat =
    (values["output-format"] as string | undefined) || "text";
  const includePartialMessages = Boolean(values["include-partial-messages"]);
  if (!["text", "json", "stream-json"].includes(outputFormat)) {
    console.error(
      `Error: Invalid output format "${outputFormat}". Valid formats: text, json, stream-json`,
    );
    process.exit(1);
  }
  if (inputFormat && inputFormat !== "stream-json") {
    console.error(
      `Error: Invalid input format "${inputFormat}". Valid formats: stream-json`,
    );
    process.exit(1);
  }

  // If input-format is stream-json, use bidirectional mode
  if (isBidirectionalMode) {
    await runBidirectionalMode(
      agent,
      client,
      outputFormat,
      includePartialMessages,
    );
    return;
  }

  // Create buffers to accumulate stream
  const buffers = createBuffers();

  // Initialize session stats
  const sessionStats = new SessionStats();

  // Use agent.id as session_id for all stream-json messages
  const sessionId = agent.id;

  // Output init event for stream-json format
  if (outputFormat === "stream-json") {
    const initEvent: SystemInitMessage = {
      type: "system",
      subtype: "init",
      session_id: sessionId,
      agent_id: agent.id,
      model: agent.llm_config?.model ?? "",
      tools:
        agent.tools?.map((t) => t.name).filter((n): n is string => !!n) || [],
      cwd: process.cwd(),
      mcp_servers: [],
      permission_mode: "",
      slash_commands: [],
      uuid: `init-${agent.id}`,
    };
    console.log(JSON.stringify(initEvent));
  }

  // Helper to resolve any pending approvals before sending user input
  const resolveAllPendingApprovals = async () => {
    const { getResumeData } = await import("./agent/check-approval");
    while (true) {
      // Re-fetch agent to get latest in-context messages (source of truth for backend)
      const freshAgent = await client.agents.retrieve(agent.id);
      const resume = await getResumeData(client, freshAgent);

      // Use plural field for parallel tool calls
      const pendingApprovals = resume.pendingApprovals || [];
      if (pendingApprovals.length === 0) break;

      // Phase 1: Collect decisions for all approvals
      type Decision =
        | {
            type: "approve";
            approval: {
              toolCallId: string;
              toolName: string;
              toolArgs: string;
            };
            reason: string;
            matchedRule: string;
          }
        | {
            type: "deny";
            approval: {
              toolCallId: string;
              toolName: string;
              toolArgs: string;
            };
            reason: string;
          };

      const decisions: Decision[] = [];

      for (const currentApproval of pendingApprovals) {
        const { toolName, toolArgs } = currentApproval;
        const parsedArgs = safeJsonParseOr<Record<string, unknown>>(
          toolArgs || "{}",
          {},
        );
        const permission = await checkToolPermission(toolName, parsedArgs);

        if (permission.decision === "deny" || permission.decision === "ask") {
          const denyReason =
            permission.decision === "ask"
              ? "Tool requires approval (headless mode)"
              : `Permission denied: ${permission.matchedRule || permission.reason}`;
          decisions.push({
            type: "deny",
            approval: currentApproval,
            reason: denyReason,
          });
          continue;
        }

        // Verify required args present; if missing, deny so the model retries with args
        const { getToolSchema } = await import("./tools/manager");
        const schema = getToolSchema(toolName);
        const required =
          (schema?.input_schema?.required as string[] | undefined) || [];
        const missing = required.filter(
          (key) => !(key in parsedArgs) || parsedArgs[key] == null,
        );
        if (missing.length > 0) {
          decisions.push({
            type: "deny",
            approval: currentApproval,
            reason: `Missing required parameter${missing.length > 1 ? "s" : ""}: ${missing.join(", ")}`,
          });
          continue;
        }

        // Approve for execution
        decisions.push({
          type: "approve",
          approval: currentApproval,
          reason: permission.reason || "Allowed by permission rule",
          matchedRule: permission.matchedRule || "auto-approved",
        });
      }

      // Phase 2: Execute approved tools and format results using shared function
      const { executeApprovalBatch } = await import(
        "./agent/approval-execution"
      );

      // Emit auto_approval events for stream-json format
      if (outputFormat === "stream-json") {
        for (const decision of decisions) {
          if (decision.type === "approve") {
            const autoApprovalMsg: AutoApprovalMessage = {
              type: "auto_approval",
              tool_call: {
                name: decision.approval.toolName,
                tool_call_id: decision.approval.toolCallId,
                arguments: decision.approval.toolArgs,
              },
              reason: decision.reason,
              matched_rule: decision.matchedRule,
              session_id: sessionId,
              uuid: `auto-approval-${decision.approval.toolCallId}`,
            };
            console.log(JSON.stringify(autoApprovalMsg));
          }
        }
      }

      const executedResults = await executeApprovalBatch(decisions);

      // Send all results in one batch
      const approvalInput: ApprovalCreate = {
        type: "approval",
        approvals: executedResults as ApprovalResult[],
      };

      // Send the approval to clear the pending state; drain the stream without output
      const approvalStream = await sendMessageStream(agent.id, [approvalInput]);
      if (outputFormat === "stream-json") {
        // Consume quickly but don't emit message frames to stdout
        for await (const _ of approvalStream) {
          // no-op
        }
      } else {
        await drainStreamWithResume(approvalStream, createBuffers(), () => {});
      }
    }
  };

  // Clear any pending approvals before starting a new turn
  await resolveAllPendingApprovals();

  // Build message content with reminders (plan mode first, then skill unload)
  const { permissionMode } = await import("./permissions/mode");
  const { hasLoadedSkills } = await import("./agent/context");
  let messageContent = "";

  // Add plan mode reminder if in plan mode (highest priority)
  if (permissionMode.getMode() === "plan") {
    const { PLAN_MODE_REMINDER } = await import("./agent/promptAssets");
    messageContent += PLAN_MODE_REMINDER;
  }

  // Add skill unload reminder if skills are loaded (using cached flag)
  if (hasLoadedSkills()) {
    const { SKILL_UNLOAD_REMINDER } = await import("./agent/promptAssets");
    messageContent += SKILL_UNLOAD_REMINDER;
  }

  // Add user prompt
  messageContent += prompt;

  // Start with the user message
  let currentInput: Array<MessageCreate | ApprovalCreate> = [
    {
      role: "user",
      content: [{ type: "text", text: messageContent }],
    },
  ];

  // Track lastRunId outside the while loop so it's available in catch block
  let lastKnownRunId: string | null = null;
  let llmApiErrorRetries = 0;

  try {
    while (true) {
      const stream = await sendMessageStream(agent.id, currentInput);

      // For stream-json, output each chunk as it arrives
      let stopReason: StopReasonType;
      let approvals: Array<{
        toolCallId: string;
        toolName: string;
        toolArgs: string;
      }> = [];
      let apiDurationMs: number;
      let lastRunId: string | null = null;

      if (outputFormat === "stream-json") {
        const startTime = performance.now();
        let lastStopReason: StopReasonType | null = null;

        // Track approval requests across streamed chunks
        const approvalRequests = new Map<
          string,
          { toolName: string; args: string }
        >();
        const autoApprovalEmitted = new Set<string>();
        let _lastApprovalId: string | null = null;

        // Track all run_ids seen during this turn
        const runIds = new Set<string>();

        for await (const chunk of stream) {
          // Track run_id if present
          if ("run_id" in chunk && chunk.run_id) {
            runIds.add(chunk.run_id);
          }

          // Detect mid-stream errors
          // Case 1: LettaErrorMessage from the API (has message_type: "error_message")
          if (
            "message_type" in chunk &&
            chunk.message_type === "error_message"
          ) {
            // This is a LettaErrorMessage - nest it in our wire format
            const apiError = chunk as LettaStreamingResponse.LettaErrorMessage;
            const errorEvent: ErrorMessage = {
              type: "error",
              message: apiError.message,
              stop_reason: "error",
              run_id: apiError.run_id,
              api_error: apiError,
              session_id: sessionId,
              uuid: crypto.randomUUID(),
            };
            console.log(JSON.stringify(errorEvent));

            // Still accumulate for tracking
            const { onChunk: accumulatorOnChunk } = await import(
              "./cli/helpers/accumulator"
            );
            accumulatorOnChunk(buffers, chunk);
            continue;
          }

          // Case 2: Generic error object without message_type
          const chunkWithError = chunk as typeof chunk & {
            error?: { message?: string; detail?: string };
          };
          if (chunkWithError.error && !("message_type" in chunk)) {
            // Emit as error event
            const errorText =
              chunkWithError.error.message || "An error occurred";
            const errorDetail = chunkWithError.error.detail || "";
            const fullErrorText = errorDetail
              ? `${errorText}: ${errorDetail}`
              : errorText;

            const errorEvent: ErrorMessage = {
              type: "error",
              message: fullErrorText,
              stop_reason: "error",
              session_id: sessionId,
              uuid: crypto.randomUUID(),
            };
            console.log(JSON.stringify(errorEvent));

            // Still accumulate for tracking
            const { onChunk: accumulatorOnChunk } = await import(
              "./cli/helpers/accumulator"
            );
            accumulatorOnChunk(buffers, chunk);
            continue;
          }

          // Detect server conflict due to pending approval; handle it and retry
          const errObj = (chunk as unknown as { error?: { detail?: string } })
            .error;
          if (errObj?.detail?.includes("Cannot send a new message")) {
            // Don't emit this error; clear approvals and retry outer loop
            await resolveAllPendingApprovals();
            // Reset state and restart turn
            lastStopReason = "error" as StopReasonType;
            break;
          }
          if (
            errObj?.detail?.includes(
              "No tool call is currently awaiting approval",
            )
          ) {
            // Server isn't ready for an approval yet; let the stream continue until it is
            // Suppress the error frame from output
            continue;
          }
          // Check if we should skip outputting approval requests in bypass mode
          const isApprovalRequest =
            chunk.message_type === "approval_request_message";
          let shouldOutputChunk = true;

          // Track approval requests (stream-aware: accumulate by tool_call_id)
          if (isApprovalRequest) {
            const chunkWithTools = chunk as typeof chunk & {
              tool_call?: {
                tool_call_id?: string;
                name?: string;
                arguments?: string;
              };
              tool_calls?: Array<{
                tool_call_id?: string;
                name?: string;
                arguments?: string;
              }>;
            };

            const toolCalls = Array.isArray(chunkWithTools.tool_calls)
              ? chunkWithTools.tool_calls
              : chunkWithTools.tool_call
                ? [chunkWithTools.tool_call]
                : [];

            for (const toolCall of toolCalls) {
              // Many backends stream tool_call chunks where only the first frame
              // carries the tool_call_id; subsequent argument deltas omit it.
              // Fall back to the last seen id within this turn so we can
              // properly accumulate args.
              let id: string | null = toolCall?.tool_call_id ?? _lastApprovalId;
              if (!id) {
                // As an additional guard, if exactly one approval is being
                // tracked already, use that id for continued argument deltas.
                if (approvalRequests.size === 1) {
                  id = Array.from(approvalRequests.keys())[0] ?? null;
                }
              }
              if (!id) continue; // cannot safely attribute this chunk

              _lastApprovalId = id;

              // Concatenate argument deltas; do not inject placeholder JSON
              const prev = approvalRequests.get(id);
              const base = prev?.args ?? "";
              const incomingArgs =
                toolCall?.arguments != null ? base + toolCall.arguments : base;

              // Preserve previously seen name; set if provided in this chunk
              const nextName = toolCall?.name || prev?.toolName || "";
              approvalRequests.set(id, {
                toolName: nextName,
                args: incomingArgs,
              });

              // Keep an up-to-date approvals array for downstream handling
              // Update existing approval if present, otherwise add new one
              const existingIndex = approvals.findIndex(
                (a) => a.toolCallId === id,
              );
              const approvalObj = {
                toolCallId: id,
                toolName: nextName,
                toolArgs: incomingArgs,
              };
              if (existingIndex >= 0) {
                approvals[existingIndex] = approvalObj;
              } else {
                approvals.push(approvalObj);
              }

              // Check if this approval will be auto-approved. Dedup per tool_call_id
              if (!autoApprovalEmitted.has(id) && nextName) {
                const parsedArgs = safeJsonParseOr<Record<
                  string,
                  unknown
                > | null>(incomingArgs || "{}", null);
                const permission = await checkToolPermission(
                  nextName,
                  parsedArgs || {},
                );
                if (permission.decision === "allow" && parsedArgs) {
                  // Only emit auto_approval if we already have all required params
                  const { getToolSchema } = await import("./tools/manager");
                  const schema = getToolSchema(nextName);
                  const required =
                    (schema?.input_schema?.required as string[] | undefined) ||
                    [];
                  const missing = required.filter(
                    (key) =>
                      !(key in parsedArgs) ||
                      (parsedArgs as Record<string, unknown>)[key] == null,
                  );
                  if (missing.length === 0) {
                    shouldOutputChunk = false;
                    const autoApprovalMsg: AutoApprovalMessage = {
                      type: "auto_approval",
                      tool_call: {
                        name: nextName,
                        tool_call_id: id,
                        arguments: incomingArgs || "{}",
                      },
                      reason: permission.reason || "Allowed by permission rule",
                      matched_rule: permission.matchedRule || "auto-approved",
                      session_id: sessionId,
                      uuid: `auto-approval-${id}`,
                    };
                    console.log(JSON.stringify(autoApprovalMsg));
                    autoApprovalEmitted.add(id);
                  }
                }
              }
            }
          }

          // Output chunk as message event (unless filtered)
          if (shouldOutputChunk) {
            // Use existing otid or id from the Letta SDK chunk
            const chunkWithIds = chunk as typeof chunk & {
              otid?: string;
              id?: string;
            };
            const uuid = chunkWithIds.otid || chunkWithIds.id;

            if (includePartialMessages) {
              // Emit as stream_event wrapper (like Claude Code with --include-partial-messages)
              const streamEvent: StreamEvent = {
                type: "stream_event",
                event: chunk,
                session_id: sessionId,
                uuid: uuid || crypto.randomUUID(),
              };
              console.log(JSON.stringify(streamEvent));
            } else {
              // Emit as regular message (default)
              const msg: MessageWire = {
                type: "message",
                ...chunk,
                session_id: sessionId,
                uuid: uuid || crypto.randomUUID(),
              };
              console.log(JSON.stringify(msg));
            }
          }

          // Still accumulate for approval tracking
          const { onChunk } = await import("./cli/helpers/accumulator");
          onChunk(buffers, chunk);

          // Track stop reason
          if (chunk.message_type === "stop_reason") {
            lastStopReason = chunk.stop_reason;
          }
        }

        stopReason = lastStopReason || "error";
        apiDurationMs = performance.now() - startTime;
        // Use the last run_id we saw (if any)
        lastRunId = runIds.size > 0 ? Array.from(runIds).pop() || null : null;
        if (lastRunId) lastKnownRunId = lastRunId;

        // Mark final line as finished
        const { markCurrentLineAsFinished } = await import(
          "./cli/helpers/accumulator"
        );
        markCurrentLineAsFinished(buffers);
      } else {
        // Normal mode: use drainStreamWithResume
        const result = await drainStreamWithResume(
          stream,
          buffers,
          () => {}, // No UI refresh needed in headless mode
        );
        stopReason = result.stopReason;
        approvals = result.approvals || [];
        apiDurationMs = result.apiDurationMs;
        lastRunId = result.lastRunId || null;
        if (lastRunId) lastKnownRunId = lastRunId;
      }

      // Track API duration for this stream
      sessionStats.endTurn(apiDurationMs);

      // Case 1: Turn ended normally
      if (stopReason === "end_turn") {
        break;
      }

      // Case 2: Requires approval - batch process all approvals
      if (stopReason === "requires_approval") {
        if (approvals.length === 0) {
          console.error("Unexpected empty approvals array");
          process.exit(1);
        }

        // Phase 1: Collect decisions for all approvals
        type Decision =
          | {
              type: "approve";
              approval: {
                toolCallId: string;
                toolName: string;
                toolArgs: string;
              };
            }
          | {
              type: "deny";
              approval: {
                toolCallId: string;
                toolName: string;
                toolArgs: string;
              };
              reason: string;
            };

        const decisions: Decision[] = [];

        for (const currentApproval of approvals) {
          const { toolName, toolArgs } = currentApproval;

          // Check permission using existing permission system
          const parsedArgs = safeJsonParseOr<Record<string, unknown>>(
            toolArgs,
            {},
          );
          const permission = await checkToolPermission(toolName, parsedArgs);

          // Handle deny decision
          if (permission.decision === "deny") {
            const denyReason = `Permission denied: ${permission.matchedRule || permission.reason}`;
            decisions.push({
              type: "deny",
              approval: currentApproval,
              reason: denyReason,
            });
            continue;
          }

          // Handle ask decision - in headless mode, auto-deny
          if (permission.decision === "ask") {
            decisions.push({
              type: "deny",
              approval: currentApproval,
              reason: "Tool requires approval (headless mode)",
            });
            continue;
          }

          // Permission is "allow" - verify we have required arguments before executing
          const { getToolSchema } = await import("./tools/manager");
          const schema = getToolSchema(toolName);
          const required =
            (schema?.input_schema?.required as string[] | undefined) || [];
          const missing = required.filter(
            (key) => !(key in parsedArgs) || parsedArgs[key] == null,
          );
          if (missing.length > 0) {
            // Auto-deny with a clear reason so the model can retry with arguments
            decisions.push({
              type: "deny",
              approval: currentApproval,
              reason: `Missing required parameter${missing.length > 1 ? "s" : ""}: ${missing.join(", ")}`,
            });
            continue;
          }

          // Approve this tool for execution
          decisions.push({
            type: "approve",
            approval: currentApproval,
          });
        }

        // Phase 2: Execute all approved tools and format results using shared function
        const { executeApprovalBatch } = await import(
          "./agent/approval-execution"
        );
        const executedResults = await executeApprovalBatch(decisions);

        // Send all results in one batch
        currentInput = [
          {
            type: "approval",
            approvals: executedResults as ApprovalResult[],
          },
        ];
        continue;
      }

      // Case 3: Transient LLM API error - retry with exponential backoff up to a limit
      if (stopReason === "llm_api_error") {
        if (llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES) {
          const attempt = llmApiErrorRetries + 1;
          const baseDelayMs = 1000;
          const delayMs = baseDelayMs * 2 ** (attempt - 1);

          llmApiErrorRetries = attempt;

          if (outputFormat === "stream-json") {
            const retryMsg: RetryMessage = {
              type: "retry",
              reason: "llm_api_error",
              attempt,
              max_attempts: LLM_API_ERROR_MAX_RETRIES,
              delay_ms: delayMs,
              run_id: lastRunId ?? undefined,
              session_id: sessionId,
              uuid: `retry-${lastRunId || crypto.randomUUID()}`,
            };
            console.log(JSON.stringify(retryMsg));
          } else {
            const delaySeconds = Math.round(delayMs / 1000);
            console.error(
              `LLM API error encountered (attempt ${attempt} of ${LLM_API_ERROR_MAX_RETRIES}), retrying in ${delaySeconds}s...`,
            );
          }

          // Exponential backoff before retrying the same input
          await new Promise((resolve) => setTimeout(resolve, delayMs));
          continue;
        }
      }

      // Unexpected stop reason (error, llm_api_error, etc.)
      // Before failing, check run metadata to see if this is a retriable llm_api_error
      // Fallback check: in case stop_reason is "error" but metadata indicates LLM error
      // This could happen if there's a backend edge case where LLMError is raised but
      // stop_reason isn't set correctly. The metadata.error is a LettaErrorMessage with
      // error_type="llm_error" for LLM errors (see streaming_service.py:402-411)
      if (
        stopReason === "error" &&
        lastRunId &&
        llmApiErrorRetries < LLM_API_ERROR_MAX_RETRIES
      ) {
        try {
          const run = await client.runs.retrieve(lastRunId);
          const metaError = run.metadata?.error as
            | {
                error_type?: string;
                message?: string;
                detail?: string;
                // Handle nested error structure (error.error) that can occur in some edge cases
                error?: { error_type?: string; detail?: string };
              }
            | undefined;

          // Check for llm_error at top level or nested (handles error.error nesting)
          const errorType =
            metaError?.error_type ?? metaError?.error?.error_type;

          // Fallback: detect LLM provider errors from detail even if misclassified as internal_error
          // Patterns are derived from handle_llm_error() message formats in the backend
          const detail = metaError?.detail ?? metaError?.error?.detail ?? "";
          const llmProviderPatterns = [
            "Anthropic API error", // anthropic_client.py:759
            "OpenAI API error", // openai_client.py:1034
            "Google Vertex API error", // google_vertex_client.py:848
            "overloaded", // anthropic_client.py:753 - used for LLMProviderOverloaded
            "api_error", // Anthropic SDK error type field
          ];
          const isLlmErrorFromDetail =
            errorType === "internal_error" &&
            llmProviderPatterns.some((pattern) => detail.includes(pattern));

          if (errorType === "llm_error" || isLlmErrorFromDetail) {
            const attempt = llmApiErrorRetries + 1;
            const baseDelayMs = 1000;
            const delayMs = baseDelayMs * 2 ** (attempt - 1);

            llmApiErrorRetries = attempt;

            if (outputFormat === "stream-json") {
              const retryMsg: RetryMessage = {
                type: "retry",
                reason: "llm_api_error",
                attempt,
                max_attempts: LLM_API_ERROR_MAX_RETRIES,
                delay_ms: delayMs,
                run_id: lastRunId ?? undefined,
                session_id: sessionId,
                uuid: `retry-${lastRunId || crypto.randomUUID()}`,
              };
              console.log(JSON.stringify(retryMsg));
            } else {
              const delaySeconds = Math.round(delayMs / 1000);
              console.error(
                `LLM API error encountered (attempt ${attempt} of ${LLM_API_ERROR_MAX_RETRIES}), retrying in ${delaySeconds}s...`,
              );
            }

            await new Promise((resolve) => setTimeout(resolve, delayMs));
            continue;
          }
        } catch (_e) {
          // If we can't fetch run metadata, fall through to normal error handling
        }
      }

      // Mark incomplete tool calls as cancelled to prevent stuck state
      markIncompleteToolsAsCancelled(buffers);

      // Extract error details from buffers if available
      const errorLines = toLines(buffers).filter(
        (line) => line.kind === "error",
      );
      const errorMessages = errorLines
        .map((line) => ("text" in line ? line.text : ""))
        .filter(Boolean);

      let errorMessage =
        errorMessages.length > 0
          ? errorMessages.join("; ")
          : `Unexpected stop reason: ${stopReason}`;

      // Fetch detailed error from run metadata if available (same as TUI mode)
      if (lastRunId && errorMessages.length === 0) {
        try {
          const run = await client.runs.retrieve(lastRunId);
          if (run.metadata?.error) {
            const errorData = run.metadata.error as {
              type?: string;
              message?: string;
              detail?: string;
            };
            // Construct error object that formatErrorDetails can parse
            const errorObject = {
              error: {
                error: errorData,
                run_id: lastRunId,
              },
            };
            errorMessage = formatErrorDetails(errorObject, agent.id);
          }
        } catch (_e) {
          // If we can't fetch error details, append note to error message
          errorMessage = `${errorMessage}\n(Unable to fetch additional error details from server)`;
        }
      }

      if (outputFormat === "stream-json") {
        // Emit error event
        const errorMsg: ErrorMessage = {
          type: "error",
          message: errorMessage,
          stop_reason: stopReason,
          run_id: lastRunId ?? undefined,
          session_id: sessionId,
          uuid: `error-${lastRunId || crypto.randomUUID()}`,
        };
        console.log(JSON.stringify(errorMsg));
      } else {
        console.error(`Error: ${errorMessage}`);
      }
      process.exit(1);
    }
  } catch (error) {
    // Mark incomplete tool calls as cancelled
    markIncompleteToolsAsCancelled(buffers);

    // Use comprehensive error formatting (same as TUI mode)
    const errorDetails = formatErrorDetails(error, agent.id);

    if (outputFormat === "stream-json") {
      const errorMsg: ErrorMessage = {
        type: "error",
        message: errorDetails,
        stop_reason: "error",
        run_id: lastKnownRunId ?? undefined,
        session_id: sessionId,
        uuid: `error-${lastKnownRunId || crypto.randomUUID()}`,
      };
      console.log(JSON.stringify(errorMsg));
    } else {
      console.error(`Error: ${errorDetails}`);
    }
    process.exit(1);
  }

  // Update stats with final usage data from buffers
  sessionStats.updateUsageFromBuffers(buffers);

  // Extract final result from transcript, with sensible fallbacks
  const lines = toLines(buffers);
  const reversed = [...lines].reverse();

  const lastAssistant = reversed.find(
    (line) =>
      line.kind === "assistant" &&
      "text" in line &&
      typeof line.text === "string" &&
      line.text.trim().length > 0,
  ) as Extract<Line, { kind: "assistant" }> | undefined;

  const lastReasoning = reversed.find(
    (line) =>
      line.kind === "reasoning" &&
      "text" in line &&
      typeof line.text === "string" &&
      line.text.trim().length > 0,
  ) as Extract<Line, { kind: "reasoning" }> | undefined;

  const lastToolResult = reversed.find(
    (line) =>
      line.kind === "tool_call" &&
      "resultText" in line &&
      typeof (line as Extract<Line, { kind: "tool_call" }>).resultText ===
        "string" &&
      ((line as Extract<Line, { kind: "tool_call" }>).resultText ?? "").trim()
        .length > 0,
  ) as Extract<Line, { kind: "tool_call" }> | undefined;

  const resultText =
    lastAssistant?.text ||
    lastReasoning?.text ||
    lastToolResult?.resultText ||
    "No assistant response found";

  // Output based on format
  if (outputFormat === "json") {
    const stats = sessionStats.getSnapshot();
    const output = {
      type: "result",
      subtype: "success",
      is_error: false,
      duration_ms: Math.round(stats.totalWallMs),
      duration_api_ms: Math.round(stats.totalApiMs),
      num_turns: stats.usage.stepCount,
      result: resultText,
      agent_id: agent.id,
      usage: {
        prompt_tokens: stats.usage.promptTokens,
        completion_tokens: stats.usage.completionTokens,
        total_tokens: stats.usage.totalTokens,
      },
    };
    console.log(JSON.stringify(output, null, 2));
  } else if (outputFormat === "stream-json") {
    // Output final result event
    const stats = sessionStats.getSnapshot();

    // Collect all run_ids from buffers
    const allRunIds = new Set<string>();
    for (const line of toLines(buffers)) {
      // Extract run_id from any line that might have it
      // This is a fallback in case we missed any during streaming
      if ("run_id" in line && typeof line.run_id === "string") {
        allRunIds.add(line.run_id);
      }
    }

    // Use the last run_id as the result uuid if available, otherwise derive from agent_id
    const resultUuid =
      allRunIds.size > 0
        ? `result-${Array.from(allRunIds).pop()}`
        : `result-${agent.id}`;
    const resultEvent: ResultMessage = {
      type: "result",
      subtype: "success",
      session_id: sessionId,
      duration_ms: Math.round(stats.totalWallMs),
      duration_api_ms: Math.round(stats.totalApiMs),
      num_turns: stats.usage.stepCount,
      result: resultText,
      agent_id: agent.id,
      run_ids: Array.from(allRunIds),
      usage: {
        prompt_tokens: stats.usage.promptTokens,
        completion_tokens: stats.usage.completionTokens,
        total_tokens: stats.usage.totalTokens,
      },
      uuid: resultUuid,
    };
    console.log(JSON.stringify(resultEvent));
  } else {
    // text format (default)
    if (!resultText || resultText === "No assistant response found") {
      console.error("No assistant response found");
      process.exit(1);
    }
    console.log(resultText);
  }
}

/**
 * Bidirectional mode for SDK communication.
 * Reads JSON messages from stdin, processes them, and outputs responses.
 * Stays alive until stdin closes.
 */
async function runBidirectionalMode(
  agent: AgentState,
  _client: Letta,
  _outputFormat: string,
  includePartialMessages: boolean,
): Promise<void> {
  const sessionId = agent.id;
  const readline = await import("node:readline");

  // Emit init event
  const initEvent = {
    type: "system",
    subtype: "init",
    session_id: sessionId,
    agent_id: agent.id,
    model: agent.llm_config?.model,
    tools: agent.tools?.map((t) => t.name) || [],
    cwd: process.cwd(),
    uuid: `init-${agent.id}`,
  };
  console.log(JSON.stringify(initEvent));

  // Track current operation for interrupt support
  let currentAbortController: AbortController | null = null;

  // Create readline interface for stdin
  const rl = readline.createInterface({
    input: process.stdin,
    terminal: false,
  });

  // Process lines as they arrive using async iterator
  for await (const line of rl) {
    if (!line.trim()) continue;

    let message: {
      type: string;
      message?: { role: string; content: string };
      request_id?: string;
      request?: { subtype: string };
      session_id?: string;
    };

    try {
      message = JSON.parse(line);
    } catch {
      const errorMsg: ErrorMessage = {
        type: "error",
        message: "Invalid JSON input",
        stop_reason: "error",
        session_id: sessionId,
        uuid: crypto.randomUUID(),
      };
      console.log(JSON.stringify(errorMsg));
      continue;
    }

    // Handle control requests
    if (message.type === "control_request") {
      const subtype = message.request?.subtype;
      const requestId = message.request_id;

      if (subtype === "initialize") {
        // Return session info
        const initResponse: ControlResponse = {
          type: "control_response",
          response: {
            subtype: "success",
            request_id: requestId ?? "",
            response: {
              agent_id: agent.id,
              model: agent.llm_config?.model,
              tools: agent.tools?.map((t) => t.name) || [],
            },
          },
          session_id: sessionId,
          uuid: crypto.randomUUID(),
        };
        console.log(JSON.stringify(initResponse));
      } else if (subtype === "interrupt") {
        // Abort current operation if any
        if (currentAbortController !== null) {
          (currentAbortController as AbortController).abort();
          currentAbortController = null;
        }
        const interruptResponse: ControlResponse = {
          type: "control_response",
          response: {
            subtype: "success",
            request_id: requestId ?? "",
          },
          session_id: sessionId,
          uuid: crypto.randomUUID(),
        };
        console.log(JSON.stringify(interruptResponse));
      } else {
        const errorResponse: ControlResponse = {
          type: "control_response",
          response: {
            subtype: "error",
            request_id: requestId ?? "",
            error: `Unknown control request subtype: ${subtype}`,
          },
          session_id: sessionId,
          uuid: crypto.randomUUID(),
        };
        console.log(JSON.stringify(errorResponse));
      }
      continue;
    }

    // Handle user messages
    if (message.type === "user" && message.message?.content) {
      const userContent = message.message.content;

      // Create abort controller for this operation
      currentAbortController = new AbortController();

      try {
        // Send message to agent
        const stream = await sendMessageStream(agent.id, [
          { role: "user", content: userContent },
        ]);

        const buffers = createBuffers();
        const startTime = performance.now();

        // Process stream
        for await (const chunk of stream) {
          // Check if aborted
          if (currentAbortController?.signal.aborted) {
            break;
          }

          // Output chunk
          const chunkWithIds = chunk as typeof chunk & {
            otid?: string;
            id?: string;
          };
          const uuid = chunkWithIds.otid || chunkWithIds.id;

          if (includePartialMessages) {
            const streamEvent: StreamEvent = {
              type: "stream_event",
              event: chunk,
              session_id: sessionId,
              uuid: uuid || crypto.randomUUID(),
            };
            console.log(JSON.stringify(streamEvent));
          } else {
            const msg: MessageWire = {
              type: "message",
              ...chunk,
              session_id: sessionId,
              uuid: uuid || crypto.randomUUID(),
            };
            console.log(JSON.stringify(msg));
          }

          // Accumulate for result
          const { onChunk } = await import("./cli/helpers/accumulator");
          onChunk(buffers, chunk);
        }

        // Emit result
        const durationMs = performance.now() - startTime;
        const lines = toLines(buffers);
        const reversed = [...lines].reverse();
        const lastAssistant = reversed.find(
          (line) =>
            line.kind === "assistant" &&
            "text" in line &&
            typeof line.text === "string" &&
            line.text.trim().length > 0,
        ) as Extract<Line, { kind: "assistant" }> | undefined;
        const resultText = lastAssistant?.text || "";

        const resultMsg: ResultMessage = {
          type: "result",
          subtype: currentAbortController?.signal.aborted
            ? "interrupted"
            : "success",
          session_id: sessionId,
          duration_ms: Math.round(durationMs),
          duration_api_ms: 0, // Not tracked in bidirectional mode
          num_turns: 1,
          result: resultText,
          agent_id: agent.id,
          run_ids: [],
          usage: null,
          uuid: `result-${agent.id}-${Date.now()}`,
        };
        console.log(JSON.stringify(resultMsg));
      } catch (error) {
        const errorMsg: ErrorMessage = {
          type: "error",
          message:
            error instanceof Error ? error.message : "Unknown error occurred",
          stop_reason: "error",
          session_id: sessionId,
          uuid: crypto.randomUUID(),
        };
        console.log(JSON.stringify(errorMsg));
      } finally {
        currentAbortController = null;
      }
      continue;
    }

    // Unknown message type
    const errorMsg: ErrorMessage = {
      type: "error",
      message: `Unknown message type: ${message.type}`,
      stop_reason: "error",
      session_id: sessionId,
      uuid: crypto.randomUUID(),
    };
    console.log(JSON.stringify(errorMsg));
  }

  // Stdin closed, exit gracefully
  process.exit(0);
}