fix: Patch headless mode for GPT-5 (#88)

Co-authored-by: cpacker <packercharles@gmail.com>
This commit is contained in:
Kevin Lin
2025-11-16 20:48:50 -08:00
committed by GitHub
parent 8df4c08510
commit 754db3b273
8 changed files with 75 additions and 46 deletions

View File

@@ -1,6 +1,11 @@
// src/agent/approval-execution.ts
// Shared logic for executing approval batches (used by both interactive and headless modes)
import type {
ApprovalCreate,
ToolReturn,
} from "@letta-ai/letta-client/resources/agents/messages";
import type { ToolReturnMessage } from "@letta-ai/letta-client/resources/tools";
import type { ApprovalRequest } from "../cli/helpers/stream";
import { executeTool } from "../tools/manager";
@@ -8,16 +13,8 @@ export type ApprovalDecision =
| { type: "approve"; approval: ApprovalRequest }
| { type: "deny"; approval: ApprovalRequest; reason: string };
export type ApprovalResult = {
type: "tool" | "approval";
tool_call_id: string;
tool_return?: string;
status?: "success" | "error";
stdout?: string[];
stderr?: string[];
approve?: boolean;
reason?: string;
};
// Align result type with the SDK's expected union for approvals payloads
export type ApprovalResult = ToolReturn | ApprovalCreate.ApprovalReturn;
/**
* Execute a batch of approval decisions and format results for the backend.
@@ -35,7 +32,7 @@ export type ApprovalResult = {
*/
export async function executeApprovalBatch(
decisions: ApprovalDecision[],
onChunk?: (chunk: any) => void,
onChunk?: (chunk: ToolReturnMessage) => void,
): Promise<ApprovalResult[]> {
const results: ApprovalResult[] = [];

View File

@@ -121,13 +121,20 @@ export async function getResumeData(
: [];
// Extract ALL tool calls for parallel approval support
pendingApprovals = toolCalls
.filter((tc) => tc?.tool_call_id && tc.name && tc.arguments)
.map((tc) => ({
toolCallId: tc.tool_call_id!,
toolName: tc.name!,
toolArgs: tc.arguments!,
}));
type ValidToolCall = {
tool_call_id: string;
name: string;
arguments: string;
};
const validToolCalls = toolCalls.filter(
(tc): tc is ValidToolCall =>
!!tc && !!tc.tool_call_id && !!tc.name && !!tc.arguments,
);
pendingApprovals = validToolCalls.map((tc) => ({
toolCallId: tc.tool_call_id,
toolName: tc.name,
toolArgs: tc.arguments,
}));
// Set legacy singular field for backward compatibility (first approval only)
if (pendingApprovals.length > 0) {

View File

@@ -224,7 +224,9 @@ export async function createAgent(
// Apply updateArgs if provided (e.g., reasoningEffort, verbosity, etc.)
// Skip if updateArgs only contains context_window (already set in create)
if (updateArgs && Object.keys(updateArgs).length > 0) {
const { context_window, ...otherArgs } = updateArgs;
// Remove context_window if present; already set during create
const otherArgs = { ...updateArgs } as Record<string, unknown>;
delete (otherArgs as Record<string, unknown>).context_window;
if (Object.keys(otherArgs).length > 0) {
await updateAgentLLMConfig(
agent.id,

View File

@@ -19,7 +19,7 @@ import { getClient } from "./client";
*/
export async function updateAgentLLMConfig(
agentId: string,
modelHandle: string,
_modelHandle: string,
updateArgs?: Record<string, unknown>,
preserveParallelToolCalls?: boolean,
): Promise<LlmConfig> {

View File

@@ -2,8 +2,8 @@
* Ink UI components for OAuth setup flow
*/
import { hostname } from "node:os";
import { Box, Text, useApp, useInput } from "ink";
import { hostname } from "os";
import { useState } from "react";
import { asciiLogo } from "../cli/components/AsciiArt.ts";
import { settingsManager } from "../settings-manager";

View File

@@ -12,6 +12,7 @@ import type {
import type { LlmConfig } from "@letta-ai/letta-client/resources/models/models";
import { Box, Static } from "ink";
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import type { ApprovalResult } from "../agent/approval-execution";
import { getResumeData } from "../agent/check-approval";
import { getClient } from "../agent/client";
import { sendMessageStream } from "../agent/message";
@@ -19,6 +20,7 @@ import { linkToolsToAgent, unlinkToolsFromAgent } from "../agent/modify";
import { SessionStats } from "../agent/stats";
import type { ApprovalContext } from "../permissions/analyzer";
import { permissionMode } from "../permissions/mode";
import type { ToolExecutionResult } from "../tools/manager";
import {
analyzeToolApproval,
checkToolPermission,
@@ -148,13 +150,13 @@ export default function App({
| { type: "deny"; approval: ApprovalRequest; reason: string }
>
>([]);
const [isExecutingTool, setIsExecutingTool] = useState(false);
const [isExecutingTool, _setIsExecutingTool] = useState(false);
// Track auto-handled results to combine with user decisions
const [autoHandledResults, setAutoHandledResults] = useState<
Array<{
toolCallId: string;
result: any;
result: ToolExecutionResult;
}>
>([]);
const [autoDeniedApprovals, setAutoDeniedApprovals] = useState<
@@ -1291,7 +1293,7 @@ export default function App({
await processConversation([
{
type: "approval",
approvals: allResults as any, // Type assertion: union type with optional fields is compatible at runtime
approvals: allResults as ApprovalResult[],
},
]);
},
@@ -1759,12 +1761,20 @@ export default function App({
<ApprovalDialog
approvals={
pendingApprovals[approvalResults.length]
? [pendingApprovals[approvalResults.length]!]
? ([
pendingApprovals[
approvalResults.length
] as ApprovalRequest,
] as ApprovalRequest[])
: []
}
approvalContexts={
approvalContexts[approvalResults.length]
? [approvalContexts[approvalResults.length]!]
? ([
approvalContexts[
approvalResults.length
] as ApprovalContext,
] as ApprovalContext[])
: []
}
progress={{

View File

@@ -6,6 +6,7 @@ import type {
} from "@letta-ai/letta-client/resources/agents/agents";
import type { ApprovalCreate } from "@letta-ai/letta-client/resources/agents/messages";
import type { StopReasonType } from "@letta-ai/letta-client/resources/runs/runs";
import type { ApprovalResult } from "./agent/approval-execution";
import { getClient } from "./agent/client";
import { createAgent } from "./agent/create";
import { sendMessageStream } from "./agent/message";
@@ -15,7 +16,7 @@ import { createBuffers, toLines } from "./cli/helpers/accumulator";
import { safeJsonParseOr } from "./cli/helpers/safeJsonParse";
import { drainStreamWithResume } from "./cli/helpers/stream";
import { settingsManager } from "./settings-manager";
import { checkToolPermission, executeTool } from "./tools/manager";
import { checkToolPermission } from "./tools/manager";
export async function handleHeadlessCommand(
argv: string[],
@@ -213,7 +214,7 @@ export async function handleHeadlessCommand(
const decisions: Decision[] = [];
for (const currentApproval of pendingApprovals) {
const { toolCallId, toolName, toolArgs } = currentApproval;
const { toolName, toolArgs } = currentApproval;
const parsedArgs = safeJsonParseOr<Record<string, unknown>>(
toolArgs || "{}",
{},
@@ -239,8 +240,7 @@ export async function handleHeadlessCommand(
const required =
(schema?.input_schema?.required as string[] | undefined) || [];
const missing = required.filter(
(key) =>
!(key in parsedArgs) || String(parsedArgs[key] ?? "").length === 0,
(key) => !(key in parsedArgs) || parsedArgs[key] == null,
);
if (missing.length > 0) {
decisions.push({
@@ -283,7 +283,7 @@ export async function handleHeadlessCommand(
// Send all results in one batch
const approvalInput: ApprovalCreate = {
type: "approval",
approvals: executedResults as any,
approvals: executedResults as ApprovalResult[],
};
// Send the approval to clear the pending state; drain the stream without output
@@ -428,8 +428,19 @@ export async function handleHeadlessCommand(
: [];
for (const toolCall of toolCalls) {
const id = toolCall?.tool_call_id;
if (!id) continue; // remain strict: do not invent ids
// Many backends stream tool_call chunks where only the first frame
// carries the tool_call_id; subsequent argument deltas omit it.
// Fall back to the last seen id within this turn so we can
// properly accumulate args.
let id: string | null = toolCall?.tool_call_id ?? _lastApprovalId;
if (!id) {
// As an additional guard, if exactly one approval is being
// tracked already, use that id for continued argument deltas.
if (approvalRequests.size === 1) {
id = Array.from(approvalRequests.keys())[0] ?? null;
}
}
if (!id) continue; // cannot safely attribute this chunk
_lastApprovalId = id;
@@ -437,9 +448,7 @@ export async function handleHeadlessCommand(
const prev = approvalRequests.get(id);
const base = prev?.args ?? "";
const incomingArgs =
toolCall?.arguments && toolCall.arguments.trim().length > 0
? base + toolCall.arguments
: base;
toolCall?.arguments != null ? base + toolCall.arguments : base;
// Preserve previously seen name; set if provided in this chunk
const nextName = toolCall?.name || prev?.toolName || "";
@@ -484,8 +493,7 @@ export async function handleHeadlessCommand(
const missing = required.filter(
(key) =>
!(key in parsedArgs) ||
String((parsedArgs as Record<string, unknown>)[key] ?? "")
.length === 0,
(parsedArgs as Record<string, unknown>)[key] == null,
);
if (missing.length === 0) {
shouldOutputChunk = false;
@@ -586,7 +594,7 @@ export async function handleHeadlessCommand(
const decisions: Decision[] = [];
for (const currentApproval of approvals) {
const { toolCallId, toolName, toolArgs } = currentApproval;
const { toolName, toolArgs } = currentApproval;
// Check permission using existing permission system
const parsedArgs = safeJsonParseOr<Record<string, unknown>>(
@@ -622,9 +630,7 @@ export async function handleHeadlessCommand(
const required =
(schema?.input_schema?.required as string[] | undefined) || [];
const missing = required.filter(
(key) =>
!(key in parsedArgs) ||
String(parsedArgs[key] ?? "").length === 0,
(key) => !(key in parsedArgs) || parsedArgs[key] == null,
);
if (missing.length > 0) {
// Auto-deny with a clear reason so the model can retry with arguments
@@ -653,7 +659,7 @@ export async function handleHeadlessCommand(
currentInput = [
{
type: "approval",
approvals: executedResults as any,
approvals: executedResults as ApprovalResult[],
},
];
continue;

View File

@@ -16,12 +16,19 @@ type Args = {
};
function parseArgs(argv: string[]): Args {
const args: any = { output: "text", parallel: "on" };
const args: {
model?: string;
output: Args["output"];
parallel: Args["parallel"];
} = {
output: "text",
parallel: "on",
};
for (let i = 0; i < argv.length; i++) {
const v = argv[i];
if (v === "--model") args.model = argv[++i];
else if (v === "--output") args.output = argv[++i];
else if (v === "--parallel") args.parallel = argv[++i];
else if (v === "--output") args.output = argv[++i] as Args["output"];
else if (v === "--parallel") args.parallel = argv[++i] as Args["parallel"];
}
if (!args.model) throw new Error("Missing --model");
if (!["text", "json", "stream-json"].includes(args.output))