fix: improve error handling in headless bidirectional mode (#822)

Co-authored-by: Letta <noreply@letta.com>
2026-02-04 13:40:01 -08:00
parent 2792df7c84
commit 372c7e269b
2 changed files with 259 additions and 7 deletions
--- a/src/headless.ts
+++ b/src/headless.ts
@@ -1989,6 +1989,8 @@ async function runBidirectionalMode(
        const buffers = createBuffers(agent.id);
        const startTime = performance.now();
        let numTurns = 0;
+        let lastStopReason: StopReasonType | null = null; // Track for result subtype
+        let sawStreamError = false; // Track if we emitted an error during streaming

        // Initial input is the user message
        let currentInput: MessageCreate[] = [
@@ -2008,7 +2010,36 @@ async function runBidirectionalMode(
          const stream = await sendMessageStream(conversationId, currentInput, {
            agentId: agent.id,
          });
-          const streamJsonHook: DrainStreamHook = ({ chunk, shouldOutput }) => {
+          const streamJsonHook: DrainStreamHook = ({
+            chunk,
+            shouldOutput,
+            errorInfo,
+          }) => {
+            // Handle in-stream errors (emit ErrorMessage with full details)
+            if (errorInfo && shouldOutput) {
+              sawStreamError = true; // Track that we saw an error (affects result subtype)
+              const errorEvent: ErrorMessage = {
+                type: "error",
+                message: errorInfo.message,
+                stop_reason: "error",
+                run_id: errorInfo.run_id,
+                session_id: sessionId,
+                uuid: crypto.randomUUID(),
+                ...(errorInfo.error_type &&
+                  errorInfo.run_id && {
+                    api_error: {
+                      message_type: "error_message",
+                      message: errorInfo.message,
+                      error_type: errorInfo.error_type,
+                      detail: errorInfo.detail,
+                      run_id: errorInfo.run_id,
+                    },
+                  }),
+              };
+              console.log(JSON.stringify(errorEvent));
+              return { shouldAccumulate: true };
+            }
+
            if (!shouldOutput) {
              return { shouldAccumulate: true };
            }
@@ -2049,6 +2080,7 @@ async function runBidirectionalMode(
            streamJsonHook,
          );
          const stopReason = result.stopReason;
+          lastStopReason = stopReason; // Track for result subtype
          const approvals = result.approvals || [];

          // Case 1: Turn ended normally - break out of loop
@@ -2067,7 +2099,9 @@ async function runBidirectionalMode(
          // Case 3: Requires approval - process approvals and continue
          if (stopReason === "requires_approval") {
            if (approvals.length === 0) {
-              // No approvals to process - break
+              // Anomalous state: requires_approval but no approvals
+              // Treat as error rather than false-positive success
+              lastStopReason = "error";
              break;
            }

@@ -2245,11 +2279,23 @@ async function runBidirectionalMode(
          lastToolResult?.resultText ||
          "";

+        // Determine result subtype based on how the turn ended
+        const isAborted = currentAbortController?.signal.aborted;
+        // isError if: (1) stop reason indicates error, OR (2) we emitted an error during streaming
+        const isError =
+          sawStreamError ||
+          (lastStopReason &&
+            lastStopReason !== "end_turn" &&
+            lastStopReason !== "requires_approval");
+        const subtype: ResultMessage["subtype"] = isAborted
+          ? "interrupted"
+          : isError
+            ? "error"
+            : "success";
+
        const resultMsg: ResultMessage = {
          type: "result",
-          subtype: currentAbortController?.signal.aborted
-            ? "interrupted"
-            : "success",
+          subtype,
          session_id: sessionId,
          duration_ms: Math.round(durationMs),
          duration_api_ms: 0, // Not tracked in bidirectional mode
@@ -2260,18 +2306,44 @@ async function runBidirectionalMode(
          run_ids: [],
          usage: null,
          uuid: `result-${agent.id}-${Date.now()}`,
+          // Include stop_reason only when subtype is "error" (not "interrupted")
+          ...(subtype === "error" && {
+            stop_reason:
+              lastStopReason && lastStopReason !== "end_turn"
+                ? lastStopReason
+                : "error", // Use "error" if sawStreamError but lastStopReason was end_turn
+          }),
        };
        console.log(JSON.stringify(resultMsg));
      } catch (error) {
+        // Use formatErrorDetails for comprehensive error formatting (same as one-shot mode)
+        const errorDetails = formatErrorDetails(error, agent.id);
        const errorMsg: ErrorMessage = {
          type: "error",
-          message:
-            error instanceof Error ? error.message : "Unknown error occurred",
+          message: errorDetails,
          stop_reason: "error",
          session_id: sessionId,
          uuid: crypto.randomUUID(),
        };
        console.log(JSON.stringify(errorMsg));
+
+        // Also emit a result message with subtype: "error" so SDK knows the turn failed
+        const errorResultMsg: ResultMessage = {
+          type: "result",
+          subtype: "error",
+          session_id: sessionId,
+          duration_ms: 0,
+          duration_api_ms: 0,
+          num_turns: 0,
+          result: null,
+          agent_id: agent.id,
+          conversation_id: conversationId,
+          run_ids: [],
+          usage: null,
+          uuid: `result-error-${agent.id}-${Date.now()}`,
+          stop_reason: "error",
+        };
+        console.log(JSON.stringify(errorResultMsg));
      } finally {
        currentAbortController = null;
      }
--- a/src/integration-tests/headless-error-format.test.ts
+++ b/src/integration-tests/headless-error-format.test.ts
@@ -0,0 +1,180 @@
+import { describe, expect, test } from "bun:test";
+import type {
+  ErrorMessage,
+  ResultMessage,
+  ResultSubtype,
+} from "../types/protocol";
+
+/**
+ * Tests for error handling in headless mode.
+ *
+ * These tests document and verify the expected wire format for errors.
+ * See GitHub issue #813 for background.
+ *
+ * Expected behavior:
+ * 1. When an error occurs, ResultMessage.subtype should be "error" (not "success")
+ * 2. ErrorMessage should contain detailed API error info when available
+ * 3. Both one-shot and bidirectional modes should surface errors properly
+ */
+
+describe("headless error format types", () => {
+  test("ResultSubtype includes 'error' option", () => {
+    // This is a compile-time check - if ResultSubtype doesn't include "error",
+    // this would fail to compile.
+    const errorSubtype: ResultSubtype = "error";
+    expect(errorSubtype).toBe("error");
+
+    const successSubtype: ResultSubtype = "success";
+    expect(successSubtype).toBe("success");
+
+    const interruptedSubtype: ResultSubtype = "interrupted";
+    expect(interruptedSubtype).toBe("interrupted");
+  });
+
+  test("ResultMessage type supports stop_reason field", () => {
+    // Verify the ResultMessage type accepts stop_reason for error cases
+    const errorResult: ResultMessage = {
+      type: "result",
+      subtype: "error",
+      session_id: "test-session",
+      uuid: "test-uuid",
+      agent_id: "agent-123",
+      conversation_id: "conv-123",
+      duration_ms: 1000,
+      duration_api_ms: 500,
+      num_turns: 1,
+      result: null,
+      run_ids: ["run-123"],
+      usage: null,
+      stop_reason: "error", // This field should be present for errors
+    };
+
+    expect(errorResult.subtype).toBe("error");
+    expect(errorResult.stop_reason).toBe("error");
+  });
+
+  test("ErrorMessage type supports api_error field", () => {
+    // Verify ErrorMessage can include nested API error details
+    const errorMsg: ErrorMessage = {
+      type: "error",
+      message: "CONFLICT: Another request is being processed",
+      stop_reason: "error",
+      session_id: "test-session",
+      uuid: "test-uuid",
+      run_id: "run-123",
+      api_error: {
+        message_type: "error_message",
+        message: "CONFLICT: Another request is being processed",
+        error_type: "internal_error",
+        detail:
+          "Cannot send a new message: Another request is currently being processed for this conversation.",
+        run_id: "run-123",
+      },
+    };
+
+    expect(errorMsg.type).toBe("error");
+    expect(errorMsg.api_error).toBeDefined();
+    expect(errorMsg.api_error?.detail).toContain("Another request");
+  });
+});
+
+describe("headless error format expectations", () => {
+  /**
+   * These tests document the EXPECTED behavior for error handling.
+   * They verify the wire format contracts that the SDK depends on.
+   */
+
+  test("error result should have subtype 'error', not 'success'", () => {
+    // When an error occurs (stop_reason !== "end_turn"), the result
+    // should indicate failure, not success.
+    //
+    // Bug (issue #813): Bidirectional mode was returning subtype: "success"
+    // even when stop_reason was "error".
+    //
+    // Expected: subtype should be "error" so SDK can detect failure
+
+    // This is a contract test - verifying the expected structure
+    const mockErrorResult: ResultMessage = {
+      type: "result",
+      subtype: "error", // NOT "success"
+      session_id: "test",
+      uuid: "test",
+      agent_id: "agent-123",
+      conversation_id: "conv-123",
+      duration_ms: 1000,
+      duration_api_ms: 500,
+      num_turns: 1,
+      result: null,
+      run_ids: [],
+      usage: null,
+      stop_reason: "error",
+    };
+
+    // SDK transforms this to { success: false } based on subtype
+    const sdkSuccess = mockErrorResult.subtype === "success";
+    expect(sdkSuccess).toBe(false);
+  });
+
+  test("409 conflict error should include detail in message", () => {
+    // When API returns 409 with a detail field, that detail should be
+    // surfaced in the error message, not lost.
+    //
+    // Bug (issue #813): The detail was being lost, making debugging hard.
+    //
+    // Expected: ErrorMessage.message or api_error.detail contains the info
+
+    // Example 409 error detail
+    const conflictDetail =
+      "CONFLICT: Cannot send a new message: Another request is currently being processed for this conversation.";
+
+    // The error message should include this detail
+    const mockError: ErrorMessage = {
+      type: "error",
+      message: conflictDetail, // Detail should be in message
+      stop_reason: "error",
+      session_id: "test",
+      uuid: "test",
+      run_id: "run-123",
+    };
+
+    expect(mockError.message).toContain("CONFLICT");
+    expect(mockError.message).toContain("Another request");
+  });
+
+  test("approval pending error should include detail", () => {
+    // When conversation has a stuck approval, the error should explain this.
+    //
+    // Example error: "The agent is waiting for approval on a tool call.
+    // Please approve or deny the pending request before continuing."
+
+    const approvalDetail =
+      "CONFLICT: Cannot send a new message: The agent is waiting for approval on a tool call.";
+
+    const mockError: ErrorMessage = {
+      type: "error",
+      message: approvalDetail,
+      stop_reason: "error",
+      session_id: "test",
+      uuid: "test",
+    };
+
+    expect(mockError.message).toContain("waiting for approval");
+  });
+});
+
+/**
+ * Note for SDK team:
+ *
+ * The SDK (letta-code-sdk) transforms ResultMessage as follows:
+ *
+ *   success: msg.subtype === "success"
+ *   error: msg.subtype !== "success" ? msg.subtype : undefined
+ *
+ * With this fix:
+ * - Error results will have subtype: "error", so success will be false
+ * - The error field will be "error" (the subtype string)
+ *
+ * For more detailed error info, SDK could be updated to:
+ * 1. Parse ErrorMessage events (currently ignored)
+ * 2. Use stop_reason from ResultMessage for specific error types
+ */