ci(test): split unit vs API integration tests (#797)

2026-02-03 19:09:40 -06:00
parent 55a31adae7
commit d175b0e155
6 changed files with 292 additions and 240 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -71,9 +71,16 @@ jobs:
        uses: t1m0thyj/unlock-keyring@v1

      - name: Run tests (extended timeout)
+        # Unit tests must pass for fork PRs (no secrets). Keep API-dependent tests
+        # in a separate gated step.
+        run: bun test src/tests --timeout 15000
+
+      - name: Run integration tests (API)
+        # Only run on push to main or PRs from the same repo (not forks, to protect secrets)
+        if: ${{ github.event_name == 'push' || (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository) }}
        env:
          LETTA_API_KEY: ${{ secrets.LETTA_API_KEY }}
-        run: bun test --timeout 15000
+        run: bun test src/integration-tests --timeout 15000

      - name: Build bundle
        run: bun run build
--- a/src/integration-tests/headless-input-format.test.ts
+++ b/src/integration-tests/headless-input-format.test.ts
--- a/src/integration-tests/headless-stream-json-format.test.ts
+++ b/src/integration-tests/headless-stream-json-format.test.ts
--- a/src/integration-tests/lazy-approval-recovery.test.ts
+++ b/src/integration-tests/lazy-approval-recovery.test.ts
--- a/src/integration-tests/startup-flow.integration.test.ts
+++ b/src/integration-tests/startup-flow.integration.test.ts
@@ -0,0 +1,281 @@
+import { describe, expect, test } from "bun:test";
+import { spawn } from "node:child_process";
+
+/**
+ * Startup flow integration tests.
+ *
+ * These spawn the real CLI and require LETTA_API_KEY to be set.
+ * They are executed in CI only for push to main / trusted PRs (non-forks).
+ */
+
+const projectRoot = process.cwd();
+
+/**
+ * Spawn the CLI via `bun run dev` and collect everything it prints.
+ *
+ * @param args - Arguments appended after `bun run dev`.
+ * @param options - `timeoutMs` (default 30000) is how long to wait before the
+ *   child is killed and the promise rejects; `expectExit`, when provided,
+ *   makes the promise reject if the child closes with a different exit code.
+ * @returns The captured stdout, stderr and exit code once the child closes.
+ * @throws Rejects on timeout, on an unexpected exit code, or when the child
+ *   process emits an `error` event. Timeout and exit-code errors include the
+ *   output captured so far.
+ */
+async function runCli(
+  args: string[],
+  options: {
+    timeoutMs?: number;
+    expectExit?: number;
+  } = {},
+): Promise<{ stdout: string; stderr: string; exitCode: number | null }> {
+  const { timeoutMs = 30000, expectExit } = options;
+
+  return new Promise((resolve, reject) => {
+    const proc = spawn("bun", ["run", "dev", ...args], {
+      cwd: projectRoot,
+      // Mark as subagent to prevent polluting user's LRU settings
+      env: { ...process.env, LETTA_CODE_AGENT_ROLE: "subagent" },
+    });
+
+    // Decode at the stream level so a multi-byte UTF-8 character that is
+    // split across two chunks is reassembled instead of being corrupted by
+    // decoding each chunk on its own.
+    proc.stdout?.setEncoding("utf8");
+    proc.stderr?.setEncoding("utf8");
+
+    let stdout = "";
+    let stderr = "";
+
+    proc.stdout?.on("data", (chunk: string) => {
+      stdout += chunk;
+    });
+
+    proc.stderr?.on("data", (chunk: string) => {
+      stderr += chunk;
+    });
+
+    const timeout = setTimeout(() => {
+      proc.kill();
+      reject(
+        new Error(
+          `Timeout after ${timeoutMs}ms. stdout: ${stdout}, stderr: ${stderr}`,
+        ),
+      );
+    }, timeoutMs);
+
+    // If the timeout already rejected, the resolve/reject below is a no-op
+    // because a promise can only settle once.
+    proc.on("close", (code) => {
+      clearTimeout(timeout);
+      if (expectExit !== undefined && code !== expectExit) {
+        reject(
+          new Error(
+            `Expected exit code ${expectExit}, got ${code}. stdout: ${stdout}, stderr: ${stderr}`,
+          ),
+        );
+      } else {
+        resolve({ stdout, stderr, exitCode: code });
+      }
+    });
+
+    proc.on("error", (err) => {
+      clearTimeout(timeout);
+      reject(err);
+    });
+  });
+}
+
+// ============================================================================
+// Invalid Input Tests (require API calls but fail fast)
+// ============================================================================
+
+// Each case passes an identifier or path that does not exist and expects the
+// CLI to exit with code 1 and print "not found" on stderr.
+describe("Startup Flow - Invalid Inputs", () => {
+  test(
+    "--agent with nonexistent ID shows error",
+    async () => {
+      const result = await runCli(
+        ["--agent", "agent-definitely-does-not-exist-12345", "-p", "test"],
+        { expectExit: 1, timeoutMs: 60000 },
+      );
+      expect(result.stderr).toContain("not found");
+    },
+    // Test-level timeout is kept above runCli's so runCli reports first.
+    { timeout: 70000 },
+  );
+
+  test(
+    "--conversation with nonexistent ID shows error",
+    async () => {
+      const result = await runCli(
+        [
+          "--conversation",
+          "conversation-definitely-does-not-exist-12345",
+          "-p",
+          "test",
+        ],
+        { expectExit: 1, timeoutMs: 60000 },
+      );
+      expect(result.stderr).toContain("not found");
+    },
+    { timeout: 70000 },
+  );
+
+  test(
+    "--from-af with nonexistent file shows error",
+    async () => {
+      const result = await runCli(
+        ["--from-af", "/nonexistent/path/agent.af", "-p", "test"],
+        { expectExit: 1, timeoutMs: 30000 },
+      );
+      expect(result.stderr).toContain("not found");
+    },
+    // Without an explicit value the test runner's own timeout applies, which
+    // is shorter than runCli's 30s limit; the runner would then abort the
+    // test before runCli can kill the child and report its stdout/stderr.
+    { timeout: 40000 },
+  );
+});
+
+// ============================================================================
+// Integration Tests (require API access, create real agents)
+// ============================================================================
+
+// These tests run in order: the first one creates an agent whose ID the
+// second and third reuse. When no ID was recorded, those two log a message
+// and return without asserting anything.
+describe("Startup Flow - Integration", () => {
+  // Agent created by the first test; stays null if that test did not get far
+  // enough to record an ID.
+  let testAgentId: string | null = null;
+
+  /** Fields these tests read from the CLI's `--output-format json` output. */
+  type CliJsonOutput = {
+    agent_id?: string;
+    conversation_id?: string;
+    result?: unknown;
+  };
+
+  /**
+   * Parse the JSON object printed by the CLI.
+   *
+   * stdout includes the bun invocation line, so parsing starts at the first
+   * "{". Throws a descriptive error when stdout contains no "{" at all:
+   * without this check `indexOf` returns -1 and `slice(-1)` would hand only
+   * the last character to `JSON.parse`, hiding the real output.
+   */
+  function parseJsonOutput(stdout: string): CliJsonOutput {
+    const jsonStart = stdout.indexOf("{");
+    if (jsonStart === -1) {
+      throw new Error(`No JSON object found in CLI stdout: ${stdout}`);
+    }
+    return JSON.parse(stdout.slice(jsonStart)) as CliJsonOutput;
+  }
+
+  test(
+    "--new-agent creates agent and responds",
+    async () => {
+      const result = await runCli(
+        [
+          "--new-agent",
+          "-m",
+          "haiku",
+          "-p",
+          "Say OK and nothing else",
+          "--output-format",
+          "json",
+        ],
+        { timeoutMs: 120000 },
+      );
+
+      expect(result.exitCode).toBe(0);
+      const output = parseJsonOutput(result.stdout);
+      expect(output.agent_id).toBeDefined();
+      expect(output.result).toBeDefined();
+
+      // Save for the tests below.
+      testAgentId = output.agent_id ?? null;
+    },
+    { timeout: 130000 },
+  );
+
+  test(
+    "--agent with valid ID uses that agent",
+    async () => {
+      // Nothing to exercise if the first test did not record an agent.
+      if (!testAgentId) {
+        console.log("Skipping: no test agent available");
+        return;
+      }
+
+      const result = await runCli(
+        [
+          "--agent",
+          testAgentId,
+          "-m",
+          "haiku",
+          "-p",
+          "Say OK",
+          "--output-format",
+          "json",
+        ],
+        { timeoutMs: 120000 },
+      );
+
+      expect(result.exitCode).toBe(0);
+      const output = parseJsonOutput(result.stdout);
+      expect(output.agent_id).toBe(testAgentId);
+    },
+    { timeout: 130000 },
+  );
+
+  test(
+    "--conversation with valid ID derives agent and uses conversation",
+    async () => {
+      // Nothing to exercise if the first test did not record an agent.
+      if (!testAgentId) {
+        console.log("Skipping: no test agent available");
+        return;
+      }
+
+      // First, create a real conversation with --new (since --new-agent uses "default")
+      const createResult = await runCli(
+        [
+          "--agent",
+          testAgentId,
+          "--new",
+          "-m",
+          "haiku",
+          "-p",
+          "Say CREATED",
+          "--output-format",
+          "json",
+        ],
+        { timeoutMs: 120000 },
+      );
+      expect(createResult.exitCode).toBe(0);
+      const createOutput = parseJsonOutput(createResult.stdout);
+      const realConversationId = createOutput.conversation_id;
+      expect(realConversationId).toBeDefined();
+      expect(realConversationId).not.toBe("default");
+      if (realConversationId === undefined) {
+        // Unreachable after the assertion above; narrows the type so the ID
+        // can be passed to runCli as a string.
+        throw new Error("conversation_id missing from CLI output");
+      }
+
+      // Now check that --conversation alone resolves the owning agent.
+      const result = await runCli(
+        [
+          "--conversation",
+          realConversationId,
+          "-m",
+          "haiku",
+          "-p",
+          "Say OK",
+          "--output-format",
+          "json",
+        ],
+        { timeoutMs: 120000 },
+      );
+
+      expect(result.exitCode).toBe(0);
+      const output = parseJsonOutput(result.stdout);
+      // Same agent that owns the conversation, and the requested conversation.
+      expect(output.agent_id).toBe(testAgentId);
+      expect(output.conversation_id).toBe(realConversationId);
+    },
+    // Two sequential CLI runs, hence the larger budget.
+    { timeout: 180000 },
+  );
+
+  test(
+    "--new-agent with --init-blocks none creates minimal agent",
+    async () => {
+      const result = await runCli(
+        [
+          "--new-agent",
+          "--init-blocks",
+          "none",
+          "-m",
+          "haiku",
+          "-p",
+          "Say OK",
+          "--output-format",
+          "json",
+        ],
+        { timeoutMs: 120000 },
+      );
+
+      expect(result.exitCode).toBe(0);
+      const output = parseJsonOutput(result.stdout);
+      expect(output.agent_id).toBeDefined();
+    },
+    { timeout: 130000 },
+  );
+});
+
+// ============================================================================
+// --continue Tests (depend on LRU state, harder to isolate)
+// ============================================================================
+
+describe("Startup Flow - Continue Flag", () => {
+  // The outcome depends on whether the environment already has a recent
+  // session recorded, so the test accepts either result: a zero exit code,
+  // or a non-zero exit code accompanied by the expected error message.
+  const continueArgs = ["--continue", "-p", "Say OK", "--output-format", "json"];
+
+  test(
+    "--continue with no LRU shows error",
+    async () => {
+      const { exitCode, stderr } = await runCli(continueArgs, {
+        timeoutMs: 60000,
+      });
+
+      // A successful run is also valid; there is nothing further to check.
+      if (exitCode === 0) {
+        return;
+      }
+
+      expect(stderr).toContain("No recent session found");
+    },
+    { timeout: 70000 },
+  );
+});
--- a/src/tests/startup-flow.test.ts
+++ b/src/tests/startup-flow.test.ts
@@ -2,22 +2,14 @@ import { describe, expect, test } from "bun:test";
 import { spawn } from "node:child_process";

 /**
- * Integration tests for CLI startup flows.
+ * Startup flow tests that validate flag conflict handling.
 *
- * These tests verify the boot flow decision tree:
- * - Flag conflict detection
- * - --conversation: derives agent from conversation
- * - --agent: uses specified agent
- * - --new-agent: creates new agent
- * - Error messages for invalid inputs
- *
- * Note: Tests that depend on settings files (.letta/) are harder to isolate
- * because the CLI uses process.cwd(). For now, we focus on flag-based tests.
+ * These must remain runnable in fork PR CI (no secrets), so they should not
+ * require a working Letta server or LETTA_API_KEY.
 */

 const projectRoot = process.cwd();

-// Helper to run CLI and capture output
 async function runCli(
  args: string[],
  options: {
@@ -74,10 +66,6 @@ async function runCli(
  });
 }

-// ============================================================================
-// Flag Conflict Tests (fast, no API calls needed)
-// ============================================================================
-
 describe("Startup Flow - Flag Conflicts", () => {
  test("--conversation conflicts with --agent", async () => {
    const result = await runCli(
@@ -136,227 +124,3 @@ describe("Startup Flow - Flag Conflicts", () => {
    );
  });
 });
-
-// ============================================================================
-// Invalid Input Tests (require API calls but fail fast)
-// ============================================================================
-
-describe("Startup Flow - Invalid Inputs", () => {
-  test(
-    "--agent with nonexistent ID shows error",
-    async () => {
-      const result = await runCli(
-        ["--agent", "agent-definitely-does-not-exist-12345", "-p", "test"],
-        { expectExit: 1, timeoutMs: 60000 },
-      );
-      expect(result.stderr).toContain("not found");
-    },
-    { timeout: 70000 },
-  );
-
-  test(
-    "--conversation with nonexistent ID shows error",
-    async () => {
-      const result = await runCli(
-        [
-          "--conversation",
-          "conversation-definitely-does-not-exist-12345",
-          "-p",
-          "test",
-        ],
-        { expectExit: 1, timeoutMs: 60000 },
-      );
-      expect(result.stderr).toContain("not found");
-    },
-    { timeout: 70000 },
-  );
-
-  test("--from-af with nonexistent file shows error", async () => {
-    const result = await runCli(
-      ["--from-af", "/nonexistent/path/agent.af", "-p", "test"],
-      { expectExit: 1 },
-    );
-    expect(result.stderr).toContain("not found");
-  });
-});
-
-// ============================================================================
-// Integration Tests (require API access, create real agents)
-// ============================================================================
-
-describe("Startup Flow - Integration", () => {
-  // Store created agent/conversation IDs for cleanup and reuse
-  let testAgentId: string | null = null;
-
-  test(
-    "--new-agent creates agent and responds",
-    async () => {
-      const result = await runCli(
-        [
-          "--new-agent",
-          "-m",
-          "haiku",
-          "-p",
-          "Say OK and nothing else",
-          "--output-format",
-          "json",
-        ],
-        { timeoutMs: 120000 },
-      );
-
-      expect(result.exitCode).toBe(0);
-      // stdout includes the bun invocation line, extract just the JSON
-      const jsonStart = result.stdout.indexOf("{");
-      const output = JSON.parse(result.stdout.slice(jsonStart));
-      expect(output.agent_id).toBeDefined();
-      expect(output.result).toBeDefined();
-
-      // Save for later tests
-      testAgentId = output.agent_id;
-    },
-    { timeout: 130000 },
-  );
-
-  test(
-    "--agent with valid ID uses that agent",
-    async () => {
-      // Skip if previous test didn't create an agent
-      if (!testAgentId) {
-        console.log("Skipping: no test agent available");
-        return;
-      }
-
-      const result = await runCli(
-        [
-          "--agent",
-          testAgentId,
-          "-m",
-          "haiku",
-          "-p",
-          "Say OK",
-          "--output-format",
-          "json",
-        ],
-        { timeoutMs: 120000 },
-      );
-
-      expect(result.exitCode).toBe(0);
-      const jsonStart = result.stdout.indexOf("{");
-      const output = JSON.parse(result.stdout.slice(jsonStart));
-      expect(output.agent_id).toBe(testAgentId);
-    },
-    { timeout: 130000 },
-  );
-
-  test(
-    "--conversation with valid ID derives agent and uses conversation",
-    async () => {
-      // Skip if previous test didn't create an agent
-      if (!testAgentId) {
-        console.log("Skipping: no test agent available");
-        return;
-      }
-
-      // First, create a real conversation with --new (since --new-agent uses "default")
-      const createResult = await runCli(
-        [
-          "--agent",
-          testAgentId,
-          "--new",
-          "-m",
-          "haiku",
-          "-p",
-          "Say CREATED",
-          "--output-format",
-          "json",
-        ],
-        { timeoutMs: 120000 },
-      );
-      expect(createResult.exitCode).toBe(0);
-      const createJsonStart = createResult.stdout.indexOf("{");
-      const createOutput = JSON.parse(
-        createResult.stdout.slice(createJsonStart),
-      );
-      const realConversationId = createOutput.conversation_id;
-      expect(realConversationId).toBeDefined();
-      expect(realConversationId).not.toBe("default");
-
-      // Now test that --conversation can derive the agent from this conversation
-      const result = await runCli(
-        [
-          "--conversation",
-          realConversationId,
-          "-m",
-          "haiku",
-          "-p",
-          "Say OK",
-          "--output-format",
-          "json",
-        ],
-        { timeoutMs: 120000 },
-      );
-
-      expect(result.exitCode).toBe(0);
-      const jsonStart = result.stdout.indexOf("{");
-      const output = JSON.parse(result.stdout.slice(jsonStart));
-      // Should use the same agent that owns the conversation
-      expect(output.agent_id).toBe(testAgentId);
-      // Should use the specified conversation
-      expect(output.conversation_id).toBe(realConversationId);
-    },
-    { timeout: 180000 },
-  );
-
-  test(
-    "--new-agent with --init-blocks none creates minimal agent",
-    async () => {
-      const result = await runCli(
-        [
-          "--new-agent",
-          "--init-blocks",
-          "none",
-          "-m",
-          "haiku",
-          "-p",
-          "Say OK",
-          "--output-format",
-          "json",
-        ],
-        { timeoutMs: 120000 },
-      );
-
-      expect(result.exitCode).toBe(0);
-      // stdout includes the bun invocation line, extract just the JSON
-      const jsonStart = result.stdout.indexOf("{");
-      const output = JSON.parse(result.stdout.slice(jsonStart));
-      expect(output.agent_id).toBeDefined();
-    },
-    { timeout: 130000 },
-  );
-});
-
-// ============================================================================
-// --continue Tests (depend on LRU state, harder to isolate)
-// ============================================================================
-
-describe("Startup Flow - Continue Flag", () => {
-  test(
-    "--continue with no LRU shows error",
-    async () => {
-      // This test relies on running in a directory with no .letta/ settings
-      // In practice, this might use the project's .letta/ which has an LRU
-      // So we check for either success (if LRU exists) or error (if not)
-      const result = await runCli(
-        ["--continue", "-p", "Say OK", "--output-format", "json"],
-        { timeoutMs: 60000 },
-      );
-
-      // Either succeeds (LRU exists) or fails with specific error
-      if (result.exitCode !== 0) {
-        expect(result.stderr).toContain("No recent session found");
-      }
-      // If it succeeds, that's also valid (test env has LRU)
-    },
-    { timeout: 70000 },
-  );
-});