diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 800638b..5053f7e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,9 +71,16 @@ jobs: uses: t1m0thyj/unlock-keyring@v1 - name: Run tests (extended timeout) + # Unit tests must pass for fork PRs (no secrets). Keep API-dependent tests + # in a separate gated step. + run: bun test src/tests --timeout 15000 + + - name: Run integration tests (API) + # Only run on push to main or PRs from the same repo (not forks, to protect secrets) + if: ${{ github.event_name == 'push' || (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository) }} env: LETTA_API_KEY: ${{ secrets.LETTA_API_KEY }} - run: bun test --timeout 15000 + run: bun test src/integration-tests --timeout 15000 - name: Build bundle run: bun run build diff --git a/src/tests/headless-input-format.test.ts b/src/integration-tests/headless-input-format.test.ts similarity index 100% rename from src/tests/headless-input-format.test.ts rename to src/integration-tests/headless-input-format.test.ts diff --git a/src/tests/headless-stream-json-format.test.ts b/src/integration-tests/headless-stream-json-format.test.ts similarity index 100% rename from src/tests/headless-stream-json-format.test.ts rename to src/integration-tests/headless-stream-json-format.test.ts diff --git a/src/tests/lazy-approval-recovery.test.ts b/src/integration-tests/lazy-approval-recovery.test.ts similarity index 100% rename from src/tests/lazy-approval-recovery.test.ts rename to src/integration-tests/lazy-approval-recovery.test.ts diff --git a/src/integration-tests/startup-flow.integration.test.ts b/src/integration-tests/startup-flow.integration.test.ts new file mode 100644 index 0000000..2313c1c --- /dev/null +++ b/src/integration-tests/startup-flow.integration.test.ts @@ -0,0 +1,281 @@ +import { describe, expect, test } from "bun:test"; +import { spawn } from "node:child_process"; + +/** + * Startup flow integration tests. + * + * These spawn the real CLI and require LETTA_API_KEY to be set. + * They are executed in CI only for push to main / trusted PRs (non-forks). + */ + +const projectRoot = process.cwd(); + +async function runCli( + args: string[], + options: { + timeoutMs?: number; + expectExit?: number; + } = {}, +): Promise<{ stdout: string; stderr: string; exitCode: number | null }> { + const { timeoutMs = 30000, expectExit } = options; + + return new Promise((resolve, reject) => { + const proc = spawn("bun", ["run", "dev", ...args], { + cwd: projectRoot, + // Mark as subagent to prevent polluting user's LRU settings + env: { ...process.env, LETTA_CODE_AGENT_ROLE: "subagent" }, + }); + + let stdout = ""; + let stderr = ""; + + proc.stdout?.on("data", (data) => { + stdout += data.toString(); + }); + + proc.stderr?.on("data", (data) => { + stderr += data.toString(); + }); + + const timeout = setTimeout(() => { + proc.kill(); + reject( + new Error( + `Timeout after ${timeoutMs}ms. stdout: ${stdout}, stderr: ${stderr}`, + ), + ); + }, timeoutMs); + + proc.on("close", (code) => { + clearTimeout(timeout); + if (expectExit !== undefined && code !== expectExit) { + reject( + new Error( + `Expected exit code ${expectExit}, got ${code}. stdout: ${stdout}, stderr: ${stderr}`, + ), + ); + } else { + resolve({ stdout, stderr, exitCode: code }); + } + }); + + proc.on("error", (err) => { + clearTimeout(timeout); + reject(err); + }); + }); +} + +// ============================================================================ +// Invalid Input Tests (require API calls but fail fast) +// ============================================================================ + +describe("Startup Flow - Invalid Inputs", () => { + test( + "--agent with nonexistent ID shows error", + async () => { + const result = await runCli( + ["--agent", "agent-definitely-does-not-exist-12345", "-p", "test"], + { expectExit: 1, timeoutMs: 60000 }, + ); + expect(result.stderr).toContain("not found"); + }, + { timeout: 70000 }, + ); + + test( + "--conversation with nonexistent ID shows error", + async () => { + const result = await runCli( + [ + "--conversation", + "conversation-definitely-does-not-exist-12345", + "-p", + "test", + ], + { expectExit: 1, timeoutMs: 60000 }, + ); + expect(result.stderr).toContain("not found"); + }, + { timeout: 70000 }, + ); + + test("--from-af with nonexistent file shows error", async () => { + const result = await runCli( + ["--from-af", "/nonexistent/path/agent.af", "-p", "test"], + { expectExit: 1 }, + ); + expect(result.stderr).toContain("not found"); + }); +}); + +// ============================================================================ +// Integration Tests (require API access, create real agents) +// ============================================================================ + +describe("Startup Flow - Integration", () => { + let testAgentId: string | null = null; + + test( + "--new-agent creates agent and responds", + async () => { + const result = await runCli( + [ + "--new-agent", + "-m", + "haiku", + "-p", + "Say OK and nothing else", + "--output-format", + "json", + ], + { timeoutMs: 120000 }, + ); + + expect(result.exitCode).toBe(0); + // stdout includes the bun invocation line, extract just the JSON + const jsonStart = result.stdout.indexOf("{"); + const output = JSON.parse(result.stdout.slice(jsonStart)); + expect(output.agent_id).toBeDefined(); + expect(output.result).toBeDefined(); + + testAgentId = output.agent_id; + }, + { timeout: 130000 }, + ); + + test( + "--agent with valid ID uses that agent", + async () => { + if (!testAgentId) { + console.log("Skipping: no test agent available"); + return; + } + + const result = await runCli( + [ + "--agent", + testAgentId, + "-m", + "haiku", + "-p", + "Say OK", + "--output-format", + "json", + ], + { timeoutMs: 120000 }, + ); + + expect(result.exitCode).toBe(0); + const jsonStart = result.stdout.indexOf("{"); + const output = JSON.parse(result.stdout.slice(jsonStart)); + expect(output.agent_id).toBe(testAgentId); + }, + { timeout: 130000 }, + ); + + test( + "--conversation with valid ID derives agent and uses conversation", + async () => { + if (!testAgentId) { + console.log("Skipping: no test agent available"); + return; + } + + // First, create a real conversation with --new (since --new-agent uses "default") + const createResult = await runCli( + [ + "--agent", + testAgentId, + "--new", + "-m", + "haiku", + "-p", + "Say CREATED", + "--output-format", + "json", + ], + { timeoutMs: 120000 }, + ); + expect(createResult.exitCode).toBe(0); + const createJsonStart = createResult.stdout.indexOf("{"); + const createOutput = JSON.parse( + createResult.stdout.slice(createJsonStart), + ); + const realConversationId = createOutput.conversation_id; + expect(realConversationId).toBeDefined(); + expect(realConversationId).not.toBe("default"); + + const result = await runCli( + [ + "--conversation", + realConversationId, + "-m", + "haiku", + "-p", + "Say OK", + "--output-format", + "json", + ], + { timeoutMs: 120000 }, + ); + + expect(result.exitCode).toBe(0); + const jsonStart = result.stdout.indexOf("{"); + const output = JSON.parse(result.stdout.slice(jsonStart)); + expect(output.agent_id).toBe(testAgentId); + expect(output.conversation_id).toBe(realConversationId); + }, + { timeout: 180000 }, + ); + + test( + "--new-agent with --init-blocks none creates minimal agent", + async () => { + const result = await runCli( + [ + "--new-agent", + "--init-blocks", + "none", + "-m", + "haiku", + "-p", + "Say OK", + "--output-format", + "json", + ], + { timeoutMs: 120000 }, + ); + + expect(result.exitCode).toBe(0); + const jsonStart = result.stdout.indexOf("{"); + const output = JSON.parse(result.stdout.slice(jsonStart)); + expect(output.agent_id).toBeDefined(); + }, + { timeout: 130000 }, + ); +}); + +// ============================================================================ +// --continue Tests (depend on LRU state, harder to isolate) +// ============================================================================ + +describe("Startup Flow - Continue Flag", () => { + test( + "--continue with no LRU shows error", + async () => { + const result = await runCli( + ["--continue", "-p", "Say OK", "--output-format", "json"], + { + timeoutMs: 60000, + }, + ); + + // Either succeeds (LRU exists) or fails with specific error + if (result.exitCode !== 0) { + expect(result.stderr).toContain("No recent session found"); + } + }, + { timeout: 70000 }, + ); +}); diff --git a/src/tests/startup-flow.test.ts b/src/tests/startup-flow.test.ts index 14315ba..5e4696d 100644 --- a/src/tests/startup-flow.test.ts +++ b/src/tests/startup-flow.test.ts @@ -2,22 +2,14 @@ import { describe, expect, test } from "bun:test"; import { spawn } from "node:child_process"; /** - * Integration tests for CLI startup flows. + * Startup flow tests that validate flag conflict handling. * - * These tests verify the boot flow decision tree: - * - Flag conflict detection - * - --conversation: derives agent from conversation - * - --agent: uses specified agent - * - --new-agent: creates new agent - * - Error messages for invalid inputs - * - * Note: Tests that depend on settings files (.letta/) are harder to isolate - * because the CLI uses process.cwd(). For now, we focus on flag-based tests. + * These must remain runnable in fork PR CI (no secrets), so they should not + * require a working Letta server or LETTA_API_KEY. */ const projectRoot = process.cwd(); -// Helper to run CLI and capture output async function runCli( args: string[], options: { @@ -74,10 +66,6 @@ async function runCli( }); } -// ============================================================================ -// Flag Conflict Tests (fast, no API calls needed) -// ============================================================================ - describe("Startup Flow - Flag Conflicts", () => { test("--conversation conflicts with --agent", async () => { const result = await runCli( @@ -136,227 +124,3 @@ describe("Startup Flow - Flag Conflicts", () => { ); }); }); - -// ============================================================================ -// Invalid Input Tests (require API calls but fail fast) -// ============================================================================ - -describe("Startup Flow - Invalid Inputs", () => { - test( - "--agent with nonexistent ID shows error", - async () => { - const result = await runCli( - ["--agent", "agent-definitely-does-not-exist-12345", "-p", "test"], - { expectExit: 1, timeoutMs: 60000 }, - ); - expect(result.stderr).toContain("not found"); - }, - { timeout: 70000 }, - ); - - test( - "--conversation with nonexistent ID shows error", - async () => { - const result = await runCli( - [ - "--conversation", - "conversation-definitely-does-not-exist-12345", - "-p", - "test", - ], - { expectExit: 1, timeoutMs: 60000 }, - ); - expect(result.stderr).toContain("not found"); - }, - { timeout: 70000 }, - ); - - test("--from-af with nonexistent file shows error", async () => { - const result = await runCli( - ["--from-af", "/nonexistent/path/agent.af", "-p", "test"], - { expectExit: 1 }, - ); - expect(result.stderr).toContain("not found"); - }); -}); - -// ============================================================================ -// Integration Tests (require API access, create real agents) -// ============================================================================ - -describe("Startup Flow - Integration", () => { - // Store created agent/conversation IDs for cleanup and reuse - let testAgentId: string | null = null; - - test( - "--new-agent creates agent and responds", - async () => { - const result = await runCli( - [ - "--new-agent", - "-m", - "haiku", - "-p", - "Say OK and nothing else", - "--output-format", - "json", - ], - { timeoutMs: 120000 }, - ); - - expect(result.exitCode).toBe(0); - // stdout includes the bun invocation line, extract just the JSON - const jsonStart = result.stdout.indexOf("{"); - const output = JSON.parse(result.stdout.slice(jsonStart)); - expect(output.agent_id).toBeDefined(); - expect(output.result).toBeDefined(); - - // Save for later tests - testAgentId = output.agent_id; - }, - { timeout: 130000 }, - ); - - test( - "--agent with valid ID uses that agent", - async () => { - // Skip if previous test didn't create an agent - if (!testAgentId) { - console.log("Skipping: no test agent available"); - return; - } - - const result = await runCli( - [ - "--agent", - testAgentId, - "-m", - "haiku", - "-p", - "Say OK", - "--output-format", - "json", - ], - { timeoutMs: 120000 }, - ); - - expect(result.exitCode).toBe(0); - const jsonStart = result.stdout.indexOf("{"); - const output = JSON.parse(result.stdout.slice(jsonStart)); - expect(output.agent_id).toBe(testAgentId); - }, - { timeout: 130000 }, - ); - - test( - "--conversation with valid ID derives agent and uses conversation", - async () => { - // Skip if previous test didn't create an agent - if (!testAgentId) { - console.log("Skipping: no test agent available"); - return; - } - - // First, create a real conversation with --new (since --new-agent uses "default") - const createResult = await runCli( - [ - "--agent", - testAgentId, - "--new", - "-m", - "haiku", - "-p", - "Say CREATED", - "--output-format", - "json", - ], - { timeoutMs: 120000 }, - ); - expect(createResult.exitCode).toBe(0); - const createJsonStart = createResult.stdout.indexOf("{"); - const createOutput = JSON.parse( - createResult.stdout.slice(createJsonStart), - ); - const realConversationId = createOutput.conversation_id; - expect(realConversationId).toBeDefined(); - expect(realConversationId).not.toBe("default"); - - // Now test that --conversation can derive the agent from this conversation - const result = await runCli( - [ - "--conversation", - realConversationId, - "-m", - "haiku", - "-p", - "Say OK", - "--output-format", - "json", - ], - { timeoutMs: 120000 }, - ); - - expect(result.exitCode).toBe(0); - const jsonStart = result.stdout.indexOf("{"); - const output = JSON.parse(result.stdout.slice(jsonStart)); - // Should use the same agent that owns the conversation - expect(output.agent_id).toBe(testAgentId); - // Should use the specified conversation - expect(output.conversation_id).toBe(realConversationId); - }, - { timeout: 180000 }, - ); - - test( - "--new-agent with --init-blocks none creates minimal agent", - async () => { - const result = await runCli( - [ - "--new-agent", - "--init-blocks", - "none", - "-m", - "haiku", - "-p", - "Say OK", - "--output-format", - "json", - ], - { timeoutMs: 120000 }, - ); - - expect(result.exitCode).toBe(0); - // stdout includes the bun invocation line, extract just the JSON - const jsonStart = result.stdout.indexOf("{"); - const output = JSON.parse(result.stdout.slice(jsonStart)); - expect(output.agent_id).toBeDefined(); - }, - { timeout: 130000 }, - ); -}); - -// ============================================================================ -// --continue Tests (depend on LRU state, harder to isolate) -// ============================================================================ - -describe("Startup Flow - Continue Flag", () => { - test( - "--continue with no LRU shows error", - async () => { - // This test relies on running in a directory with no .letta/ settings - // In practice, this might use the project's .letta/ which has an LRU - // So we check for either success (if LRU exists) or error (if not) - const result = await runCli( - ["--continue", "-p", "Say OK", "--output-format", "json"], - { timeoutMs: 60000 }, - ); - - // Either succeeds (LRU exists) or fails with specific error - if (result.exitCode !== 0) { - expect(result.stderr).toContain("No recent session found"); - } - // If it succeeds, that's also valid (test env has LRU) - }, - { timeout: 70000 }, - ); -});