feat: add headless matrix to ci (#86)

This commit is contained in:
Charles Packer
2025-11-09 12:34:25 -08:00
committed by GitHub
parent c234ea2b54
commit 3c084b52a6
3 changed files with 201 additions and 7 deletions

View File

@@ -66,8 +66,8 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: bun install
- name: Run tests
run: bun test
- name: Run tests (extended timeout)
run: bun test --timeout 15000
- name: Build bundle
run: bun run build
@@ -111,3 +111,34 @@ jobs:
- name: Pack (no auth available)
if: ${{ github.event_name != 'push' }}
run: bun pm pack
headless:
needs: check
name: Headless / ${{ matrix.model }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
model: [gpt-5-minimal, gpt-4.1, default, gemini-pro, glm-4.6, haiku]
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup Bun
uses: oven-sh/setup-bun@v1
with:
bun-version: 1.3.0
- name: Install dependencies
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: bun install
- name: Run headless scenario (all outputs)
if: ${{ github.event_name == 'push' || (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository) }}
env:
LETTA_API_KEY: ${{ secrets.LETTA_API_KEY }}
run: |
bun run src/tests/headless-scenario.ts --model "${{ matrix.model }}" --output text --parallel on
bun run src/tests/headless-scenario.ts --model "${{ matrix.model }}" --output json --parallel on
bun run src/tests/headless-scenario.ts --model "${{ matrix.model }}" --output stream-json --parallel on

View File

@@ -0,0 +1,151 @@
#!/usr/bin/env bun
/**
 * Headless scenario test runner
 *
 * Runs a single multi-step scenario against the Letta Code CLI (headless) for a given
 * model and output format. Intended for CI matrix usage.
 *
 * Usage:
 *   bun run src/tests/headless-scenario.ts --model gpt-4.1 --output stream-json --parallel on
 */
/** Parsed and validated command-line options for the scenario runner. */
type Args = {
  model: string;
  output: "text" | "json" | "stream-json";
  parallel: "on" | "off" | "hybrid";
};

// Allowed option values, kept as const tuples so validation and the Args type agree.
const OUTPUT_MODES = ["text", "json", "stream-json"] as const;
const PARALLEL_MODES = ["on", "off", "hybrid"] as const;

/** Type guard: is `value` one of the `allowed` literal values? */
function isOneOf<T extends string>(
  value: string | undefined,
  allowed: readonly T[],
): value is T {
  return value !== undefined && (allowed as readonly string[]).includes(value);
}

/**
 * Parse CLI flags into a validated Args object.
 *
 * Recognized flags: `--model <id>` (required), `--output <mode>` (default
 * "text"), `--parallel <mode>` (default "on"). Unknown flags are ignored.
 *
 * @throws Error when --model is missing or an option value is invalid.
 */
function parseArgs(argv: string[]): Args {
  let model: string | undefined;
  let output: string | undefined = "text";
  let parallel: string | undefined = "on";
  for (let i = 0; i < argv.length; i++) {
    const flag = argv[i];
    if (flag === "--model") model = argv[++i];
    else if (flag === "--output") output = argv[++i];
    else if (flag === "--parallel") parallel = argv[++i];
  }
  if (!model) throw new Error("Missing --model");
  if (!isOneOf(output, OUTPUT_MODES))
    throw new Error(`Invalid --output ${output}`);
  if (!isOneOf(parallel, PARALLEL_MODES))
    throw new Error(`Invalid --parallel ${parallel}`);
  return { model, output, parallel };
}
// Tests run against Letta Cloud; only LETTA_API_KEY is required.
/**
 * Verify required environment is present before running the scenario.
 * Logs and returns "skip" when LETTA_API_KEY is unset; "ok" otherwise.
 */
async function ensurePrereqs(_model: string): Promise<"ok" | "skip"> {
  const haveKey = Boolean(process.env.LETTA_API_KEY);
  if (haveKey) return "ok";
  console.log("SKIP: Missing env LETTA_API_KEY");
  return "skip";
}
/**
 * The fixed multi-step tool-calling prompt sent to the agent.
 * The agent is instructed to emit the sentinel word BANANA only when
 * every step succeeded, which the output validators check for.
 */
function scenarioPrompt(): string {
  const steps = [
    "I want to test your tool calling abilities (do not ask for any clarifications, this is an automated test suite inside a CI runner, there is no human to assist you). ",
    "First, call a single web_search to get the weather in SF. ",
    "Then, try calling two web_searches in parallel. ",
    "Then, try calling the bash tool to output an echo. ",
    "Then, try calling three copies of the bash tool in parallel to do 3 parallel echos: echo 'Test1', echo 'Test2', echo 'Test3'. ",
    "Then finally, try calling 2 bash tools and 1 web_search, in parallel, so three parallel tools. ",
    "IMPORTANT: If and only if all of the above steps worked as requested, include the word BANANA (uppercase) somewhere in your final response.",
  ];
  return steps.join("");
}
/**
 * Run the Letta CLI headlessly with the scenario prompt.
 *
 * @param model - model id forwarded via `-m`
 * @param output - CLI output format forwarded via `--output-format`
 * @returns the child's full stdout and its exit code; on a non-zero exit,
 *          stderr (or stdout as a fallback) is logged for diagnostics.
 */
async function runCLI(
  model: string,
  output: Args["output"],
): Promise<{ stdout: string; code: number }> {
  const cmd = [
    "bun",
    "run",
    "dev",
    "-p",
    scenarioPrompt(),
    "--yolo",
    "--new",
    "--output-format",
    output,
    "-m",
    model,
  ];
  const proc = Bun.spawn(cmd, { stdout: "pipe", stderr: "pipe" });
  // Drain stdout and stderr concurrently. Reading them sequentially can
  // deadlock: if the child fills the stderr pipe while we are still blocked
  // reading stdout, neither side makes progress.
  const [out, err, code] = await Promise.all([
    new Response(proc.stdout).text(),
    new Response(proc.stderr).text(),
    proc.exited,
  ]);
  if (code !== 0) {
    console.error("CLI failed:", err || out);
  }
  return { stdout: out, code };
}
/**
 * Throw if any expected substring is absent from `hay`.
 * @throws Error naming the first missing substring (in `needles` order).
 */
function assertContainsAll(hay: string, needles: string[]) {
  const missing = needles.find((needle) => !hay.includes(needle));
  if (missing !== undefined) {
    throw new Error(`Missing expected output: ${missing}`);
  }
}
/**
 * Entry point. Parses CLI flags, verifies prerequisites, runs the headless
 * scenario once, and validates the output for the chosen format.
 *
 * Exit behavior: propagates the CLI's non-zero exit code via process.exit;
 * validation failures are rethrown (caught by the top-level .catch) after
 * dumping full stdout to aid debugging.
 */
async function main() {
  // NOTE(review): --parallel is parsed/validated but never forwarded to the
  // CLI invocation in runCLI — confirm whether that is intentional.
  const { model, output } = parseArgs(process.argv.slice(2));
  const prereq = await ensurePrereqs(model);
  if (prereq === "skip") return;
  const { stdout, code } = await runCLI(model, output);
  if (code !== 0) {
    process.exit(code);
  }
  try {
    // Validate by output mode
    if (output === "text") {
      assertContainsAll(stdout, ["BANANA"]);
    } else if (output === "json") {
      let obj: unknown;
      try {
        obj = JSON.parse(stdout);
      } catch (e) {
        // Only a parse failure is "invalid JSON"; the BANANA assertion below
        // is outside this try so its failure isn't mislabeled as a JSON error.
        throw new Error(`Invalid JSON output: ${(e as Error).message}`);
      }
      const result = String((obj as { result?: unknown })?.result ?? "");
      assertContainsAll(result, ["BANANA"]);
    } else if (output === "stream-json") {
      // stream-json prints one JSON object per line; find the final result event
      const lines = stdout.split(/\r?\n/).filter(Boolean);
      const resultLine = lines.find((l) => {
        try {
          const o = JSON.parse(l);
          return o?.type === "result";
        } catch {
          return false;
        }
      });
      if (!resultLine) throw new Error("No final result event in stream-json");
      const evt = JSON.parse(resultLine);
      const result = String(evt?.result ?? "");
      assertContainsAll(result, ["BANANA"]);
    }
    console.log(`OK: ${model} / ${output}`);
  } catch (e) {
    // Dump full stdout to aid debugging
    console.error(`\n===== BEGIN STDOUT (${model} / ${output}) =====`);
    console.error(stdout);
    console.error(`===== END STDOUT (${model} / ${output}) =====\n`);
    if (output === "stream-json") {
      const lines = stdout.split(/\r?\n/).filter(Boolean);
      const tail = lines.slice(-50).join("\n");
      console.error(
        "----- stream-json tail (last 50 lines) -----\n" +
          tail +
          "\n---------------------------------------------",
      );
    }
    throw e;
  }
}
// Top-level runner: surface any failure (stack trace preferred) and exit non-zero.
main().catch((e) => {
  const detail = String(e?.stack || e);
  console.error(detail);
  process.exit(1);
});

View File

@@ -1,10 +1,11 @@
import { createAgent } from "./src/agent/create";
import { sendMessageStream } from "./src/agent/message";
import { readFileSync, writeFileSync } from "node:fs";
import { createAgent } from "../agent/create";
import { sendMessageStream } from "../agent/message";
async function main() {
// Create a simple test image (1x1 red PNG)
const testImageBase64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==";
const testImageBase64 =
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==";
const testImagePath = "/tmp/test.png";
writeFileSync(testImagePath, Buffer.from(testImageBase64, "base64"));
console.log("Created test image at", testImagePath);
@@ -44,8 +45,19 @@ async function main() {
let fullResponse = "";
for await (const chunk of stream) {
if (chunk.message_type === "assistant_message" && chunk.content) {
fullResponse += chunk.content;
process.stdout.write(chunk.content);
// Handle both string and array content
let contentText = "";
if (typeof chunk.content === "string") {
contentText = chunk.content;
} else if (Array.isArray(chunk.content)) {
// Extract text from content array
contentText = chunk.content
.filter((item) => item.type === "text")
.map((item) => ("text" in item ? item.text : ""))
.join("");
}
fullResponse += contentText;
process.stdout.write(contentText);
}
}
if (!fullResponse) {