feat: add headless matrix to ci (#86)
This commit is contained in:
35
.github/workflows/ci.yml
vendored
35
.github/workflows/ci.yml
vendored
@@ -66,8 +66,8 @@ jobs:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: bun install
|
||||
|
||||
- name: Run tests
|
||||
run: bun test
|
||||
- name: Run tests (extended timeout)
|
||||
run: bun test --timeout 15000
|
||||
|
||||
- name: Build bundle
|
||||
run: bun run build
|
||||
@@ -111,3 +111,34 @@ jobs:
|
||||
- name: Pack (no auth available)
|
||||
if: ${{ github.event_name != 'push' }}
|
||||
run: bun pm pack
|
||||
|
||||
headless:
|
||||
needs: check
|
||||
name: Headless / ${{ matrix.model }}
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
model: [gpt-5-minimal, gpt-4.1, default, gemini-pro, glm-4.6, haiku]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Bun
|
||||
uses: oven-sh/setup-bun@v1
|
||||
with:
|
||||
bun-version: 1.3.0
|
||||
|
||||
- name: Install dependencies
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: bun install
|
||||
|
||||
- name: Run headless scenario (all outputs)
|
||||
if: ${{ github.event_name == 'push' || (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository) }}
|
||||
env:
|
||||
LETTA_API_KEY: ${{ secrets.LETTA_API_KEY }}
|
||||
run: |
|
||||
bun run src/tests/headless-scenario.ts --model "${{ matrix.model }}" --output text --parallel on
|
||||
bun run src/tests/headless-scenario.ts --model "${{ matrix.model }}" --output json --parallel on
|
||||
bun run src/tests/headless-scenario.ts --model "${{ matrix.model }}" --output stream-json --parallel on
|
||||
|
||||
151
src/tests/headless-scenario.ts
Normal file
151
src/tests/headless-scenario.ts
Normal file
@@ -0,0 +1,151 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Headless scenario test runner
|
||||
*
|
||||
 * Runs a single multi-step scenario against the Letta Code CLI (headless) for a given
|
||||
* model and output format. Intended for CI matrix usage.
|
||||
*
|
||||
* Usage:
|
||||
 *   bun run src/tests/headless-scenario.ts --model gpt-4.1 --output stream-json --parallel on
|
||||
*/
|
||||
|
||||
// Parsed command-line arguments for one scenario run.
type Args = {
  model: string; // model handle forwarded to the CLI's -m flag
  output: "text" | "json" | "stream-json"; // --output-format value
  parallel: "on" | "off" | "hybrid"; // tool-parallelism mode for --parallel
};
|
||||
|
||||
function parseArgs(argv: string[]): Args {
|
||||
const args: any = { output: "text", parallel: "on" };
|
||||
for (let i = 0; i < argv.length; i++) {
|
||||
const v = argv[i];
|
||||
if (v === "--model") args.model = argv[++i];
|
||||
else if (v === "--output") args.output = argv[++i];
|
||||
else if (v === "--parallel") args.parallel = argv[++i];
|
||||
}
|
||||
if (!args.model) throw new Error("Missing --model");
|
||||
if (!["text", "json", "stream-json"].includes(args.output))
|
||||
throw new Error(`Invalid --output ${args.output}`);
|
||||
if (!["on", "off", "hybrid"].includes(args.parallel))
|
||||
throw new Error(`Invalid --parallel ${args.parallel}`);
|
||||
return args as Args;
|
||||
}
|
||||
|
||||
// Tests run against Letta Cloud; only LETTA_API_KEY is required.
|
||||
async function ensurePrereqs(_model: string): Promise<"ok" | "skip"> {
|
||||
if (!process.env.LETTA_API_KEY) {
|
||||
console.log("SKIP: Missing env LETTA_API_KEY");
|
||||
return "skip";
|
||||
}
|
||||
return "ok";
|
||||
}
|
||||
|
||||
function scenarioPrompt(): string {
|
||||
return (
|
||||
"I want to test your tool calling abilities (do not ask for any clarifications, this is an automated test suite inside a CI runner, there is no human to assist you). " +
|
||||
"First, call a single web_search to get the weather in SF. " +
|
||||
"Then, try calling two web_searches in parallel. " +
|
||||
"Then, try calling the bash tool to output an echo. " +
|
||||
"Then, try calling three copies of the bash tool in parallel to do 3 parallel echos: echo 'Test1', echo 'Test2', echo 'Test3'. " +
|
||||
"Then finally, try calling 2 bash tools and 1 web_search, in parallel, so three parallel tools. " +
|
||||
"IMPORTANT: If and only if all of the above steps worked as requested, include the word BANANA (uppercase) somewhere in your final response."
|
||||
);
|
||||
}
|
||||
|
||||
async function runCLI(
|
||||
model: string,
|
||||
output: Args["output"],
|
||||
): Promise<{ stdout: string; code: number }> {
|
||||
const cmd = [
|
||||
"bun",
|
||||
"run",
|
||||
"dev",
|
||||
"-p",
|
||||
scenarioPrompt(),
|
||||
"--yolo",
|
||||
"--new",
|
||||
"--output-format",
|
||||
output,
|
||||
"-m",
|
||||
model,
|
||||
];
|
||||
const proc = Bun.spawn(cmd, { stdout: "pipe", stderr: "pipe" });
|
||||
const out = await new Response(proc.stdout).text();
|
||||
const err = await new Response(proc.stderr).text();
|
||||
const code = await proc.exited;
|
||||
if (code !== 0) {
|
||||
console.error("CLI failed:", err || out);
|
||||
}
|
||||
return { stdout: out, code };
|
||||
}
|
||||
|
||||
function assertContainsAll(hay: string, needles: string[]) {
|
||||
for (const n of needles) {
|
||||
if (!hay.includes(n)) throw new Error(`Missing expected output: ${n}`);
|
||||
}
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const { model, output } = parseArgs(process.argv.slice(2));
|
||||
const prereq = await ensurePrereqs(model);
|
||||
if (prereq === "skip") return;
|
||||
|
||||
const { stdout, code } = await runCLI(model, output);
|
||||
if (code !== 0) {
|
||||
process.exit(code);
|
||||
}
|
||||
|
||||
try {
|
||||
// Validate by output mode
|
||||
if (output === "text") {
|
||||
assertContainsAll(stdout, ["BANANA"]);
|
||||
} else if (output === "json") {
|
||||
try {
|
||||
const obj = JSON.parse(stdout);
|
||||
const result = String(obj?.result ?? "");
|
||||
assertContainsAll(result, ["BANANA"]);
|
||||
} catch (e) {
|
||||
throw new Error(`Invalid JSON output: ${(e as Error).message}`);
|
||||
}
|
||||
} else if (output === "stream-json") {
|
||||
// stream-json prints one JSON object per line; find the final result event
|
||||
const lines = stdout.split(/\r?\n/).filter(Boolean);
|
||||
const resultLine = lines.find((l) => {
|
||||
try {
|
||||
const o = JSON.parse(l);
|
||||
return o?.type === "result";
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
if (!resultLine) throw new Error("No final result event in stream-json");
|
||||
const evt = JSON.parse(resultLine);
|
||||
const result = String(evt?.result ?? "");
|
||||
assertContainsAll(result, ["BANANA"]);
|
||||
}
|
||||
|
||||
console.log(`OK: ${model} / ${output}`);
|
||||
} catch (e) {
|
||||
// Dump full stdout to aid debugging
|
||||
console.error(`\n===== BEGIN STDOUT (${model} / ${output}) =====`);
|
||||
console.error(stdout);
|
||||
console.error(`===== END STDOUT (${model} / ${output}) =====\n`);
|
||||
|
||||
if (output === "stream-json") {
|
||||
const lines = stdout.split(/\r?\n/).filter(Boolean);
|
||||
const tail = lines.slice(-50).join("\n");
|
||||
console.error(
|
||||
"----- stream-json tail (last 50 lines) -----\n" +
|
||||
tail +
|
||||
"\n---------------------------------------------",
|
||||
);
|
||||
}
|
||||
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
main().catch((e) => {
|
||||
console.error(String(e?.stack || e));
|
||||
process.exit(1);
|
||||
});
|
||||
@@ -1,10 +1,11 @@
|
||||
import { createAgent } from "./src/agent/create";
|
||||
import { sendMessageStream } from "./src/agent/message";
|
||||
import { readFileSync, writeFileSync } from "node:fs";
|
||||
import { createAgent } from "../agent/create";
|
||||
import { sendMessageStream } from "../agent/message";
|
||||
|
||||
async function main() {
|
||||
// Create a simple test image (1x1 red PNG)
|
||||
const testImageBase64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==";
|
||||
const testImageBase64 =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==";
|
||||
const testImagePath = "/tmp/test.png";
|
||||
writeFileSync(testImagePath, Buffer.from(testImageBase64, "base64"));
|
||||
console.log("Created test image at", testImagePath);
|
||||
@@ -44,8 +45,19 @@ async function main() {
|
||||
let fullResponse = "";
|
||||
for await (const chunk of stream) {
|
||||
if (chunk.message_type === "assistant_message" && chunk.content) {
|
||||
fullResponse += chunk.content;
|
||||
process.stdout.write(chunk.content);
|
||||
// Handle both string and array content
|
||||
let contentText = "";
|
||||
if (typeof chunk.content === "string") {
|
||||
contentText = chunk.content;
|
||||
} else if (Array.isArray(chunk.content)) {
|
||||
// Extract text from content array
|
||||
contentText = chunk.content
|
||||
.filter((item) => item.type === "text")
|
||||
.map((item) => ("text" in item ? item.text : ""))
|
||||
.join("");
|
||||
}
|
||||
fullResponse += contentText;
|
||||
process.stdout.write(contentText);
|
||||
}
|
||||
}
|
||||
if (!fullResponse) {
|
||||
Reference in New Issue
Block a user