fix: TB regression workflow fixes (venv, Modal, warnings) (#1385)
Co-authored-by: Letta Code <noreply@letta.com>
This commit is contained in:
18
.github/workflows/terminal-bench-regression.yml
vendored
18
.github/workflows/terminal-bench-regression.yml
vendored
@@ -12,6 +12,9 @@ on:
|
||||
description: "Max concurrent tasks"
|
||||
default: "4"
|
||||
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
jobs:
|
||||
regression:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -26,9 +29,14 @@ jobs:
|
||||
|
||||
- name: Setup Python + uv
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
enable-cache: false
|
||||
|
||||
- name: Install Harbor
|
||||
run: uv pip install --system "harbor>=0.1.45" "litellm>=1.0.0"
|
||||
- name: Create venv and install deps
|
||||
run: |
|
||||
uv venv .venv
|
||||
source .venv/bin/activate
|
||||
uv pip install "harbor>=0.1.45" "litellm>=1.0.0" "modal>=1.3.5"
|
||||
|
||||
- name: Configure Modal
|
||||
env:
|
||||
@@ -44,6 +52,8 @@ jobs:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
|
||||
# Build --task-name flags from regression-tasks.txt
|
||||
TASK_FLAGS=""
|
||||
while IFS= read -r task; do
|
||||
@@ -85,6 +95,8 @@ jobs:
|
||||
|
||||
- name: Setup Python + uv
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
enable-cache: false
|
||||
|
||||
- name: Generate report and update GitHub Issue
|
||||
env:
|
||||
@@ -93,7 +105,7 @@ jobs:
|
||||
GITHUB_RUN_ID: ${{ github.run_id }}
|
||||
GITHUB_SERVER_URL: ${{ github.server_url }}
|
||||
run: |
|
||||
python benchmarks/terminal_bench/report.py \
|
||||
uv run python benchmarks/terminal_bench/report.py \
|
||||
--results-dir results/ \
|
||||
--baseline benchmarks/terminal_bench/baseline.json \
|
||||
--repo "${{ github.repository }}"
|
||||
|
||||
Reference in New Issue
Block a user