fix: TB regression workflow fixes (venv, Modal, warnings) (#1385)

Co-authored-by: Letta Code <noreply@letta.com>
This commit is contained in:
Devansh Jain
2026-03-13 16:27:01 -07:00
committed by GitHub
parent 7d3b4a4500
commit 877ad1ada0
2 changed files with 16 additions and 4 deletions

View File

@@ -12,6 +12,9 @@ on:
description: "Max concurrent tasks"
default: "4"
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
jobs:
regression:
runs-on: ubuntu-latest
@@ -26,9 +29,14 @@ jobs:
- name: Setup Python + uv
uses: astral-sh/setup-uv@v6
with:
enable-cache: false
- name: Install Harbor
run: uv pip install --system "harbor>=0.1.45" "litellm>=1.0.0"
- name: Create venv and install deps
run: |
uv venv .venv
source .venv/bin/activate
uv pip install "harbor>=0.1.45" "litellm>=1.0.0" "modal>=1.3.5"
- name: Configure Modal
env:
@@ -44,6 +52,8 @@ jobs:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
source .venv/bin/activate
# Build --task-name flags from regression-tasks.txt
TASK_FLAGS=""
while IFS= read -r task; do
@@ -85,6 +95,8 @@ jobs:
- name: Setup Python + uv
uses: astral-sh/setup-uv@v6
with:
enable-cache: false
- name: Generate report and update GitHub Issue
env:
@@ -93,7 +105,7 @@ jobs:
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_SERVER_URL: ${{ github.server_url }}
run: |
python benchmarks/terminal_bench/report.py \
uv run python benchmarks/terminal_bench/report.py \
--results-dir results/ \
--baseline benchmarks/terminal_bench/baseline.json \
--repo "${{ github.repository }}"