Install Harbor and litellm into a local .venv created with `uv venv` instead of
using `uv pip install --system`, activate that .venv in the step that builds the
--task-name flags, and launch benchmarks/terminal_bench/report.py via `uv run`.

diff --git a/.github/workflows/terminal-bench-regression.yml b/.github/workflows/terminal-bench-regression.yml
index 68d6002..8ed0265 100644
--- a/.github/workflows/terminal-bench-regression.yml
+++ b/.github/workflows/terminal-bench-regression.yml
@@ -27,8 +27,11 @@ jobs:
       - name: Setup Python + uv
         uses: astral-sh/setup-uv@v6
 
-      - name: Install Harbor
-        run: uv pip install --system "harbor>=0.1.45" "litellm>=1.0.0"
+      - name: Create venv and install deps
+        run: |
+          uv venv .venv
+          source .venv/bin/activate
+          uv pip install "harbor>=0.1.45" "litellm>=1.0.0"
 
       - name: Configure Modal
         env:
@@ -44,6 +47,8 @@ jobs:
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
         run: |
+          source .venv/bin/activate
+
           # Build --task-name flags from regression-tasks.txt
           TASK_FLAGS=""
           while IFS= read -r task; do
@@ -93,7 +98,7 @@ jobs:
           GITHUB_RUN_ID: ${{ github.run_id }}
           GITHUB_SERVER_URL: ${{ github.server_url }}
         run: |
-          python benchmarks/terminal_bench/report.py \
+          uv run python benchmarks/terminal_bench/report.py \
             --results-dir results/ \
             --baseline benchmarks/terminal_bench/baseline.json \
             --repo "${{ github.repository }}"