fix: TB regression workflow fixes (venv, Modal, warnings) (#1385)
Co-authored-by: Letta Code <noreply@letta.com>
This commit is contained in:
18
.github/workflows/terminal-bench-regression.yml
vendored
18
.github/workflows/terminal-bench-regression.yml
vendored
@@ -12,6 +12,9 @@ on:
|
|||||||
description: "Max concurrent tasks"
|
description: "Max concurrent tasks"
|
||||||
default: "4"
|
default: "4"
|
||||||
|
|
||||||
|
env:
|
||||||
|
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
regression:
|
regression:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -26,9 +29,14 @@ jobs:
|
|||||||
|
|
||||||
- name: Setup Python + uv
|
- name: Setup Python + uv
|
||||||
uses: astral-sh/setup-uv@v6
|
uses: astral-sh/setup-uv@v6
|
||||||
|
with:
|
||||||
|
enable-cache: false
|
||||||
|
|
||||||
- name: Install Harbor
|
- name: Create venv and install deps
|
||||||
run: uv pip install --system "harbor>=0.1.45" "litellm>=1.0.0"
|
run: |
|
||||||
|
uv venv .venv
|
||||||
|
source .venv/bin/activate
|
||||||
|
uv pip install "harbor>=0.1.45" "litellm>=1.0.0" "modal>=1.3.5"
|
||||||
|
|
||||||
- name: Configure Modal
|
- name: Configure Modal
|
||||||
env:
|
env:
|
||||||
@@ -44,6 +52,8 @@ jobs:
|
|||||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||||
run: |
|
run: |
|
||||||
|
source .venv/bin/activate
|
||||||
|
|
||||||
# Build --task-name flags from regression-tasks.txt
|
# Build --task-name flags from regression-tasks.txt
|
||||||
TASK_FLAGS=""
|
TASK_FLAGS=""
|
||||||
while IFS= read -r task; do
|
while IFS= read -r task; do
|
||||||
@@ -85,6 +95,8 @@ jobs:
|
|||||||
|
|
||||||
- name: Setup Python + uv
|
- name: Setup Python + uv
|
||||||
uses: astral-sh/setup-uv@v6
|
uses: astral-sh/setup-uv@v6
|
||||||
|
with:
|
||||||
|
enable-cache: false
|
||||||
|
|
||||||
- name: Generate report and update GitHub Issue
|
- name: Generate report and update GitHub Issue
|
||||||
env:
|
env:
|
||||||
@@ -93,7 +105,7 @@ jobs:
|
|||||||
GITHUB_RUN_ID: ${{ github.run_id }}
|
GITHUB_RUN_ID: ${{ github.run_id }}
|
||||||
GITHUB_SERVER_URL: ${{ github.server_url }}
|
GITHUB_SERVER_URL: ${{ github.server_url }}
|
||||||
run: |
|
run: |
|
||||||
python benchmarks/terminal_bench/report.py \
|
uv run python benchmarks/terminal_bench/report.py \
|
||||||
--results-dir results/ \
|
--results-dir results/ \
|
||||||
--baseline benchmarks/terminal_bench/baseline.json \
|
--baseline benchmarks/terminal_bench/baseline.json \
|
||||||
--repo "${{ github.repository }}"
|
--repo "${{ github.repository }}"
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
apt-get update
|
apt-get update
|
||||||
apt-get install -y curl git unzip
|
apt-get install -y curl git unzip build-essential
|
||||||
|
|
||||||
# Install Node.js (required to run the letta CLI)
|
# Install Node.js (required to run the letta CLI)
|
||||||
curl -fsSL https://deb.nodesource.com/setup_20.x | bash -
|
curl -fsSL https://deb.nodesource.com/setup_20.x | bash -
|
||||||
|
|||||||
Reference in New Issue
Block a user