191 lines
6.1 KiB
JSON
191 lines
6.1 KiB
JSON
{
|
|
"sonnet-4.6-xhigh": {
|
|
"pass_rate": 0.427,
|
|
"tasks": {
|
|
"adaptive-rejection-sampler": false,
|
|
"bn-fit-modify": true,
|
|
"break-filter-js-from-html": false,
|
|
"build-cython-ext": true,
|
|
"build-pmars": true,
|
|
"build-pov-ray": true,
|
|
"caffe-cifar-10": false,
|
|
"cancel-async-tasks": false,
|
|
"chess-best-move": false,
|
|
"circuit-fibsqrt": false,
|
|
"cobol-modernization": false,
|
|
"code-from-image": true,
|
|
"compile-compcert": true,
|
|
"configure-git-webserver": true,
|
|
"constraints-scheduling": true,
|
|
"count-dataset-tokens": true,
|
|
"crack-7z-hash": false,
|
|
"custom-memory-heap-crash": false,
|
|
"db-wal-recovery": false,
|
|
"distribution-search": false,
|
|
"dna-assembly": false,
|
|
"dna-insert": false,
|
|
"extract-elf": true,
|
|
"extract-moves-from-video": false,
|
|
"feal-differential-cryptanalysis": true,
|
|
"feal-linear-cryptanalysis": false,
|
|
"filter-js-from-html": false,
|
|
"financial-document-processor": false,
|
|
"fix-code-vulnerability": true,
|
|
"fix-git": true,
|
|
"fix-ocaml-gc": true,
|
|
"gcode-to-text": true,
|
|
"git-leak-recovery": true,
|
|
"git-multibranch": true,
|
|
"gpt2-codegolf": false,
|
|
"headless-terminal": true,
|
|
"hf-model-inference": true,
|
|
"install-windows-3.11": false,
|
|
"kv-store-grpc": true,
|
|
"large-scale-text-editing": true,
|
|
"largest-eigenval": false,
|
|
"llm-inference-batching-scheduler": false,
|
|
"log-summary-date-ranges": true,
|
|
"mailman": false,
|
|
"make-doom-for-mips": false,
|
|
"make-mips-interpreter": true,
|
|
"mcmc-sampling-stan": true,
|
|
"merge-diff-arc-agi-task": true,
|
|
"model-extraction-relu-logits": false,
|
|
"modernize-scientific-stack": true,
|
|
"mteb-leaderboard": false,
|
|
"mteb-retrieve": false,
|
|
"multi-source-data-merger": true,
|
|
"nginx-request-logging": true,
|
|
"openssl-selfsigned-cert": true,
|
|
"overfull-hbox": false,
|
|
"password-recovery": true,
|
|
"path-tracing-reverse": false,
|
|
"path-tracing": false,
|
|
"polyglot-c-py": false,
|
|
"polyglot-rust-c": false,
|
|
"portfolio-optimization": false,
|
|
"protein-assembly": false,
|
|
"prove-plus-comm": true,
|
|
"pypi-server": true,
|
|
"pytorch-model-cli": false,
|
|
"pytorch-model-recovery": true,
|
|
"qemu-alpine-ssh": false,
|
|
"qemu-startup": true,
|
|
"query-optimize": false,
|
|
"raman-fitting": false,
|
|
"regex-chess": false,
|
|
"regex-log": true,
|
|
"reshard-c4-data": false,
|
|
"rstan-to-pystan": false,
|
|
"sam-cell-seg": false,
|
|
"sanitize-git-repo": false,
|
|
"schemelike-metacircular-eval": false,
|
|
"sparql-university": false,
|
|
"sqlite-db-truncate": true,
|
|
"sqlite-with-gcov": false,
|
|
"torch-pipeline-parallelism": false,
|
|
"torch-tensor-parallelism": true,
|
|
"train-fasttext": false,
|
|
"tune-mjcf": false,
|
|
"video-processing": false,
|
|
"vulnerable-secret": true,
|
|
"winning-avg-corewars": false,
|
|
"write-compressor": false
|
|
}
|
|
},
|
|
"gpt-5.3-codex-xhigh": {
|
|
"pass_rate": 0.6404,
|
|
"tasks": {
|
|
"adaptive-rejection-sampler": false,
|
|
"bn-fit-modify": true,
|
|
"break-filter-js-from-html": true,
|
|
"build-cython-ext": true,
|
|
"build-pmars": true,
|
|
"build-pov-ray": true,
|
|
"caffe-cifar-10": true,
|
|
"cancel-async-tasks": false,
|
|
"chess-best-move": true,
|
|
"circuit-fibsqrt": true,
|
|
"cobol-modernization": true,
|
|
"code-from-image": true,
|
|
"compile-compcert": true,
|
|
"configure-git-webserver": true,
|
|
"constraints-scheduling": true,
|
|
"count-dataset-tokens": true,
|
|
"crack-7z-hash": true,
|
|
"custom-memory-heap-crash": true,
|
|
"db-wal-recovery": false,
|
|
"distribution-search": true,
|
|
"dna-assembly": false,
|
|
"dna-insert": true,
|
|
"extract-elf": true,
|
|
"extract-moves-from-video": false,
|
|
"feal-differential-cryptanalysis": true,
|
|
"feal-linear-cryptanalysis": true,
|
|
"filter-js-from-html": false,
|
|
"financial-document-processor": true,
|
|
"fix-code-vulnerability": true,
|
|
"fix-git": true,
|
|
"fix-ocaml-gc": true,
|
|
"gcode-to-text": false,
|
|
"git-leak-recovery": true,
|
|
"git-multibranch": true,
|
|
"gpt2-codegolf": false,
|
|
"headless-terminal": true,
|
|
"hf-model-inference": true,
|
|
"install-windows-3.11": false,
|
|
"kv-store-grpc": true,
|
|
"large-scale-text-editing": true,
|
|
"largest-eigenval": true,
|
|
"llm-inference-batching-scheduler": true,
|
|
"log-summary-date-ranges": true,
|
|
"mailman": false,
|
|
"make-doom-for-mips": false,
|
|
"make-mips-interpreter": false,
|
|
"mcmc-sampling-stan": false,
|
|
"merge-diff-arc-agi-task": true,
|
|
"model-extraction-relu-logits": true,
|
|
"modernize-scientific-stack": true,
|
|
"mteb-leaderboard": true,
|
|
"mteb-retrieve": false,
|
|
"multi-source-data-merger": true,
|
|
"nginx-request-logging": true,
|
|
"openssl-selfsigned-cert": true,
|
|
"overfull-hbox": false,
|
|
"password-recovery": true,
|
|
"path-tracing-reverse": true,
|
|
"path-tracing": true,
|
|
"polyglot-c-py": false,
|
|
"polyglot-rust-c": false,
|
|
"portfolio-optimization": false,
|
|
"protein-assembly": true,
|
|
"prove-plus-comm": true,
|
|
"pypi-server": true,
|
|
"pytorch-model-cli": true,
|
|
"pytorch-model-recovery": false,
|
|
"qemu-alpine-ssh": false,
|
|
"qemu-startup": false,
|
|
"query-optimize": false,
|
|
"raman-fitting": false,
|
|
"regex-chess": false,
|
|
"regex-log": true,
|
|
"reshard-c4-data": false,
|
|
"rstan-to-pystan": true,
|
|
"sam-cell-seg": false,
|
|
"sanitize-git-repo": true,
|
|
"schemelike-metacircular-eval": false,
|
|
"sparql-university": true,
|
|
"sqlite-db-truncate": true,
|
|
"sqlite-with-gcov": true,
|
|
"torch-pipeline-parallelism": false,
|
|
"torch-tensor-parallelism": false,
|
|
"train-fasttext": false,
|
|
"tune-mjcf": true,
|
|
"video-processing": false,
|
|
"vulnerable-secret": true,
|
|
"winning-avg-corewars": true,
|
|
"write-compressor": false
|
|
}
|
|
}
|
|
}
|