{ "sonnet-4.6-xhigh": { "pass_rate": 0.427, "tasks": { "adaptive-rejection-sampler": false, "bn-fit-modify": true, "break-filter-js-from-html": false, "build-cython-ext": true, "build-pmars": true, "build-pov-ray": true, "caffe-cifar-10": false, "cancel-async-tasks": false, "chess-best-move": false, "circuit-fibsqrt": false, "cobol-modernization": false, "code-from-image": true, "compile-compcert": true, "configure-git-webserver": true, "constraints-scheduling": true, "count-dataset-tokens": true, "crack-7z-hash": false, "custom-memory-heap-crash": false, "db-wal-recovery": false, "distribution-search": false, "dna-assembly": false, "dna-insert": false, "extract-elf": true, "extract-moves-from-video": false, "feal-differential-cryptanalysis": true, "feal-linear-cryptanalysis": false, "filter-js-from-html": false, "financial-document-processor": false, "fix-code-vulnerability": true, "fix-git": true, "fix-ocaml-gc": true, "gcode-to-text": true, "git-leak-recovery": true, "git-multibranch": true, "gpt2-codegolf": false, "headless-terminal": true, "hf-model-inference": true, "install-windows-3.11": false, "kv-store-grpc": true, "large-scale-text-editing": true, "largest-eigenval": false, "llm-inference-batching-scheduler": false, "log-summary-date-ranges": true, "mailman": false, "make-doom-for-mips": false, "make-mips-interpreter": true, "mcmc-sampling-stan": true, "merge-diff-arc-agi-task": true, "model-extraction-relu-logits": false, "modernize-scientific-stack": true, "mteb-leaderboard": false, "mteb-retrieve": false, "multi-source-data-merger": true, "nginx-request-logging": true, "openssl-selfsigned-cert": true, "overfull-hbox": false, "password-recovery": true, "path-tracing-reverse": false, "path-tracing": false, "polyglot-c-py": false, "polyglot-rust-c": false, "portfolio-optimization": false, "protein-assembly": false, "prove-plus-comm": true, "pypi-server": true, "pytorch-model-cli": false, "pytorch-model-recovery": true, "qemu-alpine-ssh": false, "qemu-startup": true, "query-optimize": false, "raman-fitting": false, "regex-chess": false, "regex-log": true, "reshard-c4-data": false, "rstan-to-pystan": false, "sam-cell-seg": false, "sanitize-git-repo": false, "schemelike-metacircular-eval": false, "sparql-university": false, "sqlite-db-truncate": true, "sqlite-with-gcov": false, "torch-pipeline-parallelism": false, "torch-tensor-parallelism": true, "train-fasttext": false, "tune-mjcf": false, "video-processing": false, "vulnerable-secret": true, "winning-avg-corewars": false, "write-compressor": false } }, "gpt-5.3-codex-xhigh": { "pass_rate": 0.6404, "tasks": { "adaptive-rejection-sampler": false, "bn-fit-modify": true, "break-filter-js-from-html": true, "build-cython-ext": true, "build-pmars": true, "build-pov-ray": true, "caffe-cifar-10": true, "cancel-async-tasks": false, "chess-best-move": true, "circuit-fibsqrt": true, "cobol-modernization": true, "code-from-image": true, "compile-compcert": true, "configure-git-webserver": true, "constraints-scheduling": true, "count-dataset-tokens": true, "crack-7z-hash": true, "custom-memory-heap-crash": true, "db-wal-recovery": false, "distribution-search": true, "dna-assembly": false, "dna-insert": true, "extract-elf": true, "extract-moves-from-video": false, "feal-differential-cryptanalysis": true, "feal-linear-cryptanalysis": true, "filter-js-from-html": false, "financial-document-processor": true, "fix-code-vulnerability": true, "fix-git": true, "fix-ocaml-gc": true, "gcode-to-text": false, "git-leak-recovery": true, "git-multibranch": true, "gpt2-codegolf": false, "headless-terminal": true, "hf-model-inference": true, "install-windows-3.11": false, "kv-store-grpc": true, "large-scale-text-editing": true, "largest-eigenval": true, "llm-inference-batching-scheduler": true, "log-summary-date-ranges": true, "mailman": false, "make-doom-for-mips": false, "make-mips-interpreter": false, "mcmc-sampling-stan": false, "merge-diff-arc-agi-task": true, "model-extraction-relu-logits": true, "modernize-scientific-stack": true, "mteb-leaderboard": true, "mteb-retrieve": false, "multi-source-data-merger": true, "nginx-request-logging": true, "openssl-selfsigned-cert": true, "overfull-hbox": false, "password-recovery": true, "path-tracing-reverse": true, "path-tracing": true, "polyglot-c-py": false, "polyglot-rust-c": false, "portfolio-optimization": false, "protein-assembly": true, "prove-plus-comm": true, "pypi-server": true, "pytorch-model-cli": true, "pytorch-model-recovery": false, "qemu-alpine-ssh": false, "qemu-startup": false, "query-optimize": false, "raman-fitting": false, "regex-chess": false, "regex-log": true, "reshard-c4-data": false, "rstan-to-pystan": true, "sam-cell-seg": false, "sanitize-git-repo": true, "schemelike-metacircular-eval": false, "sparql-university": true, "sqlite-db-truncate": true, "sqlite-with-gcov": true, "torch-pipeline-parallelism": false, "torch-tensor-parallelism": false, "train-fasttext": false, "tune-mjcf": true, "video-processing": false, "vulnerable-secret": true, "winning-avg-corewars": true, "write-compressor": false } } }