157 lines
4.5 KiB
YAML
157 lines
4.5 KiB
YAML
- model: claude-3-5-haiku
|
|
average: 87.78
|
|
total_cost: 4.15
|
|
archival_memory_read_benchmark: 96.33
|
|
core_memory_write_benchmark: 91.0
|
|
core_memory_read_benchmark: 76.0
|
|
- model: gemini-2-5-pro
|
|
average: 98.22
|
|
total_cost: 5.02
|
|
archival_memory_read_benchmark: 96.0
|
|
core_memory_write_benchmark: 98.67
|
|
core_memory_read_benchmark: 100.0
|
|
- model: claude-3-7-sonnet-extended
|
|
average: 95.78
|
|
total_cost: 14.42
|
|
archival_memory_read_benchmark: 93.33
|
|
core_memory_write_benchmark: 95.67
|
|
core_memory_read_benchmark: 98.33
|
|
- model: gemini-2-5-flash
|
|
average: 94.0
|
|
total_cost: 0.55
|
|
archival_memory_read_benchmark: 93.0
|
|
core_memory_write_benchmark: 92.0
|
|
core_memory_read_benchmark: 97.0
|
|
- model: openai-gpt-4.1
|
|
average: 95.44
|
|
total_cost: 7.05
|
|
archival_memory_read_benchmark: 89.67
|
|
core_memory_write_benchmark: 99.33
|
|
core_memory_read_benchmark: 97.33
|
|
- model: claude-3-7-sonnet
|
|
average: 92.56
|
|
total_cost: 17.24
|
|
archival_memory_read_benchmark: 88.0
|
|
core_memory_write_benchmark: 96.33
|
|
core_memory_read_benchmark: 93.33
|
|
- model: together-llama-4-scout-17b
|
|
average: 78.56
|
|
total_cost: 0.77
|
|
archival_memory_read_benchmark: 86.33
|
|
core_memory_write_benchmark: 56.0
|
|
core_memory_read_benchmark: 93.33
|
|
- model: together-qwen-2-5-72b
|
|
average: 77.44
|
|
total_cost: 4.71
|
|
archival_memory_read_benchmark: 79.33
|
|
core_memory_write_benchmark: 68.33
|
|
core_memory_read_benchmark: 84.67
|
|
- model: claude-3-5-sonnet
|
|
average: 90.0
|
|
total_cost: 14.07
|
|
archival_memory_read_benchmark: 76.67
|
|
core_memory_write_benchmark: 98.33
|
|
core_memory_read_benchmark: 95.0
|
|
- model: openai-gpt-4o
|
|
average: 88.0
|
|
total_cost: 8.11
|
|
archival_memory_read_benchmark: 69.0
|
|
core_memory_write_benchmark: 98.67
|
|
core_memory_read_benchmark: 96.33
|
|
- model: together-llama-3-1-405b
|
|
average: 81.67
|
|
total_cost: 9.84
|
|
archival_memory_read_benchmark: 60.67
|
|
core_memory_write_benchmark: 86.0
|
|
core_memory_read_benchmark: 98.33
|
|
- model: together-llama-4-maverick-17b
|
|
average: 62.33
|
|
total_cost: 1.06
|
|
archival_memory_read_benchmark: 53.0
|
|
core_memory_write_benchmark: 39.33
|
|
core_memory_read_benchmark: 94.67
|
|
- model: openai-o1
|
|
average: 77.11
|
|
total_cost: 63.63
|
|
archival_memory_read_benchmark: 52.33
|
|
core_memory_write_benchmark: 82.0
|
|
core_memory_read_benchmark: 97.0
|
|
- model: openai-gpt-4.1-mini
|
|
average: 78.22
|
|
total_cost: 1.35
|
|
archival_memory_read_benchmark: 41.0
|
|
core_memory_write_benchmark: 95.0
|
|
core_memory_read_benchmark: 98.67
|
|
- model: together-deepseek-v3
|
|
average: 73.33
|
|
total_cost: 3.39
|
|
archival_memory_read_benchmark: 26.33
|
|
core_memory_write_benchmark: 96.0
|
|
core_memory_read_benchmark: 97.67
|
|
- model: together-llama-3-2-3b
|
|
average: 4.67
|
|
total_cost: 0.87
|
|
archival_memory_read_benchmark: 14.0
|
|
core_memory_write_benchmark: 0.0
|
|
core_memory_read_benchmark: 0.0
|
|
- model: together-llama-3-70b
|
|
average: 35.89
|
|
total_cost: 1.56
|
|
archival_memory_read_benchmark: 13.0
|
|
core_memory_write_benchmark: 0.0
|
|
core_memory_read_benchmark: 94.67
|
|
- model: together-meta-llama-3-1-8b
|
|
average: 32.67
|
|
total_cost: 0.98
|
|
archival_memory_read_benchmark: 8.0
|
|
core_memory_write_benchmark: 12.0
|
|
core_memory_read_benchmark: 78.0
|
|
- model: together-llama-3-3-70b
|
|
average: 66.33
|
|
total_cost: 2.56
|
|
archival_memory_read_benchmark: 6.33
|
|
core_memory_write_benchmark: 97.0
|
|
core_memory_read_benchmark: 95.67
|
|
- model: together-meta-llama-3-1-70b
|
|
average: 62.56
|
|
total_cost: 2.61
|
|
archival_memory_read_benchmark: 6.0
|
|
core_memory_write_benchmark: 86.67
|
|
core_memory_read_benchmark: 95.0
|
|
- model: openai-o3-mini
|
|
average: 65.67
|
|
total_cost: 3.67
|
|
archival_memory_read_benchmark: 5.33
|
|
core_memory_write_benchmark: 93.33
|
|
core_memory_read_benchmark: 98.33
|
|
- model: openai-o4-mini
|
|
average: 67.0
|
|
total_cost: 3.89
|
|
archival_memory_read_benchmark: 4.67
|
|
core_memory_write_benchmark: 98.33
|
|
core_memory_read_benchmark: 98.0
|
|
- model: openai-gpt-4.1-nano
|
|
average: 24.0
|
|
total_cost: 0.35
|
|
archival_memory_read_benchmark: 2.0
|
|
core_memory_write_benchmark: 14.0
|
|
core_memory_read_benchmark: 56.0
|
|
- model: openai-gpt-4o-mini
|
|
average: 65.22
|
|
total_cost: 0.35
|
|
archival_memory_read_benchmark: 1.33
|
|
core_memory_write_benchmark: 95.33
|
|
core_memory_read_benchmark: 99.0
|
|
- model: together-qwen-2-5-7b
|
|
average: 16.67
|
|
total_cost: 1.23
|
|
archival_memory_read_benchmark: 1.0
|
|
core_memory_write_benchmark: 36.67
|
|
core_memory_read_benchmark: 12.33
|
|
- model: openai-gpt-3.5-turbo
|
|
average: 21.0
|
|
total_cost: 1.71
|
|
archival_memory_read_benchmark: 0.67
|
|
core_memory_write_benchmark: 10.33
|
|
core_memory_read_benchmark: 52.0
|