---
# Benchmark leaderboard results: one record per model.
#
# Fields in every record:
#   model                          - identifier of the evaluated model
#   average                        - mean of the three *_benchmark scores below,
#                                    rounded to two decimals
#   total_cost                     - cost of the evaluation run
#                                    (NOTE(review): currency/units are not stated
#                                    in this file - presumably USD; confirm)
#   archival_memory_read_benchmark - score on the archival-memory read benchmark
#   core_memory_write_benchmark    - score on the core-memory write benchmark
#   core_memory_read_benchmark     - score on the core-memory read benchmark
#
# Scores in this file fall between 0.0 and 100.0 (presumably percentages -
# TODO confirm with the benchmark harness).
#
# Records are currently listed in descending order of
# archival_memory_read_benchmark; keep that order when adding new models
# unless the consumer sorts on its own.

- model: claude-3-5-haiku
  average: 87.78
  total_cost: 4.15
  archival_memory_read_benchmark: 96.33
  core_memory_write_benchmark: 91.0
  core_memory_read_benchmark: 76.0

- model: gemini-2-5-pro
  average: 98.22
  total_cost: 5.02
  archival_memory_read_benchmark: 96.0
  core_memory_write_benchmark: 98.67
  core_memory_read_benchmark: 100.0

- model: claude-3-7-sonnet-extended
  average: 95.78
  total_cost: 14.42
  archival_memory_read_benchmark: 93.33
  core_memory_write_benchmark: 95.67
  core_memory_read_benchmark: 98.33

- model: gemini-2-5-flash
  average: 94.0
  total_cost: 0.55
  archival_memory_read_benchmark: 93.0
  core_memory_write_benchmark: 92.0
  core_memory_read_benchmark: 97.0

- model: openai-gpt-4.1
  average: 95.44
  total_cost: 7.05
  archival_memory_read_benchmark: 89.67
  core_memory_write_benchmark: 99.33
  core_memory_read_benchmark: 97.33

- model: claude-3-7-sonnet
  average: 92.56
  total_cost: 17.24
  archival_memory_read_benchmark: 88.0
  core_memory_write_benchmark: 96.33
  core_memory_read_benchmark: 93.33

- model: together-llama-4-scout-17b
  average: 78.56
  total_cost: 0.77
  archival_memory_read_benchmark: 86.33
  core_memory_write_benchmark: 56.0
  core_memory_read_benchmark: 93.33

- model: together-qwen-2-5-72b
  average: 77.44
  total_cost: 4.71
  archival_memory_read_benchmark: 79.33
  core_memory_write_benchmark: 68.33
  core_memory_read_benchmark: 84.67

- model: claude-3-5-sonnet
  average: 90.0
  total_cost: 14.07
  archival_memory_read_benchmark: 76.67
  core_memory_write_benchmark: 98.33
  core_memory_read_benchmark: 95.0

- model: openai-gpt-4o
  average: 88.0
  total_cost: 8.11
  archival_memory_read_benchmark: 69.0
  core_memory_write_benchmark: 98.67
  core_memory_read_benchmark: 96.33

- model: together-llama-3-1-405b
  average: 81.67
  total_cost: 9.84
  archival_memory_read_benchmark: 60.67
  core_memory_write_benchmark: 86.0
  core_memory_read_benchmark: 98.33

- model: together-llama-4-maverick-17b
  average: 62.33
  total_cost: 1.06
  archival_memory_read_benchmark: 53.0
  core_memory_write_benchmark: 39.33
  core_memory_read_benchmark: 94.67

- model: openai-o1
  average: 77.11
  total_cost: 63.63
  archival_memory_read_benchmark: 52.33
  core_memory_write_benchmark: 82.0
  core_memory_read_benchmark: 97.0

- model: openai-gpt-4.1-mini
  average: 78.22
  total_cost: 1.35
  archival_memory_read_benchmark: 41.0
  core_memory_write_benchmark: 95.0
  core_memory_read_benchmark: 98.67

- model: together-deepseek-v3
  average: 73.33
  total_cost: 3.39
  archival_memory_read_benchmark: 26.33
  core_memory_write_benchmark: 96.0
  core_memory_read_benchmark: 97.67

- model: together-llama-3-2-3b
  average: 4.67
  total_cost: 0.87
  archival_memory_read_benchmark: 14.0
  core_memory_write_benchmark: 0.0
  core_memory_read_benchmark: 0.0

- model: together-llama-3-70b
  average: 35.89
  total_cost: 1.56
  archival_memory_read_benchmark: 13.0
  core_memory_write_benchmark: 0.0
  core_memory_read_benchmark: 94.67

- model: together-meta-llama-3-1-8b
  average: 32.67
  total_cost: 0.98
  archival_memory_read_benchmark: 8.0
  core_memory_write_benchmark: 12.0
  core_memory_read_benchmark: 78.0

- model: together-llama-3-3-70b
  average: 66.33
  total_cost: 2.56
  archival_memory_read_benchmark: 6.33
  core_memory_write_benchmark: 97.0
  core_memory_read_benchmark: 95.67

- model: together-meta-llama-3-1-70b
  average: 62.56
  total_cost: 2.61
  archival_memory_read_benchmark: 6.0
  core_memory_write_benchmark: 86.67
  core_memory_read_benchmark: 95.0

- model: openai-o3-mini
  average: 65.67
  total_cost: 3.67
  archival_memory_read_benchmark: 5.33
  core_memory_write_benchmark: 93.33
  core_memory_read_benchmark: 98.33

- model: openai-o4-mini
  average: 67.0
  total_cost: 3.89
  archival_memory_read_benchmark: 4.67
  core_memory_write_benchmark: 98.33
  core_memory_read_benchmark: 98.0

- model: openai-gpt-4.1-nano
  average: 24.0
  total_cost: 0.35
  archival_memory_read_benchmark: 2.0
  core_memory_write_benchmark: 14.0
  core_memory_read_benchmark: 56.0

- model: openai-gpt-4o-mini
  average: 65.22
  total_cost: 0.35
  archival_memory_read_benchmark: 1.33
  core_memory_write_benchmark: 95.33
  core_memory_read_benchmark: 99.0

- model: together-qwen-2-5-7b
  average: 16.67
  total_cost: 1.23
  archival_memory_read_benchmark: 1.0
  core_memory_write_benchmark: 36.67
  core_memory_read_benchmark: 12.33

- model: openai-gpt-3.5-turbo
  average: 21.0
  total_cost: 1.71
  archival_memory_read_benchmark: 0.67
  core_memory_write_benchmark: 10.33
  core_memory_read_benchmark: 52.0