feat(gemini): add 3.1 pro preview support (#9553)

Add 3.1 model metadata for Google AI and update Gemini tests/examples to use the new handle.

👾 Generated with [Letta Code](https://letta.com)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Kevin Lin
2026-02-19 12:09:59 -08:00
committed by Caren Thomas
parent e2ad8762fe
commit bd5b5fa9f3
6 changed files with 70 additions and 5 deletions

View File

@@ -386,6 +386,7 @@ LLM_MAX_CONTEXT_WINDOW = {
"gemini-2.5-computer-use-preview-10-2025": 1048576,
# gemini 3
"gemini-3-pro-preview": 1048576,
"gemini-3.1-pro-preview": 1048576,
"gemini-3-flash-preview": 1048576,
# gemini latest aliases
"gemini-flash-latest": 1048576,

View File

@@ -1,5 +1,6 @@
GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
"gemini-3-pro-preview": 1048576,
"gemini-3.1-pro-preview": 1048576,
"gemini-3-flash-preview": 1048576,
"gemini-2.5-pro": 1048576,
"gemini-2.5-flash": 1048576,

View File

@@ -14109,6 +14109,48 @@
"supports_web_search": true,
"tpm": 800000
},
"gemini/gemini-3.1-pro-preview": {
"cache_read_input_token_cost": 2e-7,
"cache_read_input_token_cost_above_200k_tokens": 4e-7,
"input_cost_per_token": 2e-6,
"input_cost_per_token_above_200k_tokens": 4e-6,
"input_cost_per_token_batches": 1e-6,
"litellm_provider": "gemini",
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_images_per_prompt": 3000,
"max_input_tokens": 1048576,
"max_output_tokens": 65535,
"max_pdf_size_mb": 30,
"max_tokens": 65535,
"max_video_length": 1,
"max_videos_per_prompt": 10,
"mode": "chat",
"output_cost_per_token": 1.2e-5,
"output_cost_per_token_above_200k_tokens": 1.8e-5,
"output_cost_per_token_batches": 6e-6,
"rpm": 2000,
"source": "https://ai.google.dev/pricing/gemini-3",
"supported_endpoints": [
"/v1/chat/completions",
"/v1/completions",
"/v1/batch"
],
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"supports_audio_input": true,
"supports_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_video_input": true,
"supports_vision": true,
"supports_web_search": true,
"tpm": 800000
},
"gemini/gemini-3-flash-preview": {
"cache_read_input_token_cost": 5e-8,
"input_cost_per_audio_token": 1e-6,

21
letta/test_gemini.py Normal file
View File

@@ -0,0 +1,21 @@
from letta_client import Letta


def create_agent() -> None:
    """Create a test agent backed by the Gemini 3.1 Pro preview model.

    Connects to a Letta server assumed to be running locally on port 8283
    and prints the resulting agent state. No value is returned.
    """
    # Client pointed at the local dev server — TODO confirm port if yours differs.
    letta = Letta(base_url="http://localhost:8283")
    state = letta.agents.create(
        name="test-gemini-3-pro-agent",
        model="google_ai/gemini-3.1-pro-preview",
        embedding="openai/text-embedding-3-small",
        context_window_limit=16000,
    )
    print("Created agent: ", state)


def main() -> None:
    """Script entry point: create the test agent."""
    create_agent()


if __name__ == "__main__":
    main()

View File

@@ -46,7 +46,7 @@ CACHE_TEST_CONFIGS = [
# OpenAI gpt-4o with prompt caching (Chat Completions API)
("openai/gpt-4o", {"provider_type": "openai"}),
# Gemini 3.1 Pro Preview with context caching
("google_ai/gemini-3-pro-preview", {"provider_type": "google_ai"}),
("google_ai/gemini-3.1-pro-preview", {"provider_type": "google_ai"}),
]
REASONING_TEST_CONFIGS = [
@@ -59,7 +59,7 @@ REASONING_TEST_CONFIGS = [
("openai/gpt-5.1", {"provider_type": "openai", "reasoning": {"reasoning_effort": "low"}}),
# Gemini 3.1 Pro Preview with thinking enabled
(
"google_ai/gemini-3-pro-preview",
"google_ai/gemini-3.1-pro-preview",
{"provider_type": "google_ai", "thinking_config": {"include_thoughts": True, "thinking_budget": 1024}},
),
]

View File

@@ -221,12 +221,12 @@ CACHING_TEST_CONFIGS = [
# The docs say "Implicit caching is enabled by default for all Gemini 2.5 models"
# This suggests 3 Pro Preview may require explicit caching instead
pytest.param(
"google_ai/gemini-3-pro-preview",
"google_ai/gemini-3.1-pro-preview",
{},
2048, # Min tokens for 3 Pro Preview
"cached_tokens", # Field name (normalized from cached_content_token_count)
None, # No separate write field
id="gemini-3-pro-preview-implicit",
id="gemini-3.1-pro-preview-implicit",
marks=pytest.mark.xfail(reason="Gemini 3 Pro Preview doesn't have implicit caching (only 2.5 models do)"),
),
]
@@ -924,7 +924,7 @@ async def test_gemini_3_pro_preview_implicit_caching(async_client: AsyncLetta):
Since implicit caching is stochastic (depends on routing, timing, etc.), we send
multiple messages in quick succession and check if ANY of them hit the cache.
"""
model = "google_ai/gemini-3-pro-preview"
model = "google_ai/gemini-3.1-pro-preview"
agent = await create_agent_with_large_memory(async_client, model, {}, "gemini-3-pro")
try: