feat(gemini): add 3.1 pro preview support (#9553)

Add 3.1 model metadata for Google AI and update Gemini tests/examples to use the new handle.

👾 Generated with [Letta Code](https://letta.com)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Kevin Lin
2026-02-19 12:09:59 -08:00
committed by Caren Thomas
parent e2ad8762fe
commit bd5b5fa9f3
6 changed files with 70 additions and 5 deletions

View File

@@ -386,6 +386,7 @@ LLM_MAX_CONTEXT_WINDOW = {
"gemini-2.5-computer-use-preview-10-2025": 1048576, "gemini-2.5-computer-use-preview-10-2025": 1048576,
# gemini 3 # gemini 3
"gemini-3-pro-preview": 1048576, "gemini-3-pro-preview": 1048576,
"gemini-3.1-pro-preview": 1048576,
"gemini-3-flash-preview": 1048576, "gemini-3-flash-preview": 1048576,
# gemini latest aliases # gemini latest aliases
"gemini-flash-latest": 1048576, "gemini-flash-latest": 1048576,

View File

@@ -1,5 +1,6 @@
GOOGLE_MODEL_TO_CONTEXT_LENGTH = { GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
"gemini-3-pro-preview": 1048576, "gemini-3-pro-preview": 1048576,
"gemini-3.1-pro-preview": 1048576,
"gemini-3-flash-preview": 1048576, "gemini-3-flash-preview": 1048576,
"gemini-2.5-pro": 1048576, "gemini-2.5-pro": 1048576,
"gemini-2.5-flash": 1048576, "gemini-2.5-flash": 1048576,

View File

@@ -14109,6 +14109,48 @@
"supports_web_search": true, "supports_web_search": true,
"tpm": 800000 "tpm": 800000
}, },
"gemini/gemini-3.1-pro-preview": {
"cache_read_input_token_cost": 2e-7,
"cache_read_input_token_cost_above_200k_tokens": 4e-7,
"input_cost_per_token": 2e-6,
"input_cost_per_token_above_200k_tokens": 4e-6,
"input_cost_per_token_batches": 1e-6,
"litellm_provider": "gemini",
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_images_per_prompt": 3000,
"max_input_tokens": 1048576,
"max_output_tokens": 65535,
"max_pdf_size_mb": 30,
"max_tokens": 65535,
"max_video_length": 1,
"max_videos_per_prompt": 10,
"mode": "chat",
"output_cost_per_token": 1.2e-5,
"output_cost_per_token_above_200k_tokens": 1.8e-5,
"output_cost_per_token_batches": 6e-6,
"rpm": 2000,
"source": "https://ai.google.dev/pricing/gemini-3",
"supported_endpoints": [
"/v1/chat/completions",
"/v1/completions",
"/v1/batch"
],
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"supports_audio_input": true,
"supports_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_video_input": true,
"supports_vision": true,
"supports_web_search": true,
"tpm": 800000
},
"gemini/gemini-3-flash-preview": { "gemini/gemini-3-flash-preview": {
"cache_read_input_token_cost": 5e-8, "cache_read_input_token_cost": 5e-8,
"input_cost_per_audio_token": 1e-6, "input_cost_per_audio_token": 1e-6,

21
letta/test_gemini.py Normal file
View File

@@ -0,0 +1,21 @@
from letta_client import Letta
def create_agent() -> None:
    """Create a test agent backed by the Gemini 3.1 Pro preview model.

    Connects to a locally running Letta server (http://localhost:8283) and
    registers an agent using the ``google_ai/gemini-3.1-pro-preview`` handle
    added in this change. Network I/O only; prints the created agent state.

    Raises whatever connection/validation errors the Letta client surfaces
    if the local server is not running.
    """
    client = Letta(base_url="http://localhost:8283")
    agent_state = client.agents.create(
        # Fix: name now matches the 3.1 model handle below (was "3-pro",
        # left stale when the model string was bumped).
        name="test-gemini-3.1-pro-agent",
        model="google_ai/gemini-3.1-pro-preview",
        embedding="openai/text-embedding-3-small",
        context_window_limit=16000,
    )
    # Drop the trailing space inside the literal; print already inserts
    # a separator, so the original produced a double space.
    print("Created agent:", agent_state)
def main() -> None:
    """Script entry point: create the Gemini 3.1 Pro preview test agent."""
    create_agent()
# Run only when executed directly, not when imported.
if __name__ == "__main__":
    main()

View File

@@ -46,7 +46,7 @@ CACHE_TEST_CONFIGS = [
# OpenAI gpt-4o with prompt caching (Chat Completions API) # OpenAI gpt-4o with prompt caching (Chat Completions API)
("openai/gpt-4o", {"provider_type": "openai"}), ("openai/gpt-4o", {"provider_type": "openai"}),
# Gemini 3 Pro Preview with context caching # Gemini 3 Pro Preview with context caching
("google_ai/gemini-3-pro-preview", {"provider_type": "google_ai"}), ("google_ai/gemini-3.1-pro-preview", {"provider_type": "google_ai"}),
] ]
REASONING_TEST_CONFIGS = [ REASONING_TEST_CONFIGS = [
@@ -59,7 +59,7 @@ REASONING_TEST_CONFIGS = [
("openai/gpt-5.1", {"provider_type": "openai", "reasoning": {"reasoning_effort": "low"}}), ("openai/gpt-5.1", {"provider_type": "openai", "reasoning": {"reasoning_effort": "low"}}),
# Gemini 3 Pro Preview with thinking enabled # Gemini 3 Pro Preview with thinking enabled
( (
"google_ai/gemini-3-pro-preview", "google_ai/gemini-3.1-pro-preview",
{"provider_type": "google_ai", "thinking_config": {"include_thoughts": True, "thinking_budget": 1024}}, {"provider_type": "google_ai", "thinking_config": {"include_thoughts": True, "thinking_budget": 1024}},
), ),
] ]

View File

@@ -221,12 +221,12 @@ CACHING_TEST_CONFIGS = [
# The docs say "Implicit caching is enabled by default for all Gemini 2.5 models" # The docs say "Implicit caching is enabled by default for all Gemini 2.5 models"
# This suggests 3 Pro Preview may require explicit caching instead # This suggests 3 Pro Preview may require explicit caching instead
pytest.param( pytest.param(
"google_ai/gemini-3-pro-preview", "google_ai/gemini-3.1-pro-preview",
{}, {},
2048, # Min tokens for 3 Pro Preview 2048, # Min tokens for 3 Pro Preview
"cached_tokens", # Field name (normalized from cached_content_token_count) "cached_tokens", # Field name (normalized from cached_content_token_count)
None, # No separate write field None, # No separate write field
id="gemini-3-pro-preview-implicit", id="gemini-3.1-pro-preview-implicit",
marks=pytest.mark.xfail(reason="Gemini 3 Pro Preview doesn't have implicit caching (only 2.5 models do)"), marks=pytest.mark.xfail(reason="Gemini 3 Pro Preview doesn't have implicit caching (only 2.5 models do)"),
), ),
] ]
@@ -924,7 +924,7 @@ async def test_gemini_3_pro_preview_implicit_caching(async_client: AsyncLetta):
Since implicit caching is stochastic (depends on routing, timing, etc.), we send Since implicit caching is stochastic (depends on routing, timing, etc.), we send
multiple messages in quick succession and check if ANY of them hit the cache. multiple messages in quick succession and check if ANY of them hit the cache.
""" """
model = "google_ai/gemini-3-pro-preview" model = "google_ai/gemini-3.1-pro-preview"
agent = await create_agent_with_large_memory(async_client, model, {}, "gemini-3-pro") agent = await create_agent_with_large_memory(async_client, model, {}, "gemini-3-pro")
try: try: