feat(gemini): add 3.1 pro preview support (#9553)

Add 3.1 model metadata for Google AI and update Gemini tests/examples to use the new handle.

👾 Generated with [Letta Code](https://letta.com)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Kevin Lin
2026-02-19 12:09:59 -08:00
committed by Caren Thomas
parent e2ad8762fe
commit bd5b5fa9f3
6 changed files with 70 additions and 5 deletions

View File

@@ -386,6 +386,7 @@ LLM_MAX_CONTEXT_WINDOW = {
"gemini-2.5-computer-use-preview-10-2025": 1048576,
# gemini 3
"gemini-3-pro-preview": 1048576,
"gemini-3.1-pro-preview": 1048576,
"gemini-3-flash-preview": 1048576,
# gemini latest aliases
"gemini-flash-latest": 1048576,

View File

@@ -1,5 +1,6 @@
GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
"gemini-3-pro-preview": 1048576,
"gemini-3.1-pro-preview": 1048576,
"gemini-3-flash-preview": 1048576,
"gemini-2.5-pro": 1048576,
"gemini-2.5-flash": 1048576,

View File

@@ -14109,6 +14109,48 @@
"supports_web_search": true,
"tpm": 800000
},
"gemini/gemini-3.1-pro-preview": {
"cache_read_input_token_cost": 2e-7,
"cache_read_input_token_cost_above_200k_tokens": 4e-7,
"input_cost_per_token": 2e-6,
"input_cost_per_token_above_200k_tokens": 4e-6,
"input_cost_per_token_batches": 1e-6,
"litellm_provider": "gemini",
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_images_per_prompt": 3000,
"max_input_tokens": 1048576,
"max_output_tokens": 65535,
"max_pdf_size_mb": 30,
"max_tokens": 65535,
"max_video_length": 1,
"max_videos_per_prompt": 10,
"mode": "chat",
"output_cost_per_token": 1.2e-5,
"output_cost_per_token_above_200k_tokens": 1.8e-5,
"output_cost_per_token_batches": 6e-6,
"rpm": 2000,
"source": "https://ai.google.dev/pricing/gemini-3",
"supported_endpoints": [
"/v1/chat/completions",
"/v1/completions",
"/v1/batch"
],
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"supports_audio_input": true,
"supports_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_video_input": true,
"supports_vision": true,
"supports_web_search": true,
"tpm": 800000
},
"gemini/gemini-3-flash-preview": {
"cache_read_input_token_cost": 5e-8,
"input_cost_per_audio_token": 1e-6,

21
letta/test_gemini.py Normal file
View File

@@ -0,0 +1,21 @@
from letta_client import Letta


def create_agent() -> None:
    """Create a test agent backed by the Gemini 3.1 Pro preview model.

    Connects to a Letta server assumed to be running locally on port 8283
    and prints the resulting agent state. No value is returned.
    """
    # Client pointed at the local dev server — TODO confirm port if yours differs.
    letta = Letta(base_url="http://localhost:8283")
    state = letta.agents.create(
        name="test-gemini-3-pro-agent",
        model="google_ai/gemini-3.1-pro-preview",
        embedding="openai/text-embedding-3-small",
        context_window_limit=16000,
    )
    print("Created agent: ", state)


def main() -> None:
    """Script entry point: create the test agent."""
    create_agent()


if __name__ == "__main__":
    main()

View File

@@ -46,7 +46,7 @@ CACHE_TEST_CONFIGS = [
# OpenAI gpt-4o with prompt caching (Chat Completions API)
("openai/gpt-4o", {"provider_type": "openai"}),
# Gemini 3.1 Pro Preview with context caching
("google_ai/gemini-3-pro-preview", {"provider_type": "google_ai"}),
("google_ai/gemini-3.1-pro-preview", {"provider_type": "google_ai"}),
]
REASONING_TEST_CONFIGS = [
@@ -59,7 +59,7 @@ REASONING_TEST_CONFIGS = [
("openai/gpt-5.1", {"provider_type": "openai", "reasoning": {"reasoning_effort": "low"}}),
# Gemini 3.1 Pro Preview with thinking enabled
(
"google_ai/gemini-3-pro-preview",
"google_ai/gemini-3.1-pro-preview",
{"provider_type": "google_ai", "thinking_config": {"include_thoughts": True, "thinking_budget": 1024}},
),
]

View File

@@ -221,12 +221,12 @@ CACHING_TEST_CONFIGS = [
# The docs say "Implicit caching is enabled by default for all Gemini 2.5 models"
# This suggests 3 Pro Preview may require explicit caching instead
pytest.param(
"google_ai/gemini-3-pro-preview",
"google_ai/gemini-3.1-pro-preview",
{},
2048, # Min tokens for 3 Pro Preview
"cached_tokens", # Field name (normalized from cached_content_token_count)
None, # No separate write field
id="gemini-3-pro-preview-implicit",
id="gemini-3.1-pro-preview-implicit",
marks=pytest.mark.xfail(reason="Gemini 3 Pro Preview doesn't have implicit caching (only 2.5 models do)"),
),
]
@@ -924,7 +924,7 @@ async def test_gemini_3_pro_preview_implicit_caching(async_client: AsyncLetta):
Since implicit caching is stochastic (depends on routing, timing, etc.), we send
multiple messages in quick succession and check if ANY of them hit the cache.
"""
model = "google_ai/gemini-3-pro-preview"
model = "google_ai/gemini-3.1-pro-preview"
agent = await create_agent_with_large_memory(async_client, model, {}, "gemini-3-pro")
try: