diff --git a/letta/constants.py b/letta/constants.py index 04ca17d0..ac1bb174 100644 --- a/letta/constants.py +++ b/letta/constants.py @@ -386,6 +386,7 @@ LLM_MAX_CONTEXT_WINDOW = { "gemini-2.5-computer-use-preview-10-2025": 1048576, # gemini 3 "gemini-3-pro-preview": 1048576, + "gemini-3.1-pro-preview": 1048576, "gemini-3-flash-preview": 1048576, # gemini latest aliases "gemini-flash-latest": 1048576, diff --git a/letta/llm_api/google_constants.py b/letta/llm_api/google_constants.py index a7e9151a..ddc58634 100644 --- a/letta/llm_api/google_constants.py +++ b/letta/llm_api/google_constants.py @@ -1,5 +1,6 @@ GOOGLE_MODEL_TO_CONTEXT_LENGTH = { "gemini-3-pro-preview": 1048576, + "gemini-3.1-pro-preview": 1048576, "gemini-3-flash-preview": 1048576, "gemini-2.5-pro": 1048576, "gemini-2.5-flash": 1048576, diff --git a/letta/model_specs/model_prices_and_context_window.json b/letta/model_specs/model_prices_and_context_window.json index 7a63e5a5..2ce2e366 100644 --- a/letta/model_specs/model_prices_and_context_window.json +++ b/letta/model_specs/model_prices_and_context_window.json @@ -14109,6 +14109,48 @@ "supports_web_search": true, "tpm": 800000 }, + "gemini/gemini-3.1-pro-preview": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "input_cost_per_token": 2e-6, + "input_cost_per_token_above_200k_tokens": 4e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1.2e-5, + "output_cost_per_token_above_200k_tokens": 1.8e-5, + "output_cost_per_token_batches": 6e-6, + "rpm": 2000, + "source": "https://ai.google.dev/pricing/gemini-3", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", 
+ "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000 + }, "gemini/gemini-3-flash-preview": { "cache_read_input_token_cost": 5e-8, "input_cost_per_audio_token": 1e-6, diff --git a/letta/test_gemini.py b/letta/test_gemini.py new file mode 100644 index 00000000..ffb39634 --- /dev/null +++ b/letta/test_gemini.py @@ -0,0 +1,21 @@ +from letta_client import Letta + + +def create_agent() -> None: + client = Letta(base_url="http://localhost:8283") + + agent_state = client.agents.create( + name="test-gemini-3.1-pro-agent", + model="google_ai/gemini-3.1-pro-preview", + embedding="openai/text-embedding-3-small", + context_window_limit=16000, + ) + print("Created agent: ", agent_state) + + +def main(): + create_agent() + + +if __name__ == "__main__": + main() diff --git a/tests/integration_test_usage_tracking.py b/tests/integration_test_usage_tracking.py index c010690e..f4b5098e 100644 --- a/tests/integration_test_usage_tracking.py +++ b/tests/integration_test_usage_tracking.py @@ -46,7 +46,7 @@ CACHE_TEST_CONFIGS = [ # OpenAI gpt-4o with prompt caching (Chat Completions API) ("openai/gpt-4o", {"provider_type": "openai"}), # Gemini 3 Pro Preview with context caching - ("google_ai/gemini-3-pro-preview", {"provider_type": "google_ai"}), + ("google_ai/gemini-3.1-pro-preview", {"provider_type": "google_ai"}), ] REASONING_TEST_CONFIGS = [ @@ -59,7 +59,7 @@ REASONING_TEST_CONFIGS = [ ("openai/gpt-5.1", {"provider_type": "openai", "reasoning": {"reasoning_effort": "low"}}), # Gemini 3 Pro Preview with thinking enabled ( - 
"google_ai/gemini-3-pro-preview", + "google_ai/gemini-3.1-pro-preview", {"provider_type": "google_ai", "thinking_config": {"include_thoughts": True, "thinking_budget": 1024}}, ), ] diff --git a/tests/test_prompt_caching.py b/tests/test_prompt_caching.py index c7de0315..432a2b41 100644 --- a/tests/test_prompt_caching.py +++ b/tests/test_prompt_caching.py @@ -221,12 +221,12 @@ CACHING_TEST_CONFIGS = [ # The docs say "Implicit caching is enabled by default for all Gemini 2.5 models" # This suggests 3 Pro Preview may require explicit caching instead pytest.param( - "google_ai/gemini-3-pro-preview", + "google_ai/gemini-3.1-pro-preview", {}, 2048, # Min tokens for 3 Pro Preview "cached_tokens", # Field name (normalized from cached_content_token_count) None, # No separate write field - id="gemini-3-pro-preview-implicit", + id="gemini-3.1-pro-preview-implicit", marks=pytest.mark.xfail(reason="Gemini 3 Pro Preview doesn't have implicit caching (only 2.5 models do)"), ), ] @@ -924,7 +924,7 @@ async def test_gemini_3_pro_preview_implicit_caching(async_client: AsyncLetta): Since implicit caching is stochastic (depends on routing, timing, etc.), we send multiple messages in quick succession and check if ANY of them hit the cache. """ - model = "google_ai/gemini-3-pro-preview" + model = "google_ai/gemini-3.1-pro-preview" agent = await create_agent_with_large_memory(async_client, model, {}, "gemini-3-pro") try: