diff --git a/letta/constants.py b/letta/constants.py index 04ca17d0..ac1bb174 100644 --- a/letta/constants.py +++ b/letta/constants.py @@ -386,6 +386,7 @@ LLM_MAX_CONTEXT_WINDOW = { "gemini-2.5-computer-use-preview-10-2025": 1048576, # gemini 3 "gemini-3-pro-preview": 1048576, + "gemini-3.1-pro-preview": 1048576, "gemini-3-flash-preview": 1048576, # gemini latest aliases "gemini-flash-latest": 1048576, diff --git a/letta/llm_api/google_constants.py b/letta/llm_api/google_constants.py index a7e9151a..ddc58634 100644 --- a/letta/llm_api/google_constants.py +++ b/letta/llm_api/google_constants.py @@ -1,5 +1,6 @@ GOOGLE_MODEL_TO_CONTEXT_LENGTH = { "gemini-3-pro-preview": 1048576, + "gemini-3.1-pro-preview": 1048576, "gemini-3-flash-preview": 1048576, "gemini-2.5-pro": 1048576, "gemini-2.5-flash": 1048576, diff --git a/letta/model_specs/model_prices_and_context_window.json b/letta/model_specs/model_prices_and_context_window.json index 7a63e5a5..2ce2e366 100644 --- a/letta/model_specs/model_prices_and_context_window.json +++ b/letta/model_specs/model_prices_and_context_window.json @@ -14109,6 +14109,48 @@ "supports_web_search": true, "tpm": 800000 }, + "gemini/gemini-3.1-pro-preview": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "input_cost_per_token": 2e-6, + "input_cost_per_token_above_200k_tokens": 4e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1.2e-5, + "output_cost_per_token_above_200k_tokens": 1.8e-5, + "output_cost_per_token_batches": 6e-6, + "rpm": 2000, + "source": "https://ai.google.dev/pricing/gemini-3", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", 
+ "/v1/batch" + ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000 + }, "gemini/gemini-3-flash-preview": { "cache_read_input_token_cost": 5e-8, "input_cost_per_audio_token": 1e-6, diff --git a/letta/test_gemini.py b/letta/test_gemini.py new file mode 100644 index 00000000..ffb39634 --- /dev/null +++ b/letta/test_gemini.py @@ -0,0 +1,21 @@ +from letta_client import Letta + + +def create_agent() -> None: + client = Letta(base_url="http://localhost:8283") + + agent_state = client.agents.create( + name="test-gemini-3.1-pro-agent", + model="google_ai/gemini-3.1-pro-preview", + embedding="openai/text-embedding-3-small", + context_window_limit=16000, + ) + print("Created agent: ", agent_state) + + +def main(): + create_agent() + + +if __name__ == "__main__": + main() diff --git a/tests/integration_test_usage_tracking.py b/tests/integration_test_usage_tracking.py index c010690e..f4b5098e 100644 --- a/tests/integration_test_usage_tracking.py +++ b/tests/integration_test_usage_tracking.py @@ -46,7 +46,7 @@ CACHE_TEST_CONFIGS = [ # OpenAI gpt-4o with prompt caching (Chat Completions API) ("openai/gpt-4o", {"provider_type": "openai"}), # Gemini 3 Pro Preview with context caching - ("google_ai/gemini-3-pro-preview", {"provider_type": "google_ai"}), + ("google_ai/gemini-3.1-pro-preview", {"provider_type": "google_ai"}), ] REASONING_TEST_CONFIGS = [ @@ -59,7 +59,7 @@ REASONING_TEST_CONFIGS = [ ("openai/gpt-5.1", {"provider_type": "openai", "reasoning": {"reasoning_effort": "low"}}), # Gemini 3 Pro Preview with thinking enabled ( - 
"google_ai/gemini-3-pro-preview", + "google_ai/gemini-3.1-pro-preview", {"provider_type": "google_ai", "thinking_config": {"include_thoughts": True, "thinking_budget": 1024}}, ), ] diff --git a/tests/test_prompt_caching.py b/tests/test_prompt_caching.py index c7de0315..432a2b41 100644 --- a/tests/test_prompt_caching.py +++ b/tests/test_prompt_caching.py @@ -221,12 +221,12 @@ CACHING_TEST_CONFIGS = [ # The docs say "Implicit caching is enabled by default for all Gemini 2.5 models" # This suggests 3 Pro Preview may require explicit caching instead pytest.param( - "google_ai/gemini-3-pro-preview", + "google_ai/gemini-3.1-pro-preview", {}, 2048, # Min tokens for 3 Pro Preview "cached_tokens", # Field name (normalized from cached_content_token_count) None, # No separate write field - id="gemini-3-pro-preview-implicit", + id="gemini-3.1-pro-preview-implicit", marks=pytest.mark.xfail(reason="Gemini 3 Pro Preview doesn't have implicit caching (only 2.5 models do)"), ), ] @@ -924,7 +924,7 @@ async def test_gemini_3_pro_preview_implicit_caching(async_client: AsyncLetta): Since implicit caching is stochastic (depends on routing, timing, etc.), we send multiple messages in quick succession and check if ANY of them hit the cache. """ - model = "google_ai/gemini-3-pro-preview" + model = "google_ai/gemini-3.1-pro-preview" agent = await create_agent_with_large_memory(async_client, model, {}, "gemini-3-pro") try: