diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py
index 79e5c028..aa2e99ba 100644
--- a/letta/llm_api/google_vertex_client.py
+++ b/letta/llm_api/google_vertex_client.py
@@ -370,16 +370,22 @@ class GoogleVertexClient(LLMClientBase):
         # - Range: -1, 0, or 512-24576

         # TODO when using v3 agent loop, properly support the native thinking in Gemini
-        # Add thinking_config for flash
+        # Add thinking_config for all Gemini reasoning models (2.5 series)
         # If enable_reasoner is False, set thinking_budget to 0
         # Otherwise, use the value from max_reasoning_tokens
-        if "flash" in llm_config.model:
-            # Gemini flash models may fail to call tools even with FunctionCallingConfigMode.ANY if thinking is fully disabled, set to minimum to prevent tool call failure
+        if self.is_reasoning_model(llm_config) or "flash" in llm_config.model:
+            # Gemini reasoning models may fail to call tools even with FunctionCallingConfigMode.ANY if thinking is fully disabled, set to minimum to prevent tool call failure
             thinking_budget = llm_config.max_reasoning_tokens if llm_config.enable_reasoner else self.get_thinking_budget(llm_config.model)
             if thinking_budget <= 0:
-                logger.error(
+                logger.warning(
                     f"Thinking budget of {thinking_budget} for Gemini reasoning model {llm_config.model}, this will likely cause tool call failures"
                 )
+                # For models that require thinking mode (2.5 Pro, 3.x), override with minimum valid budget
+                if llm_config.model.startswith("gemini-2.5-pro") or llm_config.model.startswith("gemini-3"):
+                    thinking_budget = 128
+                    logger.warning(
+                        f"Overriding thinking_budget to {thinking_budget} for model {llm_config.model} which requires thinking mode"
+                    )
             thinking_config = ThinkingConfig(
                 thinking_budget=(thinking_budget),
                 include_thoughts=(thinking_budget > 1),
@@ -658,16 +664,24 @@ class GoogleVertexClient(LLMClientBase):
    # | 2.5 Pro | Dynamic thinking: Model decides when and how much to think | 128-32768 | N/A: Cannot disable | thinkingBudget = -1 |
    # | 2.5 Flash | Dynamic thinking: Model decides when and how much to think | 0-24576 | thinkingBudget = 0 | thinkingBudget = -1 |
    # | 2.5 Flash Lite | Model does not think | 512-24576 | thinkingBudget = 0 | thinkingBudget = -1 |
+    # | 3.x | Dynamic thinking: Model decides when and how much to think | 128-? | N/A: Cannot disable | thinkingBudget = -1 |
     def get_thinking_budget(self, model: str) -> bool:
         if model_settings.gemini_force_minimum_thinking_budget:
             if all(substring in model for substring in ["2.5", "flash", "lite"]):
                 return 512
             elif all(substring in model for substring in ["2.5", "flash"]):
                 return 1
+        # Gemini 3 and 2.5 Pro require thinking mode and cannot have budget 0
+        if model.startswith("gemini-3") or model.startswith("gemini-2.5-pro"):
+            return 128  # Minimum valid budget for models that require thinking
         return 0

     def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
-        return llm_config.model.startswith("gemini-2.5-flash") or llm_config.model.startswith("gemini-2.5-pro")
+        return (
+            llm_config.model.startswith("gemini-2.5-flash")
+            or llm_config.model.startswith("gemini-2.5-pro")
+            or llm_config.model.startswith("gemini-3")
+        )

     def is_malformed_function_call(self, response_data: dict) -> dict:
         response = GenerateContentResponse(**response_data)
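Note on the client change above: the patched flow takes `max_reasoning_tokens` when `enable_reasoner` is set, otherwise falls back to `get_thinking_budget`, and then force-overrides any non-positive budget for models that cannot disable thinking. Below is a minimal standalone sketch of that selection, assuming the model-prefix rules from the diff; the helper name `pick_thinking_budget`, the constant, and the `gemini-3-pro-preview` model string are illustrative, and the `model_settings.gemini_force_minimum_thinking_budget` branch is deliberately omitted:

```python
# Hypothetical sketch of the post-patch budget selection; it mirrors the
# diff's logic but is not part of the patch. The
# gemini_force_minimum_thinking_budget setting is left out for brevity.
MIN_THINKING_BUDGET = 128  # minimum valid budget for models that cannot disable thinking


def pick_thinking_budget(model: str, enable_reasoner: bool, max_reasoning_tokens: int) -> int:
    budget = max_reasoning_tokens if enable_reasoner else 0
    # 2.5 Pro and 3.x cannot run with thinking disabled, so a non-positive
    # budget is replaced with the minimum valid value.
    if budget <= 0 and (model.startswith("gemini-2.5-pro") or model.startswith("gemini-3")):
        budget = MIN_THINKING_BUDGET
    return budget


assert pick_thinking_budget("gemini-2.5-flash", False, 1024) == 0       # thinking stays off
assert pick_thinking_budget("gemini-2.5-pro", False, 1024) == 128       # overridden to minimum
assert pick_thinking_budget("gemini-3-pro-preview", True, 1024) == 1024  # illustrative model name
```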
diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py
index 338cb477..9fe0292b 100644
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -422,8 +422,10 @@ class LLMConfig(BaseModel):

         # Anthropic 3.7/4 and Gemini: toggle honored
         is_google_reasoner_with_configurable_thinking = (
-            cls.is_google_vertex_reasoning_model(config) or cls.is_google_ai_reasoning_model(config)
-        ) and not config.model.startswith("gemini-2.5-pro")
+            (cls.is_google_vertex_reasoning_model(config) or cls.is_google_ai_reasoning_model(config))
+            and not config.model.startswith("gemini-2.5-pro")
+            and not config.model.startswith("gemini-3")
+        )
         if cls.is_anthropic_reasoning_model(config) or is_google_reasoner_with_configurable_thinking:
             config.enable_reasoner = bool(reasoning)
             config.put_inner_thoughts_in_kwargs = False
@@ -431,8 +433,8 @@
                 config.max_reasoning_tokens = 1024
             return config

-        # Google Gemini 2.5 Pro: not possible to disable
-        if config.model.startswith("gemini-2.5-pro"):
+        # Google Gemini 2.5 Pro and Gemini 3: not possible to disable
+        if config.model.startswith("gemini-2.5-pro") or config.model.startswith("gemini-3"):
             config.put_inner_thoughts_in_kwargs = False
             config.enable_reasoner = True
             if config.max_reasoning_tokens == 0:
@@ -466,8 +468,8 @@
         # Set verbosity for GPT-5 models
         if config.model.startswith("gpt-5") and config.verbosity is None:
             config.verbosity = "medium"
-        elif config.model.startswith("gemini-2.5-pro"):
-            logger.warning("Reasoning cannot be disabled for Gemini 2.5 Pro model")
+        elif config.model.startswith("gemini-2.5-pro") or config.model.startswith("gemini-3"):
+            logger.warning(f"Reasoning cannot be disabled for {config.model} model")
             # Handle as non-reasoner until we support summary
             config.put_inner_thoughts_in_kwargs = True
             config.enable_reasoner = True
diff --git a/tests/configs/llm_model_configs/gemini-2.5-pro-vertex.json b/tests/configs/llm_model_configs/gemini-2.5-pro-vertex.json
index 4231e1c7..6a0fca1f 100644
--- a/tests/configs/llm_model_configs/gemini-2.5-pro-vertex.json
+++ b/tests/configs/llm_model_configs/gemini-2.5-pro-vertex.json
@@ -3,5 +3,7 @@
   "model_endpoint_type": "google_vertex",
   "model_endpoint": "https://us-central1-aiplatform.googleapis.com/v1/projects/memgpt-428419/locations/us-central1",
   "context_window": 1048576,
-  "put_inner_thoughts_in_kwargs": true
+  "put_inner_thoughts_in_kwargs": false,
+  "enable_reasoner": true,
+  "max_reasoning_tokens": 1024
 }
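The schema change above narrows which Google reasoners honor the reasoning toggle: 2.5 Flash stays configurable, while 2.5 Pro and 3.x always reason. A compact sketch of that rule follows, with plain prefix checks standing in for `is_google_vertex_reasoning_model`/`is_google_ai_reasoning_model`; the helper name is hypothetical:

```python
def reasoning_is_configurable(model: str) -> bool:
    # Stand-in for is_google_vertex_reasoning_model / is_google_ai_reasoning_model:
    # after the patch these match the 2.5 and 3.x reasoning prefixes.
    is_google_reasoner = model.startswith(("gemini-2.5-flash", "gemini-2.5-pro", "gemini-3"))
    # The toggle is honored only for reasoners that can actually disable thinking.
    return is_google_reasoner and not model.startswith(("gemini-2.5-pro", "gemini-3"))


assert reasoning_is_configurable("gemini-2.5-flash") is True
assert reasoning_is_configurable("gemini-2.5-pro") is False
assert reasoning_is_configurable("gemini-3-pro-preview") is False  # illustrative model name
```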
diff --git a/tests/configs/llm_model_configs/gemini-2.5-pro.json b/tests/configs/llm_model_configs/gemini-2.5-pro.json
index c291917c..e8a11b2b 100644
--- a/tests/configs/llm_model_configs/gemini-2.5-pro.json
+++ b/tests/configs/llm_model_configs/gemini-2.5-pro.json
@@ -4,5 +4,7 @@
   "model_endpoint_type": "google_ai",
   "model_endpoint": "https://generativelanguage.googleapis.com",
   "model_wrapper": null,
-  "put_inner_thoughts_in_kwargs": true
+  "put_inner_thoughts_in_kwargs": false,
+  "enable_reasoner": true,
+  "max_reasoning_tokens": 1024
 }
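The two test configs now carry reasoning fields consistent with what the schema enforces for models that cannot disable thinking. A hedged sketch of how such a config might be checked, assuming the JSON files are complete `LLMConfig` payloads that the Pydantic model accepts directly; the load path comes from the diff, but the check itself is illustrative and not part of the patch:

```python
import json

from letta.schemas.llm_config import LLMConfig

# Illustrative check, not part of the patch: load an updated test config and
# confirm its reasoning fields match the values the schema enforces for
# models that cannot disable thinking.
with open("tests/configs/llm_model_configs/gemini-2.5-pro.json") as f:
    config = LLMConfig(**json.load(f))

assert config.put_inner_thoughts_in_kwargs is False
assert config.enable_reasoner is True
assert config.max_reasoning_tokens == 1024
```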