diff --git a/fern/openapi.json b/fern/openapi.json
index dc495cc5..88c0fe28 100644
--- a/fern/openapi.json
+++ b/fern/openapi.json
@@ -46358,12 +46358,40 @@
             ],
             "title": "Response Format",
             "description": "The response format for the model."
+          },
+          "thinking": {
+            "$ref": "#/components/schemas/ZAIThinking",
+            "description": "The thinking configuration for GLM-4.5+ models.",
+            "default": {
+              "type": "enabled",
+              "clear_thinking": false
+            }
           }
         },
         "type": "object",
         "title": "ZAIModelSettings",
         "description": "Z.ai (ZhipuAI) model configuration (OpenAI-compatible)."
       },
+      "ZAIThinking": {
+        "properties": {
+          "type": {
+            "type": "string",
+            "enum": ["enabled", "disabled"],
+            "title": "Type",
+            "description": "Whether thinking is enabled or disabled.",
+            "default": "enabled"
+          },
+          "clear_thinking": {
+            "type": "boolean",
+            "title": "Clear Thinking",
+            "description": "If False, preserved thinking is used (recommended for agents).",
+            "default": false
+          }
+        },
+        "type": "object",
+        "title": "ZAIThinking",
+        "description": "Thinking configuration for ZAI GLM-4.5+ models."
+      },
       "letta__schemas__agent_file__AgentSchema": {
         "properties": {
           "name": {
diff --git a/letta/interfaces/openai_streaming_interface.py b/letta/interfaces/openai_streaming_interface.py
index ca3602df..bf0a2938 100644
--- a/letta/interfaces/openai_streaming_interface.py
+++ b/letta/interfaces/openai_streaming_interface.py
@@ -887,14 +887,10 @@ class SimpleOpenAIStreamingInterface:
                 prev_message_type = assistant_msg.message_type
                 yield assistant_msg
 
-            if (
-                hasattr(chunk, "choices")
-                and len(chunk.choices) > 0
-                and hasattr(chunk.choices[0], "delta")
-                and hasattr(chunk.choices[0].delta, "reasoning_content")
-            ):
+            if hasattr(chunk, "choices") and len(chunk.choices) > 0 and hasattr(chunk.choices[0], "delta"):
                 delta = chunk.choices[0].delta
-                reasoning_content = getattr(delta, "reasoning_content", None)
+                # Check for reasoning_content (standard) or reasoning (OpenRouter)
+                reasoning_content = getattr(delta, "reasoning_content", None) or getattr(delta, "reasoning", None)
                 if reasoning_content is not None and reasoning_content != "":
                     if prev_message_type and prev_message_type != "reasoning_message":
                         message_index += 1
diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index ca26c768..8560d86b 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -564,6 +564,17 @@ class OpenAIClient(LLMClientBase):
         # If set, then in the backend "medium" thinking is turned on
         # request_data["reasoning_effort"] = "medium"
 
+        # Add OpenRouter reasoning configuration via extra_body
+        if is_openrouter and llm_config.enable_reasoner:
+            reasoning_config = {}
+            if llm_config.reasoning_effort:
+                reasoning_config["effort"] = llm_config.reasoning_effort
+            if llm_config.max_reasoning_tokens and llm_config.max_reasoning_tokens > 0:
+                reasoning_config["max_tokens"] = llm_config.max_reasoning_tokens
+            if not reasoning_config:
+                reasoning_config = {"enabled": True}
+            request_data["extra_body"] = {"reasoning": reasoning_config}
+
         return request_data
 
     @trace_method
@@ -765,12 +776,12 @@ class OpenAIClient(LLMClientBase):
         ):
             if "choices" in response_data and len(response_data["choices"]) > 0:
                 choice_data = response_data["choices"][0]
-                if "message" in choice_data and "reasoning_content" in choice_data["message"]:
-                    reasoning_content = choice_data["message"]["reasoning_content"]
-                    if reasoning_content:
-                        chat_completion_response.choices[0].message.reasoning_content = reasoning_content
-
-                        chat_completion_response.choices[0].message.reasoning_content_signature = None
+                message_data = choice_data.get("message", {})
+                # Check for reasoning_content (standard) or reasoning (OpenRouter)
+                reasoning_content = message_data.get("reasoning_content") or message_data.get("reasoning")
+                if reasoning_content:
+                    chat_completion_response.choices[0].message.reasoning_content = reasoning_content
+                    chat_completion_response.choices[0].message.reasoning_content_signature = None
 
         # Unpack inner thoughts if they were embedded in function arguments
         if llm_config.put_inner_thoughts_in_kwargs:
diff --git a/letta/llm_api/zai_client.py b/letta/llm_api/zai_client.py
index 9eec79c2..c7e3d059 100644
--- a/letta/llm_api/zai_client.py
+++ b/letta/llm_api/zai_client.py
@@ -1,4 +1,3 @@
-import os
 from typing import List, Optional
 
 from openai import AsyncOpenAI, AsyncStream, OpenAI
@@ -11,9 +10,15 @@ from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import AgentType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.settings import model_settings
 
 
+def is_zai_reasoning_model(model_name: str) -> bool:
+    """Tell whether ``model_name`` starts with a ZAI reasoning-model prefix (glm-4.5, glm-4.6 or glm-4.7)."""
+    return model_name.startswith(("glm-4.5", "glm-4.6", "glm-4.7"))
+
+
 class ZAIClient(OpenAIClient):
     """Z.ai (ZhipuAI) client - uses OpenAI-compatible API."""
 
@@ -23,6 +28,10 @@ class ZAIClient(OpenAIClient):
     def supports_structured_output(self, llm_config: LLMConfig) -> bool:
         return False
 
+    def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
+        """Report whether ``llm_config.model`` names a ZAI reasoning model, as judged by ``is_zai_reasoning_model``."""
+        return is_zai_reasoning_model(model_name=llm_config.model)
+
     @trace_method
     def build_request_data(
         self,
@@ -35,6 +44,24 @@ class ZAIClient(OpenAIClient):
         tool_return_truncation_chars: Optional[int] = None,
     ) -> dict:
         data = super().build_request_data(agent_type, messages, llm_config, tools, force_tool_call, requires_subsequent_tool_call)
+
+        # Add thinking configuration for ZAI GLM-4.5+ models
+        # Must explicitly send type: "disabled" when reasoning is off, as GLM-4.7 has thinking on by default
+        if self.is_reasoning_model(llm_config):
+            if llm_config.enable_reasoner:
+                data["extra_body"] = {
+                    "thinking": {
+                        "type": "enabled",
+                        "clear_thinking": False,  # Preserved thinking for agents
+                    }
+                }
+            else:
+                data["extra_body"] = {
+                    "thinking": {
+                        "type": "disabled",
+                    }
+                }
+
         return data
 
     @trace_method
@@ -79,3 +106,39 @@ class ZAIClient(OpenAIClient):
         response = await client.embeddings.create(model=embedding_config.embedding_model, input=inputs)
 
         return [r.embedding for r in response.data]
+
+    @trace_method
+    async def convert_response_to_chat_completion(
+        self,
+        response_data: dict,
+        input_messages: List[PydanticMessage],
+        llm_config: LLMConfig,
+    ) -> ChatCompletionResponse:
+        """
+        Convert a raw ZAI response dict into a ChatCompletionResponse, filling in reasoning_content.
+
+        After the parent conversion, the first choice's message gets reasoning_content copied from
+        response_data when it has none, and omitted_reasoning_content is set to True for reasoning models
+        with enable_reasoner on. A response without a first-choice message is returned as the parent built it.
+        """
+        chat_completion_response = await super().convert_response_to_chat_completion(response_data, input_messages, llm_config)
+
+        # Resolve the first message once; every step below needs it, so bail out early when it is absent
+        # instead of indexing choices[0] unguarded (IndexError on an empty choices list).
+        choices = chat_completion_response.choices
+        message = choices[0].message if choices else None
+        if not message:
+            return chat_completion_response
+
+        if not message.reasoning_content:
+            raw_choices = response_data.get("choices") or []
+            raw_message = (raw_choices[0].get("message") if raw_choices else None) or {}
+            reasoning_content = raw_message.get("reasoning_content")
+            if reasoning_content:
+                message.reasoning_content = reasoning_content
+                message.reasoning_content_signature = None
+
+        # Flag the message whenever a reasoning model ran with the reasoner enabled
+        if self.is_reasoning_model(llm_config) and llm_config.enable_reasoner:
+            message.omitted_reasoning_content = True
+        return chat_completion_response
diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py
index 4a62b2c1..4c9ad3fd 100644
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -374,9 +374,13 @@ class LLMConfig(BaseModel):
                 temperature=self.temperature,
             )
         elif self.model_endpoint_type == "zai":
+            from letta.schemas.model import ZAIThinking
+
+            thinking_type = "enabled" if self.enable_reasoner else "disabled"
             return ZAIModelSettings(
                 max_output_tokens=self.max_tokens or 4096,
                 temperature=self.temperature,
+                thinking=ZAIThinking(type=thinking_type, clear_thinking=False),
             )
         elif self.model_endpoint_type == "groq":
             return GroqModelSettings(
@@ -451,6 +455,45 @@ class LLMConfig(BaseModel):
             config.model.startswith("gemini-2.5-flash") or config.model.startswith("gemini-2.5-pro")
         )
 
+    @classmethod
+    def is_zai_reasoning_model(cls, config: "LLMConfig") -> bool:
+        """Return True for "zai"-endpoint configs whose model name starts with glm-4.5, glm-4.6 or glm-4.7."""
+        zai_reasoning_prefixes = ("glm-4.5", "glm-4.6", "glm-4.7")
+        return config.model_endpoint_type == "zai" and config.model.startswith(zai_reasoning_prefixes)
+
+    @classmethod
+    def is_openrouter_reasoning_model(cls, config: "LLMConfig") -> bool:
+        """Decide whether an OpenRouter-routed model is treated as reasoning-capable.
+
+        Only configs whose ``model_endpoint_type`` is "openrouter" can match. The model
+        name is lower-cased and searched for known substrings; any single hit is enough,
+        so broad markers such as "gemini" or "kimi" match every name containing them.
+
+        OpenRouter model names include a provider prefix, e.g.:
+        - anthropic/claude-sonnet-4
+        - openai/o3-mini
+        - moonshotai/kimi-k2-thinking
+        - deepseek/deepseek-r1
+
+        Returns:
+            True when the endpoint is OpenRouter and at least one marker matches, else False.
+        """
+        if config.model_endpoint_type != "openrouter":
+            return False
+
+        # Substrings grouped by model family; the keys are labels for readers only.
+        # The OpenAI markers begin with "/" so they only match directly after a slash.
+        markers_by_family = {
+            "openai": ("/o1", "/o3", "/o4", "/gpt-5"),
+            "anthropic": ("claude-3-7-sonnet", "claude-sonnet-4", "claude-opus-4", "claude-haiku-4"),
+            "google": ("gemini",),
+            "zai": ("glm-4.5", "glm-4.6", "glm-4.7"),
+            "deepseek": ("deepseek-r1", "deepseek-reasoner"),
+            "moonshot": ("kimi",),
+        }
+        lowered = config.model.lower()
+        return any(marker in lowered for markers in markers_by_family.values() for marker in markers)
+
     @classmethod
     def supports_verbosity(cls, config: "LLMConfig") -> bool:
         """Check if the model supports verbosity control."""
@@ -505,6 +548,18 @@ class LLMConfig(BaseModel):
                     config.effort = "medium"
                 return config
 
+            # ZAI GLM-4.5+ models: toggle honored (similar to Anthropic)
+            if cls.is_zai_reasoning_model(config):
+                config.enable_reasoner = bool(reasoning)
+                config.put_inner_thoughts_in_kwargs = False
+                return config
+
+            # OpenRouter reasoning models: toggle honored
+            if cls.is_openrouter_reasoning_model(config):
+                config.enable_reasoner = bool(reasoning)
+                config.put_inner_thoughts_in_kwargs = False
+                return config
+
             # Google Gemini 2.5 Pro and Gemini 3: not possible to disable
             if config.model.startswith("gemini-2.5-pro") or config.model.startswith("gemini-3"):
                 config.put_inner_thoughts_in_kwargs = False
@@ -565,6 +620,10 @@ class LLMConfig(BaseModel):
                 config.put_inner_thoughts_in_kwargs = True
                 if config.max_reasoning_tokens == 0:
                     config.max_reasoning_tokens = 1024
+            elif cls.is_zai_reasoning_model(config):
+                config.put_inner_thoughts_in_kwargs = False
+            elif cls.is_openrouter_reasoning_model(config):
+                config.put_inner_thoughts_in_kwargs = False
             elif cls.is_openai_reasoning_model(config):
                 config.put_inner_thoughts_in_kwargs = False
                 if config.reasoning_effort is None:
diff --git a/letta/schemas/model.py b/letta/schemas/model.py
index f5d5fdac..ea855206 100644
--- a/letta/schemas/model.py
+++ b/letta/schemas/model.py
@@ -374,12 +374,22 @@ class XAIModelSettings(ModelSettings):
         }
 
 
+class ZAIThinking(BaseModel):
+    """Thinking configuration for ZAI GLM-4.5+ models."""  # NOTE(review): fern/openapi.json carries this same text; keep in sync
+
+    type: Literal["enabled", "disabled"] = Field(default="enabled", description="Whether thinking is enabled or disabled.")
+    clear_thinking: bool = Field(default=False, description="If False, preserved thinking is used (recommended for agents).")
+
+
 class ZAIModelSettings(ModelSettings):
     """Z.ai (ZhipuAI) model configuration (OpenAI-compatible)."""
 
     provider_type: Literal[ProviderType.zai] = Field(ProviderType.zai, description="The type of the provider.")
     temperature: float = Field(0.7, description="The temperature of the model.")
     response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")
+    thinking: ZAIThinking = Field(
+        ZAIThinking(type="enabled", clear_thinking=False), description="The thinking configuration for GLM-4.5+ models."
+    )
 
     def _to_legacy_config_params(self) -> dict:
         return {
@@ -388,6 +398,7 @@ class ZAIModelSettings(ModelSettings):
             "response_format": self.response_format,
             "parallel_tool_calls": self.parallel_tool_calls,
             "strict": False,  # ZAI does not support strict mode
+            "extended_thinking": self.thinking.type == "enabled",
         }
 
 
diff --git a/letta/services/agent_manager.py b/letta/services/agent_manager.py
index 56b6a62f..8a9192dc 100644
--- a/letta/services/agent_manager.py
+++ b/letta/services/agent_manager.py
@@ -351,9 +351,11 @@ class AgentManager:
 
         # For v1 agents, enforce sane defaults even when reasoning is omitted
         if agent_create.agent_type == AgentType.letta_v1_agent:
-            # Claude 3.7/4 or OpenAI o1/o3/o4/gpt-5
-            default_reasoning = LLMConfig.is_anthropic_reasoning_model(agent_create.llm_config) or LLMConfig.is_openai_reasoning_model(
-                agent_create.llm_config
+            # Claude 3.7/4 or OpenAI o1/o3/o4/gpt-5 or ZAI GLM-4.5+
+            default_reasoning = (
+                LLMConfig.is_anthropic_reasoning_model(agent_create.llm_config)
+                or LLMConfig.is_openai_reasoning_model(agent_create.llm_config)
+                or LLMConfig.is_zai_reasoning_model(agent_create.llm_config)
             )
             agent_create.llm_config = LLMConfig.apply_reasoning_setting_to_config(
                 agent_create.llm_config,
diff --git a/tests/model_settings/zai-glm-4.6.json b/tests/model_settings/zai-glm-4.6.json
index 00ca14c6..e0a2e2f2 100644
--- a/tests/model_settings/zai-glm-4.6.json
+++ b/tests/model_settings/zai-glm-4.6.json
@@ -4,6 +4,10 @@
     "provider_type": "zai",
     "temperature": 1.0,
     "max_output_tokens": 4096,
-    "parallel_tool_calls": false
+    "parallel_tool_calls": false,
+    "thinking": {
+      "type": "enabled",
+      "clear_thinking": false
+    }
   }
 }