diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py
index 94f294b5..7d3e32a1 100644
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -1,3 +1,4 @@
+import re
 from typing import TYPE_CHECKING, Literal, Optional
 
 from pydantic import BaseModel, ConfigDict, Field, model_validator
@@ -139,7 +140,9 @@ class LLMConfig(BaseModel):
 
         # Set max_tokens defaults based on model (only if not explicitly provided)
         if "max_tokens" not in values:
-            if model.startswith("gpt-5"):  # Covers both gpt-5 and gpt-5.1
+            if re.match(r"^gpt-5\.[23]", model) and "-chat" not in model:
+                values["max_tokens"] = 128000
+            elif model.startswith("gpt-5"):
                 values["max_tokens"] = 16384
             elif model == "gpt-4.1":
                 values["max_tokens"] = 8192
@@ -299,7 +302,7 @@ class LLMConfig(BaseModel):
                 context_window=272000,
                 reasoning_effort="none",  # Default to "none" for GPT-5.2
                 verbosity="medium",
-                max_tokens=16384,
+                max_tokens=128000,
             )
         elif model_name == "letta":
             return cls(
diff --git a/letta/schemas/providers/openai.py b/letta/schemas/providers/openai.py
index c4c979e4..c5431596 100644
--- a/letta/schemas/providers/openai.py
+++ b/letta/schemas/providers/openai.py
@@ -50,10 +50,22 @@ class OpenAIProvider(Provider):
         except Exception as e:
             raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
 
+    @staticmethod
+    def _openai_default_max_output_tokens(model_name: str) -> int:
+        """Return a sensible max-output-tokens default for OpenAI models.
+
+        gpt-5.2* / gpt-5.3* support 128k output tokens, except the
+        `-chat` variants which are capped at 16k.
+        """
+        import re
+
+        if re.match(r"^gpt-5\.[23]", model_name) and "-chat" not in model_name:
+            return 128000
+        return 16384
+
     def get_default_max_output_tokens(self, model_name: str) -> int:
         """Get the default max output tokens for OpenAI models (sync fallback)."""
-        # Simple default for openai
-        return 16384
+        return self._openai_default_max_output_tokens(model_name)
 
     async def get_default_max_output_tokens_async(self, model_name: str) -> int:
         """Get the default max output tokens for OpenAI models.
@@ -67,8 +79,7 @@ class OpenAIProvider(Provider):
         if max_output is not None:
             return max_output
 
-        # Simple default for openai
-        return 16384
+        return self._openai_default_max_output_tokens(model_name)
 
     async def _get_models_async(self) -> list[dict]:
         from letta.llm_api.openai import openai_get_model_list_async
diff --git a/letta/server/server.py b/letta/server/server.py
index 33c98482..06ca99e6 100644
--- a/letta/server/server.py
+++ b/letta/server/server.py
@@ -675,6 +675,12 @@ class SyncServer(object):
             # Get the current agent's llm_config if not already set
             agent = await self.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)
             request.llm_config = agent.llm_config.model_copy()
+        else:
+            # TODO: Refactor update_agent to accept partial llm_config so we
+            # don't need to fetch the full agent just to preserve max_tokens.
+            if request.max_tokens is None and "max_output_tokens" not in request.model_settings.model_fields_set:
+                agent = await self.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)
+                request.llm_config.max_tokens = agent.llm_config.max_tokens
         update_llm_config_params = request.model_settings._to_legacy_config_params()
         # Don't clobber max_tokens with the Pydantic default when the caller
        # didn't explicitly provide max_output_tokens in the request.