diff --git a/letta/constants.py b/letta/constants.py
index 468afa4c..b6fcb973 100644
--- a/letta/constants.py
+++ b/letta/constants.py
@@ -27,7 +27,6 @@ TOOL_CALL_ID_MAX_LEN = 29
 
 # minimum context window size
 MIN_CONTEXT_WINDOW = 4096
-DEFAULT_CONTEXT_WINDOW_SIZE = 32000
 
 # embeddings
 MAX_EMBEDDING_DIM = 4096  # maximum supported embeding size - do NOT change or else DBs will need to be reset
diff --git a/letta/server/server.py b/letta/server/server.py
index 2fe0b5d5..d4687314 100644
--- a/letta/server/server.py
+++ b/letta/server/server.py
@@ -983,6 +983,10 @@ class SyncServer(Server):
                 warnings.warn(f"An error occurred while listing LLM models for provider {provider}: {e}")
 
         llm_models.extend(self.get_local_llm_configs())
+
+        # cap every listed model's context window at the configured global maximum
+        for llm_config in llm_models:
+            llm_config.context_window = min(llm_config.context_window, model_settings.global_max_context_window_limit)
         return llm_models
 
     def list_embedding_models(self) -> List[EmbeddingConfig]:
@@ -1028,7 +1032,7 @@ class SyncServer(Server):
                 raise ValueError(f"Context window limit ({context_window_limit}) is greater than maximum of ({llm_config.context_window})")
             llm_config.context_window = context_window_limit
         else:
-            llm_config.context_window = min(llm_config.context_window, constants.DEFAULT_CONTEXT_WINDOW_SIZE)
+            llm_config.context_window = min(llm_config.context_window, model_settings.global_max_context_window_limit)
 
         return llm_config
 
diff --git a/letta/settings.py b/letta/settings.py
index d69c0777..2a214846 100644
--- a/letta/settings.py
+++ b/letta/settings.py
@@ -50,6 +50,8 @@ class ModelSettings(BaseSettings):
 
     model_config = SettingsConfigDict(env_file=".env", extra="ignore")
 
+    global_max_context_window_limit: int = 32000
+
     # env_prefix='my_prefix_'
 
     # when we use /completions APIs (instead of /chat/completions), we need to specify a model wrapper