diff --git a/letta/constants.py b/letta/constants.py
index ef4eafc2..1a59137e 100644
--- a/letta/constants.py
+++ b/letta/constants.py
@@ -27,6 +27,7 @@ TOOL_CALL_ID_MAX_LEN = 29
 
 # minimum context window size
 MIN_CONTEXT_WINDOW = 4096
+DEFAULT_CONTEXT_WINDOW_SIZE = 32000  # ceiling for llm_config.context_window: server.py takes min(current, this) in its else branch (presumably when no explicit limit is passed - TODO confirm)
 
 # embeddings
 MAX_EMBEDDING_DIM = 4096  # maximum supported embeding size - do NOT change or else DBs will need to be reset
diff --git a/letta/llm_api/google_vertex.py b/letta/llm_api/google_vertex.py
index 7b551af2..9530211f 100644
--- a/letta/llm_api/google_vertex.py
+++ b/letta/llm_api/google_vertex.py
@@ -1,7 +1,5 @@
 import uuid
-from typing import List, Optional, Tuple
-
-import requests
+from typing import List, Optional
 
 from letta.constants import NON_USER_MSG_PREFIX
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
diff --git a/letta/server/server.py b/letta/server/server.py
index 5c32182a..ac889f55 100644
--- a/letta/server/server.py
+++ b/letta/server/server.py
@@ -1112,6 +1112,8 @@ class SyncServer(Server):
             if context_window_limit > llm_config.context_window:
                 raise ValueError(f"Context window limit ({context_window_limit}) is greater than maximum of ({llm_config.context_window})")
             llm_config.context_window = context_window_limit
+        else:
+            llm_config.context_window = min(llm_config.context_window, constants.DEFAULT_CONTEXT_WINDOW_SIZE)
 
         return llm_config