diff --git a/letta/schemas/providers/azure.py b/letta/schemas/providers/azure.py
index 3264c7fd..f17b91a8 100644
--- a/letta/schemas/providers/azure.py
+++ b/letta/schemas/providers/azure.py
@@ -106,8 +106,10 @@ class AzureProvider(Provider):
         return list(latest_models.values())
 
     async def list_llm_models_async(self) -> list[LLMConfig]:
-        # TODO (cliandy): asyncify
-        model_list = self.azure_openai_get_deployed_model_list()
+        # Run the blocking model-list fetch in a worker thread so the event loop is not blocked
+        import asyncio
+
+        model_list = await asyncio.to_thread(self.azure_openai_get_deployed_model_list)
         # Extract models that support text generation
         model_options = [m for m in model_list if m.get("capabilities").get("chat_completion") == True]
 
diff --git a/letta/server/rest_api/app.py b/letta/server/rest_api/app.py
index 459a07c3..84dda5d5 100644
--- a/letta/server/rest_api/app.py
+++ b/letta/server/rest_api/app.py
@@ -163,6 +163,18 @@ async def lifespan(app_: FastAPI):
     except Exception as e:
         logger.warning(f"[Worker {worker_id}] Failed to start watchdog: {e}")
 
+    # Pre-download the NLTK "punkt_tab" data at startup to avoid blocking on it during requests (fallback if the Docker build failed)
+    try:
+        import asyncio
+
+        import nltk
+
+        logger.info(f"[Worker {worker_id}] Checking NLTK data availability...")
+        await asyncio.to_thread(nltk.download, "punkt_tab", quiet=True)
+        logger.info(f"[Worker {worker_id}] NLTK data ready")
+    except Exception as e:
+        logger.warning(f"[Worker {worker_id}] Failed to download NLTK data: {e}")
+
     if telemetry_settings.profiler:
         try:
             import googlecloudprofiler