diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py
index 7a778cda..d86abc9b 100644
--- a/letta/llm_api/llm_api_tools.py
+++ b/letta/llm_api/llm_api_tools.py
@@ -215,6 +215,9 @@ def create(
                 chat_completion_request=data,
                 stream_interface=stream_interface,
                 name=name,
+                # NOTE: needs to be true for OpenAI proxies that use the `reasoning_content` field
+                # For example, DeepSeek, or LM Studio
+                expect_reasoning_content=False,
             )
         else:  # Client did not request token streaming (expect a blocking backend response)
             data.stream = False
@@ -272,6 +275,9 @@ def create(
                 chat_completion_request=data,
                 stream_interface=stream_interface,
                 name=name,
+                # TODO turn on to support reasoning content from xAI reasoners:
+                # https://docs.x.ai/docs/guides/reasoning#reasoning
+                expect_reasoning_content=False,
             )
         else:  # Client did not request token streaming (expect a blocking backend response)
             data.stream = False
@@ -486,7 +492,10 @@ def create(
         if stream:
             raise NotImplementedError(f"Streaming not yet implemented for TogetherAI (via the /completions endpoint).")
 
-        if model_settings.together_api_key is None and llm_config.model_endpoint == "https://api.together.ai/v1/completions":
+        if model_settings.together_api_key is None and (
+            llm_config.model_endpoint == "https://api.together.ai/v1/completions"
+            or llm_config.model_endpoint == "https://api.together.xyz/v1/completions"
+        ):
             raise LettaConfigurationError(message="TogetherAI key is missing from letta config file", missing_fields=["together_api_key"])
 
         return get_chat_completion(
@@ -560,6 +569,8 @@ def create(
                 chat_completion_request=data,
                 stream_interface=stream_interface,
                 name=name,
+                # TODO should we toggle for R1 vs V3?
+                expect_reasoning_content=True,
             )
         else:  # Client did not request token streaming (expect a blocking backend response)
             data.stream = False
diff --git a/letta/llm_api/openai.py b/letta/llm_api/openai.py
index 2fe8ade3..6a0f182b 100644
--- a/letta/llm_api/openai.py
+++ b/letta/llm_api/openai.py
@@ -8,7 +8,13 @@ from letta.constants import LETTA_MODEL_ENDPOINT
 from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
 from letta.helpers.datetime_helpers import timestamp_to_datetime
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, make_post_request
-from letta.llm_api.openai_client import accepts_developer_role, supports_parallel_tool_calling, supports_temperature_param
+from letta.llm_api.openai_client import (
+    accepts_developer_role,
+    requires_auto_tool_choice,
+    supports_parallel_tool_calling,
+    supports_structured_output,
+    supports_temperature_param,
+)
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
@@ -50,9 +56,7 @@ def openai_check_valid_api_key(base_url: str, api_key: Union[str, None]) -> None
         raise ValueError("No API key provided")
 
 
-def openai_get_model_list(
-    url: str, api_key: Optional[str] = None, fix_url: Optional[bool] = False, extra_params: Optional[dict] = None
-) -> dict:
+def openai_get_model_list(url: str, api_key: Optional[str] = None, fix_url: bool = False, extra_params: Optional[dict] = None) -> dict:
     """https://platform.openai.com/docs/api-reference/models/list"""
     from letta.utils import printd
 
@@ -154,7 +158,10 @@ def build_openai_chat_completions_request(
         elif function_call not in ["none", "auto", "required"]:
             tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=function_call))
         else:
-            tool_choice = function_call
+            if requires_auto_tool_choice(llm_config):
+                tool_choice = "auto"
+            else:
+                tool_choice = function_call
         data = ChatCompletionRequest(
             model=model,
             messages=openai_message_list,
@@ -197,12 +204,13 @@ def build_openai_chat_completions_request(
     if use_structured_output and data.tools is not None and len(data.tools) > 0:
         # Convert to structured output style (which has 'strict' and no optionals)
         for tool in data.tools:
-            try:
-                # tool["function"] = convert_to_structured_output(tool["function"])
-                structured_output_version = convert_to_structured_output(tool.function.model_dump())
-                tool.function = FunctionSchema(**structured_output_version)
-            except ValueError as e:
-                warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
+            if supports_structured_output(llm_config):
+                try:
+                    # tool["function"] = convert_to_structured_output(tool["function"])
+                    structured_output_version = convert_to_structured_output(tool.function.model_dump())
+                    tool.function = FunctionSchema(**structured_output_version)
+                except ValueError as e:
+                    warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
     return data
 
 
@@ -221,7 +229,7 @@ def openai_chat_completions_process_stream(
     expect_reasoning_content: bool = True,
     name: Optional[str] = None,
 ) -> ChatCompletionResponse:
-    """Process a streaming completion response, and return a ChatCompletionRequest at the end.
+    """Process a streaming completion response, and return a ChatCompletionResponse at the end.
 
     To "stream" the response in Letta, we want to call a streaming-compatible interface function
     on the chunks received from the OpenAI-compatible server POST SSE response.
@@ -293,6 +301,9 @@ def openai_chat_completions_process_stream(
             url=url, api_key=api_key, chat_completion_request=chat_completion_request
         ):
             assert isinstance(chat_completion_chunk, ChatCompletionChunkResponse), type(chat_completion_chunk)
+            if chat_completion_chunk.choices is None or len(chat_completion_chunk.choices) == 0:
+                warnings.warn(f"No choices in chunk: {chat_completion_chunk}")
+                continue
 
             # NOTE: this assumes that the tool call ID will only appear in one of the chunks during the stream
             if override_tool_call_id:
@@ -429,6 +440,9 @@ def openai_chat_completions_process_stream(
     except Exception as e:
         if stream_interface:
             stream_interface.stream_end()
+        import traceback
+
+        traceback.print_exc()
         logger.error(f"Parsing ChatCompletion stream failed with error:\n{str(e)}")
         raise e
     finally:
@@ -463,14 +477,27 @@ def openai_chat_completions_request_stream(
     url: str,
     api_key: str,
     chat_completion_request: ChatCompletionRequest,
+    fix_url: bool = False,
 ) -> Generator[ChatCompletionChunkResponse, None, None]:
+
+    # In some cases we may want to double-check the URL and do basic correction, eg:
+    # In Letta config the address for vLLM is w/o a /v1 suffix for simplicity
+    # However if we're treating the server as an OpenAI proxy we want the /v1 suffix on our model hit
+    if fix_url:
+        if not url.endswith("/v1"):
+            url = smart_urljoin(url, "v1")
+
     data = prepare_openai_payload(chat_completion_request)
     data["stream"] = True
     client = OpenAI(api_key=api_key, base_url=url, max_retries=0)
-    stream = client.chat.completions.create(**data)
-    for chunk in stream:
-        # TODO: Use the native OpenAI objects here?
-        yield ChatCompletionChunkResponse(**chunk.model_dump(exclude_none=True))
+    try:
+        stream = client.chat.completions.create(**data)
+        for chunk in stream:
+            # TODO: Use the native OpenAI objects here?
+            yield ChatCompletionChunkResponse(**chunk.model_dump(exclude_none=True))
+    except Exception as e:
+        print(f"Error request stream from /v1/chat/completions, url={url}, data={data}:\n{e}")
+        raise e
 
 
 def openai_chat_completions_request(
diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index c641f5e1..dc6804d1 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -75,6 +75,35 @@ def supports_parallel_tool_calling(model: str) -> bool:
         return True
 
 
+# TODO move into LLMConfig as a field?
+def supports_structured_output(llm_config: LLMConfig) -> bool:
+    """Certain providers don't support structured output."""
+
+    # FIXME pretty hacky - turn off for providers we know users will use,
+    #       but also don't support structured output
+    if "nebius.com" in llm_config.model_endpoint:
+        return False
+    else:
+        return True
+
+
+# TODO move into LLMConfig as a field?
+def requires_auto_tool_choice(llm_config: LLMConfig) -> bool:
+    """Certain providers require the tool choice to be set to 'auto'."""
+
+    if "nebius.com" in llm_config.model_endpoint:
+        return True
+    # proxy also has this issue (FIXME check)
+    elif llm_config.model_endpoint == LETTA_MODEL_ENDPOINT:
+        return True
+    # same with vLLM (FIXME check)
+    elif llm_config.handle and "vllm" in llm_config.handle:
+        return True
+    else:
+        # will use "required" instead of "auto"
+        return False
+
+
 class OpenAIClient(LLMClientBase):
     def _prepare_client_kwargs(self, llm_config: LLMConfig) -> dict:
         api_key = None
@@ -136,7 +165,7 @@ class OpenAIClient(LLMClientBase):
         # TODO(matt) move into LLMConfig
         # TODO: This vllm checking is very brittle and is a patch at most
         tool_choice = None
-        if llm_config.model_endpoint == LETTA_MODEL_ENDPOINT or (llm_config.handle and "vllm" in llm_config.handle):
+        if requires_auto_tool_choice(llm_config):
             tool_choice = "auto"  # TODO change to "required" once proxy supports it
         elif tools:
             # only set if tools is non-Null
@@ -171,11 +200,12 @@ class OpenAIClient(LLMClientBase):
         if data.tools is not None and len(data.tools) > 0:
             # Convert to structured output style (which has 'strict' and no optionals)
             for tool in data.tools:
-                try:
-                    structured_output_version = convert_to_structured_output(tool.function.model_dump())
-                    tool.function = FunctionSchema(**structured_output_version)
-                except ValueError as e:
-                    logger.warning(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
+                if supports_structured_output(llm_config):
+                    try:
+                        structured_output_version = convert_to_structured_output(tool.function.model_dump())
+                        tool.function = FunctionSchema(**structured_output_version)
+                    except ValueError as e:
+                        logger.warning(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
 
         return data.model_dump(exclude_unset=True)
 
diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py
index 903d9a7e..ab024708 100644
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -24,7 +24,6 @@ class LLMConfig(BaseModel):
         max_tokens (int): The maximum number of tokens to generate.
     """
 
-    # TODO: 🤮 don't default to a vendor! bug city!
     model: str = Field(..., description="LLM model name. ")
     model_endpoint_type: Literal[
         "openai",
diff --git a/letta/schemas/openai/chat_completion_response.py b/letta/schemas/openai/chat_completion_response.py
index d4332b22..44ef5cff 100644
--- a/letta/schemas/openai/chat_completion_response.py
+++ b/letta/schemas/openai/chat_completion_response.py
@@ -1,5 +1,5 @@
 import datetime
-from typing import Dict, List, Literal, Optional, Union
+from typing import List, Literal, Optional, Union
 
 from pydantic import BaseModel
 
@@ -27,6 +27,7 @@ class LogProbToken(BaseModel):
     bytes: Optional[List[int]]
 
 
+# Legacy?
 class MessageContentLogProb(BaseModel):
     token: str
     logprob: float
@@ -34,6 +35,25 @@ class MessageContentLogProb(BaseModel):
     top_logprobs: Optional[List[LogProbToken]]
 
 
+class TopLogprob(BaseModel):
+    token: str
+    bytes: Optional[List[int]] = None
+    logprob: float
+
+
+class ChatCompletionTokenLogprob(BaseModel):
+    token: str
+    bytes: Optional[List[int]] = None
+    logprob: float
+    top_logprobs: List[TopLogprob]
+
+
+class ChoiceLogprobs(BaseModel):
+    content: Optional[List[ChatCompletionTokenLogprob]] = None
+
+    refusal: Optional[List[ChatCompletionTokenLogprob]] = None
+
+
 class Message(BaseModel):
     content: Optional[str] = None
     tool_calls: Optional[List[ToolCall]] = None
@@ -49,7 +69,7 @@ class Choice(BaseModel):
     finish_reason: str
     index: int
     message: Message
-    logprobs: Optional[Dict[str, Union[List[MessageContentLogProb], None]]] = None
+    logprobs: Optional[ChoiceLogprobs] = None
     seed: Optional[int] = None  # found in TogetherAI
 
 
@@ -134,7 +154,7 @@ class ChatCompletionResponse(BaseModel):
 class FunctionCallDelta(BaseModel):
     # arguments: Optional[str] = None
     name: Optional[str] = None
-    arguments: str
+    arguments: Optional[str] = None
     # name: str
 
 
@@ -179,7 +199,7 @@ class ChunkChoice(BaseModel):
     finish_reason: Optional[str] = None  # NOTE: when streaming will be null
     index: int
     delta: MessageDelta
-    logprobs: Optional[Dict[str, Union[List[MessageContentLogProb], None]]] = None
+    logprobs: Optional[ChoiceLogprobs] = None
 
 
 class ChatCompletionChunkResponse(BaseModel):
diff --git a/letta/schemas/providers.py b/letta/schemas/providers.py
index f1e9edd6..822b291a 100644
--- a/letta/schemas/providers.py
+++ b/letta/schemas/providers.py
@@ -4,7 +4,7 @@ from typing import List, Literal, Optional
 
 from pydantic import BaseModel, Field, model_validator
 
-from letta.constants import LETTA_MODEL_ENDPOINT, LLM_MAX_TOKENS, MIN_CONTEXT_WINDOW
+from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LETTA_MODEL_ENDPOINT, LLM_MAX_TOKENS, MIN_CONTEXT_WINDOW
 from letta.llm_api.azure_openai import get_azure_chat_completions_endpoint, get_azure_embeddings_endpoint
 from letta.llm_api.azure_openai_constants import AZURE_MODEL_TO_CONTEXT_LENGTH
 from letta.schemas.embedding_config import EmbeddingConfig
@@ -57,7 +57,7 @@ class Provider(ProviderBase):
         """String representation of the provider for display purposes"""
         raise NotImplementedError
 
-    def get_handle(self, model_name: str, is_embedding: bool = False) -> str:
+    def get_handle(self, model_name: str, is_embedding: bool = False, base_name: Optional[str] = None) -> str:
         """
         Get the handle for a model, with support for custom overrides.
 
@@ -68,11 +68,13 @@ class Provider(ProviderBase):
         Returns:
             str: The handle for the model.
         """
-        overrides = EMBEDDING_HANDLE_OVERRIDES if is_embedding else LLM_HANDLE_OVERRIDES
-        if self.name in overrides and model_name in overrides[self.name]:
-            model_name = overrides[self.name][model_name]
+        base_name = base_name if base_name else self.name
 
-        return f"{self.name}/{model_name}"
+        overrides = EMBEDDING_HANDLE_OVERRIDES if is_embedding else LLM_HANDLE_OVERRIDES
+        if base_name in overrides and model_name in overrides[base_name]:
+            model_name = overrides[base_name][model_name]
+
+        return f"{base_name}/{model_name}"
 
     def cast_to_subtype(self):
         match (self.provider_type):
@@ -162,21 +164,34 @@ class OpenAIProvider(Provider):
 
         openai_check_valid_api_key(self.base_url, self.api_key)
 
-    def list_llm_models(self) -> List[LLMConfig]:
+    def _get_models(self) -> List[dict]:
         from letta.llm_api.openai import openai_get_model_list
 
         # Some hardcoded support for OpenRouter (so that we only get models with tool calling support)...
         # See: https://openrouter.ai/docs/requests
         extra_params = {"supported_parameters": "tools"} if "openrouter.ai" in self.base_url else None
-        response = openai_get_model_list(self.base_url, api_key=self.api_key, extra_params=extra_params)
 
-        # TogetherAI's response is missing the 'data' field
-        # assert "data" in response, f"OpenAI model query response missing 'data' field: {response}"
+        # Similar to Nebius
+        extra_params = {"verbose": True} if "nebius.com" in self.base_url else None
+
+        response = openai_get_model_list(
+            self.base_url,
+            api_key=self.api_key,
+            extra_params=extra_params,
+            # fix_url=True,  # NOTE: make sure together ends with /v1
+        )
+
         if "data" in response:
             data = response["data"]
         else:
+            # TogetherAI's response is missing the 'data' field
             data = response
 
+        return data
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        data = self._get_models()
+
         configs = []
         for model in data:
             assert "id" in model, f"OpenAI model missing 'id' field: {model}"
@@ -192,8 +207,8 @@ class OpenAIProvider(Provider):
                 continue
 
             # TogetherAI includes the type, which we can use to filter out embedding models
-            if self.base_url == "https://api.together.ai/v1":
-                if "type" in model and model["type"] != "chat":
+            if "api.together.ai" in self.base_url or "api.together.xyz" in self.base_url:
+                if "type" in model and model["type"] not in ["chat", "language"]:
                     continue
 
                 # for TogetherAI, we need to skip the models that don't support JSON mode / function calling
@@ -211,11 +226,25 @@ class OpenAIProvider(Provider):
                     continue
                 if model["config"]["chat_template"] is None:
                     continue
+                if model["config"]["chat_template"] is not None and "tools" not in model["config"]["chat_template"]:
+                    # NOTE: this is a hack to filter out models that don't support tool calling
+                    continue
                 if "tools" not in model["config"]["chat_template"]:
                     continue
                 # if "config" in data and "chat_template" in data["config"] and "tools" not in data["config"]["chat_template"]:
                 # continue
 
+            if "nebius.com" in self.base_url:
+                # Nebius includes the type, which we can use to filter for text models
+                try:
+                    model_type = model["architecture"]["modality"]
+                    if model_type not in ["text->text", "text+image->text"]:
+                        # print(f"Skipping model w/ modality {model_type}:\n{model}")
+                        continue
+                except KeyError:
+                    print(f"Couldn't access architecture type field, skipping model:\n{model}")
+                    continue
+
             # for openai, filter models
             if self.base_url == "https://api.openai.com/v1":
                 allowed_types = ["gpt-4", "o1", "o3"]
@@ -235,13 +264,19 @@ class OpenAIProvider(Provider):
                 if skip:
                     continue
 
+            # set the handle to openai-proxy if the base URL isn't OpenAI
+            if self.base_url != "https://api.openai.com/v1":
+                handle = self.get_handle(model_name, base_name="openai-proxy")
+            else:
+                handle = self.get_handle(model_name)
+
             configs.append(
                 LLMConfig(
                     model=model_name,
                     model_endpoint_type="openai",
                     model_endpoint=self.base_url,
                     context_window=context_window_size,
-                    handle=self.get_handle(model_name),
+                    handle=handle,
                     provider_name=self.name,
                     provider_category=self.provider_category,
                 )
@@ -256,33 +291,87 @@ class OpenAIProvider(Provider):
 
     def list_embedding_models(self) -> List[EmbeddingConfig]:
 
-        # TODO: actually automatically list models
-        return [
-            EmbeddingConfig(
-                embedding_model="text-embedding-ada-002",
-                embedding_endpoint_type="openai",
-                embedding_endpoint=self.base_url,
-                embedding_dim=1536,
-                embedding_chunk_size=300,
-                handle=self.get_handle("text-embedding-ada-002", is_embedding=True),
-            ),
-            EmbeddingConfig(
-                embedding_model="text-embedding-3-small",
-                embedding_endpoint_type="openai",
-                embedding_endpoint=self.base_url,
-                embedding_dim=2000,
-                embedding_chunk_size=300,
-                handle=self.get_handle("text-embedding-3-small", is_embedding=True),
-            ),
-            EmbeddingConfig(
-                embedding_model="text-embedding-3-large",
-                embedding_endpoint_type="openai",
-                embedding_endpoint=self.base_url,
-                embedding_dim=2000,
-                embedding_chunk_size=300,
-                handle=self.get_handle("text-embedding-3-large", is_embedding=True),
-            ),
-        ]
+        if self.base_url == "https://api.openai.com/v1":
+            # TODO: actually automatically list models for OpenAI
+            return [
+                EmbeddingConfig(
+                    embedding_model="text-embedding-ada-002",
+                    embedding_endpoint_type="openai",
+                    embedding_endpoint=self.base_url,
+                    embedding_dim=1536,
+                    embedding_chunk_size=300,
+                    handle=self.get_handle("text-embedding-ada-002", is_embedding=True),
+                ),
+                EmbeddingConfig(
+                    embedding_model="text-embedding-3-small",
+                    embedding_endpoint_type="openai",
+                    embedding_endpoint=self.base_url,
+                    embedding_dim=2000,
+                    embedding_chunk_size=300,
+                    handle=self.get_handle("text-embedding-3-small", is_embedding=True),
+                ),
+                EmbeddingConfig(
+                    embedding_model="text-embedding-3-large",
+                    embedding_endpoint_type="openai",
+                    embedding_endpoint=self.base_url,
+                    embedding_dim=2000,
+                    embedding_chunk_size=300,
+                    handle=self.get_handle("text-embedding-3-large", is_embedding=True),
+                ),
+            ]
+
+        else:
+            # Actually attempt to list
+            data = self._get_models()
+
+            configs = []
+            for model in data:
+                assert "id" in model, f"Model missing 'id' field: {model}"
+                model_name = model["id"]
+
+                if "context_length" in model:
+                    # Context length is returned in Nebius as "context_length"
+                    context_window_size = model["context_length"]
+                else:
+                    context_window_size = self.get_model_context_window_size(model_name)
+
+                # We need the context length for embeddings too
+                if not context_window_size:
+                    continue
+
+                if "nebius.com" in self.base_url:
+                    # Nebius includes the type, which we can use to filter for embedidng models
+                    try:
+                        model_type = model["architecture"]["modality"]
+                        if model_type not in ["text->embedding"]:
+                            # print(f"Skipping model w/ modality {model_type}:\n{model}")
+                            continue
+                    except KeyError:
+                        print(f"Couldn't access architecture type field, skipping model:\n{model}")
+                        continue
+
+                elif "together.ai" in self.base_url or "together.xyz" in self.base_url:
+                    # TogetherAI includes the type, which we can use to filter for embedding models
+                    if "type" in model and model["type"] not in ["embedding"]:
+                        # print(f"Skipping model w/ modality {model_type}:\n{model}")
+                        continue
+
+                else:
+                    # For other providers we should skip by default, since we don't want to assume embeddings are supported
+                    continue
+
+                configs.append(
+                    EmbeddingConfig(
+                        embedding_model=model_name,
+                        embedding_endpoint_type=self.provider_type,
+                        embedding_endpoint=self.base_url,
+                        embedding_dim=context_window_size,
+                        embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
+                        handle=self.get_handle(model, is_embedding=True),
+                    )
+                )
+
+            return configs
 
     def get_model_context_window_size(self, model_name: str):
         if model_name in LLM_MAX_TOKENS:
diff --git a/letta/server/rest_api/interface.py b/letta/server/rest_api/interface.py
index 9a89f907..66cc77a9 100644
--- a/letta/server/rest_api/interface.py
+++ b/letta/server/rest_api/interface.py
@@ -482,6 +482,10 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
 
         data: {"function_return": "None", "status": "success", "date": "2024-02-29T06:07:50.847262+00:00"}
         """
+        if not chunk.choices or len(chunk.choices) == 0:
+            warnings.warn(f"No choices in chunk: {chunk}")
+            return None
+
         choice = chunk.choices[0]
         message_delta = choice.delta
         otid = Message.generate_otid_from_id(message_id, message_index)
diff --git a/letta/server/server.py b/letta/server/server.py
index 5bd8a4b9..54c21ca6 100644
--- a/letta/server/server.py
+++ b/letta/server/server.py
@@ -1219,6 +1219,9 @@ class SyncServer(Server):
             try:
                 llm_models.extend(provider.list_llm_models())
             except Exception as e:
+                import traceback
+
+                traceback.print_exc()
                 warnings.warn(f"An error occurred while listing LLM models for provider {provider}: {e}")
 
         llm_models.extend(self.get_local_llm_configs())
diff --git a/letta/settings.py b/letta/settings.py
index 81551bac..1daa2e88 100644
--- a/letta/settings.py
+++ b/letta/settings.py
@@ -2,7 +2,7 @@ import os
 from pathlib import Path
 from typing import Optional
 
-from pydantic import Field
+from pydantic import AliasChoices, Field
 from pydantic_settings import BaseSettings, SettingsConfigDict
 
 from letta.local_llm.constants import DEFAULT_WRAPPER_NAME
@@ -70,7 +70,13 @@ class ModelSettings(BaseSettings):
 
     # openai
     openai_api_key: Optional[str] = None
-    openai_api_base: str = "https://api.openai.com/v1"
+    openai_api_base: str = Field(
+        default="https://api.openai.com/v1",
+        # NOTE: We previously used OPENAI_API_BASE, but this was deprecated in favor of OPENAI_BASE_URL
+        # preferred first, fallback second
+        # env=["OPENAI_BASE_URL", "OPENAI_API_BASE"],  # pydantic-settings v2
+        validation_alias=AliasChoices("OPENAI_BASE_URL", "OPENAI_API_BASE"),  # pydantic-settings v1
+    )
 
     # deepseek
     deepseek_api_key: Optional[str] = None