diff --git a/letta/functions/schema_generator.py b/letta/functions/schema_generator.py
index a300dbb2..9fe0e37b 100644
--- a/letta/functions/schema_generator.py
+++ b/letta/functions/schema_generator.py
@@ -1,5 +1,4 @@
 import inspect
-import warnings
 from typing import Any, Dict, List, Optional, Tuple, Type, Union, get_args, get_origin
 
 from docstring_parser import parse
@@ -101,7 +100,7 @@ def type_to_json_schema_type(py_type) -> dict:
         args = get_args(py_type)
         if len(args) == 0:
             # is this correct
-            warnings.warn("Defaulting to string type for untyped List")
+            logger.warning("Defaulting to string type for untyped List")
             return {
                 "type": "array",
                 "items": {"type": "string"},
diff --git a/letta/llm_api/deepseek_client.py b/letta/llm_api/deepseek_client.py
index 03137921..8099155b 100644
--- a/letta/llm_api/deepseek_client.py
+++ b/letta/llm_api/deepseek_client.py
@@ -1,7 +1,6 @@
 import json
 import os
 import re
-import warnings
 from typing import List, Optional
 
 from openai import AsyncOpenAI, AsyncStream, OpenAI
@@ -130,7 +129,7 @@ def build_deepseek_chat_completions_request(
     if llm_config.model:
         model = llm_config.model
     else:
-        warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
+        logger.warning(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
         model = None
     if use_tool_naming:
         if function_call is None:
diff --git a/letta/llm_api/helpers.py b/letta/llm_api/helpers.py
index 905c620c..7aa663a7 100644
--- a/letta/llm_api/helpers.py
+++ b/letta/llm_api/helpers.py
@@ -1,6 +1,5 @@
 import copy
 import json
-import warnings
 from collections import OrderedDict
 from typing import Any, List, Union
 
@@ -300,7 +299,7 @@ def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -
 
     if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1:
         if len(message.tool_calls) > 1:
-            warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported")
+            logger.warning(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported")
         # TODO support multiple tool calls
         tool_call = message.tool_calls[0]
 
@@ -316,20 +315,20 @@ def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -
                 new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args)
                 # also replace the message content
                 if new_choice.message.content is not None:
-                    warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
+                    logger.warning(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
                 new_choice.message.content = inner_thoughts
 
                 # update the choice object
                 rewritten_choice = new_choice
             else:
-                warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
+                logger.warning(f"Did not find inner thoughts in tool call: {str(tool_call)}")
 
         except json.JSONDecodeError as e:
-            warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
+            logger.warning(f"Failed to strip inner thoughts from kwargs: {e}")
             logger.error(f"Failed to strip inner thoughts from kwargs: {e}, Tool call arguments: {tool_call.function.arguments}")
             raise e
     else:
-        warnings.warn(f"Did not find tool call in message: {str(message)}")
+        logger.warning(f"Did not find tool call in message: {str(message)}")
 
     return rewritten_choice
 
diff --git a/letta/llm_api/openai.py b/letta/llm_api/openai.py
index a3c79e98..23787b22 100644
--- a/letta/llm_api/openai.py
+++ b/letta/llm_api/openai.py
@@ -1,4 +1,3 @@
-import warnings
 from typing import Generator, List, Optional, Union
 
 import httpx
@@ -70,9 +69,10 @@ def openai_get_model_list(url: str, api_key: Optional[str] = None, fix_url: bool
     # In Letta config the address for vLLM is w/o a /v1 suffix for simplicity
     # However if we're treating the server as an OpenAI proxy we want the /v1 suffix on our model hit
 
-    import warnings
-
-    warnings.warn("The synchronous version of openai_get_model_list function is deprecated. Use the async one instead.", DeprecationWarning)
+    logger.warning(
+        "The synchronous version of openai_get_model_list function is deprecated. Use the async one instead.",
+        stacklevel=2,
+    )
 
     if fix_url:
         if not url.endswith("/v1"):
@@ -224,7 +224,7 @@ def build_openai_chat_completions_request(
     if llm_config.model:
         model = llm_config.model
     else:
-        warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
+        logger.warning(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
         model = None
 
     if use_tool_naming:
@@ -285,7 +285,7 @@ def build_openai_chat_completions_request(
                     structured_output_version = convert_to_structured_output(tool.function.model_dump())
                     tool.function = FunctionSchema(**structured_output_version)
                 except ValueError as e:
-                    warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
+                    logger.warning(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
     return data
 
 
@@ -377,7 +377,7 @@ def openai_chat_completions_process_stream(
         ):
             assert isinstance(chat_completion_chunk, ChatCompletionChunkResponse), type(chat_completion_chunk)
             if chat_completion_chunk.choices is None or len(chat_completion_chunk.choices) == 0:
-                warnings.warn(f"No choices in chunk: {chat_completion_chunk}")
+                logger.warning(f"No choices in chunk: {chat_completion_chunk}")
                 continue
 
             # NOTE: this assumes that the tool call ID will only appear in one of the chunks during the stream
@@ -472,7 +472,7 @@ def openai_chat_completions_process_stream(
                             try:
                                 accum_message.tool_calls[tool_call_delta.index].id = tool_call_delta.id
                             except IndexError:
-                                warnings.warn(
+                                logger.warning(
                                     f"Tool call index out of range ({tool_call_delta.index})\ncurrent tool calls: {accum_message.tool_calls}\ncurrent delta: {tool_call_delta}"
                                 )
                                 # force index 0
@@ -486,14 +486,14 @@ def openai_chat_completions_process_stream(
                                         tool_call_delta.index
                                     ].function.name += tool_call_delta.function.name  # TODO check for parallel tool calls
                                 except IndexError:
-                                    warnings.warn(
+                                    logger.warning(
                                         f"Tool call index out of range ({tool_call_delta.index})\ncurrent tool calls: {accum_message.tool_calls}\ncurrent delta: {tool_call_delta}"
                                     )
                             if tool_call_delta.function.arguments is not None:
                                 try:
                                     accum_message.tool_calls[tool_call_delta.index].function.arguments += tool_call_delta.function.arguments
                                 except IndexError:
-                                    warnings.warn(
+                                    logger.warning(
                                         f"Tool call index out of range ({tool_call_delta.index})\ncurrent tool calls: {accum_message.tool_calls}\ncurrent delta: {tool_call_delta}"
                                     )
 
@@ -642,7 +642,7 @@ def prepare_openai_payload(chat_completion_request: ChatCompletionRequest):
     #         try:
     #             tool["function"] = convert_to_structured_output(tool["function"])
     #         except ValueError as e:
-    #             warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
+    #             logger.warning(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
 
     if not supports_parallel_tool_calling(chat_completion_request.model):
         data.pop("parallel_tool_calls", None)
diff --git a/letta/local_llm/constants.py b/letta/local_llm/constants.py
index 2b51101d..19fce8e8 100644
--- a/letta/local_llm/constants.py
+++ b/letta/local_llm/constants.py
@@ -1,9 +1,10 @@
+# DEFAULT_WRAPPER_NAME and INNER_THOUGHTS_KWARG are defined in letta.settings and
+# imported here (rather than defined in this module) to avoid a circular import
+# at module level (settings.py imports from this module indirectly through log.py).
 from letta.local_llm.llm_chat_completion_wrappers.chatml import ChatMLInnerMonologueWrapper
+from letta.settings import DEFAULT_WRAPPER_NAME, INNER_THOUGHTS_KWARG
 
 DEFAULT_WRAPPER = ChatMLInnerMonologueWrapper
-DEFAULT_WRAPPER_NAME = "chatml"
-
-INNER_THOUGHTS_KWARG = "thinking"
 INNER_THOUGHTS_KWARG_VERTEX = "thinking"
 VALID_INNER_THOUGHTS_KWARGS = ("thinking", "inner_thoughts")
 INNER_THOUGHTS_KWARG_DESCRIPTION = "Deep inner monologue private to you only."
diff --git a/letta/local_llm/json_parser.py b/letta/local_llm/json_parser.py
index 961c32d4..74bc6e03 100644
--- a/letta/local_llm/json_parser.py
+++ b/letta/local_llm/json_parser.py
@@ -1,6 +1,9 @@
 import json
 import re
-import warnings
+
+from letta.log import get_logger
+
+logger = get_logger(__name__)
 
 from letta.errors import LLMJSONParsingError
 from letta.helpers.json_helpers import json_loads
@@ -83,7 +86,7 @@ def clean_and_interpret_send_message_json(json_string):
 
     kwarg = model_settings.inner_thoughts_kwarg
     if kwarg not in VALID_INNER_THOUGHTS_KWARGS:
-        warnings.warn(f"INNER_THOUGHTS_KWARG is not valid: {kwarg}")
+        logger.warning(f"INNER_THOUGHTS_KWARG is not valid: {kwarg}")
         kwarg = INNER_THOUGHTS_KWARG
 
     # If normal parsing fails, attempt to clean and extract manually
diff --git a/letta/local_llm/utils.py b/letta/local_llm/utils.py
index 6027484d..be1e313e 100644
--- a/letta/local_llm/utils.py
+++ b/letta/local_llm/utils.py
@@ -1,5 +1,4 @@
 import os
-import warnings
 from typing import List, Union
 
 import requests
@@ -84,11 +83,11 @@ def num_tokens_from_functions(functions: List[dict], model: str = "gpt-4"):
         function_tokens = len(encoding.encode(function["name"]))
         if function["description"]:
             if not isinstance(function["description"], str):
-                warnings.warn(f"Function {function['name']} has non-string description: {function['description']}")
+                logger.warning(f"Function {function['name']} has non-string description: {function['description']}")
             else:
                 function_tokens += len(encoding.encode(function["description"]))
         else:
-            warnings.warn(f"Function {function['name']} has no description, function: {function}")
+            logger.warning(f"Function {function['name']} has no description, function: {function}")
 
         if "parameters" in function:
             parameters = function["parameters"]
diff --git a/letta/prompts/prompt_generator.py b/letta/prompts/prompt_generator.py
index f4e1c737..90c4a665 100644
--- a/letta/prompts/prompt_generator.py
+++ b/letta/prompts/prompt_generator.py
@@ -1,6 +1,10 @@
 from datetime import datetime
 from typing import List, Literal, Optional
 
+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from letta.constants import IN_CONTEXT_MEMORY_KEYWORD
 from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import format_datetime, get_local_time_fast
@@ -137,7 +141,7 @@ class PromptGenerator:
             if append_icm_if_missing:
                 if memory_variable_string not in system_prompt:
                     # In this case, append it to the end to make sure memory is still injected
-                    # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
+                    # logger.warning(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
                     system_prompt += "\n\n" + memory_variable_string
 
             # render the variables using the built-in templater
diff --git a/letta/schemas/memory.py b/letta/schemas/memory.py
index 6cb7a2ce..fec5a998 100644
--- a/letta/schemas/memory.py
+++ b/letta/schemas/memory.py
@@ -4,6 +4,10 @@ from datetime import datetime
 from io import StringIO
 from typing import TYPE_CHECKING, List, Optional, Union
 
+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from openai.types.beta.function_tool import FunctionTool as OpenAITool
 from pydantic import BaseModel, Field, field_validator
 
@@ -319,7 +323,7 @@ class Memory(BaseModel, validate_assignment=True):
         """Deprecated: use compile() instead."""
         import warnings
 
-        warnings.warn("compile_in_thread_async is deprecated; use compile()", DeprecationWarning, stacklevel=2)
+        logger.warning("compile_in_thread_async is deprecated; use compile()", stacklevel=2)
         return self.compile(tool_usage_rules=tool_usage_rules, sources=sources, max_files_open=max_files_open, llm_config=llm_config)
 
     def list_block_labels(self) -> List[str]:
diff --git a/letta/schemas/message.py b/letta/schemas/message.py
index d6218c92..6d495417 100644
--- a/letta/schemas/message.py
+++ b/letta/schemas/message.py
@@ -1,10 +1,13 @@
 from __future__ import annotations
 
+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 import copy
 import json
 import re
 import uuid
-import warnings
 from collections import OrderedDict
 from datetime import datetime, timezone
 from enum import Enum
@@ -72,7 +75,7 @@ def add_inner_thoughts_to_tool_call(
         updated_tool_call.function.arguments = json_dumps(ordered_args)
         return updated_tool_call
     except json.JSONDecodeError as e:
-        warnings.warn(f"Failed to put inner thoughts in kwargs: {e}")
+        logger.warning(f"Failed to put inner thoughts in kwargs: {e}")
         raise e
 
 
@@ -510,7 +513,7 @@ class Message(BaseMessage):
                 )
 
             else:
-                warnings.warn(f"Unrecognized content part in assistant message: {content_part}")
+                logger.warning(f"Unrecognized content part in assistant message: {content_part}")
 
         return messages
 
@@ -1193,7 +1196,7 @@ class Message(BaseMessage):
             if bool(re.match(r"^[^\s<|\\/>]+$", self.name)):
                 openai_message["name"] = self.name
             else:
-                warnings.warn(f"Using OpenAI with invalid 'name' field (name={self.name} role={self.role}).")
+                logger.warning(f"Using OpenAI with invalid 'name' field (name={self.name} role={self.role}).")
 
         if parse_content_parts and self.content is not None:
             for content in self.content:
@@ -1260,7 +1263,7 @@ class Message(BaseMessage):
                 if bool(re.match(r"^[^\s<|\\/>]+$", self.name)):
                     user_dict["name"] = self.name
                 else:
-                    warnings.warn(f"Using OpenAI with invalid 'name' field (name={self.name} role={self.role}).")
+                    logger.warning(f"Using OpenAI with invalid 'name' field (name={self.name} role={self.role}).")
 
             message_dicts.append(user_dict)
 
@@ -1597,7 +1600,7 @@ class Message(BaseMessage):
             text_content = None
 
         if self.role != "tool" and self.name is not None:
-            warnings.warn(f"Using Google AI with non-null 'name' field (name={self.name} role={self.role}), not yet supported.")
+            logger.warning(f"Using Google AI with non-null 'name' field (name={self.name} role={self.role}), not yet supported.")
 
         if self.role == "system":
             # NOTE: Gemini API doesn't have a 'system' role, use 'user' instead
@@ -1717,7 +1720,7 @@ class Message(BaseMessage):
             assert self.tool_call_id is not None, vars(self)
 
             if self.name is None:
-                warnings.warn("Couldn't find function name on tool call, defaulting to tool ID instead.")
+                logger.warning("Couldn't find function name on tool call, defaulting to tool ID instead.")
                 function_name = self.tool_call_id
             else:
                 function_name = self.name
@@ -1750,7 +1753,7 @@ class Message(BaseMessage):
         if "parts" not in google_ai_message or not google_ai_message["parts"]:
             # If parts is empty, add a default text part
             google_ai_message["parts"] = [{"text": "empty message"}]
-            warnings.warn(
+            logger.warning(
                 f"Empty 'parts' detected in message with role '{self.role}'. Added default empty text part. Full message:\n{vars(self)}"
             )
 
diff --git a/letta/schemas/providers/anthropic.py b/letta/schemas/providers/anthropic.py
index 9ed2ce5f..8fb57cf4 100644
--- a/letta/schemas/providers/anthropic.py
+++ b/letta/schemas/providers/anthropic.py
@@ -1,6 +1,9 @@
-import warnings
 from typing import Literal
 
+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 import anthropic
 from pydantic import Field
 
@@ -149,7 +152,7 @@ class AnthropicProvider(Provider):
                     model["context_window"] = model_library[model["id"]]
                 else:
                     # On fallback, we can set 200k (generally safe), but we should warn the user
-                    warnings.warn(f"Couldn't find context window size for model {model['id']}, defaulting to 200,000")
+                    logger.warning(f"Couldn't find context window size for model {model['id']}, defaulting to 200,000")
                     model["context_window"] = 200000
 
             # Optional override: enable 1M context for Sonnet 4/4.5 when flag is set
diff --git a/letta/schemas/providers/base.py b/letta/schemas/providers/base.py
index 1de956b2..df9da662 100644
--- a/letta/schemas/providers/base.py
+++ b/letta/schemas/providers/base.py
@@ -1,5 +1,9 @@
 from datetime import datetime
 
+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from pydantic import BaseModel, Field, model_validator
 
 from letta.schemas.embedding_config import EmbeddingConfig
@@ -90,7 +94,7 @@ class Provider(ProviderBase):
         import asyncio
         import warnings
 
-        warnings.warn("list_llm_models is deprecated, use list_llm_models_async instead", DeprecationWarning, stacklevel=2)
+        logger.warning("list_llm_models is deprecated, use list_llm_models_async instead", stacklevel=2)
 
         # Simplified asyncio handling - just use asyncio.run()
         # This works in most contexts and avoids complex event loop detection
@@ -115,7 +119,7 @@ class Provider(ProviderBase):
         import asyncio
         import warnings
 
-        warnings.warn("list_embedding_models is deprecated, use list_embedding_models_async instead", DeprecationWarning, stacklevel=2)
+        logger.warning("list_embedding_models is deprecated, use list_embedding_models_async instead", stacklevel=2)
 
         # Simplified asyncio handling - just use asyncio.run()
         # This works in most contexts and avoids complex event loop detection
diff --git a/letta/schemas/providers/cerebras.py b/letta/schemas/providers/cerebras.py
index 85ef6d1a..75adf5c9 100644
--- a/letta/schemas/providers/cerebras.py
+++ b/letta/schemas/providers/cerebras.py
@@ -1,6 +1,9 @@
-import warnings
 from typing import Literal
 
+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from pydantic import Field
 
 from letta.schemas.enums import ProviderCategory, ProviderType
@@ -58,7 +61,7 @@ class CerebrasProvider(OpenAIProvider):
                 context_window_size = self.get_model_context_window_size(model_name)
 
             if not context_window_size:
-                warnings.warn(f"Couldn't find context window size for model {model_name}")
+                logger.warning(f"Couldn't find context window size for model {model_name}")
                 continue
 
             # Cerebras supports function calling
diff --git a/letta/schemas/providers/google_gemini.py b/letta/schemas/providers/google_gemini.py
index ba7a2021..161ac568 100644
--- a/letta/schemas/providers/google_gemini.py
+++ b/letta/schemas/providers/google_gemini.py
@@ -1,6 +1,10 @@
 import asyncio
 from typing import Literal
 
+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from pydantic import Field
 
 from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_TOKENS
@@ -88,7 +92,7 @@ class GoogleAIProvider(Provider):
     def get_model_context_window(self, model_name: str) -> int | None:
         import warnings
 
-        warnings.warn("This is deprecated, use get_model_context_window_async when possible.", DeprecationWarning)
+        logger.warning("This is deprecated, use get_model_context_window_async when possible.")
         from letta.llm_api.google_ai_client import google_ai_get_model_context_window
 
         if model_name in LLM_MAX_TOKENS:
diff --git a/letta/schemas/providers/lmstudio.py b/letta/schemas/providers/lmstudio.py
index b04d9825..801adfb7 100644
--- a/letta/schemas/providers/lmstudio.py
+++ b/letta/schemas/providers/lmstudio.py
@@ -1,6 +1,9 @@
-import warnings
 from typing import Literal
 
+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from pydantic import Field
 
 from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE
@@ -27,14 +30,14 @@ class LMStudioOpenAIProvider(OpenAIProvider):
         response = await openai_get_model_list_async(self.model_endpoint_url)
 
         if "data" not in response:
-            warnings.warn(f"LMStudio OpenAI model query response missing 'data' field: {response}")
+            logger.warning(f"LMStudio OpenAI model query response missing 'data' field: {response}")
             return []
 
         configs = []
         for model in response["data"]:
             model_type = model.get("type")
             if not model_type:
-                warnings.warn(f"LMStudio OpenAI model missing 'type' field: {model}")
+                logger.warning(f"LMStudio OpenAI model missing 'type' field: {model}")
                 continue
             if model_type not in ("vlm", "llm"):
                 continue
@@ -48,7 +51,7 @@ class LMStudioOpenAIProvider(OpenAIProvider):
             if "compatibility_type" in model:
                 compatibility_type = model["compatibility_type"]
             else:
-                warnings.warn(f"LMStudio OpenAI model missing 'compatibility_type' field: {model}")
+                logger.warning(f"LMStudio OpenAI model missing 'compatibility_type' field: {model}")
                 continue
 
             configs.append(
@@ -72,14 +75,14 @@ class LMStudioOpenAIProvider(OpenAIProvider):
         response = await openai_get_model_list_async(self.model_endpoint_url)
 
         if "data" not in response:
-            warnings.warn(f"LMStudio OpenAI model query response missing 'data' field: {response}")
+            logger.warning(f"LMStudio OpenAI model query response missing 'data' field: {response}")
             return []
 
         configs = []
         for model in response["data"]:
             model_type = model.get("type")
             if not model_type:
-                warnings.warn(f"LMStudio OpenAI model missing 'type' field: {model}")
+                logger.warning(f"LMStudio OpenAI model missing 'type' field: {model}")
                 continue
             if model_type not in ("embeddings"):
                 continue
diff --git a/letta/schemas/providers/together.py b/letta/schemas/providers/together.py
index 6dc7b083..1229d2bd 100644
--- a/letta/schemas/providers/together.py
+++ b/letta/schemas/providers/together.py
@@ -4,6 +4,10 @@ Note: this supports completions (deprecated by openai) and chat completions via
 
 from typing import Literal, Optional
 
+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from pydantic import Field
 
 from letta.constants import MIN_CONTEXT_WINDOW
@@ -33,7 +37,7 @@ class TogetherProvider(OpenAIProvider):
     async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
         import warnings
 
-        warnings.warn(
+        logger.warning(
             "Letta does not currently support listing embedding models for Together. Please "
             "contact support or reach out via GitHub or Discord to get support."
         )
diff --git a/letta/schemas/providers/xai.py b/letta/schemas/providers/xai.py
index ed8902ed..c199c516 100644
--- a/letta/schemas/providers/xai.py
+++ b/letta/schemas/providers/xai.py
@@ -1,6 +1,9 @@
-import warnings
 from typing import Literal
 
+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from pydantic import Field
 
 from letta.schemas.enums import ProviderCategory, ProviderType
@@ -49,7 +52,7 @@ class XAIProvider(OpenAIProvider):
                 context_window_size = self.get_model_context_window_size(model_name)
 
             if not context_window_size:
-                warnings.warn(f"Couldn't find context window size for model {model_name}")
+                logger.warning(f"Couldn't find context window size for model {model_name}")
                 continue
 
             configs.append(
diff --git a/letta/server/rest_api/interface.py b/letta/server/rest_api/interface.py
index 78593e97..a927611a 100644
--- a/letta/server/rest_api/interface.py
+++ b/letta/server/rest_api/interface.py
@@ -1,7 +1,10 @@
 import asyncio
 import json
 import queue
-import warnings
+
+from letta.log import get_logger
+
+logger = get_logger(__name__)
 from collections import deque
 from datetime import datetime
 from typing import AsyncGenerator, Literal, Optional, Union
@@ -503,7 +506,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         data: {"function_return": "None", "status": "success", "date": "2024-02-29T06:07:50.847262+00:00"}
         """
         if not chunk.choices or len(chunk.choices) == 0:
-            warnings.warn(f"No choices in chunk: {chunk}")
+            logger.warning(f"No choices in chunk: {chunk}")
             return None
 
         choice = chunk.choices[0]
@@ -1028,7 +1031,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 # created=1713216662
                 # model='gpt-4o-mini-2024-07-18'
                 # object='chat.completion.chunk'
-                warnings.warn(f"Couldn't find delta in chunk: {chunk}")
+                logger.warning(f"Couldn't find delta in chunk: {chunk}")
             return None
 
         return processed_chunk
@@ -1255,7 +1258,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     try:
                         func_args = parse_json(function_call.function.arguments)
                     except:
-                        warnings.warn(f"Failed to parse function arguments: {function_call.function.arguments}")
+                        logger.warning(f"Failed to parse function arguments: {function_call.function.arguments}")
                         func_args = {}
 
                     if (
diff --git a/letta/server/server.py b/letta/server/server.py
index 24fdbd0d..064755b3 100644
--- a/letta/server/server.py
+++ b/letta/server/server.py
@@ -2,7 +2,6 @@ import asyncio
 import json
 import os
 import traceback
-import warnings
 from abc import abstractmethod
 from datetime import datetime
 from pathlib import Path
@@ -1149,9 +1148,9 @@ class SyncServer(object):
         #                        llm_config = LLMConfig(**config_data)
         #                        llm_models.append(llm_config)
         #                except (json.JSONDecodeError, ValueError) as e:
-        #                    warnings.warn(f"Error parsing LLM config file {filename}: {e}")
+        #                    logger.warning(f"Error parsing LLM config file {filename}: {e}")
         # except Exception as e:
-        #    warnings.warn(f"Error reading LLM configs directory: {e}")
+        #    logger.warning(f"Error reading LLM configs directory: {e}")
         return llm_models
 
     def get_local_embedding_configs(self):
@@ -1169,9 +1168,9 @@ class SyncServer(object):
         #                        embedding_config = EmbeddingConfig(**config_data)
         #                        embedding_models.append(embedding_config)
         #                except (json.JSONDecodeError, ValueError) as e:
-        #                    warnings.warn(f"Error parsing embedding config file {filename}: {e}")
+        #                    logger.warning(f"Error parsing embedding config file {filename}: {e}")
         # except Exception as e:
-        #    warnings.warn(f"Error reading embedding configs directory: {e}")
+        #    logger.warning(f"Error reading embedding configs directory: {e}")
         return embedding_models
 
     def add_llm_model(self, request: LLMConfig) -> LLMConfig:
diff --git a/letta/services/agent_manager.py b/letta/services/agent_manager.py
index 27c4e5b4..ce883bae 100644
--- a/letta/services/agent_manager.py
+++ b/letta/services/agent_manager.py
@@ -1954,9 +1954,8 @@ class AgentManager:
         """
         import warnings
 
-        warnings.warn(
+        logger.warning(
             "list_passages_async is deprecated. Use query_source_passages_async or query_agent_passages_async instead.",
-            DeprecationWarning,
             stacklevel=2,
         )
 
diff --git a/letta/services/helpers/agent_manager_helper.py b/letta/services/helpers/agent_manager_helper.py
index fe5d7d05..30e429e8 100644
--- a/letta/services/helpers/agent_manager_helper.py
+++ b/letta/services/helpers/agent_manager_helper.py
@@ -2,6 +2,10 @@ import uuid
 from datetime import datetime
 from typing import List, Literal, Optional, Set
 
+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 import numpy as np
 from sqlalchemy import Select, and_, asc, desc, func, literal, nulls_last, or_, select, union_all
 from sqlalchemy.orm import noload
@@ -304,7 +308,7 @@ def compile_system_message(
         if append_icm_if_missing:
             if memory_variable_string not in system_prompt:
                 # In this case, append it to the end to make sure memory is still injected
-                # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
+                # logger.warning(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
                 system_prompt += "\n\n" + memory_variable_string
 
         # render the variables using the built-in templater
diff --git a/letta/services/passage_manager.py b/letta/services/passage_manager.py
index c15b4e11..7d8e3ee7 100644
--- a/letta/services/passage_manager.py
+++ b/letta/services/passage_manager.py
@@ -120,12 +120,8 @@ class PassageManager:
     @trace_method
     async def get_passage_by_id_async(self, passage_id: str, actor: PydanticUser) -> Optional[PydanticPassage]:
         """DEPRECATED: Use get_agent_passage_by_id_async() or get_source_passage_by_id_async() instead."""
-        import warnings
-
-        warnings.warn(
-            "get_passage_by_id_async is deprecated. Use get_agent_passage_by_id_async() or get_source_passage_by_id_async() instead.",
-            DeprecationWarning,
-            stacklevel=2,
+        logger.warning(
+            "get_passage_by_id_async is deprecated. Use get_agent_passage_by_id_async() or get_source_passage_by_id_async() instead."
         )
 
         async with db_registry.async_session() as session:
@@ -231,13 +227,7 @@ class PassageManager:
     @trace_method
     async def create_passage_async(self, pydantic_passage: PydanticPassage, actor: PydanticUser) -> PydanticPassage:
         """DEPRECATED: Use create_agent_passage_async() or create_source_passage_async() instead."""
-        import warnings
-
-        warnings.warn(
-            "create_passage_async is deprecated. Use create_agent_passage_async() or create_source_passage_async() instead.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
+        logger.warning("create_passage_async is deprecated. Use create_agent_passage_async() or create_source_passage_async() instead.")
 
         # Common fields for both passage types
         passage = self._preprocess_passage_for_creation(pydantic_passage=pydantic_passage)
@@ -365,9 +355,8 @@ class PassageManager:
         """DEPRECATED: Use create_many_agent_passages() or create_many_source_passages() instead."""
         import warnings
 
-        warnings.warn(
+        logger.warning(
             "create_many_passages is deprecated. Use create_many_agent_passages() or create_many_source_passages() instead.",
-            DeprecationWarning,
             stacklevel=2,
         )
         return [self.create_passage(p, actor) for p in passages]
@@ -378,9 +367,8 @@ class PassageManager:
         """DEPRECATED: Use create_many_agent_passages_async() or create_many_source_passages_async() instead."""
         import warnings
 
-        warnings.warn(
+        logger.warning(
             "create_many_passages_async is deprecated. Use create_many_agent_passages_async() or create_many_source_passages_async() instead.",
-            DeprecationWarning,
             stacklevel=2,
         )
 
@@ -651,9 +639,8 @@ class PassageManager:
         """DEPRECATED: Use delete_agent_passage_by_id_async() or delete_source_passage_by_id_async() instead."""
         import warnings
 
-        warnings.warn(
+        logger.warning(
             "delete_passage_by_id_async is deprecated. Use delete_agent_passage_by_id_async() or delete_source_passage_by_id_async() instead.",
-            DeprecationWarning,
             stacklevel=2,
         )
 
@@ -765,9 +752,8 @@ class PassageManager:
         """DEPRECATED: Use delete_agent_passages() or delete_source_passages() instead."""
         import warnings
 
-        warnings.warn(
+        logger.warning(
             "delete_passages is deprecated. Use delete_agent_passages() or delete_source_passages() instead.",
-            DeprecationWarning,
             stacklevel=2,
         )
         # TODO: This is very inefficient
@@ -787,7 +773,7 @@ class PassageManager:
         """DEPRECATED: Use agent_passage_size() instead (this only counted agent passages anyway)."""
         import warnings
 
-        warnings.warn("size is deprecated. Use agent_passage_size() instead.", DeprecationWarning, stacklevel=2)
+        logger.warning("size is deprecated. Use agent_passage_size() instead.", stacklevel=2)
         return self.agent_passage_size(actor=actor, agent_id=agent_id)
 
     @enforce_types
diff --git a/letta/services/tool_manager.py b/letta/services/tool_manager.py
index 7df4c9c0..264189b2 100644
--- a/letta/services/tool_manager.py
+++ b/letta/services/tool_manager.py
@@ -1,5 +1,4 @@
 import importlib
-import warnings
 from typing import List, Optional, Set, Union
 
 from sqlalchemy import and_, func, or_, select
@@ -636,7 +635,7 @@ class ToolManager:
                 module = importlib.import_module(module_name)
                 functions_to_schema.update(load_function_set(module))
             except ValueError as e:
-                warnings.warn(f"Error loading function set '{module_name}': {e}")
+                logger.warning(f"Error loading function set '{module_name}': {e}")
             except Exception as e:
                 raise e
 
diff --git a/letta/settings.py b/letta/settings.py
index ca658df8..0b02d0dd 100644
--- a/letta/settings.py
+++ b/letta/settings.py
@@ -6,10 +6,13 @@ from typing import Optional
 from pydantic import AliasChoices, Field
 from pydantic_settings import BaseSettings, SettingsConfigDict
 
-from letta.local_llm.constants import DEFAULT_WRAPPER_NAME, INNER_THOUGHTS_KWARG
 from letta.schemas.enums import SandboxType
 from letta.services.summarizer.enums import SummarizationMode
 
+# Copied from letta.local_llm.constants instead of imported, to avoid a circular import with letta.log; keep the values in sync.
+DEFAULT_WRAPPER_NAME = "chatml"
+INNER_THOUGHTS_KWARG = "thinking"
+
 
 class ToolSettings(BaseSettings):
     # Sandbox Configurations
diff --git a/letta/system.py b/letta/system.py
index 59ddc321..dfbf5b28 100644
--- a/letta/system.py
+++ b/letta/system.py
@@ -1,7 +1,10 @@
 import json
-import warnings
 from typing import Optional
 
+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from .constants import (
     INITIAL_BOOT_MESSAGE,
     INITIAL_BOOT_MESSAGE_SEND_MESSAGE_FIRST_MSG,
@@ -160,7 +163,7 @@ def package_system_message(system_message, timezone, message_type="system_alert"
     try:
         message_json = json.loads(system_message)
         if "type" in message_json and message_json["type"] == message_type:
-            warnings.warn(f"Attempted to pack a system message that is already packed. Not packing: '{system_message}'")
+            logger.warning(f"Attempted to pack a system message that is already packed. Not packing: '{system_message}'")
             return system_message
     except:
         pass  # do nothing, expected behavior that the message is not JSON
@@ -251,7 +254,7 @@ def unpack_message(packed_message: str) -> str:
         if "type" in message_json and message_json["type"] in ["login", "heartbeat"]:
             # This is a valid user message that the ADE expects, so don't print warning
             return packed_message
-        warnings.warn(f"Was unable to find 'message' field in packed message object: '{packed_message}'")
+        logger.warning(f"Was unable to find 'message' field in packed message object: '{packed_message}'")
         return packed_message
     else:
         try:
@@ -260,6 +263,6 @@ def unpack_message(packed_message: str) -> str:
             return packed_message
 
         if message_type != "user_message":
-            warnings.warn(f"Expected type to be 'user_message', but was '{message_type}', so not unpacking: '{packed_message}'")
+            logger.warning(f"Expected type to be 'user_message', but was '{message_type}', so not unpacking: '{packed_message}'")
             return packed_message
         return message_json.get("message")