chore: refactor not to use warnings.warn (#5730)

* refactor not to use warnings.warn
* temp circular import fix (maybe unnecessary/bad)
* fix Deprecation warning
* fix deprecation warning and mcp thing?
* revert changes to mcp server test
* fix deprecation warning
2025-10-23 22:05:37 -07:00
parent c4c9d12f42
commit 704d3b2d79
25 changed files with 127 additions and 96 deletions
--- a/letta/functions/schema_generator.py
+++ b/letta/functions/schema_generator.py
@@ -1,5 +1,4 @@
 import inspect
-import warnings
 from typing import Any, Dict, List, Optional, Tuple, Type, Union, get_args, get_origin

 from docstring_parser import parse
@@ -101,7 +100,7 @@ def type_to_json_schema_type(py_type) -> dict:
        args = get_args(py_type)
        if len(args) == 0:
            # is this correct
-            warnings.warn("Defaulting to string type for untyped List")
+            logger.warning("Defaulting to string type for untyped List")
            return {
                "type": "array",
                "items": {"type": "string"},
--- a/letta/llm_api/deepseek_client.py
+++ b/letta/llm_api/deepseek_client.py
@@ -1,7 +1,6 @@
 import json
 import os
 import re
-import warnings
 from typing import List, Optional

 from openai import AsyncOpenAI, AsyncStream, OpenAI
@@ -130,7 +129,7 @@ def build_deepseek_chat_completions_request(
    if llm_config.model:
        model = llm_config.model
    else:
-        warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
+        logger.warning(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
        model = None
    if use_tool_naming:
        if function_call is None:
--- a/letta/llm_api/helpers.py
+++ b/letta/llm_api/helpers.py
@@ -1,6 +1,5 @@
 import copy
 import json
-import warnings
 from collections import OrderedDict
 from typing import Any, List, Union

@@ -300,7 +299,7 @@ def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -

    if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1:
        if len(message.tool_calls) > 1:
-            warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported")
+            logger.warning(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported")
        # TODO support multiple tool calls
        tool_call = message.tool_calls[0]

@@ -316,20 +315,20 @@ def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -
                new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args)
                # also replace the message content
                if new_choice.message.content is not None:
-                    warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
+                    logger.warning(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
                new_choice.message.content = inner_thoughts

                # update the choice object
                rewritten_choice = new_choice
            else:
-                warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
+                logger.warning(f"Did not find inner thoughts in tool call: {str(tool_call)}")

        except json.JSONDecodeError as e:
-            warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
+            logger.warning(f"Failed to strip inner thoughts from kwargs: {e}")
            logger.error(f"Failed to strip inner thoughts from kwargs: {e}, Tool call arguments: {tool_call.function.arguments}")
            raise e
    else:
-        warnings.warn(f"Did not find tool call in message: {str(message)}")
+        logger.warning(f"Did not find tool call in message: {str(message)}")

    return rewritten_choice

--- a/letta/llm_api/openai.py
+++ b/letta/llm_api/openai.py
@@ -1,4 +1,3 @@
-import warnings
 from typing import Generator, List, Optional, Union

 import httpx
@@ -70,9 +69,10 @@ def openai_get_model_list(url: str, api_key: Optional[str] = None, fix_url: bool
    # In Letta config the address for vLLM is w/o a /v1 suffix for simplicity
    # However if we're treating the server as an OpenAI proxy we want the /v1 suffix on our model hit

-    import warnings
-
-    warnings.warn("The synchronous version of openai_get_model_list function is deprecated. Use the async one instead.", DeprecationWarning)
+    logger.warning(
+        "The synchronous version of openai_get_model_list function is deprecated. Use the async one instead.",
+        stacklevel=2,
+    )

    if fix_url:
        if not url.endswith("/v1"):
@@ -224,7 +224,7 @@ def build_openai_chat_completions_request(
    if llm_config.model:
        model = llm_config.model
    else:
-        warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
+        logger.warning(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
        model = None

    if use_tool_naming:
@@ -285,7 +285,7 @@ def build_openai_chat_completions_request(
                    structured_output_version = convert_to_structured_output(tool.function.model_dump())
                    tool.function = FunctionSchema(**structured_output_version)
                except ValueError as e:
-                    warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
+                    logger.warning(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
    return data


@@ -377,7 +377,7 @@ def openai_chat_completions_process_stream(
        ):
            assert isinstance(chat_completion_chunk, ChatCompletionChunkResponse), type(chat_completion_chunk)
            if chat_completion_chunk.choices is None or len(chat_completion_chunk.choices) == 0:
-                warnings.warn(f"No choices in chunk: {chat_completion_chunk}")
+                logger.warning(f"No choices in chunk: {chat_completion_chunk}")
                continue

            # NOTE: this assumes that the tool call ID will only appear in one of the chunks during the stream
@@ -472,7 +472,7 @@ def openai_chat_completions_process_stream(
                            try:
                                accum_message.tool_calls[tool_call_delta.index].id = tool_call_delta.id
                            except IndexError:
-                                warnings.warn(
+                                logger.warning(
                                    f"Tool call index out of range ({tool_call_delta.index})\ncurrent tool calls: {accum_message.tool_calls}\ncurrent delta: {tool_call_delta}"
                                )
                                # force index 0
@@ -486,14 +486,14 @@ def openai_chat_completions_process_stream(
                                        tool_call_delta.index
                                    ].function.name += tool_call_delta.function.name  # TODO check for parallel tool calls
                                except IndexError:
-                                    warnings.warn(
+                                    logger.warning(
                                        f"Tool call index out of range ({tool_call_delta.index})\ncurrent tool calls: {accum_message.tool_calls}\ncurrent delta: {tool_call_delta}"
                                    )
                            if tool_call_delta.function.arguments is not None:
                                try:
                                    accum_message.tool_calls[tool_call_delta.index].function.arguments += tool_call_delta.function.arguments
                                except IndexError:
-                                    warnings.warn(
+                                    logger.warning(
                                        f"Tool call index out of range ({tool_call_delta.index})\ncurrent tool calls: {accum_message.tool_calls}\ncurrent delta: {tool_call_delta}"
                                    )

@@ -642,7 +642,7 @@ def prepare_openai_payload(chat_completion_request: ChatCompletionRequest):
    #         try:
    #             tool["function"] = convert_to_structured_output(tool["function"])
    #         except ValueError as e:
-    #             warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
+    #             logger.warning(f"Failed to convert tool function to structured output, tool={tool}, error={e}")

    if not supports_parallel_tool_calling(chat_completion_request.model):
        data.pop("parallel_tool_calls", None)
--- a/letta/local_llm/constants.py
+++ b/letta/local_llm/constants.py
@@ -1,9 +1,10 @@
+# DEFAULT_WRAPPER_NAME and INNER_THOUGHTS_KWARG are now defined in letta.settings
+# and imported here, so that settings.py no longer has to import this module
+# (that import was part of a circular import chain involving letta.log).
 from letta.local_llm.llm_chat_completion_wrappers.chatml import ChatMLInnerMonologueWrapper
+from letta.settings import DEFAULT_WRAPPER_NAME, INNER_THOUGHTS_KWARG

 DEFAULT_WRAPPER = ChatMLInnerMonologueWrapper
-DEFAULT_WRAPPER_NAME = "chatml"
-
-INNER_THOUGHTS_KWARG = "thinking"
 INNER_THOUGHTS_KWARG_VERTEX = "thinking"
 VALID_INNER_THOUGHTS_KWARGS = ("thinking", "inner_thoughts")
 INNER_THOUGHTS_KWARG_DESCRIPTION = "Deep inner monologue private to you only."
--- a/letta/local_llm/json_parser.py
+++ b/letta/local_llm/json_parser.py
@@ -1,6 +1,9 @@
 import json
 import re
-import warnings
+
+from letta.log import get_logger
+
+logger = get_logger(__name__)

 from letta.errors import LLMJSONParsingError
 from letta.helpers.json_helpers import json_loads
@@ -83,7 +86,7 @@ def clean_and_interpret_send_message_json(json_string):

    kwarg = model_settings.inner_thoughts_kwarg
    if kwarg not in VALID_INNER_THOUGHTS_KWARGS:
-        warnings.warn(f"INNER_THOUGHTS_KWARG is not valid: {kwarg}")
+        logger.warning(f"INNER_THOUGHTS_KWARG is not valid: {kwarg}")
        kwarg = INNER_THOUGHTS_KWARG

    # If normal parsing fails, attempt to clean and extract manually
--- a/letta/local_llm/utils.py
+++ b/letta/local_llm/utils.py
@@ -1,5 +1,4 @@
 import os
-import warnings
 from typing import List, Union

 import requests
@@ -84,11 +83,11 @@ def num_tokens_from_functions(functions: List[dict], model: str = "gpt-4"):
        function_tokens = len(encoding.encode(function["name"]))
        if function["description"]:
            if not isinstance(function["description"], str):
-                warnings.warn(f"Function {function['name']} has non-string description: {function['description']}")
+                logger.warning(f"Function {function['name']} has non-string description: {function['description']}")
            else:
                function_tokens += len(encoding.encode(function["description"]))
        else:
-            warnings.warn(f"Function {function['name']} has no description, function: {function}")
+            logger.warning(f"Function {function['name']} has no description, function: {function}")

        if "parameters" in function:
            parameters = function["parameters"]
--- a/letta/prompts/prompt_generator.py
+++ b/letta/prompts/prompt_generator.py
@@ -1,6 +1,10 @@
 from datetime import datetime
 from typing import List, Literal, Optional

+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from letta.constants import IN_CONTEXT_MEMORY_KEYWORD
 from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import format_datetime, get_local_time_fast
@@ -137,7 +141,7 @@ class PromptGenerator:
            if append_icm_if_missing:
                if memory_variable_string not in system_prompt:
                    # In this case, append it to the end to make sure memory is still injected
-                    # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
+                    # logger.warning(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
                    system_prompt += "\n\n" + memory_variable_string

            # render the variables using the built-in templater
--- a/letta/schemas/memory.py
+++ b/letta/schemas/memory.py
@@ -4,6 +4,10 @@ from datetime import datetime
 from io import StringIO
 from typing import TYPE_CHECKING, List, Optional, Union

+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from openai.types.beta.function_tool import FunctionTool as OpenAITool
 from pydantic import BaseModel, Field, field_validator

@@ -319,7 +323,7 @@ class Memory(BaseModel, validate_assignment=True):
        """Deprecated: use compile() instead."""
        import warnings

-        warnings.warn("compile_in_thread_async is deprecated; use compile()", DeprecationWarning, stacklevel=2)
+        logger.warning("compile_in_thread_async is deprecated; use compile()", stacklevel=2)
        return self.compile(tool_usage_rules=tool_usage_rules, sources=sources, max_files_open=max_files_open, llm_config=llm_config)

    def list_block_labels(self) -> List[str]:
--- a/letta/schemas/message.py
+++ b/letta/schemas/message.py
@@ -1,10 +1,13 @@
 from __future__ import annotations

+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 import copy
 import json
 import re
 import uuid
-import warnings
 from collections import OrderedDict
 from datetime import datetime, timezone
 from enum import Enum
@@ -72,7 +75,7 @@ def add_inner_thoughts_to_tool_call(
        updated_tool_call.function.arguments = json_dumps(ordered_args)
        return updated_tool_call
    except json.JSONDecodeError as e:
-        warnings.warn(f"Failed to put inner thoughts in kwargs: {e}")
+        logger.warning(f"Failed to put inner thoughts in kwargs: {e}")
        raise e


@@ -510,7 +513,7 @@ class Message(BaseMessage):
                )

            else:
-                warnings.warn(f"Unrecognized content part in assistant message: {content_part}")
+                logger.warning(f"Unrecognized content part in assistant message: {content_part}")

        return messages

@@ -1193,7 +1196,7 @@ class Message(BaseMessage):
            if bool(re.match(r"^[^\s<|\\/>]+$", self.name)):
                openai_message["name"] = self.name
            else:
-                warnings.warn(f"Using OpenAI with invalid 'name' field (name={self.name} role={self.role}).")
+                logger.warning(f"Using OpenAI with invalid 'name' field (name={self.name} role={self.role}).")

        if parse_content_parts and self.content is not None:
            for content in self.content:
@@ -1260,7 +1263,7 @@ class Message(BaseMessage):
                if bool(re.match(r"^[^\s<|\\/>]+$", self.name)):
                    user_dict["name"] = self.name
                else:
-                    warnings.warn(f"Using OpenAI with invalid 'name' field (name={self.name} role={self.role}).")
+                    logger.warning(f"Using OpenAI with invalid 'name' field (name={self.name} role={self.role}).")

            message_dicts.append(user_dict)

@@ -1597,7 +1600,7 @@ class Message(BaseMessage):
            text_content = None

        if self.role != "tool" and self.name is not None:
-            warnings.warn(f"Using Google AI with non-null 'name' field (name={self.name} role={self.role}), not yet supported.")
+            logger.warning(f"Using Google AI with non-null 'name' field (name={self.name} role={self.role}), not yet supported.")

        if self.role == "system":
            # NOTE: Gemini API doesn't have a 'system' role, use 'user' instead
@@ -1717,7 +1720,7 @@ class Message(BaseMessage):
            assert self.tool_call_id is not None, vars(self)

            if self.name is None:
-                warnings.warn("Couldn't find function name on tool call, defaulting to tool ID instead.")
+                logger.warning("Couldn't find function name on tool call, defaulting to tool ID instead.")
                function_name = self.tool_call_id
            else:
                function_name = self.name
@@ -1750,7 +1753,7 @@ class Message(BaseMessage):
        if "parts" not in google_ai_message or not google_ai_message["parts"]:
            # If parts is empty, add a default text part
            google_ai_message["parts"] = [{"text": "empty message"}]
-            warnings.warn(
+            logger.warning(
                f"Empty 'parts' detected in message with role '{self.role}'. Added default empty text part. Full message:\n{vars(self)}"
            )

--- a/letta/schemas/providers/anthropic.py
+++ b/letta/schemas/providers/anthropic.py
@@ -1,6 +1,9 @@
-import warnings
 from typing import Literal

+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 import anthropic
 from pydantic import Field

@@ -149,7 +152,7 @@ class AnthropicProvider(Provider):
                    model["context_window"] = model_library[model["id"]]
                else:
                    # On fallback, we can set 200k (generally safe), but we should warn the user
-                    warnings.warn(f"Couldn't find context window size for model {model['id']}, defaulting to 200,000")
+                    logger.warning(f"Couldn't find context window size for model {model['id']}, defaulting to 200,000")
                    model["context_window"] = 200000

            # Optional override: enable 1M context for Sonnet 4/4.5 when flag is set
--- a/letta/schemas/providers/base.py
+++ b/letta/schemas/providers/base.py
@@ -1,5 +1,9 @@
 from datetime import datetime

+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from pydantic import BaseModel, Field, model_validator

 from letta.schemas.embedding_config import EmbeddingConfig
@@ -90,7 +94,7 @@ class Provider(ProviderBase):
        import asyncio
        import warnings

-        warnings.warn("list_llm_models is deprecated, use list_llm_models_async instead", DeprecationWarning, stacklevel=2)
+        logger.warning("list_llm_models is deprecated, use list_llm_models_async instead", stacklevel=2)

        # Simplified asyncio handling - just use asyncio.run()
        # This works in most contexts and avoids complex event loop detection
@@ -115,7 +119,7 @@ class Provider(ProviderBase):
        import asyncio
        import warnings

-        warnings.warn("list_embedding_models is deprecated, use list_embedding_models_async instead", DeprecationWarning, stacklevel=2)
+        logger.warning("list_embedding_models is deprecated, use list_embedding_models_async instead", stacklevel=2)

        # Simplified asyncio handling - just use asyncio.run()
        # This works in most contexts and avoids complex event loop detection
--- a/letta/schemas/providers/cerebras.py
+++ b/letta/schemas/providers/cerebras.py
@@ -1,6 +1,9 @@
-import warnings
 from typing import Literal

+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from pydantic import Field

 from letta.schemas.enums import ProviderCategory, ProviderType
@@ -58,7 +61,7 @@ class CerebrasProvider(OpenAIProvider):
                context_window_size = self.get_model_context_window_size(model_name)

            if not context_window_size:
-                warnings.warn(f"Couldn't find context window size for model {model_name}")
+                logger.warning(f"Couldn't find context window size for model {model_name}")
                continue

            # Cerebras supports function calling
--- a/letta/schemas/providers/google_gemini.py
+++ b/letta/schemas/providers/google_gemini.py
@@ -1,6 +1,10 @@
 import asyncio
 from typing import Literal

+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from pydantic import Field

 from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_TOKENS
@@ -88,7 +92,7 @@ class GoogleAIProvider(Provider):
    def get_model_context_window(self, model_name: str) -> int | None:
        import warnings

-        warnings.warn("This is deprecated, use get_model_context_window_async when possible.", DeprecationWarning)
+        logger.warning("This is deprecated, use get_model_context_window_async when possible.")
        from letta.llm_api.google_ai_client import google_ai_get_model_context_window

        if model_name in LLM_MAX_TOKENS:
--- a/letta/schemas/providers/lmstudio.py
+++ b/letta/schemas/providers/lmstudio.py
@@ -1,6 +1,9 @@
-import warnings
 from typing import Literal

+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from pydantic import Field

 from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE
@@ -27,14 +30,14 @@ class LMStudioOpenAIProvider(OpenAIProvider):
        response = await openai_get_model_list_async(self.model_endpoint_url)

        if "data" not in response:
-            warnings.warn(f"LMStudio OpenAI model query response missing 'data' field: {response}")
+            logger.warning(f"LMStudio OpenAI model query response missing 'data' field: {response}")
            return []

        configs = []
        for model in response["data"]:
            model_type = model.get("type")
            if not model_type:
-                warnings.warn(f"LMStudio OpenAI model missing 'type' field: {model}")
+                logger.warning(f"LMStudio OpenAI model missing 'type' field: {model}")
                continue
            if model_type not in ("vlm", "llm"):
                continue
@@ -48,7 +51,7 @@ class LMStudioOpenAIProvider(OpenAIProvider):
            if "compatibility_type" in model:
                compatibility_type = model["compatibility_type"]
            else:
-                warnings.warn(f"LMStudio OpenAI model missing 'compatibility_type' field: {model}")
+                logger.warning(f"LMStudio OpenAI model missing 'compatibility_type' field: {model}")
                continue

            configs.append(
@@ -72,14 +75,14 @@ class LMStudioOpenAIProvider(OpenAIProvider):
        response = await openai_get_model_list_async(self.model_endpoint_url)

        if "data" not in response:
-            warnings.warn(f"LMStudio OpenAI model query response missing 'data' field: {response}")
+            logger.warning(f"LMStudio OpenAI model query response missing 'data' field: {response}")
            return []

        configs = []
        for model in response["data"]:
            model_type = model.get("type")
            if not model_type:
-                warnings.warn(f"LMStudio OpenAI model missing 'type' field: {model}")
+                logger.warning(f"LMStudio OpenAI model missing 'type' field: {model}")
                continue
            if model_type not in ("embeddings"):
                continue
--- a/letta/schemas/providers/together.py
+++ b/letta/schemas/providers/together.py
@@ -4,6 +4,10 @@ Note: this supports completions (deprecated by openai) and chat completions via

 from typing import Literal, Optional

+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from pydantic import Field

 from letta.constants import MIN_CONTEXT_WINDOW
@@ -33,7 +37,7 @@ class TogetherProvider(OpenAIProvider):
    async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
        import warnings

-        warnings.warn(
+        logger.warning(
            "Letta does not currently support listing embedding models for Together. Please "
            "contact support or reach out via GitHub or Discord to get support."
        )
--- a/letta/schemas/providers/xai.py
+++ b/letta/schemas/providers/xai.py
@@ -1,6 +1,9 @@
-import warnings
 from typing import Literal

+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from pydantic import Field

 from letta.schemas.enums import ProviderCategory, ProviderType
@@ -49,7 +52,7 @@ class XAIProvider(OpenAIProvider):
                context_window_size = self.get_model_context_window_size(model_name)

            if not context_window_size:
-                warnings.warn(f"Couldn't find context window size for model {model_name}")
+                logger.warning(f"Couldn't find context window size for model {model_name}")
                continue

            configs.append(
--- a/letta/server/rest_api/interface.py
+++ b/letta/server/rest_api/interface.py
@@ -1,7 +1,10 @@
 import asyncio
 import json
 import queue
-import warnings
+
+from letta.log import get_logger
+
+logger = get_logger(__name__)
 from collections import deque
 from datetime import datetime
 from typing import AsyncGenerator, Literal, Optional, Union
@@ -503,7 +506,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
        data: {"function_return": "None", "status": "success", "date": "2024-02-29T06:07:50.847262+00:00"}
        """
        if not chunk.choices or len(chunk.choices) == 0:
-            warnings.warn(f"No choices in chunk: {chunk}")
+            logger.warning(f"No choices in chunk: {chunk}")
            return None

        choice = chunk.choices[0]
@@ -1028,7 +1031,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                # created=1713216662
                # model='gpt-4o-mini-2024-07-18'
                # object='chat.completion.chunk'
-                warnings.warn(f"Couldn't find delta in chunk: {chunk}")
+                logger.warning(f"Couldn't find delta in chunk: {chunk}")
            return None

        return processed_chunk
@@ -1255,7 +1258,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                    try:
                        func_args = parse_json(function_call.function.arguments)
                    except:
-                        warnings.warn(f"Failed to parse function arguments: {function_call.function.arguments}")
+                        logger.warning(f"Failed to parse function arguments: {function_call.function.arguments}")
                        func_args = {}

                    if (
--- a/letta/server/server.py
+++ b/letta/server/server.py
@@ -2,7 +2,6 @@ import asyncio
 import json
 import os
 import traceback
-import warnings
 from abc import abstractmethod
 from datetime import datetime
 from pathlib import Path
@@ -1149,9 +1148,9 @@ class SyncServer(object):
        #                        llm_config = LLMConfig(**config_data)
        #                        llm_models.append(llm_config)
        #                except (json.JSONDecodeError, ValueError) as e:
-        #                    warnings.warn(f"Error parsing LLM config file {filename}: {e}")
+        #                    logger.warning(f"Error parsing LLM config file {filename}: {e}")
        # except Exception as e:
-        #    warnings.warn(f"Error reading LLM configs directory: {e}")
+        #    logger.warning(f"Error reading LLM configs directory: {e}")
        return llm_models

    def get_local_embedding_configs(self):
@@ -1169,9 +1168,9 @@ class SyncServer(object):
        #                        embedding_config = EmbeddingConfig(**config_data)
        #                        embedding_models.append(embedding_config)
        #                except (json.JSONDecodeError, ValueError) as e:
-        #                    warnings.warn(f"Error parsing embedding config file {filename}: {e}")
+        #                    logger.warning(f"Error parsing embedding config file {filename}: {e}")
        # except Exception as e:
-        #    warnings.warn(f"Error reading embedding configs directory: {e}")
+        #    logger.warning(f"Error reading embedding configs directory: {e}")
        return embedding_models

    def add_llm_model(self, request: LLMConfig) -> LLMConfig:
--- a/letta/services/agent_manager.py
+++ b/letta/services/agent_manager.py
@@ -1954,9 +1954,8 @@ class AgentManager:
        """
        import warnings

-        warnings.warn(
+        logger.warning(
            "list_passages_async is deprecated. Use query_source_passages_async or query_agent_passages_async instead.",
-            DeprecationWarning,
            stacklevel=2,
        )

--- a/letta/services/helpers/agent_manager_helper.py
+++ b/letta/services/helpers/agent_manager_helper.py
@@ -2,6 +2,10 @@ import uuid
 from datetime import datetime
 from typing import List, Literal, Optional, Set

+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 import numpy as np
 from sqlalchemy import Select, and_, asc, desc, func, literal, nulls_last, or_, select, union_all
 from sqlalchemy.orm import noload
@@ -304,7 +308,7 @@ def compile_system_message(
        if append_icm_if_missing:
            if memory_variable_string not in system_prompt:
                # In this case, append it to the end to make sure memory is still injected
-                # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
+                # logger.warning(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
                system_prompt += "\n\n" + memory_variable_string

        # render the variables using the built-in templater
--- a/letta/services/passage_manager.py
+++ b/letta/services/passage_manager.py
@@ -120,12 +120,8 @@ class PassageManager:
    @trace_method
    async def get_passage_by_id_async(self, passage_id: str, actor: PydanticUser) -> Optional[PydanticPassage]:
        """DEPRECATED: Use get_agent_passage_by_id_async() or get_source_passage_by_id_async() instead."""
-        import warnings
-
-        warnings.warn(
-            "get_passage_by_id_async is deprecated. Use get_agent_passage_by_id_async() or get_source_passage_by_id_async() instead.",
-            DeprecationWarning,
-            stacklevel=2,
+        logger.warning(
+            "get_passage_by_id_async is deprecated. Use get_agent_passage_by_id_async() or get_source_passage_by_id_async() instead."
        )

        async with db_registry.async_session() as session:
@@ -231,13 +227,7 @@ class PassageManager:
    @trace_method
    async def create_passage_async(self, pydantic_passage: PydanticPassage, actor: PydanticUser) -> PydanticPassage:
        """DEPRECATED: Use create_agent_passage_async() or create_source_passage_async() instead."""
-        import warnings
-
-        warnings.warn(
-            "create_passage_async is deprecated. Use create_agent_passage_async() or create_source_passage_async() instead.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
+        logger.warning("create_passage_async is deprecated. Use create_agent_passage_async() or create_source_passage_async() instead.")

        # Common fields for both passage types
        passage = self._preprocess_passage_for_creation(pydantic_passage=pydantic_passage)
@@ -365,9 +355,8 @@ class PassageManager:
        """DEPRECATED: Use create_many_agent_passages() or create_many_source_passages() instead."""
        import warnings

-        warnings.warn(
+        logger.warning(
            "create_many_passages is deprecated. Use create_many_agent_passages() or create_many_source_passages() instead.",
-            DeprecationWarning,
            stacklevel=2,
        )
        return [self.create_passage(p, actor) for p in passages]
@@ -378,9 +367,8 @@ class PassageManager:
        """DEPRECATED: Use create_many_agent_passages_async() or create_many_source_passages_async() instead."""
        import warnings

-        warnings.warn(
+        logger.warning(
            "create_many_passages_async is deprecated. Use create_many_agent_passages_async() or create_many_source_passages_async() instead.",
-            DeprecationWarning,
            stacklevel=2,
        )

@@ -651,9 +639,8 @@ class PassageManager:
        """DEPRECATED: Use delete_agent_passage_by_id_async() or delete_source_passage_by_id_async() instead."""
        import warnings

-        warnings.warn(
+        logger.warning(
            "delete_passage_by_id_async is deprecated. Use delete_agent_passage_by_id_async() or delete_source_passage_by_id_async() instead.",
-            DeprecationWarning,
            stacklevel=2,
        )

@@ -765,9 +752,8 @@ class PassageManager:
        """DEPRECATED: Use delete_agent_passages() or delete_source_passages() instead."""
        import warnings

-        warnings.warn(
+        logger.warning(
            "delete_passages is deprecated. Use delete_agent_passages() or delete_source_passages() instead.",
-            DeprecationWarning,
            stacklevel=2,
        )
        # TODO: This is very inefficient
@@ -787,7 +773,7 @@ class PassageManager:
        """DEPRECATED: Use agent_passage_size() instead (this only counted agent passages anyway)."""
        import warnings

-        warnings.warn("size is deprecated. Use agent_passage_size() instead.", DeprecationWarning, stacklevel=2)
+        logger.warning("size is deprecated. Use agent_passage_size() instead.", stacklevel=2)
        return self.agent_passage_size(actor=actor, agent_id=agent_id)

    @enforce_types
--- a/letta/services/tool_manager.py
+++ b/letta/services/tool_manager.py
@@ -1,5 +1,4 @@
 import importlib
-import warnings
 from typing import List, Optional, Set, Union

 from sqlalchemy import and_, func, or_, select
@@ -636,7 +635,7 @@ class ToolManager:
                module = importlib.import_module(module_name)
                functions_to_schema.update(load_function_set(module))
            except ValueError as e:
-                warnings.warn(f"Error loading function set '{module_name}': {e}")
+                logger.warning(f"Error loading function set '{module_name}': {e}")
            except Exception as e:
                raise e

--- a/letta/settings.py
+++ b/letta/settings.py
@@ -6,10 +6,13 @@ from typing import Optional
 from pydantic import AliasChoices, Field
 from pydantic_settings import BaseSettings, SettingsConfigDict

-from letta.local_llm.constants import DEFAULT_WRAPPER_NAME, INNER_THOUGHTS_KWARG
 from letta.schemas.enums import SandboxType
 from letta.services.summarizer.enums import SummarizationMode

+# Defined here rather than imported from letta.local_llm.constants, to avoid a circular import with letta.log (keep values in sync)
+DEFAULT_WRAPPER_NAME = "chatml"
+INNER_THOUGHTS_KWARG = "thinking"
+

 class ToolSettings(BaseSettings):
    # Sandbox Configurations
--- a/letta/system.py
+++ b/letta/system.py
@@ -1,7 +1,10 @@
 import json
-import warnings
 from typing import Optional

+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
 from .constants import (
    INITIAL_BOOT_MESSAGE,
    INITIAL_BOOT_MESSAGE_SEND_MESSAGE_FIRST_MSG,
@@ -160,7 +163,7 @@ def package_system_message(system_message, timezone, message_type="system_alert"
    try:
        message_json = json.loads(system_message)
        if "type" in message_json and message_json["type"] == message_type:
-            warnings.warn(f"Attempted to pack a system message that is already packed. Not packing: '{system_message}'")
+            logger.warning(f"Attempted to pack a system message that is already packed. Not packing: '{system_message}'")
            return system_message
    except:
        pass  # do nothing, expected behavior that the message is not JSON
@@ -251,7 +254,7 @@ def unpack_message(packed_message: str) -> str:
        if "type" in message_json and message_json["type"] in ["login", "heartbeat"]:
            # This is a valid user message that the ADE expects, so don't print warning
            return packed_message
-        warnings.warn(f"Was unable to find 'message' field in packed message object: '{packed_message}'")
+        logger.warning(f"Was unable to find 'message' field in packed message object: '{packed_message}'")
        return packed_message
    else:
        try:
@@ -260,6 +263,6 @@ def unpack_message(packed_message: str) -> str:
            return packed_message

        if message_type != "user_message":
-            warnings.warn(f"Expected type to be 'user_message', but was '{message_type}', so not unpacking: '{packed_message}'")
+            logger.warning(f"Expected type to be 'user_message', but was '{message_type}', so not unpacking: '{packed_message}'")
            return packed_message
        return message_json.get("message")