diff --git a/letta/llm_api/deepseek_client.py b/letta/llm_api/deepseek_client.py
index 462468be..03137921 100644
--- a/letta/llm_api/deepseek_client.py
+++ b/letta/llm_api/deepseek_client.py
@@ -9,10 +9,13 @@ from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
 from letta.llm_api.openai_client import OpenAIClient
+from letta.log import get_logger
 from letta.otel.tracing import trace_method
 from letta.schemas.enums import AgentType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
+
+logger = get_logger(__name__)
 from letta.schemas.openai.chat_completion_request import (
     AssistantMessage,
     ChatCompletionRequest,
@@ -91,7 +94,7 @@ def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Messag
             merged_message = merge_tool_message(deepseek_messages[-1], message)
             deepseek_messages[-1] = merged_message
         else:
-            print(f"Skipping message: {message}")
+            logger.warning(f"Skipping message: {message}")
 
     # This needs to end on a user message, add a dummy message if the last was assistant
     if deepseek_messages[-1].role == "assistant":
@@ -308,7 +311,7 @@ def convert_deepseek_response_to_chatcompletion(
             )
         ]
     except (json.JSONDecodeError, TypeError, KeyError) as e:
-        print(e)
+        logger.error(f"Failed to parse DeepSeek response: {e}")
         tool_calls = response.choices[0].message.tool_calls
         raise ValueError(f"Failed to create valid JSON {content}")
 
diff --git a/letta/llm_api/google_ai_client.py b/letta/llm_api/google_ai_client.py
index e7987aa2..653cea5b 100644
--- a/letta/llm_api/google_ai_client.py
+++ b/letta/llm_api/google_ai_client.py
@@ -92,10 +92,7 @@ async def google_ai_get_model_list_async(
     except httpx.HTTPStatusError as http_err:
         # Handle HTTP errors (e.g., response 4XX, 5XX)
         printd(f"Got HTTPError, exception={http_err}")
-        # Print the HTTP status code
-        print(f"HTTP Error: {http_err.response.status_code}")
-        # Print the response content (error message from server)
-        print(f"Message: {http_err.response.text}")
+        logger.error(f"HTTP Error: {http_err.response.status_code}, Message: {http_err.response.text}")
         raise http_err
 
     except httpx.RequestError as req_err:
@@ -136,10 +133,7 @@ def google_ai_get_model_details(base_url: str, api_key: str, model: str, key_in_
     except httpx.HTTPStatusError as http_err:
         # Handle HTTP errors (e.g., response 4XX, 5XX)
         printd(f"Got HTTPError, exception={http_err}")
-        # Print the HTTP status code
-        print(f"HTTP Error: {http_err.response.status_code}")
-        # Print the response content (error message from server)
-        print(f"Message: {http_err.response.text}")
+        logger.error(f"HTTP Error: {http_err.response.status_code}, Message: {http_err.response.text}")
         raise http_err
 
     except httpx.RequestError as req_err:
@@ -182,10 +176,7 @@ async def google_ai_get_model_details_async(
     except httpx.HTTPStatusError as http_err:
         # Handle HTTP errors (e.g., response 4XX, 5XX)
         printd(f"Got HTTPError, exception={http_err}")
-        # Print the HTTP status code
-        print(f"HTTP Error: {http_err.response.status_code}")
-        # Print the response content (error message from server)
-        print(f"Message: {http_err.response.text}")
+        logger.error(f"HTTP Error: {http_err.response.status_code}, Message: {http_err.response.text}")
         raise http_err
 
     except httpx.RequestError as req_err:
diff --git a/letta/llm_api/helpers.py b/letta/llm_api/helpers.py
index fa7a369a..905c620c 100644
--- a/letta/llm_api/helpers.py
+++ b/letta/llm_api/helpers.py
@@ -8,11 +8,14 @@ import requests
 
 from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
 from letta.helpers.json_helpers import json_dumps
+from letta.log import get_logger
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice
 from letta.settings import summarizer_settings
 from letta.utils import count_tokens, printd
 
+logger = get_logger(__name__)
+
 
 def _convert_to_structured_output_helper(property: dict) -> dict:
     """Convert a single JSON schema property to structured output format (recursive)"""
@@ -323,8 +326,7 @@ def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -
 
             except json.JSONDecodeError as e:
                 warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
-                print(f"\nFailed to strip inner thoughts from kwargs: {e}")
-                print(f"\nTool call arguments: {tool_call.function.arguments}")
+                logger.error(f"Failed to strip inner thoughts from kwargs: {e}, Tool call arguments: {tool_call.function.arguments}")
                 raise e
     else:
         warnings.warn(f"Did not find tool call in message: {str(message)}")
diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py
index 23591d9d..b8eddf21 100644
--- a/letta/llm_api/llm_api_tools.py
+++ b/letta/llm_api/llm_api_tools.py
@@ -9,6 +9,9 @@ import requests
 from letta.constants import CLI_WARNING_PREFIX
 from letta.errors import LettaConfigurationError, RateLimitExceededError
 from letta.llm_api.helpers import unpack_all_inner_thoughts_from_kwargs
+from letta.log import get_logger
+
+logger = get_logger(__name__)
 from letta.llm_api.openai import (
     build_openai_chat_completions_request,
     openai_chat_completions_process_stream,
@@ -95,7 +98,7 @@ def retry_with_exponential_backoff(
 
                 # Sleep for the delay
                 # printd(f"Got a rate limit error ('{http_err}') on LLM backend request, waiting {int(delay)}s then retrying...")
-                print(
+                logger.warning(
                     f"{CLI_WARNING_PREFIX}Got a rate limit error ('{http_err}') on LLM backend request, waiting {int(delay)}s then retrying..."
                 )
                 time.sleep(delay)
diff --git a/letta/llm_api/openai.py b/letta/llm_api/openai.py
index c1524861..a3c79e98 100644
--- a/letta/llm_api/openai.py
+++ b/letta/llm_api/openai.py
@@ -578,7 +578,7 @@ def openai_chat_completions_request_stream(
             # TODO: Use the native OpenAI objects here?
             yield ChatCompletionChunkResponse(**chunk.model_dump(exclude_none=True))
     except Exception as e:
-        print(f"Error request stream from /v1/chat/completions, url={url}, data={data}:\n{e}")
+        logger.error(f"Error request stream from /v1/chat/completions, url={url}, data={data}: {e}")
         raise e
 
 
diff --git a/letta/services/mcp/base_client.py b/letta/services/mcp/base_client.py
index 95259344..0438c34a 100644
--- a/letta/services/mcp/base_client.py
+++ b/letta/services/mcp/base_client.py
@@ -83,10 +83,10 @@ class AsyncBaseMCPClient:
             for content_piece in result.content:
                 if isinstance(content_piece, TextContent):
                     parsed_content.append(content_piece.text)
-                    print("parsed_content (text)", parsed_content)
+                    logger.debug(f"MCP tool result parsed content (text): {parsed_content}")
                 else:
                     parsed_content.append(str(content_piece))
-                    print("parsed_content (other)", parsed_content)
+                    logger.debug(f"MCP tool result parsed content (other): {parsed_content}")
            if len(parsed_content) > 0:
                 final_content = " ".join(parsed_content)
             else:
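
Every file touched above converges on the same pattern: a module-level logger acquired once via get_logger(__name__), then used at an appropriate severity (debug/warning/error) in place of bare print calls. Below is a minimal sketch of that pattern, assuming letta.log.get_logger is a thin wrapper over the stdlib logging.getLogger (its actual implementation is not shown in this diff), with parse_payload as a hypothetical helper for illustration:

    import json
    import logging


    def get_logger(name: str) -> logging.Logger:
        # Stand-in for letta.log.get_logger; assumed to return a
        # stdlib logger configured with the project's handlers.
        return logging.getLogger(name)


    # Acquired once at import time, as in each file above.
    logger = get_logger(__name__)


    def parse_payload(raw: str) -> dict:
        # Hypothetical helper, not part of the diff above.
        try:
            return json.loads(raw)
        except json.JSONDecodeError as e:
            # Leveled, routable logging replaces the bare print(e)
            # calls removed by this change.
            logger.error(f"Failed to parse payload: {e}")
            raise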