diff --git a/letta/llm_api/llm_client_base.py b/letta/llm_api/llm_client_base.py
index 00f0815e..608451a8 100644
--- a/letta/llm_api/llm_client_base.py
+++ b/letta/llm_api/llm_client_base.py
@@ -67,7 +67,7 @@ class LLMClientBase:
 
         try:
             log_event(name="llm_request_sent", attributes=request_data)
-            response_data = self.request(request_data, llm_config)
+            response_data = await self.request_async(request_data, llm_config)
             if step_id and telemetry_manager:
                 telemetry_manager.create_provider_trace(
                     actor=self.actor,
diff --git a/letta/schemas/memory.py b/letta/schemas/memory.py
index 982a97ae..4a9e64e4 100644
--- a/letta/schemas/memory.py
+++ b/letta/schemas/memory.py
@@ -318,14 +318,6 @@ class Memory(BaseModel, validate_assignment=True):
             llm_config=llm_config,
         )
 
-    @trace_method
-    async def compile_in_thread_async(self, tool_usage_rules=None, sources=None, max_files_open=None, llm_config=None) -> str:
-        """Deprecated: use compile() instead."""
-        import warnings
-
-        logger.warning("compile_in_thread_async is deprecated; use compile()", stacklevel=2)
-        return self.compile(tool_usage_rules=tool_usage_rules, sources=sources, max_files_open=max_files_open, llm_config=llm_config)
-
     def list_block_labels(self) -> List[str]:
         """Return a list of the block names held inside the memory object"""
         return [block.label for block in self.blocks]
diff --git a/letta/services/passage_manager.py b/letta/services/passage_manager.py
index cbd30549..99781b55 100644
--- a/letta/services/passage_manager.py
+++ b/letta/services/passage_manager.py
@@ -1,9 +1,8 @@
 import uuid
 from datetime import datetime, timezone
-from functools import lru_cache
 from typing import Dict, List, Optional
 
-from openai import AsyncOpenAI, OpenAI
+from openai import AsyncOpenAI
 from sqlalchemy import func, select
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import noload
@@ -31,16 +30,6 @@ from letta.utils import enforce_types
 
 logger = get_logger(__name__)
 
-# TODO: Add redis-backed caching for backend
-@lru_cache(maxsize=8192)
-def get_openai_embedding(text: str, model: str, endpoint: str) -> List[float]:
-    from letta.settings import model_settings
-
-    client = OpenAI(api_key=model_settings.openai_api_key, base_url=endpoint, max_retries=0)
-    response = client.embeddings.create(input=text, model=model)
-    return response.data[0].embedding
-
-
 @async_redis_cache(key_func=lambda text, model, endpoint: f"{model}:{endpoint}:{text}")
 async def get_openai_embedding_async(text: str, model: str, endpoint: str) -> list[float]:
     from letta.settings import model_settings
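
With the synchronous, lru_cache-backed `get_openai_embedding` removed, any remaining sync call sites would need to migrate to the redis-cached async variant. A minimal caller sketch under that assumption, importing the surviving helper from `letta.services.passage_manager` (per the diff); the model name and endpoint below are illustrative placeholders, not values taken from this PR:

```python
import asyncio

# Surviving helper per the diff; the sync twin no longer exists.
from letta.services.passage_manager import get_openai_embedding_async


async def main() -> None:
    # Awaiting the async variant; results are memoized via the
    # @async_redis_cache decorator keyed on (model, endpoint, text).
    embedding = await get_openai_embedding_async(
        "hello world",
        model="text-embedding-3-small",        # hypothetical model choice
        endpoint="https://api.openai.com/v1",  # hypothetical endpoint
    )
    print(len(embedding))


asyncio.run(main())
```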