fix: async functions must call async methods (#8612)
Critical fixes:
- `llm_client_base.send_llm_request()` now calls `await self.request_async()` instead of the synchronous `self.request()`.
- Remove the unused synchronous `get_openai_embedding()`, which used the sync OpenAI client.
- Remove the deprecated `compile_in_thread_async()` from `Memory`.

These synchronous calls were blocking the event loop during LLM requests and embedding lookups.

🐾 Generated with [Letta Code](https://letta.com)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
@@ -67,7 +67,7 @@ class LLMClientBase:
|
||||
|
||||
try:
|
||||
log_event(name="llm_request_sent", attributes=request_data)
|
||||
response_data = self.request(request_data, llm_config)
|
||||
response_data = await self.request_async(request_data, llm_config)
|
||||
if step_id and telemetry_manager:
|
||||
telemetry_manager.create_provider_trace(
|
||||
actor=self.actor,
|
||||
|
||||
@@ -318,14 +318,6 @@ class Memory(BaseModel, validate_assignment=True):
|
||||
llm_config=llm_config,
|
||||
)
|
||||
|
||||
@trace_method
async def compile_in_thread_async(self, tool_usage_rules=None, sources=None, max_files_open=None, llm_config=None) -> str:
    """Deprecated: use compile() instead.

    Backward-compatibility async shim; delegates directly to the synchronous
    compile() with identical arguments and returns its result unchanged.

    Args:
        tool_usage_rules: forwarded to compile().
        sources: forwarded to compile().
        max_files_open: forwarded to compile().
        llm_config: forwarded to compile().

    Returns:
        The compiled memory string produced by compile().
    """
    import warnings

    # Fix: the original imported `warnings` but never used it, so callers got
    # no DeprecationWarning — only a log line. Emit both; stacklevel=2 points
    # the warning/log at the caller rather than this shim.
    warnings.warn(
        "compile_in_thread_async is deprecated; use compile()",
        DeprecationWarning,
        stacklevel=2,
    )
    logger.warning("compile_in_thread_async is deprecated; use compile()", stacklevel=2)
    return self.compile(tool_usage_rules=tool_usage_rules, sources=sources, max_files_open=max_files_open, llm_config=llm_config)
|
||||
|
||||
def list_block_labels(self) -> List[str]:
    """Collect the label of every block currently held by this memory object."""
    labels: List[str] = []
    for blk in self.blocks:
        labels.append(blk.label)
    return labels
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from functools import lru_cache
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from openai import AsyncOpenAI, OpenAI
|
||||
from openai import AsyncOpenAI
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import noload
|
||||
@@ -31,16 +30,6 @@ from letta.utils import enforce_types
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
# TODO: Add redis-backed caching for backend
|
||||
@lru_cache(maxsize=8192)
def get_openai_embedding(text: str, model: str, endpoint: str) -> List[float]:
    """Fetch the embedding vector for `text` from an OpenAI-compatible endpoint.

    Results are memoized in-process via lru_cache, keyed on
    (text, model, endpoint).

    NOTE(review): this uses the *synchronous* OpenAI client, so it will block
    the event loop if called from async code — presumably callers should prefer
    get_openai_embedding_async; verify before reusing.
    """
    from letta.settings import model_settings

    embedding_client = OpenAI(
        api_key=model_settings.openai_api_key,
        base_url=endpoint,
        max_retries=0,
    )
    result = embedding_client.embeddings.create(input=text, model=model)
    return result.data[0].embedding
|
||||
|
||||
|
||||
@async_redis_cache(key_func=lambda text, model, endpoint: f"{model}:{endpoint}:{text}")
|
||||
async def get_openai_embedding_async(text: str, model: str, endpoint: str) -> list[float]:
|
||||
from letta.settings import model_settings
|
||||
|
||||
Reference in New Issue
Block a user