import asyncio
import random
import time
from functools import wraps
from typing import Any, Dict, List, Optional

from letta.otel.tracing import trace_method

try:
    from pinecone import IndexEmbed, PineconeAsyncio
    from pinecone.exceptions.exceptions import (
        ForbiddenException,
        NotFoundException,
        PineconeApiException,
        ServiceException,
        UnauthorizedException,
    )

    PINECONE_AVAILABLE = True
except ImportError:
    PINECONE_AVAILABLE = False

from letta.constants import (
    PINECONE_CLOUD,
    PINECONE_EMBEDDING_MODEL,
    PINECONE_MAX_BATCH_SIZE,
    PINECONE_MAX_RETRY_ATTEMPTS,
    PINECONE_METRIC,
    PINECONE_REGION,
    PINECONE_RETRY_BACKOFF_FACTOR,
    PINECONE_RETRY_BASE_DELAY,
    PINECONE_RETRY_MAX_DELAY,
    PINECONE_TEXT_FIELD_NAME,
    PINECONE_THROTTLE_DELAY,
)
from letta.log import get_logger
from letta.schemas.user import User
from letta.settings import settings

logger = get_logger(__name__)

def pinecone_retry(
    max_attempts: int = PINECONE_MAX_RETRY_ATTEMPTS,
    base_delay: float = PINECONE_RETRY_BASE_DELAY,
    max_delay: float = PINECONE_RETRY_MAX_DELAY,
    backoff_factor: float = PINECONE_RETRY_BACKOFF_FACTOR,
):
    """
    Decorator to retry async Pinecone operations with exponential backoff.

    Args:
        max_attempts: Maximum number of attempts before giving up
        base_delay: Base delay in seconds for the first retry
        max_delay: Maximum delay in seconds between retries
        backoff_factor: Factor by which the delay grows after each failed attempt
    """

    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            operation_name = func.__name__
            start_time = time.time()

            for attempt in range(max_attempts):
                try:
                    logger.debug(f"[Pinecone] Starting {operation_name} (attempt {attempt + 1}/{max_attempts})")
                    result = await func(*args, **kwargs)

                    execution_time = time.time() - start_time
                    logger.info(f"[Pinecone] {operation_name} completed successfully in {execution_time:.2f}s")
                    return result

                # the specific handlers must come before the generic
                # PineconeApiException handler: UnauthorizedException,
                # ForbiddenException, and NotFoundException all subclass
                # PineconeApiException and would otherwise be retried
                except (UnauthorizedException, ForbiddenException) as e:
                    # non-retryable auth errors
                    execution_time = time.time() - start_time
                    logger.error(f"[Pinecone] {operation_name} failed with auth error in {execution_time:.2f}s: {str(e)}")
                    raise

                except NotFoundException as e:
                    # non-retryable not found errors
                    execution_time = time.time() - start_time
                    logger.warning(f"[Pinecone] {operation_name} failed with not found error in {execution_time:.2f}s: {str(e)}")
                    raise

                except (ServiceException, PineconeApiException) as e:
                    # retryable server errors
                    if attempt == max_attempts - 1:
                        execution_time = time.time() - start_time
                        logger.error(f"[Pinecone] {operation_name} failed after {max_attempts} attempts in {execution_time:.2f}s: {str(e)}")
                        raise

                    # calculate delay with exponential backoff and jitter
                    delay = min(base_delay * (backoff_factor**attempt), max_delay)
                    jitter = random.uniform(0, delay * 0.1)  # add up to 10% jitter
                    total_delay = delay + jitter

                    logger.warning(
                        f"[Pinecone] {operation_name} failed (attempt {attempt + 1}/{max_attempts}): {str(e)}. Retrying in {total_delay:.2f}s"
                    )
                    await asyncio.sleep(total_delay)

                except Exception as e:
                    # other unexpected errors - retry with the same backoff, then fail
                    if attempt == max_attempts - 1:
                        execution_time = time.time() - start_time
                        logger.error(f"[Pinecone] {operation_name} failed after {max_attempts} attempts in {execution_time:.2f}s: {str(e)}")
                        raise

                    delay = min(base_delay * (backoff_factor**attempt), max_delay)
                    jitter = random.uniform(0, delay * 0.1)
                    total_delay = delay + jitter

                    logger.warning(
                        f"[Pinecone] {operation_name} failed with unexpected error (attempt {attempt + 1}/{max_attempts}): {str(e)}. Retrying in {total_delay:.2f}s"
                    )
                    await asyncio.sleep(total_delay)

        return wrapper

    return decorator

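# A minimal usage sketch for pinecone_retry (illustrative only; the function
# name and body below are hypothetical, not part of this module). Any async
# callable wrapped this way inherits the same backoff, jitter, and logging:
#
#     @pinecone_retry(max_attempts=3, base_delay=0.5)
#     async def fetch_index_host(index_name: str) -> str:
#         async with PineconeAsyncio(api_key=settings.pinecone_api_key) as pc:
#             description = await pc.describe_index(name=index_name)
#             return description.index.host
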
def should_use_pinecone(verbose: bool = False) -> bool:
    """Return True only if Pinecone is installed, enabled, and fully configured."""
    if verbose:
        logger.info(
            "Pinecone check: enable_pinecone=%s, api_key=%s, agent_index=%s, source_index=%s",
            settings.enable_pinecone,
            bool(settings.pinecone_api_key),
            bool(settings.pinecone_agent_index),
            bool(settings.pinecone_source_index),
        )

    return all(
        (
            PINECONE_AVAILABLE,
            settings.enable_pinecone,
            settings.pinecone_api_key,
            settings.pinecone_agent_index,
            settings.pinecone_source_index,
        )
    )

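# Typical call-site guard (illustrative): every entry point below raises
# ImportError when the pinecone package is missing, so callers check first.
#
#     if should_use_pinecone(verbose=True):
#         await upsert_pinecone_indices()
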
@pinecone_retry()
@trace_method
async def upsert_pinecone_indices():
    if not PINECONE_AVAILABLE:
        raise ImportError("Pinecone is not available. Please install pinecone to use this feature.")

    indices = get_pinecone_indices()
    logger.info(f"[Pinecone] Upserting {len(indices)} indices: {indices}")

    for index_name in indices:
        async with PineconeAsyncio(api_key=settings.pinecone_api_key) as pc:
            if not await pc.has_index(index_name):
                logger.info(f"[Pinecone] Creating index {index_name} with model {PINECONE_EMBEDDING_MODEL}")
                await pc.create_index_for_model(
                    name=index_name,
                    cloud=PINECONE_CLOUD,
                    region=PINECONE_REGION,
                    embed=IndexEmbed(model=PINECONE_EMBEDDING_MODEL, field_map={"text": PINECONE_TEXT_FIELD_NAME}, metric=PINECONE_METRIC),
                )
                logger.info(f"[Pinecone] Successfully created index {index_name}")
            else:
                logger.debug(f"[Pinecone] Index {index_name} already exists")


def get_pinecone_indices() -> List[str]:
    return [settings.pinecone_agent_index, settings.pinecone_source_index]

@pinecone_retry()
@trace_method
async def upsert_file_records_to_pinecone_index(file_id: str, source_id: str, chunks: List[str], actor: User):
    if not PINECONE_AVAILABLE:
        raise ImportError("Pinecone is not available. Please install pinecone to use this feature.")

    logger.info(f"[Pinecone] Preparing to upsert {len(chunks)} chunks for file {file_id} source {source_id}")

    records = []
    for i, chunk in enumerate(chunks):
        # record IDs are prefixed with the file ID so chunks can later be
        # listed (and deleted) per file via an ID-prefix query
        record = {
            "_id": f"{file_id}_{i}",
            PINECONE_TEXT_FIELD_NAME: chunk,
            "file_id": file_id,
            "source_id": source_id,
        }
        records.append(record)

    logger.debug(f"[Pinecone] Created {len(records)} records for file {file_id}")
    return await upsert_records_to_pinecone_index(records, actor)

@pinecone_retry()
@trace_method
async def delete_file_records_from_pinecone_index(file_id: str, actor: User):
    if not PINECONE_AVAILABLE:
        raise ImportError("Pinecone is not available. Please install pinecone to use this feature.")

    namespace = actor.organization_id
    logger.info(f"[Pinecone] Deleting records for file {file_id} from index {settings.pinecone_source_index} namespace {namespace}")

    try:
        async with PineconeAsyncio(api_key=settings.pinecone_api_key) as pc:
            description = await pc.describe_index(name=settings.pinecone_source_index)
            async with pc.IndexAsyncio(host=description.index.host) as dense_index:
                await dense_index.delete(
                    filter={
                        "file_id": {"$eq": file_id},
                    },
                    namespace=namespace,
                )
                logger.info(f"[Pinecone] Successfully deleted records for file {file_id}")
    except NotFoundException:
        logger.warning(f"[Pinecone] Namespace {namespace} not found for file {file_id} and org {actor.organization_id}")

@pinecone_retry()
@trace_method
async def delete_source_records_from_pinecone_index(source_id: str, actor: User):
    if not PINECONE_AVAILABLE:
        raise ImportError("Pinecone is not available. Please install pinecone to use this feature.")

    namespace = actor.organization_id
    logger.info(f"[Pinecone] Deleting records for source {source_id} from index {settings.pinecone_source_index} namespace {namespace}")

    try:
        async with PineconeAsyncio(api_key=settings.pinecone_api_key) as pc:
            description = await pc.describe_index(name=settings.pinecone_source_index)
            async with pc.IndexAsyncio(host=description.index.host) as dense_index:
                await dense_index.delete(filter={"source_id": {"$eq": source_id}}, namespace=namespace)
                logger.info(f"[Pinecone] Successfully deleted records for source {source_id}")
    except NotFoundException:
        logger.warning(f"[Pinecone] Namespace {namespace} not found for source {source_id} and org {actor.organization_id}")

@pinecone_retry()
@trace_method
async def upsert_records_to_pinecone_index(records: List[dict], actor: User):
    if not PINECONE_AVAILABLE:
        raise ImportError("Pinecone is not available. Please install pinecone to use this feature.")

    logger.info(f"[Pinecone] Upserting {len(records)} records to index {settings.pinecone_source_index} for org {actor.organization_id}")

    async with PineconeAsyncio(api_key=settings.pinecone_api_key) as pc:
        description = await pc.describe_index(name=settings.pinecone_source_index)
        async with pc.IndexAsyncio(host=description.index.host) as dense_index:
            # process records in batches to avoid exceeding pinecone limits
            total_batches = (len(records) + PINECONE_MAX_BATCH_SIZE - 1) // PINECONE_MAX_BATCH_SIZE
            logger.debug(f"[Pinecone] Processing {total_batches} batches of max {PINECONE_MAX_BATCH_SIZE} records each")

            for i in range(0, len(records), PINECONE_MAX_BATCH_SIZE):
                batch = records[i : i + PINECONE_MAX_BATCH_SIZE]
                batch_num = (i // PINECONE_MAX_BATCH_SIZE) + 1

                logger.debug(f"[Pinecone] Upserting batch {batch_num}/{total_batches} with {len(batch)} records")
                await dense_index.upsert_records(actor.organization_id, batch)

                # throttle between batches (except the last one)
                if batch_num < total_batches:
                    jitter = random.uniform(0, PINECONE_THROTTLE_DELAY * 0.2)  # add up to 20% jitter
                    throttle_delay = PINECONE_THROTTLE_DELAY + jitter
                    logger.debug(f"[Pinecone] Throttling for {throttle_delay:.3f}s before next batch")
                    await asyncio.sleep(throttle_delay)

    logger.info(f"[Pinecone] Successfully upserted all {len(records)} records in {total_batches} batches")

@pinecone_retry()
@trace_method
async def search_pinecone_index(query: str, limit: int, filter: Dict[str, Any], actor: User) -> Dict[str, Any]:
    if not PINECONE_AVAILABLE:
        raise ImportError("Pinecone is not available. Please install pinecone to use this feature.")

    namespace = actor.organization_id
    logger.info(
        f"[Pinecone] Searching index {settings.pinecone_source_index} namespace {namespace} with query length {len(query)} chars, limit {limit}"
    )
    logger.debug(f"[Pinecone] Search filter: {filter}")

    async with PineconeAsyncio(api_key=settings.pinecone_api_key) as pc:
        description = await pc.describe_index(name=settings.pinecone_source_index)
        async with pc.IndexAsyncio(host=description.index.host) as dense_index:
            try:
                # search the dense index with reranking
                search_results = await dense_index.search(
                    namespace=namespace,
                    query={
                        "top_k": limit,
                        "inputs": {"text": query},
                        "filter": filter,
                    },
                    rerank={"model": "bge-reranker-v2-m3", "top_n": limit, "rank_fields": [PINECONE_TEXT_FIELD_NAME]},
                )

                result_count = len(search_results.get("matches", []))
                logger.info(f"[Pinecone] Search completed, found {result_count} matches")
                return search_results

            except Exception as e:
                logger.warning(f"[Pinecone] Failed to search namespace {namespace}: {str(e)}")
                raise

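# Example call shape (illustrative; the filter value is hypothetical, matching
# the metadata fields written by upsert_file_records_to_pinecone_index):
#
#     results = await search_pinecone_index(
#         query="quarterly revenue",
#         limit=5,
#         filter={"source_id": {"$eq": "source-123"}},
#         actor=actor,
#     )
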
@pinecone_retry()
@trace_method
async def list_pinecone_index_for_files(
    file_id: str, actor: User, limit: Optional[int] = None, pagination_token: Optional[str] = None
) -> List[str]:
    if not PINECONE_AVAILABLE:
        raise ImportError("Pinecone is not available. Please install pinecone to use this feature.")

    namespace = actor.organization_id
    logger.info(f"[Pinecone] Listing records for file {file_id} from index {settings.pinecone_source_index} namespace {namespace}")
    logger.debug(f"[Pinecone] List params - limit: {limit}, pagination_token: {pagination_token}")

    try:
        async with PineconeAsyncio(api_key=settings.pinecone_api_key) as pc:
            description = await pc.describe_index(name=settings.pinecone_source_index)
            async with pc.IndexAsyncio(host=description.index.host) as dense_index:
                # the upsert path writes IDs as f"{file_id}_{i}", so an ID-prefix
                # query scoped to the file ID returns all of its chunks
                kwargs = {"namespace": namespace, "prefix": file_id}
                if limit is not None:
                    kwargs["limit"] = limit
                if pagination_token is not None:
                    kwargs["pagination_token"] = pagination_token

                try:
                    result = []
                    async for ids in dense_index.list(**kwargs):
                        result.extend(ids)

                    logger.info(f"[Pinecone] Successfully listed {len(result)} records for file {file_id}")
                    return result

                except Exception as e:
                    logger.warning(f"[Pinecone] Failed to list records for file {file_id} in namespace {namespace}: {str(e)}")
                    raise

    except NotFoundException:
        logger.warning(f"[Pinecone] Namespace {namespace} not found for file {file_id} and org {actor.organization_id}")
        return []
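

# Example (illustrative; the file ID is hypothetical): fetch every chunk ID
# for a file, relying on the file-ID prefix scheme used at upsert time.
#
#     ids = await list_pinecone_index_for_files(file_id="file-abc", actor=actor)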