import asyncio
import base64
import hashlib
import os
from concurrent.futures import ThreadPoolExecutor
from functools import lru_cache
from typing import Optional

from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

from letta.settings import settings

# Eagerly load the cryptography backend at module import time.
_CRYPTO_BACKEND = default_backend()

# Dedicated thread pool for CPU-intensive crypto operations.
# Keeps PBKDF2 work off the event loop's default executor so it cannot starve
# health checks and other operations.
_crypto_executor = ThreadPoolExecutor(max_workers=8, thread_name_prefix="CryptoWorker")

# Common API key prefixes that should not be considered encrypted.
# These are plaintext credentials that happen to be long strings.
PLAINTEXT_PREFIXES = (
    "sk-",  # OpenAI, Anthropic
    "pk-",  # Public keys
    "api-",  # Generic API keys
    "key-",  # Generic keys
    "token-",  # Generic tokens
    "Bearer ",  # Auth headers
    "xoxb-",  # Slack bot tokens
    "xoxp-",  # Slack user tokens
    "ghp_",  # GitHub personal access tokens
    "gho_",  # GitHub OAuth tokens
    "ghu_",  # GitHub user-to-server tokens
    "ghs_",  # GitHub server-to-server tokens
    "ghr_",  # GitHub refresh tokens
    "AKIA",  # AWS access key IDs
    "ABIA",  # AWS STS tokens
    "ACCA",  # AWS CloudFront
    "ASIA",  # AWS temporary credentials
)


class CryptoUtils:
    """Utility class for AES-256-GCM encryption/decryption of sensitive data.

    Encrypted values are base64 strings whose decoded bytes are laid out as
    ``salt (SALT_SIZE) | iv (IV_SIZE) | ciphertext | tag (TAG_SIZE)``.
    The AES key is derived from the master key and the per-value salt with
    PBKDF2-HMAC-SHA256.
    """

    # AES-256 requires 32 bytes key
    KEY_SIZE = 32
    # GCM standard IV size is 12 bytes (96 bits)
    IV_SIZE = 12
    # GCM tag size is 16 bytes (128 bits)
    TAG_SIZE = 16
    # Salt size for key derivation
    SALT_SIZE = 16
    # WARNING: DO NOT CHANGE THIS VALUE UNLESS YOU ARE SURE WHAT YOU ARE DOING
    # EXISTING ENCRYPTED SECRETS MUST BE DECRYPTED WITH THE SAME ITERATIONS
    # Number of PBKDF2 iterations
    PBKDF2_ITERATIONS = 100000

    # Raised (as ValueError) by every public entry point when no key is available.
    _NO_KEY_MESSAGE = "No encryption key configured. Please set the LETTA_ENCRYPTION_KEY environment variable (not fully supported yet for Letta v0.12.1 and below)."

    # ------------------------------------------------------------------
    # Key derivation
    # ------------------------------------------------------------------

    @classmethod
    @lru_cache(maxsize=256)
    def _derive_key_cached(cls, master_key: str, salt: bytes) -> bytes:
        """
        Derive an AES key from the master key using PBKDF2 with caching.

        Runs PBKDF2-HMAC-SHA256 for ``PBKDF2_ITERATIONS`` rounds, which is
        CPU-intensive (on the order of 100-500ms). The derivation is
        deterministic for a given ``(master_key, salt)`` pair, so up to 256
        results are memoised.

        WARNING: This is a synchronous blocking operation. Use
        _derive_key_async() in async contexts to avoid blocking the event loop.

        Args:
            master_key: The master secret; UTF-8 encoded before derivation
            salt: The per-value salt

        Returns:
            KEY_SIZE bytes of derived key material
        """
        return hashlib.pbkdf2_hmac(
            hash_name="sha256",
            password=master_key.encode(),
            salt=salt,
            iterations=cls.PBKDF2_ITERATIONS,
            dklen=cls.KEY_SIZE,
        )

    @classmethod
    def _derive_key(cls, master_key: str, salt: bytes) -> bytes:
        """Derive an AES key from the master key using PBKDF2 (cached)."""
        return cls._derive_key_cached(master_key, salt)

    @classmethod
    async def _derive_key_async(cls, master_key: str, salt: bytes) -> bytes:
        """
        Async version of _derive_key that runs PBKDF2 in a dedicated thread pool.

        The work is submitted to the module's crypto executor (8 workers)
        instead of the event loop's default executor, so slow key derivations
        do not compete with other offloaded work.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(_crypto_executor, cls._derive_key, master_key, salt)

    # ------------------------------------------------------------------
    # Private helpers shared by the sync and async entry points
    # ------------------------------------------------------------------

    @classmethod
    def _resolve_master_key(cls, master_key: Optional[str]) -> str:
        """Return the explicit master key, or settings.encryption_key when None.

        Raises:
            ValueError: If the resulting key is missing or empty
        """
        if master_key is None:
            master_key = settings.encryption_key
        if not master_key:
            raise ValueError(cls._NO_KEY_MESSAGE)
        return master_key

    @classmethod
    def _seal(cls, plaintext: str, key: bytes, salt: bytes, iv: bytes) -> str:
        """Encrypt plaintext with an already-derived key and pack the payload as base64."""
        encryptor = Cipher(algorithms.AES(key), modes.GCM(iv), backend=_CRYPTO_BACKEND).encryptor()
        ciphertext = encryptor.update(plaintext.encode()) + encryptor.finalize()
        # The tag is only available after finalize(); layout is salt + iv + ciphertext + tag.
        packed = salt + iv + ciphertext + encryptor.tag
        return base64.b64encode(packed).decode("utf-8")

    @classmethod
    def _split_payload(cls, encrypted: str) -> tuple:
        """Decode a base64 payload and split it into (salt, iv, ciphertext, tag).

        Raises:
            ValueError: If the decoded payload is too short to hold salt, IV and tag
        """
        data = base64.b64decode(encrypted)
        header_size = cls.SALT_SIZE + cls.IV_SIZE
        minimum_size = header_size + cls.TAG_SIZE
        # Reject truncated input before the expensive PBKDF2 derivation runs on
        # a bogus salt. An empty plaintext yields an empty ciphertext, so a
        # payload of exactly minimum_size bytes is valid.
        if len(data) < minimum_size:
            raise ValueError(f"encrypted payload is too short ({len(data)} bytes, expected at least {minimum_size})")
        salt = data[: cls.SALT_SIZE]
        iv = data[cls.SALT_SIZE : header_size]
        ciphertext = data[header_size : -cls.TAG_SIZE]
        tag = data[-cls.TAG_SIZE :]
        return salt, iv, ciphertext, tag

    @classmethod
    def _open(cls, key: bytes, iv: bytes, ciphertext: bytes, tag: bytes) -> str:
        """Decrypt and authenticate ciphertext with an already-derived key."""
        decryptor = Cipher(algorithms.AES(key), modes.GCM(iv, tag), backend=_CRYPTO_BACKEND).decryptor()
        # finalize() verifies the authentication tag and raises on mismatch.
        plaintext = decryptor.update(ciphertext) + decryptor.finalize()
        return plaintext.decode("utf-8")

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    @classmethod
    def encrypt(cls, plaintext: str, master_key: Optional[str] = None) -> str:
        """
        Encrypt a string using AES-256-GCM (synchronous version).

        WARNING: This performs CPU-intensive PBKDF2 key derivation that can
        block for 100-500ms. Use encrypt_async() in async contexts to avoid
        blocking the event loop.

        Args:
            plaintext: The string to encrypt
            master_key: Optional master key (defaults to settings.encryption_key)

        Returns:
            Base64 encoded string containing: salt + iv + ciphertext + tag

        Raises:
            ValueError: If no encryption key is configured
        """
        master_key = cls._resolve_master_key(master_key)

        # Generate random salt and IV
        salt = os.urandom(cls.SALT_SIZE)
        iv = os.urandom(cls.IV_SIZE)

        # The salt is freshly random, so this derivation is effectively always
        # a cache miss; the result is cached for a later decrypt of this value.
        key = cls._derive_key(master_key, salt)
        return cls._seal(plaintext, key, salt, iv)

    @classmethod
    async def encrypt_async(cls, plaintext: str, master_key: Optional[str] = None) -> str:
        """
        Encrypt a string using AES-256-GCM (async version).

        Runs the CPU-intensive PBKDF2 key derivation in a thread pool to avoid
        blocking the event loop; the AES-GCM step itself runs inline.

        Args:
            plaintext: The string to encrypt
            master_key: Optional master key (defaults to settings.encryption_key)

        Returns:
            Base64 encoded string containing: salt + iv + ciphertext + tag

        Raises:
            ValueError: If no encryption key is configured
        """
        master_key = cls._resolve_master_key(master_key)

        # Generate random salt and IV
        salt = os.urandom(cls.SALT_SIZE)
        iv = os.urandom(cls.IV_SIZE)

        # Derive key from master key (async to avoid blocking)
        key = await cls._derive_key_async(master_key, salt)
        return cls._seal(plaintext, key, salt, iv)

    @classmethod
    def decrypt(cls, encrypted: str, master_key: Optional[str] = None) -> str:
        """
        Decrypt a string that was encrypted using AES-256-GCM (synchronous version).

        WARNING: This performs CPU-intensive PBKDF2 key derivation that can
        block for 100-500ms on a cache miss. Use decrypt_async() in async
        contexts to avoid blocking the event loop.

        Args:
            encrypted: Base64 encoded encrypted string
            master_key: Optional master key (defaults to settings.encryption_key)

        Returns:
            The decrypted plaintext string

        Raises:
            ValueError: If no encryption key is configured or decryption fails
        """
        master_key = cls._resolve_master_key(master_key)

        try:
            salt, iv, ciphertext, tag = cls._split_payload(encrypted)
            # Derive key from master key (CPU-intensive, cached per salt)
            key = cls._derive_key(master_key, salt)
            return cls._open(key, iv, ciphertext, tag)
        except Exception as e:
            # Any failure (bad base64, short payload, tag mismatch, bad UTF-8)
            # surfaces as ValueError; the cause is chained for debugging.
            raise ValueError(f"Failed to decrypt data: {e}") from e

    @classmethod
    async def decrypt_async(cls, encrypted: str, master_key: Optional[str] = None) -> str:
        """
        Decrypt a string that was encrypted using AES-256-GCM (async version).

        Runs the CPU-intensive PBKDF2 key derivation in a thread pool to avoid
        blocking the event loop; the AES-GCM step itself runs inline.

        Args:
            encrypted: Base64 encoded encrypted string
            master_key: Optional master key (defaults to settings.encryption_key)

        Returns:
            The decrypted plaintext string

        Raises:
            ValueError: If no encryption key is configured or decryption fails
        """
        master_key = cls._resolve_master_key(master_key)

        try:
            salt, iv, ciphertext, tag = cls._split_payload(encrypted)
            # Derive key from master key (async to avoid blocking)
            key = await cls._derive_key_async(master_key, salt)
            return cls._open(key, iv, ciphertext, tag)
        except Exception as e:
            # Any failure (bad base64, short payload, tag mismatch, bad UTF-8)
            # surfaces as ValueError; the cause is chained for debugging.
            raise ValueError(f"Failed to decrypt data: {e}") from e

    @classmethod
    def is_encrypted(cls, value: str) -> bool:
        """
        Check if a string appears to be encrypted (base64 encoded with correct size).

        This is a heuristic check that excludes common API key patterns to
        reduce false positives. Strings matching known API key prefixes are
        assumed to be plaintext credentials, not encrypted values.

        Args:
            value: The string to inspect; non-string values are reported as
                not encrypted

        Returns:
            True if the value base64-decodes to at least
            salt + iv + tag + 1 bytes, False otherwise
        """
        if not isinstance(value, str):
            return False

        # Exclude strings that look like known API key formats
        if value.startswith(PLAINTEXT_PREFIXES):
            return False

        try:
            # Non-validating decode, same as decrypt(): characters outside the
            # base64 alphabet are discarded rather than rejected.
            decoded = base64.b64decode(value)
        except (ValueError, TypeError):
            # binascii.Error (bad padding) is a ValueError subclass.
            return False

        # Minimum size: salt(16) + iv(12) + tag(16) + at least 1 byte of ciphertext.
        # NOTE: this means the encryption of an empty string is not recognised.
        return len(decoded) >= cls.SALT_SIZE + cls.IV_SIZE + cls.TAG_SIZE + 1

    @classmethod
    def is_encryption_available(cls) -> bool:
        """
        Check if encryption is available (encryption key is configured).

        Returns:
            True if encryption key is configured, False otherwise
        """
        return bool(settings.encryption_key)