fix: exclude common API key prefixes from encryption detection (#6624)

* fix: exclude common API key prefixes from encryption detection

Add a list of known API key prefixes (OpenAI, Anthropic, GitHub, AWS,
Slack, etc.) to prevent is_encrypted() from incorrectly identifying
plaintext credentials as encrypted values.

* update

* test
This commit is contained in:
jnjpng
2025-12-10 18:16:58 -08:00
committed by Caren Thomas
parent 15cede7281
commit 17a90538ca
2 changed files with 135 additions and 1 deletions

View File

@@ -9,6 +9,28 @@ from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
from letta.settings import settings
# Prefixes of well-known plaintext credential formats. is_encrypted() treats
# any value starting with one of these as an unencrypted secret, even when the
# value is long enough to resemble encrypted output.
PLAINTEXT_PREFIXES = (
    # "sk-" style secret keys (OpenAI, Anthropic) and other generic key/token forms
    "sk-",
    "pk-",
    "api-",
    "key-",
    "token-",
    # Authorization header values
    "Bearer ",
    # Slack: bot tokens, then user tokens
    "xoxb-",
    "xoxp-",
    # GitHub: personal access, OAuth, user-to-server, server-to-server, refresh
    "ghp_",
    "gho_",
    "ghu_",
    "ghs_",
    "ghr_",
    # AWS identifiers: access key IDs (AKIA), STS service bearer tokens (ABIA),
    # context-specific credentials (ACCA), temporary STS access key IDs (ASIA)
    "AKIA",
    "ABIA",
    "ACCA",
    "ASIA",
)
class CryptoUtils:
"""Utility class for AES-256-GCM encryption/decryption of sensitive data."""
@@ -127,8 +149,14 @@ class CryptoUtils:
"""
Check if a string appears to be encrypted (base64 encoded with correct size).
This is a heuristic check and may have false positives.
This is a heuristic check that excludes common API key patterns to reduce
false positives. Strings matching known API key prefixes are assumed to be
plaintext credentials, not encrypted values.
"""
# Exclude strings that look like known API key formats
if any(value.startswith(prefix) for prefix in PLAINTEXT_PREFIXES):
return False
try:
decoded = base64.b64decode(value)
# Check if length is consistent with our encryption format

View File

@@ -233,3 +233,109 @@ class TestCryptoUtils:
encrypted = CryptoUtils.encrypt(plaintext, self.MOCK_KEY_1)
decrypted = CryptoUtils.decrypt(encrypted, self.MOCK_KEY_1)
assert decrypted == plaintext, f"Whitespace handling failed for: {repr(plaintext)}"
class TestIsEncrypted:
    """Checks for the CryptoUtils.is_encrypted heuristic.

    Genuinely encrypted values must be recognized, while plaintext
    credentials, short strings, and malformed input must not be.
    """

    MOCK_KEY = "test-master-key-1234567890abcdef"

    def test_actually_encrypted_values_detected(self):
        """Output of CryptoUtils.encrypt is recognized as encrypted."""
        samples = ("short", "medium length string", "a")
        for plaintext in samples:
            ciphertext = CryptoUtils.encrypt(plaintext, self.MOCK_KEY)
            assert CryptoUtils.is_encrypted(ciphertext), f"Failed to detect encrypted value for: {plaintext}"

    def test_openai_api_keys_not_detected(self):
        """Keys using the "sk-" prefix (plain, "sk-proj-", "sk-ant-") are plaintext."""
        samples = (
            "sk-1234567890abcdefghijklmnopqrstuvwxyz1234567890ab",
            "sk-proj-1234567890abcdefghijklmnopqrstuvwxyz",
            "sk-ant-api03-1234567890abcdefghijklmnopqrstuvwxyz",
        )
        for key in samples:
            assert not CryptoUtils.is_encrypted(key), f"OpenAI key incorrectly detected as encrypted: {key}"

    def test_github_tokens_not_detected(self):
        """Each GitHub token prefix (ghp_, gho_, ghu_, ghs_, ghr_) is plaintext."""
        samples = (
            "ghp_1234567890abcdefghijklmnopqrstuvwxyz",
            "gho_1234567890abcdefghijklmnopqrstuvwxyz",
            "ghu_1234567890abcdefghijklmnopqrstuvwxyz",
            "ghs_1234567890abcdefghijklmnopqrstuvwxyz",
            "ghr_1234567890abcdefghijklmnopqrstuvwxyz",
        )
        for token in samples:
            assert not CryptoUtils.is_encrypted(token), f"GitHub token incorrectly detected as encrypted: {token}"

    def test_aws_keys_not_detected(self):
        """AWS identifiers (AKIA, ASIA, ABIA, ACCA prefixes) are plaintext."""
        samples = (
            "AKIAIOSFODNN7EXAMPLE",
            "ASIAJEXAMPLEXEG2JICEA",
            "ABIA1234567890ABCDEF",
            "ACCA1234567890ABCDEF",
        )
        for key in samples:
            assert not CryptoUtils.is_encrypted(key), f"AWS key incorrectly detected as encrypted: {key}"

    def test_slack_tokens_not_detected(self):
        """Slack bot (xoxb-) and user (xoxp-) tokens are plaintext."""
        samples = (
            "xoxb-1234567890-1234567890123-abcdefghijklmnopqrstuvwx",
            "xoxp-1234567890-1234567890123-1234567890123-abcdefghij",
        )
        for token in samples:
            assert not CryptoUtils.is_encrypted(token), f"Slack token incorrectly detected as encrypted: {token}"

    def test_bearer_tokens_not_detected(self):
        """Values carrying the "Bearer " header prefix are plaintext."""
        samples = (
            "Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U",
            "Bearer some-long-token-string-1234567890abcdefghijklmnop",
        )
        for token in samples:
            assert not CryptoUtils.is_encrypted(token), f"Bearer token incorrectly detected as encrypted: {token}"

    def test_generic_prefixes_not_detected(self):
        """Generic pk-/api-/key-/token- prefixed strings are plaintext."""
        samples = (
            "pk-1234567890abcdefghijklmnopqrstuvwxyz",
            "api-1234567890abcdefghijklmnopqrstuvwxyz",
            "key-1234567890abcdefghijklmnopqrstuvwxyz",
            "token-1234567890abcdefghijklmnopqrstuvwxyz",
        )
        for key in samples:
            assert not CryptoUtils.is_encrypted(key), f"Generic key incorrectly detected as encrypted: {key}"

    def test_short_strings_not_detected(self):
        """Short inputs, including the empty string, are never flagged."""
        samples = ("short", "abc", "1234567890", "")
        for s in samples:
            assert not CryptoUtils.is_encrypted(s), f"Short string incorrectly detected as encrypted: {s}"

    def test_invalid_base64_not_detected(self):
        """Strings containing non-base64 characters are never flagged."""
        samples = (
            "not-valid-base64!@#$",
            "spaces are invalid",
            "special!chars@here",
        )
        for s in samples:
            assert not CryptoUtils.is_encrypted(s), f"Invalid base64 incorrectly detected as encrypted: {s}"

    def test_valid_base64_but_too_short_not_detected(self):
        """Well-formed base64 with an undersized payload is not flagged."""
        # 40 raw bytes stays below the SALT + IV + TAG + 1 = 45 byte minimum.
        undersized = base64.b64encode(b"x" * 40).decode()
        assert not CryptoUtils.is_encrypted(undersized)