fix: remove overly aggressive binary file detection (#464)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Charles Packer
2026-01-04 21:40:49 -08:00
committed by GitHub
parent 36dcb891b0
commit e21bfb9b31

View File

@@ -28,26 +28,18 @@ async function isBinaryFile(filePath: string): Promise<boolean> {
if (buffer[i] === 0) return true;
}
// Try to decode as UTF-8 and check if valid
try {
const text = buffer.slice(0, bytesRead).toString("utf-8");
// Check for replacement characters (indicates invalid UTF-8)
if (text.includes("\uFFFD")) return true;
// Count control characters (excluding whitespace)
let controlCharCount = 0;
for (let i = 0; i < text.length; i++) {
const code = text.charCodeAt(i);
// Allow tab(9), newline(10), carriage return(13)
if (code < 9 || (code > 13 && code < 32)) {
controlCharCount++;
}
// Count control characters (excluding whitespace)
// This catches files that are mostly control characters but lack null bytes
const text = buffer.slice(0, bytesRead).toString("utf-8");
let controlCharCount = 0;
for (let i = 0; i < text.length; i++) {
const code = text.charCodeAt(i);
// Allow tab(9), newline(10), carriage return(13)
if (code < 9 || (code > 13 && code < 32)) {
controlCharCount++;
}
return controlCharCount / text.length > 0.3;
} catch {
// Invalid UTF-8 = binary
return true;
}
return controlCharCount / text.length > 0.3;
} finally {
await fd.close();
}