From f1408a3ce17d4f86008540c48a6edd8d315210e6 Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Sat, 31 Jan 2026 20:12:48 -0800
Subject: [PATCH] fix(cli): smarter mojibake detection to preserve valid
 Unicode (#764)

Co-authored-by: Letta <noreply@letta.com>
---
 src/cli/components/Text.tsx | 70 ++++++++++++++++++++++++++++---------
 1 file changed, 54 insertions(+), 16 deletions(-)

diff --git a/src/cli/components/Text.tsx b/src/cli/components/Text.tsx
index aa125d2..0fd0ebe 100644
--- a/src/cli/components/Text.tsx
+++ b/src/cli/components/Text.tsx
@@ -4,6 +4,56 @@ import type { ReactNode } from "react";
 const isBun = typeof Bun !== "undefined";
 const decoder = new TextDecoder("utf-8", { fatal: false });
 
+function isContinuationByte(byte: number): boolean {
+  return byte >= 0x80 && byte <= 0xbf; // UTF-8 continuation byte (10xxxxxx)
+}
+
+function looksLikeMojibake(value: string): boolean {
+  let sawUtf8Sequence = false;
+  // True iff every code unit is <= 0xff and some run looks like UTF-8 bytes.
+  for (let i = 0; i < value.length; i++) {
+    const byte = value.charCodeAt(i);
+    // A code unit above 0xff cannot be a raw byte, so the string already
+    // holds real Unicode and must not be re-decoded.
+    if (byte > 0xff) return false;
+    // 2-byte form: lead 0xc2-0xdf followed by one continuation byte.
+    if (byte >= 0xc2 && byte <= 0xdf) {
+      if (i + 1 < value.length && isContinuationByte(value.charCodeAt(i + 1))) {
+        sawUtf8Sequence = true;
+        i += 1; // step over the continuation byte just matched
+        continue;
+      }
+    }
+    // 3-byte form: lead 0xe0-0xef followed by two continuation bytes.
+    if (byte >= 0xe0 && byte <= 0xef) {
+      if (
+        i + 2 < value.length &&
+        isContinuationByte(value.charCodeAt(i + 1)) &&
+        isContinuationByte(value.charCodeAt(i + 2))
+      ) {
+        sawUtf8Sequence = true;
+        i += 2; // step over both continuation bytes
+        continue;
+      }
+    }
+    // 4-byte form: lead 0xf0-0xf4 followed by three continuation bytes.
+    if (byte >= 0xf0 && byte <= 0xf4) {
+      if (
+        i + 3 < value.length &&
+        isContinuationByte(value.charCodeAt(i + 1)) &&
+        isContinuationByte(value.charCodeAt(i + 2)) &&
+        isContinuationByte(value.charCodeAt(i + 3))
+      ) {
+        sawUtf8Sequence = true;
+        i += 3; // step over all three continuation bytes
+        continue;
+      }
+    }
+  }
+  // NOTE(review): bytes >= 0x80 outside a matched run don't veto; intended?
+  return sawUtf8Sequence;
+}
+
 function fixBunEncoding(value: ReactNode): ReactNode {
   if (!isBun) return value;
 
@@ -11,25 +61,13 @@ function fixBunEncoding(value: ReactNode): ReactNode {
     // Quick check: if no non-ASCII characters, return as-is
     if (!/[\x80-\xFF]/.test(value)) return value;
 
-    const bytes: number[] = [];
+    if (!looksLikeMojibake(value)) return value;
 
+    const bytes = new Uint8Array(value.length);
     for (let i = 0; i < value.length; i++) {
-      const code = value.charCodeAt(i);
-
-      // Check for 2-byte UTF-8 sequence: 0xC2 followed by 0x80-0xBF
-      if (code === 0xc2 && i + 1 < value.length) {
-        const nextCode = value.charCodeAt(i + 1);
-        if (nextCode >= 0x80 && nextCode <= 0xbf) {
-          bytes.push(0xc2, nextCode);
-          i++;
-          continue;
-        }
-      }
-
-      bytes.push(code);
+      bytes[i] = value.charCodeAt(i);
     }
-
-    return decoder.decode(new Uint8Array(bytes));
+    return decoder.decode(bytes);
   }
 
   // Handle arrays of children