From 9a49bec70d22ca8a76e8fe1aa9292fd705e2bf3e Mon Sep 17 00:00:00 2001
From: Michael Ward <michael@scripthungry.com>
Date: Tue, 17 Feb 2026 20:00:30 +0000
Subject: [PATCH 1/5] Extracts UTF-8 code point decoding to the Bitstring
 utility class

---
 assets/js/bitstring.mjs            |  21 ++++
 assets/js/erlang/unicode.mjs       | 149 +----------------------------
 test/javascript/bitstring_test.mjs |  37 +++++++
 3 files changed, 63 insertions(+), 144 deletions(-)

diff --git a/assets/js/bitstring.mjs b/assets/js/bitstring.mjs
index 40168b7a9..f4c9b8695 100644
--- a/assets/js/bitstring.mjs
+++ b/assets/js/bitstring.mjs
@@ -247,6 +247,27 @@ export default class Bitstring {
     }
   }
 
+  // Decodes a UTF-8 sequence starting at the given position.
+  // Returns the decoded Unicode code point value.
+  // bytes: Uint8Array containing the UTF-8 encoded data
+  // start: byte index of the sequence's first byte (bounds are not checked here)
+  // length: number of bytes in the sequence; expected 1-4 (not validated here)
+  static decodeUtf8CodePoint(bytes, start, length) {
+    if (length === 1) return bytes[start];
+
+    // Payload-bit mask for the first byte, keyed by sequence length in bytes
+    const firstByteMasks = {2: 0x1f, 3: 0x0f, 4: 0x07};
+
+    let codePoint = bytes[start] & firstByteMasks[length];
+
+    // Append the low 6 bits (mask 0x3f) of each continuation byte in order
+    for (let i = 1; i < length; i++) {
+      codePoint = (codePoint << 6) | (bytes[start + i] & 0x3f);
+    }
+
+    return codePoint;
+  }
+
   static fromBits(bits) {
     const bitCount = bits.length;
     const byteCount = Math.ceil(bitCount / 8);
diff --git a/assets/js/erlang/unicode.mjs b/assets/js/erlang/unicode.mjs
index 8479f3528..c47b0b2e2 100644
--- a/assets/js/erlang/unicode.mjs
+++ b/assets/js/erlang/unicode.mjs
@@ -98,34 +98,6 @@ const Erlang_Unicode = {
       // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
       const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
 
-      // Decodes a UTF-8 sequence starting at the given position.
-      // Returns the decoded Unicode code point value.
-      const decodeCodePoint = (start, length) => {
-        if (length === 1) {
-          return bytes[start];
-        }
-
-        if (length === 2) {
-          return ((bytes[start] & 0x1f) << 6) | (bytes[start + 1] & 0x3f);
-        }
-
-        if (length === 3) {
-          return (
-            ((bytes[start] & 0x0f) << 12) |
-            ((bytes[start + 1] & 0x3f) << 6) |
-            (bytes[start + 2] & 0x3f)
-          );
-        }
-
-        // length === 4
-        return (
-          ((bytes[start] & 0x07) << 18) |
-          ((bytes[start + 1] & 0x3f) << 12) |
-          ((bytes[start + 2] & 0x3f) << 6) |
-          (bytes[start + 3] & 0x3f)
-        );
-      };
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -156,7 +128,7 @@ const Erlang_Unicode = {
         }
 
         // Decode and validate the code point value
-        const codePoint = decodeCodePoint(start, length);
+        const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
 
         return isValidCodePoint(codePoint, length);
       };
@@ -387,34 +359,6 @@ const Erlang_Unicode = {
       // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
       const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
 
-      // Decodes a UTF-8 sequence starting at the given position.
-      // Returns the decoded Unicode code point value.
-      const decodeCodePoint = (start, length) => {
-        if (length === 1) {
-          return bytes[start];
-        }
-
-        if (length === 2) {
-          return ((bytes[start] & 0x1f) << 6) | (bytes[start + 1] & 0x3f);
-        }
-
-        if (length === 3) {
-          return (
-            ((bytes[start] & 0x0f) << 12) |
-            ((bytes[start + 1] & 0x3f) << 6) |
-            (bytes[start + 2] & 0x3f)
-          );
-        }
-
-        // length === 4
-        return (
-          ((bytes[start] & 0x07) << 18) |
-          ((bytes[start + 1] & 0x3f) << 12) |
-          ((bytes[start + 2] & 0x3f) << 6) |
-          (bytes[start + 3] & 0x3f)
-        );
-      };
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -445,7 +389,7 @@ const Erlang_Unicode = {
         }
 
         // Decode and validate the code point value
-        const codePoint = decodeCodePoint(start, length);
+        const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
 
         return isValidCodePoint(codePoint, length);
       };
@@ -703,33 +647,6 @@ const Erlang_Unicode = {
       // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
       const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
 
-      // Decodes a UTF-8 sequence starting at the given position.
-      // Returns the decoded Unicode code point value.
-      const decodeCodePoint = (start, length) => {
-        if (length === 1) {
-          return bytes[start];
-        }
-
-        if (length === 2) {
-          return ((bytes[start] & 0x1f) << 6) | (bytes[start + 1] & 0x3f);
-        }
-
-        if (length === 3) {
-          return (
-            ((bytes[start] & 0x0f) << 12) |
-            ((bytes[start + 1] & 0x3f) << 6) |
-            (bytes[start + 2] & 0x3f)
-          );
-        }
-        // length === 4
-        return (
-          ((bytes[start] & 0x07) << 18) |
-          ((bytes[start + 1] & 0x3f) << 12) |
-          ((bytes[start + 2] & 0x3f) << 6) |
-          (bytes[start + 3] & 0x3f)
-        );
-      };
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -760,7 +677,7 @@ const Erlang_Unicode = {
         }
 
         // Decode and validate the code point value
-        const codePoint = decodeCodePoint(start, length);
+        const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
 
         return isValidCodePoint(codePoint, length);
       };
@@ -870,34 +787,6 @@ const Erlang_Unicode = {
       // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
       const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
 
-      // Decodes a UTF-8 sequence starting at the given position.
-      // Returns the decoded Unicode code point value.
-      const decodeCodePoint = (start, length) => {
-        if (length === 1) {
-          return bytes[start];
-        }
-
-        if (length === 2) {
-          return ((bytes[start] & 0x1f) << 6) | (bytes[start + 1] & 0x3f);
-        }
-
-        if (length === 3) {
-          return (
-            ((bytes[start] & 0x0f) << 12) |
-            ((bytes[start + 1] & 0x3f) << 6) |
-            (bytes[start + 2] & 0x3f)
-          );
-        }
-
-        // length === 4
-        return (
-          ((bytes[start] & 0x07) << 18) |
-          ((bytes[start + 1] & 0x3f) << 12) |
-          ((bytes[start + 2] & 0x3f) << 6) |
-          (bytes[start + 3] & 0x3f)
-        );
-      };
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -928,7 +817,7 @@ const Erlang_Unicode = {
         }
 
         // Decode and validate the code point value
-        const codePoint = decodeCodePoint(start, length);
+        const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
         return isValidCodePoint(codePoint, length);
       };
 
@@ -1036,34 +925,6 @@ const Erlang_Unicode = {
       // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
       const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
 
-      // Decodes a UTF-8 sequence starting at the given position.
-      // Returns the decoded Unicode code point value.
-      const decodeCodePoint = (start, length) => {
-        if (length === 1) {
-          return bytes[start];
-        }
-
-        if (length === 2) {
-          return ((bytes[start] & 0x1f) << 6) | (bytes[start + 1] & 0x3f);
-        }
-
-        if (length === 3) {
-          return (
-            ((bytes[start] & 0x0f) << 12) |
-            ((bytes[start + 1] & 0x3f) << 6) |
-            (bytes[start + 2] & 0x3f)
-          );
-        }
-
-        // length === 4
-        return (
-          ((bytes[start] & 0x07) << 18) |
-          ((bytes[start + 1] & 0x3f) << 12) |
-          ((bytes[start + 2] & 0x3f) << 6) |
-          (bytes[start + 3] & 0x3f)
-        );
-      };
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -1096,7 +957,7 @@ const Erlang_Unicode = {
         }
 
         // Decode and validate the code point value
-        const codePoint = decodeCodePoint(start, length);
+        const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
 
         return isValidCodePoint(codePoint, length);
       };
diff --git a/test/javascript/bitstring_test.mjs b/test/javascript/bitstring_test.mjs
index d822d1ffa..88b725447 100644
--- a/test/javascript/bitstring_test.mjs
+++ b/test/javascript/bitstring_test.mjs
@@ -1231,6 +1231,43 @@ describe("Bitstring", () => {
     });
   });
 
+  describe("decodeUtf8CodePoint()", () => {
+    it("decodes 1-byte UTF-8 sequence (ASCII)", () => {
+      // 'A' = 0x41 = U+0041
+      const bytes = new Uint8Array([0x41]);
+      const codePoint = Bitstring.decodeUtf8CodePoint(bytes, 0, 1);
+      assert.equal(codePoint, 0x41);
+    });
+
+    it("decodes 2-byte UTF-8 sequence", () => {
+      // '£' = 0xC2 0xA3 = U+00A3 (pound sign)
+      const bytes = new Uint8Array([0xc2, 0xa3]);
+      const codePoint = Bitstring.decodeUtf8CodePoint(bytes, 0, 2);
+      assert.equal(codePoint, 0xa3);
+    });
+
+    it("decodes 3-byte UTF-8 sequence", () => {
+      // '€' = 0xE2 0x82 0xAC = U+20AC (euro sign)
+      const bytes = new Uint8Array([0xe2, 0x82, 0xac]);
+      const codePoint = Bitstring.decodeUtf8CodePoint(bytes, 0, 3);
+      assert.equal(codePoint, 0x20ac);
+    });
+
+    it("decodes 4-byte UTF-8 sequence", () => {
+      // '𐍈' = 0xF0 0x90 0x8D 0x88 = U+10348 (Gothic letter hwair)
+      const bytes = new Uint8Array([0xf0, 0x90, 0x8d, 0x88]);
+      const codePoint = Bitstring.decodeUtf8CodePoint(bytes, 0, 4);
+      assert.equal(codePoint, 0x10348);
+    });
+
+    it("decodes from non-zero start position", () => {
+      // '£' (0xC2 0xA3) follows two ASCII bytes, so its sequence begins at index 2
+      const bytes = new Uint8Array([0x41, 0x42, 0xc2, 0xa3]);
+      const codePoint = Bitstring.decodeUtf8CodePoint(bytes, 2, 2);
+      assert.equal(codePoint, 0xa3);
+    });
+  });
+
   describe("fromBits()", () => {
     it("empty", () => {
       const result = Bitstring.fromBits([]);

From 105d2d91dd0e3bc511079f260848483f0c378872 Mon Sep 17 00:00:00 2001
From: Michael Ward <michael@scripthungry.com>
Date: Wed, 18 Feb 2026 15:01:43 +0000
Subject: [PATCH 2/5] Extracts UTF-8 continuation byte validation to Bitstring
 class

---
 assets/js/bitstring.mjs            |  5 +++++
 assets/js/erlang/unicode.mjs       | 32 ++++++++++--------------------
 test/javascript/bitstring_test.mjs | 14 +++++++++++++
 3 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/assets/js/bitstring.mjs b/assets/js/bitstring.mjs
index f4c9b8695..881118d75 100644
--- a/assets/js/bitstring.mjs
+++ b/assets/js/bitstring.mjs
@@ -593,6 +593,11 @@ export default class Bitstring {
     return bitstring.text !== false;
   }
 
+  // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
+  static isValidUtf8ContinuationByte(byte) {
+    return (byte & 0xc0) === 0x80;
+  }
+
   static maybeResolveHex(bitstring) {
     if (bitstring.hex === null) {
       $.maybeSetBytesFromText(bitstring);
diff --git a/assets/js/erlang/unicode.mjs b/assets/js/erlang/unicode.mjs
index c47b0b2e2..e54076495 100644
--- a/assets/js/erlang/unicode.mjs
+++ b/assets/js/erlang/unicode.mjs
@@ -95,9 +95,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
-      const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -124,7 +121,8 @@ const Erlang_Unicode = {
 
         // Verify all continuation bytes have correct pattern (10xxxxxx)
         for (let i = 1; i < length; i++) {
-          if (!isValidContinuation(bytes[start + i])) return false;
+          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i]))
+            return false;
         }
 
         // Decode and validate the code point value
@@ -146,7 +144,8 @@ const Erlang_Unicode = {
 
         // Check all available continuation bytes
         for (let i = 1; i < availableBytes; i++) {
-          if (!isValidContinuation(bytes[start + i])) return false;
+          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i]))
+            return false;
         }
 
         return true;
@@ -356,9 +355,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
-      const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -385,7 +381,8 @@ const Erlang_Unicode = {
 
         // Verify all continuation bytes have correct pattern (10xxxxxx)
         for (let i = 1; i < length; i++) {
-          if (!isValidContinuation(bytes[start + i])) return false;
+          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i]))
+            return false;
         }
 
         // Decode and validate the code point value
@@ -644,9 +641,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
-      const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -673,7 +667,8 @@ const Erlang_Unicode = {
 
         // Verify all continuation bytes have correct pattern (10xxxxxx)
         for (let i = 1; i < length; i++) {
-          if (!isValidContinuation(bytes[start + i])) return false;
+          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i]))
+            return false;
         }
 
         // Decode and validate the code point value
@@ -784,9 +779,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
-      const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -813,7 +805,8 @@ const Erlang_Unicode = {
 
         // Verify all continuation bytes have correct pattern (10xxxxxx)
         for (let i = 1; i < length; i++) {
-          if (!isValidContinuation(bytes[start + i])) return false;
+          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i]))
+            return false;
         }
 
         // Decode and validate the code point value
@@ -922,9 +915,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
-      const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -951,7 +941,7 @@ const Erlang_Unicode = {
 
         // Verify all continuation bytes have correct pattern (10xxxxxx)
         for (let i = 1; i < length; i++) {
-          if (!isValidContinuation(bytes[start + i])) {
+          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i])) {
             return false;
           }
         }
diff --git a/test/javascript/bitstring_test.mjs b/test/javascript/bitstring_test.mjs
index 88b725447..0a53ca436 100644
--- a/test/javascript/bitstring_test.mjs
+++ b/test/javascript/bitstring_test.mjs
@@ -5264,6 +5264,20 @@ describe("Bitstring", () => {
     });
   });
 
+  describe("isValidUtf8ContinuationByte()", () => {
+    it("valid continuation byte (10xxxxxx pattern)", () => {
+      assert.isTrue(Bitstring.isValidUtf8ContinuationByte(0x80)); // 10000000
+      assert.isTrue(Bitstring.isValidUtf8ContinuationByte(0xbf)); // 10111111
+    });
+
+    it("invalid continuation byte (not 10xxxxxx pattern)", () => {
+      assert.isFalse(Bitstring.isValidUtf8ContinuationByte(0x00)); // 00000000 (ASCII)
+      assert.isFalse(Bitstring.isValidUtf8ContinuationByte(0x7f)); // 01111111 (ASCII)
+      assert.isFalse(Bitstring.isValidUtf8ContinuationByte(0xc0)); // 11000000 (110xxxxx leader pattern)
+      assert.isFalse(Bitstring.isValidUtf8ContinuationByte(0xff)); // 11111111 (invalid)
+    });
+  });
+
   describe("maybeResolveHex()", () => {
     it("when hex field is already set", () => {
       const bitstring = Type.bitstring("Hologram");

From 7015625b214ae92310278d6d214735ccda8be3fa Mon Sep 17 00:00:00 2001
From: Michael Ward <michael@scripthungry.com>
Date: Wed, 18 Feb 2026 15:33:08 +0000
Subject: [PATCH 3/5] Extracts UTF-8 code point validation to Bitstring class

---
 assets/js/bitstring.mjs            |  18 ++++++
 assets/js/erlang/unicode.mjs       | 100 ++---------------------------
 test/javascript/bitstring_test.mjs |  30 +++++++++
 3 files changed, 53 insertions(+), 95 deletions(-)

diff --git a/assets/js/bitstring.mjs b/assets/js/bitstring.mjs
index 881118d75..dfbcd8181 100644
--- a/assets/js/bitstring.mjs
+++ b/assets/js/bitstring.mjs
@@ -593,6 +593,24 @@ export default class Bitstring {
     return bitstring.text !== false;
   }
 
+  // Validates that a code point is within UTF-8 rules:
+  // - Not an overlong encoding (using more bytes than necessary)
+  // - Not a UTF-16 surrogate (U+D800–U+DFFF)
+  // - Not above maximum Unicode (U+10FFFF)
+  static isValidUtf8CodePoint(codePoint, encodingLength) {
+    // Smallest code point that genuinely needs each encoding length (in bytes)
+    const minValueForLength = {1: 0, 2: 0x80, 3: 0x800, 4: 0x10000};
+
+    // Reject overlong encodings: the value would have fit in fewer bytes
+    if (codePoint < minValueForLength[encodingLength]) return false;
+    // Reject UTF-16 surrogates (U+D800–U+DFFF)
+    if (codePoint >= 0xd800 && codePoint <= 0xdfff) return false;
+    // Reject code points beyond Unicode range (> U+10FFFF)
+    if (codePoint > 0x10ffff) return false;
+
+    return true;
+  }
+
   // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
   static isValidUtf8ContinuationByte(byte) {
     return (byte & 0xc0) === 0x80;
diff --git a/assets/js/erlang/unicode.mjs b/assets/js/erlang/unicode.mjs
index e54076495..064b25164 100644
--- a/assets/js/erlang/unicode.mjs
+++ b/assets/js/erlang/unicode.mjs
@@ -95,24 +95,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Validates that a code point is within UTF-8 rules:
-      // - Not an overlong encoding (using more bytes than necessary)
-      // - Not a UTF-16 surrogate (U+D800–U+DFFF)
-      // - Not above maximum Unicode (U+10FFFF)
-      const isValidCodePoint = (codePoint, encodingLength) => {
-        // Check for overlong encodings (security issue)
-        const minValueForLength = [0, 0, 0x80, 0x800, 0x10000];
-        if (codePoint < minValueForLength[encodingLength]) return false;
-
-        // Reject UTF-16 surrogates (U+D800–U+DFFF)
-        if (codePoint >= 0xd800 && codePoint <= 0xdfff) return false;
-
-        // Reject code points beyond Unicode range (> U+10FFFF)
-        if (codePoint > 0x10ffff) return false;
-
-        return true;
-      };
-
       // Validates a complete UTF-8 sequence at the given position.
       // Checks: sufficient bytes, valid continuations, and valid code point.
       const isValidSequence = (start, length) => {
@@ -128,7 +110,7 @@ const Erlang_Unicode = {
         // Decode and validate the code point value
         const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
 
-        return isValidCodePoint(codePoint, length);
+        return Bitstring.isValidUtf8CodePoint(codePoint, length);
       };
 
       // Checks if there's a truncated (incomplete) sequence at position.
@@ -355,24 +337,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Validates that a code point is within UTF-8 rules:
-      // - Not an overlong encoding (using more bytes than necessary)
-      // - Not a UTF-16 surrogate (U+D800–U+DFFF)
-      // - Not above maximum Unicode (U+10FFFF)
-      const isValidCodePoint = (codePoint, encodingLength) => {
-        // Check for overlong encodings (security issue)
-        const minValueForLength = [0, 0, 0x80, 0x800, 0x10000];
-        if (codePoint < minValueForLength[encodingLength]) return false;
-
-        // Reject UTF-16 surrogates (U+D800–U+DFFF)
-        if (codePoint >= 0xd800 && codePoint <= 0xdfff) return false;
-
-        // Reject code points beyond Unicode range (> U+10FFFF)
-        if (codePoint > 0x10ffff) return false;
-
-        return true;
-      };
-
       // Validates a complete UTF-8 sequence at the given position.
       // Checks: sufficient bytes, valid continuations, and valid code point.
       const isValidSequence = (start, length) => {
@@ -388,7 +352,7 @@ const Erlang_Unicode = {
         // Decode and validate the code point value
         const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
 
-        return isValidCodePoint(codePoint, length);
+        return Bitstring.isValidUtf8CodePoint(codePoint, length);
       };
 
       // Main loop: scan forward, validating each sequence
@@ -641,24 +605,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Validates that a code point is within UTF-8 rules:
-      // - Not an overlong encoding (using more bytes than necessary)
-      // - Not a UTF-16 surrogate (U+D800–U+DFFF)
-      // - Not above maximum Unicode (U+10FFFF)
-      const isValidCodePoint = (codePoint, encodingLength) => {
-        // Check for overlong encodings (security issue)
-        const minValueForLength = [0, 0, 0x80, 0x800, 0x10000];
-        if (codePoint < minValueForLength[encodingLength]) return false;
-
-        // Reject UTF-16 surrogates (U+D800–U+DFFF)
-        if (codePoint >= 0xd800 && codePoint <= 0xdfff) return false;
-
-        // Reject code points beyond Unicode range (> U+10FFFF)
-        if (codePoint > 0x10ffff) return false;
-
-        return true;
-      };
-
       // Validates a complete UTF-8 sequence at the given position.
       // Checks: sufficient bytes, valid continuations, and valid code point.
       const isValidSequence = (start, length) => {
@@ -674,7 +620,7 @@ const Erlang_Unicode = {
         // Decode and validate the code point value
         const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
 
-        return isValidCodePoint(codePoint, length);
+        return Bitstring.isValidUtf8CodePoint(codePoint, length);
       };
 
       // Main loop: scan forward, validating each sequence
@@ -779,24 +725,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Validates that a code point is within UTF-8 rules:
-      // - Not an overlong encoding (using more bytes than necessary)
-      // - Not a UTF-16 surrogate (U+D800–U+DFFF)
-      // - Not above maximum Unicode (U+10FFFF)
-      const isValidCodePoint = (codePoint, encodingLength) => {
-        // Check for overlong encodings (security issue)
-        const minValueForLength = [0, 0, 0x80, 0x800, 0x10000];
-        if (codePoint < minValueForLength[encodingLength]) return false;
-
-        // Reject UTF-16 surrogates (U+D800–U+DFFF)
-        if (codePoint >= 0xd800 && codePoint <= 0xdfff) return false;
-
-        // Reject code points beyond Unicode range (> U+10FFFF)
-        if (codePoint > 0x10ffff) return false;
-
-        return true;
-      };
-
       // Validates a complete UTF-8 sequence at the given position.
       // Checks: sufficient bytes, valid continuations, and valid code point.
       const isValidSequence = (start, length) => {
@@ -811,7 +739,7 @@ const Erlang_Unicode = {
 
         // Decode and validate the code point value
         const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
-        return isValidCodePoint(codePoint, length);
+        return Bitstring.isValidUtf8CodePoint(codePoint, length);
       };
 
       // Main loop: scan forward, validating each sequence
@@ -915,24 +843,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Validates that a code point is within UTF-8 rules:
-      // - Not an overlong encoding (using more bytes than necessary)
-      // - Not a UTF-16 surrogate (U+D800–U+DFFF)
-      // - Not above maximum Unicode (U+10FFFF)
-      const isValidCodePoint = (codePoint, encodingLength) => {
-        // Check for overlong encodings (security issue)
-        const minValueForLength = [0, 0, 0x80, 0x800, 0x10000];
-        if (codePoint < minValueForLength[encodingLength]) return false;
-
-        // Reject UTF-16 surrogates (U+D800–U+DFFF)
-        if (codePoint >= 0xd800 && codePoint <= 0xdfff) return false;
-
-        // Reject code points beyond Unicode range (> U+10FFFF)
-        if (codePoint > 0x10ffff) return false;
-
-        return true;
-      };
-
       // Validates a complete UTF-8 sequence at the given position.
       // Checks: sufficient bytes, valid continuations, and valid code point.
       const isValidSequence = (start, length) => {
@@ -949,7 +859,7 @@ const Erlang_Unicode = {
         // Decode and validate the code point value
         const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
 
-        return isValidCodePoint(codePoint, length);
+        return Bitstring.isValidUtf8CodePoint(codePoint, length);
       };
 
       // Main loop: scan forward, validating each sequence
diff --git a/test/javascript/bitstring_test.mjs b/test/javascript/bitstring_test.mjs
index 0a53ca436..6f1916f56 100644
--- a/test/javascript/bitstring_test.mjs
+++ b/test/javascript/bitstring_test.mjs
@@ -5278,6 +5278,36 @@ describe("Bitstring", () => {
     });
   });
 
+  describe("isValidUtf8CodePoint()", () => {
+    it("valid codepoint", () => {
+      assert.isTrue(Bitstring.isValidUtf8CodePoint(0x41, 1)); // ASCII 'A'
+      assert.isTrue(Bitstring.isValidUtf8CodePoint(0xa9, 2)); // © (copyright)
+      assert.isTrue(Bitstring.isValidUtf8CodePoint(0x20ac, 3)); // € (euro)
+      assert.isTrue(Bitstring.isValidUtf8CodePoint(0x10348, 4)); // 𐍈 (Gothic letter)
+      assert.isTrue(Bitstring.isValidUtf8CodePoint(0x10ffff, 4)); // Maximum valid Unicode
+    });
+
+    it("overlong encoding (codepoint too small for encoding length)", () => {
+      // 'A' (0x41) must use 1 byte, not 2
+      assert.isFalse(Bitstring.isValidUtf8CodePoint(0x41, 2));
+      // 0x7FF is the largest 2-byte value, so a 3-byte encoding of it is overlong
+      assert.isFalse(Bitstring.isValidUtf8CodePoint(0x7ff, 3));
+      // 0xFFFF is the largest 3-byte value, so a 4-byte encoding of it is overlong
+      assert.isFalse(Bitstring.isValidUtf8CodePoint(0xffff, 4));
+    });
+
+    it("UTF-16 surrogate (U+D800–U+DFFF)", () => {
+      assert.isFalse(Bitstring.isValidUtf8CodePoint(0xd800, 3)); // Start of surrogate range
+      assert.isFalse(Bitstring.isValidUtf8CodePoint(0xdc00, 3)); // Middle of surrogate range
+      assert.isFalse(Bitstring.isValidUtf8CodePoint(0xdfff, 3)); // End of surrogate range
+    });
+
+    it("beyond Unicode range (> U+10FFFF)", () => {
+      assert.isFalse(Bitstring.isValidUtf8CodePoint(0x110000, 4));
+      assert.isFalse(Bitstring.isValidUtf8CodePoint(0x200000, 4));
+    });
+  });
+
   describe("maybeResolveHex()", () => {
     it("when hex field is already set", () => {
       const bitstring = Type.bitstring("Hologram");

From b9bfe2d681c06d792a3599abf8deebf68d907316 Mon Sep 17 00:00:00 2001
From: Michael Ward <michael@scripthungry.com>
Date: Wed, 18 Feb 2026 16:34:55 +0000
Subject: [PATCH 4/5] Extracts UTF-8 sequence validation to Bitstring class

---
 assets/js/bitstring.mjs            |  19 +++++
 assets/js/erlang/unicode.mjs       | 130 +++++++----------------------
 test/javascript/bitstring_test.mjs |  84 +++++++++++++++----
 3 files changed, 120 insertions(+), 113 deletions(-)

diff --git a/assets/js/bitstring.mjs b/assets/js/bitstring.mjs
index dfbcd8181..5bee0cc43 100644
--- a/assets/js/bitstring.mjs
+++ b/assets/js/bitstring.mjs
@@ -616,6 +616,25 @@ export default class Bitstring {
     return (byte & 0xc0) === 0x80;
   }
 
+  // Validates a UTF-8 sequence at the given position assuming the leader byte
+  // has already been confirmed valid for `length` (e.g. via getUtf8SequenceLength).
+  // Checks: sufficient bytes, valid continuation bytes, and valid code point.
+  // Returns true only when all three checks pass, false at the first failure.
+  static isValidUtf8Sequence(bytes, start, length) {
+    // A sequence that would extend past the end of `bytes` is incomplete
+    if (start + length > bytes.length) return false;
+
+    // Verify all continuation bytes have correct pattern (10xxxxxx)
+    for (let i = 1; i < length; i++) {
+      if (!$.isValidUtf8ContinuationByte(bytes[start + i])) return false;
+    }
+
+    // Byte structure is sound; decode and vet the resulting code point value
+    const codePoint = $.decodeUtf8CodePoint(bytes, start, length);
+
+    return $.isValidUtf8CodePoint(codePoint, length);
+  }
+
   static maybeResolveHex(bitstring) {
     if (bitstring.hex === null) {
       $.maybeSetBytesFromText(bitstring);
diff --git a/assets/js/erlang/unicode.mjs b/assets/js/erlang/unicode.mjs
index 064b25164..c9ee533ba 100644
--- a/assets/js/erlang/unicode.mjs
+++ b/assets/js/erlang/unicode.mjs
@@ -95,24 +95,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Validates a complete UTF-8 sequence at the given position.
-      // Checks: sufficient bytes, valid continuations, and valid code point.
-      const isValidSequence = (start, length) => {
-        // Check if we have enough bytes
-        if (start + length > bytes.length) return false;
-
-        // Verify all continuation bytes have correct pattern (10xxxxxx)
-        for (let i = 1; i < length; i++) {
-          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i]))
-            return false;
-        }
-
-        // Decode and validate the code point value
-        const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
-
-        return Bitstring.isValidUtf8CodePoint(codePoint, length);
-      };
-
       // Checks if there's a truncated (incomplete) sequence at position.
       // Returns true if bytes could be a valid prefix of a UTF-8 sequence.
       const isTruncatedSequence = (start) => {
@@ -138,7 +120,11 @@ const Erlang_Unicode = {
 
       while (pos < bytes.length) {
         const seqLength = Bitstring.getUtf8SequenceLength(bytes[pos]);
-        if (seqLength === false || !isValidSequence(pos, seqLength)) break;
+        if (
+          seqLength === false ||
+          !Bitstring.isValidUtf8Sequence(bytes, pos, seqLength)
+        )
+          break;
         pos += seqLength;
       }
 
@@ -337,30 +323,16 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Validates a complete UTF-8 sequence at the given position.
-      // Checks: sufficient bytes, valid continuations, and valid code point.
-      const isValidSequence = (start, length) => {
-        // Check if we have enough bytes
-        if (start + length > bytes.length) return false;
-
-        // Verify all continuation bytes have correct pattern (10xxxxxx)
-        for (let i = 1; i < length; i++) {
-          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i]))
-            return false;
-        }
-
-        // Decode and validate the code point value
-        const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
-
-        return Bitstring.isValidUtf8CodePoint(codePoint, length);
-      };
-
-      // Main loop: scan forward, validating each sequence
+      // Scan forward, validating each sequence
       let pos = 0;
 
       while (pos < bytes.length) {
         const seqLength = Bitstring.getUtf8SequenceLength(bytes[pos]);
-        if (seqLength === false || !isValidSequence(pos, seqLength)) break;
+        if (
+          seqLength === false ||
+          !Bitstring.isValidUtf8Sequence(bytes, pos, seqLength)
+        )
+          break;
         pos += seqLength;
       }
 
@@ -605,29 +577,16 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Validates a complete UTF-8 sequence at the given position.
-      // Checks: sufficient bytes, valid continuations, and valid code point.
-      const isValidSequence = (start, length) => {
-        // Check if we have enough bytes
-        if (start + length > bytes.length) return false;
-
-        // Verify all continuation bytes have correct pattern (10xxxxxx)
-        for (let i = 1; i < length; i++) {
-          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i]))
-            return false;
-        }
-
-        // Decode and validate the code point value
-        const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
-
-        return Bitstring.isValidUtf8CodePoint(codePoint, length);
-      };
-
-      // Main loop: scan forward, validating each sequence
+      // Scan forward, validating each sequence
       let pos = 0;
+
       while (pos < bytes.length) {
         const seqLength = Bitstring.getUtf8SequenceLength(bytes[pos]);
-        if (seqLength === false || !isValidSequence(pos, seqLength)) break;
+        if (
+          seqLength === false ||
+          !Bitstring.isValidUtf8Sequence(bytes, pos, seqLength)
+        )
+          break;
         pos += seqLength;
       }
 
@@ -725,28 +684,16 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Validates a complete UTF-8 sequence at the given position.
-      // Checks: sufficient bytes, valid continuations, and valid code point.
-      const isValidSequence = (start, length) => {
-        // Check if we have enough bytes
-        if (start + length > bytes.length) return false;
-
-        // Verify all continuation bytes have correct pattern (10xxxxxx)
-        for (let i = 1; i < length; i++) {
-          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i]))
-            return false;
-        }
-
-        // Decode and validate the code point value
-        const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
-        return Bitstring.isValidUtf8CodePoint(codePoint, length);
-      };
-
-      // Main loop: scan forward, validating each sequence
+      // Scan forward, validating each sequence
       let pos = 0;
+
       while (pos < bytes.length) {
         const seqLength = Bitstring.getUtf8SequenceLength(bytes[pos]);
-        if (seqLength === false || !isValidSequence(pos, seqLength)) break;
+        if (
+          seqLength === false ||
+          !Bitstring.isValidUtf8Sequence(bytes, pos, seqLength)
+        )
+          break;
         pos += seqLength;
       }
 
@@ -843,30 +790,15 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Validates a complete UTF-8 sequence at the given position.
-      // Checks: sufficient bytes, valid continuations, and valid code point.
-      const isValidSequence = (start, length) => {
-        // Check if we have enough bytes
-        if (start + length > bytes.length) return false;
-
-        // Verify all continuation bytes have correct pattern (10xxxxxx)
-        for (let i = 1; i < length; i++) {
-          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i])) {
-            return false;
-          }
-        }
-
-        // Decode and validate the code point value
-        const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
-
-        return Bitstring.isValidUtf8CodePoint(codePoint, length);
-      };
-
-      // Main loop: scan forward, validating each sequence
+      // Scan forward, validating each sequence
       let pos = 0;
       while (pos < bytes.length) {
         const seqLength = Bitstring.getUtf8SequenceLength(bytes[pos]);
-        if (seqLength === false || !isValidSequence(pos, seqLength)) break;
+        if (
+          seqLength === false ||
+          !Bitstring.isValidUtf8Sequence(bytes, pos, seqLength)
+        )
+          break;
         pos += seqLength;
       }
 
diff --git a/test/javascript/bitstring_test.mjs b/test/javascript/bitstring_test.mjs
index 6f1916f56..a2b906ad7 100644
--- a/test/javascript/bitstring_test.mjs
+++ b/test/javascript/bitstring_test.mjs
@@ -5264,20 +5264,6 @@ describe("Bitstring", () => {
     });
   });
 
-  describe("isValidUtf8ContinuationByte()", () => {
-    it("valid continuation byte (10xxxxxx pattern)", () => {
-      assert.isTrue(Bitstring.isValidUtf8ContinuationByte(0x80)); // 10000000
-      assert.isTrue(Bitstring.isValidUtf8ContinuationByte(0xbf)); // 10111111
-    });
-
-    it("invalid continuation byte (not 10xxxxxx pattern)", () => {
-      assert.isFalse(Bitstring.isValidUtf8ContinuationByte(0x00)); // 00000000 (ASCII)
-      assert.isFalse(Bitstring.isValidUtf8ContinuationByte(0x7f)); // 01111111 (ASCII)
-      assert.isFalse(Bitstring.isValidUtf8ContinuationByte(0xc0)); // 11000000 (2-byte start)
-      assert.isFalse(Bitstring.isValidUtf8ContinuationByte(0xff)); // 11111111 (invalid)
-    });
-  });
-
   describe("isValidUtf8CodePoint()", () => {
     it("valid codepoint", () => {
       assert.isTrue(Bitstring.isValidUtf8CodePoint(0x41, 1)); // ASCII 'A'
@@ -5308,6 +5294,76 @@ describe("Bitstring", () => {
     });
   });
 
+  describe("isValidUtf8ContinuationByte()", () => {
+    it("valid continuation byte (10xxxxxx pattern)", () => {
+      assert.isTrue(Bitstring.isValidUtf8ContinuationByte(0x80)); // 10000000
+      assert.isTrue(Bitstring.isValidUtf8ContinuationByte(0xbf)); // 10111111
+    });
+
+    it("invalid continuation byte (not 10xxxxxx pattern)", () => {
+      assert.isFalse(Bitstring.isValidUtf8ContinuationByte(0x00)); // 00000000 (ASCII)
+      assert.isFalse(Bitstring.isValidUtf8ContinuationByte(0x7f)); // 01111111 (ASCII)
+      assert.isFalse(Bitstring.isValidUtf8ContinuationByte(0xc0)); // 11000000 (2-byte start)
+      assert.isFalse(Bitstring.isValidUtf8ContinuationByte(0xff)); // 11111111 (invalid)
+    });
+  });
+
+  describe("isValidUtf8Sequence()", () => {
+    it("valid 1-byte sequence (ASCII)", () => {
+      // ASCII 'A'
+      const bytes = new Uint8Array([0x41]);
+      assert.isTrue(Bitstring.isValidUtf8Sequence(bytes, 0, 1));
+    });
+
+    it("valid 2-byte sequence", () => {
+      // é (U+00E9): 0xC3 0xA9
+      const bytes = new Uint8Array([0xc3, 0xa9]);
+      assert.isTrue(Bitstring.isValidUtf8Sequence(bytes, 0, 2));
+    });
+
+    it("valid 3-byte sequence", () => {
+      // € (U+20AC): 0xE2 0x82 0xAC
+      const bytes = new Uint8Array([0xe2, 0x82, 0xac]);
+      assert.isTrue(Bitstring.isValidUtf8Sequence(bytes, 0, 3));
+    });
+
+    it("valid 4-byte sequence", () => {
+      // 𐍈 (U+10348): 0xF0 0x90 0x8D 0x88
+      const bytes = new Uint8Array([0xf0, 0x90, 0x8d, 0x88]);
+      assert.isTrue(Bitstring.isValidUtf8Sequence(bytes, 0, 4));
+    });
+
+    it("not enough bytes available", () => {
+      const bytes = new Uint8Array([0xc3, 0xa9]); // 2 bytes
+      // Try to validate 3-byte sequence starting at position 0
+      assert.isFalse(Bitstring.isValidUtf8Sequence(bytes, 0, 3));
+    });
+
+    it("invalid continuation byte", () => {
+      // 0xC3 starts a 2-byte sequence, but 0x41 (ASCII 'A') is not a valid continuation
+      const bytes = new Uint8Array([0xc3, 0x41]);
+      assert.isFalse(Bitstring.isValidUtf8Sequence(bytes, 0, 2));
+    });
+
+    it("overlong encoding", () => {
+      // 'A' (0x41) encoded as 2-byte sequence: 0xC1 0x81 (overlong)
+      const bytes = new Uint8Array([0xc1, 0x81]);
+      assert.isFalse(Bitstring.isValidUtf8Sequence(bytes, 0, 2));
+    });
+
+    it("UTF-16 surrogate", () => {
+      // U+D800 (surrogate) encoded as 3-byte sequence: 0xED 0xA0 0x80
+      const bytes = new Uint8Array([0xed, 0xa0, 0x80]);
+      assert.isFalse(Bitstring.isValidUtf8Sequence(bytes, 0, 3));
+    });
+
+    it("beyond Unicode range", () => {
+      // U+110000 (beyond max) encoded as 4-byte sequence: 0xF4 0x90 0x80 0x80
+      const bytes = new Uint8Array([0xf4, 0x90, 0x80, 0x80]);
+      assert.isFalse(Bitstring.isValidUtf8Sequence(bytes, 0, 4));
+    });
+  });
+
   describe("maybeResolveHex()", () => {
     it("when hex field is already set", () => {
       const bitstring = Type.bitstring("Hologram");

From 9ab7bc37b117477b18319b0f1e2d8193bd1e9c5d Mon Sep 17 00:00:00 2001
From: Michael Ward <michael@scripthungry.com>
Date: Wed, 18 Feb 2026 17:08:58 +0000
Subject: [PATCH 5/5] Extracts truncated UTF-8 sequence validation to BitString
 class

---
 assets/js/bitstring.mjs            | 21 ++++++++
 assets/js/erlang/unicode.mjs       | 31 +++---------
 test/javascript/bitstring_test.mjs | 79 ++++++++++++++++++++++++++++++
 3 files changed, 107 insertions(+), 24 deletions(-)

diff --git a/assets/js/bitstring.mjs b/assets/js/bitstring.mjs
index 5bee0cc43..d80dd9937 100644
--- a/assets/js/bitstring.mjs
+++ b/assets/js/bitstring.mjs
@@ -635,6 +635,27 @@ export default class Bitstring {
     return $.isValidUtf8CodePoint(codePoint, length);
   }
 
+  // Checks if there's a truncated (incomplete) UTF-8 sequence at the given position.
+  // Returns true if bytes could be a valid prefix of a UTF-8 sequence.
+  // bytes: Uint8Array containing UTF-8 encoded data
+  // start: byte index to check for truncation
+  static isTruncatedUtf8Sequence(bytes, start) {
+    const leaderByte = bytes[start];
+    const expectedLength = $.getUtf8SequenceLength(leaderByte);
+
+    if (expectedLength === false) return false;
+
+    const availableBytes = bytes.length - start;
+    if (availableBytes >= expectedLength) return false;
+
+    // Check all available continuation bytes
+    for (let i = 1; i < availableBytes; i++) {
+      if (!$.isValidUtf8ContinuationByte(bytes[start + i])) return false;
+    }
+
+    return true;
+  }
+
   static maybeResolveHex(bitstring) {
     if (bitstring.hex === null) {
       $.maybeSetBytesFromText(bitstring);
diff --git a/assets/js/erlang/unicode.mjs b/assets/js/erlang/unicode.mjs
index c9ee533ba..a433c4ff0 100644
--- a/assets/js/erlang/unicode.mjs
+++ b/assets/js/erlang/unicode.mjs
@@ -95,27 +95,7 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Checks if there's a truncated (incomplete) sequence at position.
-      // Returns true if bytes could be a valid prefix of a UTF-8 sequence.
-      const isTruncatedSequence = (start) => {
-        const leaderByte = bytes[start];
-        const expectedLength = Bitstring.getUtf8SequenceLength(leaderByte);
-
-        if (expectedLength === false) return false;
-
-        const availableBytes = bytes.length - start;
-        if (availableBytes >= expectedLength) return false;
-
-        // Check all available continuation bytes
-        for (let i = 1; i < availableBytes; i++) {
-          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i]))
-            return false;
-        }
-
-        return true;
-      };
-
-      // Main loop: scan forward, validating each sequence
+      // Scan forward, validating each sequence
       let pos = 0;
 
       while (pos < bytes.length) {
@@ -128,7 +108,7 @@ const Erlang_Unicode = {
         pos += seqLength;
       }
 
-      return {validLength: pos, isTruncated: isTruncatedSequence(pos)};
+      return pos;
     };
 
     // Converts a binary to a list of codepoints.
@@ -160,7 +140,7 @@ const Erlang_Unicode = {
     const handleInvalidUtf8FromBinary = (invalidBinary) => {
       Bitstring.maybeSetBytesFromText(invalidBinary);
       const bytes = invalidBinary.bytes ?? new Uint8Array(0);
-      const {validLength, isTruncated} = findValidUtf8Length(bytes);
+      const validLength = findValidUtf8Length(bytes);
 
       const validPrefix = Bitstring.fromBytes(bytes.slice(0, validLength));
       const invalidRest = Bitstring.fromBytes(bytes.slice(validLength));
@@ -168,6 +148,8 @@ const Erlang_Unicode = {
       const codepoints =
         validLength > 0 ? convertBinaryToCodepoints(validPrefix) : [];
 
+      const isTruncated = Bitstring.isTruncatedUtf8Sequence(bytes, validLength);
+
       if (isTruncated) {
         return createIncompleteTuple(codepoints, invalidRest);
       }
@@ -187,7 +169,8 @@ const Erlang_Unicode = {
       // Check if it's a truncated sequence
       Bitstring.maybeSetBytesFromText(invalidBinary);
       const bytes = invalidBinary.bytes ?? new Uint8Array(0);
-      const {isTruncated} = findValidUtf8Length(bytes);
+      const validLength = findValidUtf8Length(bytes);
+      const isTruncated = Bitstring.isTruncatedUtf8Sequence(bytes, validLength);
 
       if (isTruncated) {
         // Incomplete: rest is the binary directly (not wrapped in list)
diff --git a/test/javascript/bitstring_test.mjs b/test/javascript/bitstring_test.mjs
index a2b906ad7..dd371b89b 100644
--- a/test/javascript/bitstring_test.mjs
+++ b/test/javascript/bitstring_test.mjs
@@ -5264,6 +5264,85 @@ describe("Bitstring", () => {
     });
   });
 
+  describe("isTruncatedUtf8Sequence()", () => {
+    // Happy path: truncated 2-byte sequence
+    it("returns true for truncated 2-byte sequence with valid continuation byte", () => {
+      // 0xC2 requires 2 bytes, but only the leader byte is available (no continuation byte present)
+      const bytes = new Uint8Array([0xc2]);
+      assert.equal(Bitstring.isTruncatedUtf8Sequence(bytes, 0), true);
+    });
+
+    // Happy path: truncated 3-byte sequence
+    it("returns true for truncated 3-byte sequence with valid continuation bytes", () => {
+      // 0xE2 requires 3 bytes, but only 2 bytes available (leader + 1 valid continuation)
+      const bytes = new Uint8Array([0xe2, 0x82]);
+      assert.equal(Bitstring.isTruncatedUtf8Sequence(bytes, 0), true);
+    });
+
+    // Happy path: truncated 4-byte sequence
+    it("returns true for truncated 4-byte sequence with valid continuation bytes", () => {
+      // 0xF0 requires 4 bytes, but only 3 bytes available (leader + 2 valid continuations)
+      const bytes = new Uint8Array([0xf0, 0x90, 0x8d]);
+      assert.equal(Bitstring.isTruncatedUtf8Sequence(bytes, 0), true);
+    });
+
+    // Edge case: start position in middle of data
+    it("returns true for truncated sequence starting at non-zero position", () => {
+      // Valid ASCII prefix, then truncated 2-byte sequence
+      const bytes = new Uint8Array([0x41, 0xc2]); // 'A' + truncated '£'
+      assert.equal(Bitstring.isTruncatedUtf8Sequence(bytes, 1), true);
+    });
+
+    // Edge case: multiple valid continuation bytes before truncation
+    it("returns true for 4-byte sequence with 2 valid continuation bytes (truncated)", () => {
+      // 0xF0 (4-byte) with 2 valid continuation bytes available
+      const bytes = new Uint8Array([0xf0, 0x90, 0x8d]);
+      assert.equal(Bitstring.isTruncatedUtf8Sequence(bytes, 0), true);
+    });
+
+    // False path: invalid leader byte
+    it("returns false for invalid leader byte", () => {
+      // 0xC0 is invalid (overlong encoding marker)
+      const bytes = new Uint8Array([0xc0]);
+      assert.equal(Bitstring.isTruncatedUtf8Sequence(bytes, 0), false);
+    });
+
+    // False path: invalid leader byte (out of range)
+    it("returns false for leader byte >= 0xF5", () => {
+      // 0xF5 and above are invalid (> U+10FFFF)
+      const bytes = new Uint8Array([0xf5]);
+      assert.equal(Bitstring.isTruncatedUtf8Sequence(bytes, 0), false);
+    });
+
+    // False path: enough bytes available
+    it("returns false when enough bytes are available for complete sequence", () => {
+      // 0xC2 requires 2 bytes, and 2 bytes are available
+      const bytes = new Uint8Array([0xc2, 0xa3]);
+      assert.equal(Bitstring.isTruncatedUtf8Sequence(bytes, 0), false);
+    });
+
+    // False path: invalid continuation byte in truncated sequence
+    it("returns false when continuation byte is invalid", () => {
+      // 0xC2 requires 2 bytes and 2 are available, so the sequence is not truncated (0x00 is an invalid continuation, not a missing one)
+      const bytes = new Uint8Array([0xc2, 0x00]);
+      assert.equal(Bitstring.isTruncatedUtf8Sequence(bytes, 0), false);
+    });
+
+    // False path: ASCII byte
+    it("returns false for ASCII byte (1-byte sequence)", () => {
+      // ASCII bytes are 1-byte sequences, always complete
+      const bytes = new Uint8Array([0x41]); // 'A'
+      assert.equal(Bitstring.isTruncatedUtf8Sequence(bytes, 0), false);
+    });
+
+    // False path: truncated sequence whose first continuation byte is invalid
+    it("returns false when truncated sequence has invalid continuation byte at start", () => {
+      // 0xE2 requires 3 bytes, 2 available, but second byte (0x00) is invalid continuation
+      const bytes = new Uint8Array([0xe2, 0x00]);
+      assert.equal(Bitstring.isTruncatedUtf8Sequence(bytes, 0), false);
+    });
+  });
+
   describe("isValidUtf8CodePoint()", () => {
     it("valid codepoint", () => {
       assert.isTrue(Bitstring.isValidUtf8CodePoint(0x41, 1)); // ASCII 'A'