From 9a49bec70d22ca8a76e8fe1aa9292fd705e2bf3e Mon Sep 17 00:00:00 2001
From: Michael Ward <michael@scripthungry.com>
Date: Tue, 17 Feb 2026 20:00:30 +0000
Subject: [PATCH 1/3] Extracts UTF-8 code point decoding to the Bitstring
 utility class, adds unit tests

---
 assets/js/bitstring.mjs            |  21 ++++
 assets/js/erlang/unicode.mjs       | 149 +----------------------------
 test/javascript/bitstring_test.mjs |  37 +++++++
 3 files changed, 63 insertions(+), 144 deletions(-)

diff --git a/assets/js/bitstring.mjs b/assets/js/bitstring.mjs
index 40168b7a9..f4c9b8695 100644
--- a/assets/js/bitstring.mjs
+++ b/assets/js/bitstring.mjs
@@ -247,6 +247,27 @@ export default class Bitstring {
     }
   }
 
+  // Decodes the UTF-8 sequence of `length` bytes (1-4) that starts at
+  // bytes[start] and returns its code point as an integer.
+  // bytes: indexable sequence of byte values, e.g. a Uint8Array
+  // start: index of the sequence's first (lead) byte
+  // Nothing is validated here: length, bounds and bit patterns are not checked.
+  static decodeUtf8CodePoint(bytes, start, length) {
+    if (length === 1) return bytes[start];
+
+    // Payload bits kept from the lead byte: 5 (2-byte), 4 (3-byte), 3 (4-byte)
+    const firstByteMasks = {2: 0x1f, 3: 0x0f, 4: 0x07};
+
+    let codePoint = bytes[start] & firstByteMasks[length];
+
+    // Each following byte contributes its low 6 bits, most significant first
+    for (let i = 1; i < length; i++) {
+      codePoint = (codePoint << 6) | (bytes[start + i] & 0x3f);
+    }
+
+    return codePoint;
+  }
+
   static fromBits(bits) {
     const bitCount = bits.length;
     const byteCount = Math.ceil(bitCount / 8);
diff --git a/assets/js/erlang/unicode.mjs b/assets/js/erlang/unicode.mjs
index 8479f3528..c47b0b2e2 100644
--- a/assets/js/erlang/unicode.mjs
+++ b/assets/js/erlang/unicode.mjs
@@ -98,34 +98,6 @@ const Erlang_Unicode = {
       // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
       const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
 
-      // Decodes a UTF-8 sequence starting at the given position.
-      // Returns the decoded Unicode code point value.
-      const decodeCodePoint = (start, length) => {
-        if (length === 1) {
-          return bytes[start];
-        }
-
-        if (length === 2) {
-          return ((bytes[start] & 0x1f) << 6) | (bytes[start + 1] & 0x3f);
-        }
-
-        if (length === 3) {
-          return (
-            ((bytes[start] & 0x0f) << 12) |
-            ((bytes[start + 1] & 0x3f) << 6) |
-            (bytes[start + 2] & 0x3f)
-          );
-        }
-
-        // length === 4
-        return (
-          ((bytes[start] & 0x07) << 18) |
-          ((bytes[start + 1] & 0x3f) << 12) |
-          ((bytes[start + 2] & 0x3f) << 6) |
-          (bytes[start + 3] & 0x3f)
-        );
-      };
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -156,7 +128,7 @@ const Erlang_Unicode = {
         }
 
         // Decode and validate the code point value
-        const codePoint = decodeCodePoint(start, length);
+        const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
 
         return isValidCodePoint(codePoint, length);
       };
@@ -387,34 +359,6 @@ const Erlang_Unicode = {
       // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
       const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
 
-      // Decodes a UTF-8 sequence starting at the given position.
-      // Returns the decoded Unicode code point value.
-      const decodeCodePoint = (start, length) => {
-        if (length === 1) {
-          return bytes[start];
-        }
-
-        if (length === 2) {
-          return ((bytes[start] & 0x1f) << 6) | (bytes[start + 1] & 0x3f);
-        }
-
-        if (length === 3) {
-          return (
-            ((bytes[start] & 0x0f) << 12) |
-            ((bytes[start + 1] & 0x3f) << 6) |
-            (bytes[start + 2] & 0x3f)
-          );
-        }
-
-        // length === 4
-        return (
-          ((bytes[start] & 0x07) << 18) |
-          ((bytes[start + 1] & 0x3f) << 12) |
-          ((bytes[start + 2] & 0x3f) << 6) |
-          (bytes[start + 3] & 0x3f)
-        );
-      };
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -445,7 +389,7 @@ const Erlang_Unicode = {
         }
 
         // Decode and validate the code point value
-        const codePoint = decodeCodePoint(start, length);
+        const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
 
         return isValidCodePoint(codePoint, length);
       };
@@ -703,33 +647,6 @@ const Erlang_Unicode = {
       // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
       const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
 
-      // Decodes a UTF-8 sequence starting at the given position.
-      // Returns the decoded Unicode code point value.
-      const decodeCodePoint = (start, length) => {
-        if (length === 1) {
-          return bytes[start];
-        }
-
-        if (length === 2) {
-          return ((bytes[start] & 0x1f) << 6) | (bytes[start + 1] & 0x3f);
-        }
-
-        if (length === 3) {
-          return (
-            ((bytes[start] & 0x0f) << 12) |
-            ((bytes[start + 1] & 0x3f) << 6) |
-            (bytes[start + 2] & 0x3f)
-          );
-        }
-        // length === 4
-        return (
-          ((bytes[start] & 0x07) << 18) |
-          ((bytes[start + 1] & 0x3f) << 12) |
-          ((bytes[start + 2] & 0x3f) << 6) |
-          (bytes[start + 3] & 0x3f)
-        );
-      };
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -760,7 +677,7 @@ const Erlang_Unicode = {
         }
 
         // Decode and validate the code point value
-        const codePoint = decodeCodePoint(start, length);
+        const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
 
         return isValidCodePoint(codePoint, length);
       };
@@ -870,34 +787,6 @@ const Erlang_Unicode = {
       // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
       const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
 
-      // Decodes a UTF-8 sequence starting at the given position.
-      // Returns the decoded Unicode code point value.
-      const decodeCodePoint = (start, length) => {
-        if (length === 1) {
-          return bytes[start];
-        }
-
-        if (length === 2) {
-          return ((bytes[start] & 0x1f) << 6) | (bytes[start + 1] & 0x3f);
-        }
-
-        if (length === 3) {
-          return (
-            ((bytes[start] & 0x0f) << 12) |
-            ((bytes[start + 1] & 0x3f) << 6) |
-            (bytes[start + 2] & 0x3f)
-          );
-        }
-
-        // length === 4
-        return (
-          ((bytes[start] & 0x07) << 18) |
-          ((bytes[start + 1] & 0x3f) << 12) |
-          ((bytes[start + 2] & 0x3f) << 6) |
-          (bytes[start + 3] & 0x3f)
-        );
-      };
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -928,7 +817,7 @@ const Erlang_Unicode = {
         }
 
         // Decode and validate the code point value
-        const codePoint = decodeCodePoint(start, length);
+        const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
         return isValidCodePoint(codePoint, length);
       };
 
@@ -1036,34 +925,6 @@ const Erlang_Unicode = {
       // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
       const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
 
-      // Decodes a UTF-8 sequence starting at the given position.
-      // Returns the decoded Unicode code point value.
-      const decodeCodePoint = (start, length) => {
-        if (length === 1) {
-          return bytes[start];
-        }
-
-        if (length === 2) {
-          return ((bytes[start] & 0x1f) << 6) | (bytes[start + 1] & 0x3f);
-        }
-
-        if (length === 3) {
-          return (
-            ((bytes[start] & 0x0f) << 12) |
-            ((bytes[start + 1] & 0x3f) << 6) |
-            (bytes[start + 2] & 0x3f)
-          );
-        }
-
-        // length === 4
-        return (
-          ((bytes[start] & 0x07) << 18) |
-          ((bytes[start + 1] & 0x3f) << 12) |
-          ((bytes[start + 2] & 0x3f) << 6) |
-          (bytes[start + 3] & 0x3f)
-        );
-      };
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -1096,7 +957,7 @@ const Erlang_Unicode = {
         }
 
         // Decode and validate the code point value
-        const codePoint = decodeCodePoint(start, length);
+        const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
 
         return isValidCodePoint(codePoint, length);
       };
diff --git a/test/javascript/bitstring_test.mjs b/test/javascript/bitstring_test.mjs
index d822d1ffa..88b725447 100644
--- a/test/javascript/bitstring_test.mjs
+++ b/test/javascript/bitstring_test.mjs
@@ -1231,6 +1231,43 @@ describe("Bitstring", () => {
     });
   });
 
+  describe("decodeUtf8CodePoint()", () => {
+    it("decodes 1-byte UTF-8 sequence (ASCII)", () => {
+      // 'A' (U+0041) is the single byte 0x41; a 1-byte sequence is returned as-is
+      const bytes = new Uint8Array([0x41]);
+      const codePoint = Bitstring.decodeUtf8CodePoint(bytes, 0, 1);
+      assert.equal(codePoint, 0x41);
+    });
+
+    it("decodes 2-byte UTF-8 sequence", () => {
+      // '£' (U+00A3, pound sign) = 0xC2 0xA3: ((0xC2 & 0x1f) << 6) | (0xA3 & 0x3f)
+      const bytes = new Uint8Array([0xc2, 0xa3]);
+      const codePoint = Bitstring.decodeUtf8CodePoint(bytes, 0, 2);
+      assert.equal(codePoint, 0xa3);
+    });
+
+    it("decodes 3-byte UTF-8 sequence", () => {
+      // '€' (U+20AC, euro sign) = 0xE2 0x82 0xAC: 4 lead bits, then 6 + 6 bits
+      const bytes = new Uint8Array([0xe2, 0x82, 0xac]);
+      const codePoint = Bitstring.decodeUtf8CodePoint(bytes, 0, 3);
+      assert.equal(codePoint, 0x20ac);
+    });
+
+    it("decodes 4-byte UTF-8 sequence", () => {
+      // '𐍈' (U+10348, Gothic letter hwair) = 0xF0 0x90 0x8D 0x88: 3 lead bits, then 6 + 6 + 6
+      const bytes = new Uint8Array([0xf0, 0x90, 0x8d, 0x88]);
+      const codePoint = Bitstring.decodeUtf8CodePoint(bytes, 0, 4);
+      assert.equal(codePoint, 0x10348);
+    });
+
+    it("decodes from non-zero start position", () => {
+      // Bytes 0-1 are ASCII ('A', 'B'); the 2-byte '£' sequence starts at index 2
+      const bytes = new Uint8Array([0x41, 0x42, 0xc2, 0xa3]);
+      const codePoint = Bitstring.decodeUtf8CodePoint(bytes, 2, 2);
+      assert.equal(codePoint, 0xa3);
+    });
+  });
+
   describe("fromBits()", () => {
     it("empty", () => {
       const result = Bitstring.fromBits([]);

From 105d2d91dd0e3bc511079f260848483f0c378872 Mon Sep 17 00:00:00 2001
From: Michael Ward <michael@scripthungry.com>
Date: Wed, 18 Feb 2026 15:01:43 +0000
Subject: [PATCH 2/3] Extracts UTF-8 continuation byte validation to the
 Bitstring class

---
 assets/js/bitstring.mjs            |  5 +++++
 assets/js/erlang/unicode.mjs       | 32 ++++++++++--------------------
 test/javascript/bitstring_test.mjs | 14 +++++++++++++
 3 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/assets/js/bitstring.mjs b/assets/js/bitstring.mjs
index f4c9b8695..881118d75 100644
--- a/assets/js/bitstring.mjs
+++ b/assets/js/bitstring.mjs
@@ -593,6 +593,11 @@ export default class Bitstring {
     return bitstring.text !== false;
   }
 
+  // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
+  static isValidUtf8ContinuationByte(byte) {
+    return (byte & 0xc0) === 0x80;
+  }
+
   static maybeResolveHex(bitstring) {
     if (bitstring.hex === null) {
       $.maybeSetBytesFromText(bitstring);
diff --git a/assets/js/erlang/unicode.mjs b/assets/js/erlang/unicode.mjs
index c47b0b2e2..e54076495 100644
--- a/assets/js/erlang/unicode.mjs
+++ b/assets/js/erlang/unicode.mjs
@@ -95,9 +95,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
-      const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -124,7 +121,8 @@ const Erlang_Unicode = {
 
         // Verify all continuation bytes have correct pattern (10xxxxxx)
         for (let i = 1; i < length; i++) {
-          if (!isValidContinuation(bytes[start + i])) return false;
+          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i]))
+            return false;
         }
 
         // Decode and validate the code point value
@@ -146,7 +144,8 @@ const Erlang_Unicode = {
 
         // Check all available continuation bytes
         for (let i = 1; i < availableBytes; i++) {
-          if (!isValidContinuation(bytes[start + i])) return false;
+          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i]))
+            return false;
         }
 
         return true;
@@ -356,9 +355,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
-      const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -385,7 +381,8 @@ const Erlang_Unicode = {
 
         // Verify all continuation bytes have correct pattern (10xxxxxx)
         for (let i = 1; i < length; i++) {
-          if (!isValidContinuation(bytes[start + i])) return false;
+          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i]))
+            return false;
         }
 
         // Decode and validate the code point value
@@ -644,9 +641,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
-      const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -673,7 +667,8 @@ const Erlang_Unicode = {
 
         // Verify all continuation bytes have correct pattern (10xxxxxx)
         for (let i = 1; i < length; i++) {
-          if (!isValidContinuation(bytes[start + i])) return false;
+          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i]))
+            return false;
         }
 
         // Decode and validate the code point value
@@ -784,9 +779,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
-      const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -813,7 +805,8 @@ const Erlang_Unicode = {
 
         // Verify all continuation bytes have correct pattern (10xxxxxx)
         for (let i = 1; i < length; i++) {
-          if (!isValidContinuation(bytes[start + i])) return false;
+          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i]))
+            return false;
         }
 
         // Decode and validate the code point value
@@ -922,9 +915,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
-      const isValidContinuation = (byte) => (byte & 0xc0) === 0x80;
-
       // Validates that a code point is within UTF-8 rules:
       // - Not an overlong encoding (using more bytes than necessary)
       // - Not a UTF-16 surrogate (U+D800–U+DFFF)
@@ -951,7 +941,7 @@ const Erlang_Unicode = {
 
         // Verify all continuation bytes have correct pattern (10xxxxxx)
         for (let i = 1; i < length; i++) {
-          if (!isValidContinuation(bytes[start + i])) {
+          if (!Bitstring.isValidUtf8ContinuationByte(bytes[start + i])) {
             return false;
           }
         }
diff --git a/test/javascript/bitstring_test.mjs b/test/javascript/bitstring_test.mjs
index 88b725447..0a53ca436 100644
--- a/test/javascript/bitstring_test.mjs
+++ b/test/javascript/bitstring_test.mjs
@@ -5264,6 +5264,20 @@ describe("Bitstring", () => {
     });
   });
 
+  describe("isValidUtf8ContinuationByte()", () => {
+    it("valid continuation byte (10xxxxxx pattern)", () => {
+      assert.isTrue(Bitstring.isValidUtf8ContinuationByte(0x80)); // 10000000 (lowest continuation byte)
+      assert.isTrue(Bitstring.isValidUtf8ContinuationByte(0xbf)); // 10111111 (highest continuation byte)
+    });
+
+    it("invalid continuation byte (not 10xxxxxx pattern)", () => {
+      assert.isFalse(Bitstring.isValidUtf8ContinuationByte(0x00)); // 00000000 (ASCII, top bit clear)
+      assert.isFalse(Bitstring.isValidUtf8ContinuationByte(0x7f)); // 01111111 (ASCII, top bit clear)
+      assert.isFalse(Bitstring.isValidUtf8ContinuationByte(0xc0)); // 11000000 (110xxxxx lead-byte pattern)
+      assert.isFalse(Bitstring.isValidUtf8ContinuationByte(0xff)); // 11111111 (top two bits are 11, not 10)
+    });
+  });
+
   describe("maybeResolveHex()", () => {
     it("when hex field is already set", () => {
       const bitstring = Type.bitstring("Hologram");

From 7015625b214ae92310278d6d214735ccda8be3fa Mon Sep 17 00:00:00 2001
From: Michael Ward <michael@scripthungry.com>
Date: Wed, 18 Feb 2026 15:33:08 +0000
Subject: [PATCH 3/3] Extracts UTF-8 code point validation to the Bitstring class

---
 assets/js/bitstring.mjs            |  18 ++++++
 assets/js/erlang/unicode.mjs       | 100 ++---------------------------
 test/javascript/bitstring_test.mjs |  30 +++++++++
 3 files changed, 53 insertions(+), 95 deletions(-)

diff --git a/assets/js/bitstring.mjs b/assets/js/bitstring.mjs
index 881118d75..dfbcd8181 100644
--- a/assets/js/bitstring.mjs
+++ b/assets/js/bitstring.mjs
@@ -593,6 +593,24 @@ export default class Bitstring {
     return bitstring.text !== false;
   }
 
+  // Returns true when codePoint, decoded from an encodingLength-byte sequence:
+  // - is not overlong (it is at least the smallest value needing that many bytes)
+  // - is not a UTF-16 surrogate (U+D800–U+DFFF)
+  // - does not exceed the Unicode maximum (U+10FFFF); returns false otherwise.
+  static isValidUtf8CodePoint(codePoint, encodingLength) {
+    // Smallest code point that legitimately needs each encoding length (1-4 bytes)
+    const minValueForLength = {1: 0, 2: 0x80, 3: 0x800, 4: 0x10000};
+
+    // Overlong if below that minimum (an unlisted length compares against undefined and passes)
+    if (codePoint < minValueForLength[encodingLength]) return false;
+    // Surrogates U+D800–U+DFFF are rejected regardless of encoding length
+    if (codePoint >= 0xd800 && codePoint <= 0xdfff) return false;
+    // Anything above U+10FFFF is outside the Unicode range
+    if (codePoint > 0x10ffff) return false;
+
+    return true;
+  }
+
   // Checks if a byte is a valid UTF-8 continuation byte (10xxxxxx).
   static isValidUtf8ContinuationByte(byte) {
     return (byte & 0xc0) === 0x80;
diff --git a/assets/js/erlang/unicode.mjs b/assets/js/erlang/unicode.mjs
index e54076495..064b25164 100644
--- a/assets/js/erlang/unicode.mjs
+++ b/assets/js/erlang/unicode.mjs
@@ -95,24 +95,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Validates that a code point is within UTF-8 rules:
-      // - Not an overlong encoding (using more bytes than necessary)
-      // - Not a UTF-16 surrogate (U+D800–U+DFFF)
-      // - Not above maximum Unicode (U+10FFFF)
-      const isValidCodePoint = (codePoint, encodingLength) => {
-        // Check for overlong encodings (security issue)
-        const minValueForLength = [0, 0, 0x80, 0x800, 0x10000];
-        if (codePoint < minValueForLength[encodingLength]) return false;
-
-        // Reject UTF-16 surrogates (U+D800–U+DFFF)
-        if (codePoint >= 0xd800 && codePoint <= 0xdfff) return false;
-
-        // Reject code points beyond Unicode range (> U+10FFFF)
-        if (codePoint > 0x10ffff) return false;
-
-        return true;
-      };
-
       // Validates a complete UTF-8 sequence at the given position.
       // Checks: sufficient bytes, valid continuations, and valid code point.
       const isValidSequence = (start, length) => {
@@ -128,7 +110,7 @@ const Erlang_Unicode = {
         // Decode and validate the code point value
         const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
 
-        return isValidCodePoint(codePoint, length);
+        return Bitstring.isValidUtf8CodePoint(codePoint, length);
       };
 
       // Checks if there's a truncated (incomplete) sequence at position.
@@ -355,24 +337,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Validates that a code point is within UTF-8 rules:
-      // - Not an overlong encoding (using more bytes than necessary)
-      // - Not a UTF-16 surrogate (U+D800–U+DFFF)
-      // - Not above maximum Unicode (U+10FFFF)
-      const isValidCodePoint = (codePoint, encodingLength) => {
-        // Check for overlong encodings (security issue)
-        const minValueForLength = [0, 0, 0x80, 0x800, 0x10000];
-        if (codePoint < minValueForLength[encodingLength]) return false;
-
-        // Reject UTF-16 surrogates (U+D800–U+DFFF)
-        if (codePoint >= 0xd800 && codePoint <= 0xdfff) return false;
-
-        // Reject code points beyond Unicode range (> U+10FFFF)
-        if (codePoint > 0x10ffff) return false;
-
-        return true;
-      };
-
       // Validates a complete UTF-8 sequence at the given position.
       // Checks: sufficient bytes, valid continuations, and valid code point.
       const isValidSequence = (start, length) => {
@@ -388,7 +352,7 @@ const Erlang_Unicode = {
         // Decode and validate the code point value
         const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
 
-        return isValidCodePoint(codePoint, length);
+        return Bitstring.isValidUtf8CodePoint(codePoint, length);
       };
 
       // Main loop: scan forward, validating each sequence
@@ -641,24 +605,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Validates that a code point is within UTF-8 rules:
-      // - Not an overlong encoding (using more bytes than necessary)
-      // - Not a UTF-16 surrogate (U+D800–U+DFFF)
-      // - Not above maximum Unicode (U+10FFFF)
-      const isValidCodePoint = (codePoint, encodingLength) => {
-        // Check for overlong encodings (security issue)
-        const minValueForLength = [0, 0, 0x80, 0x800, 0x10000];
-        if (codePoint < minValueForLength[encodingLength]) return false;
-
-        // Reject UTF-16 surrogates (U+D800–U+DFFF)
-        if (codePoint >= 0xd800 && codePoint <= 0xdfff) return false;
-
-        // Reject code points beyond Unicode range (> U+10FFFF)
-        if (codePoint > 0x10ffff) return false;
-
-        return true;
-      };
-
       // Validates a complete UTF-8 sequence at the given position.
       // Checks: sufficient bytes, valid continuations, and valid code point.
       const isValidSequence = (start, length) => {
@@ -674,7 +620,7 @@ const Erlang_Unicode = {
         // Decode and validate the code point value
         const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
 
-        return isValidCodePoint(codePoint, length);
+        return Bitstring.isValidUtf8CodePoint(codePoint, length);
       };
 
       // Main loop: scan forward, validating each sequence
@@ -779,24 +725,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Validates that a code point is within UTF-8 rules:
-      // - Not an overlong encoding (using more bytes than necessary)
-      // - Not a UTF-16 surrogate (U+D800–U+DFFF)
-      // - Not above maximum Unicode (U+10FFFF)
-      const isValidCodePoint = (codePoint, encodingLength) => {
-        // Check for overlong encodings (security issue)
-        const minValueForLength = [0, 0, 0x80, 0x800, 0x10000];
-        if (codePoint < minValueForLength[encodingLength]) return false;
-
-        // Reject UTF-16 surrogates (U+D800–U+DFFF)
-        if (codePoint >= 0xd800 && codePoint <= 0xdfff) return false;
-
-        // Reject code points beyond Unicode range (> U+10FFFF)
-        if (codePoint > 0x10ffff) return false;
-
-        return true;
-      };
-
       // Validates a complete UTF-8 sequence at the given position.
       // Checks: sufficient bytes, valid continuations, and valid code point.
       const isValidSequence = (start, length) => {
@@ -811,7 +739,7 @@ const Erlang_Unicode = {
 
         // Decode and validate the code point value
         const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
-        return isValidCodePoint(codePoint, length);
+        return Bitstring.isValidUtf8CodePoint(codePoint, length);
       };
 
       // Main loop: scan forward, validating each sequence
@@ -915,24 +843,6 @@ const Erlang_Unicode = {
     // and rejecting overlong encodings, surrogates, and out-of-range values.
     // Time complexity: O(n) where n is the number of bytes.
     const findValidUtf8Length = (bytes) => {
-      // Validates that a code point is within UTF-8 rules:
-      // - Not an overlong encoding (using more bytes than necessary)
-      // - Not a UTF-16 surrogate (U+D800–U+DFFF)
-      // - Not above maximum Unicode (U+10FFFF)
-      const isValidCodePoint = (codePoint, encodingLength) => {
-        // Check for overlong encodings (security issue)
-        const minValueForLength = [0, 0, 0x80, 0x800, 0x10000];
-        if (codePoint < minValueForLength[encodingLength]) return false;
-
-        // Reject UTF-16 surrogates (U+D800–U+DFFF)
-        if (codePoint >= 0xd800 && codePoint <= 0xdfff) return false;
-
-        // Reject code points beyond Unicode range (> U+10FFFF)
-        if (codePoint > 0x10ffff) return false;
-
-        return true;
-      };
-
       // Validates a complete UTF-8 sequence at the given position.
       // Checks: sufficient bytes, valid continuations, and valid code point.
       const isValidSequence = (start, length) => {
@@ -949,7 +859,7 @@ const Erlang_Unicode = {
         // Decode and validate the code point value
         const codePoint = Bitstring.decodeUtf8CodePoint(bytes, start, length);
 
-        return isValidCodePoint(codePoint, length);
+        return Bitstring.isValidUtf8CodePoint(codePoint, length);
       };
 
       // Main loop: scan forward, validating each sequence
diff --git a/test/javascript/bitstring_test.mjs b/test/javascript/bitstring_test.mjs
index 0a53ca436..6f1916f56 100644
--- a/test/javascript/bitstring_test.mjs
+++ b/test/javascript/bitstring_test.mjs
@@ -5278,6 +5278,36 @@ describe("Bitstring", () => {
     });
   });
 
+  describe("isValidUtf8CodePoint()", () => {
+    it("valid codepoint", () => {
+      assert.isTrue(Bitstring.isValidUtf8CodePoint(0x41, 1)); // ASCII 'A' (1-byte minimum is 0)
+      assert.isTrue(Bitstring.isValidUtf8CodePoint(0xa9, 2)); // © (copyright), >= 2-byte minimum 0x80
+      assert.isTrue(Bitstring.isValidUtf8CodePoint(0x20ac, 3)); // € (euro), >= 3-byte minimum 0x800
+      assert.isTrue(Bitstring.isValidUtf8CodePoint(0x10348, 4)); // 𐍈 (Gothic letter), >= 4-byte minimum 0x10000
+      assert.isTrue(Bitstring.isValidUtf8CodePoint(0x10ffff, 4)); // Maximum valid Unicode code point (inclusive)
+    });
+
+    it("overlong encoding (codepoint too small for encoding length)", () => {
+      // 0x41 ('A') is below 0x80, the smallest value allowed for 2 bytes
+      assert.isFalse(Bitstring.isValidUtf8CodePoint(0x41, 2));
+      // 0x7FF is one below 0x800, the smallest value allowed for 3 bytes
+      assert.isFalse(Bitstring.isValidUtf8CodePoint(0x7ff, 3));
+      // 0xFFFF is one below 0x10000, the smallest value allowed for 4 bytes
+      assert.isFalse(Bitstring.isValidUtf8CodePoint(0xffff, 4));
+    });
+
+    it("UTF-16 surrogate (U+D800–U+DFFF)", () => {
+      assert.isFalse(Bitstring.isValidUtf8CodePoint(0xd800, 3)); // First surrogate (inclusive lower bound)
+      assert.isFalse(Bitstring.isValidUtf8CodePoint(0xdc00, 3)); // Inside the surrogate range
+      assert.isFalse(Bitstring.isValidUtf8CodePoint(0xdfff, 3)); // Last surrogate (inclusive upper bound)
+    });
+
+    it("beyond Unicode range (> U+10FFFF)", () => {
+      assert.isFalse(Bitstring.isValidUtf8CodePoint(0x110000, 4));
+      assert.isFalse(Bitstring.isValidUtf8CodePoint(0x200000, 4));
+    });
+  });
+
   describe("maybeResolveHex()", () => {
     it("when hex field is already set", () => {
       const bitstring = Type.bitstring("Hologram");