diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 33ec4eb..f11e809 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -33,13 +33,16 @@ jobs:
       uses: actions-rs/cargo@v1
       with:
         command: test
+        args: --all
     - name: Check formatting
       uses: actions-rs/cargo@v1
       with:
         command: fmt
-        args: -- --check
+        args: --all --check
     - name: Catch common mistakes
       uses: actions-rs/cargo@v1
       with:
         command: clippy
-        args: --all-targets -- -D warnings
+        args: --all-targets --workspace -- -D warnings
+    - name: Regenerate Unicode tables
+      run: mv src/tables.rs tables.rs.bak && cd tables && cargo run && cd .. && diff src/tables.rs tables.rs.bak
diff --git a/Cargo.toml b/Cargo.toml
index a9fa2c3..adfea40 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,3 +9,6 @@ repository = "https://github.com/withoutboats/heck"
 keywords = ["string", "case", "camel", "snake", "unicode"]
 categories = ["no-std"]
 include = ["src/**/*", "LICENSE-*", "README.md", "CHANGELOG.md"]
+
+[workspace]
+members = ["tables"]
diff --git a/README.md b/README.md
index 9b9b4e8..b4659c9 100644
--- a/README.md
+++ b/README.md
@@ -8,24 +8,42 @@ consistent, and reasonably well performing.
 
 ## Definition of a word boundary
 
-Word boundaries are defined by non-alphanumeric characters, as well as
-within those words in this manner:
-
-1. If an uppercase character is followed by lowercase letters, a word
-boundary is considered to be just prior to that uppercase character.
-2. If multiple uppercase characters are consecutive, they are considered to
-be within a single word, except that the last will be part of the next word
-if it is followed by lowercase characters (see rule 1).
-
-That is, "HelloWorld" is segmented `Hello|World` whereas "XMLHttpRequest" is
-segmented `XML|Http|Request`.
-
-Characters not within words (such as spaces, punctuations, and underscores)
-are not included in the output string except as they are a part of the case
-being converted to. Multiple adjacent word boundaries (such as a series of
-underscores) are folded into one. ("hello__world" in snake case is therefore
-"hello_world", not the exact same string). Leading or trailing word boundary
-indicators are dropped, except insofar as CamelCase capitalizes the first word.
+The definition of a word boundary is based on the
+[identifier word boundary](https://www.unicode.org/reports/tr55/#Identifier-Chunks)
+in Unicode Technical Standard 55. The rules are as follows:
+
+- The set of characters that can be in a word is
+  [`[\p{ID_Continue}\p{ID_Compat_Math_Continue}\p{Cn}\p{Co}\p{Alphabetic}\p{N}-[\p{P}-\p{Po}]]`][1],
+  plus U+05F3, U+05F4, and U+0F0B. This notably includes
+  alphabetic and numeric characters, accents and other combining marks,
+  emoji, a few mathematical symbols, a few non-word-separating punctuation marks,
+  unassigned characters, and private-use characters.
+
+- Characters that cannot be in a word separate words.
+  For example, `foo_bar` is segmented `foo`|`bar`
+  because words cannot contain `_`.
+  These characters will be excluded from the output string.
+
+- Words cannot be empty. For example, `_foo__bar_` is segmented `foo`|`bar`,
+  and in snake_case becomes `foo_bar`.
+
+- There is a word boundary between a lowercase (or non-Greek titlecase)
+  and an uppercase (or titlecase) letter. For example, `fooBar` is segmented
+  `foo`|`Bar` because `oB` is a lowercase letter followed by an uppercase letter.
+
+- An uppercase (or titlecase) letter followed by a lowercase letter
+  has a word boundary before it. For example, `XMLHttpRequest` is segmented
+  `XML`|`Http`|`Request`; the `Ht` in `HttpRequest` is an uppercase letter
+  followed by a lowercase letter, so there is a word boundary before it.
+
+ - There is always a word boundary before a non-Greek titlecase letter
+   (U+01C5 'ǅ', U+01C8 'ǈ', U+01CB 'ǋ', or U+01F2 'ǲ').
+
+ - For the purpose of the preceding three rules, a letter followed
+   by some number of nonspacing marks (like accents or other diacritics)
+   is treated as if it were the letter alone. For example, `áB` is segmented `á`|`B`.
+
+[1]: https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%5Cp%7BID_Continue%7D%5Cp%7BID_Compat_Math_Continue%7D%5Cp%7BCn%7D%5Cp%7BCo%7D%5Cp%7BAlphabetic%7D%5Cp%7BN%7D-%5B%5Cp%7BP%7D-%5Cp%7BPo%7D%5D%5D&abb=on&g=&i=
 
 ## Cases contained in this library:
 
diff --git a/src/lib.rs b/src/lib.rs
index ab8a015..9ba5a9c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -6,25 +6,42 @@
 //!
 //! ## Definition of a word boundary
 //!
-//! Word boundaries are defined by non-alphanumeric characters, as well as
-//! within those words in this manner:
+//! The definition of a word boundary is based on the
+//! [identifier word boundary](https://www.unicode.org/reports/tr55/#Identifier-Chunks)
+//! in Unicode Technical Standard 55. The rules are as follows:
 //!
-//! 1. If an uppercase character is followed by lowercase letters, a word
-//! boundary is considered to be just prior to that uppercase character.
-//! 2. If multiple uppercase characters are consecutive, they are considered to
-//! be within a single word, except that the last will be part of the next word
-//! if it is followed by lowercase characters (see rule 1).
+//! - The set of characters that can be in a word is
+//!   [`[\p{ID_Continue}\p{ID_Compat_Math_Continue}\p{Cn}\p{Co}\p{Alphabetic}\p{N}-[\p{P}-\p{Po}]]`][1],
+//!   plus U+05F3, U+05F4, and U+0F0B. This notably includes
+//!   alphabetic and numeric characters, accents and other combining marks,
+//!   emoji, a few mathematical symbols, a few non-word-separating punctuation marks,
+//!   unassigned characters, and private-use characters.
 //!
-//! That is, "HelloWorld" is segmented `Hello|World` whereas "XMLHttpRequest" is
-//! segmented `XML|Http|Request`.
+//! - Characters that cannot be in a word separate words.
+//!   For example, `foo_bar` is segmented `foo`|`bar`
+//!   because words cannot contain `_`.
+//!   These characters will be excluded from the output string.
 //!
-//! Characters not within words (such as spaces, punctuations, and underscores)
-//! are not included in the output string except as they are a part of the case
-//! being converted to. Multiple adjacent word boundaries (such as a series of
-//! underscores) are folded into one. ("hello__world" in snake case is therefore
-//! "hello_world", not the exact same string). Leading or trailing word boundary
-//! indicators are dropped, except insofar as CamelCase capitalizes the first
-//! word.
+//! - Words cannot be empty. For example, `_foo__bar_` is segmented `foo`|`bar`,
+//!   and in snake_case becomes `foo_bar`.
+//!
+//! - There is a word boundary between a lowercase (or non-Greek titlecase)
+//!   and an uppercase (or titlecase) letter. For example, `fooBar` is segmented
+//!   `foo`|`Bar` because `oB` is a lowercase letter followed by an uppercase letter.
+//!
+//! - An uppercase (or titlecase) letter followed by a lowercase letter
+//!   has a word boundary before it. For example, `XMLHttpRequest` is segmented
+//!   `XML`|`Http`|`Request`; the `Ht` in `HttpRequest` is an uppercase letter
+//!   followed by a lowercase letter, so there is a word boundary before it.
+//!
+//!  - There is always a word boundary before a non-Greek titlecase letter
+//!    (U+01C5 'ǅ', U+01C8 'ǈ', U+01CB 'ǋ', or U+01F2 'ǲ').
+//!
+//!  - For the purpose of the preceding three rules, a letter followed
+//!    by some number of nonspacing marks (like accents or other diacritics)
+//!    is treated as if it were the letter alone. For example, `áB` is segmented `á`|`B`.
+//!
+//! [1]: https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%5Cp%7BID_Continue%7D%5Cp%7BID_Compat_Math_Continue%7D%5Cp%7BCn%7D%5Cp%7BCo%7D%5Cp%7BAlphabetic%7D%5Cp%7BN%7D-%5B%5Cp%7BP%7D-%5Cp%7BPo%7D%5D%5D&abb=on&g=&i=
 //!
 //! ### Cases contained in this library:
 //!
@@ -47,6 +64,7 @@ mod lower_camel;
 mod shouty_kebab;
 mod shouty_snake;
 mod snake;
+mod tables;
 mod title;
 mod train;
 mod upper_camel;
@@ -58,6 +76,7 @@ pub use shouty_snake::{
     AsShoutySnakeCase, AsShoutySnakeCase as AsShoutySnekCase, ToShoutySnakeCase, ToShoutySnekCase,
 };
 pub use snake::{AsSnakeCase, AsSnakeCase as AsSnekCase, ToSnakeCase, ToSnekCase};
+pub use tables::UNICODE_VERSION;
 pub use title::{AsTitleCase, ToTitleCase};
 pub use train::{AsTrainCase, ToTrainCase};
 pub use upper_camel::{
@@ -66,6 +85,8 @@ pub use upper_camel::{
 
 use core::fmt;
 
+use tables::{is_non_greek_titlecase, CasedLetterKind};
+
 fn transform<F, G>(
     s: &str,
     mut with_word: F,
@@ -76,82 +97,100 @@ where
     F: FnMut(&str, &mut fmt::Formatter) -> fmt::Result,
     G: FnMut(&mut fmt::Formatter) -> fmt::Result,
 {
-    /// Tracks the current 'mode' of the transformation algorithm as it scans
-    /// the input string.
-    ///
-    /// The mode is a tri-state which tracks the case of the last cased
-    /// character of the current word. If there is no cased character
-    /// (either lowercase or uppercase) since the previous word boundary,
-    /// than the mode is `Boundary`. If the last cased character is lowercase,
-    /// then the mode is `Lowercase`. Othertherwise, the mode is
-    /// `Uppercase`.
-    #[derive(Clone, Copy, PartialEq)]
-    enum WordMode {
-        /// There have been no lowercase or uppercase characters in the current
-        /// word.
-        Boundary,
-        /// The previous cased character in the current word is lowercase.
-        Lowercase,
-        /// The previous cased character in the current word is uppercase.
-        Uppercase,
-    }
-
     let mut first_word = true;
 
-    for word in s.split(|c: char| !c.is_alphanumeric()) {
-        let mut char_indices = word.char_indices().peekable();
-        let mut init = 0;
-        let mut mode = WordMode::Boundary;
-
-        while let Some((i, c)) = char_indices.next() {
-            if let Some(&(next_i, next)) = char_indices.peek() {
-                // The mode including the current character, assuming the
-                // current character does not result in a word boundary.
-                let next_mode = if c.is_lowercase() {
-                    WordMode::Lowercase
-                } else if c.is_uppercase() {
-                    WordMode::Uppercase
-                } else {
-                    mode
-                };
-
-                // Word boundary after if current is not uppercase and next
-                // is uppercase
-                if next_mode == WordMode::Lowercase && next.is_uppercase() {
-                    if !first_word {
-                        boundary(f)?;
+    for word in s.split(|c: char| !tables::allowed_in_word(c)) {
+        let mut start_of_word_idx = 0;
+        // Whether the previous character seen, ignoring nonspacing marks,
+        // was lowercase or non-Greek titlecase.
+        // Used for determining CamelBoundaries.
+        let mut prev_was_lowercase_or_non_greek_titlecase = false;
+        // If the previous character seen, ignoring nonspacing marks,
+        // was uppercase or titlecase, then this stores that character's index.
+        // Otherwise, it stores `None`.
+        // Used for determining HATBoundaries.
+        let mut index_of_preceding_uppercase_or_titlecase_letter: Option<usize> = None;
+
+        for (i, c) in word.char_indices() {
+            match tables::letter_casing(c) {
+                None => {
+                    // Nonspacing marks are ignored for the purpose of determining boundaries.
+                    if !tables::is_nonspacing_mark(c) {
+                        prev_was_lowercase_or_non_greek_titlecase = false;
+                        index_of_preceding_uppercase_or_titlecase_letter = None;
+                    }
+                }
+                Some(CasedLetterKind::Lowercase) => {
+                    prev_was_lowercase_or_non_greek_titlecase = true;
+                    // There is a HATBoundary before an uppercase or titlecase letter followed by a lowercase letter
+                    if let Some(preceding_idx) = index_of_preceding_uppercase_or_titlecase_letter {
+                        index_of_preceding_uppercase_or_titlecase_letter = None;
+                        if preceding_idx != start_of_word_idx {
+                            if !first_word {
+                                boundary(f)?;
+                            } else {
+                                first_word = false;
+                            }
+                            with_word(&word[start_of_word_idx..preceding_idx], f)?;
+                            start_of_word_idx = preceding_idx;
+                        }
                     }
-                    with_word(&word[init..next_i], f)?;
-                    first_word = false;
-                    init = next_i;
-                    mode = WordMode::Boundary;
-
-                // Otherwise if current and previous are uppercase and next
-                // is lowercase, word boundary before
-                } else if mode == WordMode::Uppercase && c.is_uppercase() && next.is_lowercase() {
-                    if !first_word {
-                        boundary(f)?;
+                }
+                Some(CasedLetterKind::Uppercase) => {
+                    index_of_preceding_uppercase_or_titlecase_letter = Some(i);
+                    // There is a CamelBoundary before an uppercase letter
+                    // that is preceded by a lowercase or non-Greek titlecase letter
+                    if prev_was_lowercase_or_non_greek_titlecase {
+                        prev_was_lowercase_or_non_greek_titlecase = false;
+                        if !first_word {
+                            boundary(f)?;
+                        } else {
+                            first_word = false;
+                        }
+                        with_word(&word[start_of_word_idx..i], f)?;
+                        start_of_word_idx = i;
+                    }
+                }
+                Some(CasedLetterKind::Titlecase) => {
+                    index_of_preceding_uppercase_or_titlecase_letter = Some(i);
+                    // There is always a HATBoundary before a non-Greek titlecase letter
+                    if is_non_greek_titlecase(c) {
+                        prev_was_lowercase_or_non_greek_titlecase = true;
+                        if i != start_of_word_idx {
+                            if !first_word {
+                                boundary(f)?;
+                            } else {
+                                first_word = false;
+                            }
+                            with_word(&word[start_of_word_idx..i], f)?;
+                            start_of_word_idx = i;
+                        }
                     } else {
-                        first_word = false;
+                        // There is a CamelBoundary before a titlecase letter
+                        // that is preceded by a lowercase or non-Greek titlecase letter
+                        if prev_was_lowercase_or_non_greek_titlecase {
+                            prev_was_lowercase_or_non_greek_titlecase = false;
+                            if !first_word {
+                                boundary(f)?;
+                            } else {
+                                first_word = false;
+                            }
+                            with_word(&word[start_of_word_idx..i], f)?;
+                            start_of_word_idx = i;
+                        }
                     }
-                    with_word(&word[init..i], f)?;
-                    init = i;
-                    mode = WordMode::Boundary;
-
-                // Otherwise no word boundary, just update the mode
-                } else {
-                    mode = next_mode;
                 }
+            }
+        }
+
+        if start_of_word_idx != word.len() {
+            // Collect trailing characters as a word
+            if !first_word {
+                boundary(f)?;
             } else {
-                // Collect trailing characters as a word
-                if !first_word {
-                    boundary(f)?;
-                } else {
-                    first_word = false;
-                }
-                with_word(&word[init..], f)?;
-                break;
+                first_word = false;
             }
+            with_word(&word[start_of_word_idx..], f)?;
         }
     }
 
@@ -179,13 +218,25 @@ fn uppercase(s: &str, f: &mut fmt::Formatter) -> fmt::Result {
     Ok(())
 }
 
-fn capitalize(s: &str, f: &mut fmt::Formatter) -> fmt::Result {
-    let mut char_indices = s.char_indices();
-    if let Some((_, c)) = char_indices.next() {
-        write!(f, "{}", c.to_uppercase())?;
-        if let Some((i, _)) = char_indices.next() {
-            lowercase(&s[i..], f)?;
+fn titlecase(s: &str, f: &mut fmt::Formatter) -> fmt::Result {
+    // Find the first cased character
+    if let Some(titlecase_idx) =
+        s.find(|c| tables::letter_casing(c).is_some() || c.is_lowercase() || c.is_uppercase())
+    {
+        // Everything before the first cased character is passed through unchanged.
+        f.write_str(&s[..titlecase_idx])?;
+
+        let rem = &s[titlecase_idx..];
+        let mut char_indices = rem.char_indices();
+        if let Some((_, c)) = char_indices.next() {
+            write!(f, "{}", tables::to_titlecase(c))?;
+            if let Some((i, _)) = char_indices.next() {
+                lowercase(&rem[i..], f)?;
+            }
         }
+    } else {
+        // If there are no cased characters, pass through the string unchanged
+        write!(f, "{}", s)?;
     }
 
     Ok(())
diff --git a/src/lower_camel.rs b/src/lower_camel.rs
index a31fc33..50d6dac 100644
--- a/src/lower_camel.rs
+++ b/src/lower_camel.rs
@@ -5,7 +5,7 @@ use alloc::{
     string::{String, ToString},
 };
 
-use crate::{capitalize, lowercase, transform};
+use crate::{lowercase, titlecase, transform};
 
 /// This trait defines a lower camel case conversion.
 ///
@@ -53,7 +53,7 @@ impl<T: AsRef<str>> fmt::Display for AsLowerCamelCase<T> {
                     first = false;
                     lowercase(s, f)
                 } else {
-                    capitalize(s, f)
+                    titlecase(s, f)
                 }
             },
             |_| Ok(()),
diff --git a/src/snake.rs b/src/snake.rs
index c3c8576..d7a2c6c 100644
--- a/src/snake.rs
+++ b/src/snake.rs
@@ -87,14 +87,14 @@ mod tests {
     t!(test12: "99BOTTLES" => "99bottles");
     t!(test13: "FieldNamE11" => "field_nam_e11");
     t!(test14: "abc123def456" => "abc123def456");
-    t!(test16: "abc123DEF456" => "abc123_def456");
+    t!(test16: "abc123DEF456" => "abc123def456");
     t!(test17: "abc123Def456" => "abc123_def456");
-    t!(test18: "abc123DEf456" => "abc123_d_ef456");
+    t!(test18: "abc123DEf456" => "abc123d_ef456");
     t!(test19: "ABC123def456" => "abc123def456");
     t!(test20: "ABC123DEF456" => "abc123def456");
     t!(test21: "ABC123Def456" => "abc123_def456");
     t!(test22: "ABC123DEf456" => "abc123d_ef456");
-    t!(test23: "ABC123dEEf456FOO" => "abc123d_e_ef456_foo");
+    t!(test23: "ABC123dEEf456FOO" => "abc123d_e_ef456foo");
     t!(test24: "abcDEF" => "abc_def");
     t!(test25: "ABcDE" => "a_bc_de");
 }
diff --git a/src/tables.rs b/src/tables.rs
new file mode 100644
index 0000000..2b21d03
--- /dev/null
+++ b/src/tables.rs
@@ -0,0 +1,1009 @@
+//! Automatically generated by `tables`.
+//! Do not edit manually.
+
+/// The version of Unicode that the data included with this crate is based on.
+///
+/// This crate also relies on Unicode data provided by the standard library;
+/// that data is versioned according to [`char::UNICODE_VERSION`].
+pub const UNICODE_VERSION: (u8, u8, u8) = (15, 1, 0);
+
+/// Whether this character can be part of a word.
+pub fn allowed_in_word(c: char) -> bool {
+    const BOTTOM_BITS_MASK: u32 = !((!0_u32) << 6);
+    let cp: u32 = c.into();
+    let top_bits = cp >> 6;
+    if top_bits < 0x7F0 {
+        let leaf_idx: u8 = ALLOWED_IN_WORD_ROOT[usize::try_from(top_bits).unwrap()];
+        let leaf = ALLOWED_IN_WORD_LEAVES[usize::from(leaf_idx)];
+        (leaf >> (cp & BOTTOM_BITS_MASK)) & 1 == 1
+    } else {
+        !matches!(cp, 0x0E0001 | 0x0E0020..=0x0E007F)
+    }
+}
+
+static ALLOWED_IN_WORD_ROOT: [u8; 2032] = [
+    0x01, 0x02, 0x03, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x07, 0x08,
+    0x00, 0x00, 0x09, 0x00, 0x00, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00, 0x0F, 0x10, 0x00, 0x00, 0x11,
+    0x12, 0x13, 0x14, 0x15, 0x00, 0x16, 0x00, 0x17, 0x00, 0x08, 0x00, 0x18, 0x00, 0x19, 0x00, 0x1A,
+    0x00, 0x1B, 0x1C, 0x00, 0x00, 0x1D, 0x00, 0x1E, 0x1F, 0x20, 0x00, 0x00, 0x21, 0x00, 0x22, 0x23,
+    0x00, 0x24, 0x25, 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x27, 0x28, 0x00,
+    0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2A, 0x2B, 0x2C, 0x2D, 0x00, 0x00, 0x2E,
+    0x2F, 0x00, 0x00, 0x00, 0x00, 0x30, 0x00, 0x31, 0x25, 0x00, 0x32, 0x00, 0x00, 0x33, 0x00, 0x34,
+    0x35, 0x36, 0x00, 0x37, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x39,
+    0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x41, 0x42, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41,
+    0x43, 0x44, 0x45, 0x00, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x46, 0x47, 0x41,
+    0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x48, 0x49, 0x41,
+    0x00, 0x00, 0x00, 0x4A, 0x00, 0x19, 0x00, 0x00, 0x4B, 0x4C, 0x4D, 0x4E, 0x41, 0x41, 0x41, 0x4F,
+    0x50, 0x00, 0x51, 0x00, 0x00, 0x00, 0x52, 0x53, 0x54, 0x55, 0x56, 0x41, 0x41, 0x41, 0x41, 0x41,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x57, 0x58, 0x00, 0x00, 0x00, 0x00, 0x59, 0x5A, 0x00, 0x5B, 0x5C, 0x00, 0x5D, 0x00,
+    0x5E, 0x5F, 0x00, 0x60, 0x61, 0x62, 0x00, 0x63, 0x00, 0x64, 0x00, 0x65, 0x00, 0x66, 0x00, 0x67,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41,
+    0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x68, 0x00, 0x69, 0x6A,
+    0x00, 0x00, 0x00, 0x00, 0x36, 0x6B, 0x00, 0x6C, 0x6D, 0x6E, 0x00, 0x1F, 0x6F, 0x70, 0x00, 0x71,
+    0x00, 0x00, 0x00, 0x00, 0x72, 0x73, 0x74, 0x75, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x62, 0x76,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x78, 0x00, 0x00, 0x79, 0x00, 0x00, 0x00, 0x00, 0x7A, 0x00, 0x7B, 0x73, 0x00, 0x7C, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7D, 0x00, 0x00, 0x7E, 0x7F, 0x00,
+    0x00, 0x80, 0x35, 0x81, 0x00, 0x82, 0x00, 0x83, 0x84, 0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x85, 0x00, 0x86, 0x00, 0x00, 0x00, 0x87, 0x00, 0x88, 0x89, 0x00, 0x34, 0x00, 0x00, 0x00,
+    0x26, 0x00, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x15, 0x1F, 0x8B, 0x8C, 0x00, 0x8D, 0x00, 0x00, 0x00,
+    0x00, 0x8E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8F, 0x00, 0x90, 0x00, 0x91,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x93,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x94, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x61, 0x00, 0x95, 0x96, 0x07, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x97, 0x00, 0x00, 0x00, 0x00, 0x15,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x98, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x57, 0x41, 0x99,
+    0x41, 0x41, 0x41, 0x46, 0x9A, 0x9B, 0x9C, 0x9D, 0x41, 0x9E, 0x00, 0x00, 0x41, 0x9F, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0xA0, 0xA1, 0xA2, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0xA3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0xA4, 0x00, 0xA5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00,
+    0xA6, 0x41, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0x00, 0x00, 0x41, 0x41, 0x41, 0x41,
+    0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0xAF, 0x41, 0xB0, 0x41, 0xB1,
+    0xB2, 0xB3, 0xB4, 0x00, 0x41, 0x41, 0x41, 0x41, 0x41, 0xB5, 0xB6, 0xB7, 0x41, 0x41, 0xB8, 0x44,
+];
+
+static ALLOWED_IN_WORD_LEAVES: [u64; 185] = [
+    0xFFFFFFFFFFFFFFFF,
+    0x03FF000000000000,
+    0x07FFFFFE07FFFFFE,
+    0x76AC040000000000,
+    0xFF7FFFFFFF7FFFFF,
+    0x0000501F0003FFC3,
+    0xBFDFFFFFFFFFFFFF,
+    0xFFFFFFFFFFFFFFCF,
+    0xFFBFFFFFFFFFFFFF,
+    0xFFFFFFFFFFFFFCFB,
+    0xFFFFFFFF03FFFFFF,
+    0xBFFFFFFFFFFF19FF,
+    0xFFFFFFFFFFFFFFB6,
+    0xFFFFFFFF07FF0000,
+    0xFFFFC3FFFFFFFFFF,
+    0x9FFFFDFF9FEFFFFF,
+    0xFFFFFFFFFFFF4000,
+    0x3C3FFFFFFFFFFFFF,
+    0x8000FFFFFFFFFFFF,
+    0xFFFFFFFFBFFFFFFF,
+    0xFFFFFFFFFFFCFEFF,
+    0xFFFFFFFBFFFFFFFF,
+    0xFFFEFFCFFFFFFFFF,
+    0xD3F3FFFFFFFFFFFF,
+    0xFFFCFFFFFFFFFFFF,
+    0xFFFEFFFFFFFFFFFF,
+    0xF807FFFFFFFFFFFF,
+    0x7F7FFFFFFFFFFFFF,
+    0xFFFFFFFFFFFFFFEF,
+    0xFDFFFFFFFFFF7FFF,
+    0xFFEFFFFFFFFFFFFF,
+    0x7FFFFFFFFFFFFFFF,
+    0xFFFFFFFFF3FF7FFF,
+    0xC2AFFFFF03000801,
+    0x3FFFFFFFFFFFFFDF,
+    0xFFFFFFFFF8002040,
+    0xFFFFFFFFFFFF03FF,
+    0xFFFFFFFF3FFFFFFF,
+    0xF7FFFFFFFFFFFFFF,
+    0xFFFFFE00FFFFFFFF,
+    0xFFFFFFFFFC00FFFF,
+    0xFFFFFFFFFFFFFFFE,
+    0xFFFF9FFFFFFFFFFF,
+    0xFFFFFFFFE7FFFFFE,
+    0xFFFFC7FFFFFFFFFF,
+    0xFF9FFFFFFFFFFFFF,
+    0xFFFFFFFFF08FFFFF,
+    0xFFFFFFFFFFFFB800,
+    0xFFFFFFFFFFFFFFCE,
+    0x000000003FFFFFFF,
+    0xBFFFC080FFFFFFFF,
+    0x800FF80003FFFFFF,
+    0x0FFFFFFFFFFFFFFF,
+    0x07FFFFFFFFFFFFFF,
+    0x3FFFFFFFFFFFFFFF,
+    0xFFFFFFFFFFF7FF00,
+    0x5FFFFFFFFFFFFFFF,
+    0x9FFF1FFF1FFF1FFC,
+    0x0000000000003000,
+    0x9FFF002000000000,
+    0x00000000FFFF9FFF,
+    0xFFFFFFE21FFFFFFE,
+    0xF3FFFD503F2FFC84,
+    0xFFFFFFFFFFFF43E0,
+    0x000000000000F3FF,
+    0x0000000000000000,
+    0x0000000040000084,
+    0xFFFFFF8000000000,
+    0xFFFFFFFFFFFFF800,
+    0xFFC000000FFFFFFF,
+    0xFFC0000000000000,
+    0x00000000000FFFFF,
+    0x0030000000000000,
+    0x0000000000400000,
+    0x21FFF81FFFFFFFFF,
+    0x0000800000000000,
+    0xFFFFFFFFC0000000,
+    0x0000000004000000,
+    0xFFF0000000000000,
+    0x0000FFFFFFC00000,
+    0x1F3EFFFE000000E0,
+    0xFFFFFFFEFFFFFFFF,
+    0xFFFFFFFF003CFFFF,
+    0xFFFF7FF000000000,
+    0x000003FF80000000,
+    0x00000000FFFEFF00,
+    0xFFFE0000000003FF,
+    0x000000000000FFFF,
+    0x3FFFFFFFFFFFFF80,
+    0xFFFFFFFFFFFF1FFF,
+    0xBFF0FFFFFFFFFFFF,
+    0xFF03FFFFFFFFFFFF,
+    0xFFFFFFFCFF800000,
+    0xFFFFFFFFFFFFF9FF,
+    0xFC3FF0FFFFFFFFFF,
+    0xFF0FFFFFFFFFFFFF,
+    0xE8FFFFFFFFFF3FFF,
+    0xFFFF3FFFFFFFFFFF,
+    0xFFFFFFFF7FFFFFFF,
+    0xFFFFFFFF3FFFC001,
+    0xFC7FFFFF0FFFFFFF,
+    0xFFFCFFFF3FFFFFFF,
+    0xFFFFF3FFF7FFFFFF,
+    0xFFFFF7FFFFFFFFFF,
+    0xFFFFFDFFFFFFFFFF,
+    0x0003FFFFFFFFFFFF,
+    0xFFFFFFFFFFFFFFF8,
+    0xFFFFFFFFFFFF0000,
+    0x0FFFFFFFFFFF7FFF,
+    0x0000FFFFFC00FFFF,
+    0xFFFFF08000080000,
+    0x07FFFFFE03FF0001,
+    0xFFFFFFE007FFFFFE,
+    0xC1FF8080FFFFFFFF,
+    0x007FFFFFFFFFFFF8,
+    0x01FFFFFFFFFFFFFF,
+    0xFFFFFFFEE0008C00,
+    0xE00000000000FFFF,
+    0xFFFFFFFFFFFEFFFF,
+    0xFFFF7FFFFFFFFFFF,
+    0xFE7FFFFFFF7FFFFF,
+    0x7FFFFFFF7FFFFFFF,
+    0x7FFFFFFFFE00FFFF,
+    0xFF80FFFFFFFFFEFF,
+    0xFFFFFFFFE1FFFFFF,
+    0xFFFFDFFFFFFFFFFF,
+    0xFFFFFFFFFC1FFFFF,
+    0xFFFFFFFFFFFFFC3F,
+    0xFFFFFFFFFFFFC07F,
+    0xFFFFFFFFFFFFDFFC,
+    0xFFCFFFFFFFFFFFF0,
+    0xFFFFFFFF17FFDE1F,
+    0xC0FFFFFFFFFFFFFF,
+    0xFFFFFFFFD3FF07FF,
+    0xFFFFFFFFFFFFFFBF,
+    0xFFFFFFFFFF000001,
+    0xFFFFE000FFFFFFF1,
+    0xFDFFFFFFFFFFFFFF,
+    0xFFFFFFFFFFFFFF8F,
+    0xFFFFFFFFFFFFFF80,
+    0xFFFFFFF823FFFFFF,
+    0xFFFFFFFFFFFFFC00,
+    0xFFFCFFFFFFFFFFC1,
+    0xFE7FFFFFFFFFFFFF,
+    0xFFFFFFFFFFFF0007,
+    0x7FFC0000001FFFFF,
+    0xFFE0FFFFFFFFFFFF,
+    0xFFF9FFFFFFFFFFFF,
+    0x0000FFFFFFFFFFFF,
+    0xFFDFFFFFFFFFFFFF,
+    0x007FFFFFFFFFFFFF,
+    0xFFFFFFFFF87FFFFF,
+    0xFFFFFFF06FFFFFFF,
+    0xFFFFFFFFFFFFFFF0,
+    0x0000018000000000,
+    0xF807E3E000000000,
+    0x00003C0000000FE7,
+    0xFFFFF80000000000,
+    0xFFFFFFFFFFFFFFDC,
+    0xFFFFFFFFFF800000,
+    0xF87FFFFFFFFFFFFF,
+    0x00201FFFFFFFFFFF,
+    0xFFFFFFFFFFFFF010,
+    0xFFFFFFFFFFFF7FFF,
+    0xFFFEEFFFFFFFFFFF,
+    0xFFFFBFFFFFFFFFFF,
+    0x0000F00000000000,
+    0x00018000FFF00000,
+    0xFFC0000000010001,
+    0xFFFF000000001FFF,
+    0xFFFF03FFFFFF03FF,
+    0xFFFFC000000003FF,
+    0x0000003FFFFFFFFF,
+    0xF00000000000FFF8,
+    0xFFFFFFC0FFFCFE00,
+    0xE000E0000F000000,
+    0x0780000000000000,
+    0xFFFEF000FC000000,
+    0x000000000000F000,
+    0x00000000FC00FF00,
+    0xFFFCC0000000FF00,
+    0xE000C000FFF00000,
+    0x400000000000FE00,
+    0xFE00FE00F0003FC0,
+    0x0000000000080000,
+];
+
+#[cfg(test)]
+#[test]
+fn test_allowed_in_words_casing_closure() {
+    for c in '\0'..=char::MAX {
+        if allowed_in_word(c) {
+            assert!(c.to_uppercase().all(allowed_in_word));
+            assert!(c.to_lowercase().all(allowed_in_word));
+        }
+    }
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+#[repr(u8)]
+pub enum CasedLetterKind {
+    Lowercase = 1,
+    Uppercase = 2,
+    Titlecase = 3,
+}
+
+/// The case of this letter, or `None` if it is not a cased letter.
+pub fn letter_casing(c: char) -> Option<CasedLetterKind> {
+    const BOTTOM_BITS_MASK: u32 = !((!0_u32) << 6);
+    let cp: u32 = c.into();
+    let top_bits = cp >> 6;
+    if top_bits < 0x7A6 {
+        let leaf_idx: u8 = LETTER_CASING_ROOT[usize::try_from(top_bits).unwrap()];
+        let leaf = LETTER_CASING_LEAVES[usize::from(leaf_idx)];
+        match (leaf >> ((cp & BOTTOM_BITS_MASK) * 2)) & 3 {
+            0 => None,
+            1 => Some(CasedLetterKind::Lowercase),
+            2 => Some(CasedLetterKind::Uppercase),
+            3 => Some(CasedLetterKind::Titlecase),
+            _ => unreachable!(),
+        }
+    } else {
+        None
+    }
+}
+
+/// Whether the character is a non-Greek titlecase letter.
+pub fn is_non_greek_titlecase(c: char) -> bool {
+    matches!(c, '\u{01C5}' | '\u{01C8}' | '\u{01CB}' | '\u{01F2}')
+}
+
+static LETTER_CASING_ROOT: [u8; 1958] = [
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x00, 0x00, 0x0B, 0x0C, 0x0D,
+    0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x15, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x17,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x18, 0x00, 0x19, 0x1A, 0x1B, 0x00, 0x1C, 0x1C, 0x1D, 0x1C, 0x1E, 0x1F, 0x20, 0x21,
+    0x00, 0x00, 0x00, 0x00, 0x22, 0x23, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x0E, 0x25, 0x1C, 0x26, 0x27, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0x29, 0x00, 0x2A, 0x2B, 0x2C, 0x2D,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2E, 0x2F, 0x30, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x32, 0x33, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x34, 0x35, 0x36, 0x37, 0x00, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x3A, 0x3B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x15, 0x3C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x3E, 0x3F, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4E, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x4F, 0x50,
+];
+
+static LETTER_CASING_LEAVES: [u128; 81] = [
+    0x00000000000000000000000000000000,
+    0x0015555555555554002AAAAAAAAAAAA8,
+    0x00000400000000000000000000000000,
+    0x55551555555555556AAA2AAAAAAAAAAA,
+    0x99996666666666666666666666666666,
+    0x599A6666666666666666666666659999,
+    0x561699A9A659A6669A56A69AA5A9A669,
+    0x6666A679666666666599999999E79E00,
+    0x69A55566666666666666666666666666,
+    0x55555555555555555555555566666A99,
+    0x00000000555555555555545555555555,
+    0x85406066000000000000000000000000,
+    0x5555555555AAAA8AAAAAAAA9A22A2000,
+    0xA969865566666666666656A595555555,
+    0x55555555AAAAAAAAAAAAAAAAAAAAAAAA,
+    0x66666666666666665555555555555555,
+    0x66666666666666666666666666600006,
+    0x6666666666666666666666665999999A,
+    0xAAAAAAA8666666666666666666666666,
+    0x555555555555555500002AAAAAAAAAAA,
+    0x00000000000000000000000000015555,
+    0xAAAAAAAAAAAAAAAA0000000000000000,
+    0x54155555555555555555555508008AAA,
+    0x05550AAAAAAAAAAAAAAAAAAAAAAAAAAA,
+    0xA82AAAAAAAAAAAAAAAAAAAAA00015555,
+    0x00000000005555555555555555555555,
+    0x55545555554000000000000000000000,
+    0x00000000000000000015555555555555,
+    0x66666666666666666666666666666666,
+    0x66666666666666666555566666666666,
+    0xAAAA5555AAAA55550AAA0555AAAA5555,
+    0x05555555AAAA5555888855550AAA0555,
+    0x13AA5155FFFF5555FFFF5555FFFF5555,
+    0x03AA515002AA555500AA505503AA5150,
+    0xA50401AA4AA222000AA8086A5A908020,
+    0x00000000000000000000000010055800,
+    0x00000000000000000000000000000180,
+    0xA0555966A99996A65555555555555555,
+    0x00000060198001666666666666666666,
+    0x00000000040045555555555555555555,
+    0x00000000066666666666666666666666,
+    0x00000000000000000066666666666666,
+    0x66666665666666600000000000000000,
+    0x69995554666666666666666666666666,
+    0x666666AA6AA666666666656619806666,
+    0x0010180000000000000664460019AA66,
+    0x55555555000000000000000000000000,
+    0x55555555000155550015555555555555,
+    0x55555555555555555555555555555555,
+    0x00000000000000000000554000001555,
+    0x002AAAAAAAAAAAA80000000000000000,
+    0x00000000000000000015555555555554,
+    0x555555555555AAAAAAAAAAAAAAAAAAAA,
+    0x00000000000000000000000055555555,
+    0xAAAAAAAA000000000000000000000000,
+    0x0055555555555555555500AAAAAAAAAA,
+    0xAA2AAAAA000000000000000000000000,
+    0x014555455555554555554A2AAA2AAAAA,
+    0x0000002AAAAAAAAAAAAAAAAAAAAAAAAA,
+    0x00000015555555555555555555555555,
+    0x00000000000000005555555555555555,
+    0x5555555555555555AAAAAAAAAAAAAAAA,
+    0xAAAAAA5555555555555AAAAAAAAAAAAA,
+    0xAAAAAAAAAAAA5555555551555AAAAAAA,
+    0x54455AAAA2A82820A25555555555555A,
+    0x55555555555AAAAAAAAAAAAA55555455,
+    0x2A8A55555555555552AAA2AAA82A8A55,
+    0xAAAAAAAAAA55555555555552AAA022AA,
+    0x555AAAAAAAAAAAAA5555555555555AAA,
+    0x555555555AAAAAAAAAAAAA5555555555,
+    0xAA5555555555555AAAAAAAAAAAAA5555,
+    0xAAAAAAAA5555555555555AAAAAAAAAAA,
+    0xAAAAAAAAAAAA055555555555555AAAAA,
+    0x552AAAAAAAAAAAA55515555555555552,
+    0x555552AAAAAAAAAAAA55515555555555,
+    0x555555552AAAAAAAAAAAA55515555555,
+    0x555555555552AAAAAAAAAAAA55515555,
+    0x00000000000000000000000000655515,
+    0x00000000001554001555555555455555,
+    0x555555555555555AAAAAAAAAAAAAAAAA,
+    0x00000000000000000000000000000055,
+];
+
+/// Whether this character is a nonspacing or enclosing mark (two-level lookup: a root byte per 128-code-point chunk selects a 128-bit leaf bitmap).
+pub fn is_nonspacing_mark(c: char) -> bool {
+    const BOTTOM_BITS_MASK: u32 = !((!0_u32) << 7); // 0x7F: the low 7 bits give the bit position inside a leaf
+    let cp: u32 = c.into();
+    let top_bits = cp >> 7; // number of the 128-code-point chunk containing `c`
+    if top_bits < 0x3D3 { // 0x3D3 == 979, the length of NONSPACING_MARKS_ROOT, so the indexing below stays in bounds
+        let leaf_idx: u8 = NONSPACING_MARKS_ROOT[usize::try_from(top_bits).unwrap()]; // conversion cannot fail: top_bits < 979
+        let leaf = NONSPACING_MARKS_LEAVES[usize::from(leaf_idx)]; // several chunks may share one leaf
+        (leaf >> (cp & BOTTOM_BITS_MASK)) & 1 == 1 // bit n of the leaf answers for the n-th code point of the chunk
+    } else {
+        matches!(cp, 0x0E0100..=0x0E01EF) // past the table, only this range counts as a mark
+    }
+}
+
+static NONSPACING_MARKS_ROOT: [u8; 979] = [
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x02, 0x00, 0x03, 0x04, 0x05, 0x06, 0x07,
+    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+    0x18, 0x19, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1B, 0x1C,
+    0x1D, 0x1E, 0x1F, 0x00, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x26, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x27, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0x29, 0x2A, 0x00, 0x00, 0x00, 0x00,
+    0x2B, 0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2D, 0x2E, 0x00, 0x00,
+    0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x00, 0x35, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x38, 0x00, 0x39, 0x3A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x3B, 0x3C, 0x00, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x3E, 0x3F, 0x40,
+    0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x00, 0x48, 0x49, 0x00, 0x4A, 0x4B, 0x4C, 0x4D, 0x00,
+    0x4E, 0x00, 0x4F, 0x50, 0x51, 0x52, 0x00, 0x00, 0x53, 0x54, 0x55, 0x56, 0x00, 0x57, 0x58, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x5A, 0x5B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5C, 0x5D,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5F, 0x00,
+    0x00, 0x00, 0x60, 0x61, 0x62, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x63, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x65, 0x66, 0x5B, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x69, 0x6A,
+];
+
+static NONSPACING_MARKS_LEAVES: [u128; 107] = [
+    0x00000000000000000000000000000000,
+    0x0000FFFFFFFFFFFFFFFFFFFFFFFFFFFF,
+    0x000000000000000000000000000003F8,
+    0x00000000000000B6BFFFFFFFFFFE0000,
+    0x00010000FFFFF8000000000007FF0000,
+    0x00003D9F9FC000000000000000000000,
+    0x00000000000007FFFFFF000000020000,
+    0x200FF800000000000001FFC000000000,
+    0x000000000E00000000003EEFFBC00000,
+    0xFFFFFFFBFFFFFC0000000000FF000000,
+    0x0000000C00FE21FE1400000000000007,
+    0x4000000C0000201E1000000000000002,
+    0x00230000000239861000000000000006,
+    0xFC00000C000021BE1000000000000006,
+    0x0000000C0060201E9000000000000002,
+    0x00000000000020010000000000000004,
+    0x0000000C00603DC1D000000000000011,
+    0x0000000C000030409000000000000002,
+    0x0000000C0000201E1800000000000003,
+    0x00000000005C04000000000000000002,
+    0x0000000000007F8007F2000000000000,
+    0x0000000000007F001FF2000000000000,
+    0x7FFE00000000000002A0000003000000,
+    0x00000000000000401FFFFFFFFEFFE0DF,
+    0x001E0001C300000066FDE00000000000,
+    0x00000000000000000000000020002064,
+    0x00000000E00000000000000000000000,
+    0x000C0000000C0000000C0000001C0000,
+    0x00000000200FFE403FB0000000000000,
+    0x0000000000000000000000000000B800,
+    0x00000000000000000000020000000060,
+    0x00000000000000000E04018700000000,
+    0x9FF81FE57F4000000000000009800000,
+    0x0000000000007FFFFFFF000000000000,
+    0x000FF8000000000417D000000000000F,
+    0x0003A3400000000000003B3C00000003,
+    0x000000000000000000CFF00000000000,
+    0x031021FDFFF700000000000000000000,
+    0xFFFFFFFFFFFFFFFF0000000000000000,
+    0x0001FFFFFFFF00000000000000000000,
+    0x00038000000000000000000000000000,
+    0x80000000000000000000000000000000,
+    0xFFFFFFFF000000000000000000000000,
+    0x000000000000000000003C0000000000,
+    0x00000000000000000000000006000000,
+    0x3FF78000000000000000000000000000,
+    0x000300000000000000000000C0000000,
+    0x00000000000000000000106000000844,
+    0x8003FFFF000000300000000000000000,
+    0x000000000003FF8000003FC000000000,
+    0x000000200000000033C8000000000007,
+    0x100000000000100800667E0000000000,
+    0x0040300000000002C19D000000000000,
+    0x00002120000000000000000000000000,
+    0x00000000000000000000000040000000,
+    0x00000000000000000000FFFF0000FFFF,
+    0x20000000000000000000000000000000,
+    0x00000001000000000000000000000000,
+    0x07C00000000000000000000000000000,
+    0x0000000000000000870000000000F06E,
+    0x00000060000000000000000000000000,
+    0x0000000000000000000000F000000000,
+    0xE0000000000000000000180000000000,
+    0x000000000001FFC00000000000000000,
+    0x0000000000000000000000000000003C,
+    0x801900000000007FFF00000000000002,
+    0x00000000000000040678000000000003,
+    0x0008000000000000001FEF8000000007,
+    0x0000000000009E007FC0000000000003,
+    0x000000000000000240D3800000000000,
+    0x000007F8800000000000000000000000,
+    0x001F1FC0000000011800000000000003,
+    0x000000004000005CFF00000000000000,
+    0x000000000000000D85F8000000000000,
+    0x0000000030000001B03C000000000000,
+    0x0000000000000001A7F8000000000000,
+    0x000000000000000000BF280000000000,
+    0x000000000000000000000FBCE0000000,
+    0x000000000000000006FF800000000000,
+    0x00000000000000085800000000000000,
+    0x000000010CF000000000000000000000,
+    0x000000000E7E008079F80000000007FE,
+    0x000000000000000000000000037FFC00,
+    0x0000000000000000BF7F000000000000,
+    0x0000000000000000006DFCFFFFFC0000,
+    0x00000000000000BFB47E000000000000,
+    0x00000000000000000000000000A30000,
+    0x00180000000000000000000000000000,
+    0x000000000000000507C0000000000003,
+    0x00000000003FFF810000000000000000,
+    0x001F0000000000000000000000000000,
+    0x0000000000000000007F000000000000,
+    0x00000000000080000000000000000000,
+    0x00000010000000000000000000078000,
+    0x00000000000000000000000060000000,
+    0x000000000000007FFFFF3FFFFFFFFFFF,
+    0xF8000380000000000000000000000000,
+    0x000000000000000000003C0000000FE7,
+    0x000000000000001C0000000000000000,
+    0x00201FFFFFFFFFFFF87FFFFFFFFFFFFF,
+    0x00000000000000000000FFFEF8000010,
+    0x0000000000000000000007DBF9FFFF7F,
+    0x00000000000000000000000000008000,
+    0x0000F000000000000000400000000000,
+    0x0000F000000000000000000000000000,
+    0x00000000007F00000000000000000000,
+    0x00000000000007F00000000000000000,
+];
+
+use core::{
+    fmt::{self, Write},
+    iter,
+};
+
+#[derive(Clone, Debug)] // `Clone` is what lets the `Display` impl iterate over a copy of the state
+pub enum ToTitlecase { // iterator state: each variant stores the chars that are still to be yielded, in order
+    Zero, // exhausted
+    One(char),
+    Two(char, char),
+    Three(char, char, char), // longest mapping this type can hold
+}
+
+impl Iterator for ToTitlecase {
+    type Item = char;
+
+    fn next(&mut self) -> Option<Self::Item> { // pops the front char by stepping down to the next smaller variant
+        match *self {
+            Self::Zero => None, // stays `Zero`, so `None` is returned on every later call too
+            Self::One(c) => {
+                *self = Self::Zero;
+                Some(c)
+            }
+            Self::Two(b, c) => {
+                *self = Self::One(c); // keep the remaining char for the next call
+                Some(b)
+            }
+            Self::Three(a, b, c) => {
+                *self = Self::Two(b, c);
+                Some(a)
+            }
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) { // exact: lower and upper bound are both the number of stored chars
+        let size = match self {
+            Self::Zero => 0,
+            Self::One(_) => 1,
+            Self::Two(..) => 2,
+            Self::Three(..) => 3,
+        };
+        (size, Some(size))
+    }
+}
+
+impl iter::ExactSizeIterator for ToTitlecase {} // relies on `size_hint` reporting the exact remaining length
+
+impl iter::FusedIterator for ToTitlecase {} // `next` keeps returning `None` once the state is `Zero`
+
+impl fmt::Display for ToTitlecase {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // writes the chars not yet yielded, in order
+        for c in self.clone() { // iterate over a copy so that formatting does not consume `self`
+            f.write_char(c)?;
+        }
+        Ok(())
+    }
+}
+
+/// Returns an iterator that yields the titlecase mapping of this `char` as one or more `char`s.
+pub fn to_titlecase(c: char) -> ToTitlecase {
+    // ASCII fast path
+    if c.is_ascii() {
+        ToTitlecase::One(c.to_ascii_uppercase())
+    } else if let Ok(idx) = TITLECASE_MAPPINGS.binary_search_by_key(&c, |&(c2, _)| c2) { // binary search keyed on the source char
+        match TITLECASE_MAPPINGS[idx].1 { // leading `Some`s are the mapping; `None` pads the unused slots
+            [None, ..] => ToTitlecase::Zero,
+            [Some(a), None, ..] => ToTitlecase::One(a),
+            [Some(a), Some(b), None] => ToTitlecase::Two(a, b),
+            [Some(a), Some(b), Some(c)] => ToTitlecase::Three(a, b, c),
+        }
+    } else { // not in the table: fall back to the uppercase mapping from `core`
+        let mut uppercase = c.to_uppercase();
+        match uppercase.size_hint().0 { // the lower bound of the hint is treated as the exact char count
+            0 => ToTitlecase::Zero,
+            1 => ToTitlecase::One(uppercase.next().unwrap()),
+            2 => ToTitlecase::Two(uppercase.next().unwrap(), uppercase.next().unwrap()),
+            3 => ToTitlecase::Three(
+                uppercase.next().unwrap(),
+                uppercase.next().unwrap(),
+                uppercase.next().unwrap(),
+            ),
+            _ => unreachable!(), // assumes the uppercase mapping never exceeds three chars; `ToTitlecase` has no larger variant
+        }
+    }
+}
+
+/// Sorted list of characters and their titlecase mappings.
+/// Only characters whose titlecase differs from uppercase are included.
+static TITLECASE_MAPPINGS: [(char, [Option<char>; 3]); 135] = [
+    ('ß', [Some('S'), Some('s'), None]),
+    ('Ǆ', [Some('ǅ'), None, None]),
+    ('ǅ', [Some('ǅ'), None, None]),
+    ('ǆ', [Some('ǅ'), None, None]),
+    ('Ǉ', [Some('ǈ'), None, None]),
+    ('ǈ', [Some('ǈ'), None, None]),
+    ('ǉ', [Some('ǈ'), None, None]),
+    ('Ǌ', [Some('ǋ'), None, None]),
+    ('ǋ', [Some('ǋ'), None, None]),
+    ('ǌ', [Some('ǋ'), None, None]),
+    ('Ǳ', [Some('ǲ'), None, None]),
+    ('ǲ', [Some('ǲ'), None, None]),
+    ('ǳ', [Some('ǲ'), None, None]),
+    ('և', [Some('Ե'), Some('ւ'), None]),
+    ('ა', [Some('ა'), None, None]),
+    ('ბ', [Some('ბ'), None, None]),
+    ('გ', [Some('გ'), None, None]),
+    ('დ', [Some('დ'), None, None]),
+    ('ე', [Some('ე'), None, None]),
+    ('ვ', [Some('ვ'), None, None]),
+    ('ზ', [Some('ზ'), None, None]),
+    ('თ', [Some('თ'), None, None]),
+    ('ი', [Some('ი'), None, None]),
+    ('კ', [Some('კ'), None, None]),
+    ('ლ', [Some('ლ'), None, None]),
+    ('მ', [Some('მ'), None, None]),
+    ('ნ', [Some('ნ'), None, None]),
+    ('ო', [Some('ო'), None, None]),
+    ('პ', [Some('პ'), None, None]),
+    ('ჟ', [Some('ჟ'), None, None]),
+    ('რ', [Some('რ'), None, None]),
+    ('ს', [Some('ს'), None, None]),
+    ('ტ', [Some('ტ'), None, None]),
+    ('უ', [Some('უ'), None, None]),
+    ('ფ', [Some('ფ'), None, None]),
+    ('ქ', [Some('ქ'), None, None]),
+    ('ღ', [Some('ღ'), None, None]),
+    ('ყ', [Some('ყ'), None, None]),
+    ('შ', [Some('შ'), None, None]),
+    ('ჩ', [Some('ჩ'), None, None]),
+    ('ც', [Some('ც'), None, None]),
+    ('ძ', [Some('ძ'), None, None]),
+    ('წ', [Some('წ'), None, None]),
+    ('ჭ', [Some('ჭ'), None, None]),
+    ('ხ', [Some('ხ'), None, None]),
+    ('ჯ', [Some('ჯ'), None, None]),
+    ('ჰ', [Some('ჰ'), None, None]),
+    ('ჱ', [Some('ჱ'), None, None]),
+    ('ჲ', [Some('ჲ'), None, None]),
+    ('ჳ', [Some('ჳ'), None, None]),
+    ('ჴ', [Some('ჴ'), None, None]),
+    ('ჵ', [Some('ჵ'), None, None]),
+    ('ჶ', [Some('ჶ'), None, None]),
+    ('ჷ', [Some('ჷ'), None, None]),
+    ('ჸ', [Some('ჸ'), None, None]),
+    ('ჹ', [Some('ჹ'), None, None]),
+    ('ჺ', [Some('ჺ'), None, None]),
+    ('ჽ', [Some('ჽ'), None, None]),
+    ('ჾ', [Some('ჾ'), None, None]),
+    ('ჿ', [Some('ჿ'), None, None]),
+    ('ᾀ', [Some('ᾈ'), None, None]),
+    ('ᾁ', [Some('ᾉ'), None, None]),
+    ('ᾂ', [Some('ᾊ'), None, None]),
+    ('ᾃ', [Some('ᾋ'), None, None]),
+    ('ᾄ', [Some('ᾌ'), None, None]),
+    ('ᾅ', [Some('ᾍ'), None, None]),
+    ('ᾆ', [Some('ᾎ'), None, None]),
+    ('ᾇ', [Some('ᾏ'), None, None]),
+    ('ᾈ', [Some('ᾈ'), None, None]),
+    ('ᾉ', [Some('ᾉ'), None, None]),
+    ('ᾊ', [Some('ᾊ'), None, None]),
+    ('ᾋ', [Some('ᾋ'), None, None]),
+    ('ᾌ', [Some('ᾌ'), None, None]),
+    ('ᾍ', [Some('ᾍ'), None, None]),
+    ('ᾎ', [Some('ᾎ'), None, None]),
+    ('ᾏ', [Some('ᾏ'), None, None]),
+    ('ᾐ', [Some('ᾘ'), None, None]),
+    ('ᾑ', [Some('ᾙ'), None, None]),
+    ('ᾒ', [Some('ᾚ'), None, None]),
+    ('ᾓ', [Some('ᾛ'), None, None]),
+    ('ᾔ', [Some('ᾜ'), None, None]),
+    ('ᾕ', [Some('ᾝ'), None, None]),
+    ('ᾖ', [Some('ᾞ'), None, None]),
+    ('ᾗ', [Some('ᾟ'), None, None]),
+    ('ᾘ', [Some('ᾘ'), None, None]),
+    ('ᾙ', [Some('ᾙ'), None, None]),
+    ('ᾚ', [Some('ᾚ'), None, None]),
+    ('ᾛ', [Some('ᾛ'), None, None]),
+    ('ᾜ', [Some('ᾜ'), None, None]),
+    ('ᾝ', [Some('ᾝ'), None, None]),
+    ('ᾞ', [Some('ᾞ'), None, None]),
+    ('ᾟ', [Some('ᾟ'), None, None]),
+    ('ᾠ', [Some('ᾨ'), None, None]),
+    ('ᾡ', [Some('ᾩ'), None, None]),
+    ('ᾢ', [Some('ᾪ'), None, None]),
+    ('ᾣ', [Some('ᾫ'), None, None]),
+    ('ᾤ', [Some('ᾬ'), None, None]),
+    ('ᾥ', [Some('ᾭ'), None, None]),
+    ('ᾦ', [Some('ᾮ'), None, None]),
+    ('ᾧ', [Some('ᾯ'), None, None]),
+    ('ᾨ', [Some('ᾨ'), None, None]),
+    ('ᾩ', [Some('ᾩ'), None, None]),
+    ('ᾪ', [Some('ᾪ'), None, None]),
+    ('ᾫ', [Some('ᾫ'), None, None]),
+    ('ᾬ', [Some('ᾬ'), None, None]),
+    ('ᾭ', [Some('ᾭ'), None, None]),
+    ('ᾮ', [Some('ᾮ'), None, None]),
+    ('ᾯ', [Some('ᾯ'), None, None]),
+    ('ᾲ', [Some('Ὰ'), Some('ͅ'), None]),
+    ('ᾳ', [Some('ᾼ'), None, None]),
+    ('ᾴ', [Some('Ά'), Some('ͅ'), None]),
+    ('ᾷ', [Some('Α'), Some('͂'), Some('ͅ')]),
+    ('ᾼ', [Some('ᾼ'), None, None]),
+    ('ῂ', [Some('Ὴ'), Some('ͅ'), None]),
+    ('ῃ', [Some('ῌ'), None, None]),
+    ('ῄ', [Some('Ή'), Some('ͅ'), None]),
+    ('ῇ', [Some('Η'), Some('͂'), Some('ͅ')]),
+    ('ῌ', [Some('ῌ'), None, None]),
+    ('ῲ', [Some('Ὼ'), Some('ͅ'), None]),
+    ('ῳ', [Some('ῼ'), None, None]),
+    ('ῴ', [Some('Ώ'), Some('ͅ'), None]),
+    ('ῷ', [Some('Ω'), Some('͂'), Some('ͅ')]),
+    ('ῼ', [Some('ῼ'), None, None]),
+    ('ﬀ', [Some('F'), Some('f'), None]),
+    ('ﬁ', [Some('F'), Some('i'), None]),
+    ('ﬂ', [Some('F'), Some('l'), None]),
+    ('ﬃ', [Some('F'), Some('f'), Some('i')]),
+    ('ﬄ', [Some('F'), Some('f'), Some('l')]),
+    ('ﬅ', [Some('S'), Some('t'), None]),
+    ('ﬆ', [Some('S'), Some('t'), None]),
+    ('ﬓ', [Some('Մ'), Some('ն'), None]),
+    ('ﬔ', [Some('Մ'), Some('ե'), None]),
+    ('ﬕ', [Some('Մ'), Some('ի'), None]),
+    ('ﬖ', [Some('Վ'), Some('ն'), None]),
+    ('ﬗ', [Some('Մ'), Some('խ'), None]),
+];
diff --git a/src/title.rs b/src/title.rs
index 2453430..cc22971 100644
--- a/src/title.rs
+++ b/src/title.rs
@@ -5,7 +5,7 @@ use alloc::{
     string::{String, ToString},
 };
 
-use crate::{capitalize, transform};
+use crate::{titlecase, transform};
 
 /// This trait defines a title case conversion.
 ///
@@ -45,7 +45,7 @@ pub struct AsTitleCase<T: AsRef<str>>(pub T);
 
 impl<T: AsRef<str>> fmt::Display for AsTitleCase<T> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        transform(self.0.as_ref(), capitalize, |f| write!(f, " "), f)
+        transform(self.0.as_ref(), titlecase, |f| write!(f, " "), f)
     }
 }
 
diff --git a/src/train.rs b/src/train.rs
index 4fcc195..cc2f873 100644
--- a/src/train.rs
+++ b/src/train.rs
@@ -2,7 +2,7 @@ use core::fmt;
 
 use alloc::{borrow::ToOwned, string::ToString};
 
-use crate::{capitalize, transform};
+use crate::{titlecase, transform};
 
 /// This trait defines a train case conversion.
 ///
@@ -42,7 +42,7 @@ pub struct AsTrainCase<T: AsRef<str>>(pub T);
 
 impl<T: AsRef<str>> fmt::Display for AsTrainCase<T> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        transform(self.0.as_ref(), capitalize, |f| write!(f, "-"), f)
+        transform(self.0.as_ref(), titlecase, |f| write!(f, "-"), f)
     }
 }
 
@@ -67,21 +67,50 @@ mod tests {
     t!(test6: "SHOUTY_SNAKE_CASE" => "Shouty-Snake-Case");
     t!(test7: "snake_case" => "Snake-Case");
     t!(test8: "this-contains_ ALLKinds OfWord_Boundaries" => "This-Contains-All-Kinds-Of-Word-Boundaries");
-    #[cfg(feature = "unicode")]
     t!(test9: "XΣXΣ baﬄe" => "Xσxς-Baﬄe");
     t!(test10: "XMLHttpRequest" => "Xml-Http-Request");
     t!(test11: "FIELD_NAME11" => "Field-Name11");
-    t!(test12: "99BOTTLES" => "99bottles");
+    t!(test12: "99BOTTLES" => "99Bottles");
     t!(test13: "FieldNamE11" => "Field-Nam-E11");
     t!(test14: "abc123def456" => "Abc123def456");
-    t!(test16: "abc123DEF456" => "Abc123-Def456");
+    t!(test16: "abc123DEF456" => "Abc123def456");
     t!(test17: "abc123Def456" => "Abc123-Def456");
-    t!(test18: "abc123DEf456" => "Abc123-D-Ef456");
+    t!(test18: "abc123DEf456" => "Abc123d-Ef456");
     t!(test19: "ABC123def456" => "Abc123def456");
     t!(test20: "ABC123DEF456" => "Abc123def456");
     t!(test21: "ABC123Def456" => "Abc123-Def456");
     t!(test22: "ABC123DEf456" => "Abc123d-Ef456");
-    t!(test23: "ABC123dEEf456FOO" => "Abc123d-E-Ef456-Foo");
+    t!(test23: "ABC123dEEf456FOO" => "Abc123d-E-Ef456foo");
     t!(test24: "abcDEF" => "Abc-Def");
     t!(test25: "ABcDE" => "A-Bc-De");
+    t!(test26: "ǄO" => "ǅo");
+    t!(test27: "ǆO" => "ǅ-O");
+    t!(test28: "ǆo" => "ǅo");
+    t!(test29: "∇𝐀" => "∇𝐀");
+    t!(test30: "∇𝔞" => "∇𝔞");
+    t!(test31: "𝔞" => "𝔞");
+    t!(test32: "🐈‍⬛🐈" => "\u{200d}");
+    t!(test33: "🐈‍⬛🐈a" => "\u{200d}-A");
+    t!(test34: "A🐈‍⬛🐈a" => "A-\u{200D}-A");
+    t!(test35: "☕" => "");
+    t!(test36: "a*️⃣b" => "A-\u{fe0f}-B");
+    t!(test37: "a*b" => "A-B");
+    t!(test38: "\u{0301}a" => "\u{0301}A");
+    t!(test39: "a\u{0301}B" => "A\u{0301}-B");
+    t!(test40: "ﬄololo" => "Fflololo");
+
+    t!(uts55_test1: "TypeII" => "Type-Ii");
+    t!(uts55_test2: "OCaml" => "O-Caml");
+    t!(uts55_test3: "HTTPЗапрос" => "Http-Запрос");
+    t!(uts55_test4: "UAX9ClauseHL4" => "Uax9-Clause-Hl4");
+    t!(uts55_test5: "LOUD_SNAKE" => "Loud-Snake");
+
+    t!(uts55_test6: "Fancy_Snake" => "Fancy-Snake");
+    t!(uts55_test7: "snake-kebab" => "Snake-Kebab");
+    t!(uts55_test8: "Paral·lel" => "Paral·lel");
+    t!(uts55_test9: "microB" => "Micro-B");
+    t!(uts55_test10: "microᖯ" => "Microᖯ");
+    t!(uts55_test11: "HTTPसर्वर" => "Httpसर्वर");
+    t!(uts55_test12: "dromedaryCamel" => "Dromedary-Camel");
+    t!(uts55_test13: "snakeELEPHANTSnake" => "Snake-Elephant-Snake");
 }
diff --git a/src/upper_camel.rs b/src/upper_camel.rs
index c6f29df..f8980f9 100644
--- a/src/upper_camel.rs
+++ b/src/upper_camel.rs
@@ -5,7 +5,7 @@ use alloc::{
     string::{String, ToString},
 };
 
-use crate::{capitalize, transform};
+use crate::{titlecase, transform};
 
 /// This trait defines an upper camel case conversion.
 ///
@@ -58,7 +58,7 @@ pub struct AsUpperCamelCase<T: AsRef<str>>(pub T);
 
 impl<T: AsRef<str>> fmt::Display for AsUpperCamelCase<T> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        transform(self.0.as_ref(), capitalize, |_| Ok(()), f)
+        transform(self.0.as_ref(), titlecase, |_| Ok(()), f)
     }
 }
 
diff --git a/tables/Cargo.toml b/tables/Cargo.toml
new file mode 100644
index 0000000..8897a01
--- /dev/null
+++ b/tables/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "tables"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+bitvec = "1.0.1"
+regex = "1.10.3"
+reqwest = { version = "0.11", features = ["blocking"] }
+rustc-hash = "1.1.0"
diff --git a/tables/src/allowed_in_word.rs b/tables/src/allowed_in_word.rs
new file mode 100644
index 0000000..e89c420
--- /dev/null
+++ b/tables/src/allowed_in_word.rs
@@ -0,0 +1,221 @@
+//! Construct a lookup table to find whether a particular character is allowed in words.
+
+use std::{
+    any::type_name,
+    collections::hash_map,
+    error::Error,
+    io::{self, Read},
+    mem::size_of,
+};
+
+use bitvec::prelude::*;
+use rustc_hash::FxHashMap;
+
+use crate::unicode_data::{set_by_general_category, set_by_prop, CodepointBitArr, DataFiles};
+
+/// Change this to u128 for wider leaves
+type LeafElement = u64;
+
+const ENTRIES_PER_LEAF: usize = LeafElement::BITS as usize; // one bit per code point, so this is how many code points a leaf covers
+
+/// `true` for all punctuation other than `Other_Punctuation`
+/// (`[\p{Punctuation}-\p{Other_Punctuation}]`)
+fn punctuation_non_other(data: &DataFiles) -> CodepointBitArr {
+    let mut arr = BitArray::ZERO; // start with no code point selected
+    set_by_general_category(&mut arr, data, "Pc|Pd|Ps|Pe|Pi|Pf", true); // the punctuation categories, with Po left out
+    arr
+}
+
+/// `true` for all unassigned and private use characters
+fn unassigned_private_use(data: &DataFiles) -> CodepointBitArr {
+    let mut arr = BitArray::ZERO;
+    set_by_general_category(&mut arr, data, "[A-Za-z]+", true); // presumably a pattern over category names, flagging every code point the data lists — confirm in `unicode_data`
+    set_by_general_category(&mut arr, data, "Cn|Co", false); // un-flag code points listed as unassigned / private use
+    !arr // invert: what ends up set is everything never flagged or un-flagged above
+}
+
+/// `true` for all codepoints that can be part of a word:
+/// `[\p{ID_Continue}\p{ID_Compat_Math_Continue}\p{Cn}\p{Co}\p{Alphabetic}\p{N}-[\p{P}-\p{Po}]]`,
+/// plus the extra characters listed below.
+pub fn allowed_in_word(data: &DataFiles) -> CodepointBitArr {
+    let mut word_component = unassigned_private_use(data); // start from the `\p{Cn}\p{Co}` part of the set
+
+    set_by_prop(
+        &mut word_component,
+        &data.derived_core_properties,
+        "ID_Continue|Alphabetic",
+        true,
+    );
+
+    set_by_general_category(&mut word_component, data, "Nd|Nl|No", true); // the `\p{N}` part of the set
+
+    set_by_prop(
+        &mut word_component,
+        &data.prop_list,
+        "ID_Compat_Math_Continue",
+        true,
+    );
+
+    // Choose from characters in https://www.unicode.org/reports/tr31/#Specific_Character_Adjustments
+    // that are not Punctuation other than Other_Punctuation
+    // (U+00B7 is already in ID_Continue).
+    for cp in [
+        0x05F3, // HEBREW PUNCTUATION GERESH https://en.wikipedia.org/wiki/Geresh
+        0x05F4, // HEBREW PUNCTUATION GERSHAYIM https://en.wikipedia.org/wiki/Gershayim
+        0x0F0B, // TIBETAN MARK INTERSYLLABIC TSHEG https://w3c.github.io/tlreq/#language_overview
+    ] {
+        word_component.set(cp, true);
+    }
+
+    word_component &= !punctuation_non_other(data); // applied last, so the `[\p{P}-\p{Po}]` exclusion wins over everything set above
+
+    word_component
+}
+
+fn build_tree(allowed_in_word: &BitSlice) -> (Vec<u8>, Vec<LeafElement>) { // returns (root, leaves): one root byte per chunk, indexing into the distinct leaves
+    let mut chunk_to_leaf_idx_map: FxHashMap<LeafElement, u8> = FxHashMap::from_iter([(!0, 0)]); // the all-ones chunk is pre-registered as leaf 0
+    let mut root = Vec::with_capacity(allowed_in_word.len().div_ceil(ENTRIES_PER_LEAF));
+    let mut leaves = vec![!0]; // kept in step with the map: index 0 is the all-ones leaf
+    let chunks_iter = allowed_in_word.chunks_exact(ENTRIES_PER_LEAF);
+    assert!(chunks_iter.remainder().is_empty()); // the input length must be a whole number of leaves
+    let mut chunks_iter = chunks_iter.map(|l| { // pack each chunk of bits into one LeafElement
+        LeafElement::from_le_bytes(
+            l.bytes() // NOTE(review): presumably the `io::Read` impl of bit slices; the resulting bit order depends on it — confirm
+                .collect::<Result<Vec<_>, _>>()
+                .unwrap()
+                .try_into()
+                .unwrap(),
+        )
+    });
+    for chunk in &mut chunks_iter {
+        match chunk_to_leaf_idx_map.entry(chunk) {
+            hash_map::Entry::Occupied(o) => {
+                root.push(*o.get()); // chunk value already seen: reuse its leaf
+            }
+            hash_map::Entry::Vacant(v) => {
+                let new_index = u8::try_from(leaves.len()).expect("too many leaves"); // root entries are u8, so at most 256 distinct leaves fit
+                v.insert(new_index);
+                root.push(new_index);
+                leaves.push(chunk);
+            }
+        }
+    }
+    (root, leaves)
+}
+
+fn list_of_ranges(cps: impl Iterator<Item = u32>) -> Vec<(u32, u32)> { // merges runs of consecutive values into inclusive (first, last) pairs
+    let mut vec = Vec::new();
+    for cp in cps {
+        if let Some((_, prev)) = vec.last_mut() {
+            if *prev + 1 == cp { // `cp` directly follows the end of the current range
+                *prev = cp; // extend that range in place
+                continue;
+            }
+        }
+        vec.push((cp, cp)) // otherwise open a new single-element range
+    }
+    vec
+}
+
+pub fn write_table( // writes the Rust source of the `allowed_in_word` lookup function, its two tables and a test to `out`
+    out: &mut impl io::Write,
+    allowed_in_word: &CodepointBitArr,
+) -> Result<(), Box<dyn Error>> {
+    let bits_to_shift = ENTRIES_PER_LEAF.ilog2(); // log2 of the number of code points per leaf
+
+    let first_cp_not_in_tree = // the tree ends just past the last disallowed code point below 0x40000, rounded up to a whole leaf
+        (allowed_in_word[..0x40000].last_zero().unwrap() + 1).next_multiple_of(ENTRIES_PER_LEAF); // panics if nothing below 0x40000 is disallowed
+
+    let first_cp_not_in_tree_shifted = first_cp_not_in_tree >> bits_to_shift; // same boundary expressed as a root index
+
+    writeln!(
+        out,
+        "/// Whether this character can be part of a word.
+pub fn allowed_in_word(c: char) -> bool {{
+    const BOTTOM_BITS_MASK: u32 = !((!0_u32) << {bits_to_shift});
+    let cp: u32 = c.into();
+    let top_bits = cp >> {bits_to_shift};
+    if top_bits < 0x{first_cp_not_in_tree_shifted:X} {{
+        let leaf_idx: u8 = ALLOWED_IN_WORD_ROOT[usize::try_from(top_bits).unwrap()];
+        let leaf = ALLOWED_IN_WORD_LEAVES[usize::from(leaf_idx)];
+        (leaf >> (cp & BOTTOM_BITS_MASK)) & 1 == 1
+    }} else {{"
+    )?;
+
+    let mut late_zeros = list_of_ranges( // disallowed code points past the tree, as inclusive ranges for the emitted `matches!`
+        allowed_in_word[first_cp_not_in_tree..]
+            .iter_zeros()
+            .map(|n| u32::try_from(n + first_cp_not_in_tree).unwrap()), // slice-relative index back to an absolute code point
+    )
+    .into_iter();
+    if let Some(first_late_zero) = late_zeros.next() { // the first range is emitted without a leading " | "
+        write!(out, "        !matches!(cp, 0x{:06X}", first_late_zero.0)?;
+        if first_late_zero.0 != first_late_zero.1 { // single code points are emitted without a `..=` upper bound
+            write!(out, "..=0x{:06X}", first_late_zero.1)?;
+        }
+        for late_zero in late_zeros {
+            write!(out, " | 0x{:06X}", late_zero.0)?;
+            if late_zero.0 != late_zero.1 {
+                write!(out, "..=0x{:06X}", late_zero.1)?;
+            }
+        }
+        writeln!(out, ")")?;
+    } else {
+        writeln!(out, "true")?; // nothing is disallowed past the tree
+    }
+
+    writeln!(
+        out,
+        "    }}
+}}",
+    )?;
+
+    let (root, leaves) = build_tree(&allowed_in_word[..first_cp_not_in_tree]); // only the part below the boundary goes into the tables
+    eprintln!(
+        "allowed_in_words: {} bytes of static data",
+        root.len() + leaves.len() * size_of::<LeafElement>()
+    );
+
+    write!(
+        out,
+        "\nstatic ALLOWED_IN_WORD_ROOT: [u8; {}] = [",
+        root.len()
+    )?;
+
+    for line in root.chunks(16) { // 16 root bytes per emitted source line
+        write!(out, "\n   ")?;
+        for byte in line {
+            write!(out, " 0x{byte:02X},")?;
+        }
+    }
+
+    writeln!(
+        out,
+        "\n];
+
+static ALLOWED_IN_WORD_LEAVES: [{}; {}] = [",
+        type_name::<LeafElement>(),
+        leaves.len()
+    )?;
+
+    for leaf in leaves {
+        writeln!(out, "    0x{leaf:016X},")?; // zero-padded to at least 16 hex digits, the width of a 64-bit leaf
+    }
+    writeln!(
+        out,
+        "];
+
+#[cfg(test)]
+#[test]
+fn test_allowed_in_words_casing_closure() {{
+    for c in '\\0'..=char::MAX {{
+        if allowed_in_word(c) {{
+            assert!(c.to_uppercase().all(allowed_in_word));
+            assert!(c.to_lowercase().all(allowed_in_word));
+        }}
+    }}
+}}"
+    )?;
+
+    Ok(())
+}
diff --git a/tables/src/letter_casing.rs b/tables/src/letter_casing.rs
new file mode 100644
index 0000000..13a3409
--- /dev/null
+++ b/tables/src/letter_casing.rs
@@ -0,0 +1,199 @@
+//! Construct a lookup table for the casing status of a letter
+//! (lowercase, uppercase, or titlecase).
+//!
+//! This table only concerns itself with letters:
+//! for obtaining the case of characters which are not letters,
+//! use the functions from `core`.
+
+use std::{any::type_name, collections::hash_map, error::Error, io, mem::size_of};
+
+use bitvec::prelude::*;
+use rustc_hash::FxHashMap;
+
+use crate::unicode_data::{set_by_general_category, set_by_prop, CodepointBitArr, DataFiles};
+
+/// Change this to u64 for smaller leaves
+type LeafElement = u128;
+
+const ENTRIES_PER_LEAF: usize = LeafElement::BITS as usize / 2;
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+#[repr(u8)]
+pub enum CasedLetterKind {
+    Lowercase = 1, // general category `Ll`
+    Uppercase = 2, // general category `Lu`
+    Titlecase = 3, // general category `Lt`
+}
+
+/// Classify the cased letters that may appear inside words.
+///
+/// Returns a vector indexed by code point giving the casing of every letter of general
+/// category `Ll`, `Lu` or `Lt` that is also set in `word_components` (it ends at the last
+/// such letter), plus the code points of the titlecase letters outside the `Greek` script.
+pub fn letter_casing(
+    data: &DataFiles,
+    word_components: &BitSlice,
+) -> (Vec<Option<CasedLetterKind>>, Vec<u32>) {
+    let cased_set = |category: &str| {
+        let mut set = CodepointBitArr::ZERO;
+        set_by_general_category(&mut set, data, category, true);
+        set &= word_components;
+        set
+    };
+    let lowercase = cased_set("Ll");
+    let uppercase = cased_set("Lu");
+    let mut titlecase = cased_set("Lt");
+
+    let last = lowercase
+        .last_one()
+        .max(uppercase.last_one())
+        .max(titlecase.last_one())
+        .unwrap();
+
+    let mut casing_vec = vec![None; last + 1];
+    let kinds = [
+        (&lowercase, CasedLetterKind::Lowercase),
+        (&uppercase, CasedLetterKind::Uppercase),
+        (&titlecase, CasedLetterKind::Titlecase),
+    ];
+    for (set, kind) in kinds {
+        for cp in set.iter_ones() {
+            casing_vec[cp] = Some(kind);
+        }
+    }
+
+    set_by_prop(&mut titlecase, &data.scripts, "Greek", false);
+    let non_greek_titlecase = titlecase
+        .iter_ones()
+        .map(|cp| u32::try_from(cp).unwrap())
+        .collect();
+
+    (casing_vec, non_greek_titlecase)
+}
+
+/// Pack `casings_list` into a two-level table: a root of leaf indices plus deduplicated leaves.
+///
+/// Each chunk of `ENTRIES_PER_LEAF` entries becomes one `LeafElement` with 2 bits per entry
+/// (0 for `None`, else the `CasedLetterKind` discriminant); `root[i]` indexes chunk `i`'s leaf.
+/// Panics on a trailing partial chunk or when more than 256 distinct leaves are needed.
+fn build_casing_tree(casings_list: &[Option<CasedLetterKind>]) -> (Vec<u8>, Vec<LeafElement>) {
+    // Start empty: a pre-seeded key would map to a leaf that was never pushed to `leaves`.
+    let mut chunk_to_leaf_idx_map: FxHashMap<LeafElement, u8> = FxHashMap::default();
+    let mut root = Vec::with_capacity(casings_list.len().div_ceil(ENTRIES_PER_LEAF));
+    let mut leaves = Vec::new();
+    let chunks_iter = casings_list.chunks_exact(ENTRIES_PER_LEAF);
+    assert!(chunks_iter.remainder().is_empty());
+    for chunk in chunks_iter {
+        let mut chunk_uint: LeafElement = 0;
+        for (index, elem) in chunk.iter().copied().enumerate() {
+            chunk_uint |= LeafElement::from(elem.map_or(0, |k| k as u8)) << (index * 2);
+        }
+        match chunk_to_leaf_idx_map.entry(chunk_uint) {
+            hash_map::Entry::Occupied(o) => root.push(*o.get()),
+            hash_map::Entry::Vacant(v) => {
+                let new_index = u8::try_from(leaves.len()).expect("too many leaves");
+                v.insert(new_index);
+                root.push(new_index);
+                leaves.push(chunk_uint);
+            }
+        }
+    }
+    (root, leaves)
+}
+
+/// Emit the `CasedLetterKind` enum, `letter_casing`, `is_non_greek_titlecase` and the
+/// `LETTER_CASING_ROOT` / `LETTER_CASING_LEAVES` statics that `letter_casing` reads.
+/// Panics if there is no non-Greek titlecase letter; write failures are returned as errors.
+pub fn write_table(
+    out: &mut impl io::Write,
+    data: &DataFiles,
+    allowed_in_word: &CodepointBitArr,
+) -> Result<(), Box<dyn Error>> {
+    let (mut casing_vec, non_greek) = letter_casing(data, allowed_in_word);
+    let bits_to_shift = ENTRIES_PER_LEAF.ilog2();
+
+    // Pad with `None` so that the table consists of whole leaves only.
+    let first_cp_not_in_tree = casing_vec.len().next_multiple_of(ENTRIES_PER_LEAF);
+    casing_vec.resize(first_cp_not_in_tree, None);
+    let first_cp_not_in_tree_shifted = first_cp_not_in_tree >> bits_to_shift;
+
+    let mut non_greek = non_greek.into_iter();
+    write!(
+        out,
+        "
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+#[repr(u8)]
+pub enum CasedLetterKind {{
+    Lowercase = 1,
+    Uppercase = 2,
+    Titlecase = 3,
+}}
+
+/// The case of this letter, or `None` if it is not a cased letter.
+pub fn letter_casing(c: char) -> Option<CasedLetterKind> {{
+    const BOTTOM_BITS_MASK: u32 = !((!0_u32) << {bits_to_shift});
+    let cp: u32 = c.into();
+    let top_bits = cp >> {bits_to_shift};
+    if top_bits < 0x{first_cp_not_in_tree_shifted:X} {{
+        let leaf_idx: u8 = LETTER_CASING_ROOT[usize::try_from(top_bits).unwrap()];
+        let leaf = LETTER_CASING_LEAVES[usize::from(leaf_idx)];
+        match (leaf >> ((cp & BOTTOM_BITS_MASK) * 2)) & 3 {{
+            0 => None,
+            1 => Some(CasedLetterKind::Lowercase),
+            2 => Some(CasedLetterKind::Uppercase),
+            3 => Some(CasedLetterKind::Titlecase),
+            _ => unreachable!(),
+        }}
+    }} else {{
+        None
+    }}
+}}
+
+/// Whether the character is a non-Greek titlecase letter.
+pub fn is_non_greek_titlecase(c: char) -> bool {{
+    matches!(c, '\\u{{{:04X}}}'",
+        non_greek.next().expect("no non-Greek titlecase letter")
+    )?;
+
+    for cp in non_greek {
+        write!(out, " | '\\u{{{cp:04X}}}'")?;
+    }
+
+    writeln!(
+        out,
+        ")
+}}
+"
+    )?;
+
+    let (root, leaves) = build_casing_tree(&casing_vec);
+    eprintln!(
+        "letter_casing: {} bytes of static data",
+        root.len() + leaves.len() * size_of::<LeafElement>()
+    );
+
+    write!(out, "static LETTER_CASING_ROOT: [u8; {}] = [", root.len())?;
+    for line in root.chunks(16) {
+        write!(out, "\n   ")?;
+        for byte in line {
+            write!(out, " 0x{byte:02X},")?;
+        }
+    }
+
+    writeln!(
+        out,
+        "\n];
+
+static LETTER_CASING_LEAVES: [{}; {}] = [",
+        type_name::<LeafElement>(),
+        leaves.len()
+    )?;
+
+    // Zero-pad every leaf to the full width of `LeafElement` (two hex digits per byte).
+    let hex_digits = size_of::<LeafElement>() * 2;
+    for leaf in leaves {
+        writeln!(out, "    0x{leaf:0hex_digits$X},")?;
+    }
+    writeln!(out, "];")?;
+    Ok(())
+}
diff --git a/tables/src/main.rs b/tables/src/main.rs
new file mode 100644
index 0000000..4dd6fea
--- /dev/null
+++ b/tables/src/main.rs
@@ -0,0 +1,51 @@
+use std::{
+    fs::OpenOptions,
+    io::{BufWriter, Write},
+};
+
+mod allowed_in_word;
+mod letter_casing;
+mod nonspacing_marks;
+mod titlecase;
+mod unicode_data;
+
+use unicode_data::data_files;
+
+/// Update this on new Unicode releases
+const UNICODE_VERSION: (u8, u8, u8) = (15, 1, 0);
+
+/// Download the Unicode data files and regenerate `../src/tables.rs` from them.
+/// The output path is relative to the working directory, so run this from `tables/`.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let data = data_files()?;
+    let mut out = BufWriter::new(
+        OpenOptions::new()
+            .write(true)
+            .create(true)
+            .truncate(true)
+            .open("../src/tables.rs")?,
+    );
+
+    writeln!(
+        &mut out,
+        "//! Automatically generated by `tables`.
+//! Do not edit manually.
+
+/// The version of Unicode that the data included with this crate is based on.
+///
+/// This crate also relies on Unicode data provided by the standard library;
+/// that data is versioned according to [`char::UNICODE_VERSION`].
+pub const UNICODE_VERSION: (u8, u8, u8) = {UNICODE_VERSION:?};
+"
+    )?;
+
+    let allowed_in_word = allowed_in_word::allowed_in_word(&data);
+    allowed_in_word::write_table(&mut out, &allowed_in_word)?;
+    letter_casing::write_table(&mut out, &data, &allowed_in_word)?;
+    nonspacing_marks::write_table(&mut out, &data)?;
+    titlecase::write_table(&mut out, &data)?;
+
+    // Flush explicitly: `BufWriter`'s destructor ignores errors from its final write.
+    out.flush()?;
+    Ok(())
+}
diff --git a/tables/src/nonspacing_marks.rs b/tables/src/nonspacing_marks.rs
new file mode 100644
index 0000000..5f97bb2
--- /dev/null
+++ b/tables/src/nonspacing_marks.rs
@@ -0,0 +1,172 @@
+//! Construct a lookup table to find whether a particular character is a nonspacing mark
+//! (general category `Nonspacing_Mark` or `Enclosing_Mark`).
+//! These characters are ignored when determining word boundaries.
+
+use std::{
+    any::type_name,
+    collections::hash_map,
+    error::Error,
+    io::{self, Read},
+    mem::size_of,
+};
+
+use bitvec::prelude::*;
+use rustc_hash::FxHashMap;
+
+use crate::unicode_data::{set_by_general_category, CodepointBitArr, DataFiles};
+
+type LeafElement = u128;
+const ENTRIES_PER_LEAF: usize = LeafElement::BITS as usize;
+
+/// Bit set (one bit per code point) of the characters with general category `Mn` or `Me`.
+fn nonspacing_marks(data: &DataFiles) -> CodepointBitArr {
+    let mut marks = CodepointBitArr::ZERO;
+    set_by_general_category(&mut marks, data, "Mn|Me", true);
+    marks
+}
+
+/// Split `nonspacing_marks` into `ENTRIES_PER_LEAF`-bit chunks and deduplicate them.
+///
+/// Returns `(root, leaves)`: `root[i]` is the index into `leaves` of the `i`-th chunk, and each
+/// leaf is that chunk's bytes reassembled as a little-endian `LeafElement`. Leaf 0 is always
+/// the all-zero chunk. Panics on a trailing partial chunk or on more than 256 distinct leaves.
+fn build_tree(nonspacing_marks: &BitSlice) -> (Vec<u8>, Vec<LeafElement>) {
+    // Leaf 0 is reserved for the all-zero chunk, whether or not it occurs in the input.
+    let mut leaves = vec![0];
+    let mut leaf_index_of: FxHashMap<LeafElement, u8> = FxHashMap::from_iter([(0, 0)]);
+    let mut root = Vec::with_capacity(nonspacing_marks.len().div_ceil(ENTRIES_PER_LEAF));
+
+    let chunks = nonspacing_marks.chunks_exact(ENTRIES_PER_LEAF);
+    assert!(chunks.remainder().is_empty());
+    for bits in chunks {
+        // Reassemble the chunk's bits into one integer via its little-endian bytes.
+        let chunk_bytes = bits.bytes().collect::<Result<Vec<_>, _>>().unwrap();
+        let chunk = LeafElement::from_le_bytes(chunk_bytes.try_into().unwrap());
+        let leaf_idx = match leaf_index_of.entry(chunk) {
+            hash_map::Entry::Occupied(known) => *known.get(),
+            hash_map::Entry::Vacant(slot) => {
+                let new_index = u8::try_from(leaves.len()).expect("too many leaves");
+                leaves.push(chunk);
+                *slot.insert(new_index)
+            }
+        };
+        root.push(leaf_idx);
+    }
+
+    (root, leaves)
+}
+
+/// Collapse the set bits of `nonspacing_marks` into inclusive `(first, last)` runs of
+/// consecutive code points, where bit `i` stands for code point `i + add`.
+fn list_of_ranges(nonspacing_marks: &BitSlice, add: usize) -> Vec<(u32, u32)> {
+    let mut ranges: Vec<(u32, u32)> = Vec::new();
+    for bit in nonspacing_marks.iter_ones() {
+        let cp = u32::try_from(bit + add).unwrap();
+        match ranges.last_mut() {
+            // Directly follows the previous run: extend it.
+            Some((_, last)) if *last + 1 == cp => *last = cp,
+            _ => ranges.push((cp, cp)),
+        }
+    }
+    ranges
+}
+
+/// Emit `is_nonspacing_mark` together with the `NONSPACING_MARKS_ROOT` and
+/// `NONSPACING_MARKS_LEAVES` statics it reads.
+///
+/// Code points up to the last mark below U+40000 (rounded up to a whole leaf) are looked up in
+/// a two-level table: the root maps the high bits of a code point to a leaf index, and each
+/// `LeafElement` leaf holds one bit per code point. Marks beyond that point are emitted as
+/// explicit ranges in a `matches!` fallback, so the table does not have to extend to them.
+///
+/// Panics if there is no mark below U+40000; write failures are returned as errors.
+pub fn write_table(out: &mut impl io::Write, data: &DataFiles) -> Result<(), Box<dyn Error>> {
+    let marks = nonspacing_marks(data);
+
+    // The generated code splits a code point into root index and bit position by shifting.
+    let bits_to_shift = ENTRIES_PER_LEAF.ilog2();
+
+    // Stop the table at the last mark below U+40000, rounded up to a whole leaf.
+    let first_cp_not_in_tree =
+        (marks[..0x40000].last_one().unwrap() + 1).next_multiple_of(ENTRIES_PER_LEAF);
+
+    let first_cp_not_in_tree_shifted = first_cp_not_in_tree >> bits_to_shift;
+
+    writeln!(
+        out,
+        "
+/// Whether this character is a nonspacing or enclosing mark.
+pub fn is_nonspacing_mark(c: char) -> bool {{
+    const BOTTOM_BITS_MASK: u32 = !((!0_u32) << {bits_to_shift});
+    let cp: u32 = c.into();
+    let top_bits = cp >> {bits_to_shift};
+    if top_bits < 0x{first_cp_not_in_tree_shifted:X} {{
+        let leaf_idx: u8 = NONSPACING_MARKS_ROOT[usize::try_from(top_bits).unwrap()];
+        let leaf = NONSPACING_MARKS_LEAVES[usize::from(leaf_idx)];
+        (leaf >> (cp & BOTTOM_BITS_MASK)) & 1 == 1
+    }} else {{"
+    )?;
+
+    // Marks that the table does not cover, as inclusive code point ranges.
+    let mut late_marks =
+        list_of_ranges(&marks[first_cp_not_in_tree..], first_cp_not_in_tree).into_iter();
+
+    if let Some(first_late_mark) = late_marks.next() {
+        write!(
+            out,
+            "        matches!(cp, 0x{:06X}..=0x{:06X}",
+            first_late_mark.0, first_late_mark.1
+        )?;
+        for late_mark in late_marks {
+            write!(out, " | 0x{:06X}..=0x{:06X}", late_mark.0, late_mark.1)?;
+        }
+        writeln!(out, ")")?;
+    } else {
+        writeln!(out, "false")?;
+    }
+
+    writeln!(
+        out,
+        "    }}
+}}
+",
+    )?;
+
+    // Only the part below `first_cp_not_in_tree` goes into the table.
+    let (root, leaves) = build_tree(&marks[..first_cp_not_in_tree]);
+    eprintln!(
+        "nonspacing_marks: {} bytes of static data",
+        root.len() + leaves.len() * size_of::<LeafElement>()
+    );
+
+    write!(
+        out,
+        "static NONSPACING_MARKS_ROOT: [u8; {}] = [",
+        root.len()
+    )?;
+
+    for line in root.chunks(16) {
+        write!(out, "\n   ")?;
+        for byte in line {
+            write!(out, " 0x{byte:02X},")?;
+        }
+    }
+
+    writeln!(
+        out,
+        "\n];
+
+static NONSPACING_MARKS_LEAVES: [{}; {}] = [",
+        type_name::<LeafElement>(),
+        leaves.len()
+    )?;
+
+    // Zero-pad every leaf to the full width of `LeafElement` (two hex digits per byte).
+    let hex_digits = size_of::<LeafElement>() * 2;
+    for leaf in leaves {
+        writeln!(out, "    0x{leaf:0hex_digits$X},")?;
+    }
+    writeln!(out, "];")?;
+
+    Ok(())
+}
diff --git a/tables/src/titlecase.rs b/tables/src/titlecase.rs
new file mode 100644
index 0000000..2e7bbcd
--- /dev/null
+++ b/tables/src/titlecase.rs
@@ -0,0 +1,189 @@
+//! Construct table for titlecase character mappings.
+//! Only characters whose titlecase differs from their uppercase are included.
+
+use std::{error::Error, io, mem::size_of};
+
+use regex::Regex;
+use rustc_hash::FxHashMap;
+
+use crate::unicode_data::DataFiles;
+
+/// Collect every character whose titlecase mapping differs from its uppercase mapping.
+///
+/// Single-character mappings are read from `data.unicode_data` and multi-character ones from
+/// `data.special_casing`; the result is sorted by character. Panics on a duplicate character.
+fn titlecases(data: &DataFiles) -> Vec<(char, Vec<char>)> {
+    let parse_char = |hex: &str| char::from_u32(u32::from_str_radix(hex, 16).unwrap()).unwrap();
+    let mut map = FxHashMap::default();
+
+    // Single character mappings
+    let regex = Regex::new(
+        r"^([0-9A-F]+);(?:.*?);(?:.*?);(?:.*?);(?:.*?);(?:.*?);(?:.*?);(?:.*?);(?:.*?);(?:.*?);(?:.*?);(?:.*?);([0-9A-F]*);(?:.*?);([0-9A-F]+)",
+    ).unwrap();
+    for line in data.unicode_data.lines() {
+        let Some(captures) = regex.captures(line) else {
+            continue;
+        };
+        // Only include if different from uppercase
+        let (uppercase, titlecase) = (&captures[2], &captures[3]);
+        if titlecase != uppercase {
+            let cp = parse_char(&captures[1]);
+            assert!(!map.contains_key(&cp));
+            map.insert(cp, vec![parse_char(titlecase)]);
+        }
+    }
+
+    // Multi character mappings
+    let regex =
+        Regex::new(r"^([0-9A-F]+);(?:[0-9A-F ]*);([0-9A-F ]*);([0-9A-F ]*);[^0-9A-Fa-f_]*#")
+            .unwrap();
+    for line in data.special_casing.lines() {
+        let Some(captures) = regex.captures(line) else {
+            continue;
+        };
+        let titlecase_mapping = captures[2].trim();
+        let uppercase_mapping = captures[3].trim();
+        if titlecase_mapping != uppercase_mapping {
+            let cp = parse_char(&captures[1]);
+            assert!(!map.contains_key(&cp));
+            let expansion = titlecase_mapping
+                .split_whitespace()
+                .map(parse_char)
+                .collect();
+            map.insert(cp, expansion);
+        }
+    }
+
+    let mut sorted: Vec<(char, Vec<char>)> = map.into_iter().collect();
+    sorted.sort_unstable_by_key(|(c, _)| *c);
+    sorted
+}
+
+/// Emit the `ToTitlecase` iterator, `to_titlecase` and the sorted `TITLECASE_MAPPINGS` table.
+///
+/// Only the characters returned by `titlecases` get a table entry, padded with `None` up to
+/// the longest mapping; the generated `to_titlecase` uses `char::to_uppercase` for other
+/// non-ASCII characters. Panics if `titlecases` returns nothing.
+pub fn write_table(out: &mut impl io::Write, data: &DataFiles) -> Result<(), Box<dyn Error>> {
+    let titlecase_mappings = titlecases(data);
+    let max_expansion = titlecase_mappings.iter().map(|t| t.1.len()).max().unwrap();
+
+    eprintln!(
+        "titlecase: {} bytes of static data",
+        (max_expansion + 1) * size_of::<char>() * titlecase_mappings.len()
+    );
+
+    writeln!(
+        out,
+        "
+use core::{{
+    fmt::{{self, Write}},
+    iter,
+}};
+
+#[derive(Clone, Debug)]
+pub enum ToTitlecase {{
+    Zero,
+    One(char),
+    Two(char, char),
+    Three(char, char, char),
+}}
+
+impl Iterator for ToTitlecase {{
+    type Item = char;
+
+    fn next(&mut self) -> Option<Self::Item> {{
+        match *self {{
+            Self::Zero => None,
+            Self::One(c) => {{
+                *self = Self::Zero;
+                Some(c)
+            }}
+            Self::Two(b, c) => {{
+                *self = Self::One(c);
+                Some(b)
+            }}
+            Self::Three(a, b, c) => {{
+                *self = Self::Two(b, c);
+                Some(a)
+            }}
+        }}
+    }}
+
+    fn size_hint(&self) -> (usize, Option<usize>) {{
+        let size = match self {{
+            Self::Zero => 0,
+            Self::One(_) => 1,
+            Self::Two(..) => 2,
+            Self::Three(..) => 3,
+        }};
+        (size, Some(size))
+    }}
+}}
+
+impl iter::ExactSizeIterator for ToTitlecase {{}}
+
+impl iter::FusedIterator for ToTitlecase {{}}
+
+impl fmt::Display for ToTitlecase {{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {{
+        for c in self.clone() {{
+            f.write_char(c)?;
+        }}
+        Ok(())
+    }}
+}}
+
+/// Returns an iterator that yields the titlecase mapping of this `char` as one or more `char`s.
+pub fn to_titlecase(c: char) -> ToTitlecase {{
+    // ASCII fast path
+    if c.is_ascii() {{
+        ToTitlecase::One(c.to_ascii_uppercase())
+    }} else if let Ok(idx) = TITLECASE_MAPPINGS.binary_search_by_key(&c, |&(c2, _)| c2) {{
+        match TITLECASE_MAPPINGS[idx].1 {{
+            [None, ..] => ToTitlecase::Zero,
+            [Some(a), None, ..] => ToTitlecase::One(a),
+            [Some(a), Some(b), None] => ToTitlecase::Two(a, b),
+            [Some(a), Some(b), Some(c)] => ToTitlecase::Three(a, b, c),
+        }}
+    }} else {{
+        let mut uppercase = c.to_uppercase();
+        match uppercase.size_hint().0 {{
+            0 => ToTitlecase::Zero,
+            1 => ToTitlecase::One(uppercase.next().unwrap()),
+            2 => ToTitlecase::Two(uppercase.next().unwrap(), uppercase.next().unwrap()),
+            3 => ToTitlecase::Three(
+                uppercase.next().unwrap(),
+                uppercase.next().unwrap(),
+                uppercase.next().unwrap(),
+            ),
+            _ => unreachable!(),
+        }}
+    }}
+}}
+
+/// Sorted list of characters and their titlecase mappings.
+/// Only characters whose titlecase differs from uppercase are included.
+static TITLECASE_MAPPINGS: [(char, [Option<char>; {max_expansion}]); {}] = [",
+        titlecase_mappings.len()
+    )?;
+    for (c, mapping) in titlecase_mappings {
+        write!(out, "    ('{c}', [")?;
+        // The first slot is written bare; every later slot carries a leading separator.
+        let mut mapping = mapping.into_iter();
+        match mapping.next() {
+            Some(fc) => write!(out, "Some('{fc}')")?,
+            None => write!(out, "None")?,
+        }
+        for _ in 1..max_expansion {
+            match mapping.next() {
+                Some(c) => write!(out, ", Some('{c}')")?,
+                None => write!(out, ", None")?,
+            }
+        }
+        writeln!(out, "]),")?;
+    }
+    writeln!(out, "];")?;
+
+    Ok(())
+}
diff --git a/tables/src/unicode_data.rs b/tables/src/unicode_data.rs
new file mode 100644
index 0000000..b4f9cfb
--- /dev/null
+++ b/tables/src/unicode_data.rs
@@ -0,0 +1,95 @@
+use bitvec::BitArr;
+use regex::Regex;
+
+use crate::UNICODE_VERSION;
+
+pub type CodepointBitArr = BitArr!(for 0x110000);
+
+/// Fetch `file` from the `ucd` directory of the release named by [`UNICODE_VERSION`] on
+/// unicode.org and return its body as text; an HTTP error status is turned into an error.
+fn fetch_unicode_file(file: &str) -> Result<String, Box<dyn std::error::Error>> {
+    let url = format!(
+        "https://www.unicode.org/Public/{}.{}.{}/ucd/{file}",
+        UNICODE_VERSION.0, UNICODE_VERSION.1, UNICODE_VERSION.2
+    );
+    let response = reqwest::blocking::get(url)?.error_for_status()?;
+    Ok(response.text()?)
+}
+
+#[derive(Debug)]
+pub struct DataFiles {
+    pub unicode_data: String,
+    pub derived_core_properties: String,
+    pub prop_list: String,
+    pub scripts: String,
+    pub special_casing: String,
+}
+
+/// Fetch the text of every Unicode data file we need, stopping at the first fetch that fails.
+pub fn data_files() -> Result<DataFiles, Box<dyn std::error::Error>> {
+    Ok(DataFiles {
+        unicode_data: fetch_unicode_file("UnicodeData.txt")?,
+        derived_core_properties: fetch_unicode_file("DerivedCoreProperties.txt")?,
+        prop_list: fetch_unicode_file("PropList.txt")?,
+        scripts: fetch_unicode_file("Scripts.txt")?,
+        special_casing: fetch_unicode_file("SpecialCasing.txt")?,
+    })
+}
+
+/// Set the bit of every code point that `data_file` lists with a property matching `props`.
+///
+/// - `arr`: bit array (1 bit per unicode code point)
+/// - `data_file`: contents of a Unicode data file whose lines look like `XXXX ; Prop` or
+///   `XXXX..YYYY ; Prop` (hexadecimal, ranges inclusive); other lines are ignored
+/// - `props`: regex matching one or more Unicode properties; it is only anchored at the start
+///   of the property field, so it also matches longer names that begin with a match
+/// - `set_to`: what we should set the bits corresponding to matching code points to
+pub fn set_by_prop(arr: &mut CodepointBitArr, data_file: &str, props: &str, set_to: bool) {
+    let parse_hex = |digits: &str| usize::from_str_radix(digits, 16).unwrap();
+    let pattern = format!(r"^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*(?:{props})");
+    let regex = Regex::new(&pattern).unwrap();
+    for captures in data_file.lines().filter_map(|line| regex.captures(line)) {
+        let first = parse_hex(&captures[1]);
+        let last = captures.get(2).map_or(first, |m| parse_hex(m.as_str()));
+        for cp in first..=last {
+            arr.set(cp, set_to);
+        }
+    }
+}
+
+/// Set the bit of every code point whose general category in `UnicodeData.txt` matches.
+///
+/// - `arr`: bit array (1 bit per unicode code point)
+/// - `data`: the data files; only `unicode_data` is read
+/// - `categories`: regex matching one or more Unicode character categories
+/// - `set_to`: what we should set the bits corresponding to matching code points to
+pub fn set_by_general_category(
+    arr: &mut CodepointBitArr,
+    data: &DataFiles,
+    categories: &str,
+    set_to: bool,
+) {
+    let pattern = format!(r"^([0-9A-F]+);(.*?);({categories});");
+    let regex = Regex::new(&pattern).unwrap();
+    // Start of a `<..., First>` / `<..., Last>` line pair: (code point, name prefix, category).
+    let mut pending_range: Option<(usize, String, String)> = None;
+    for line in data.unicode_data.lines() {
+        let Some(captures) = regex.captures(line) else {
+            assert!(pending_range.is_none());
+            continue;
+        };
+        let cp = usize::from_str_radix(&captures[1], 16).unwrap();
+        if let Some((first_cp, prefix, category)) = pending_range.take() {
+            assert_eq!(captures[2].strip_suffix(", Last>"), Some(prefix.as_str()));
+            assert_eq!(category, &captures[3]);
+            for member in first_cp..=cp {
+                arr.set(member, set_to);
+            }
+        } else if let Some(prefix) = captures[2].strip_suffix(", First>") {
+            pending_range = Some((cp, prefix.to_owned(), captures[3].to_owned()));
+        } else {
+            arr.set(cp, set_to);
+        }
+    }
+    assert!(pending_range.is_none());
+}