From ad2547feb92152896c5ece42d33790b947abd4c9 Mon Sep 17 00:00:00 2001
From: Jules Bertholet
Date: Thu, 14 Mar 2024 14:14:02 -0400
Subject: [PATCH] Properly handle final sigma lowercase

The rules for determining how a Greek sigma should be lowercased are
more complex than just "is it the last character in the word."

The full rule is defined in the Unicode Standard under "Final_Sigma"
(see Section 3.13, "Default Case Algorithms").

Implementing the rules ourselves would require shipping static data for
the `Case_Ignorable` Unicode property. So this commit instead uses the
standard library implementation, which unfortunately costs a heap
allocation.
---
 src/lib.rs   | 9 +--------
 src/snake.rs | 6 ++++++
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index ab8a015..dea5d47 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -159,14 +159,7 @@ where
 }
 
 fn lowercase(s: &str, f: &mut fmt::Formatter) -> fmt::Result {
-    let mut chars = s.chars().peekable();
-    while let Some(c) = chars.next() {
-        if c == 'Σ' && chars.peek().is_none() {
-            write!(f, "ς")?;
-        } else {
-            write!(f, "{}", c.to_lowercase())?;
-        }
-    }
+    write!(f, "{}", s.to_lowercase())?;
     Ok(())
 }
 
diff --git a/src/snake.rs b/src/snake.rs
index c3c8576..a116e46 100644
--- a/src/snake.rs
+++ b/src/snake.rs
@@ -97,4 +97,10 @@ mod tests {
     t!(test23: "ABC123dEEf456FOO" => "abc123d_e_ef456_foo");
     t!(test24: "abcDEF" => "abc_def");
     t!(test25: "ABcDE" => "a_bc_de");
+    t!(test26: "Σ" => "σ");
+    t!(test27: "ファΣ" => "ファσ");
+    t!(test28: "X̂Σ" => "x̂ς");
+    t!(test29: "XΣ̂" => "xς̂");
+    t!(test30: "XΣフ" => "xςフ");
+    t!(test31: "XΣA" => "xσa");
 }