From 5bbf2fc30cc6fb5e5b27552e7bebcc39edb2061e Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Tue, 31 Mar 2026 10:04:39 +0000 Subject: [PATCH] feat(builtins): implement sort -V version sort Adds natural/version sort that compares strings by splitting into alternating non-digit and digit chunks. Non-digit chunks compare lexically; digit chunks compare numerically. Handles version strings like 1.2.10 > 1.2.9 and file-1.10.txt > file-1.2.txt. Closes #907 --- crates/bashkit/src/builtins/sortuniq.rs | 77 ++++++++++++++++++- .../tests/spec_cases/bash/sortuniq.test.sh | 47 +++++++++++ 2 files changed, 122 insertions(+), 2 deletions(-) diff --git a/crates/bashkit/src/builtins/sortuniq.rs b/crates/bashkit/src/builtins/sortuniq.rs index b61e489b..04fe28f0 100644 --- a/crates/bashkit/src/builtins/sortuniq.rs +++ b/crates/bashkit/src/builtins/sortuniq.rs @@ -92,6 +92,75 @@ fn parse_human_numeric(s: &str) -> f64 { num_part.parse::().unwrap_or(0.0) * multiplier } +/// Compare two strings using version/natural sort order. +/// Splits strings into alternating non-digit and digit chunks and compares +/// each: non-digit chunks lexically, digit chunks numerically. 
/// Compare two strings using version/natural sort order.
///
/// Both inputs are walked in lock-step as alternating runs of non-digit and
/// ASCII-digit characters:
///
/// * non-digit runs are compared lexically (byte-wise, like `str::cmp`);
/// * digit runs are compared by numeric value. A missing digit run counts as
///   zero, so `"a"` sorts before `"a0"` and `"a1"`;
/// * when two digit runs have the same value but different spellings
///   (`"01"` vs `"1"`), the shorter spelling sorts first.
///
/// If one string is exhausted while the other still has content, the
/// exhausted one sorts first.
///
/// Digit runs are compared as text rather than parsed into an integer, so
/// arbitrarily long runs (longer than fits in `u64`) are ordered correctly
/// instead of collapsing to zero.
fn version_cmp(a: &str, b: &str) -> std::cmp::Ordering {
    use std::cmp::Ordering;

    // Splits `s` into its leading run of digits (`digits == true`) or
    // non-digits (`digits == false`) and the remainder. The split index is
    // always a char boundary because it comes from `str::find`.
    fn split_run(s: &str, digits: bool) -> (&str, &str) {
        let end = s
            .find(|c: char| c.is_ascii_digit() != digits)
            .unwrap_or(s.len());
        s.split_at(end)
    }

    let (mut rest_a, mut rest_b) = (a, b);

    loop {
        match (rest_a.is_empty(), rest_b.is_empty()) {
            (true, true) => return Ordering::Equal,
            (true, false) => return Ordering::Less,
            (false, true) => return Ordering::Greater,
            (false, false) => {}
        }

        // Non-digit prefix: plain lexical comparison.
        let (text_a, tail_a) = split_run(rest_a, false);
        let (text_b, tail_b) = split_run(rest_b, false);
        let by_text = text_a.cmp(text_b);
        if by_text != Ordering::Equal {
            return by_text;
        }

        // Digit run: compare by value without parsing. After dropping leading
        // zeros, a longer run is a larger number; equal-length runs of ASCII
        // digits order numerically when compared lexically.
        let (num_a, tail_a) = split_run(tail_a, true);
        let (num_b, tail_b) = split_run(tail_b, true);
        let sig_a = num_a.trim_start_matches('0');
        let sig_b = num_b.trim_start_matches('0');
        let by_num = sig_a
            .len()
            .cmp(&sig_b.len())
            .then_with(|| sig_a.cmp(sig_b))
            // Same value, different spelling: fewer leading zeros sorts first.
            .then_with(|| num_a.len().cmp(&num_b.len()));
        if by_num != Ordering::Equal {
            return by_num;
        }

        // Both chunks matched exactly; at least one side consumed input, so
        // the loop always makes progress.
        rest_a = tail_a;
        rest_b = tail_b;
    }
}
'v2.0.1\nv1.9.0\nv2.0.0\nv1.10.0\n' | sort -V +### expect +v1.9.0 +v1.10.0 +v2.0.0 +v2.0.1 +### end + +### sort_version_files +# sort -V with filenames containing version numbers +printf 'file-1.10.txt\nfile-1.2.txt\nfile-1.1.txt\n' | sort -V +### expect +file-1.1.txt +file-1.2.txt +file-1.10.txt +### end + +### sort_version_reverse +# sort -rV reverse version sort +printf '1.1\n1.10\n1.2\n' | sort -rV +### expect +1.10 +1.2 +1.1 +### end + +### sort_version_mixed +# sort -V with mixed content +printf 'a1\na10\na2\na20\n' | sort -V +### expect +a1 +a2 +a10 +a20 +### end