/// Extract the leading numeric prefix of `s` for `sort -n` comparison.
///
/// Follows the GNU coreutils definition of a `-n` key: optional leading
/// whitespace, an optional `-` sign, zero or more digits, and optionally a
/// decimal point followed by zero or more digits. Anything after that prefix
/// is ignored.
///
/// A leading `+` and exponent notation are deliberately NOT recognized (those
/// belong to `sort -g`), so `"+5"` evaluates to `0.0` and `"1e3"` to `1.0`.
///
/// Returns `0.0` when the prefix contains no digit at all (empty input,
/// `"-"`, `"."`, `"abc"`, ...).
fn extract_numeric_prefix(s: &str) -> f64 {
    let s = s.trim_start();
    let bytes = s.as_bytes();
    // Number of consecutive ASCII digits starting at byte offset `from`.
    let count_digits = |from: usize| {
        bytes[from..]
            .iter()
            .take_while(|b| b.is_ascii_digit())
            .count()
    };

    // Optional sign: only `-` counts; `+` is not part of a `-n` number.
    let mut end = usize::from(bytes.first() == Some(&b'-'));

    // Integer digits.
    let int_digits = count_digits(end);
    end += int_digits;

    // Optional decimal point followed by fractional digits.
    let mut frac_digits = 0;
    if bytes.get(end) == Some(&b'.') {
        frac_digits = count_digits(end + 1);
        end += 1 + frac_digits;
    }

    // A sign and/or a lone decimal point without any digit is not a number.
    if int_digits + frac_digits == 0 {
        return 0.0;
    }

    // Only ASCII bytes were consumed, so `end` is a valid char boundary.
    s[..end].parse().unwrap_or(0.0)
}
expect +5-first +10-exec +20-tools +### end + +### sort_numeric_nonnumeric_as_zero +# sort -n treats non-numeric lines as 0, tiebreak lexically +printf 'zzz\n2-second\naaa\n1-first\n' | sort -n +### expect +aaa +zzz +1-first +2-second +### end + +### sort_field_delim_k2 +# sort -t/ -k2,2 +printf 'assemble/20-tools\nassemble/10-init\nassemble/30-end\n' | sort -t/ -k2,2 +### expect +assemble/10-init +assemble/20-tools +assemble/30-end +### end + +### sort_field_delim_k1 +# sort -t/ -k1,1 with equal keys falls back to full line +printf 'z/20-tools\na/10-init\nm/30-end\n' | sort -t/ -k1,1 +### expect +a/10-init +m/30-end +z/20-tools +### end + +### sort_numeric_reverse +# sort -n -r +printf '1\n3\n2\n' | sort -n -r +### expect +3 +2 +1 +### end + +### sort_numeric_zero_padded +# sort -n with zero-padded numbers +printf '003\n010\n001\n' | sort -n +### expect +001 +003 +010 +### end diff --git a/supply-chain/config.toml b/supply-chain/config.toml index 7a2e6efc..62eab192 100644 --- a/supply-chain/config.toml +++ b/supply-chain/config.toml @@ -1371,7 +1371,7 @@ version = "0.1.25" criteria = "safe-to-deploy" [[exemptions.unicode-segmentation]] -version = "1.13.1" +version = "1.13.2" criteria = "safe-to-deploy" [[exemptions.unicode-width]]