Skip to content

Commit aa9255f

Browse files
authored
fix(builtins): sort -n extracts leading numeric prefix from strings (#838)
## Summary

- `sort -n` now parses the leading numeric portion of strings (e.g. `0003-msg.md` → 3) instead of requiring the entire token to be a valid number
- Adds full-line fallback comparison when sort keys are equal, fixing field-based sorting (`-t`/`-k`) when keys tie
- Adds 7 new spec tests covering numeric prefix extraction, mixed prefix lengths, non-numeric-as-zero, field-based delimiter sorting, reverse numeric, and zero-padded numbers

## Test plan

- [x] New spec tests pass: `sort_numeric_prefix_strings`, `sort_numeric_mixed_prefix_lengths`, `sort_numeric_nonnumeric_as_zero`, `sort_field_delim_k2`, `sort_field_delim_k1`, `sort_numeric_reverse`, `sort_numeric_zero_padded`
- [x] All existing sort/uniq spec tests still pass
- [x] `cargo fmt --check` clean
- [x] `cargo clippy -p bashkit -- -D warnings` clean

Closes #833
1 parent 978d076 commit aa9255f

File tree

3 files changed

+116
-14
lines changed

3 files changed

+116
-14
lines changed

crates/bashkit/src/builtins/sortuniq.rs

Lines changed: 51 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,39 @@ fn extract_key(line: &str, delimiter: Option<char>, key_field: usize) -> String
4141
}
4242
}
4343

44+
/// Extract the leading numeric prefix of `s` for `sort -n`.
///
/// After skipping leading whitespace, the recognised prefix is an optional
/// `-` sign, zero or more ASCII digits, and optionally a `.` followed by zero
/// or more ASCII digits. Everything after that prefix is ignored, so
/// `0003-msg.md` yields `3.0` and `1.5abc` yields `1.5`.
///
/// Returns `0.0` when the prefix contains no digit at all (empty input,
/// plain text, a lone `-` or `.`).
///
/// As in GNU coreutils, a leading `+` and exponent notation are not part of
/// the number: `+5` compares as `0` and `1e3` compares as `1`.
fn extract_numeric_prefix(s: &str) -> f64 {
    let s = s.trim_start();
    let bytes = s.as_bytes();
    // Length of the run of ASCII digits starting at byte offset `start`.
    let digit_run = |start: usize| -> usize {
        bytes[start..]
            .iter()
            .take_while(|b| b.is_ascii_digit())
            .count()
    };

    // Only '-' counts as a sign; '+' makes the whole token non-numeric.
    let sign_len = usize::from(bytes.first() == Some(&b'-'));
    let int_len = digit_run(sign_len);
    let mut end = sign_len + int_len;

    let mut frac_len = 0;
    if bytes.get(end) == Some(&b'.') {
        frac_len = digit_run(end + 1);
        end += 1 + frac_len;
    }

    // A sign and/or decimal point without any digit is not a number.
    if int_len + frac_len == 0 {
        return 0.0;
    }

    // Every byte before `end` is ASCII, so `end` is a valid char boundary.
    s[..end].parse().unwrap_or(0.0)
}
76+
4477
/// Parse human-numeric value (e.g., "10K" → 10_000, "5M" → 5_000_000)
4578
fn parse_human_numeric(s: &str) -> f64 {
4679
let s = s.trim();
@@ -289,21 +322,26 @@ impl Builtin for Sort {
289322
let mb = month_ordinal(&kb);
290323
ma.cmp(&mb)
291324
} else if numeric {
292-
let na: f64 = ka
293-
.split_whitespace()
294-
.next()
295-
.and_then(|s| s.parse().ok())
296-
.unwrap_or(0.0);
297-
let nb: f64 = kb
298-
.split_whitespace()
299-
.next()
300-
.and_then(|s| s.parse().ok())
301-
.unwrap_or(0.0);
302-
na.partial_cmp(&nb).unwrap_or(std::cmp::Ordering::Equal)
325+
let na = extract_numeric_prefix(&ka);
326+
let nb = extract_numeric_prefix(&kb);
327+
match na.partial_cmp(&nb).unwrap_or(std::cmp::Ordering::Equal) {
328+
std::cmp::Ordering::Equal => a.cmp(b),
329+
ord => ord,
330+
}
303331
} else if fold_case {
304-
ka.to_lowercase().cmp(&kb.to_lowercase())
332+
let ord = ka.to_lowercase().cmp(&kb.to_lowercase());
333+
if ord == std::cmp::Ordering::Equal && key_field.is_some() {
334+
a.cmp(b)
335+
} else {
336+
ord
337+
}
305338
} else {
306-
ka.cmp(&kb)
339+
let ord = ka.cmp(&kb);
340+
if ord == std::cmp::Ordering::Equal && key_field.is_some() {
341+
a.cmp(b)
342+
} else {
343+
ord
344+
}
307345
}
308346
};
309347

crates/bashkit/tests/spec_cases/bash/sortuniq.test.sh

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,3 +268,67 @@ a
268268
b
269269
c
270270
### end
271+
272+
### sort_numeric_prefix_strings
273+
# sort -n extracts leading numeric prefix from strings
274+
printf '0003-msg.md\n0001-msg.md\n0002-msg.md\n' | sort -n
275+
### expect
276+
0001-msg.md
277+
0002-msg.md
278+
0003-msg.md
279+
### end
280+
281+
### sort_numeric_mixed_prefix_lengths
282+
# sort -n with mixed prefix lengths
283+
printf '10-exec\n20-tools\n5-first\n' | sort -n
284+
### expect
285+
5-first
286+
10-exec
287+
20-tools
288+
### end
289+
290+
### sort_numeric_nonnumeric_as_zero
291+
# sort -n treats non-numeric lines as 0, tiebreak lexically
292+
printf 'zzz\n2-second\naaa\n1-first\n' | sort -n
293+
### expect
294+
aaa
295+
zzz
296+
1-first
297+
2-second
298+
### end
299+
300+
### sort_field_delim_k2
301+
# sort -t/ -k2,2
302+
printf 'assemble/20-tools\nassemble/10-init\nassemble/30-end\n' | sort -t/ -k2,2
303+
### expect
304+
assemble/10-init
305+
assemble/20-tools
306+
assemble/30-end
307+
### end
308+
309+
### sort_field_delim_k1
310+
# sort -t/ -k1,1 orders lines by the first field (the keys here are all distinct)
311+
printf 'z/20-tools\na/10-init\nm/30-end\n' | sort -t/ -k1,1
312+
### expect
313+
a/10-init
314+
m/30-end
315+
z/20-tools
316+
### end
317+
318+
### sort_numeric_reverse
319+
# sort -n -r
320+
printf '1\n3\n2\n' | sort -n -r
321+
### expect
322+
3
323+
2
324+
1
325+
### end
326+
327+
### sort_numeric_zero_padded
328+
# sort -n with zero-padded numbers
329+
printf '003\n010\n001\n' | sort -n
330+
### expect
331+
001
332+
003
333+
010
334+
### end

supply-chain/config.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1371,7 +1371,7 @@ version = "0.1.25"
13711371
criteria = "safe-to-deploy"
13721372

13731373
[[exemptions.unicode-segmentation]]
1374-
version = "1.13.1"
1374+
version = "1.13.2"
13751375
criteria = "safe-to-deploy"
13761376

13771377
[[exemptions.unicode-width]]

0 commit comments

Comments
 (0)