@@ -25,22 +25,152 @@ use crate::interpreter::ExecResult;
2525/// -o Write output to FILE
2626pub struct Sort ;
2727
28- /// Extract the sort key from a line based on field delimiter and key spec
29- fn extract_key ( line : & str , delimiter : Option < char > , key_field : usize ) -> String {
/// One sort key parsed from a `-k KEYDEF` argument.
///
/// KEYDEF has the shape `START[.CHAR][FLAGS][,END[.CHAR][FLAGS]]`. Positions
/// are stored exactly as written; a stored `0` means the component was
/// missing or could not be parsed.
#[derive(Clone, Debug)]
struct KeySpec {
    /// Field number where the key starts.
    start_field: usize,
    /// Character position inside the start field; 0 = whole field.
    start_char: usize,
    /// Field number where the key ends; 0 = end of line.
    end_field: usize,
    /// Character position inside the end field; 0 = end of field.
    end_char: usize,
    /// Flag `n` was present.
    numeric: bool,
    /// Flag `r` was present.
    reverse: bool,
    /// Flag `f` was present.
    fold_case: bool,
    /// Flag `h` was present.
    human_numeric: bool,
    /// Flag `M` was present.
    month_sort: bool,
    /// Flag `V` was present.
    #[allow(dead_code)] // Used when combined with sort -V feature
    version_sort: bool,
}

impl KeySpec {
    /// Parse a KEYDEF string such as "2", "2,3", "2.3,3.4", "2n,2" or "2nr".
    ///
    /// The text is split at the first comma into a START and an optional END
    /// part. A flag letter counts no matter which of the two parts carries
    /// it. Without an END part, `end_field` and `end_char` are both 0.
    fn parse(spec: &str) -> Self {
        let (start_part, end_part) = match spec.split_once(',') {
            Some((head, tail)) => (head, Some(tail)),
            None => (spec, None),
        };

        let (start_field, start_char, start_flags) = Self::parse_field_spec(start_part);
        let (end_field, end_char, end_flags) =
            end_part.map_or_else(|| (0, 0, String::new()), Self::parse_field_spec);

        // A flag is active when either half of the KEYDEF mentions it.
        let has_flag = |flag: char| start_flags.contains(flag) || end_flags.contains(flag);

        KeySpec {
            start_field,
            start_char,
            end_field,
            end_char,
            numeric: has_flag('n'),
            reverse: has_flag('r'),
            fold_case: has_flag('f'),
            human_numeric: has_flag('h'),
            month_sort: has_flag('M'),
            version_sort: has_flag('V'),
        }
    }

    /// Parse "FIELD[.CHAR][FLAGS]" into `(field, char_pos, flags_string)`.
    ///
    /// `field` and `char_pos` fall back to 0 when their digits are absent or
    /// do not fit in a `usize`. Everything after the numeric parts is
    /// returned verbatim as the flags string.
    fn parse_field_spec(s: &str) -> (usize, usize, String) {
        /// Split `text` into its leading run of ASCII digits and the remainder.
        fn split_leading_digits(text: &str) -> (&str, &str) {
            let cut = text
                .find(|c: char| !c.is_ascii_digit())
                .unwrap_or(text.len());
            text.split_at(cut)
        }

        let (field_digits, after_field) = split_leading_digits(s);
        let field = field_digits.parse().unwrap_or(0);

        // A '.' directly after the field number introduces the character position.
        let (char_pos, flags) = match after_field.strip_prefix('.') {
            Some(after_dot) => {
                let (char_digits, tail) = split_leading_digits(after_dot);
                (char_digits.parse().unwrap_or(0), tail)
            }
            None => (0, after_field),
        };

        (field, char_pos, flags.to_string())
    }
}
105+
/// Break `line` into its fields.
///
/// With an explicit delimiter every occurrence separates two fields, so
/// adjacent delimiters produce empty fields. Without one, fields are the
/// runs of non-whitespace characters and no empty field is ever produced.
fn split_fields(line: &str, delimiter: Option<char>) -> Vec<&str> {
    match delimiter {
        Some(separator) => line.split(separator).collect(),
        None => line.split_whitespace().collect(),
    }
}
43114
115+ /// Extract the sort key from a line based on field delimiter and key spec
116+ fn extract_key_spec ( line : & str , delimiter : Option < char > , key : & KeySpec ) -> String {
117+ let fields = split_fields ( line, delimiter) ;
118+ if fields. is_empty ( ) || key. start_field == 0 {
119+ return line. to_string ( ) ;
120+ }
121+
122+ let start_idx = key. start_field . saturating_sub ( 1 ) ;
123+ if start_idx >= fields. len ( ) {
124+ return String :: new ( ) ;
125+ }
126+
127+ let end_idx = if key. end_field == 0 {
128+ fields. len ( ) - 1
129+ } else {
130+ ( key. end_field . saturating_sub ( 1 ) ) . min ( fields. len ( ) - 1 )
131+ } ;
132+
133+ if start_idx > end_idx {
134+ return String :: new ( ) ;
135+ }
136+
137+ if start_idx == end_idx {
138+ let field = fields[ start_idx] ;
139+ let start_c = if key. start_char > 0 {
140+ ( key. start_char - 1 ) . min ( field. len ( ) )
141+ } else {
142+ 0
143+ } ;
144+ let end_c = if key. end_char > 0 {
145+ key. end_char . min ( field. len ( ) )
146+ } else {
147+ field. len ( )
148+ } ;
149+ if start_c >= end_c {
150+ return String :: new ( ) ;
151+ }
152+ return field[ start_c..end_c] . to_string ( ) ;
153+ }
154+
155+ // Multi-field key
156+ let mut result = String :: new ( ) ;
157+ for ( i, field) in fields. iter ( ) . enumerate ( ) . take ( end_idx + 1 ) . skip ( start_idx) {
158+ if i > start_idx {
159+ result. push ( delimiter. unwrap_or ( ' ' ) ) ;
160+ }
161+ if i == start_idx && key. start_char > 0 {
162+ let sc = ( key. start_char - 1 ) . min ( field. len ( ) ) ;
163+ result. push_str ( & field[ sc..] ) ;
164+ } else if i == end_idx && key. end_char > 0 {
165+ let ec = key. end_char . min ( field. len ( ) ) ;
166+ result. push_str ( & field[ ..ec] ) ;
167+ } else {
168+ result. push_str ( field) ;
169+ }
170+ }
171+ result
172+ }
173+
44174/// Extract leading numeric prefix from a string for `sort -n`.
45175/// Real coreutils `sort -n` parses the leading numeric portion (optional sign,
46176/// digits, optional decimal point and digits) and treats the rest as non-numeric.
@@ -124,7 +254,7 @@ impl Builtin for Sort {
124254 let mut month_sort = false ;
125255 let mut merge = false ;
126256 let mut delimiter: Option < char > = None ;
127- let mut key_field : Option < usize > = None ;
257+ let mut key_specs : Vec < KeySpec > = Vec :: new ( ) ;
128258 let mut output_file: Option < String > = None ;
129259 let mut zero_terminated = false ;
130260 let mut files = Vec :: new ( ) ;
@@ -134,15 +264,7 @@ impl Builtin for Sort {
134264 if let Some ( val) = p. flag_value_opt ( "-t" ) {
135265 delimiter = val. chars ( ) . next ( ) ;
136266 } else if let Some ( val) = p. flag_value_opt ( "-k" ) {
137- // Parse key: "2" or "2,2" or "2n"
138- let field_str: String = val. chars ( ) . take_while ( |c| c. is_ascii_digit ( ) ) . collect ( ) ;
139- key_field = field_str. parse ( ) . ok ( ) ;
140- if val. contains ( 'n' ) {
141- numeric = true ;
142- }
143- if val. contains ( 'r' ) {
144- reverse = true ;
145- }
267+ key_specs. push ( KeySpec :: parse ( val) ) ;
146268 } else if let Some ( val) = p. flag_value_opt ( "-o" ) {
147269 output_file = Some ( val. to_string ( ) ) ;
148270 } else {
@@ -280,43 +402,59 @@ impl Builtin for Sort {
280402 }
281403
282404 // Get the key extractor
283- let get_key = |line : & str | -> String {
284- if let Some ( kf) = key_field {
285- extract_key ( line, delimiter, kf)
405+ /// Compare two keys using the specified sort mode flags
406+ fn compare_keys (
407+ ka : & str ,
408+ kb : & str ,
409+ is_numeric : bool ,
410+ is_human : bool ,
411+ is_month : bool ,
412+ is_fold_case : bool ,
413+ ) -> std:: cmp:: Ordering {
414+ if is_human {
415+ let na = parse_human_numeric ( ka) ;
416+ let nb = parse_human_numeric ( kb) ;
417+ na. partial_cmp ( & nb) . unwrap_or ( std:: cmp:: Ordering :: Equal )
418+ } else if is_month {
419+ month_ordinal ( ka) . cmp ( & month_ordinal ( kb) )
420+ } else if is_numeric {
421+ let na = extract_numeric_prefix ( ka) ;
422+ let nb = extract_numeric_prefix ( kb) ;
423+ na. partial_cmp ( & nb) . unwrap_or ( std:: cmp:: Ordering :: Equal )
424+ } else if is_fold_case {
425+ ka. to_lowercase ( ) . cmp ( & kb. to_lowercase ( ) )
286426 } else {
287- line . to_string ( )
427+ ka . cmp ( kb )
288428 }
289- } ;
429+ }
290430
291431 // Sort the lines
292432 let sort_fn = |a : & String , b : & String | -> std:: cmp:: Ordering {
293- let ka = get_key ( a) ;
294- let kb = get_key ( b) ;
295- if human_numeric {
296- let na = parse_human_numeric ( & ka) ;
297- let nb = parse_human_numeric ( & kb) ;
298- na. partial_cmp ( & nb) . unwrap_or ( std:: cmp:: Ordering :: Equal )
299- } else if month_sort {
300- let ma = month_ordinal ( & ka) ;
301- let mb = month_ordinal ( & kb) ;
302- ma. cmp ( & mb)
303- } else if numeric {
304- let na = extract_numeric_prefix ( & ka) ;
305- let nb = extract_numeric_prefix ( & kb) ;
306- match na. partial_cmp ( & nb) . unwrap_or ( std:: cmp:: Ordering :: Equal ) {
307- std:: cmp:: Ordering :: Equal => a. cmp ( b) ,
308- ord => ord,
309- }
310- } else if fold_case {
311- let ord = ka. to_lowercase ( ) . cmp ( & kb. to_lowercase ( ) ) ;
312- if ord == std:: cmp:: Ordering :: Equal && key_field. is_some ( ) {
313- a. cmp ( b)
314- } else {
315- ord
433+ if !key_specs. is_empty ( ) {
434+ // Multi-key sort: compare by each key spec in order
435+ for key in & key_specs {
436+ let ka = extract_key_spec ( a, delimiter, key) ;
437+ let kb = extract_key_spec ( b, delimiter, key) ;
438+ // Per-key flags override global flags
439+ let ord = compare_keys (
440+ & ka,
441+ & kb,
442+ key. numeric || numeric,
443+ key. human_numeric || human_numeric,
444+ key. month_sort || month_sort,
445+ key. fold_case || fold_case,
446+ ) ;
447+ let ord = if key. reverse { ord. reverse ( ) } else { ord } ;
448+ if ord != std:: cmp:: Ordering :: Equal {
449+ return ord;
450+ }
316451 }
452+ // All keys equal — fall back to full-line comparison
453+ a. cmp ( b)
317454 } else {
318- let ord = ka. cmp ( & kb) ;
319- if ord == std:: cmp:: Ordering :: Equal && key_field. is_some ( ) {
455+ // No key specs — use global flags on whole line
456+ let ord = compare_keys ( a, b, numeric, human_numeric, month_sort, fold_case) ;
457+ if ord == std:: cmp:: Ordering :: Equal {
320458 a. cmp ( b)
321459 } else {
322460 ord
0 commit comments