@@ -25,20 +25,151 @@ use crate::interpreter::ExecResult;
2525/// -o Write output to FILE
2626pub struct Sort ;
2727
28- /// Extract the sort key from a line based on field delimiter and key spec
29- fn extract_key ( line : & str , delimiter : Option < char > , key_field : usize ) -> String {
28+ /// A parsed sort key definition from `-k KEYDEF`.
29+ /// Format: `START[.CHAR][FLAGS][,END[.CHAR][FLAGS]]`
30+ #[ derive( Clone , Debug ) ]
31+ struct KeySpec {
32+ start_field : usize ,
33+ start_char : usize , // 0 = whole field
34+ end_field : usize , // 0 = end of line
35+ end_char : usize , // 0 = end of field
36+ numeric : bool ,
37+ reverse : bool ,
38+ fold_case : bool ,
39+ human_numeric : bool ,
40+ month_sort : bool ,
41+ #[ allow( dead_code) ] // Used when combined with sort -V feature
42+ version_sort : bool ,
43+ }
44+
45+ impl KeySpec {
46+ /// Parse a KEYDEF string like "2", "2,3", "2.3,3.4", "2n,2", "2nr"
47+ fn parse ( spec : & str ) -> Self {
48+ let ( start_part, end_part) = if let Some ( comma) = spec. find ( ',' ) {
49+ ( & spec[ ..comma] , Some ( & spec[ comma + 1 ..] ) )
50+ } else {
51+ ( spec, None )
52+ } ;
53+
54+ let ( start_field, start_char, start_flags) = Self :: parse_field_spec ( start_part) ;
55+ let ( end_field, end_char, end_flags) = if let Some ( ep) = end_part {
56+ let ( f, c, fl) = Self :: parse_field_spec ( ep) ;
57+ ( f, c, fl)
58+ } else {
59+ // No comma: -k2 means "field 2 only" (same as -k2,2)
60+ ( start_field, 0 , String :: new ( ) )
61+ } ;
62+
63+ // Merge flags from both start and end parts
64+ let all_flags: String = format ! ( "{}{}" , start_flags, end_flags) ;
65+
66+ KeySpec {
67+ start_field,
68+ start_char,
69+ end_field,
70+ end_char,
71+ numeric : all_flags. contains ( 'n' ) ,
72+ reverse : all_flags. contains ( 'r' ) ,
73+ fold_case : all_flags. contains ( 'f' ) ,
74+ human_numeric : all_flags. contains ( 'h' ) ,
75+ month_sort : all_flags. contains ( 'M' ) ,
76+ version_sort : all_flags. contains ( 'V' ) ,
77+ }
78+ }
79+
80+ /// Parse "FIELD[.CHAR][FLAGS]" → (field, char_pos, flags_string)
81+ fn parse_field_spec ( s : & str ) -> ( usize , usize , String ) {
82+ let mut i = 0 ;
83+ let chars: Vec < char > = s. chars ( ) . collect ( ) ;
84+ // Parse field number
85+ while i < chars. len ( ) && chars[ i] . is_ascii_digit ( ) {
86+ i += 1 ;
87+ }
88+ let field: usize = s[ ..i] . parse ( ) . unwrap_or ( 0 ) ;
89+
90+ // Parse optional .CHAR
91+ let mut char_pos = 0 ;
92+ if i < chars. len ( ) && chars[ i] == '.' {
93+ i += 1 ;
94+ let start = i;
95+ while i < chars. len ( ) && chars[ i] . is_ascii_digit ( ) {
96+ i += 1 ;
97+ }
98+ char_pos = s[ start..i] . parse ( ) . unwrap_or ( 0 ) ;
99+ }
100+
101+ // Remaining chars are flags
102+ let flags = s[ i..] . to_string ( ) ;
103+ ( field, char_pos, flags)
104+ }
105+ }
106+
107+ /// Split a line into fields using the given delimiter
108+ fn split_fields ( line : & str , delimiter : Option < char > ) -> Vec < & str > {
30109 if let Some ( delim) = delimiter {
31- line. split ( delim)
32- . nth ( key_field. saturating_sub ( 1 ) )
33- . unwrap_or ( "" )
34- . to_string ( )
110+ line. split ( delim) . collect ( )
35111 } else {
36- // Default: whitespace-separated fields
37- line. split_whitespace ( )
38- . nth ( key_field. saturating_sub ( 1 ) )
39- . unwrap_or ( "" )
40- . to_string ( )
112+ line. split_whitespace ( ) . collect ( )
113+ }
114+ }
115+
116+ /// Extract the sort key from a line based on field delimiter and key spec
117+ fn extract_key_spec ( line : & str , delimiter : Option < char > , key : & KeySpec ) -> String {
118+ let fields = split_fields ( line, delimiter) ;
119+ if fields. is_empty ( ) || key. start_field == 0 {
120+ return line. to_string ( ) ;
121+ }
122+
123+ let start_idx = key. start_field . saturating_sub ( 1 ) ;
124+ if start_idx >= fields. len ( ) {
125+ return String :: new ( ) ;
126+ }
127+
128+ let end_idx = if key. end_field == 0 {
129+ fields. len ( ) - 1
130+ } else {
131+ ( key. end_field . saturating_sub ( 1 ) ) . min ( fields. len ( ) - 1 )
132+ } ;
133+
134+ if start_idx > end_idx {
135+ return String :: new ( ) ;
136+ }
137+
138+ if start_idx == end_idx {
139+ let field = fields[ start_idx] ;
140+ let start_c = if key. start_char > 0 {
141+ ( key. start_char - 1 ) . min ( field. len ( ) )
142+ } else {
143+ 0
144+ } ;
145+ let end_c = if key. end_char > 0 {
146+ key. end_char . min ( field. len ( ) )
147+ } else {
148+ field. len ( )
149+ } ;
150+ if start_c >= end_c {
151+ return String :: new ( ) ;
152+ }
153+ return field[ start_c..end_c] . to_string ( ) ;
41154 }
155+
156+ // Multi-field key
157+ let mut result = String :: new ( ) ;
158+ for ( i, field) in fields. iter ( ) . enumerate ( ) . take ( end_idx + 1 ) . skip ( start_idx) {
159+ if i > start_idx {
160+ result. push ( delimiter. unwrap_or ( ' ' ) ) ;
161+ }
162+ if i == start_idx && key. start_char > 0 {
163+ let sc = ( key. start_char - 1 ) . min ( field. len ( ) ) ;
164+ result. push_str ( & field[ sc..] ) ;
165+ } else if i == end_idx && key. end_char > 0 {
166+ let ec = key. end_char . min ( field. len ( ) ) ;
167+ result. push_str ( & field[ ..ec] ) ;
168+ } else {
169+ result. push_str ( field) ;
170+ }
171+ }
172+ result
42173}
43174
44175/// Extract leading numeric prefix from a string for `sort -n`.
@@ -194,7 +325,7 @@ impl Builtin for Sort {
194325 let mut version_sort = false ;
195326 let mut merge = false ;
196327 let mut delimiter: Option < char > = None ;
197- let mut key_field : Option < usize > = None ;
328+ let mut key_specs : Vec < KeySpec > = Vec :: new ( ) ;
198329 let mut output_file: Option < String > = None ;
199330 let mut zero_terminated = false ;
200331 let mut files = Vec :: new ( ) ;
@@ -204,15 +335,7 @@ impl Builtin for Sort {
204335 if let Some ( val) = p. flag_value_opt ( "-t" ) {
205336 delimiter = val. chars ( ) . next ( ) ;
206337 } else if let Some ( val) = p. flag_value_opt ( "-k" ) {
207- // Parse key: "2" or "2,2" or "2n"
208- let field_str: String = val. chars ( ) . take_while ( |c| c. is_ascii_digit ( ) ) . collect ( ) ;
209- key_field = field_str. parse ( ) . ok ( ) ;
210- if val. contains ( 'n' ) {
211- numeric = true ;
212- }
213- if val. contains ( 'r' ) {
214- reverse = true ;
215- }
338+ key_specs. push ( KeySpec :: parse ( val) ) ;
216339 } else if let Some ( val) = p. flag_value_opt ( "-o" ) {
217340 output_file = Some ( val. to_string ( ) ) ;
218341 } else {
@@ -351,48 +474,75 @@ impl Builtin for Sort {
351474 }
352475
353476 // Get the key extractor
354- let get_key = |line : & str | -> String {
355- if let Some ( kf) = key_field {
356- extract_key ( line, delimiter, kf)
477+ /// Compare two keys using the specified sort mode flags
478+ fn compare_keys (
479+ ka : & str ,
480+ kb : & str ,
481+ is_numeric : bool ,
482+ is_human : bool ,
483+ is_month : bool ,
484+ is_fold_case : bool ,
485+ ) -> std:: cmp:: Ordering {
486+ if is_human {
487+ let na = parse_human_numeric ( ka) ;
488+ let nb = parse_human_numeric ( kb) ;
489+ na. partial_cmp ( & nb) . unwrap_or ( std:: cmp:: Ordering :: Equal )
490+ } else if is_month {
491+ month_ordinal ( ka) . cmp ( & month_ordinal ( kb) )
492+ } else if is_numeric {
493+ let na = extract_numeric_prefix ( ka) ;
494+ let nb = extract_numeric_prefix ( kb) ;
495+ na. partial_cmp ( & nb) . unwrap_or ( std:: cmp:: Ordering :: Equal )
496+ } else if is_fold_case {
497+ ka. to_lowercase ( ) . cmp ( & kb. to_lowercase ( ) )
357498 } else {
358- line . to_string ( )
499+ ka . cmp ( kb )
359500 }
360- } ;
501+ }
361502
362503 // Sort the lines
363504 let sort_fn = |a : & String , b : & String | -> std:: cmp:: Ordering {
364- let ka = get_key ( a) ;
365- let kb = get_key ( b) ;
366- if version_sort {
367- version_cmp ( & ka, & kb)
368- } else if human_numeric {
369- let na = parse_human_numeric ( & ka) ;
370- let nb = parse_human_numeric ( & kb) ;
371- na. partial_cmp ( & nb) . unwrap_or ( std:: cmp:: Ordering :: Equal )
372- } else if month_sort {
373- let ma = month_ordinal ( & ka) ;
374- let mb = month_ordinal ( & kb) ;
375- ma. cmp ( & mb)
376- } else if numeric {
377- let na = extract_numeric_prefix ( & ka) ;
378- let nb = extract_numeric_prefix ( & kb) ;
379- match na. partial_cmp ( & nb) . unwrap_or ( std:: cmp:: Ordering :: Equal ) {
380- std:: cmp:: Ordering :: Equal => a. cmp ( b) ,
381- ord => ord,
382- }
383- } else if fold_case {
384- let ord = ka. to_lowercase ( ) . cmp ( & kb. to_lowercase ( ) ) ;
385- if ord == std:: cmp:: Ordering :: Equal && key_field. is_some ( ) {
386- a. cmp ( b)
387- } else {
388- ord
505+ if !key_specs. is_empty ( ) {
506+ // Multi-key sort: compare by each key spec in order
507+ for key in & key_specs {
508+ let ka = extract_key_spec ( a, delimiter, key) ;
509+ let kb = extract_key_spec ( b, delimiter, key) ;
510+ // Per-key flags override global flags
511+ let ord = if key. version_sort || version_sort {
512+ version_cmp ( & ka, & kb)
513+ } else {
514+ compare_keys (
515+ & ka,
516+ & kb,
517+ key. numeric || numeric,
518+ key. human_numeric || human_numeric,
519+ key. month_sort || month_sort,
520+ key. fold_case || fold_case,
521+ )
522+ } ;
523+ let ord = if key. reverse { ord. reverse ( ) } else { ord } ;
524+ if ord != std:: cmp:: Ordering :: Equal {
525+ return ord;
526+ }
389527 }
528+ // All keys equal — fall back to full-line comparison
529+ a. cmp ( b)
390530 } else {
391- let ord = ka. cmp ( & kb) ;
392- if ord == std:: cmp:: Ordering :: Equal && key_field. is_some ( ) {
393- a. cmp ( b)
531+ // No key specs — use global flags on whole line
532+ if version_sort {
533+ let ord = version_cmp ( a, b) ;
534+ if ord == std:: cmp:: Ordering :: Equal {
535+ a. cmp ( b)
536+ } else {
537+ ord
538+ }
394539 } else {
395- ord
540+ let ord = compare_keys ( a, b, numeric, human_numeric, month_sort, fold_case) ;
541+ if ord == std:: cmp:: Ordering :: Equal {
542+ a. cmp ( b)
543+ } else {
544+ ord
545+ }
396546 }
397547 }
398548 } ;
@@ -761,11 +911,38 @@ mod tests {
761911 }
762912
763913 #[ tokio:: test]
764- async fn test_extract_key ( ) {
765- assert_eq ! ( extract_key( "a:b:c" , Some ( ':' ) , 2 ) , "b" ) ;
766- assert_eq ! ( extract_key( "hello world" , None , 1 ) , "hello" ) ;
767- assert_eq ! ( extract_key( "hello world" , None , 2 ) , "world" ) ;
768- assert_eq ! ( extract_key( "x" , None , 5 ) , "" ) ;
914+ async fn test_extract_key_spec ( ) {
915+ let key = KeySpec :: parse ( "2" ) ;
916+ assert_eq ! ( extract_key_spec( "a:b:c" , Some ( ':' ) , & key) , "b" ) ;
917+ let key1 = KeySpec :: parse ( "1" ) ;
918+ assert_eq ! ( extract_key_spec( "hello world" , None , & key1) , "hello" ) ;
919+ let key2 = KeySpec :: parse ( "2" ) ;
920+ assert_eq ! ( extract_key_spec( "hello world" , None , & key2) , "world" ) ;
921+ let key5 = KeySpec :: parse ( "5" ) ;
922+ assert_eq ! ( extract_key_spec( "x" , None , & key5) , "" ) ;
923+ }
924+
925+ #[ tokio:: test]
926+ async fn test_keyspec_parse ( ) {
927+ let k = KeySpec :: parse ( "2n,3r" ) ;
928+ assert_eq ! ( k. start_field, 2 ) ;
929+ assert_eq ! ( k. end_field, 3 ) ;
930+ assert ! ( k. numeric) ;
931+ assert ! ( k. reverse) ;
932+
933+ let k2 = KeySpec :: parse ( "1.2,1.5f" ) ;
934+ assert_eq ! ( k2. start_field, 1 ) ;
935+ assert_eq ! ( k2. start_char, 2 ) ;
936+ assert_eq ! ( k2. end_field, 1 ) ;
937+ assert_eq ! ( k2. end_char, 5 ) ;
938+ assert ! ( k2. fold_case) ;
939+ }
940+
941+ #[ tokio:: test]
942+ async fn test_version_cmp ( ) {
943+ assert_eq ! ( version_cmp( "1.2" , "1.10" ) , std:: cmp:: Ordering :: Less ) ;
944+ assert_eq ! ( version_cmp( "2.0" , "1.9" ) , std:: cmp:: Ordering :: Greater ) ;
945+ assert_eq ! ( version_cmp( "1.0" , "1.0" ) , std:: cmp:: Ordering :: Equal ) ;
769946 }
770947
771948 #[ tokio:: test]
0 commit comments