@@ -43,7 +43,8 @@ fn evaluate(args: &[&str]) -> std::result::Result<String, String> {
4343
4444 // Handle keyword operations first
4545 if args. len ( ) >= 2 && args[ 0 ] == "length" {
46- return Ok ( args[ 1 ] . len ( ) . to_string ( ) ) ;
46+ // TM-UNI-015: Use char count, not byte count
47+ return Ok ( args[ 1 ] . chars ( ) . count ( ) . to_string ( ) ) ;
4748 }
4849
4950 if args. len ( ) >= 4 && args[ 0 ] == "substr" {
@@ -54,12 +55,13 @@ fn evaluate(args: &[&str]) -> std::result::Result<String, String> {
5455 let len: usize = args[ 3 ]
5556 . parse ( )
5657 . map_err ( |_| "non-integer argument" . to_string ( ) ) ?;
57- if pos == 0 || pos > s. len ( ) {
58+ let char_count = s. chars ( ) . count ( ) ;
59+ if pos == 0 || pos > char_count {
5860 return Ok ( String :: new ( ) ) ;
5961 }
60- let start = pos - 1 ; // 1 -based to 0 -based
61- let end = ( start + len ) . min ( s . len ( ) ) ;
62- return Ok ( s [ start..end ] . to_string ( ) ) ;
62+ // TM-UNI-015: Use char -based slicing, not byte -based
63+ let result : String = s . chars ( ) . skip ( pos - 1 ) . take ( len) . collect ( ) ;
64+ return Ok ( result ) ;
6365 }
6466
6567 if args. len ( ) >= 3 && args[ 0 ] == "index" {
@@ -638,4 +640,41 @@ mod tests {
638640 assert_eq ! ( result. stdout. trim( ) , "0" ) ;
639641 assert_eq ! ( result. exit_code, 1 ) ; // "0" => falsy
640642 }
643+
644+ // Issue #434: length should count chars, not bytes
645+ #[ tokio:: test]
646+ async fn test_length_multibyte_utf8 ( ) {
647+ let fs = Arc :: new ( crate :: fs:: InMemoryFs :: new ( ) ) ;
648+ let mut variables = HashMap :: new ( ) ;
649+ let mut cwd = std:: path:: PathBuf :: from ( "/" ) ;
650+ let env = HashMap :: new ( ) ;
651+ // "café" = 4 chars but 5 bytes (é is 2 bytes)
652+ let args = vec ! [ "length" . to_string( ) , "café" . to_string( ) ] ;
653+ let ctx = Context :: new_for_test ( & args, & env, & mut variables, & mut cwd, fs. clone ( ) , None ) ;
654+ let result = Expr . execute ( ctx) . await . unwrap ( ) ;
655+ assert_eq ! ( result. stdout. trim( ) , "4" , "should count chars, not bytes" ) ;
656+ }
657+
658+ // Issue #434: substr should use char-based slicing
659+ #[ tokio:: test]
660+ async fn test_substr_multibyte_utf8 ( ) {
661+ let fs = Arc :: new ( crate :: fs:: InMemoryFs :: new ( ) ) ;
662+ let mut variables = HashMap :: new ( ) ;
663+ let mut cwd = std:: path:: PathBuf :: from ( "/" ) ;
664+ let env = HashMap :: new ( ) ;
665+ // "日本語" - extract first 2 chars
666+ let args = vec ! [
667+ "substr" . to_string( ) ,
668+ "日本語" . to_string( ) ,
669+ "1" . to_string( ) ,
670+ "2" . to_string( ) ,
671+ ] ;
672+ let ctx = Context :: new_for_test ( & args, & env, & mut variables, & mut cwd, fs. clone ( ) , None ) ;
673+ let result = Expr . execute ( ctx) . await . unwrap ( ) ;
674+ assert_eq ! (
675+ result. stdout. trim( ) ,
676+ "日本" ,
677+ "should extract chars, not bytes"
678+ ) ;
679+ }
641680}
0 commit comments