Skip to content

Commit 87e30bb

Browse files
chaliyclaude
andauthored
fix(builtins): use char count instead of byte length in expr (#466)
## Summary - `expr length` used `len()` (byte count) instead of `chars().count()` for multi-byte UTF-8 - `expr substr` used byte-based slicing instead of char-based iteration - Both now use char-based operations ## Test plan - [x] Unit test: `test_length_multibyte_utf8` - [x] Unit test: `test_substr_multibyte_utf8` Closes #434 Co-authored-by: Claude <noreply@anthropic.com>
1 parent 0388c46 commit 87e30bb

File tree

1 file changed

+44
-5
lines changed

1 file changed

+44
-5
lines changed

crates/bashkit/src/builtins/expr.rs

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ fn evaluate(args: &[&str]) -> std::result::Result<String, String> {
4343

4444
// Handle keyword operations first
4545
if args.len() >= 2 && args[0] == "length" {
46-
return Ok(args[1].len().to_string());
46+
// TM-UNI-015: Use char count, not byte count
47+
return Ok(args[1].chars().count().to_string());
4748
}
4849

4950
if args.len() >= 4 && args[0] == "substr" {
@@ -54,12 +55,13 @@ fn evaluate(args: &[&str]) -> std::result::Result<String, String> {
5455
let len: usize = args[3]
5556
.parse()
5657
.map_err(|_| "non-integer argument".to_string())?;
57-
if pos == 0 || pos > s.len() {
58+
let char_count = s.chars().count();
59+
if pos == 0 || pos > char_count {
5860
return Ok(String::new());
5961
}
60-
let start = pos - 1; // 1-based to 0-based
61-
let end = (start + len).min(s.len());
62-
return Ok(s[start..end].to_string());
62+
// TM-UNI-015: Use char-based slicing, not byte-based
63+
let result: String = s.chars().skip(pos - 1).take(len).collect();
64+
return Ok(result);
6365
}
6466

6567
if args.len() >= 3 && args[0] == "index" {
@@ -638,4 +640,41 @@ mod tests {
638640
assert_eq!(result.stdout.trim(), "0");
639641
assert_eq!(result.exit_code, 1); // "0" => falsy
640642
}
643+
644+
// Issue #434: length should count chars, not bytes
645+
#[tokio::test]
646+
async fn test_length_multibyte_utf8() {
647+
let fs = Arc::new(crate::fs::InMemoryFs::new());
648+
let mut variables = HashMap::new();
649+
let mut cwd = std::path::PathBuf::from("/");
650+
let env = HashMap::new();
651+
// "café" = 4 chars but 5 bytes (é is 2 bytes)
652+
let args = vec!["length".to_string(), "café".to_string()];
653+
let ctx = Context::new_for_test(&args, &env, &mut variables, &mut cwd, fs.clone(), None);
654+
let result = Expr.execute(ctx).await.unwrap();
655+
assert_eq!(result.stdout.trim(), "4", "should count chars, not bytes");
656+
}
657+
658+
// Issue #434: substr should use char-based slicing
659+
#[tokio::test]
660+
async fn test_substr_multibyte_utf8() {
661+
let fs = Arc::new(crate::fs::InMemoryFs::new());
662+
let mut variables = HashMap::new();
663+
let mut cwd = std::path::PathBuf::from("/");
664+
let env = HashMap::new();
665+
// "日本語" - extract first 2 chars
666+
let args = vec![
667+
"substr".to_string(),
668+
"日本語".to_string(),
669+
"1".to_string(),
670+
"2".to_string(),
671+
];
672+
let ctx = Context::new_for_test(&args, &env, &mut variables, &mut cwd, fs.clone(), None);
673+
let result = Expr.execute(ctx).await.unwrap();
674+
assert_eq!(
675+
result.stdout.trim(),
676+
"日本",
677+
"should extract chars, not bytes"
678+
);
679+
}
641680
}

0 commit comments

Comments
 (0)