Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions crates/bashkit/src/builtins/printf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,18 @@ fn format_string(format: &str, args: &[String], arg_index: &mut usize) -> String
output.push(val as char);
}
}
'u' => {
// \uHHHH - 4-digit unicode escape
if let Some(c) = parse_unicode_escape(&mut chars, 4) {
output.push(c);
}
}
'U' => {
// \UHHHHHHHH - 8-digit unicode escape
if let Some(c) = parse_unicode_escape(&mut chars, 8) {
output.push(c);
}
}
_ => {
output.push('\\');
output.push(next);
Expand Down Expand Up @@ -451,6 +463,18 @@ fn expand_escapes(s: &str) -> String {
output.push(val as char);
}
}
'u' => {
// \uHHHH - 4-digit unicode escape
if let Some(c) = parse_unicode_escape(&mut chars, 4) {
output.push(c);
}
}
'U' => {
// \UHHHHHHHH - 8-digit unicode escape
if let Some(c) = parse_unicode_escape(&mut chars, 8) {
output.push(c);
}
}
_ => {
output.push('\\');
output.push(next);
Expand All @@ -467,6 +491,30 @@ fn expand_escapes(s: &str) -> String {
output
}

/// Decode the hex digits that follow a `\u` or `\U` escape into a character.
///
/// Reads at most `max_digits` leading ASCII hex digits from `chars` (callers
/// pass 4 for `\u` and 8 for `\U`) and stops at the first character that is
/// not a hex digit, leaving that character unconsumed.
///
/// Returns `None` when no hex digit was available, when the digits do not fit
/// in a `u32`, or when the value is not a valid Unicode scalar value. Digits
/// that were read stay consumed even if the result is `None`.
fn parse_unicode_escape(
    chars: &mut std::iter::Peekable<std::str::Chars<'_>>,
    max_digits: usize,
) -> Option<char> {
    // Pull digits one at a time; `take` caps how many are consumed, and
    // `next_if` guarantees a non-digit is only peeked at, never removed.
    let digits: String = std::iter::from_fn(|| chars.next_if(|c| c.is_ascii_hexdigit()))
        .take(max_digits)
        .collect();

    if digits.is_empty() {
        return None;
    }

    let code_point = u32::from_str_radix(&digits, 16).ok()?;
    char::from_u32(code_point)
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -533,4 +581,38 @@ mod tests {
let mut idx = 0;
assert_eq!(format_string("%04x", &args, &mut idx), "00ff");
}

#[test]
fn test_unicode_escape_u() {
    // A four-digit `\u` escape for U+03BC must render as μ (Greek small letter mu).
    let no_args: Vec<String> = Vec::new();
    let mut cursor = 0;
    let rendered = format_string("\\u03bc", &no_args, &mut cursor);
    assert_eq!(rendered, "\u{03bc}");
}

#[test]
fn test_unicode_escape_big_u() {
    // An eight-digit `\U` escape for U+000003BC must also render as μ.
    let no_args: Vec<String> = Vec::new();
    let mut cursor = 0;
    let rendered = format_string("\\U000003bc", &no_args, &mut cursor);
    assert_eq!(rendered, "\u{03bc}");
}

#[test]
fn test_unicode_escape_ascii() {
    // Three consecutive `\u` escapes (U+0041, U+0042, U+0043) decode to "ABC".
    let no_args: Vec<String> = Vec::new();
    let mut cursor = 0;
    let rendered = format_string("\\u0041\\u0042\\u0043", &no_args, &mut cursor);
    assert_eq!(rendered, "ABC");
}

#[test]
fn test_unicode_escape_in_expand() {
    // `expand_escapes` (the `%b` path) must decode both the `\u` and `\U` forms to μ.
    let short_form = expand_escapes("\\u03bc");
    assert_eq!(short_form, "\u{03bc}");

    let long_form = expand_escapes("\\U000003bc");
    assert_eq!(long_form, "\u{03bc}");
}
}
2 changes: 1 addition & 1 deletion crates/bashkit/src/interpreter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4977,7 +4977,7 @@ impl Interpreter {
} else {
self.expand_variable(name)
};
result.push_str(&value.len().to_string());
result.push_str(&value.chars().count().to_string());
}
WordPart::ParameterExpansion {
name,
Expand Down
6 changes: 3 additions & 3 deletions crates/bashkit/tests/spec_cases/bash/unicode.test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,15 @@ echo $'\U000003bc'

### unicode_printf_u
# printf \u escape
### skip: TODO printf \u unicode escape not implemented
### bash_diff: system bash printf \u requires UTF-8 locale
printf '\u03bc\n'
### expect
μ
### end

### unicode_printf_U
# printf \U escape
### skip: TODO printf \U unicode escape not implemented
### bash_diff: system bash printf \U requires UTF-8 locale
printf '\U000003bc\n'
### expect
μ
Expand All @@ -65,7 +65,7 @@ café

### unicode_string_length
# String length of unicode string
### skip: TODO ${#x} counts bytes instead of characters for unicode
### bash_diff: system bash ${#x} counts bytes in POSIX locale, chars in UTF-8
x=café
echo ${#x}
### expect
Expand Down
2 changes: 1 addition & 1 deletion crates/bashkit/tests/spec_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
//! - `### skip: reason` - Skip test entirely (not run in any test)
//! - `### bash_diff: reason` - Known difference from real bash (runs in spec tests, excluded from comparison)
//!
//! ## Skipped Tests (18 total)
//! ## Skipped Tests (15 total)
//!
//! Actual `### skip:` markers across spec test files:
//!
Expand Down
Loading