Skip to content

Commit 72768ec

Browse files
chaliy and claude authored
feat(parser): implement $'...' ANSI-C quoting and $"..." syntax (#371)
## Summary

- Implement `$'...'` ANSI-C quoting with full escape sequence support (`\n`, `\t`, `\xHH`, `\uHHHH`, `\UHHHHHHHH`, `\NNN` octal, etc.)
- Implement `$"..."` locale translation synonym (treated as a regular double-quoted string)
- Remove 12 `### skip:` markers from spec tests that now pass
- Add `### bash_diff:` markers for unicode tests where system bash behavior differs

## Test plan

- [x] All bash spec tests pass (100% pass rate)
- [x] Bash comparison tests pass (known differences marked with `bash_diff`)
- [x] `cargo fmt --check` clean
- [x] `cargo clippy -D warnings` clean
- [x] All integration and unit tests pass

Closes #353

Co-authored-by: Claude <noreply@anthropic.com>
1 parent 254a4ad commit 72768ec

File tree

3 files changed

+207
-13
lines changed

3 files changed

+207
-13
lines changed

crates/bashkit/src/parser/lexer.rs

Lines changed: 204 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,9 +372,101 @@ impl<'a> Lexer<'a> {
372372
continue;
373373
} else if ch == '$' {
374374
// Handle variable references and command substitution
375-
word.push(ch);
376375
self.advance();
377376

377+
// $'...' — ANSI-C quoting: resolve escapes at parse time
378+
if self.peek_char() == Some('\'') {
379+
self.advance(); // consume opening '
380+
word.push_str(&self.read_dollar_single_quoted_content());
381+
continue;
382+
}
383+
384+
// $"..." — locale translation synonym, treated like "..."
385+
if self.peek_char() == Some('"') {
386+
self.advance(); // consume opening "
387+
while let Some(c) = self.peek_char() {
388+
if c == '"' {
389+
self.advance();
390+
break;
391+
}
392+
if c == '\\' {
393+
self.advance();
394+
if let Some(next) = self.peek_char() {
395+
match next {
396+
'\n' => {
397+
self.advance();
398+
}
399+
'"' | '\\' | '$' | '`' => {
400+
word.push(next);
401+
self.advance();
402+
}
403+
_ => {
404+
word.push('\\');
405+
word.push(next);
406+
self.advance();
407+
}
408+
}
409+
continue;
410+
}
411+
}
412+
if c == '$' {
413+
word.push(c);
414+
self.advance();
415+
if let Some(nc) = self.peek_char() {
416+
if nc == '{' {
417+
word.push(nc);
418+
self.advance();
419+
while let Some(bc) = self.peek_char() {
420+
word.push(bc);
421+
self.advance();
422+
if bc == '}' {
423+
break;
424+
}
425+
}
426+
} else if nc == '(' {
427+
word.push(nc);
428+
self.advance();
429+
let mut depth = 1;
430+
while let Some(pc) = self.peek_char() {
431+
word.push(pc);
432+
self.advance();
433+
if pc == '(' {
434+
depth += 1;
435+
} else if pc == ')' {
436+
depth -= 1;
437+
if depth == 0 {
438+
break;
439+
}
440+
}
441+
}
442+
} else if nc.is_ascii_alphanumeric()
443+
|| nc == '_'
444+
|| matches!(nc, '?' | '#' | '@' | '*' | '!' | '$' | '-')
445+
{
446+
word.push(nc);
447+
self.advance();
448+
if nc.is_ascii_alphabetic() || nc == '_' {
449+
while let Some(vc) = self.peek_char() {
450+
if vc.is_ascii_alphanumeric() || vc == '_' {
451+
word.push(vc);
452+
self.advance();
453+
} else {
454+
break;
455+
}
456+
}
457+
}
458+
}
459+
}
460+
continue;
461+
}
462+
word.push(c);
463+
self.advance();
464+
}
465+
continue;
466+
}
467+
468+
word.push(ch); // push the '$'
469+
378470
// Check for $( - command substitution or arithmetic
379471
if self.peek_char() == Some('(') {
380472
word.push('(');
@@ -616,6 +708,117 @@ impl<'a> Lexer<'a> {
616708
Some(Token::LiteralWord(content))
617709
}
618710

711+
/// Read ANSI-C quoted content ($'...').
712+
/// Opening $' already consumed. Returns the resolved string.
713+
fn read_dollar_single_quoted_content(&mut self) -> String {
714+
let mut out = String::new();
715+
while let Some(ch) = self.peek_char() {
716+
if ch == '\'' {
717+
self.advance();
718+
break;
719+
}
720+
if ch == '\\' {
721+
self.advance();
722+
if let Some(esc) = self.peek_char() {
723+
self.advance();
724+
match esc {
725+
'n' => out.push('\n'),
726+
't' => out.push('\t'),
727+
'r' => out.push('\r'),
728+
'a' => out.push('\x07'),
729+
'b' => out.push('\x08'),
730+
'f' => out.push('\x0C'),
731+
'v' => out.push('\x0B'),
732+
'e' | 'E' => out.push('\x1B'),
733+
'\\' => out.push('\\'),
734+
'\'' => out.push('\''),
735+
'"' => out.push('"'),
736+
'?' => out.push('?'),
737+
'x' => {
738+
let mut hex = String::new();
739+
for _ in 0..2 {
740+
if let Some(h) = self.peek_char() {
741+
if h.is_ascii_hexdigit() {
742+
hex.push(h);
743+
self.advance();
744+
} else {
745+
break;
746+
}
747+
}
748+
}
749+
if let Ok(val) = u8::from_str_radix(&hex, 16) {
750+
out.push(val as char);
751+
}
752+
}
753+
'u' => {
754+
let mut hex = String::new();
755+
for _ in 0..4 {
756+
if let Some(h) = self.peek_char() {
757+
if h.is_ascii_hexdigit() {
758+
hex.push(h);
759+
self.advance();
760+
} else {
761+
break;
762+
}
763+
}
764+
}
765+
if let Ok(val) = u32::from_str_radix(&hex, 16) {
766+
if let Some(c) = char::from_u32(val) {
767+
out.push(c);
768+
}
769+
}
770+
}
771+
'U' => {
772+
let mut hex = String::new();
773+
for _ in 0..8 {
774+
if let Some(h) = self.peek_char() {
775+
if h.is_ascii_hexdigit() {
776+
hex.push(h);
777+
self.advance();
778+
} else {
779+
break;
780+
}
781+
}
782+
}
783+
if let Ok(val) = u32::from_str_radix(&hex, 16) {
784+
if let Some(c) = char::from_u32(val) {
785+
out.push(c);
786+
}
787+
}
788+
}
789+
'0'..='7' => {
790+
let mut oct = String::new();
791+
oct.push(esc);
792+
for _ in 0..2 {
793+
if let Some(o) = self.peek_char() {
794+
if o.is_ascii_digit() && o < '8' {
795+
oct.push(o);
796+
self.advance();
797+
} else {
798+
break;
799+
}
800+
}
801+
}
802+
if let Ok(val) = u8::from_str_radix(&oct, 8) {
803+
out.push(val as char);
804+
}
805+
}
806+
_ => {
807+
out.push('\\');
808+
out.push(esc);
809+
}
810+
}
811+
} else {
812+
out.push('\\');
813+
}
814+
continue;
815+
}
816+
out.push(ch);
817+
self.advance();
818+
}
819+
out
820+
}
821+
619822
fn read_double_quoted_string(&mut self) -> Option<Token> {
620823
self.advance(); // consume opening "
621824
let mut content = String::new();

crates/bashkit/tests/spec_cases/bash/quote.test.sh

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -163,23 +163,20 @@ a\tb
163163

164164
### quote_dollar_single_basic
165165
# $'' basic
166-
### skip: TODO $'' (dollar single quote) not implemented
167166
echo $'foo'
168167
### expect
169168
foo
170169
### end
171170

172171
### quote_dollar_single_quotes
173172
# $'' with quotes
174-
### skip: TODO $'' (dollar single quote) not implemented
175173
echo $'single \' double \"'
176174
### expect
177175
single ' double "
178176
### end
179177
180178
### quote_dollar_single_newlines
181179
# $'' with newlines
182-
### skip: TODO $'' (dollar single quote) not implemented
183180
echo $'col1\ncol2\ncol3'
184181
### expect
185182
col1
@@ -189,7 +186,6 @@ col3
189186
190187
### quote_dollar_double_synonym
191188
# $"" is a synonym for ""
192-
### skip: TODO $"" (dollar double quote) not implemented
193189
echo $"foo"
194190
x=x
195191
echo $"foo $x"
@@ -200,39 +196,34 @@ foo x
200196
201197
### quote_dollar_single_hex
202198
# $'' with hex escapes
203-
### skip: TODO $'' (dollar single quote) not implemented
204199
echo $'\x41\x42\x43'
205200
### expect
206201
ABC
207202
### end
208203
209204
### quote_dollar_single_octal
210205
# $'' with octal escapes
211-
### skip: TODO $'' (dollar single quote) not implemented
212206
echo $'\101\102\103'
213207
### expect
214208
ABC
215209
### end
216210
217211
### quote_dollar_single_unicode_u
218212
# $'' with \u unicode escape
219-
### skip: TODO $'' (dollar single quote) not implemented
220213
echo $'\u0041\u0042'
221214
### expect
222215
AB
223216
### end
224217
225218
### quote_dollar_single_unicode_U
226219
# $'' with \U unicode escape
227-
### skip: TODO $'' (dollar single quote) not implemented
228220
echo $'\U00000041\U00000042'
229221
### expect
230222
AB
231223
### end
232224
233225
### quote_dollar_single_special
234226
# $'' with special escapes
235-
### skip: TODO $'' (dollar single quote) not implemented
236227
printf '%s' $'\a' | od -A n -t x1 | tr -d ' \n'
237228
echo
238229
printf '%s' $'\b' | od -A n -t x1 | tr -d ' \n'

crates/bashkit/tests/spec_cases/bash/unicode.test.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,15 @@ echo "μ"
2525

2626
### unicode_dollar_single
2727
# Unicode in $'' via \u escape
28-
### skip: TODO $'' (dollar single quote) not implemented
28+
### bash_diff: system bash may not support \u in $''
2929
echo $'\u03bc'
3030
### expect
3131
μ
3232
### end
3333

3434
### unicode_dollar_single_U
3535
# Unicode in $'' via \U escape
36-
### skip: TODO $'' (dollar single quote) not implemented
36+
### bash_diff: system bash may not support \U in $''
3737
echo $'\U000003bc'
3838
### expect
3939
μ
@@ -136,7 +136,7 @@ hello 🌍
136136

137137
### unicode_dollar_single_ascii
138138
# $'' with unicode for ASCII range
139-
### skip: TODO $'' (dollar single quote) not implemented
139+
### bash_diff: system bash may not support \u in $''
140140
echo $'\u0041\u0042\u0043'
141141
### expect
142142
ABC

0 commit comments

Comments
 (0)