Skip to content

Commit 01e16d8

Browse files
committed
fix(builtins): resolve 10 eval-surfaced interpreter bugs
Fix 7 bugs and unignore 10 previously-skipped spec tests:

- tail: support `tail -n +N` (start from line N) syntax
- tr: implement POSIX character classes ([:lower:], [:upper:], etc.)
- grep: add BRE mode — literal ( ) are escaped, \( \) become groups
- sed: proper BRE-to-ERE conversion for patterns with literal parens
- awk: implement match() 3rd-arg capture array (gawk extension)
- interpreter: execute VFS scripts by path after chmod +x

Tests fixed: tr_class_upper_from_pipe, while_read_pipe_vars, tail_plus_n_offset, script_chmod_exec_by_path, grep_bre_literal_paren, grep_bre_literal_paren_pattern, awk_field_multiply_accumulate, awk_match_capture_array, sed_capture_group_complex_bre, sed_ere_capture_group_extract

Skipped test count: 87 → 77

https://claude.ai/code/session_016XS5TJwtYPBB7ao42BTFNx
1 parent c9a8cad commit 01e16d8

File tree

11 files changed

+338
-64
lines changed

11 files changed

+338
-64
lines changed

crates/bashkit/src/builtins/awk.rs

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1889,14 +1889,41 @@ impl AwkInterpreter {
18891889
}
18901890
let s = self.eval_expr(&args[0]).as_string();
18911891
let pattern = self.eval_expr(&args[1]).as_string();
1892+
// Extract capture array name from 3rd arg (gawk extension)
1893+
let arr_name = if args.len() >= 3 {
1894+
if let AwkExpr::Variable(name) = &args[2] {
1895+
Some(name.clone())
1896+
} else {
1897+
None
1898+
}
1899+
} else {
1900+
None
1901+
};
18921902
if let Ok(re) = Regex::new(&pattern) {
1893-
if let Some(m) = re.find(&s) {
1903+
if let Some(caps) = re.captures(&s) {
1904+
let m = caps.get(0).unwrap();
18941905
let rstart = m.start() + 1; // awk is 1-indexed
18951906
let rlength = m.end() - m.start();
18961907
self.state
18971908
.set_variable("RSTART", AwkValue::Number(rstart as f64));
18981909
self.state
18991910
.set_variable("RLENGTH", AwkValue::Number(rlength as f64));
1911+
// Populate capture array if 3rd arg provided
1912+
if let Some(ref arr) = arr_name {
1913+
// arr[0] = entire match
1914+
let full_key = format!("{}[0]", arr);
1915+
self.state
1916+
.set_variable(&full_key, AwkValue::String(m.as_str().to_string()));
1917+
// arr[1..N] = capture groups
1918+
for i in 1..caps.len() {
1919+
let key = format!("{}[{}]", arr, i);
1920+
let val = caps
1921+
.get(i)
1922+
.map(|c| c.as_str().to_string())
1923+
.unwrap_or_default();
1924+
self.state.set_variable(&key, AwkValue::String(val));
1925+
}
1926+
}
19001927
AwkValue::Number(rstart as f64)
19011928
} else {
19021929
self.state.set_variable("RSTART", AwkValue::Number(0.0));

crates/bashkit/src/builtins/cuttr.rs

Lines changed: 101 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -202,31 +202,91 @@ impl Builtin for Tr {
202202
}
203203
}
204204

205-
/// Expand a character set specification like "a-z" into a list of characters
205+
/// Expand a character set specification like "a-z" into a list of characters.
206+
/// Supports POSIX character classes: [:lower:], [:upper:], [:digit:], [:alpha:], [:alnum:], [:space:]
206207
fn expand_char_set(spec: &str) -> Vec<char> {
207208
let mut chars = Vec::new();
208-
let mut iter = spec.chars().peekable();
209-
210-
while let Some(c) = iter.next() {
211-
if iter.peek() == Some(&'-') {
212-
iter.next(); // consume '-'
213-
if let Some(&end) = iter.peek() {
214-
iter.next(); // consume end char
215-
// Expand range
216-
let start = c as u32;
217-
let end = end as u32;
218-
for code in start..=end {
219-
if let Some(ch) = char::from_u32(code) {
220-
chars.push(ch);
209+
let mut i = 0;
210+
let bytes = spec.as_bytes();
211+
212+
while i < bytes.len() {
213+
// Check for POSIX character class [:class:]
214+
if bytes[i] == b'[' && i + 1 < bytes.len() && bytes[i + 1] == b':' {
215+
if let Some(end) = spec[i + 2..].find(":]") {
216+
let class_name = &spec[i + 2..i + 2 + end];
217+
match class_name {
218+
"lower" => chars.extend('a'..='z'),
219+
"upper" => chars.extend('A'..='Z'),
220+
"digit" => chars.extend('0'..='9'),
221+
"alpha" => {
222+
chars.extend('a'..='z');
223+
chars.extend('A'..='Z');
224+
}
225+
"alnum" => {
226+
chars.extend('a'..='z');
227+
chars.extend('A'..='Z');
228+
chars.extend('0'..='9');
229+
}
230+
"space" => chars.extend([' ', '\t', '\n', '\r', '\x0b', '\x0c']),
231+
"blank" => chars.extend([' ', '\t']),
232+
"print" | "graph" => {
233+
for code in 0x20u8..=0x7e {
234+
chars.push(code as char);
235+
}
236+
}
237+
_ => {
238+
// Unknown class, treat literally
239+
chars.push('[');
240+
i += 1;
241+
continue;
221242
}
222243
}
223-
} else {
224-
// Trailing dash, treat literally
225-
chars.push(c);
226-
chars.push('-');
244+
i += 2 + end + 2; // skip past [: + class + :]
245+
continue;
227246
}
247+
}
248+
249+
let c = bytes[i] as char;
250+
// Check for range like a-z
251+
if i + 2 < bytes.len() && bytes[i + 1] == b'-' {
252+
let end = bytes[i + 2] as char;
253+
let start = c as u32;
254+
let end = end as u32;
255+
for code in start..=end {
256+
if let Some(ch) = char::from_u32(code) {
257+
chars.push(ch);
258+
}
259+
}
260+
i += 3;
261+
} else if i + 1 == bytes.len() - 1 && bytes[i + 1] == b'-' {
262+
// Trailing dash
263+
chars.push(c);
264+
chars.push('-');
265+
i += 2;
228266
} else {
267+
// Handle escape sequences
268+
if c == '\\' && i + 1 < bytes.len() {
269+
match bytes[i + 1] {
270+
b'n' => {
271+
chars.push('\n');
272+
i += 2;
273+
continue;
274+
}
275+
b't' => {
276+
chars.push('\t');
277+
i += 2;
278+
continue;
279+
}
280+
b'\\' => {
281+
chars.push('\\');
282+
i += 2;
283+
continue;
284+
}
285+
_ => {}
286+
}
287+
}
229288
chars.push(c);
289+
i += 1;
230290
}
231291
}
232292

@@ -338,6 +398,29 @@ mod tests {
338398
assert_eq!(expand_char_set("0-2"), vec!['0', '1', '2']);
339399
}
340400

401+
/// `[:lower:]` must expand to exactly 26 characters, running from `a` to `z`.
#[test]
fn test_expand_char_class_lower() {
    let expanded = expand_char_set("[:lower:]");
    assert_eq!(expanded.len(), 26);
    assert_eq!(expanded.first().copied(), Some('a'));
    assert_eq!(expanded.last().copied(), Some('z'));
}
408+
409+
/// `[:upper:]` must expand to exactly 26 characters, running from `A` to `Z`.
#[test]
fn test_expand_char_class_upper() {
    let expanded = expand_char_set("[:upper:]");
    assert_eq!(expanded.len(), 26);
    assert_eq!(expanded.first().copied(), Some('A'));
    assert_eq!(expanded.last().copied(), Some('Z'));
}
416+
417+
#[tokio::test]
418+
async fn test_tr_char_class_lower_to_upper() {
419+
let result = run_tr(&["[:lower:]", "[:upper:]"], Some("hello world\n")).await;
420+
assert_eq!(result.exit_code, 0);
421+
assert_eq!(result.stdout, "HELLO WORLD\n");
422+
}
423+
341424
#[test]
342425
fn test_parse_field_spec() {
343426
assert_eq!(parse_field_spec("1"), vec![1]);

crates/bashkit/src/builtins/grep.rs

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ struct GrepOptions {
5151
count_only: bool,
5252
files_with_matches: bool,
5353
fixed_strings: bool,
54+
extended_regex: bool,
5455
only_matching: bool,
5556
word_regex: bool,
5657
quiet: bool,
@@ -78,6 +79,7 @@ impl GrepOptions {
7879
count_only: false,
7980
files_with_matches: false,
8081
fixed_strings: false,
82+
extended_regex: false,
8183
only_matching: false,
8284
word_regex: false,
8385
quiet: false,
@@ -114,8 +116,8 @@ impl GrepOptions {
114116
'o' => opts.only_matching = true,
115117
'w' => opts.word_regex = true,
116118
'F' => opts.fixed_strings = true,
117-
'E' => {} // Extended regex is default
118-
'P' => {} // Perl regex - regex crate supports most Perl features
119+
'E' => opts.extended_regex = true,
120+
'P' => opts.extended_regex = true, // Perl regex implies ERE
119121
'q' => opts.quiet = true,
120122
'x' => opts.whole_line = true,
121123
'H' => opts.show_filename = true,
@@ -298,6 +300,11 @@ impl GrepOptions {
298300
}
299301
let pat = if self.fixed_strings {
300302
regex::escape(p)
303+
} else if !self.extended_regex {
304+
// BRE mode: convert to ERE for the regex crate
305+
// In BRE: ( ) are literal, \( \) are groups
306+
// In ERE/regex crate: ( ) are groups, \( \) are literal
307+
bre_to_ere(p)
301308
} else {
302309
p.clone()
303310
};
@@ -335,6 +342,43 @@ impl GrepOptions {
335342
}
336343
}
337344

345+
/// Convert a BRE (Basic Regular Expression) pattern to the ERE-style syntax
/// understood by the regex crate.
///
/// The two dialects use opposite escaping conventions for seven characters:
///
/// * In BRE, `( ) { } + ? |` are literal when unescaped, and
///   `\( \) \{ \} \+ \? \|` are the metacharacters.
/// * In ERE / the regex crate, `( ) { } + ? |` are metacharacters and the
///   backslash-escaped forms are literal.
///
/// This function flips the escaping of exactly those seven characters. Every
/// other character and every other escape sequence is copied through
/// unchanged. A lone trailing backslash is also copied through as-is.
///
/// Returns the converted pattern as a new `String`.
fn bre_to_ere(pattern: &str) -> String {
    // Characters whose escaped/unescaped meaning is swapped between BRE and ERE.
    let is_swapped = |c: char| matches!(c, '(' | ')' | '{' | '}' | '+' | '?' | '|');

    let mut result = String::with_capacity(pattern.len());
    let mut chars = pattern.chars();

    while let Some(c) = chars.next() {
        match c {
            '\\' => match chars.next() {
                // BRE escaped metacharacter → ERE bare metacharacter.
                Some(next) if is_swapped(next) => result.push(next),
                // Any other escape keeps its backslash.
                Some(next) => {
                    result.push('\\');
                    result.push(next);
                }
                // Trailing backslash with nothing after it: keep it verbatim.
                None => result.push('\\'),
            },
            // BRE literal that the regex crate would treat as an operator:
            // escape it so it still matches itself. This includes `+`, `?`
            // and `|`, which are plain characters in BRE.
            _ if is_swapped(c) => {
                result.push('\\');
                result.push(c);
            }
            _ => result.push(c),
        }
    }

    result
}
381+
338382
#[async_trait]
339383
impl Builtin for Grep {
340384
async fn execute(&self, ctx: Context<'_>) -> Result<ExecResult> {

0 commit comments

Comments
 (0)