From c9a0d403f81e9984b4f0f1fdc3f9636ded323e0a Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 25 Feb 2026 15:04:46 +0000 Subject: [PATCH 1/3] chore: trigger CI re-run From fd1ab7377274f7b55044c6b5c847a7747959bb6c Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 25 Feb 2026 15:31:11 +0000 Subject: [PATCH 2/3] chore: trigger fresh CI workflow From 0aed613c1ec5348b24bddb4448cf9a1b97bc26b8 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 25 Feb 2026 22:27:10 +0000 Subject: [PATCH 3/3] feat(interpreter): implement extglob patterns (@, ?, *, +, !) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add extended glob pattern matching gated on shopt -s extglob: - @(pat|pat) — matches exactly one of the alternatives - ?(pat|pat) — matches zero or one - *(pat|pat) — matches zero or more - +(pat|pat) — matches one or more - !(pat|pat) — matches anything except the alternatives Changes: - Lexer: consume extglob OP(...) as single word tokens - Interpreter: add extglob matching in glob_match_impl with recursive backtracking for *, +, ! operators - Works in case statements and [[ == ]] conditionals Adds 15 spec tests. 
https://claude.ai/code/session_012rzB3FRw7yoQWCG1mxyW7J --- crates/bashkit/src/interpreter/mod.rs | 250 +++++++++++++++++- crates/bashkit/src/parser/lexer.rs | 26 ++ .../tests/spec_cases/bash/extglob.test.sh | 118 +++++++++ specs/009-implementation-status.md | 9 +- 4 files changed, 392 insertions(+), 11 deletions(-) create mode 100644 crates/bashkit/tests/spec_cases/bash/extglob.test.sh diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs index f58e94b7..e6e3b9d3 100644 --- a/crates/bashkit/src/interpreter/mod.rs +++ b/crates/bashkit/src/interpreter/mod.rs @@ -2128,6 +2128,20 @@ impl Interpreter { } } + /// Check if pattern contains extglob operators + fn contains_extglob(&self, s: &str) -> bool { + if !self.is_extglob() { + return false; + } + let bytes = s.as_bytes(); + for i in 0..bytes.len().saturating_sub(1) { + if matches!(bytes[i], b'@' | b'?' | b'*' | b'+' | b'!') && bytes[i + 1] == b'(' { + return true; + } + } + false + } + /// Check if a value matches a shell pattern fn pattern_matches(&self, value: &str, pattern: &str) -> bool { // Handle special case of * (match anything) @@ -2135,9 +2149,12 @@ impl Interpreter { return true; } - // Glob pattern matching with *, ?, and [] support - if pattern.contains('*') || pattern.contains('?') || pattern.contains('[') { - // Simple wildcard matching + // Glob pattern matching with *, ?, [], and extglob support + if pattern.contains('*') + || pattern.contains('?') + || pattern.contains('[') + || self.contains_extglob(pattern) + { self.glob_match(value, pattern) } else { // Literal match @@ -2150,8 +2167,70 @@ impl Interpreter { self.glob_match_impl(value, pattern, false) } + /// Parse an extglob pattern-list from pattern string starting after '('. + /// Returns (alternatives, rest_of_pattern) or None if malformed. 
+ fn parse_extglob_pattern_list(pattern: &str) -> Option<(Vec<String>, String)> { + let mut depth = 1; + let mut end = 0; + let chars: Vec<char> = pattern.chars().collect(); + while end < chars.len() { + match chars[end] { + '(' => depth += 1, + ')' => { + depth -= 1; + if depth == 0 { + let inner: String = chars[..end].iter().collect(); + let rest: String = chars[end + 1..].iter().collect(); + // Split on | at depth 0 + let mut alts = Vec::new(); + let mut current = String::new(); + let mut d = 0; + for c in inner.chars() { + match c { + '(' => { + d += 1; + current.push(c); + } + ')' => { + d -= 1; + current.push(c); + } + '|' if d == 0 => { + alts.push(current.clone()); + current.clear(); + } + _ => current.push(c), + } + } + alts.push(current); + return Some((alts, rest)); + } + } + '\\' => { + end += 1; // skip escaped char + } + _ => {} + } + end += 1; + } + None // unclosed paren + } + /// Glob match with optional case-insensitive mode fn glob_match_impl(&self, value: &str, pattern: &str, nocase: bool) -> bool { + let extglob = self.is_extglob(); + + // Check for extglob at the start of pattern + if extglob && pattern.len() >= 2 { + let bytes = pattern.as_bytes(); + if matches!(bytes[0], b'@' | b'?' | b'*' | b'+' | b'!') && bytes[1] == b'(' { + let op = bytes[0]; + if let Some((alts, rest)) = Self::parse_extglob_pattern_list(&pattern[2..]) { + return self.match_extglob(op, &alts, &rest, value, nocase); + } + } + } + let mut value_chars = value.chars().peekable(); let mut pattern_chars = pattern.chars().peekable(); @@ -2160,6 +2239,15 @@ impl Interpreter { (None, None) => return true, (None, Some(_)) => return false, (Some('*'), _) => { + // Check for extglob *(...) 
+ let mut pc_clone = pattern_chars.clone(); + pc_clone.next(); + if extglob && pc_clone.peek() == Some(&'(') && Self::parse_extglob_pattern_list(&pc_clone.clone().skip(1).collect::<String>()).is_some() { + // Well-formed *(pattern-list): re-enter on the tail; an unterminated one would recurse forever + let remaining_pattern: String = pattern_chars.collect(); + let remaining_value: String = value_chars.collect(); + return self.glob_match_impl(&remaining_value, &remaining_pattern, nocase); + } pattern_chars.next(); // * matches zero or more characters if pattern_chars.peek().is_none() { @@ -2178,11 +2266,22 @@ impl Interpreter { let remaining_pattern: String = pattern_chars.collect(); return self.glob_match_impl("", &remaining_pattern, nocase); } - (Some('?'), Some(_)) => { - pattern_chars.next(); - value_chars.next(); + (Some('?'), _) => { + // Check for a well-formed extglob ?(...); an unterminated one is a plain `?` + let mut pc_clone = pattern_chars.clone(); + pc_clone.next(); + if extglob && pc_clone.peek() == Some(&'(') && Self::parse_extglob_pattern_list(&pc_clone.clone().skip(1).collect::<String>()).is_some() { + let remaining_pattern: String = pattern_chars.collect(); + let remaining_value: String = value_chars.collect(); + return self.glob_match_impl(&remaining_value, &remaining_pattern, nocase); + } + if value_chars.peek().is_some() { + pattern_chars.next(); + value_chars.next(); + } else { + return false; + } } - (Some('?'), None) => return false, (Some('['), Some(v)) => { pattern_chars.next(); // consume '[' let match_char = if nocase { v.to_ascii_lowercase() } else { v }; @@ -2201,6 +2300,20 @@ impl Interpreter { } (Some('['), None) => return false, (Some(p), Some(v)) => { + // Check for well-formed extglob operators @(...), +(...), !(...); unterminated ones are literal + if extglob && matches!(p, '@' | '+' | '!') { + let mut pc_clone = pattern_chars.clone(); + pc_clone.next(); + if pc_clone.peek() == Some(&'(') && Self::parse_extglob_pattern_list(&pc_clone.clone().skip(1).collect::<String>()).is_some() { + let remaining_pattern: String = pattern_chars.collect(); + let remaining_value: String = value_chars.collect(); + return self.glob_match_impl( + &remaining_value, + &remaining_pattern, + nocase, + ); + } + } let matches = if nocase { p.eq_ignore_ascii_case(&v) } else { @@ -2218,6 +2331,121 @@ impl Interpreter { } } + /// Match an extglob pattern against a value. 
+ /// op: b'@', b'?', b'*', b'+', b'!' + /// alts: the | separated alternatives + /// rest: pattern after the closing ) + fn match_extglob( + &self, + op: u8, + alts: &[String], + rest: &str, + value: &str, + nocase: bool, + ) -> bool { + match op { + b'@' => { + // @(a|b) — exactly one of the alternatives + for alt in alts { + let full = format!("{}{}", alt, rest); + if self.glob_match_impl(value, &full, nocase) { + return true; + } + } + false + } + b'?' => { + // ?(a|b) — zero or one of the alternatives + // Try zero: skip the extglob entirely + if self.glob_match_impl(value, rest, nocase) { + return true; + } + // Try one + for alt in alts { + let full = format!("{}{}", alt, rest); + if self.glob_match_impl(value, &full, nocase) { + return true; + } + } + false + } + b'+' => { + // +(a|b) — one or more of the alternatives + for alt in alts { + let full = format!("{}{}", alt, rest); + if self.glob_match_impl(value, &full, nocase) { + return true; + } + // Try alt followed by more +(a|b)rest + // Split only on UTF-8 char boundaries: slicing `value` mid-character would panic + for split in (1..=value.len()).filter(|&i| value.is_char_boundary(i)) { + let prefix = &value[..split]; + let suffix = &value[split..]; + if self.glob_match_impl(prefix, alt, nocase) { + // Rebuild the extglob for the suffix + let inner = alts.join("|"); + let re_pattern = format!("+({}){}", inner, rest); + if self.glob_match_impl(suffix, &re_pattern, nocase) { + return true; + } + } + } + } + false + } + b'*' => { + // *(a|b) — zero or more of the alternatives + // Try zero + if self.glob_match_impl(value, rest, nocase) { + return true; + } + // Try one or more (same as +(...)) + for alt in alts { + let full = format!("{}{}", alt, rest); + if self.glob_match_impl(value, &full, nocase) { + return true; + } + for split in (1..=value.len()).filter(|&i| value.is_char_boundary(i)) { + let prefix = &value[..split]; + let suffix = &value[split..]; + if self.glob_match_impl(prefix, alt, nocase) { + let inner = alts.join("|"); + let re_pattern = format!("*({}){}", inner, rest); + if
self.glob_match_impl(suffix, &re_pattern, nocase) { + return true; + } + } + } + } + false + } + b'!' => { + // !(a|b) — match anything except one of the alternatives + // Try every possible split point: prefix must NOT match any alt, rest matches + // Actually: !(pat) matches anything that doesn't match @(pat) + let inner = alts.join("|"); + let positive = format!("@({}){}", inner, rest); + !self.glob_match_impl(value, &positive, nocase) + && self.glob_match_impl(value, rest, nocase) + || { + // !(pat) can also consume characters — try each char-boundary split + for split in (1..=value.len()).filter(|&i| value.is_char_boundary(i)) { + let prefix = &value[..split]; + let suffix = &value[split..]; + // prefix must not match any alt + let prefix_matches_any = + alts.iter().any(|a| self.glob_match_impl(prefix, a, nocase)); + if !prefix_matches_any && self.glob_match_impl(suffix, rest, nocase) { + return true; + } + } + false + } + } + _ => false, + } + } + /// Match a bracket expression [abc], [a-z], [!abc], [^abc] /// Returns Some(true) if matched, Some(false) if not matched, None if invalid fn match_bracket_expr( @@ -6304,6 +6532,14 @@ impl Interpreter { .unwrap_or(false) } + /// Check if extglob shopt is enabled + fn is_extglob(&self) -> bool { + self.variables + .get("SHOPT_extglob") + .map(|v| v == "1") + .unwrap_or(false) + } + /// Expand glob for a single item, applying noglob/failglob/nullglob. /// Returns Err(pattern) if failglob triggers, Ok(items) otherwise. async fn expand_glob_item(&self, item: &str) -> std::result::Result, String> { diff --git a/crates/bashkit/src/parser/lexer.rs b/crates/bashkit/src/parser/lexer.rs index 7e137104..ee1ce750 100644 --- a/crates/bashkit/src/parser/lexer.rs +++ b/crates/bashkit/src/parser/lexer.rs @@ -548,6 +548,32 @@ impl<'a> Lexer<'a> { _ => {} } } + } else if ch == '(' && word.ends_with(['@', '?', '*', '+', '!']) { + // Extglob: @(...), ?(...), *(...), +(...), !(...) 
+ // Consume through matching ) including nested parens + word.push(ch); + self.advance(); + let mut depth = 1; + while let Some(c) = self.peek_char() { + word.push(c); + self.advance(); + match c { + '(' => depth += 1, + ')' => { + depth -= 1; + if depth == 0 { + break; + } + } + '\\' => { + if let Some(esc) = self.peek_char() { + word.push(esc); + self.advance(); + } + } + _ => {} + } + } } else if self.is_word_char(ch) { word.push(ch); self.advance(); diff --git a/crates/bashkit/tests/spec_cases/bash/extglob.test.sh b/crates/bashkit/tests/spec_cases/bash/extglob.test.sh new file mode 100644 index 00000000..928f88bc --- /dev/null +++ b/crates/bashkit/tests/spec_cases/bash/extglob.test.sh @@ -0,0 +1,118 @@ +### extglob_at_basic +# @(a|b) matches exactly one alternative +shopt -s extglob +case "foo" in @(foo|bar)) echo "match";; *) echo "no";; esac +### expect +match +### end + +### extglob_at_no_match +# @(a|b) doesn't match non-alternatives +shopt -s extglob +case "baz" in @(foo|bar)) echo "match";; *) echo "no";; esac +### expect +no +### end + +### extglob_question_zero +# ?(a|b) matches zero occurrences +shopt -s extglob +case "" in ?(foo|bar)) echo "match";; *) echo "no";; esac +### expect +match +### end + +### extglob_question_one +# ?(a|b) matches one occurrence +shopt -s extglob +case "foo" in ?(foo|bar)) echo "match";; *) echo "no";; esac +### expect +match +### end + +### extglob_question_no_two +# ?(a|b) does NOT match two occurrences +shopt -s extglob +case "foobar" in ?(foo|bar)) echo "match";; *) echo "no";; esac +### expect +no +### end + +### extglob_plus_one +# +(a|b) matches one occurrence +shopt -s extglob +case "foo" in +(foo|bar)) echo "match";; *) echo "no";; esac +### expect +match +### end + +### extglob_plus_multiple +# +(a|b) matches multiple occurrences +shopt -s extglob +case "foobar" in +(foo|bar)) echo "match";; *) echo "no";; esac +### expect +match +### end + +### extglob_plus_no_zero +# +(a|b) does NOT match zero +shopt -s extglob 
+case "" in +(foo|bar)) echo "match";; *) echo "no";; esac +### expect +no +### end + +### extglob_star_zero +# *(a|b) matches zero occurrences +shopt -s extglob +case "" in *(foo|bar)) echo "match";; *) echo "no";; esac +### expect +match +### end + +### extglob_star_multiple +# *(a|b) matches multiple occurrences +shopt -s extglob +case "foobarfoo" in *(foo|bar)) echo "match";; *) echo "no";; esac +### expect +match +### end + +### extglob_not_basic +# !(a|b) matches anything except alternatives +shopt -s extglob +case "baz" in !(foo|bar)) echo "match";; *) echo "no";; esac +### expect +match +### end + +### extglob_not_reject +# !(a|b) rejects exact matches +shopt -s extglob +case "foo" in !(foo|bar)) echo "match";; *) echo "no";; esac +### expect +no +### end + +### extglob_conditional +# extglob in [[ == ]] +shopt -s extglob +[[ "hello" == @(hello|world) ]] && echo "yes" || echo "no" +### expect +yes +### end + +### extglob_conditional_no +# extglob in [[ == ]] rejects a non-matching value +shopt -s extglob +[[ "xyz" == @(hello|world) ]] && echo "yes" || echo "no" +### expect +no +### end + +### extglob_off_literal +# Without extglob, @(...) is literal +case "@(foo)" in '@(foo)') echo "literal";; *) echo "no";; esac +### expect +literal +### end diff --git a/specs/009-implementation-status.md b/specs/009-implementation-status.md index 2b34c221..c02a3df6 100644 --- a/specs/009-implementation-status.md +++ b/specs/009-implementation-status.md @@ -103,17 +103,17 @@ Bashkit implements IEEE 1003.1-2024 Shell Command Language. 
See ## Spec Test Coverage -**Total spec test cases:** 1437 (1432 pass, 5 skip) +**Total spec test cases:** 1452 (1447 pass, 5 skip) | Category | Cases | In CI | Pass | Skip | Notes | |----------|-------|-------|------|------|-------| -| Bash (core) | 1019 | Yes | 1014 | 5 | `bash_spec_tests` in CI | +| Bash (core) | 1034 | Yes | 1029 | 5 | `bash_spec_tests` in CI | | AWK | 96 | Yes | 96 | 0 | loops, arrays, -v, ternary, field assign, getline, %.6g | | Grep | 76 | Yes | 76 | 0 | -z, -r, -a, -b, -H, -h, -f, -P, --include, --exclude, binary detect | | Sed | 75 | Yes | 75 | 0 | hold space, change, regex ranges, -E | | JQ | 114 | Yes | 114 | 0 | reduce, walk, regex funcs, --arg/--argjson, combined flags, input/inputs, env | | Python | 57 | Yes | 57 | 0 | embedded Python (Monty) | -| **Total** | **1437** | **Yes** | **1432** | **5** | | +| **Total** | **1452** | **Yes** | **1447** | **5** | | ### Bash Spec Tests Breakdown @@ -172,6 +172,7 @@ Bashkit implements IEEE 1003.1-2024 Shell Command Language. 
See | string-ops.test.sh | 14 | string replacement (prefix/suffix anchored), `${var:?}`, case conversion | | read-builtin.test.sh | 10 | `read` builtin, IFS splitting, `-r`, `-a` (array), `-n` (nchars), here-string | | expr.test.sh | 13 | `expr` arithmetic, string ops, pattern matching, exit codes | +| extglob.test.sh | 15 | `@()`, `?()`, `*()`, `+()`, `!()` extended globs | | dirstack.test.sh | 12 | `pushd`, `popd`, `dirs` directory stack operations | ## Shell Features @@ -183,7 +184,7 @@ Features that may be added in the future (not intentionally excluded): | Feature | Priority | Notes | |---------|----------|-------| | Coprocesses `coproc` | Low | Rarely used | -| Extended globs `@()` `!()` | Medium | Requires `shopt -s extglob` | +| ~~Extended globs `@()` `!()` `?()` `*()` `+()`~~ | ~~Medium~~ | Implemented: all five extglob operators | | ~~Associative arrays `declare -A`~~ | ~~Medium~~ | Implemented: key-value access, iteration, unset, `${!m[@]}` | | ~~`[[ =~ ]]` regex matching~~ | ~~Medium~~ | Implemented: `[[ ]]` conditionals with `=~` and BASH_REMATCH | | ~~`getopts`~~ | ~~Medium~~ | Implemented: POSIX option parsing |