Skip to content

Commit 2cb9ba1

Browse files
chaliy and claude authored
feat(interpreter): implement extglob patterns (@, ?, *, +, !) (#273)
## Summary

- Implements all five extended glob operators gated on `shopt -s extglob`
- `@(pat|pat)` — exactly one alternative
- `?(pat|pat)` — zero or one
- `*(pat|pat)` — zero or more (recursive backtracking)
- `+(pat|pat)` — one or more (recursive backtracking)
- `!(pat|pat)` — anything except the alternatives
- Lexer updated to consume `OP(...)` as single word tokens
- Works in `case` statements and `[[ == ]]` conditionals
- Nested extglob patterns supported via recursive parsing

## Test plan

- [x] 15 new spec tests covering all five operators
- [x] All 1452 spec tests pass (1447 pass, 5 skip)
- [x] `cargo clippy` clean
- [x] `cargo fmt` clean
- [x] Existing glob/case tests still pass

---------

Co-authored-by: Claude <noreply@anthropic.com>
1 parent 76c9b11 commit 2cb9ba1

File tree

4 files changed

+392
-11
lines changed

4 files changed

+392
-11
lines changed

crates/bashkit/src/interpreter/mod.rs

Lines changed: 243 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2128,16 +2128,33 @@ impl Interpreter {
21282128
}
21292129
}
21302130

2131+
/// Check if pattern contains extglob operators
2132+
fn contains_extglob(&self, s: &str) -> bool {
2133+
if !self.is_extglob() {
2134+
return false;
2135+
}
2136+
let bytes = s.as_bytes();
2137+
for i in 0..bytes.len().saturating_sub(1) {
2138+
if matches!(bytes[i], b'@' | b'?' | b'*' | b'+' | b'!') && bytes[i + 1] == b'(' {
2139+
return true;
2140+
}
2141+
}
2142+
false
2143+
}
2144+
21312145
/// Check if a value matches a shell pattern
21322146
fn pattern_matches(&self, value: &str, pattern: &str) -> bool {
21332147
// Handle special case of * (match anything)
21342148
if pattern == "*" {
21352149
return true;
21362150
}
21372151

2138-
// Glob pattern matching with *, ?, and [] support
2139-
if pattern.contains('*') || pattern.contains('?') || pattern.contains('[') {
2140-
// Simple wildcard matching
2152+
// Glob pattern matching with *, ?, [], and extglob support
2153+
if pattern.contains('*')
2154+
|| pattern.contains('?')
2155+
|| pattern.contains('[')
2156+
|| self.contains_extglob(pattern)
2157+
{
21412158
self.glob_match(value, pattern)
21422159
} else {
21432160
// Literal match
@@ -2150,8 +2167,70 @@ impl Interpreter {
21502167
self.glob_match_impl(value, pattern, false)
21512168
}
21522169

2170+
/// Parse an extglob pattern-list from a pattern string starting just after '('.
///
/// `pattern` is the text that follows the opening parenthesis of an extglob
/// group, e.g. `"a|b)rest"` for the pattern `@(a|b)rest`.
///
/// Returns `Some((alternatives, rest_of_pattern))`, where `alternatives` are
/// the `|`-separated pieces found at the top nesting level of the group and
/// `rest_of_pattern` is everything after the matching `)`. Returns `None`
/// when the group is never closed.
///
/// Nested parentheses are kept inside the alternative they belong to. A
/// backslash keeps the following character literal: the pair is copied
/// verbatim into the current alternative and is never treated as `(`, `)`
/// or `|`, so an escaped delimiter neither ends the group nor splits it.
fn parse_extglob_pattern_list(pattern: &str) -> Option<(Vec<String>, String)> {
    // Nesting level relative to the already-consumed opening '('.
    let mut depth: usize = 1;
    let mut alts = Vec::new();
    let mut current = String::new();
    let mut chars = pattern.char_indices();

    while let Some((idx, c)) = chars.next() {
        match c {
            '\\' => {
                // Copy the escape and the escaped character together so the
                // escaped character is not interpreted as a delimiter.
                current.push(c);
                if let Some((_, escaped)) = chars.next() {
                    current.push(escaped);
                }
            }
            '(' => {
                depth += 1;
                current.push(c);
            }
            ')' => {
                depth -= 1;
                if depth == 0 {
                    alts.push(current);
                    // ')' is a single byte, so idx + 1 is a char boundary.
                    let rest = pattern[idx + 1..].to_string();
                    return Some((alts, rest));
                }
                current.push(c);
            }
            // Only a '|' at the group's own level separates alternatives.
            '|' if depth == 1 => alts.push(std::mem::take(&mut current)),
            _ => current.push(c),
        }
    }

    None // unclosed paren
}
2218+
21532219
/// Glob match with optional case-insensitive mode
21542220
fn glob_match_impl(&self, value: &str, pattern: &str, nocase: bool) -> bool {
2221+
let extglob = self.is_extglob();
2222+
2223+
// Check for extglob at the start of pattern
2224+
if extglob && pattern.len() >= 2 {
2225+
let bytes = pattern.as_bytes();
2226+
if matches!(bytes[0], b'@' | b'?' | b'*' | b'+' | b'!') && bytes[1] == b'(' {
2227+
let op = bytes[0];
2228+
if let Some((alts, rest)) = Self::parse_extglob_pattern_list(&pattern[2..]) {
2229+
return self.match_extglob(op, &alts, &rest, value, nocase);
2230+
}
2231+
}
2232+
}
2233+
21552234
let mut value_chars = value.chars().peekable();
21562235
let mut pattern_chars = pattern.chars().peekable();
21572236

@@ -2160,6 +2239,15 @@ impl Interpreter {
21602239
(None, None) => return true,
21612240
(None, Some(_)) => return false,
21622241
(Some('*'), _) => {
2242+
// Check for extglob *(...)
2243+
let mut pc_clone = pattern_chars.clone();
2244+
pc_clone.next();
2245+
if extglob && pc_clone.peek() == Some(&'(') {
2246+
// Extglob *(pattern-list) — collect remaining pattern
2247+
let remaining_pattern: String = pattern_chars.collect();
2248+
let remaining_value: String = value_chars.collect();
2249+
return self.glob_match_impl(&remaining_value, &remaining_pattern, nocase);
2250+
}
21632251
pattern_chars.next();
21642252
// * matches zero or more characters
21652253
if pattern_chars.peek().is_none() {
@@ -2178,11 +2266,22 @@ impl Interpreter {
21782266
let remaining_pattern: String = pattern_chars.collect();
21792267
return self.glob_match_impl("", &remaining_pattern, nocase);
21802268
}
2181-
(Some('?'), Some(_)) => {
2182-
pattern_chars.next();
2183-
value_chars.next();
2269+
(Some('?'), _) => {
2270+
// Check for extglob ?(...)
2271+
let mut pc_clone = pattern_chars.clone();
2272+
pc_clone.next();
2273+
if extglob && pc_clone.peek() == Some(&'(') {
2274+
let remaining_pattern: String = pattern_chars.collect();
2275+
let remaining_value: String = value_chars.collect();
2276+
return self.glob_match_impl(&remaining_value, &remaining_pattern, nocase);
2277+
}
2278+
if value_chars.peek().is_some() {
2279+
pattern_chars.next();
2280+
value_chars.next();
2281+
} else {
2282+
return false;
2283+
}
21842284
}
2185-
(Some('?'), None) => return false,
21862285
(Some('['), Some(v)) => {
21872286
pattern_chars.next(); // consume '['
21882287
let match_char = if nocase { v.to_ascii_lowercase() } else { v };
@@ -2201,6 +2300,20 @@ impl Interpreter {
22012300
}
22022301
(Some('['), None) => return false,
22032302
(Some(p), Some(v)) => {
2303+
// Check for extglob operators: @(, +(, !(
2304+
if extglob && matches!(p, '@' | '+' | '!') {
2305+
let mut pc_clone = pattern_chars.clone();
2306+
pc_clone.next();
2307+
if pc_clone.peek() == Some(&'(') {
2308+
let remaining_pattern: String = pattern_chars.collect();
2309+
let remaining_value: String = value_chars.collect();
2310+
return self.glob_match_impl(
2311+
&remaining_value,
2312+
&remaining_pattern,
2313+
nocase,
2314+
);
2315+
}
2316+
}
22042317
let matches = if nocase {
22052318
p.eq_ignore_ascii_case(&v)
22062319
} else {
@@ -2218,6 +2331,121 @@ impl Interpreter {
22182331
}
22192332
}
22202333

2334+
/// Match an extglob pattern against a value.
2335+
/// op: b'@', b'?', b'*', b'+', b'!'
2336+
/// alts: the | separated alternatives
2337+
/// rest: pattern after the closing )
2338+
fn match_extglob(
2339+
&self,
2340+
op: u8,
2341+
alts: &[String],
2342+
rest: &str,
2343+
value: &str,
2344+
nocase: bool,
2345+
) -> bool {
2346+
match op {
2347+
b'@' => {
2348+
// @(a|b) — exactly one of the alternatives
2349+
for alt in alts {
2350+
let full = format!("{}{}", alt, rest);
2351+
if self.glob_match_impl(value, &full, nocase) {
2352+
return true;
2353+
}
2354+
}
2355+
false
2356+
}
2357+
b'?' => {
2358+
// ?(a|b) — zero or one of the alternatives
2359+
// Try zero: skip the extglob entirely
2360+
if self.glob_match_impl(value, rest, nocase) {
2361+
return true;
2362+
}
2363+
// Try one
2364+
for alt in alts {
2365+
let full = format!("{}{}", alt, rest);
2366+
if self.glob_match_impl(value, &full, nocase) {
2367+
return true;
2368+
}
2369+
}
2370+
false
2371+
}
2372+
b'+' => {
2373+
// +(a|b) — one or more of the alternatives
2374+
for alt in alts {
2375+
let full = format!("{}{}", alt, rest);
2376+
if self.glob_match_impl(value, &full, nocase) {
2377+
return true;
2378+
}
2379+
// Try alt followed by more +(a|b)rest
2380+
// We need to try consuming `alt` prefix then matching +(...)rest again
2381+
for split in 1..=value.len() {
2382+
let prefix = &value[..split];
2383+
let suffix = &value[split..];
2384+
if self.glob_match_impl(prefix, alt, nocase) {
2385+
// Rebuild the extglob for the suffix
2386+
let inner = alts.join("|");
2387+
let re_pattern = format!("+({}){}", inner, rest);
2388+
if self.glob_match_impl(suffix, &re_pattern, nocase) {
2389+
return true;
2390+
}
2391+
}
2392+
}
2393+
}
2394+
false
2395+
}
2396+
b'*' => {
2397+
// *(a|b) — zero or more of the alternatives
2398+
// Try zero
2399+
if self.glob_match_impl(value, rest, nocase) {
2400+
return true;
2401+
}
2402+
// Try one or more (same as +(...))
2403+
for alt in alts {
2404+
let full = format!("{}{}", alt, rest);
2405+
if self.glob_match_impl(value, &full, nocase) {
2406+
return true;
2407+
}
2408+
for split in 1..=value.len() {
2409+
let prefix = &value[..split];
2410+
let suffix = &value[split..];
2411+
if self.glob_match_impl(prefix, alt, nocase) {
2412+
let inner = alts.join("|");
2413+
let re_pattern = format!("*({}){}", inner, rest);
2414+
if self.glob_match_impl(suffix, &re_pattern, nocase) {
2415+
return true;
2416+
}
2417+
}
2418+
}
2419+
}
2420+
false
2421+
}
2422+
b'!' => {
2423+
// !(a|b) — match anything except one of the alternatives
2424+
// Try every possible split point: prefix must NOT match any alt, rest matches
2425+
// Actually: !(pat) matches anything that doesn't match @(pat)
2426+
let inner = alts.join("|");
2427+
let positive = format!("@({}){}", inner, rest);
2428+
!self.glob_match_impl(value, &positive, nocase)
2429+
&& self.glob_match_impl(value, rest, nocase)
2430+
|| {
2431+
// !(pat) can also consume characters — try each split
2432+
for split in 1..=value.len() {
2433+
let prefix = &value[..split];
2434+
let suffix = &value[split..];
2435+
// prefix must not match any alt
2436+
let prefix_matches_any =
2437+
alts.iter().any(|a| self.glob_match_impl(prefix, a, nocase));
2438+
if !prefix_matches_any && self.glob_match_impl(suffix, rest, nocase) {
2439+
return true;
2440+
}
2441+
}
2442+
false
2443+
}
2444+
}
2445+
_ => false,
2446+
}
2447+
}
2448+
22212449
/// Match a bracket expression [abc], [a-z], [!abc], [^abc]
22222450
/// Returns Some(true) if matched, Some(false) if not matched, None if invalid
22232451
fn match_bracket_expr(
@@ -6304,6 +6532,14 @@ impl Interpreter {
63046532
.unwrap_or(false)
63056533
}
63066534

6535+
/// Check if extglob shopt is enabled
6536+
fn is_extglob(&self) -> bool {
6537+
self.variables
6538+
.get("SHOPT_extglob")
6539+
.map(|v| v == "1")
6540+
.unwrap_or(false)
6541+
}
6542+
63076543
/// Expand glob for a single item, applying noglob/failglob/nullglob.
63086544
/// Returns Err(pattern) if failglob triggers, Ok(items) otherwise.
63096545
async fn expand_glob_item(&self, item: &str) -> std::result::Result<Vec<String>, String> {

crates/bashkit/src/parser/lexer.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,32 @@ impl<'a> Lexer<'a> {
548548
_ => {}
549549
}
550550
}
551+
} else if ch == '(' && word.ends_with(['@', '?', '*', '+', '!']) {
552+
// Extglob: @(...), ?(...), *(...), +(...), !(...)
553+
// Consume through matching ) including nested parens
554+
word.push(ch);
555+
self.advance();
556+
let mut depth = 1;
557+
while let Some(c) = self.peek_char() {
558+
word.push(c);
559+
self.advance();
560+
match c {
561+
'(' => depth += 1,
562+
')' => {
563+
depth -= 1;
564+
if depth == 0 {
565+
break;
566+
}
567+
}
568+
'\\' => {
569+
if let Some(esc) = self.peek_char() {
570+
word.push(esc);
571+
self.advance();
572+
}
573+
}
574+
_ => {}
575+
}
576+
}
551577
} else if self.is_word_char(ch) {
552578
word.push(ch);
553579
self.advance();

0 commit comments

Comments
 (0)