Skip to content

Commit 3aa5c76

Browse files
committed
fix(builtins): awk array features — SUBSEP, multi-subscript, pre-increment
- Initialize SUBSEP to \x1c (ASCII FS) in AwkState::default()
- Parse multi-subscript arr[e1,e2] joining expressions with SUBSEP
- Add SUBSEP_CONCAT BinOp evaluation
- Handle ++arr[key] and --arr[key] in pre-increment/decrement parser

Closes #396
1 parent a943566 commit 3aa5c76

File tree

1 file changed

+126
-16
lines changed
  • crates/bashkit/src/builtins

1 file changed

+126
-16
lines changed

crates/bashkit/src/builtins/awk.rs

Lines changed: 126 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -197,8 +197,11 @@ impl AwkValue {
197197

198198
impl Default for AwkState {
199199
fn default() -> Self {
200+
let mut variables = HashMap::new();
201+
// POSIX SUBSEP: subscript separator for multi-dimensional arrays
202+
variables.insert("SUBSEP".to_string(), AwkValue::String("\x1c".to_string()));
200203
Self {
201-
variables: HashMap::new(),
204+
variables,
202205
fields: Vec::new(),
203206
fs: " ".to_string(),
204207
ofs: " ".to_string(),
@@ -1346,28 +1349,62 @@ impl<'a> AwkParser<'a> {
13461349
));
13471350
}
13481351

1349-
// Pre-increment: ++var
1352+
// Pre-increment: ++var or ++arr[key]
13501353
if self.input[self.pos..].starts_with("++") {
13511354
self.pos += 2;
13521355
self.skip_whitespace();
1353-
if let Ok(AwkExpr::Variable(name)) = self.parse_primary() {
1354-
return Ok(AwkExpr::PreIncrement(name));
1356+
match self.parse_primary()? {
1357+
AwkExpr::Variable(name) => return Ok(AwkExpr::PreIncrement(name)),
1358+
AwkExpr::FuncCall(ref fname, ref args)
1359+
if fname == "__array_access" && args.len() == 2 =>
1360+
{
1361+
if let AwkExpr::Variable(arr_name) = &args[0] {
1362+
return Ok(AwkExpr::CompoundArrayAssign(
1363+
arr_name.clone(),
1364+
Box::new(args[1].clone()),
1365+
"+".to_string(),
1366+
Box::new(AwkExpr::Number(1.0)),
1367+
));
1368+
}
1369+
return Err(Error::Execution(
1370+
"awk: expected variable after ++".to_string(),
1371+
));
1372+
}
1373+
_ => {
1374+
return Err(Error::Execution(
1375+
"awk: expected variable after ++".to_string(),
1376+
))
1377+
}
13551378
}
1356-
return Err(Error::Execution(
1357-
"awk: expected variable after ++".to_string(),
1358-
));
13591379
}
13601380

1361-
// Pre-decrement: --var
1381+
// Pre-decrement: --var or --arr[key]
13621382
if self.input[self.pos..].starts_with("--") {
13631383
self.pos += 2;
13641384
self.skip_whitespace();
1365-
if let Ok(AwkExpr::Variable(name)) = self.parse_primary() {
1366-
return Ok(AwkExpr::PreDecrement(name));
1385+
match self.parse_primary()? {
1386+
AwkExpr::Variable(name) => return Ok(AwkExpr::PreDecrement(name)),
1387+
AwkExpr::FuncCall(ref fname, ref args)
1388+
if fname == "__array_access" && args.len() == 2 =>
1389+
{
1390+
if let AwkExpr::Variable(arr_name) = &args[0] {
1391+
return Ok(AwkExpr::CompoundArrayAssign(
1392+
arr_name.clone(),
1393+
Box::new(args[1].clone()),
1394+
"-".to_string(),
1395+
Box::new(AwkExpr::Number(1.0)),
1396+
));
1397+
}
1398+
return Err(Error::Execution(
1399+
"awk: expected variable after --".to_string(),
1400+
));
1401+
}
1402+
_ => {
1403+
return Err(Error::Execution(
1404+
"awk: expected variable after --".to_string(),
1405+
))
1406+
}
13671407
}
1368-
return Err(Error::Execution(
1369-
"awk: expected variable after --".to_string(),
1370-
));
13711408
}
13721409

13731410
let c = self.input.chars().nth(self.pos).unwrap();
@@ -1556,17 +1593,39 @@ impl<'a> AwkParser<'a> {
15561593
return Ok(AwkExpr::FuncCall(name, args));
15571594
}
15581595

1559-
// Array indexing: arr[index]
1596+
// Array indexing: arr[index] or arr[e1,e2,...] (multi-subscript with SUBSEP)
15601597
if self.pos < self.input.len() && self.input.chars().nth(self.pos).unwrap() == '[' {
15611598
self.pos += 1; // consume '['
1562-
let index_expr = self.parse_expression()?;
1599+
let mut subscripts = vec![self.parse_expression()?];
15631600
self.skip_whitespace();
1601+
// Handle multi-subscript: arr[e1, e2, ...] joined by SUBSEP
1602+
while self.pos < self.input.len()
1603+
&& self.input.chars().nth(self.pos).unwrap() == ','
1604+
{
1605+
self.pos += 1; // consume ','
1606+
self.skip_whitespace();
1607+
subscripts.push(self.parse_expression()?);
1608+
self.skip_whitespace();
1609+
}
15641610
if self.pos >= self.input.len() || self.input.chars().nth(self.pos).unwrap() != ']'
15651611
{
15661612
return Err(Error::Execution("awk: expected ']'".to_string()));
15671613
}
15681614
self.pos += 1; // consume ']'
1569-
// Store as arr[index] where index is evaluated at runtime
1615+
let index_expr = if subscripts.len() == 1 {
1616+
subscripts.remove(0)
1617+
} else {
1618+
// Join multiple subscripts with SUBSEP
1619+
let mut result = subscripts.remove(0);
1620+
for sub in subscripts {
1621+
result = AwkExpr::BinOp(
1622+
Box::new(result),
1623+
"SUBSEP_CONCAT".to_string(),
1624+
Box::new(sub),
1625+
);
1626+
}
1627+
result
1628+
};
15701629
return Ok(AwkExpr::FuncCall(
15711630
"__array_access".to_string(),
15721631
vec![AwkExpr::Variable(name), index_expr],
@@ -1758,6 +1817,10 @@ impl AwkInterpreter {
17581817
AwkValue::Number(1.0)
17591818
}
17601819
}
1820+
"SUBSEP_CONCAT" => {
1821+
let subsep = self.state.get_variable("SUBSEP").as_string();
1822+
AwkValue::String(format!("{}{}{}", l.as_string(), subsep, r.as_string()))
1823+
}
17611824
_ => AwkValue::Uninitialized,
17621825
}
17631826
}
@@ -3357,4 +3420,51 @@ mod tests {
33573420
count
33583421
);
33593422
}
3423+
3424+
#[tokio::test]
3425+
async fn test_awk_array_assign_field_ref_subscript() {
3426+
// Issue #396.1: arr[$1] = $2 should work with field refs as subscripts
3427+
let result = run_awk(
3428+
&["{ arr[$1] = $2 } END { print arr[\"hello\"] }"],
3429+
Some("hello world\n"),
3430+
)
3431+
.await
3432+
.unwrap();
3433+
assert_eq!(result.exit_code, 0);
3434+
assert_eq!(result.stdout.trim(), "world");
3435+
}
3436+
3437+
#[tokio::test]
3438+
async fn test_awk_multi_subscript() {
3439+
// Issue #396.2: a["x","y"] multi-subscript with SUBSEP
3440+
let result = run_awk(&[r#"BEGIN { a["x","y"] = 1; print a["x","y"] }"#], Some(""))
3441+
.await
3442+
.unwrap();
3443+
assert_eq!(result.exit_code, 0);
3444+
assert_eq!(result.stdout.trim(), "1");
3445+
}
3446+
3447+
#[tokio::test]
3448+
async fn test_awk_subsep_defined() {
3449+
// Issue #396.3: SUBSEP should be defined as \034
3450+
let result = run_awk(&[r#"BEGIN { print length(SUBSEP) }"#], Some(""))
3451+
.await
3452+
.unwrap();
3453+
assert_eq!(result.exit_code, 0);
3454+
assert_eq!(result.stdout.trim(), "1");
3455+
}
3456+
3457+
#[tokio::test]
3458+
async fn test_awk_preincrement_array() {
3459+
// Issue #396.4: ++arr[key] should work
3460+
let result = run_awk(
3461+
&["{ ++count[$1] } END { for (k in count) print k, count[k] }"],
3462+
Some("a\nb\na\n"),
3463+
)
3464+
.await
3465+
.unwrap();
3466+
assert_eq!(result.exit_code, 0);
3467+
assert!(result.stdout.contains("a 2"));
3468+
assert!(result.stdout.contains("b 1"));
3469+
}
33603470
}

0 commit comments

Comments
 (0)