Skip to content

Commit 9d92c1b

Browse files
committed
fix(builtins): awk array features — SUBSEP, multi-subscript, pre-increment
- Initialize SUBSEP to \x1c (ASCII FS) in AwkState::default()
- Parse multi-subscript arr[e1,e2], joining the expressions with SUBSEP
- Add SUBSEP_CONCAT BinOp evaluation
- Handle ++arr[key] and --arr[key] in the pre-increment/decrement parser

Closes #396
1 parent f527d64 commit 9d92c1b

File tree

1 file changed

+124
-16
lines changed
  • crates/bashkit/src/builtins

1 file changed

+124
-16
lines changed

crates/bashkit/src/builtins/awk.rs

Lines changed: 124 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -197,8 +197,11 @@ impl AwkValue {
197197

198198
impl Default for AwkState {
199199
fn default() -> Self {
200+
let mut variables = HashMap::new();
201+
// POSIX SUBSEP: subscript separator for multi-dimensional arrays
202+
variables.insert("SUBSEP".to_string(), AwkValue::String("\x1c".to_string()));
200203
Self {
201-
variables: HashMap::new(),
204+
variables,
202205
fields: Vec::new(),
203206
fs: " ".to_string(),
204207
ofs: " ".to_string(),
@@ -1355,28 +1358,62 @@ impl<'a> AwkParser<'a> {
13551358
));
13561359
}
13571360

1358-
// Pre-increment: ++var
1361+
// Pre-increment: ++var or ++arr[key]
13591362
if self.input[self.pos..].starts_with("++") {
13601363
self.pos += 2;
13611364
self.skip_whitespace();
1362-
if let Ok(AwkExpr::Variable(name)) = self.parse_primary() {
1363-
return Ok(AwkExpr::PreIncrement(name));
1365+
match self.parse_primary()? {
1366+
AwkExpr::Variable(name) => return Ok(AwkExpr::PreIncrement(name)),
1367+
AwkExpr::FuncCall(ref fname, ref args)
1368+
if fname == "__array_access" && args.len() == 2 =>
1369+
{
1370+
if let AwkExpr::Variable(arr_name) = &args[0] {
1371+
return Ok(AwkExpr::CompoundArrayAssign(
1372+
arr_name.clone(),
1373+
Box::new(args[1].clone()),
1374+
"+".to_string(),
1375+
Box::new(AwkExpr::Number(1.0)),
1376+
));
1377+
}
1378+
return Err(Error::Execution(
1379+
"awk: expected variable after ++".to_string(),
1380+
));
1381+
}
1382+
_ => {
1383+
return Err(Error::Execution(
1384+
"awk: expected variable after ++".to_string(),
1385+
))
1386+
}
13641387
}
1365-
return Err(Error::Execution(
1366-
"awk: expected variable after ++".to_string(),
1367-
));
13681388
}
13691389

1370-
// Pre-decrement: --var
1390+
// Pre-decrement: --var or --arr[key]
13711391
if self.input[self.pos..].starts_with("--") {
13721392
self.pos += 2;
13731393
self.skip_whitespace();
1374-
if let Ok(AwkExpr::Variable(name)) = self.parse_primary() {
1375-
return Ok(AwkExpr::PreDecrement(name));
1394+
match self.parse_primary()? {
1395+
AwkExpr::Variable(name) => return Ok(AwkExpr::PreDecrement(name)),
1396+
AwkExpr::FuncCall(ref fname, ref args)
1397+
if fname == "__array_access" && args.len() == 2 =>
1398+
{
1399+
if let AwkExpr::Variable(arr_name) = &args[0] {
1400+
return Ok(AwkExpr::CompoundArrayAssign(
1401+
arr_name.clone(),
1402+
Box::new(args[1].clone()),
1403+
"-".to_string(),
1404+
Box::new(AwkExpr::Number(1.0)),
1405+
));
1406+
}
1407+
return Err(Error::Execution(
1408+
"awk: expected variable after --".to_string(),
1409+
));
1410+
}
1411+
_ => {
1412+
return Err(Error::Execution(
1413+
"awk: expected variable after --".to_string(),
1414+
))
1415+
}
13761416
}
1377-
return Err(Error::Execution(
1378-
"awk: expected variable after --".to_string(),
1379-
));
13801417
}
13811418

13821419
let c = self.current_char().unwrap();
@@ -1562,16 +1599,36 @@ impl<'a> AwkParser<'a> {
15621599
return Ok(AwkExpr::FuncCall(name, args));
15631600
}
15641601

1565-
// Array indexing: arr[index]
1602+
// Array indexing: arr[index] or arr[e1,e2,...] (multi-subscript with SUBSEP)
15661603
if self.pos < self.input.len() && self.current_char().unwrap() == '[' {
15671604
self.pos += 1; // consume '['
1568-
let index_expr = self.parse_expression()?;
1605+
let mut subscripts = vec![self.parse_expression()?];
15691606
self.skip_whitespace();
1607+
// Handle multi-subscript: arr[e1, e2, ...] joined by SUBSEP
1608+
while self.pos < self.input.len() && self.current_char().unwrap() == ',' {
1609+
self.pos += 1; // consume ','
1610+
self.skip_whitespace();
1611+
subscripts.push(self.parse_expression()?);
1612+
self.skip_whitespace();
1613+
}
15701614
if self.pos >= self.input.len() || self.current_char().unwrap() != ']' {
15711615
return Err(Error::Execution("awk: expected ']'".to_string()));
15721616
}
15731617
self.pos += 1; // consume ']'
1574-
// Store as arr[index] where index is evaluated at runtime
1618+
let index_expr = if subscripts.len() == 1 {
1619+
subscripts.remove(0)
1620+
} else {
1621+
// Join multiple subscripts with SUBSEP
1622+
let mut result = subscripts.remove(0);
1623+
for sub in subscripts {
1624+
result = AwkExpr::BinOp(
1625+
Box::new(result),
1626+
"SUBSEP_CONCAT".to_string(),
1627+
Box::new(sub),
1628+
);
1629+
}
1630+
result
1631+
};
15751632
return Ok(AwkExpr::FuncCall(
15761633
"__array_access".to_string(),
15771634
vec![AwkExpr::Variable(name), index_expr],
@@ -1763,6 +1820,10 @@ impl AwkInterpreter {
17631820
AwkValue::Number(1.0)
17641821
}
17651822
}
1823+
"SUBSEP_CONCAT" => {
1824+
let subsep = self.state.get_variable("SUBSEP").as_string();
1825+
AwkValue::String(format!("{}{}{}", l.as_string(), subsep, r.as_string()))
1826+
}
17661827
_ => AwkValue::Uninitialized,
17671828
}
17681829
}
@@ -3382,4 +3443,51 @@ mod tests {
33823443
assert_eq!(result.exit_code, 0);
33833444
assert_eq!(result.stdout.trim(), "café");
33843445
}
3446+
3447+
#[tokio::test]
3448+
async fn test_awk_array_assign_field_ref_subscript() {
3449+
// Issue #396.1: arr[$1] = $2 should work with field refs as subscripts
3450+
let result = run_awk(
3451+
&["{ arr[$1] = $2 } END { print arr[\"hello\"] }"],
3452+
Some("hello world\n"),
3453+
)
3454+
.await
3455+
.unwrap();
3456+
assert_eq!(result.exit_code, 0);
3457+
assert_eq!(result.stdout.trim(), "world");
3458+
}
3459+
3460+
#[tokio::test]
3461+
async fn test_awk_multi_subscript() {
3462+
// Issue #396.2: a["x","y"] multi-subscript with SUBSEP
3463+
let result = run_awk(&[r#"BEGIN { a["x","y"] = 1; print a["x","y"] }"#], Some(""))
3464+
.await
3465+
.unwrap();
3466+
assert_eq!(result.exit_code, 0);
3467+
assert_eq!(result.stdout.trim(), "1");
3468+
}
3469+
3470+
#[tokio::test]
3471+
async fn test_awk_subsep_defined() {
3472+
// Issue #396.3: SUBSEP should be defined as \034
3473+
let result = run_awk(&[r#"BEGIN { print length(SUBSEP) }"#], Some(""))
3474+
.await
3475+
.unwrap();
3476+
assert_eq!(result.exit_code, 0);
3477+
assert_eq!(result.stdout.trim(), "1");
3478+
}
3479+
3480+
#[tokio::test]
3481+
async fn test_awk_preincrement_array() {
3482+
// Issue #396.4: ++arr[key] should work
3483+
let result = run_awk(
3484+
&["{ ++count[$1] } END { for (k in count) print k, count[k] }"],
3485+
Some("a\nb\na\n"),
3486+
)
3487+
.await
3488+
.unwrap();
3489+
assert_eq!(result.exit_code, 0);
3490+
assert!(result.stdout.contains("a 2"));
3491+
assert!(result.stdout.contains("b 1"));
3492+
}
33853493
}

0 commit comments

Comments
 (0)