Skip to content

Commit 2a93e90

Browse files
chaliy and claude authored
feat(awk): add user-defined function support (#303)
## Summary

- Parse `function name(params) { body }` at program level
- Add `AwkFunctionDef` struct with params and body
- Support `return expr` statement with `AwkFlow::Return`
- Look up user-defined functions in `call_function` fallback
- Local variable scoping for function parameters

## Test plan

- [x] All 45 existing awk unit tests pass
- [ ] CI green

Closes #280

Co-authored-by: Claude <noreply@anthropic.com>
1 parent 3b9fad9 commit 2a93e90

File tree

1 file changed

+149
-6
lines changed
  • crates/bashkit/src/builtins

1 file changed

+149
-6
lines changed

crates/bashkit/src/builtins/awk.rs

Lines changed: 149 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,13 @@ struct AwkProgram {
3030
begin_actions: Vec<AwkAction>,
3131
main_rules: Vec<AwkRule>,
3232
end_actions: Vec<AwkAction>,
33+
functions: HashMap<String, AwkFunctionDef>,
34+
}
35+
36+
/// A user-defined awk function (`function name(params) { body }`) as
/// produced by the parser and later looked up by name when called.
#[derive(Debug, Clone)]
37+
struct AwkFunctionDef {
38+
/// Parameter names, in the order they were declared.
params: Vec<String>,
39+
/// Actions that make up the function body, executed in order on each call.
body: Vec<AwkAction>,
3340
}
3441

3542
#[derive(Debug)]
@@ -68,7 +75,7 @@ enum AwkExpr {
6875
FieldAssign(Box<AwkExpr>, Box<AwkExpr>), // $n = val
6976
}
7077

71-
#[derive(Debug)]
78+
#[derive(Debug, Clone)]
7279
enum AwkAction {
7380
Print(Vec<AwkExpr>),
7481
Printf(String, Vec<AwkExpr>),
@@ -86,6 +93,7 @@ enum AwkAction {
8693
Getline, // getline — read next input record into $0
8794
#[allow(dead_code)] // Exit code support for future
8895
Exit(Option<AwkExpr>),
96+
Return(Option<AwkExpr>),
8997
Expression(AwkExpr),
9098
}
9199

@@ -100,7 +108,7 @@ struct AwkState {
100108
fnr: usize,
101109
}
102110

103-
#[derive(Debug, Clone)]
111+
#[derive(Debug, Clone, PartialEq)]
104112
enum AwkValue {
105113
Number(f64),
106114
String(String),
@@ -330,6 +338,7 @@ impl<'a> AwkParser<'a> {
330338
begin_actions: Vec::new(),
331339
main_rules: Vec::new(),
332340
end_actions: Vec::new(),
341+
functions: HashMap::new(),
333342
};
334343

335344
self.skip_whitespace();
@@ -340,8 +349,12 @@ impl<'a> AwkParser<'a> {
340349
break;
341350
}
342351

343-
// Check for BEGIN/END
344-
if self.matches_keyword("BEGIN") {
352+
// Check for function/BEGIN/END
353+
if self.matches_keyword("function") {
354+
self.skip_whitespace();
355+
let (name, func_def) = self.parse_function_def()?;
356+
program.functions.insert(name, func_def);
357+
} else if self.matches_keyword("BEGIN") {
345358
self.skip_whitespace();
346359
let actions = self.parse_action_block()?;
347360
program.begin_actions.extend(actions);
@@ -374,6 +387,68 @@ impl<'a> AwkParser<'a> {
374387
Ok(program)
375388
}
376389

390+
/// Parse a user-defined function: function name(params) { body }
391+
fn parse_function_def(&mut self) -> Result<(String, AwkFunctionDef)> {
392+
// Parse function name
393+
let name = self.read_identifier()?;
394+
self.skip_whitespace();
395+
396+
// Expect '('
397+
if self.pos >= self.input.len() || self.input.chars().nth(self.pos).unwrap() != '(' {
398+
return Err(Error::Execution(
399+
"awk: expected '(' after function name".to_string(),
400+
));
401+
}
402+
self.pos += 1;
403+
404+
// Parse parameter list
405+
let mut params = Vec::new();
406+
self.skip_whitespace();
407+
while self.pos < self.input.len() && self.input.chars().nth(self.pos).unwrap() != ')' {
408+
if !params.is_empty() {
409+
if self.input.chars().nth(self.pos).unwrap() == ',' {
410+
self.pos += 1;
411+
}
412+
self.skip_whitespace();
413+
}
414+
if self.pos < self.input.len() && self.input.chars().nth(self.pos).unwrap() != ')' {
415+
params.push(self.read_identifier()?);
416+
self.skip_whitespace();
417+
}
418+
}
419+
420+
// Expect ')'
421+
if self.pos >= self.input.len() || self.input.chars().nth(self.pos).unwrap() != ')' {
422+
return Err(Error::Execution(
423+
"awk: expected ')' after function parameters".to_string(),
424+
));
425+
}
426+
self.pos += 1;
427+
self.skip_whitespace();
428+
429+
// Parse function body as action block
430+
let body = self.parse_action_block()?;
431+
432+
Ok((name, AwkFunctionDef { params, body }))
433+
}
434+
435+
/// Read an identifier (alphanumeric + underscore)
436+
fn read_identifier(&mut self) -> Result<String> {
437+
let start = self.pos;
438+
while self.pos < self.input.len() {
439+
let c = self.input.chars().nth(self.pos).unwrap();
440+
if c.is_alphanumeric() || c == '_' {
441+
self.pos += 1;
442+
} else {
443+
break;
444+
}
445+
}
446+
if self.pos == start {
447+
return Err(Error::Execution("awk: expected identifier".to_string()));
448+
}
449+
Ok(self.input[start..self.pos].to_string())
450+
}
451+
377452
fn matches_keyword(&mut self, keyword: &str) -> bool {
378453
if self.input[self.pos..].starts_with(keyword) {
379454
let after = self.pos + keyword.len();
@@ -537,6 +612,17 @@ impl<'a> AwkParser<'a> {
537612
}
538613
return Ok(AwkAction::Exit(None));
539614
}
615+
if self.matches_keyword("return") {
616+
self.skip_whitespace();
617+
if self.pos < self.input.len() {
618+
let c = self.input.chars().nth(self.pos).unwrap();
619+
if c != '}' && c != ';' {
620+
let expr = self.parse_expression()?;
621+
return Ok(AwkAction::Return(Some(expr)));
622+
}
623+
}
624+
return Ok(AwkAction::Return(None));
625+
}
540626
if self.matches_keyword("if") {
541627
return self.parse_if();
542628
}
@@ -1091,7 +1177,7 @@ impl<'a> AwkParser<'a> {
10911177
let remaining = &self.input[self.pos..];
10921178
let keywords = [
10931179
"in", "if", "else", "while", "for", "do", "break", "continue", "next", "exit",
1094-
"delete", "getline", "print", "printf",
1180+
"return", "delete", "getline", "print", "printf", "function",
10951181
];
10961182
for kw in keywords {
10971183
if remaining.starts_with(kw) {
@@ -1547,6 +1633,7 @@ enum AwkFlow {
15471633
Break, // Break out of loop
15481634
LoopContinue, // Continue to next loop iteration
15491635
Exit(Option<i32>), // Exit program with optional code
1636+
Return(AwkValue), // Return from user-defined function
15501637
}
15511638

15521639
struct AwkInterpreter {
@@ -1556,6 +1643,8 @@ struct AwkInterpreter {
15561643
input_lines: Vec<String>,
15571644
/// Current line index within input_lines
15581645
line_index: usize,
1646+
/// User-defined functions
1647+
functions: HashMap<String, AwkFunctionDef>,
15591648
}
15601649

15611650
impl AwkInterpreter {
@@ -1565,6 +1654,7 @@ impl AwkInterpreter {
15651654
output: String::new(),
15661655
input_lines: Vec::new(),
15671656
line_index: 0,
1657+
functions: HashMap::new(),
15681658
}
15691659
}
15701660

@@ -2042,8 +2132,53 @@ impl AwkInterpreter {
20422132
self.eval_expr(&args[2])
20432133
}
20442134
}
2045-
_ => AwkValue::Uninitialized,
2135+
_ => {
2136+
// Check for user-defined function
2137+
if let Some(func) = self.functions.get(name).cloned() {
2138+
self.call_user_function(&func, args)
2139+
} else {
2140+
AwkValue::Uninitialized
2141+
}
2142+
}
2143+
}
2144+
}
2145+
2146+
fn call_user_function(&mut self, func: &AwkFunctionDef, args: &[AwkExpr]) -> AwkValue {
2147+
// Save current local variables that will be shadowed
2148+
let mut saved: Vec<(String, AwkValue)> = Vec::new();
2149+
for param in &func.params {
2150+
saved.push((param.clone(), self.state.get_variable(param)));
2151+
}
2152+
2153+
// Bind arguments to parameters
2154+
for (i, param) in func.params.iter().enumerate() {
2155+
let val = if i < args.len() {
2156+
self.eval_expr(&args[i])
2157+
} else {
2158+
AwkValue::Uninitialized
2159+
};
2160+
self.state.set_variable(param, val);
20462161
}
2162+
2163+
// Execute function body, capture return value
2164+
let mut return_value = AwkValue::Uninitialized;
2165+
for action in &func.body.clone() {
2166+
match self.exec_action(action) {
2167+
AwkFlow::Return(val) => {
2168+
return_value = val;
2169+
break;
2170+
}
2171+
AwkFlow::Exit(_) => break,
2172+
_ => {}
2173+
}
2174+
}
2175+
2176+
// Restore saved variables
2177+
for (name, val) in saved {
2178+
self.state.set_variable(&name, val);
2179+
}
2180+
2181+
return_value
20472182
}
20482183

20492184
fn format_string(&self, format: &str, values: &[AwkValue]) -> String {
@@ -2448,6 +2583,13 @@ impl AwkInterpreter {
24482583
let code = expr.as_ref().map(|e| self.eval_expr(e).as_number() as i32);
24492584
AwkFlow::Exit(code)
24502585
}
2586+
AwkAction::Return(expr) => {
2587+
let val = expr
2588+
.as_ref()
2589+
.map(|e| self.eval_expr(e))
2590+
.unwrap_or(AwkValue::Uninitialized);
2591+
AwkFlow::Return(val)
2592+
}
24512593
AwkAction::Expression(expr) => {
24522594
self.eval_expr(expr);
24532595
AwkFlow::Continue
@@ -2565,6 +2707,7 @@ impl Builtin for Awk {
25652707
let program = parser.parse()?;
25662708

25672709
let mut interp = AwkInterpreter::new();
2710+
interp.functions = program.functions.clone();
25682711
interp.state.fs = Self::process_escape_sequences(&field_sep);
25692712

25702713
// Set pre-assigned variables (-v)

0 commit comments

Comments
 (0)