Skip to content

Commit 94ac5dd

Browse files
chaliy and claude authored
fix(builtins): add jq -R raw input and awk printf parens (#388)
## Summary

- **jq**: Implement `-R`/`--raw-input` flag — treats each input line as a JSON string instead of parsing it as JSON. Also supports `-Rs` (slurp entire input as a single string). Unblocks CSV processing patterns like `jq -Rs 'split("\n")'`.
- **awk**: Support `printf("format", args)` parenthesized syntax in addition to the existing `printf "format", args`. Common in awk scripts for JSON generation.

## Root cause

- Issue #368: `complex_markdown_toc` eval failures were caused by awk `printf("format")` syntax not being supported, not tr character classes as initially suspected.
- Issue #369: `data_csv_to_json` eval failures were caused by missing `jq -R` raw input mode, preventing CSV-to-JSON conversion patterns.

## Test plan

- [x] Added unit tests for `jq -R`, `jq -Rs`, and `jq -Rs` with split
- [x] Added unit tests for awk `printf("format", args)` and CSV-to-JSON pattern
- [x] Added spec tests: `jq_raw_input`, `jq_raw_input_slurp`, `awk_printf_parens`, `awk_printf_parens_begin`
- [x] All existing tests pass (AWK: 98, JQ: 115, Bash: 1214)
- [x] `cargo clippy` clean, `cargo fmt` clean

Closes #368
Closes #369

Co-authored-by: Claude <noreply@anthropic.com>
1 parent 385ded2 commit 94ac5dd

File tree

4 files changed

+119
-0
lines changed

4 files changed

+119
-0
lines changed

crates/bashkit/src/builtins/awk.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,14 @@ impl<'a> AwkParser<'a> {
682682
fn parse_printf(&mut self) -> Result<AwkAction> {
683683
self.skip_whitespace();
684684

685+
// Handle optional parenthesized form: printf("format", args)
686+
let has_parens =
687+
self.pos < self.input.len() && self.input.chars().nth(self.pos).unwrap() == '(';
688+
if has_parens {
689+
self.pos += 1;
690+
self.skip_whitespace();
691+
}
692+
685693
// Parse format string
686694
if self.pos >= self.input.len() || self.input.chars().nth(self.pos).unwrap() != '"' {
687695
return Err(Error::Execution(
@@ -701,6 +709,13 @@ impl<'a> AwkParser<'a> {
701709
self.skip_whitespace();
702710
}
703711

712+
if has_parens
713+
&& self.pos < self.input.len()
714+
&& self.input.chars().nth(self.pos).unwrap() == ')'
715+
{
716+
self.pos += 1;
717+
}
718+
704719
Ok(AwkAction::Printf(format, args))
705720
}
706721

@@ -3269,4 +3284,32 @@ mod tests {
32693284
.unwrap();
32703285
assert_eq!(result.stdout, "329\n");
32713286
}
3287+
3288+
#[tokio::test]
3289+
async fn test_awk_printf_parens() {
3290+
// printf with parenthesized syntax: printf("format", args)
3291+
let result = run_awk(
3292+
&[r#"BEGIN{printf("["); printf("%s", "x"); printf("]"); print ""}"#],
3293+
Some(""),
3294+
)
3295+
.await
3296+
.unwrap();
3297+
assert_eq!(result.stdout, "[x]\n");
3298+
}
3299+
3300+
#[tokio::test]
3301+
async fn test_awk_printf_parens_csv() {
3302+
// CSV to JSON pattern using printf with parens
3303+
let result = run_awk(
3304+
&[
3305+
"-F,",
3306+
r#"NR==1{for(i=1;i<=NF;i++) h[i]=$i; next} {printf("%s{", (NR>2?",":"")); for(i=1;i<=NF;i++){printf("%s\"%s\":\"%s\"", (i>1?",":""), h[i], $i)}; printf("}")} END{print ""}"#,
3307+
],
3308+
Some("name,age\nalice,30\nbob,25\n"),
3309+
)
3310+
.await
3311+
.unwrap();
3312+
assert!(result.stdout.contains("alice"));
3313+
assert!(result.stdout.contains("bob"));
3314+
}
32723315
}

crates/bashkit/src/builtins/jq.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ impl Builtin for Jq {
173173
// Parse arguments for flags using index-based loop to support
174174
// multi-arg flags like --arg name value and --argjson name value.
175175
let mut raw_output = false;
176+
let mut raw_input = false;
176177
let mut compact_output = false;
177178
let mut null_input = false;
178179
let mut sort_keys = false;
@@ -210,6 +211,8 @@ impl Builtin for Jq {
210211

211212
if arg == "--raw-output" {
212213
raw_output = true;
214+
} else if arg == "--raw-input" {
215+
raw_input = true;
213216
} else if arg == "--compact-output" {
214217
compact_output = true;
215218
} else if arg == "--null-input" {
@@ -269,6 +272,7 @@ impl Builtin for Jq {
269272
for ch in arg[1..].chars() {
270273
match ch {
271274
'r' => raw_output = true,
275+
'R' => raw_input = true,
272276
'c' => compact_output = true,
273277
'n' => null_input = true,
274278
'S' => sort_keys = true,
@@ -377,6 +381,15 @@ impl Builtin for Jq {
377381
let inputs_to_process: Vec<Val> = if null_input {
378382
// -n flag: use null as input
379383
vec![Val::from(serde_json::Value::Null)]
384+
} else if raw_input && slurp {
385+
// -Rs flag: read entire input as single string
386+
vec![Val::from(serde_json::Value::String(input.to_string()))]
387+
} else if raw_input {
388+
// -R flag: each line becomes a JSON string value
389+
input
390+
.lines()
391+
.map(|line| Val::from(serde_json::Value::String(line.to_string())))
392+
.collect()
380393
} else if slurp {
381394
// -s flag: read all inputs into a single array
382395
match Self::parse_json_values(input) {
@@ -1189,4 +1202,35 @@ mod tests {
11891202
let result = run_jq_with_args(&["-snr", r#""hello""#], "").await.unwrap();
11901203
assert_eq!(result.trim(), "hello");
11911204
}
1205+
1206+
#[tokio::test]
1207+
async fn test_jq_raw_input() {
1208+
// -R: each line becomes a JSON string
1209+
let result = run_jq_with_args(&["-R", "."], "hello\nworld\n")
1210+
.await
1211+
.unwrap();
1212+
assert_eq!(result.trim(), "\"hello\"\n\"world\"");
1213+
}
1214+
1215+
#[tokio::test]
1216+
async fn test_jq_raw_input_slurp() {
1217+
// -Rs: entire input as one string
1218+
let result = run_jq_with_args(&["-Rs", "."], "hello\nworld\n")
1219+
.await
1220+
.unwrap();
1221+
assert_eq!(result.trim(), "\"hello\\nworld\\n\"");
1222+
}
1223+
1224+
#[tokio::test]
1225+
async fn test_jq_raw_input_split() {
1226+
// -R -s then split: CSV-like processing
1227+
let result = run_jq_with_args(
1228+
&["-Rs", r#"split("\n") | map(select(length>0))"#],
1229+
"a,b,c\n1,2,3\n",
1230+
)
1231+
.await
1232+
.unwrap();
1233+
assert!(result.contains("a,b,c"));
1234+
assert!(result.contains("1,2,3"));
1235+
}
11921236
}

crates/bashkit/tests/spec_cases/awk/awk.test.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,3 +650,17 @@ printf '10\n2\n' | awk '{if ($1 > 5) print $1}'
650650
### expect
651651
10
652652
### end
653+
654+
### awk_printf_parens
655+
# printf with parenthesized form
656+
printf 'x\n' | awk '{printf("[%s]", $1); print ""}'
657+
### expect
658+
[x]
659+
### end
660+
661+
### awk_printf_parens_begin
662+
# printf with parens in BEGIN block
663+
echo x | awk 'BEGIN{printf("["); printf("%s", "hi"); printf("]"); print ""}'
664+
### expect
665+
[hi]
666+
### end

crates/bashkit/tests/spec_cases/jq/jq.test.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -954,3 +954,21 @@ echo '42' | jq -e '.'
954954
### expect
955955
42
956956
### end
957+
958+
### jq_raw_input
959+
# -R flag: each line treated as string
960+
printf 'hello\nworld\n' | jq -R '.'
961+
### expect
962+
"hello"
963+
"world"
964+
### end
965+
966+
### jq_raw_input_slurp
967+
# -Rs flag: entire input as one string, then split
968+
printf 'a,b\n1,2\n' | jq -Rs 'split("\n") | map(select(length>0))'
969+
### expect
970+
[
971+
"a,b",
972+
"1,2"
973+
]
974+
### end

0 commit comments

Comments
 (0)