Skip to content

Commit 834adf5

Browse files
authored
fix(parser): prevent word-splitting inside quoted strings during array assignment (#1082)
## Summary

- Fix `QuotedWord` tokens in `collect_array_elements()` not being marked `quoted=true`, which caused IFS word-splitting on double-quoted variable expansions like `arr=(-a "test ${X} done")`
- Add quote-aware `split_array_elements()` for the single-token array literal code path
- Add 3 spec tests: quoted expansion, single-quoted elements, mixed quoted/unquoted elements

## Test plan

- [x] Spec test `quoted_expansion_no_word_split_in_array` — verifies the exact bug from #1071
- [x] Spec test `quoted_single_quote_no_word_split_in_array` — single-quoted elements are preserved
- [x] Spec test `quoted_mixed_elements_in_array` — mixed quoted/unquoted/variable expansion
- [x] All existing array spec tests still pass
- [x] `bash_comparison_tests` pass (differential test against real bash)
- [x] Smoke test via `cargo run --bin bashkit -- -c '...'` produces correct output
- [x] `cargo clippy` clean, `cargo fmt` clean

Closes #1071
1 parent 6ac3af0 commit 834adf5

File tree

3 files changed

+105
-4
lines changed

3 files changed

+105
-4
lines changed

crates/bashkit/src/parser/mod.rs

Lines changed: 68 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1792,6 +1792,59 @@ impl<'a> Parser<'a> {
17921792
}
17931793

17941794
/// Strip surrounding quotes from a string value
1795+
/// Split array element text on unquoted whitespace, respecting single and
/// double quotes.
///
/// Returns one `(element_text, was_quoted)` pair per element, in input order.
/// `was_quoted` is `true` when any part of the element was inside single or
/// double quotes. Quote characters themselves are never copied into the
/// element text.
///
/// Rules applied while scanning:
/// - Inside double quotes, a backslash followed by `$`, `` ` ``, `"`, `\` or a
///   newline is dropped and the following character is kept; any other
///   backslash is kept literally.
/// - Inside single quotes and outside quotes, a backslash is an ordinary
///   character.
/// - An empty quoted element (`""` or `''`) yields `("", true)` rather than
///   being dropped, so `"" a` produces two elements.
/// - An unterminated quote simply runs to the end of the input.
///
/// NOTE(review): escapes are resolved here, so a caller that re-parses the
/// returned text for expansions can no longer tell `\$` from `$` — confirm
/// against how `parse_word` treats the result.
fn split_array_elements(s: &str) -> Vec<(String, bool)> {
    let mut result = Vec::new();
    let mut current = String::new();
    let mut chars = s.chars().peekable();
    let mut in_double_quote = false;
    let mut in_single_quote = false;
    // True once the element being built has seen any quote character.
    let mut is_quoted = false;

    // `while let` (not `for`) because the backslash arm consumes a lookahead
    // character from the same iterator.
    while let Some(c) = chars.next() {
        match c {
            '"' if !in_single_quote => {
                in_double_quote = !in_double_quote;
                is_quoted = true;
            }
            '\'' if !in_double_quote => {
                in_single_quote = !in_single_quote;
                is_quoted = true;
            }
            '\\' if in_double_quote => {
                // Only these characters are escapable inside double quotes;
                // otherwise the backslash itself is literal.
                match chars.next_if(|next| matches!(next, '$' | '`' | '"' | '\\' | '\n')) {
                    Some(escaped) => current.push(escaped),
                    None => current.push(c),
                }
            }
            c if c.is_ascii_whitespace() && !in_double_quote && !in_single_quote => {
                // Flush on an unquoted separator. `is_quoted` is checked too so
                // that an empty quoted element is kept and the flag cannot
                // leak onto the next element.
                if is_quoted || !current.is_empty() {
                    result.push((std::mem::take(&mut current), is_quoted));
                    is_quoted = false;
                }
            }
            _ => current.push(c),
        }
    }

    if is_quoted || !current.is_empty() {
        result.push((current, is_quoted));
    }
    result
}
1847+
17951848
fn strip_quotes(s: &str) -> &str {
17961849
if s.len() >= 2
17971850
&& ((s.starts_with('"') && s.ends_with('"'))
@@ -1879,6 +1932,10 @@ impl<'a> Parser<'a> {
18791932
parts: vec![WordPart::Literal(elem_clone)],
18801933
quoted: true,
18811934
}
1935+
} else if matches!(&self.current_token, Some(tokens::Token::QuotedWord(_))) {
1936+
let mut w = self.parse_word(elem_clone);
1937+
w.quoted = true;
1938+
w
18821939
} else {
18831940
self.parse_word(elem_clone)
18841941
};
@@ -1906,9 +1963,17 @@ impl<'a> Parser<'a> {
19061963
// Array literal in the token itself: arr=(a b c)
19071964
if value_str.starts_with('(') && value_str.ends_with(')') {
19081965
let inner = &value_str[1..value_str.len() - 1];
1909-
let elements: Vec<Word> = inner
1910-
.split_whitespace()
1911-
.map(|s| self.parse_word(s.to_string()))
1966+
let elements: Vec<Word> = Self::split_array_elements(inner)
1967+
.into_iter()
1968+
.map(|(s, quoted)| {
1969+
if quoted {
1970+
let mut w = self.parse_word(s);
1971+
w.quoted = true;
1972+
w
1973+
} else {
1974+
self.parse_word(s)
1975+
}
1976+
})
19121977
.collect();
19131978
return Some((
19141979
Assignment {

crates/bashkit/tests/spec_cases/bash/arrays.test.sh

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,42 @@ echo "${arr[1]}"
236236
y
237237
### end
238238

239+
### quoted_expansion_no_word_split_in_array
240+
# arr=("test ${X} done") should NOT word-split inside quotes
241+
X="hello world"
242+
arr=(-a "test ${X} done")
243+
echo "count: ${#arr[@]}"
244+
printf "<%s>\n" "${arr[@]}"
245+
### expect
246+
count: 2
247+
<-a>
248+
<test hello world done>
249+
### end
250+
251+
### quoted_single_quote_no_word_split_in_array
252+
# arr=('multi word') should NOT word-split inside single quotes
253+
arr=('hello world' 'foo bar')
254+
echo "count: ${#arr[@]}"
255+
printf "<%s>\n" "${arr[@]}"
256+
### expect
257+
count: 2
258+
<hello world>
259+
<foo bar>
260+
### end
261+
262+
### quoted_mixed_elements_in_array
263+
# Mix of quoted and unquoted elements preserves quoting
264+
X="a b"
265+
arr=(plain "quoted ${X} end" 'literal $X')
266+
echo "count: ${#arr[@]}"
267+
printf "<%s>\n" "${arr[@]}"
268+
### expect
269+
count: 3
270+
<plain>
271+
<quoted a b end>
272+
<literal $X>
273+
### end
274+
239275
### unquoted_expansion_word_split_in_array
240276
# arr=($x) should word-split on IFS
241277
x="alpha beta gamma"

specs/009-implementation-status.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ Bashkit implements IEEE 1003.1-2024 Shell Command Language. See
126126
| arithmetic.test.sh | 75 | includes logical, bitwise, compound assign, increment/decrement, `let` builtin, `declare -i` arithmetic |
127127
| array-slicing.test.sh | 8 | array slice operations |
128128
| array-splat.test.sh | 2 | `"${arr[@]}"` individual element splatting in assignments |
129-
| arrays.test.sh | 31 | indices, `${arr[@]}` / `${arr[*]}`, negative indexing `${arr[-1]}` |
129+
| arrays.test.sh | 34 | indices, `${arr[@]}` / `${arr[*]}`, negative indexing `${arr[-1]}`, quoted expansion |
130130
| assoc-arrays.test.sh | 22 | associative arrays `declare -A` |
131131
| awk-printf-width.test.sh | 4 | AWK printf width/precision memory limits |
132132
| background.test.sh | 2 | background job handling |

0 commit comments

Comments
 (0)