Skip to content

Commit a2717bd

Browse files
chaliy and claude authored
feat: cat -v, sort -m, brace/date/lexer fixes (#234)
## Summary

- **cat -v/-n/-e/-t**: Show non-printable characters (^M for CR, ^[ for ESC, etc.)
- **sort -m**: Merge pre-sorted files via k-way merge
- **Brace space fix**: `echo { a,b,c }` no longer triggers brace expansion or compound command parsing; `{` and `}` treated as literal words in argument position
- **Date format fix**: `date +"%Y-%m-%d %H:%M:%S"` now works — strip surrounding quotes from format argument
- **Lexer word concatenation**: Adjacent quoted segments are concatenated into single word tokens (e.g., `+"%Y"` → `+%Y` as one token)
- **Array indices test**: Rewrote to avoid spec format newline ambiguity

## Test plan

- [x] Removed 5 skip markers (neg_array_indices_empty, neg_brace_no_expand_space, echo_escape_r, date_combined_format, sort_merge)
- [x] `cargo test --all-features` passes (69 pass, 49 ignored)
- [x] `cargo test --test spec_tests` passes (all 13 test groups including bash_comparison_tests)
- [x] `cargo clippy --all-targets --all-features -- -D warnings` clean
- [x] `cargo fmt --check` clean
- [x] Updated specs/009-implementation-status.md and specs/005-builtins.md

Co-authored-by: Claude <noreply@anthropic.com>
1 parent aa32ab9 commit a2717bd

File tree

12 files changed

+187
-43
lines changed

12 files changed

+187
-43
lines changed

crates/bashkit/src/builtins/cat.rs

Lines changed: 66 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -14,40 +14,93 @@ pub struct Cat;
1414
impl Builtin for Cat {
1515
async fn execute(&self, ctx: Context<'_>) -> Result<ExecResult> {
1616
let mut output = String::new();
17+
let mut show_nonprinting = false;
18+
let mut number_lines = false;
19+
let mut files: Vec<&str> = Vec::new();
1720

18-
// If no arguments and stdin is provided, output stdin
19-
if ctx.args.is_empty() {
21+
// Parse flags
22+
for arg in ctx.args {
23+
if arg.starts_with('-') && arg.len() > 1 && !arg.starts_with("--") {
24+
for ch in arg[1..].chars() {
25+
match ch {
26+
'v' => show_nonprinting = true,
27+
'n' => number_lines = true,
28+
'e' => show_nonprinting = true, // -e implies -v + show $ at EOL (simplified)
29+
't' => show_nonprinting = true, // -t implies -v + show ^I for tabs (simplified)
30+
_ => {}
31+
}
32+
}
33+
} else {
34+
files.push(arg);
35+
}
36+
}
37+
38+
let mut raw = String::new();
39+
40+
if files.is_empty() {
2041
if let Some(stdin) = ctx.stdin {
21-
output.push_str(stdin);
42+
raw.push_str(stdin);
2243
}
2344
} else {
24-
// Read files
25-
for arg in ctx.args {
26-
// Handle - as stdin
27-
if arg == "-" {
45+
for file in &files {
46+
if *file == "-" {
2847
if let Some(stdin) = ctx.stdin {
29-
output.push_str(stdin);
48+
raw.push_str(stdin);
3049
}
3150
} else {
32-
let path = if Path::new(arg).is_absolute() {
33-
arg.to_string()
51+
let path = if Path::new(file).is_absolute() {
52+
file.to_string()
3453
} else {
35-
ctx.cwd.join(arg).to_string_lossy().to_string()
54+
ctx.cwd.join(file).to_string_lossy().to_string()
3655
};
3756

3857
match ctx.fs.read_file(Path::new(&path)).await {
3958
Ok(content) => {
4059
let text = String::from_utf8_lossy(&content);
41-
output.push_str(&text);
60+
raw.push_str(&text);
4261
}
4362
Err(e) => {
44-
return Ok(ExecResult::err(format!("cat: {}: {}\n", arg, e), 1));
63+
return Ok(ExecResult::err(format!("cat: {}: {}\n", file, e), 1));
4564
}
4665
}
4766
}
4867
}
4968
}
5069

70+
if show_nonprinting {
71+
for ch in raw.chars() {
72+
match ch {
73+
'\n' | '\t' => output.push(ch), // pass through newline and tab
74+
c if (c as u32) < 32 => {
75+
// Control characters: ^@, ^A, ..., ^Z, ^[, ^\, ^], ^^, ^_
76+
output.push('^');
77+
output.push((c as u8 + 64) as char);
78+
}
79+
'\x7f' => {
80+
output.push('^');
81+
output.push('?');
82+
}
83+
c => output.push(c),
84+
}
85+
}
86+
} else {
87+
output = raw;
88+
}
89+
90+
if number_lines {
91+
let lines: Vec<&str> = output.split('\n').collect();
92+
let mut numbered = String::new();
93+
for (i, line) in lines.iter().enumerate() {
94+
if i < lines.len() - 1 || !line.is_empty() {
95+
numbered.push_str(&format!(" {}\t{}", i + 1, line));
96+
if i < lines.len() - 1 {
97+
numbered.push('\n');
98+
}
99+
}
100+
}
101+
output = numbered;
102+
}
103+
51104
Ok(ExecResult::ok(output))
52105
}
53106
}

crates/bashkit/src/builtins/date.rs

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -343,8 +343,13 @@ impl Builtin for Date {
343343
}
344344

345345
let default_format = "%a %b %e %H:%M:%S %Z %Y".to_string();
346+
let format_owned;
346347
let format = match &format_arg {
347-
Some(fmt) => &fmt[1..], // Strip leading '+'
348+
Some(fmt) => {
349+
let without_plus = &fmt[1..]; // Strip leading '+'
350+
format_owned = strip_surrounding_quotes(without_plus).to_string();
351+
&format_owned
352+
}
348353
None => &default_format,
349354
};
350355

crates/bashkit/src/builtins/sortuniq.rs

Lines changed: 58 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,7 @@ impl Builtin for Sort {
9292
let mut check_sorted = false;
9393
let mut human_numeric = false;
9494
let mut month_sort = false;
95+
let mut merge = false;
9596
let mut delimiter: Option<char> = None;
9697
let mut key_field: Option<usize> = None;
9798
let mut output_file: Option<String> = None;
@@ -148,6 +149,7 @@ impl Builtin for Sort {
148149
'c' | 'C' => check_sorted = true,
149150
'h' => human_numeric = true,
150151
'M' => month_sort = true,
152+
'm' => merge = true,
151153
'z' => zero_terminated = true,
152154
_ => {}
153155
}
@@ -195,6 +197,62 @@ impl Builtin for Sort {
195197
}
196198
}
197199

200+
// Merge mode: k-way merge of pre-sorted inputs
201+
if merge && !files.is_empty() {
202+
let mut streams: Vec<Vec<String>> = Vec::new();
203+
for file in &files {
204+
let path = if file.starts_with('/') {
205+
std::path::PathBuf::from(file)
206+
} else {
207+
ctx.cwd.join(file)
208+
};
209+
match ctx.fs.read_file(&path).await {
210+
Ok(content) => {
211+
let text = String::from_utf8_lossy(&content);
212+
let lines: Vec<String> = text
213+
.split(line_sep)
214+
.filter(|l| !l.is_empty())
215+
.map(|l| l.to_string())
216+
.collect();
217+
streams.push(lines);
218+
}
219+
Err(e) => {
220+
return Ok(ExecResult::err(format!("sort: {}: {}\n", file, e), 1));
221+
}
222+
}
223+
}
224+
// k-way merge using indices
225+
let mut indices: Vec<usize> = vec![0; streams.len()];
226+
let mut merged = Vec::new();
227+
loop {
228+
let mut best: Option<(usize, &str)> = None;
229+
for (i, stream) in streams.iter().enumerate() {
230+
if indices[i] < stream.len() {
231+
let line = &stream[indices[i]];
232+
if let Some((_, best_line)) = best {
233+
if line.as_str() < best_line {
234+
best = Some((i, line));
235+
}
236+
} else {
237+
best = Some((i, line));
238+
}
239+
}
240+
}
241+
if let Some((i, line)) = best {
242+
merged.push(line.to_string());
243+
indices[i] += 1;
244+
} else {
245+
break;
246+
}
247+
}
248+
let sep = if zero_terminated { "\0" } else { "\n" };
249+
let mut output = merged.join(sep);
250+
if !output.is_empty() {
251+
output.push_str(sep);
252+
}
253+
return Ok(ExecResult::ok(output));
254+
}
255+
198256
// Check sorted mode
199257
if check_sorted {
200258
for i in 1..all_lines.len() {

crates/bashkit/src/interpreter/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4697,6 +4697,11 @@ impl Interpreter {
46974697
let suffix: String = chars[end + 1..].iter().collect();
46984698
let brace_content: String = chars[start + 1..end].iter().collect();
46994699

4700+
// Brace content with leading/trailing space is not expanded
4701+
if brace_content.starts_with(' ') || brace_content.ends_with(' ') {
4702+
return vec![s.to_string()];
4703+
}
4704+
47004705
// Check for range expansion like {1..5} or {a..z}
47014706
if let Some(range_result) = self.try_expand_range(&brace_content) {
47024707
let mut results = Vec::new();

crates/bashkit/src/parser/lexer.rs

Lines changed: 30 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -306,24 +306,40 @@ impl<'a> Lexer<'a> {
306306
// Handle quoted strings within words (e.g., a="Hello" or VAR="value")
307307
// This handles the case where a word like `a=` is followed by a quoted string
308308
if ch == '"' || ch == '\'' {
309-
// Check if this is a quoted value in an assignment (word ends with = or +=)
310-
if word.ends_with('=') || word.ends_with("+=") {
311-
// Include the quoted string as part of this word
312-
let quote_char = ch;
313-
word.push(ch);
314-
self.advance();
315-
while let Some(c) = self.peek_char() {
316-
word.push(c);
309+
if word.is_empty() {
310+
// Start of a new token — let the main tokenizer handle quotes
311+
break;
312+
}
313+
// Word already has content — concatenate the quoted segment
314+
// This handles: VAR="val", date +"%Y", echo foo"bar"
315+
let quote_char = ch;
316+
self.advance(); // consume opening quote
317+
while let Some(c) = self.peek_char() {
318+
if c == quote_char {
319+
self.advance(); // consume closing quote
320+
break;
321+
}
322+
if c == '\\' && quote_char == '"' {
317323
self.advance();
318-
if c == quote_char && !word.ends_with(&format!("\\{}", quote_char)) {
319-
break;
324+
if let Some(next) = self.peek_char() {
325+
match next {
326+
'"' | '\\' | '$' | '`' => {
327+
word.push(next);
328+
self.advance();
329+
}
330+
_ => {
331+
word.push('\\');
332+
word.push(next);
333+
self.advance();
334+
}
335+
}
336+
continue;
320337
}
321338
}
322-
continue;
323-
} else {
324-
// Not after =, so this is a separate quoted token
325-
break;
339+
word.push(c);
340+
self.advance();
326341
}
342+
continue;
327343
} else if ch == '$' {
328344
// Handle variable references and command substitution
329345
word.push(ch);

crates/bashkit/src/parser/mod.rs

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1695,6 +1695,18 @@ impl<'a> Parser<'a> {
16951695
target: Word::literal(dst_fd.to_string()),
16961696
});
16971697
}
1698+
// { and } as arguments (not in command position) are literal words
1699+
Some(tokens::Token::LeftBrace) | Some(tokens::Token::RightBrace)
1700+
if !words.is_empty() =>
1701+
{
1702+
let sym = if matches!(self.current_token, Some(tokens::Token::LeftBrace)) {
1703+
"{"
1704+
} else {
1705+
"}"
1706+
};
1707+
words.push(Word::literal(sym));
1708+
self.advance();
1709+
}
16981710
Some(tokens::Token::Newline)
16991711
| Some(tokens::Token::Semicolon)
17001712
| Some(tokens::Token::Pipe)

crates/bashkit/tests/spec_cases/bash/date.test.sh

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -120,7 +120,6 @@ valid
120120
### end
121121

122122
### date_combined_format
123-
### skip: quoted format string not handling space correctly
124123
# Multiple format specifiers
125124
date +"%Y-%m-%d %H:%M:%S" | grep -qE '^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}$' && echo "valid"
126125
### expect

crates/bashkit/tests/spec_cases/bash/echo.test.sh

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,6 @@ hello world
7070
### end
7171

7272
### echo_escape_r
73-
### skip: carriage return display varies by terminal, test verifies CR is processed
7473
# Echo with carriage return - raw output contains CR character
7574
echo -e "hello\rworld" | cat -v | grep -q 'hello.*world' && echo "valid"
7675
### expect

crates/bashkit/tests/spec_cases/bash/negative-tests.test.sh

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,8 @@
11
### neg_array_indices_empty
2-
### skip: empty array indices expansion outputs extra newline
32
# Empty array has no indices
4-
arr=(); echo "${!arr[@]}"
3+
arr=(); echo ">${!arr[@]}<"
54
### expect
6-
5+
><
76
### end
87

98
### neg_test_nonexistent_file
@@ -29,7 +28,6 @@ echo {item}
2928
### end
3029

3130
### neg_brace_no_expand_space
32-
### skip: brace with space parsing issue
3331
# Brace with space doesn't expand
3432
echo { a,b,c }
3533
### expect

crates/bashkit/tests/spec_cases/bash/sortuniq.test.sh

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -155,7 +155,6 @@ echo $?
155155
### end
156156

157157
### sort_merge
158-
### skip: sort -m (merge) not implemented
159158
printf 'a\nc\n' > /tmp/f1 && printf 'b\nd\n' > /tmp/f2 && sort -m /tmp/f1 /tmp/f2
160159
### expect
161160
a

0 commit comments

Comments
 (0)