diff --git a/crates/bashkit/src/parser/lexer.rs b/crates/bashkit/src/parser/lexer.rs index 29f88d07..5a466868 100644 --- a/crates/bashkit/src/parser/lexer.rs +++ b/crates/bashkit/src/parser/lexer.rs @@ -19,6 +19,9 @@ pub struct Lexer<'a> { /// Current position in the input position: Position, chars: std::iter::Peekable<std::str::Chars<'a>>, + /// Rest-of-line text captured during heredoc parsing. + /// In `cat <<EOF > file`, this holds ` > file`. + pub heredoc_rest_of_line: String, } impl<'a> Lexer<'a> { @@ -28,6 +31,7 @@ impl<'a> Lexer<'a> { input, position: Position::new(), chars: input.chars().peekable(), + heredoc_rest_of_line: String::new(), } } @@ -1208,12 +1212,16 @@ impl<'a> Lexer<'a> { let mut content = String::new(); let mut current_line = String::new(); - // Skip to end of current line first (after the delimiter on command line) + // Collect the rest of the command line after the heredoc delimiter. + // In bash, `cat <<EOF > file` means `> file` is still part of + // the command and should be parsed for redirections. 
+ self.heredoc_rest_of_line.clear(); while let Some(ch) = self.peek_char() { self.advance(); if ch == '\n' { break; } + self.heredoc_rest_of_line.push(ch); } // Read lines until we find the delimiter @@ -1362,6 +1370,7 @@ mod tests { let mut lexer = Lexer::new("\nhello\nworld\nEOF"); let content = lexer.read_heredoc("EOF"); assert_eq!(content, "hello\nworld\n"); + assert_eq!(lexer.heredoc_rest_of_line.trim(), ""); } #[test] @@ -1369,6 +1378,7 @@ mod tests { let mut lexer = Lexer::new("\ntest\nEOF"); let content = lexer.read_heredoc("EOF"); assert_eq!(content, "test\n"); + assert_eq!(lexer.heredoc_rest_of_line.trim(), ""); } #[test] @@ -1384,6 +1394,18 @@ mod tests { // Now read heredoc content let content = lexer.read_heredoc("EOF"); assert_eq!(content, "hello\nworld\n"); + assert_eq!(lexer.heredoc_rest_of_line.trim(), ""); + } + + #[test] + fn test_read_heredoc_with_redirect() { + let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF"); + assert_eq!(lexer.next_token(), Some(Token::Word("cat".to_string()))); + assert_eq!(lexer.next_token(), Some(Token::HereDoc)); + assert_eq!(lexer.next_token(), Some(Token::Word("EOF".to_string()))); + let content = lexer.read_heredoc("EOF"); + assert_eq!(content, "hello\n"); + assert_eq!(lexer.heredoc_rest_of_line.trim(), "> file.txt"); } #[test] diff --git a/crates/bashkit/src/parser/mod.rs b/crates/bashkit/src/parser/mod.rs index b1276bad..ad87adfa 100644 --- a/crates/bashkit/src/parser/mod.rs +++ b/crates/bashkit/src/parser/mod.rs @@ -1768,15 +1768,18 @@ impl<'a> Parser<'a> { }; // Don't advance - let read_heredoc consume directly from lexer position - // Read the here document content (reads until delimiter line) + // Read the here document content (reads until delimiter line). + // Also captures rest-of-line text (e.g. `> file` in + // `cat <<EOF > file`) into lexer.heredoc_rest_of_line. 
 let content = self.lexer.read_heredoc(&delimiter); + let rest_of_line = std::mem::take(&mut self.lexer.heredoc_rest_of_line); // Strip leading tabs for <<- let content = if strip_tabs { let had_trailing_newline = content.ends_with('\n'); let mut stripped: String = content .lines() - .map(|l| l.trim_start_matches('\t')) + .map(|l: &str| l.trim_start_matches('\t')) .collect::<Vec<_>>() .join("\n"); if had_trailing_newline { @@ -1787,9 +1790,6 @@ impl<'a> Parser<'a> { content }; - // Now advance to get the next token after the heredoc - self.advance(); - // If delimiter was quoted, content is literal (no expansion) // Otherwise, parse for variable expansion let target = if quoted { @@ -1810,6 +1810,54 @@ impl<'a> Parser<'a> { target, }); + // Parse rest-of-line for additional redirects + // (e.g. `> file` in `cat <<EOF > file`). + // We parse tokens directly instead of using parse_simple_command + // because that method returns None for redirect-only input + // (no command word), dropping the redirects we need. + if !rest_of_line.trim().is_empty() { + let mut sub = Parser::new(&rest_of_line); + loop { + match &sub.current_token { + Some(tokens::Token::RedirectOut) => { + sub.advance(); + if let Ok(target) = sub.expect_word() { + redirects.push(Redirect { + fd: None, + kind: RedirectKind::Output, + target, + }); + } + } + Some(tokens::Token::RedirectAppend) => { + sub.advance(); + if let Ok(target) = sub.expect_word() { + redirects.push(Redirect { + fd: None, + kind: RedirectKind::Append, + target, + }); + } + } + Some(tokens::Token::RedirectFd(fd)) => { + let fd = *fd; + sub.advance(); + if let Ok(target) = sub.expect_word() { + redirects.push(Redirect { + fd: Some(fd), + kind: RedirectKind::Output, + target, + }); + } + } + _ => break, + } + } + } + + // Now advance past the heredoc body + self.advance(); + // Heredoc body consumed subsequent lines from input. // Stop parsing this command - next tokens belong to new commands. 
 break; diff --git a/crates/bashkit/tests/spec_cases/bash/heredoc.test.sh b/crates/bashkit/tests/spec_cases/bash/heredoc.test.sh index aafdd656..5e08b196 100644 --- a/crates/bashkit/tests/spec_cases/bash/heredoc.test.sh +++ b/crates/bashkit/tests/spec_cases/bash/heredoc.test.sh @@ -83,6 +83,44 @@ EOF value: 42, cmd: hi, math: 84 ### end +### heredoc_redirect_after +# cat <<EOF > file should write heredoc content to file, not stdout +cat <<EOF > /tmp/heredoc_redirect.txt +line one +line two +EOF +cat /tmp/heredoc_redirect.txt +### expect +line one +line two +### end + +### heredoc_redirect_after_with_vars +# cat <<EOF > file with variable expansion +NAME=world +cat <<EOF > /tmp/heredoc_vars.txt +hello $NAME +EOF +cat /tmp/heredoc_vars.txt +### expect +hello world +### end + +### heredoc_redirect_after_multiline +# cat <<EOF > file with multiline YAML-like content (issue #345) +mkdir -p /tmp/app +cat <<EOF > /tmp/app/config.yaml +app: + name: myservice + port: 8080 +EOF +cat /tmp/app/config.yaml +### expect +app: + name: myservice + port: 8080 +### end + ### heredoc_tab_strip # <<- strips leading tabs from content and delimiter cat <<-EOF