Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
246 changes: 246 additions & 0 deletions crates/bashkit/src/builtins/join.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
//! join builtin command - join lines of two sorted files on a common field

use async_trait::async_trait;

use super::{Builtin, Context, resolve_path};
use crate::error::Result;
use crate::interpreter::ExecResult;

/// The join builtin command.
///
/// Usage: join [-1 FIELD] [-2 FIELD] [-t CHAR] [-a FILENUM] [-e STRING] FILE1 FILE2
///
/// Join lines of two sorted files on a common field (default: first field).
///
/// Each joined output line consists of the join key, followed by the remaining
/// fields of the FILE1 line, followed by the remaining fields of the FILE2
/// line, all separated by the `-t` character (default: a single space).
///
/// * `-1 FIELD` / `-2 FIELD` select the 1-based join field of FILE1 / FILE2.
/// * `-a FILENUM` additionally prints unpairable lines of that file, unmodified.
/// * `-e STRING` is parsed and stored, but is not currently applied to the output.
///
/// FILE1 may be given as `-` to take its content from the command's stdin.
/// Both inputs are expected to already be sorted on their join field; keys
/// are compared as plain strings.
pub struct Join;

/// Parsed command-line options for a single `join` invocation.
struct JoinOptions {
field1: usize, // 1-based join field number for FILE1 (set by `-1`)
field2: usize, // 1-based join field number for FILE2 (set by `-2`)
separator: char, // field separator used for both splitting input and joining output (set by `-t`)
unpaired: Vec<usize>, // file numbers (1 or 2) whose unpairable lines are also printed (set by `-a`)
empty: String, // replacement for missing fields (set by `-e`)
}

#[async_trait]
impl Builtin for Join {
    /// Parses the `join` options, reads both inputs and merge-joins them.
    ///
    /// Returns exit code 1 with a diagnostic on stderr when fewer than two
    /// file operands are given or when `-1`/`-2` carries a missing, non-numeric
    /// or zero field number. File-system read errors are propagated as `Err`.
    ///
    /// Both inputs are walked once in lock-step (they are expected to be
    /// sorted on the join field). Lines sharing a key are joined as a full
    /// cross product, so duplicate keys on either side are all paired.
    async fn execute(&self, ctx: Context<'_>) -> Result<ExecResult> {
        let mut opts = JoinOptions {
            field1: 1,
            field2: 1,
            separator: ' ',
            unpaired: Vec::new(),
            empty: String::new(),
        };

        let mut files: Vec<&str> = Vec::new();
        let mut i = 0;

        while i < ctx.args.len() {
            match ctx.args[i].as_str() {
                "-1" => {
                    i += 1;
                    let arg = ctx.args.get(i).map(String::as_str);
                    match parse_field_number(arg) {
                        Some(n) => opts.field1 = n,
                        None => return Ok(invalid_field_number(arg)),
                    }
                }
                "-2" => {
                    i += 1;
                    let arg = ctx.args.get(i).map(String::as_str);
                    match parse_field_number(arg) {
                        Some(n) => opts.field2 = n,
                        None => return Ok(invalid_field_number(arg)),
                    }
                }
                "-t" => {
                    i += 1;
                    if let Some(s) = ctx.args.get(i) {
                        // An empty argument keeps the default single-space separator.
                        opts.separator = s.chars().next().unwrap_or(' ');
                    }
                }
                "-a" => {
                    i += 1;
                    if let Some(n) = ctx.args.get(i).and_then(|s| s.parse::<usize>().ok()) {
                        opts.unpaired.push(n);
                    }
                }
                "-e" => {
                    i += 1;
                    if let Some(s) = ctx.args.get(i) {
                        opts.empty = s.clone();
                    }
                }
                _ => files.push(&ctx.args[i]),
            }
            i += 1;
        }

        if files.len() < 2 {
            return Ok(ExecResult::err("join: missing operand\n".to_string(), 1));
        }

        // Either operand may be "-"; both are given access to stdin so that
        // `join FILE -` works the same way as `join - FILE`.
        let content1 = read_input(ctx.fs.as_ref(), ctx.cwd, files[0], ctx.stdin).await?;
        let content2 = read_input(ctx.fs.as_ref(), ctx.cwd, files[1], ctx.stdin).await?;

        let lines1: Vec<&str> = content1.lines().collect();
        let lines2: Vec<&str> = content2.lines().collect();

        let sep = opts.separator;
        // Field numbers were validated to be >= 1 above, so this cannot underflow.
        let idx1 = opts.field1 - 1;
        let idx2 = opts.field2 - 1;
        let show_unpaired1 = opts.unpaired.contains(&1);
        let show_unpaired2 = opts.unpaired.contains(&2);

        let mut output = String::new();
        let mut p1 = 0;
        let mut p2 = 0;

        while p1 < lines1.len() && p2 < lines2.len() {
            let key1 = join_key(lines1[p1], sep, idx1);
            let key2 = join_key(lines2[p2], sep, idx2);

            match key1.cmp(key2) {
                std::cmp::Ordering::Less => {
                    // FILE1 line has no partner in FILE2.
                    if show_unpaired1 {
                        output.push_str(lines1[p1]);
                        output.push('\n');
                    }
                    p1 += 1;
                }
                std::cmp::Ordering::Greater => {
                    // FILE2 line has no partner in FILE1.
                    if show_unpaired2 {
                        output.push_str(lines2[p2]);
                        output.push('\n');
                    }
                    p2 += 1;
                }
                std::cmp::Ordering::Equal => {
                    // Find the run of consecutive lines sharing this key on
                    // each side, then emit every pairing of the two runs.
                    let end1 = p1
                        + lines1[p1..]
                            .iter()
                            .take_while(|line| join_key(line, sep, idx1) == key1)
                            .count();
                    let end2 = p2
                        + lines2[p2..]
                            .iter()
                            .take_while(|line| join_key(line, sep, idx2) == key1)
                            .count();

                    for left in &lines1[p1..end1] {
                        for right in &lines2[p2..end2] {
                            push_joined(&mut output, left, right, sep, idx1, idx2);
                        }
                    }

                    p1 = end1;
                    p2 = end2;
                }
            }
        }

        // Whatever is left on either side after the other is exhausted is unpairable.
        if show_unpaired1 {
            for line in &lines1[p1..] {
                output.push_str(line);
                output.push('\n');
            }
        }
        if show_unpaired2 {
            for line in &lines2[p2..] {
                output.push_str(line);
                output.push('\n');
            }
        }

        Ok(ExecResult::ok(output))
    }
}

/// Parses a 1-based field number; `None` if it is missing, non-numeric or zero.
fn parse_field_number(arg: Option<&str>) -> Option<usize> {
    arg.and_then(|s| s.parse::<usize>().ok()).filter(|&n| n >= 1)
}

/// Builds the exit-code-1 result reported for a bad `-1`/`-2` argument.
fn invalid_field_number(arg: Option<&str>) -> ExecResult {
    let message = match arg {
        Some(value) => format!("join: invalid field number: '{value}'\n"),
        None => "join: option requires an argument\n".to_string(),
    };
    ExecResult::err(message, 1)
}

/// Returns the field at zero-based `index` of `line`, or `""` if the line has too few fields.
fn join_key(line: &str, sep: char, index: usize) -> &str {
    line.split(sep).nth(index).unwrap_or("")
}

/// Appends every field of `line` except the one at `key_index`, each preceded by `sep`.
fn push_non_key_fields(out: &mut String, line: &str, sep: char, key_index: usize) {
    for (k, field) in line.split(sep).enumerate() {
        if k != key_index {
            out.push(sep);
            out.push_str(field);
        }
    }
}

/// Appends one joined line: key, remaining fields of `line1`, remaining fields of `line2`.
fn push_joined(
    out: &mut String,
    line1: &str,
    line2: &str,
    sep: char,
    index1: usize,
    index2: usize,
) {
    out.push_str(join_key(line1, sep, index1));
    push_non_key_fields(out, line1, sep, index1);
    push_non_key_fields(out, line2, sep, index2);
    out.push('\n');
}

/// Loads the text of one `join` operand.
///
/// The operand `-` yields the supplied `stdin` text (or an empty string when
/// no stdin is available). Any other operand is resolved against `cwd`, read
/// through `fs`, and decoded as UTF-8 with invalid sequences replaced.
async fn read_input(
    fs: &dyn crate::fs::FileSystem,
    cwd: &std::path::Path,
    file: &str,
    stdin: Option<&str>,
) -> Result<String> {
    if file == "-" {
        return Ok(stdin.map(str::to_owned).unwrap_or_default());
    }

    let target = resolve_path(cwd, file);
    let raw = fs.read_file(&target).await?;
    Ok(String::from_utf8_lossy(&raw).into_owned())
}

#[cfg(test)]
// Tests may unwrap: a failed fixture write should abort the test immediately.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::fs::{FileSystem, InMemoryFs};
    use std::collections::HashMap;
    use std::path::{Path, PathBuf};
    use std::sync::Arc;

    /// Runs `join` with `args` against `fs`, with cwd `/` and no stdin.
    async fn run_join(args: &[&str], fs: Arc<dyn FileSystem>) -> ExecResult {
        let args: Vec<String> = args.iter().map(|s| s.to_string()).collect();
        let env = HashMap::new();
        let mut variables = HashMap::new();
        let mut cwd = PathBuf::from("/");
        let ctx = Context {
            args: &args,
            env: &env,
            variables: &mut variables,
            cwd: &mut cwd,
            fs,
            stdin: None,
            #[cfg(feature = "http_client")]
            http_client: None,
            #[cfg(feature = "git")]
            git_client: None,
        };
        Join.execute(ctx).await.expect("join failed")
    }

    /// Creates an in-memory filesystem containing `/f1` and `/f2`.
    async fn fs_with(f1: &[u8], f2: &[u8]) -> Arc<dyn FileSystem> {
        let fs = Arc::new(InMemoryFs::new()) as Arc<dyn FileSystem>;
        fs.write_file(Path::new("/f1"), f1).await.unwrap();
        fs.write_file(Path::new("/f2"), f2).await.unwrap();
        fs
    }

    #[tokio::test]
    async fn test_join_basic() {
        let fs = fs_with(b"a 1\nb 2\nc 3", b"a x\nb y\nc z").await;
        let result = run_join(&["/f1", "/f2"], fs).await;
        assert_eq!(result.exit_code, 0);
        // Exact comparison so stray or misordered lines are caught too.
        assert_eq!(result.stdout, "a 1 x\nb 2 y\nc 3 z\n");
    }

    #[tokio::test]
    async fn test_join_custom_field() {
        let fs = fs_with(b"x a\ny b", b"a 1\nb 2").await;
        let result = run_join(&["-1", "2", "/f1", "/f2"], fs).await;
        assert_eq!(result.exit_code, 0);
        // The join key is printed first, then the other fields of each line.
        assert_eq!(result.stdout, "a x 1\nb y 2\n");
    }

    #[tokio::test]
    async fn test_join_custom_separator() {
        let fs = fs_with(b"a:1\nb:2", b"a:x\nb:y").await;
        let result = run_join(&["-t", ":", "/f1", "/f2"], fs).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "a:1:x\nb:2:y\n");
    }

    #[tokio::test]
    async fn test_join_missing_operand() {
        let fs = Arc::new(InMemoryFs::new()) as Arc<dyn FileSystem>;
        let result = run_join(&["/f1"], fs).await;
        assert_eq!(result.exit_code, 1);
    }

    #[tokio::test]
    async fn test_join_unpairable() {
        let fs = fs_with(b"a 1\nb 2\nc 3", b"a x\nc z").await;
        let result = run_join(&["-a", "1", "/f1", "/f2"], fs).await;
        assert_eq!(result.exit_code, 0);
        // "b 2" has no partner in file2 and is printed as-is, in order.
        assert_eq!(result.stdout, "a 1 x\nb 2\nc 3 z\n");
    }

    #[tokio::test]
    async fn test_join_unpairable_file2() {
        let fs = fs_with(b"a 1\nc 3", b"a x\nb y\nc z").await;
        let result = run_join(&["-a", "2", "/f1", "/f2"], fs).await;
        assert_eq!(result.exit_code, 0);
        // "b y" has no partner in file1 and is printed as-is, in order.
        assert_eq!(result.stdout, "a 1 x\nb y\nc 3 z\n");
    }

    #[tokio::test]
    async fn test_join_no_common_keys() {
        let fs = fs_with(b"a 1\nb 2", b"c x\nd y").await;
        let result = run_join(&["/f1", "/f2"], fs).await;
        assert_eq!(result.exit_code, 0);
        assert_eq!(result.stdout, "");
    }
}
4 changes: 4 additions & 0 deletions crates/bashkit/src/builtins/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ mod grep;
mod headtail;
mod hextools;
mod inspect;
mod join;
mod jq;
mod ls;
mod navigation;
Expand All @@ -64,6 +65,7 @@ mod seq;
mod sleep;
mod sortuniq;
mod source;
mod split;
mod strings;
mod system;
mod test;
Expand Down Expand Up @@ -109,6 +111,7 @@ pub use grep::Grep;
pub use headtail::{Head, Tail};
pub use hextools::{Hexdump, Od, Xxd};
pub use inspect::{File, Less, Stat};
pub use join::Join;
pub use jq::Jq;
pub(crate) use ls::glob_match;
pub use ls::{Find, Ls, Rmdir};
Expand All @@ -124,6 +127,7 @@ pub use seq::Seq;
pub use sleep::Sleep;
pub use sortuniq::{Sort, Uniq};
pub use source::Source;
pub use split::Split;
pub use strings::Strings;
pub use system::{DEFAULT_HOSTNAME, DEFAULT_USERNAME, Hostname, Id, Uname, Whoami};
pub use test::{Bracket, Test};
Expand Down
Loading
Loading