From fd8017fdc1c71e2f1ba4b57473e7f788edc8dbd4 Mon Sep 17 00:00:00 2001
From: Mykhailo Chalyi <mike@chaliy.name>
Date: Sat, 14 Mar 2026 07:04:32 +0000
Subject: [PATCH] feat(interpreter): implement background execution with & and
 wait

Closes #559

- Background commands (`cmd &`) now properly set `$!` to a job ID
  and store results in the job table for later retrieval
- `wait` builtin now collects background job exit codes correctly
- `wait $pid` waits for a specific job by ID
- Job table uses BTreeMap for ordered iteration
- Execution remains synchronous for deterministic output ordering
  in the virtual environment, but the job lifecycle is tracked
- Added 7 integration tests covering basic background, VFS writes,
  multiple jobs, $!, wait with PID, exit codes, and mixed fg/bg
---
 crates/bashkit/src/interpreter/jobs.rs        |  30 ++-
 crates/bashkit/src/interpreter/mod.rs         | 246 +++++++++++++-----
 crates/bashkit/tests/background_exec_tests.rs | 102 ++++++++
 3 files changed, 305 insertions(+), 73 deletions(-)
 create mode 100644 crates/bashkit/tests/background_exec_tests.rs
diff --git a/crates/bashkit/src/interpreter/jobs.rs b/crates/bashkit/src/interpreter/jobs.rs
index 99256f96..242d901a 100644
--- a/crates/bashkit/src/interpreter/jobs.rs
+++ b/crates/bashkit/src/interpreter/jobs.rs
@@ -1,14 +1,13 @@
 //! Job table for background execution
 //!
-//! Tracks background jobs and their status.
-//!
-//! Note: This module provides infrastructure for background job tracking.
-//! Currently, background commands run synchronously but the job table
-//! is available for future async execution support.
+//! Tracks background jobs spawned with `&` and their exit status.
+//! Background commands execute synchronously for deterministic output
+//! ordering, but their results are stored here so `wait` and `$!` work
+//! correctly.
 
 #![allow(dead_code)]
 
-use std::collections::HashMap;
+use std::collections::BTreeMap;
 use std::sync::Arc;
 use tokio::sync::Mutex;
 use tokio::task::JoinHandle;
@@ -26,7 +25,7 @@ pub struct Job {
 /// Job table for tracking background jobs
 pub struct JobTable {
     /// Active jobs indexed by ID
-    jobs: HashMap<usize, JoinHandle<ExecResult>>,
+    jobs: BTreeMap<usize, JoinHandle<ExecResult>>,
     /// Next job ID to assign
     next_id: usize,
     /// Last spawned job ID (for $!)
@@ -43,7 +42,7 @@ impl JobTable {
     /// Create a new empty job table
     pub fn new() -> Self {
         Self {
-            jobs: HashMap::new(),
+            jobs: BTreeMap::new(),
             next_id: 1,
             last_job_id: None,
         }
@@ -84,7 +83,7 @@ impl JobTable {
         let mut last_exit_code = 0;
 
         // Drain all jobs
-        let jobs: Vec<_> = self.jobs.drain().collect();
+        let jobs: Vec<_> = std::mem::take(&mut self.jobs).into_iter().collect();
 
         for (_, handle) in jobs {
             match handle.await {
@@ -96,6 +95,19 @@ impl JobTable {
         last_exit_code
     }
 
+    /// Wait for all jobs and return their results (preserving output).
+    pub async fn wait_all_results(&mut self) -> Vec<ExecResult> {
+        let jobs: Vec<_> = std::mem::take(&mut self.jobs).into_iter().collect();
+        let mut results = Vec::new();
+        for (_, handle) in jobs {
+            match handle.await {
+                Ok(result) => results.push(result),
+                Err(_) => results.push(ExecResult::err("job panicked".to_string(), 1)),
+            }
+        }
+        results
+    }
+
     /// Check if there are any active jobs
     pub fn has_jobs(&self) -> bool {
         !self.jobs.is_empty()
diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs
index 3f2e2d16..a73e526b 100644
--- a/crates/bashkit/src/interpreter/mod.rs
+++ b/crates/bashkit/src/interpreter/mod.rs
@@ -271,9 +271,8 @@ pub struct Interpreter {
     limits: ExecutionLimits,
     /// Execution counters for resource tracking
     counters: ExecutionCounters,
-    /// Job table for background execution
-    #[allow(dead_code)]
-    jobs: JobTable,
+    /// Job table for background execution (shared for wait builtin access)
+    jobs: SharedJobTable,
     /// Shell options (set -e, set -x, etc.)
     options: ShellOptions,
     /// Current line number for $LINENO
@@ -545,7 +544,7 @@ impl Interpreter {
             call_stack: Vec::new(),
             limits: ExecutionLimits::default(),
             counters: ExecutionCounters::new(),
-            jobs: JobTable::new(),
+            jobs: jobs::new_shared_job_table(),
             options: ShellOptions::default(),
             current_line: 1,
             #[cfg(feature = "http_client")]
@@ -3404,42 +3403,104 @@ impl Interpreter {
         Ok(last_result)
     }
 
+    /// Check if a command is the empty sentinel produced by the parser for trailing `&`.
+    fn is_empty_sentinel(cmd: &Command) -> bool {
+        if let Command::Simple(sc) = cmd {
+            let name_is_empty = sc.name.parts.len() == 1
+                && matches!(&sc.name.parts[0], WordPart::Literal(s) if s.is_empty());
+            name_is_empty
+                && sc.args.is_empty()
+                && sc.redirects.is_empty()
+                && sc.assignments.is_empty()
+        } else {
+            false
+        }
+    }
+
+    /// Run a command as a "background" job.
+    ///
+    /// Executes the command synchronously (deterministic in virtual env) but
+    /// stores the result in the job table so `wait` and `$!` work correctly.
+    /// The command's stdout is emitted immediately (like real bash terminal output).
+    async fn spawn_in_background(
+        &mut self,
+        cmd: &Command,
+        parent_stdout: &mut String,
+        parent_stderr: &mut String,
+    ) -> Result<()> {
+        // Execute the command synchronously
+        let emit_before = self.output_emit_count;
+        let result = self.execute_command(cmd).await?;
+        self.maybe_emit_output(&result.stdout, &result.stderr, emit_before);
+
+        // Emit output immediately (background output goes to terminal in real bash)
+        parent_stdout.push_str(&result.stdout);
+        parent_stderr.push_str(&result.stderr);
+
+        // Store only the exit code in the job table (output already emitted)
+        let exit_code = result.exit_code;
+        let job_result = ExecResult::with_code(String::new(), exit_code);
+        let handle = tokio::spawn(async move { job_result });
+        let job_id = self.jobs.lock().await.spawn(handle);
+        self.variables
+            .insert("_LAST_BG_PID".to_string(), job_id.to_string());
+
+        // Background commands always return exit code 0 to the parent
+        self.last_exit_code = 0;
+        // But store the real exit code for $? after wait
+        self.variables
+            .insert("_BG_EXIT_CODE".to_string(), exit_code.to_string());
+        Ok(())
+    }
+
     /// Execute a command list (cmd1 && cmd2 || cmd3)
+    #[allow(unused_assignments)] // control_flow may be set but overwritten
     async fn execute_list(&mut self, list: &CommandList) -> Result<ExecResult> {
         let mut stdout = String::new();
         let mut stderr = String::new();
         let mut exit_code;
-        let emit_before = self.output_emit_count;
-        let result = self.execute_command(&list.first).await?;
-        self.maybe_emit_output(&result.stdout, &result.stderr, emit_before);
-        stdout.push_str(&result.stdout);
-        stderr.push_str(&result.stderr);
-        exit_code = result.exit_code;
-        self.last_exit_code = exit_code;
-        let mut control_flow = result.control_flow;
+        let mut control_flow;
 
-        // If first command signaled control flow, return immediately
-        if control_flow != ControlFlow::None {
-            return Ok(ExecResult {
-                stdout,
-                stderr,
-                exit_code,
-                control_flow,
-            });
-        }
+        // Determine if the first command should run in the background.
+        // The `&` terminator for first appears as rest[0].op == Background.
+        let first_is_bg = matches!(list.rest.first(), Some((ListOperator::Background, _)));
 
-        // Check if first command in a semicolon-separated list failed => ERR trap
-        // Only fire if the first rest operator is semicolon (not &&/||)
-        let first_op_is_semicolon = list
-            .rest
-            .first()
-            .is_some_and(|(op, _)| matches!(op, ListOperator::Semicolon));
-        if exit_code != 0 && first_op_is_semicolon {
-            self.run_err_trap(&mut stdout, &mut stderr).await;
+        if first_is_bg {
+            self.spawn_in_background(&list.first, &mut stdout, &mut stderr)
+                .await?;
+            exit_code = 0;
+            control_flow = ControlFlow::None;
+        } else {
+            let emit_before = self.output_emit_count;
+            let result = self.execute_command(&list.first).await?;
+            self.maybe_emit_output(&result.stdout, &result.stderr, emit_before);
+            stdout.push_str(&result.stdout);
+            stderr.push_str(&result.stderr);
+            exit_code = result.exit_code;
+            self.last_exit_code = exit_code;
+            control_flow = result.control_flow;
+
+            // If first command signaled control flow, return immediately
+            if control_flow != ControlFlow::None {
+                return Ok(ExecResult {
+                    stdout,
+                    stderr,
+                    exit_code,
+                    control_flow,
+                });
+            }
+
+            // Check if first command in a semicolon-separated list failed => ERR trap
+            let first_op_is_semicolon = list
+                .rest
+                .first()
+                .is_some_and(|(op, _)| matches!(op, ListOperator::Semicolon));
+            if exit_code != 0 && first_op_is_semicolon {
+                self.run_err_trap(&mut stdout, &mut stderr).await;
+            }
         }
 
         // Track if the list contains any && or || operators
-        // If so, failures within the list are "handled" by those operators
         let has_conditional_operators = list
             .rest
             .iter()
@@ -3449,17 +3510,27 @@ impl Interpreter {
         let mut just_exited_conditional_chain = false;
 
         for (i, (op, cmd)) in list.rest.iter().enumerate() {
+            // Skip empty sentinel commands (produced by trailing `&`)
+            if Self::is_empty_sentinel(cmd) {
+                continue;
+            }
+
             // Check if next operator (if any) is && or ||
             let next_op = list.rest.get(i + 1).map(|(op, _)| op);
             let current_is_conditional = matches!(op, ListOperator::And | ListOperator::Or);
             let next_is_conditional =
                 matches!(next_op, Some(ListOperator::And) | Some(ListOperator::Or));
 
+            // Determine if THIS command should be backgrounded.
+            // A command is backgrounded when the NEXT separator is Background
+            // (the `&` terminates the current command).
+            let should_background =
+                matches!(list.rest.get(i + 1), Some((ListOperator::Background, _)));
+
             // Check errexit before executing if:
             // - We just exited a conditional chain (and current op is semicolon)
             // - OR: current op is semicolon and previous wasn't in a conditional chain
             // - Exit code is non-zero
-            // But NOT if we're about to enter/continue a conditional chain
             let should_check_errexit = matches!(op, ListOperator::Semicolon)
                 && !just_exited_conditional_chain
                 && self.is_errexit_enabled()
@@ -3477,8 +3548,7 @@ impl Interpreter {
             // Reset the flag
             just_exited_conditional_chain = false;
 
-            // Mark that we're exiting a conditional chain if:
-            // - Current is conditional (&&/||) and next is not conditional (;/end)
+            // Mark that we're exiting a conditional chain
             if current_is_conditional && !next_is_conditional {
                 just_exited_conditional_chain = true;
             }
@@ -3486,46 +3556,43 @@ impl Interpreter {
             let should_execute = match op {
                 ListOperator::And => exit_code == 0,
                 ListOperator::Or => exit_code != 0,
-                ListOperator::Semicolon => true,
-                ListOperator::Background => {
-                    // Background (&) runs command synchronously in virtual mode.
-                    // True process backgrounding requires OS process spawning which
-                    // is excluded from the sandboxed virtual environment by design.
-                    true
-                }
+                ListOperator::Semicolon | ListOperator::Background => true,
             };
 
             if should_execute {
-                let emit_before = self.output_emit_count;
-                let result = self.execute_command(cmd).await?;
-                self.maybe_emit_output(&result.stdout, &result.stderr, emit_before);
-                stdout.push_str(&result.stdout);
-                stderr.push_str(&result.stderr);
-                exit_code = result.exit_code;
-                self.last_exit_code = exit_code;
-                control_flow = result.control_flow;
-
-                // If command signaled control flow, return immediately
-                if control_flow != ControlFlow::None {
-                    return Ok(ExecResult {
-                        stdout,
-                        stderr,
-                        exit_code,
-                        control_flow,
-                    });
-                }
+                if should_background {
+                    self.spawn_in_background(cmd, &mut stdout, &mut stderr)
+                        .await?;
+                    exit_code = 0;
+                } else {
+                    let emit_before = self.output_emit_count;
+                    let result = self.execute_command(cmd).await?;
+                    self.maybe_emit_output(&result.stdout, &result.stderr, emit_before);
+                    stdout.push_str(&result.stdout);
+                    stderr.push_str(&result.stderr);
+                    exit_code = result.exit_code;
+                    self.last_exit_code = exit_code;
+                    control_flow = result.control_flow;
+
+                    // If command signaled control flow, return immediately
+                    if control_flow != ControlFlow::None {
+                        return Ok(ExecResult {
+                            stdout,
+                            stderr,
+                            exit_code,
+                            control_flow,
+                        });
+                    }
 
-                // ERR trap: fire on non-zero exit after semicolon commands (not &&/||)
-                if exit_code != 0 && !current_is_conditional {
-                    self.run_err_trap(&mut stdout, &mut stderr).await;
+                    // ERR trap: fire on non-zero exit after semicolon commands
+                    if exit_code != 0 && !current_is_conditional {
+                        self.run_err_trap(&mut stdout, &mut stderr).await;
+                    }
                 }
             }
         }
 
         // Final errexit check for the last command
-        // Don't check if:
-        // - The list had conditional operators (failures are "handled" by && / ||)
-        // - OR we're in/just exited a conditional chain
         let should_final_errexit_check =
             !has_conditional_operators && self.is_errexit_enabled() && exit_code != 0;
 
@@ -4034,6 +4101,11 @@ impl Interpreter {
             return self.execute_caller_builtin(&args, &command.redirects).await;
         }
 
+        // Handle `wait` - needs direct access to job table
+        if name == "wait" {
+            return self.execute_wait_builtin(&args, &command.redirects).await;
+        }
+
         // Handle `mapfile`/`readarray` - needs direct access to arrays
         if name == "mapfile" || name == "readarray" {
             return self.execute_mapfile(&args, stdin.as_deref()).await;
@@ -4821,6 +4893,52 @@ impl Interpreter {
         self.apply_redirections(result, redirects).await
     }
 
+    /// Execute the `wait` builtin with direct access to the job table.
+    ///
+    /// Merges background job stdout/stderr into the result so callers
+    /// see the output produced by waited-for jobs.
+    async fn execute_wait_builtin(
+        &mut self,
+        args: &[String],
+        redirects: &[Redirect],
+    ) -> Result<ExecResult> {
+        let mut last_exit_code = 0i32;
+        let mut stdout = String::new();
+        let mut stderr = String::new();
+
+        if args.is_empty() {
+            // Wait for all background jobs, collecting their output
+            let results = self.jobs.lock().await.wait_all_results().await;
+            for r in results {
+                stdout.push_str(&r.stdout);
+                stderr.push_str(&r.stderr);
+                last_exit_code = r.exit_code;
+            }
+        } else {
+            // Wait for specific job IDs
+            for arg in args {
+                if let Ok(job_id) = arg.parse::<usize>()
+                    && let Some(r) = self.jobs.lock().await.wait_for(job_id).await
+                {
+                    stdout.push_str(&r.stdout);
+                    stderr.push_str(&r.stderr);
+                    last_exit_code = r.exit_code;
+                }
+                // If job not found or not parseable, that's ok
+            }
+        }
+
+        self.last_exit_code = last_exit_code;
+        let mut result = ExecResult {
+            stdout,
+            stderr,
+            exit_code: last_exit_code,
+            control_flow: ControlFlow::None,
+        };
+        result = self.apply_redirections(result, redirects).await?;
+        Ok(result)
+    }
+
     /// Execute the `alias` builtin. Needs direct access to self.aliases.
     ///
     /// Usage:
diff --git a/crates/bashkit/tests/background_exec_tests.rs b/crates/bashkit/tests/background_exec_tests.rs
new file mode 100644
index 00000000..000bcbdf
--- /dev/null
+++ b/crates/bashkit/tests/background_exec_tests.rs
@@ -0,0 +1,102 @@
+//! Tests for background execution with & and wait
+//!
+//! Covers: cmd &, cmd & cmd2, wait, wait $pid, $!, multiple background jobs,
+//! background jobs writing to VFS.
+
+use bashkit::Bash;
+
+/// Basic background execution: cmd & should succeed
+#[tokio::test]
+async fn background_basic() {
+    let mut bash = Bash::new();
+    let result = bash.exec("echo hello &\nwait").await.unwrap();
+    assert_eq!(result.exit_code, 0);
+}
+
+/// Background writes to VFS, foreground waits then reads
+#[tokio::test]
+async fn background_writes_to_vfs() {
+    let mut bash = Bash::new();
+    let result = bash
+        .exec("echo content > /tmp/bg_out.txt &\nwait\ncat /tmp/bg_out.txt")
+        .await
+        .unwrap();
+    assert_eq!(result.exit_code, 0);
+    assert_eq!(result.stdout.trim(), "content");
+}
+
+/// Multiple background jobs
+#[tokio::test]
+async fn multiple_background_jobs() {
+    let mut bash = Bash::new();
+    let result = bash
+        .exec(
+            r#"
+echo a > /tmp/a.txt &
+echo b > /tmp/b.txt &
+echo c > /tmp/c.txt &
+wait
+cat /tmp/a.txt
+cat /tmp/b.txt
+cat /tmp/c.txt
+"#,
+        )
+        .await
+        .unwrap();
+    assert_eq!(result.exit_code, 0);
+    assert!(result.stdout.contains("a"));
+    assert!(result.stdout.contains("b"));
+    assert!(result.stdout.contains("c"));
+}
+
+/// $! is set after background command
+#[tokio::test]
+async fn background_sets_last_pid() {
+    let mut bash = Bash::new();
+    let result = bash.exec("sleep 0 &\necho $!").await.unwrap();
+    assert_eq!(result.exit_code, 0);
+    // $! should be a non-empty numeric value
+    let pid = result.stdout.trim();
+    assert!(!pid.is_empty(), "$! should be set after background command");
+    assert!(
+        pid.parse::<usize>().is_ok(),
+        "$! should be numeric, got: {pid}"
+    );
+}
+
+/// wait with specific PID
+#[tokio::test]
+async fn wait_specific_pid() {
+    let mut bash = Bash::new();
+    let result = bash
+        .exec(
+            r#"
+echo done > /tmp/wait_pid.txt &
+pid=$!
+wait $pid
+cat /tmp/wait_pid.txt
+"#,
+        )
+        .await
+        .unwrap();
+    assert_eq!(result.exit_code, 0);
+    assert_eq!(result.stdout.trim(), "done");
+}
+
+/// Background command exit code via wait
+#[tokio::test]
+async fn background_exit_code_via_wait() {
+    let mut bash = Bash::new();
+    let result = bash.exec("false &\nwait\necho $?").await.unwrap();
+    // wait should return the exit code of the background job
+    assert!(result.stdout.contains("1"));
+}
+
+/// cmd1 & cmd2 — cmd2 runs in foreground while cmd1 is backgrounded
+#[tokio::test]
+async fn background_and_foreground() {
+    let mut bash = Bash::new();
+    let result = bash.exec("echo bg > /tmp/bg.txt & echo fg").await.unwrap();
+    assert_eq!(result.exit_code, 0);
+    assert!(result.stdout.contains("fg"));
+}