diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index babc70b..6da9b84 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -85,6 +85,27 @@ jobs: cd artifacts sha256sum *.tar.gz > checksums-sha256.txt + - name: Extract changelog for release + id: changelog + run: | + tag="${GITHUB_REF#refs/tags/}" + version="${tag#v}" + # Extract the section between this version's heading and the next heading + notes=$(awk -v ver="$version" ' + /^## \[/ { + if (found) exit + if ($0 ~ "\\[" ver "\\]") found=1; next + } + found { print } + ' CHANGELOG.md) + # Fail if we got nothing — forces you to update CHANGELOG.md before tagging + if [ -z "$notes" ]; then + echo "::error::No CHANGELOG.md entry found for version ${version}" + exit 1 + fi + # Write to file to avoid quoting issues + echo "$notes" > release-notes.md + - name: Create GitHub Release env: GH_TOKEN: ${{ github.token }} @@ -92,7 +113,7 @@ jobs: tag="${GITHUB_REF#refs/tags/}" gh release create "${tag}" \ --title "${tag}" \ - --generate-notes \ + --notes-file release-notes.md \ --draft \ artifacts/*.tar.gz \ artifacts/checksums-sha256.txt diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..db92df1 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,61 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [0.4.0] - 2026-04-08 + +### Added + +- Self-update command with passive version checking +- Setup steps (`setup = true`) that always run even when checkpoint-skipped and are not counted in pass/fail +- Test file `name` field now defaults to the file stem when omitted (e.g. 
`login-flow.test.toml` defaults to "login-flow") +- Changelog based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) + +### Changed + +- Release workflow now extracts notes from CHANGELOG.md instead of auto-generating from commits + +## [0.3.1] - 2026-04-04 + +### Added + +- Agent setup hint in CLI help output +- Coding agent quick-start prompt in README and getting-started docs +- Links to bugatti.dev in README and CLI help + +## [0.3.0] - 2026-04-04 + +### Added + +- Documentation site, llms.txt, and docs CI workflow +- Checkpoint save/restore and step skip with checkpoint support +- `--from-checkpoint` CLI flag and timestamp-based run IDs +- Checkpoint timeout configuration +- Comprehensive docs for includes, shared test files, skip, and checkpoints +- Node.js Express and Python Flask example projects +- Release workflow and installer script +- Readiness URL checks for long-lived services +- CLI skip flags for harness commands (`--skip-setup`, `--skip-teardown`) +- Long-lived subprocess management with readiness checks +- Result marker parser, report generation, and run artifacts +- Claude Code provider adapter +- Test discovery, step expansion with cycle detection, and end-to-end pipeline +- Config types and `bugatti.config.toml` parsing +- Test file types and `*.test.toml` parsing +- CLI scaffold with `bugatti test` subcommand + +### Fixed + +- Clippy warnings for release build +- Docs deploy workflow triggers and Node version +- Result marker parser handling of embedded markers + +[Unreleased]: https://github.com/codesoda/bugatti-cli/compare/v0.4.0...HEAD +[0.4.0]: https://github.com/codesoda/bugatti-cli/compare/v0.3.1...v0.4.0 +[0.3.1]: https://github.com/codesoda/bugatti-cli/compare/v0.3.0...v0.3.1 +[0.3.0]: https://github.com/codesoda/bugatti-cli/releases/tag/v0.3.0 diff --git a/Cargo.lock b/Cargo.lock index 474d5ce..8b4c09f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -117,7 +117,7 @@ dependencies = [ [[package]] name = "bugatti" -version = 
"0.3.1" +version = "0.4.0" dependencies = [ "chrono", "clap", diff --git a/Cargo.toml b/Cargo.toml index 0d3ca0b..cc6532b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bugatti" -version = "0.3.1" +version = "0.4.0" edition = "2021" description = "A CLI for plain-English, agent-assisted local application verification using *.test.toml files" diff --git a/src/config.rs b/src/config.rs index 5a2a39d..ba8c73d 100644 --- a/src/config.rs +++ b/src/config.rs @@ -373,6 +373,7 @@ readiness_url = "http://localhost:3000/health" include_glob: None, step_timeout_secs: None, skip: false, + setup: false, checkpoint: None, }], }; diff --git a/src/executor.rs b/src/executor.rs index 0516102..2aaeea5 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -70,6 +70,8 @@ pub struct StepOutcome { pub instruction: String, /// Source file for provenance. pub source_file: PathBuf, + /// Whether this was a setup step (not counted in test pass/fail). + pub setup: bool, /// The result: either a parsed verdict, a protocol error, or a timeout. pub result: StepResult, /// Full transcript text captured from the provider. @@ -410,7 +412,7 @@ pub fn execute_steps( // Checkpoint restore: find the last checkpoint among leading skipped steps // and restore it before executing non-skipped steps. if let Some(cp_config) = checkpoint_config { - let first_non_skipped = steps.iter().position(|s| !s.skip); + let first_non_skipped = steps.iter().position(|s| !s.skip && !s.setup); if let Some(boundary) = first_non_skipped { if boundary > 0 { // Find the last checkpoint among skipped steps before the boundary @@ -466,8 +468,8 @@ pub fn execute_steps( }) .unwrap_or_else(|| step.source_file.display().to_string()); - // Handle skipped steps - if step.skip { + // Handle skipped steps (setup steps bypass skip) + if step.skip && !step.setup { println!( "SKIP {}/{} ... 
{} (from {})", step.step_id + 1, @@ -479,6 +481,7 @@ pub fn execute_steps( step_id: step.step_id, instruction: step.instruction.clone(), source_file: step.source_file.clone(), + setup: false, result: StepResult::Verdict(StepVerdict::Ok), transcript: String::new(), log_events: vec![], @@ -488,14 +491,16 @@ pub fn execute_steps( continue; } + let step_label = if step.setup { "SETUP" } else { "STEP" }; tracing::info!( step_id = step.step_id, total = total_steps, + setup = step.setup, source = %step.source_file.display(), "step execution begin" ); println!( - "STEP {}/{} ... {} (from {})", + "{step_label} {}/{} ... {} (from {})", step.step_id + 1, total_steps, instruction_summary, @@ -525,7 +530,14 @@ pub fn execute_steps( let (step_result, transcript) = match result { Ok((transcript_text, verdict)) => (verdict, transcript_text), - Err((transcript_text, err_result)) => (err_result, transcript_text), + Err((transcript_text, err_result)) => { + // Setup steps tolerate missing RESULT markers — the agent just runs the command + if step.setup && matches!(err_result, StepResult::ProtocolError(_)) { + (StepResult::Verdict(StepVerdict::Ok), transcript_text) + } else { + (err_result, transcript_text) + } + } }; // Parse BUGATTI_LOG events from the transcript @@ -606,6 +618,7 @@ pub fn execute_steps( step_id: step.step_id, instruction: step.instruction.clone(), source_file: step.source_file.clone(), + setup: step.setup, result: step_result, transcript, log_events, @@ -690,7 +703,11 @@ pub fn execute_steps( } } - let all_passed = outcomes.iter().all(|o| o.result.is_pass()); + // Setup steps are not graded as tests, but a failed one aborts the run and is still + // recorded in `outcomes`, so it must fail the verdict instead of being filtered out.
+ let all_passed = outcomes + .iter() + .all(|o| o.result.is_pass() || (o.setup && !o.result.is_failure())); let total_duration = run_start.elapsed(); // Print final run status (after teardown) @@ -759,23 +776,35 @@ fn print_run_summary(outcomes: &[StepOutcome], total_duration: Duration, total_s println!(); println!("═══════════════════════════════════════════════════"); - let completed = outcomes.len(); - let ok_count = outcomes + let test_outcomes: Vec<_> = outcomes.iter().filter(|o| !o.setup).collect(); + let setup_count = outcomes.iter().filter(|o| o.setup).count(); + let completed = test_outcomes.len(); + let ok_count = test_outcomes .iter() .filter(|o| matches!(o.result, StepResult::Verdict(StepVerdict::Ok))) .count(); - let warn_count = outcomes + let warn_count = test_outcomes .iter() .filter(|o| matches!(o.result, StepResult::Verdict(StepVerdict::Warn(_)))) .count(); - let fail_count = outcomes.iter().filter(|o| o.result.is_failure()).count(); - let skipped = total_steps - completed; + let fail_count = test_outcomes + .iter() + .filter(|o| o.result.is_failure()) + .count(); + let skipped = total_steps - completed - setup_count; - let all_passed = outcomes.iter().all(|o| o.result.is_pass()); + // A failed setup step aborts the run; report FAILED even if no test step ran. + let setup_failed = outcomes.iter().any(|o| o.setup && o.result.is_failure()); + let all_passed = !setup_failed && test_outcomes.iter().all(|o| o.result.is_pass()); let status = if all_passed { "PASSED" } else { "FAILED" }; + let setup_part = if setup_count > 0 { + format!(", {setup_count} setup") + } else { + String::new() + }; println!( - "Run {status}: {ok_count} ok, {warn_count} warn, {fail_count} failed, {skipped} skipped ({:.1}s)", + "Run {status}: {ok_count} ok, {warn_count} warn, {fail_count} failed, {skipped} skipped{setup_part} ({:.1}s)", total_duration.as_secs_f64() ); println!("═══════════════════════════════════════════════════"); @@ -1011,6 +1040,7 @@ mod tests { parent_chain: vec![], step_timeout_secs: None, skip: false, + setup: false, checkpoint: None, }, ExpandedStep { @@ -1021,6 +1051,7 @@ mod tests { parent_chain: vec![], step_timeout_secs: None, skip: false, 
+ setup: false, checkpoint: None, }, ] @@ -1915,4 +1946,270 @@ mod tests { // No steps should have executed assert_eq!(outcome.steps.len(), 0); } + + // --- Setup step tests --- + + #[test] + fn setup_step_tolerates_missing_result_marker() { + let steps = vec![ExpandedStep { + step_id: 0, + instruction: "Start the browser in headed mode".to_string(), + source_file: PathBuf::from("/test/root.test.toml"), + source_step_index: 0, + parent_chain: vec![], + step_timeout_secs: None, + skip: false, + setup: true, + checkpoint: None, + }]; + let (run_id, session_id) = test_run_ids(); + let (_tmp, artifact_dir) = test_artifact_dir(); + + // Agent responds without a RESULT marker — should be OK for setup steps + let mut session = MockSession::new(vec![vec![ + Ok(OutputChunk::Text( + "Browser started in headed mode.".to_string(), + )), + Ok(OutputChunk::Done), + ]]); + + let outcome = execute_steps( + &mut session, + &steps, + &run_id, + &session_id, + &artifact_dir, + None, + None, + None, + std::path::Path::new("."), + &AtomicBool::new(false), + ) + .unwrap(); + + assert!(outcome.all_passed); + assert_eq!(outcome.steps.len(), 1); + assert!(outcome.steps[0].setup); + assert_eq!( + outcome.steps[0].result, + StepResult::Verdict(StepVerdict::Ok) + ); + } + + #[test] + fn setup_step_bypasses_skip() { + let steps = vec![ + ExpandedStep { + step_id: 0, + instruction: "Start the browser".to_string(), + source_file: PathBuf::from("/test/root.test.toml"), + source_step_index: 0, + parent_chain: vec![], + step_timeout_secs: None, + skip: true, + setup: true, + checkpoint: None, + }, + ExpandedStep { + step_id: 1, + instruction: "Seed the database".to_string(), + source_file: PathBuf::from("/test/root.test.toml"), + source_step_index: 1, + parent_chain: vec![], + step_timeout_secs: None, + skip: true, + setup: false, + checkpoint: None, + }, + ExpandedStep { + step_id: 2, + instruction: "Verify the homepage".to_string(), + source_file: PathBuf::from("/test/root.test.toml"), + 
source_step_index: 2, + parent_chain: vec![], + step_timeout_secs: None, + skip: false, + setup: false, + checkpoint: None, + }, + ]; + let (run_id, session_id) = test_run_ids(); + let (_tmp, artifact_dir) = test_artifact_dir(); + + // Only 2 steps should execute: the setup step (bypasses skip) and the non-skipped step + let mut session = MockSession::new(vec![ + vec![ + Ok(OutputChunk::Text("Browser started.\nRESULT OK".to_string())), + Ok(OutputChunk::Done), + ], + vec![ + Ok(OutputChunk::Text("Homepage loaded.\nRESULT OK".to_string())), + Ok(OutputChunk::Done), + ], + ]); + + let outcome = execute_steps( + &mut session, + &steps, + &run_id, + &session_id, + &artifact_dir, + None, + None, + None, + std::path::Path::new("."), + &AtomicBool::new(false), + ) + .unwrap(); + + assert!(outcome.all_passed); + // 3 outcomes: setup (executed), skipped (auto-ok), test (executed) + assert_eq!(outcome.steps.len(), 3); + // Step 0: setup ran + assert!(outcome.steps[0].setup); + assert_eq!( + outcome.steps[0].result, + StepResult::Verdict(StepVerdict::Ok) + ); + // Step 1: skipped + assert!(!outcome.steps[1].setup); + assert_eq!(outcome.steps[1].duration, Duration::ZERO); + // Step 2: test ran + assert!(!outcome.steps[2].setup); + assert_eq!( + outcome.steps[2].result, + StepResult::Verdict(StepVerdict::Ok) + ); + } + + #[test] + fn setup_step_not_counted_in_all_passed() { + // A setup step + a failing test step: all_passed should be false + // because the test step failed, not because of the setup step. 
+ let steps = vec![ + ExpandedStep { + step_id: 0, + instruction: "Start the browser".to_string(), + source_file: PathBuf::from("/test/root.test.toml"), + source_step_index: 0, + parent_chain: vec![], + step_timeout_secs: None, + skip: false, + setup: true, + checkpoint: None, + }, + ExpandedStep { + step_id: 1, + instruction: "Check the homepage".to_string(), + source_file: PathBuf::from("/test/root.test.toml"), + source_step_index: 1, + parent_chain: vec![], + step_timeout_secs: None, + skip: false, + setup: false, + checkpoint: None, + }, + ]; + let (run_id, session_id) = test_run_ids(); + let (_tmp, artifact_dir) = test_artifact_dir(); + + let mut session = MockSession::new(vec![ + vec![ + Ok(OutputChunk::Text("Browser started.".to_string())), + Ok(OutputChunk::Done), + ], + vec![ + Ok(OutputChunk::Text( + "RESULT ERROR: page not found".to_string(), + )), + Ok(OutputChunk::Done), + ], + ]); + + let outcome = execute_steps( + &mut session, + &steps, + &run_id, + &session_id, + &artifact_dir, + None, + None, + None, + std::path::Path::new("."), + &AtomicBool::new(false), + ) + .unwrap(); + + assert!(!outcome.all_passed); + assert_eq!(outcome.steps.len(), 2); + // Setup step succeeded (no RESULT marker, tolerated) + assert!(outcome.steps[0].setup); + assert_eq!( + outcome.steps[0].result, + StepResult::Verdict(StepVerdict::Ok) + ); + // Test step failed + assert!(!outcome.steps[1].setup); + assert!(outcome.steps[1].result.is_failure()); + } + + #[test] + fn setup_step_failure_aborts_run() { + let steps = vec![ + ExpandedStep { + step_id: 0, + instruction: "Start the browser".to_string(), + source_file: PathBuf::from("/test/root.test.toml"), + source_step_index: 0, + parent_chain: vec![], + step_timeout_secs: None, + skip: false, + setup: true, + checkpoint: None, + }, + ExpandedStep { + step_id: 1, + instruction: "Check the homepage".to_string(), + source_file: PathBuf::from("/test/root.test.toml"), + source_step_index: 1, + parent_chain: vec![], + 
step_timeout_secs: None, + skip: false, + setup: false, + checkpoint: None, + }, + ]; + let (run_id, session_id) = test_run_ids(); + let (_tmp, artifact_dir) = test_artifact_dir(); + + // Setup step gets a provider error — should abort, second step never runs + let mut session = MockSession::new(vec![ + vec![Err(ProviderError::SessionCrashed( + "process died".to_string(), + ))], + vec![ + Ok(OutputChunk::Text("RESULT OK".to_string())), + Ok(OutputChunk::Done), + ], + ]); + + let outcome = execute_steps( + &mut session, + &steps, + &run_id, + &session_id, + &artifact_dir, + None, + None, + None, + std::path::Path::new("."), + &AtomicBool::new(false), + ) + .unwrap(); + + // Only the setup step executed, and it failed + assert_eq!(outcome.steps.len(), 1); + assert!(outcome.steps[0].setup); + assert!(outcome.steps[0].result.is_failure()); + } } diff --git a/src/exit_code.rs b/src/exit_code.rs index 22e87ee..843baaa 100644 --- a/src/exit_code.rs +++ b/src/exit_code.rs @@ -134,6 +134,7 @@ mod tests { step_id: i, instruction: format!("step {i}"), source_file: PathBuf::from("test.test.toml"), + setup: false, result, transcript: String::new(), log_events: vec![], diff --git a/src/expand.rs b/src/expand.rs index f3e5bbf..24cfe28 100644 --- a/src/expand.rs +++ b/src/expand.rs @@ -19,6 +19,8 @@ pub struct ExpandedStep { pub step_timeout_secs: Option, /// If true, this step is skipped during execution. pub skip: bool, + /// If true, this is a setup step that always runs (even when checkpoint-skipped). + pub setup: bool, /// Optional checkpoint name for save/restore. 
pub checkpoint: Option, } @@ -112,6 +114,7 @@ fn expand_steps_inner( parent_chain: parent_chain.to_vec(), step_timeout_secs: step.step_timeout_secs, skip: step.skip, + setup: step.setup, checkpoint: step.checkpoint.clone(), }); *step_id += 1; diff --git a/src/report.rs b/src/report.rs index 754611e..4c1691b 100644 --- a/src/report.rs +++ b/src/report.rs @@ -185,13 +185,17 @@ pub fn report_path(artifact_dir: &ArtifactDir) -> std::path::PathBuf { } fn write_step_section(report: &mut String, outcome: &StepOutcome, artifact_dir: &ArtifactDir) { - let status_icon = match &outcome.result { - StepResult::Verdict(StepVerdict::Ok) => "OK", - StepResult::Verdict(StepVerdict::Warn(_)) => "WARN", - StepResult::Verdict(StepVerdict::Error(_)) => "ERROR", - StepResult::ProtocolError(_) => "PROTOCOL ERROR", - StepResult::Timeout => "TIMEOUT", - StepResult::ProviderFailed(_) => "PROVIDER ERROR", + let status_icon = if outcome.setup { + "SETUP" + } else { + match &outcome.result { + StepResult::Verdict(StepVerdict::Ok) => "OK", + StepResult::Verdict(StepVerdict::Warn(_)) => "WARN", + StepResult::Verdict(StepVerdict::Error(_)) => "ERROR", + StepResult::ProtocolError(_) => "PROTOCOL ERROR", + StepResult::Timeout => "TIMEOUT", + StepResult::ProviderFailed(_) => "PROVIDER ERROR", + } }; let _ = writeln!( @@ -337,6 +341,7 @@ mod tests { step_id, instruction: instruction.to_string(), source_file: PathBuf::from("tests/login.test.toml"), + setup: false, result: StepResult::Verdict(StepVerdict::Ok), transcript: "Checked.\nRESULT OK".to_string(), log_events: vec![], @@ -350,6 +355,7 @@ mod tests { step_id, instruction: "Check response time".to_string(), source_file: PathBuf::from("tests/perf.test.toml"), + setup: false, result: StepResult::Verdict(StepVerdict::Warn(msg.to_string())), transcript: format!("Checked.\nBUGATTI_LOG slow response\nRESULT WARN: {msg}"), log_events: vec![LogEvent { @@ -367,6 +373,7 @@ mod tests { step_id, instruction: "Verify login works".to_string(), source_file: 
PathBuf::from("tests/login.test.toml"), + setup: false, result: StepResult::Verdict(StepVerdict::Error(msg.to_string())), transcript: format!( "Tried login.\nBUGATTI_LOG Auth failed\nBUGATTI_LOG Retried once\nRESULT ERROR: {msg}" @@ -623,6 +630,7 @@ mod tests { step_id: 0, instruction: "Check something".to_string(), source_file: PathBuf::from("test.test.toml"), + setup: false, result: StepResult::ProtocolError("no RESULT marker".to_string()), transcript: "Some output without marker".to_string(), log_events: vec![], diff --git a/src/test_file.rs b/src/test_file.rs index 1b9e130..67486c3 100644 --- a/src/test_file.rs +++ b/src/test_file.rs @@ -2,19 +2,27 @@ use serde::Deserialize; use std::path::Path; /// A parsed test file loaded from a *.test.toml file. -#[derive(Debug, Clone, Deserialize, PartialEq)] -#[serde(deny_unknown_fields)] +#[derive(Debug, Clone, PartialEq)] pub struct TestFile { - /// Name of the test. + /// Name of the test. Defaults to the file stem if omitted. pub name: String, /// Optional per-test overrides. - #[serde(default)] pub overrides: Option, /// Ordered list of steps to execute. - #[serde(default)] pub steps: Vec, } +/// Raw deserialization target where `name` is optional. +#[derive(Deserialize)] +#[serde(deny_unknown_fields)] +struct RawTestFile { + name: Option, + #[serde(default)] + overrides: Option, + #[serde(default)] + steps: Vec, +} + /// Per-test overrides that merge over the global config. #[derive(Debug, Clone, Default, Deserialize, PartialEq)] #[serde(deny_unknown_fields)] @@ -49,6 +57,10 @@ pub struct Step { /// If true, this step is skipped during execution (counts as passed). #[serde(default)] pub skip: bool, + /// If true, this is a setup step that always runs (even when checkpoint-skipped). + /// Setup steps are not evaluated as pass/fail — they either complete or abort the run. + #[serde(default)] + pub setup: bool, /// Optional checkpoint name — saved after this step passes, restored if this step is skipped. 
#[serde(default)] pub checkpoint: Option, @@ -104,11 +116,29 @@ pub fn parse_test_file(path: &Path) -> Result { path: path_str.clone(), source: e, })?; - let test_file: TestFile = toml::from_str(&contents).map_err(|e| TestFileError::ParseError { + let raw: RawTestFile = toml::from_str(&contents).map_err(|e| TestFileError::ParseError { path: path_str.clone(), source: e, })?; + let default_name = path + .file_name() + .and_then(|f| f.to_str()) + .unwrap_or("unknown") + .strip_suffix(".test.toml") + .unwrap_or_else(|| { + path.file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("unknown") + }) + .to_string(); + + let test_file = TestFile { + name: raw.name.unwrap_or(default_name), + overrides: raw.overrides, + steps: raw.steps, + }; + // Validate each step has exactly one of: instruction, include_path, include_glob for (i, step) in test_file.steps.iter().enumerate() { let has_instruction = step.instruction.is_some(); @@ -336,6 +366,23 @@ step_timeout_secs = 900 assert_eq!(test_file.steps[1].step_timeout_secs, Some(900)); } + #[test] + fn name_defaults_to_file_stem() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("login-flow.test.toml"); + fs::write( + &path, + r#" +[[steps]] +instruction = "Do something" +"#, + ) + .unwrap(); + + let test_file = parse_test_file(&path).unwrap(); + assert_eq!(test_file.name, "login-flow"); + } + #[test] fn parse_error_file_not_found() { let path = Path::new("/nonexistent/path/test.test.toml"); diff --git a/tests/pipeline_integration.rs b/tests/pipeline_integration.rs index a63f0db..d738aa1 100644 --- a/tests/pipeline_integration.rs +++ b/tests/pipeline_integration.rs @@ -350,6 +350,7 @@ instruction = "This will fail" step_id: 0, instruction: "This will fail".to_string(), source_file: test_path.clone(), + setup: false, result: StepResult::Verdict(StepVerdict::Error("something broke".to_string())), transcript: "RESULT ERROR: something broke".to_string(), log_events: vec![],