diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index babc70b..6da9b84 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -85,6 +85,27 @@ jobs:
           cd artifacts
           sha256sum *.tar.gz > checksums-sha256.txt
 
+      - name: Extract changelog for release
+        id: changelog
+        run: |
+          tag="${GITHUB_REF#refs/tags/}"
+          version="${tag#v}"
+          # Extract this version's section; index() compares "## [x.y.z]" literally, not as a regex
+          notes=$(awk -v ver="$version" '
+            /^## \[/ {
+              if (found) exit
+              if (index($0, "## [" ver "]") == 1) found=1; next
+            }
+            found { print }
+          ' CHANGELOG.md)
+          # Fail if we got nothing — forces you to update CHANGELOG.md before tagging
+          if [ -z "$notes" ]; then
+            echo "::error::No CHANGELOG.md entry found for version ${version}"
+            exit 1
+          fi
+          # Write to file to avoid quoting issues (printf emits the text verbatim)
+          printf '%s\n' "$notes" > release-notes.md
+
       - name: Create GitHub Release
         env:
           GH_TOKEN: ${{ github.token }}
@@ -92,7 +113,7 @@ jobs:
           tag="${GITHUB_REF#refs/tags/}"
           gh release create "${tag}" \
             --title "${tag}" \
-            --generate-notes \
+            --notes-file release-notes.md \
             --draft \
             artifacts/*.tar.gz \
             artifacts/checksums-sha256.txt
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..db92df1
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,61 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+## [0.4.0] - 2026-04-08
+
+### Added
+
+- Self-update command with passive version checking
+- Setup steps (`setup = true`) that always run even when checkpoint-skipped and are not counted in pass/fail
+- Test file `name` field now defaults to the file stem when omitted (e.g. `login-flow.test.toml` defaults to "login-flow")
+- Changelog based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/)
+
+### Changed
+
+- Release workflow now extracts notes from CHANGELOG.md instead of auto-generating from commits
+
+## [0.3.1] - 2026-04-04
+
+### Added
+
+- Agent setup hint in CLI help output
+- Coding agent quick-start prompt in README and getting-started docs
+- Links to bugatti.dev in README and CLI help
+
+## [0.3.0] - 2026-04-04
+
+### Added
+
+- Documentation site, llms.txt, and docs CI workflow
+- Checkpoint save/restore and step skip with checkpoint support
+- `--from-checkpoint` CLI flag and timestamp-based run IDs
+- Checkpoint timeout configuration
+- Comprehensive docs for includes, shared test files, skip, and checkpoints
+- Node.js Express and Python Flask example projects
+- Release workflow and installer script
+- Readiness URL checks for long-lived services
+- CLI skip flags for harness commands (`--skip-setup`, `--skip-teardown`)
+- Long-lived subprocess management with readiness checks
+- Result marker parser, report generation, and run artifacts
+- Claude Code provider adapter
+- Test discovery, step expansion with cycle detection, and end-to-end pipeline
+- Config types and `bugatti.config.toml` parsing
+- Test file types and `*.test.toml` parsing
+- CLI scaffold with `bugatti test` subcommand
+
+### Fixed
+
+- Clippy warnings for release build
+- Docs deploy workflow triggers and Node version
+- Result marker parser handling of embedded markers
+
+[Unreleased]: https://github.com/codesoda/bugatti-cli/compare/v0.4.0...HEAD
+[0.4.0]: https://github.com/codesoda/bugatti-cli/compare/v0.3.1...v0.4.0
+[0.3.1]: https://github.com/codesoda/bugatti-cli/compare/v0.3.0...v0.3.1
+[0.3.0]: https://github.com/codesoda/bugatti-cli/releases/tag/v0.3.0
diff --git a/Cargo.lock b/Cargo.lock
index 474d5ce..8b4c09f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -117,7 +117,7 @@ dependencies = [
 
 [[package]]
 name = "bugatti"
-version = "0.3.1"
+version = "0.4.0"
 dependencies = [
  "chrono",
  "clap",
diff --git a/Cargo.toml b/Cargo.toml
index 0d3ca0b..cc6532b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "bugatti"
-version = "0.3.1"
+version = "0.4.0"
 edition = "2021"
 description = "A CLI for plain-English, agent-assisted local application verification using *.test.toml files"
 
diff --git a/src/config.rs b/src/config.rs
index 5a2a39d..ba8c73d 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -373,6 +373,7 @@ readiness_url = "http://localhost:3000/health"
                 include_glob: None,
                 step_timeout_secs: None,
                 skip: false,
+                setup: false,
                 checkpoint: None,
             }],
         };
diff --git a/src/executor.rs b/src/executor.rs
index 0516102..2aaeea5 100644
--- a/src/executor.rs
+++ b/src/executor.rs
@@ -70,6 +70,8 @@ pub struct StepOutcome {
     pub instruction: String,
     /// Source file for provenance.
     pub source_file: PathBuf,
+    /// Whether this was a setup step (not counted in test pass/fail).
+    pub setup: bool,
     /// The result: either a parsed verdict, a protocol error, or a timeout.
     pub result: StepResult,
     /// Full transcript text captured from the provider.
@@ -410,7 +412,7 @@ pub fn execute_steps(
     // Checkpoint restore: find the last checkpoint among leading skipped steps
     // and restore it before executing non-skipped steps.
     if let Some(cp_config) = checkpoint_config {
-        let first_non_skipped = steps.iter().position(|s| !s.skip);
+        let first_non_skipped = steps.iter().position(|s| !s.skip && !s.setup);
         if let Some(boundary) = first_non_skipped {
             if boundary > 0 {
                 // Find the last checkpoint among skipped steps before the boundary
@@ -466,8 +468,8 @@ pub fn execute_steps(
             })
             .unwrap_or_else(|| step.source_file.display().to_string());
 
-        // Handle skipped steps
-        if step.skip {
+        // Handle skipped steps (setup steps bypass skip)
+        if step.skip && !step.setup {
             println!(
                 "SKIP {}/{} ... {} (from {})",
                 step.step_id + 1,
@@ -479,6 +481,7 @@ pub fn execute_steps(
                 step_id: step.step_id,
                 instruction: step.instruction.clone(),
                 source_file: step.source_file.clone(),
+                setup: false,
                 result: StepResult::Verdict(StepVerdict::Ok),
                 transcript: String::new(),
                 log_events: vec![],
@@ -488,14 +491,16 @@ pub fn execute_steps(
             continue;
         }
 
+        let step_label = if step.setup { "SETUP" } else { "STEP" };
         tracing::info!(
             step_id = step.step_id,
             total = total_steps,
+            setup = step.setup,
             source = %step.source_file.display(),
             "step execution begin"
         );
         println!(
-            "STEP {}/{} ... {} (from {})",
+            "{step_label} {}/{} ... {} (from {})",
             step.step_id + 1,
             total_steps,
             instruction_summary,
@@ -525,7 +530,14 @@ pub fn execute_steps(
 
         let (step_result, transcript) = match result {
             Ok((transcript_text, verdict)) => (verdict, transcript_text),
-            Err((transcript_text, err_result)) => (err_result, transcript_text),
+            Err((transcript_text, err_result)) => {
+                // Setup steps tolerate missing RESULT markers — the agent just runs the command
+                if step.setup && matches!(err_result, StepResult::ProtocolError(_)) {
+                    (StepResult::Verdict(StepVerdict::Ok), transcript_text)
+                } else {
+                    (err_result, transcript_text)
+                }
+            }
         };
 
         // Parse BUGATTI_LOG events from the transcript
@@ -606,6 +618,7 @@ pub fn execute_steps(
             step_id: step.step_id,
             instruction: step.instruction.clone(),
             source_file: step.source_file.clone(),
+            setup: step.setup,
             result: step_result,
             transcript,
             log_events,
@@ -690,7 +703,13 @@ pub fn execute_steps(
         }
     }
 
-    let all_passed = outcomes.iter().all(|o| o.result.is_pass());
+    // Setup steps are excluded from the ok/warn/fail counts in the summary, but
+    // they still gate the run verdict: a setup step that fails aborts the run and
+    // leaves a failing outcome behind. Filtering setup outcomes out here would
+    // make `all()` vacuously true when setup aborts before any test step runs.
+    // Tolerated missing RESULT markers on setup steps were already mapped to Ok
+    // when the step result was computed, so they do not trip this check.
+    let all_passed = outcomes.iter().all(|o| o.result.is_pass());
     let total_duration = run_start.elapsed();
 
     // Print final run status (after teardown)
@@ -759,23 +778,33 @@ fn print_run_summary(outcomes: &[StepOutcome], total_duration: Duration, total_s
     println!();
     println!("═══════════════════════════════════════════════════");
 
-    let completed = outcomes.len();
-    let ok_count = outcomes
+    let test_outcomes: Vec<_> = outcomes.iter().filter(|o| !o.setup).collect();
+    let setup_count = outcomes.iter().filter(|o| o.setup).count();
+    let completed = test_outcomes.len();
+    let ok_count = test_outcomes
         .iter()
         .filter(|o| matches!(o.result, StepResult::Verdict(StepVerdict::Ok)))
         .count();
-    let warn_count = outcomes
+    let warn_count = test_outcomes
         .iter()
         .filter(|o| matches!(o.result, StepResult::Verdict(StepVerdict::Warn(_))))
         .count();
-    let fail_count = outcomes.iter().filter(|o| o.result.is_failure()).count();
-    let skipped = total_steps - completed;
+    let fail_count = test_outcomes
+        .iter()
+        .filter(|o| o.result.is_failure())
+        .count();
+    let skipped = total_steps - completed - setup_count;
 
-    let all_passed = outcomes.iter().all(|o| o.result.is_pass());
+    let all_passed = outcomes.iter().all(|o| o.result.is_pass()); // failed setup fails the run
     let status = if all_passed { "PASSED" } else { "FAILED" };
 
+    let setup_part = if setup_count > 0 {
+        format!(", {setup_count} setup")
+    } else {
+        String::new()
+    };
     println!(
-        "Run {status}: {ok_count} ok, {warn_count} warn, {fail_count} failed, {skipped} skipped ({:.1}s)",
+        "Run {status}: {ok_count} ok, {warn_count} warn, {fail_count} failed, {skipped} skipped{setup_part} ({:.1}s)",
         total_duration.as_secs_f64()
     );
     println!("═══════════════════════════════════════════════════");
@@ -1011,6 +1040,7 @@ mod tests {
                 parent_chain: vec![],
                 step_timeout_secs: None,
                 skip: false,
+                setup: false,
                 checkpoint: None,
             },
             ExpandedStep {
@@ -1021,6 +1051,7 @@ mod tests {
                 parent_chain: vec![],
                 step_timeout_secs: None,
                 skip: false,
+                setup: false,
                 checkpoint: None,
             },
         ]
@@ -1915,4 +1946,270 @@ mod tests {
         // No steps should have executed
         assert_eq!(outcome.steps.len(), 0);
     }
+
+    // --- Setup step tests ---
+
+    #[test]
+    fn setup_step_tolerates_missing_result_marker() {
+        let steps = vec![ExpandedStep {
+            step_id: 0,
+            instruction: "Start the browser in headed mode".to_string(),
+            source_file: PathBuf::from("/test/root.test.toml"),
+            source_step_index: 0,
+            parent_chain: vec![],
+            step_timeout_secs: None,
+            skip: false,
+            setup: true,
+            checkpoint: None,
+        }];
+        let (run_id, session_id) = test_run_ids();
+        let (_tmp, artifact_dir) = test_artifact_dir();
+
+        // Agent responds without a RESULT marker — should be OK for setup steps
+        let mut session = MockSession::new(vec![vec![
+            Ok(OutputChunk::Text(
+                "Browser started in headed mode.".to_string(),
+            )),
+            Ok(OutputChunk::Done),
+        ]]);
+
+        let outcome = execute_steps(
+            &mut session,
+            &steps,
+            &run_id,
+            &session_id,
+            &artifact_dir,
+            None,
+            None,
+            None,
+            std::path::Path::new("."),
+            &AtomicBool::new(false),
+        )
+        .unwrap();
+
+        assert!(outcome.all_passed);
+        assert_eq!(outcome.steps.len(), 1);
+        assert!(outcome.steps[0].setup);
+        assert_eq!(
+            outcome.steps[0].result,
+            StepResult::Verdict(StepVerdict::Ok)
+        );
+    }
+
+    #[test]
+    fn setup_step_bypasses_skip() {
+        let steps = vec![
+            ExpandedStep {
+                step_id: 0,
+                instruction: "Start the browser".to_string(),
+                source_file: PathBuf::from("/test/root.test.toml"),
+                source_step_index: 0,
+                parent_chain: vec![],
+                step_timeout_secs: None,
+                skip: true,
+                setup: true,
+                checkpoint: None,
+            },
+            ExpandedStep {
+                step_id: 1,
+                instruction: "Seed the database".to_string(),
+                source_file: PathBuf::from("/test/root.test.toml"),
+                source_step_index: 1,
+                parent_chain: vec![],
+                step_timeout_secs: None,
+                skip: true,
+                setup: false,
+                checkpoint: None,
+            },
+            ExpandedStep {
+                step_id: 2,
+                instruction: "Verify the homepage".to_string(),
+                source_file: PathBuf::from("/test/root.test.toml"),
+                source_step_index: 2,
+                parent_chain: vec![],
+                step_timeout_secs: None,
+                skip: false,
+                setup: false,
+                checkpoint: None,
+            },
+        ];
+        let (run_id, session_id) = test_run_ids();
+        let (_tmp, artifact_dir) = test_artifact_dir();
+
+        // Only 2 steps should execute: the setup step (bypasses skip) and the non-skipped step
+        let mut session = MockSession::new(vec![
+            vec![
+                Ok(OutputChunk::Text("Browser started.\nRESULT OK".to_string())),
+                Ok(OutputChunk::Done),
+            ],
+            vec![
+                Ok(OutputChunk::Text("Homepage loaded.\nRESULT OK".to_string())),
+                Ok(OutputChunk::Done),
+            ],
+        ]);
+
+        let outcome = execute_steps(
+            &mut session,
+            &steps,
+            &run_id,
+            &session_id,
+            &artifact_dir,
+            None,
+            None,
+            None,
+            std::path::Path::new("."),
+            &AtomicBool::new(false),
+        )
+        .unwrap();
+
+        assert!(outcome.all_passed);
+        // 3 outcomes: setup (executed), skipped (auto-ok), test (executed)
+        assert_eq!(outcome.steps.len(), 3);
+        // Step 0: setup ran
+        assert!(outcome.steps[0].setup);
+        assert_eq!(
+            outcome.steps[0].result,
+            StepResult::Verdict(StepVerdict::Ok)
+        );
+        // Step 1: skipped
+        assert!(!outcome.steps[1].setup);
+        assert_eq!(outcome.steps[1].duration, Duration::ZERO);
+        // Step 2: test ran
+        assert!(!outcome.steps[2].setup);
+        assert_eq!(
+            outcome.steps[2].result,
+            StepResult::Verdict(StepVerdict::Ok)
+        );
+    }
+
+    #[test]
+    fn setup_step_not_counted_in_all_passed() {
+        // A setup step + a failing test step: all_passed should be false
+        // because the test step failed, not because of the setup step.
+        let steps = vec![
+            ExpandedStep {
+                step_id: 0,
+                instruction: "Start the browser".to_string(),
+                source_file: PathBuf::from("/test/root.test.toml"),
+                source_step_index: 0,
+                parent_chain: vec![],
+                step_timeout_secs: None,
+                skip: false,
+                setup: true,
+                checkpoint: None,
+            },
+            ExpandedStep {
+                step_id: 1,
+                instruction: "Check the homepage".to_string(),
+                source_file: PathBuf::from("/test/root.test.toml"),
+                source_step_index: 1,
+                parent_chain: vec![],
+                step_timeout_secs: None,
+                skip: false,
+                setup: false,
+                checkpoint: None,
+            },
+        ];
+        let (run_id, session_id) = test_run_ids();
+        let (_tmp, artifact_dir) = test_artifact_dir();
+
+        let mut session = MockSession::new(vec![
+            vec![
+                Ok(OutputChunk::Text("Browser started.".to_string())),
+                Ok(OutputChunk::Done),
+            ],
+            vec![
+                Ok(OutputChunk::Text(
+                    "RESULT ERROR: page not found".to_string(),
+                )),
+                Ok(OutputChunk::Done),
+            ],
+        ]);
+
+        let outcome = execute_steps(
+            &mut session,
+            &steps,
+            &run_id,
+            &session_id,
+            &artifact_dir,
+            None,
+            None,
+            None,
+            std::path::Path::new("."),
+            &AtomicBool::new(false),
+        )
+        .unwrap();
+
+        assert!(!outcome.all_passed);
+        assert_eq!(outcome.steps.len(), 2);
+        // Setup step succeeded (no RESULT marker, tolerated)
+        assert!(outcome.steps[0].setup);
+        assert_eq!(
+            outcome.steps[0].result,
+            StepResult::Verdict(StepVerdict::Ok)
+        );
+        // Test step failed
+        assert!(!outcome.steps[1].setup);
+        assert!(outcome.steps[1].result.is_failure());
+    }
+
+    #[test]
+    fn setup_step_failure_aborts_run() {
+        let steps = vec![
+            ExpandedStep {
+                step_id: 0,
+                instruction: "Start the browser".to_string(),
+                source_file: PathBuf::from("/test/root.test.toml"),
+                source_step_index: 0,
+                parent_chain: vec![],
+                step_timeout_secs: None,
+                skip: false,
+                setup: true,
+                checkpoint: None,
+            },
+            ExpandedStep {
+                step_id: 1,
+                instruction: "Check the homepage".to_string(),
+                source_file: PathBuf::from("/test/root.test.toml"),
+                source_step_index: 1,
+                parent_chain: vec![],
+                step_timeout_secs: None,
+                skip: false,
+                setup: false,
+                checkpoint: None,
+            },
+        ];
+        let (run_id, session_id) = test_run_ids();
+        let (_tmp, artifact_dir) = test_artifact_dir();
+
+        // Setup step gets a provider error — should abort, second step never runs
+        let mut session = MockSession::new(vec![
+            vec![Err(ProviderError::SessionCrashed(
+                "process died".to_string(),
+            ))],
+            vec![
+                Ok(OutputChunk::Text("RESULT OK".to_string())),
+                Ok(OutputChunk::Done),
+            ],
+        ]);
+
+        let outcome = execute_steps(
+            &mut session,
+            &steps,
+            &run_id,
+            &session_id,
+            &artifact_dir,
+            None,
+            None,
+            None,
+            std::path::Path::new("."),
+            &AtomicBool::new(false),
+        )
+        .unwrap();
+
+        // Only the setup step executed, and it failed
+        assert_eq!(outcome.steps.len(), 1);
+        assert!(outcome.steps[0].setup);
+        assert!(outcome.steps[0].result.is_failure());
+    }
 }
diff --git a/src/exit_code.rs b/src/exit_code.rs
index 22e87ee..843baaa 100644
--- a/src/exit_code.rs
+++ b/src/exit_code.rs
@@ -134,6 +134,7 @@ mod tests {
                 step_id: i,
                 instruction: format!("step {i}"),
                 source_file: PathBuf::from("test.test.toml"),
+                setup: false,
                 result,
                 transcript: String::new(),
                 log_events: vec![],
diff --git a/src/expand.rs b/src/expand.rs
index f3e5bbf..24cfe28 100644
--- a/src/expand.rs
+++ b/src/expand.rs
@@ -19,6 +19,8 @@ pub struct ExpandedStep {
     pub step_timeout_secs: Option<u64>,
     /// If true, this step is skipped during execution.
     pub skip: bool,
+    /// If true, this is a setup step that always runs (even when checkpoint-skipped).
+    pub setup: bool,
     /// Optional checkpoint name for save/restore.
     pub checkpoint: Option<String>,
 }
@@ -112,6 +114,7 @@ fn expand_steps_inner(
                 parent_chain: parent_chain.to_vec(),
                 step_timeout_secs: step.step_timeout_secs,
                 skip: step.skip,
+                setup: step.setup,
                 checkpoint: step.checkpoint.clone(),
             });
             *step_id += 1;
diff --git a/src/report.rs b/src/report.rs
index 754611e..4c1691b 100644
--- a/src/report.rs
+++ b/src/report.rs
@@ -185,13 +185,17 @@ pub fn report_path(artifact_dir: &ArtifactDir) -> std::path::PathBuf {
 }
 
 fn write_step_section(report: &mut String, outcome: &StepOutcome, artifact_dir: &ArtifactDir) {
-    let status_icon = match &outcome.result {
-        StepResult::Verdict(StepVerdict::Ok) => "OK",
-        StepResult::Verdict(StepVerdict::Warn(_)) => "WARN",
-        StepResult::Verdict(StepVerdict::Error(_)) => "ERROR",
-        StepResult::ProtocolError(_) => "PROTOCOL ERROR",
-        StepResult::Timeout => "TIMEOUT",
-        StepResult::ProviderFailed(_) => "PROVIDER ERROR",
+    let status_icon = if outcome.setup {
+        "SETUP"
+    } else {
+        match &outcome.result {
+            StepResult::Verdict(StepVerdict::Ok) => "OK",
+            StepResult::Verdict(StepVerdict::Warn(_)) => "WARN",
+            StepResult::Verdict(StepVerdict::Error(_)) => "ERROR",
+            StepResult::ProtocolError(_) => "PROTOCOL ERROR",
+            StepResult::Timeout => "TIMEOUT",
+            StepResult::ProviderFailed(_) => "PROVIDER ERROR",
+        }
     };
 
     let _ = writeln!(
@@ -337,6 +341,7 @@ mod tests {
             step_id,
             instruction: instruction.to_string(),
             source_file: PathBuf::from("tests/login.test.toml"),
+            setup: false,
             result: StepResult::Verdict(StepVerdict::Ok),
             transcript: "Checked.\nRESULT OK".to_string(),
             log_events: vec![],
@@ -350,6 +355,7 @@ mod tests {
             step_id,
             instruction: "Check response time".to_string(),
             source_file: PathBuf::from("tests/perf.test.toml"),
+            setup: false,
             result: StepResult::Verdict(StepVerdict::Warn(msg.to_string())),
             transcript: format!("Checked.\nBUGATTI_LOG slow response\nRESULT WARN: {msg}"),
             log_events: vec![LogEvent {
@@ -367,6 +373,7 @@ mod tests {
             step_id,
             instruction: "Verify login works".to_string(),
             source_file: PathBuf::from("tests/login.test.toml"),
+            setup: false,
             result: StepResult::Verdict(StepVerdict::Error(msg.to_string())),
             transcript: format!(
                 "Tried login.\nBUGATTI_LOG Auth failed\nBUGATTI_LOG Retried once\nRESULT ERROR: {msg}"
@@ -623,6 +630,7 @@ mod tests {
                 step_id: 0,
                 instruction: "Check something".to_string(),
                 source_file: PathBuf::from("test.test.toml"),
+                setup: false,
                 result: StepResult::ProtocolError("no RESULT marker".to_string()),
                 transcript: "Some output without marker".to_string(),
                 log_events: vec![],
diff --git a/src/test_file.rs b/src/test_file.rs
index 1b9e130..67486c3 100644
--- a/src/test_file.rs
+++ b/src/test_file.rs
@@ -2,19 +2,27 @@ use serde::Deserialize;
 use std::path::Path;
 
 /// A parsed test file loaded from a *.test.toml file.
-#[derive(Debug, Clone, Deserialize, PartialEq)]
-#[serde(deny_unknown_fields)]
+#[derive(Debug, Clone, PartialEq)]
 pub struct TestFile {
-    /// Name of the test.
+    /// Name of the test. Defaults to the file stem if omitted.
     pub name: String,
     /// Optional per-test overrides.
-    #[serde(default)]
     pub overrides: Option<TestOverrides>,
     /// Ordered list of steps to execute.
-    #[serde(default)]
     pub steps: Vec<Step>,
 }
 
+/// Raw serde target for a test file: `name` is optional here and defaulted by `parse_test_file`.
+#[derive(Deserialize)]
+#[serde(deny_unknown_fields)]
+struct RawTestFile {
+    name: Option<String>,
+    #[serde(default)]
+    overrides: Option<TestOverrides>,
+    #[serde(default)]
+    steps: Vec<Step>,
+}
+
 /// Per-test overrides that merge over the global config.
 #[derive(Debug, Clone, Default, Deserialize, PartialEq)]
 #[serde(deny_unknown_fields)]
@@ -49,6 +57,10 @@ pub struct Step {
     /// If true, this step is skipped during execution (counts as passed).
     #[serde(default)]
     pub skip: bool,
+    /// If true, this is a setup step that always runs (even when checkpoint-skipped).
+    /// Setup steps are not evaluated as pass/fail — they either complete or abort the run.
+    #[serde(default)]
+    pub setup: bool,
     /// Optional checkpoint name — saved after this step passes, restored if this step is skipped.
     #[serde(default)]
     pub checkpoint: Option<String>,
@@ -104,11 +116,29 @@ pub fn parse_test_file(path: &Path) -> Result<TestFile, TestFileError> {
         path: path_str.clone(),
         source: e,
     })?;
-    let test_file: TestFile = toml::from_str(&contents).map_err(|e| TestFileError::ParseError {
+    let raw: RawTestFile = toml::from_str(&contents).map_err(|e| TestFileError::ParseError {
         path: path_str.clone(),
         source: e,
     })?;
 
+    let default_name = path
+        .file_name()
+        .and_then(|f| f.to_str())
+        .unwrap_or("unknown")
+        .strip_suffix(".test.toml")
+        .unwrap_or_else(|| {
+            path.file_stem()
+                .and_then(|s| s.to_str())
+                .unwrap_or("unknown")
+        })
+        .to_string();
+
+    let test_file = TestFile {
+        name: raw.name.unwrap_or(default_name),
+        overrides: raw.overrides,
+        steps: raw.steps,
+    };
+
     // Validate each step has exactly one of: instruction, include_path, include_glob
     for (i, step) in test_file.steps.iter().enumerate() {
         let has_instruction = step.instruction.is_some();
@@ -336,6 +366,23 @@ step_timeout_secs = 900
         assert_eq!(test_file.steps[1].step_timeout_secs, Some(900));
     }
 
+    #[test]
+    fn name_defaults_to_file_stem() {
+        let dir = tempfile::tempdir().unwrap();
+        let path = dir.path().join("login-flow.test.toml");
+        fs::write(
+            &path,
+            r#"
+[[steps]]
+instruction = "Do something"
+"#,
+        )
+        .unwrap();
+
+        let test_file = parse_test_file(&path).unwrap();
+        assert_eq!(test_file.name, "login-flow");
+    }
+
     #[test]
     fn parse_error_file_not_found() {
         let path = Path::new("/nonexistent/path/test.test.toml");
diff --git a/tests/pipeline_integration.rs b/tests/pipeline_integration.rs
index a63f0db..d738aa1 100644
--- a/tests/pipeline_integration.rs
+++ b/tests/pipeline_integration.rs
@@ -350,6 +350,7 @@ instruction = "This will fail"
             step_id: 0,
             instruction: "This will fail".to_string(),
             source_file: test_path.clone(),
+            setup: false,
             result: StepResult::Verdict(StepVerdict::Error("something broke".to_string())),
             transcript: "RESULT ERROR: something broke".to_string(),
             log_events: vec![],