Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions src/commands/simulate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,26 @@ impl Simulate {
if self.compression > 12 {
bail!("--compression must be between 0 and 12");
}

// --sample without --vcf is nonsensical.
if self.vcf.sample.is_some() && self.vcf.vcf.is_none() {
bail!("--sample requires --vcf");
}

// Validate VCF sample configuration upfront so the user gets a clear
// error before the simulation loop starts.
if let Some(vcf_path) = &self.vcf.vcf {
crate::vcf::validate_vcf_sample(vcf_path, self.vcf.sample.as_deref())?;
}

// Validate output parent directory exists.
if let Some(parent) = self.output.output.parent()
&& !parent.as_os_str().is_empty()
&& !parent.exists()
{
bail!("Output directory does not exist: {}", parent.display());
}

Ok(())
}

Expand Down
39 changes: 34 additions & 5 deletions src/vcf/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,27 @@ pub fn load_variants_for_contig(
Ok(variants)
}

/// Validate that a VCF file has the expected sample configuration.
///
/// Opens the VCF, reads the header, and checks:
/// - If `sample_name` is `Some`, that the named sample exists.
/// - If `sample_name` is `None`, that the VCF has exactly one sample.
///
/// Call this during argument validation to surface sample errors before
/// the per-contig simulation loop begins. The resolved sample index is
/// discarded; only success or failure is reported.
///
/// # Errors
/// Returns an error if the VCF cannot be opened, if its header cannot be
/// read, or if the sample configuration is invalid. Open and header-read
/// failures both include the offending path in the error context.
pub(crate) fn validate_vcf_sample(path: &Path, sample_name: Option<&str>) -> Result<()> {
    let mut reader = vcf::io::reader::Builder::default()
        .build_from_path(path)
        .with_context(|| format!("Failed to open VCF: {}", path.display()))?;

    // Attach the path here as well: without it a malformed or truncated header
    // surfaces as a bare error that gives no hint of which file was at fault.
    let header = reader
        .read_header()
        .with_context(|| format!("Failed to read VCF header: {}", path.display()))?;

    // Only the outcome of the lookup matters at validation time.
    resolve_sample_index(&header, sample_name)?;
    Ok(())
}

/// Resolve the sample index from a VCF header.
///
/// If `sample_name` is `Some`, looks up that sample. If `None` and the VCF has
Expand All @@ -99,10 +120,18 @@ fn resolve_sample_index(header: &vcf::Header, sample_name: Option<&str>) -> Resu

match sample_name {
Some(name) => {
let idx = sample_names
.iter()
.position(|s| s == name)
.ok_or_else(|| anyhow::anyhow!("Sample '{name}' not found in VCF header"))?;
let idx = sample_names.iter().position(|s| s == name).ok_or_else(|| {
if sample_names.is_empty() {
anyhow::anyhow!("Sample '{name}' not found in VCF (VCF has no sample columns)")
} else {
let available: Vec<&str> =
sample_names.iter().map(String::as_str).take(10).collect();
anyhow::anyhow!(
"Sample '{name}' not found in VCF. Available samples: {}",
available.join(", ")
)
}
})?;
Ok(idx)
}
None => {
Expand All @@ -115,7 +144,7 @@ fn resolve_sample_index(header: &vcf::Header, sample_name: Option<&str>) -> Resu
"VCF has {} samples but no --sample was specified. \
Available samples: {}",
sample_names.len(),
sample_names.iter().take(5).cloned().collect::<Vec<_>>().join(", ")
sample_names.iter().take(10).cloned().collect::<Vec<_>>().join(", ")
)
}
}
Expand Down
31 changes: 31 additions & 0 deletions tests/helpers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,37 @@ impl TestEnv {
path
}

/// Write a VCF file with the given sample names and contig definitions but
/// no variant records.
///
/// Useful for testing sample-level validation logic without needing actual
/// variants. Uses raw VCF text to support arbitrary sample configurations
/// (single-sample, multi-sample, zero-sample).
///
/// # Panics
/// Panics on I/O errors.
#[must_use]
pub fn write_vcf_header_only(
&self,
sample_names: &[&str],
contig_lengths: &[(&str, usize)],
) -> PathBuf {
let path = self.dir.path().join("samples.vcf");
let mut f = std::fs::File::create(&path).unwrap();
writeln!(f, "##fileformat=VCFv4.3").unwrap();
for &(name, len) in contig_lengths {
writeln!(f, "##contig=<ID={name},length={len}>").unwrap();
}
writeln!(f, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">").unwrap();
write!(f, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT").unwrap();
for name in sample_names {
write!(f, "\t{name}").unwrap();
}
writeln!(f).unwrap();
f.flush().unwrap();
path
}

/// Write a BED file with the given intervals and return the path.
///
/// Each entry is `(contig, start, end)` in 0-based half-open coordinates.
Expand Down
139 changes: 139 additions & 0 deletions tests/test_simulate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2970,3 +2970,142 @@ fn test_simulate_then_eval() {
}
panic!("ALL row not found in eval output");
}

// ── VCF and CLI validation tests ──────────────────────────────────────────

/// Passing `--sample` with no `--vcf` must fail with a message naming both flags.
#[test]
fn test_sample_without_vcf_fails() {
    let env = simple_env();
    let prefix = env.output_prefix();
    let prefix_str = prefix.to_str().unwrap();
    let reference = env.fasta_path.to_str().unwrap();

    // No -v/--vcf on purpose: --sample is given on its own.
    let args = [
        "simulate",
        "-r",
        reference,
        "-o",
        prefix_str,
        "--sample",
        "NA12878",
        "--coverage",
        "1",
    ];
    let (succeeded, _stdout, stderr) = run_simulate(&args);

    assert!(!succeeded, "should fail when --sample is given without --vcf");
    assert!(
        stderr.contains("--sample requires --vcf"),
        "error should mention --sample requires --vcf, got: {stderr}"
    );
}

/// Requesting a sample absent from the VCF header must fail, name the missing
/// sample, and list the samples that are available.
#[test]
fn test_wrong_sample_name_fails() {
    let env = simple_env();
    let prefix = env.output_prefix();
    let prefix_str = prefix.to_str().unwrap();
    let reference = env.fasta_path.to_str().unwrap();

    // The header declares only "SampleA"; the run asks for a different name.
    let vcf = env.write_vcf_header_only(&["SampleA"], &[("chr1", 1000)]);
    let vcf_arg = vcf.to_str().unwrap();

    let args = [
        "simulate",
        "-r",
        reference,
        "-v",
        vcf_arg,
        "--sample",
        "NoSuchSample",
        "-o",
        prefix_str,
        "--coverage",
        "1",
    ];
    let (succeeded, _stdout, stderr) = run_simulate(&args);

    assert!(!succeeded, "should fail when sample name is not in VCF");
    assert!(
        stderr.contains("NoSuchSample") && stderr.contains("not found"),
        "error should name the missing sample, got: {stderr}"
    );
    assert!(stderr.contains("SampleA"), "error should list available samples, got: {stderr}");
}

/// A multi-sample VCF with no `--sample` must fail, report the sample count,
/// point at `--sample`, and list every available sample.
#[test]
fn test_multi_sample_vcf_without_sample_flag_fails() {
    let env = simple_env();
    let prefix = env.output_prefix();
    let prefix_str = prefix.to_str().unwrap();
    let reference = env.fasta_path.to_str().unwrap();

    let declared_samples = ["SampleA", "SampleB", "SampleC"];
    let vcf = env.write_vcf_header_only(&declared_samples, &[("chr1", 1000)]);
    let vcf_arg = vcf.to_str().unwrap();

    // No --sample flag: the tool cannot pick one of the three for us.
    let args = ["simulate", "-r", reference, "-v", vcf_arg, "-o", prefix_str, "--coverage", "1"];
    let (succeeded, _stdout, stderr) = run_simulate(&args);

    assert!(!succeeded, "should fail when multi-sample VCF has no --sample");
    assert!(
        stderr.contains("3 samples") && stderr.contains("--sample"),
        "error should mention sample count and --sample flag, got: {stderr}"
    );
    assert!(
        stderr.contains("SampleA") && stderr.contains("SampleB") && stderr.contains("SampleC"),
        "error should list available samples, got: {stderr}"
    );
}

/// A VCF with exactly one sample column must be accepted without `--sample`.
#[test]
fn test_single_sample_vcf_without_sample_flag_works() {
    let env = simple_env();
    let prefix = env.output_prefix();
    let prefix_str = prefix.to_str().unwrap();
    let reference = env.fasta_path.to_str().unwrap();

    let vcf = env.write_vcf_header_only(&["OnlySample"], &[("chr1", 1000)]);
    let vcf_arg = vcf.to_str().unwrap();

    let args = ["simulate", "-r", reference, "-v", vcf_arg, "-o", prefix_str, "--coverage", "1"];
    let (succeeded, _stdout, stderr) = run_simulate(&args);

    assert!(succeeded, "single-sample VCF without --sample should succeed: {stderr}");
}

/// Selecting an existing sample from a multi-sample VCF must succeed.
#[test]
fn test_multi_sample_vcf_with_correct_sample_works() {
    let env = simple_env();
    let prefix = env.output_prefix();
    let prefix_str = prefix.to_str().unwrap();
    let reference = env.fasta_path.to_str().unwrap();

    let vcf = env.write_vcf_header_only(&["SampleA", "SampleB"], &[("chr1", 1000)]);
    let vcf_arg = vcf.to_str().unwrap();

    // Ask for the second of the two declared samples.
    let args = [
        "simulate",
        "-r",
        reference,
        "-v",
        vcf_arg,
        "--sample",
        "SampleB",
        "-o",
        prefix_str,
        "--coverage",
        "1",
    ];
    let (succeeded, _stdout, stderr) = run_simulate(&args);

    assert!(succeeded, "multi-sample VCF with correct --sample should succeed: {stderr}");
}

/// An output prefix whose parent directory is missing must be rejected with a
/// message saying the output directory does not exist.
#[test]
fn test_output_directory_does_not_exist_fails() {
    let env = simple_env();
    let reference = env.fasta_path.to_str().unwrap();

    // "no_such_dir" is never created, so the prefix's parent does not exist.
    let missing_dir = env.dir.path().join("no_such_dir");
    let prefix = missing_dir.join("output");
    let prefix_str = prefix.to_str().unwrap();

    let args = ["simulate", "-r", reference, "-o", prefix_str, "--coverage", "1"];
    let (succeeded, _stdout, stderr) = run_simulate(&args);

    assert!(!succeeded, "should fail when output directory doesn't exist");
    assert!(
        stderr.contains("Output directory does not exist"),
        "error should mention missing output directory, got: {stderr}"
    );
}
Loading