From 87900a3a856a04461fd8f77ef32461f5b20f37f0 Mon Sep 17 00:00:00 2001 From: Louis Le Nezet Date: Mon, 22 Dec 2025 14:45:47 +0100 Subject: [PATCH 1/5] Update beagle5 --- modules/nf-core/beagle5/beagle/main.nf | 52 +++++++---- modules/nf-core/beagle5/beagle/meta.yml | 4 + .../nf-core/beagle5/beagle/tests/main.nf.test | 87 ++++++++++++------- .../beagle5/beagle/tests/main.nf.test.snap | 77 +++++++++++++--- 4 files changed, 158 insertions(+), 62 deletions(-) diff --git a/modules/nf-core/beagle5/beagle/main.nf b/modules/nf-core/beagle5/beagle/main.nf index 812fa5ff..b9b21554 100644 --- a/modules/nf-core/beagle5/beagle/main.nf +++ b/modules/nf-core/beagle5/beagle/main.nf @@ -1,47 +1,65 @@ process BEAGLE5_BEAGLE { - tag "$meta.id" + tag "${meta.id}" label 'process_high' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/beagle:5.5_27Feb25.75f--hdfd78af_0': - 'biocontainers/beagle:5.5_27Feb25.75f--hdfd78af_0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/beagle:5.5_27Feb25.75f--hdfd78af_0' + : 'biocontainers/beagle:5.5_27Feb25.75f--hdfd78af_0'}" input: - tuple val(meta), path(vcf), path(vcf_index), path(refpanel), path(refpanel_index), path(genmap), path(exclsamples), path(exclmarkers) + // Including `val(region)` to prevent errors with multi-chromosome VCFs and single-chromosome reference panels. + // This enhances clarity and simplifies implementation in the subworkflow. 
+ tuple val(meta), path(vcf), path(vcf_index), path(refpanel), path(refpanel_index), path(genmap), path(exclsamples), path(exclmarkers), val(region) output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - tuple val(meta), path("*.log") , emit: log - path "versions.yml" , emit: versions + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}.bglout" - def ref_command = refpanel ? "ref=$refpanel" : "" - def map_command = genmap ? "map=$genmap" : "" - def excludesamples_command = exclsamples ? "excludesamples=$exclsamples" : "" - def excludemarkers_command = exclmarkers ? "excludemarkers=$exclmarkers" : "" + + def ref_command = refpanel ? "ref=${refpanel}" : "" + def map_command = genmap ? "map=${genmap}" : "" + def region_cmd = region ? "chrom=${region}" : "" + + def excludesamples_command = exclsamples ? "excludesamples=${exclsamples}" : "" + def excludemarkers_command = exclmarkers ? "excludemarkers=${exclmarkers}" : "" def avail_mem = 3072 if (!task.memory) { - log.info '[beagle] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + log.info('[beagle] Available memory not known - defaulting to 3GB. 
Specify process memory requirements to change this.') } else { - avail_mem = (task.memory.mega*0.8).intValue() + avail_mem = (task.memory.mega * 0.8).intValue() } """ beagle -Xmx${avail_mem}M \\ gt=${vcf} \\ out=${prefix} \\ - $args \\ + ${args} \\ ${ref_command} \\ ${map_command} \\ + ${region_cmd} \\ ${excludesamples_command} \\ - ${excludemarkers_command} \\ + ${excludemarkers_command} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + beagle: \$(beagle 2>&1 |head -n1 | sed -rn 's/beagle\\.(.*)\\.jar \\(version (.*)\\)/\\2rev\\1/p') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}.bglout" + """ + echo | gzip > ${prefix}.vcf.gz + touch ${prefix}.log cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/beagle5/beagle/meta.yml b/modules/nf-core/beagle5/beagle/meta.yml index 100d915b..8f32c9a7 100644 --- a/modules/nf-core/beagle5/beagle/meta.yml +++ b/modules/nf-core/beagle5/beagle/meta.yml @@ -57,6 +57,10 @@ input: the analysis pattern: "*.*" ontologies: [] + - region: + type: string + description: Region to perform imputation + pattern: "(chr)?\\d*:\\d*-\\d*" output: vcf: - - meta: diff --git a/modules/nf-core/beagle5/beagle/tests/main.nf.test b/modules/nf-core/beagle5/beagle/tests/main.nf.test index 695478ed..6157c15b 100644 --- a/modules/nf-core/beagle5/beagle/tests/main.nf.test +++ b/modules/nf-core/beagle5/beagle/tests/main.nf.test @@ -16,11 +16,11 @@ nextflow_process { process { """ input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi", checkIfExists: true), - [], [], [], [], [] - ] + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz", 
checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists: true), + [], [], [], [], [], [] + ] """ } } @@ -29,11 +29,11 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - path(process.out.vcf[0][1]).vcf.variantsMD5, - file(process.out.log[0][1]).name, - process.out.versions - ).match() - } + path(process.out.vcf[0][1]).vcf.variantsMD5, + file(process.out.log[0][1]).name, + process.out.versions + ).match() + } ) } } @@ -44,13 +44,13 @@ nextflow_process { process { """ input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi", checkIfExists: true), + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), - [], [], [] - ] + [], [], [], "chr22" + ] """ } } @@ -59,30 +59,28 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - path(process.out.vcf[0][1]).vcf.variantsMD5, - file(process.out.log[0][1]).name, - process.out.versions - ).match() - } + path(process.out.vcf[0][1]).vcf.variantsMD5, + file(process.out.log[0][1]).name, + process.out.versions + ).match() + } ) } } test("test-beagle5-beagle-ref-map") { - tag "test" - when { process { """ input[0] = [ - [ id:'test', single_end:false 
], // meta map - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi", checkIfExists: true), + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), - file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/phaseimpute/hum_data/reference_genome/GRCh38_chr22.plink.map"), - [], [] - ] + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.plink.map"), + [], [], "chr22" + ] """ } } @@ -91,11 +89,34 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - path(process.out.vcf[0][1]).vcf.variantsMD5, - file(process.out.log[0][1]).name, - process.out.versions - ).match() - } + path(process.out.vcf[0][1]).vcf.variantsMD5, + file(process.out.log[0][1]).name, + process.out.versions + ).match() + } + ) + } + } + + test("test-beagle5-beagle-ref-map - stub") { + options '-stub' + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists: true), + [], [], [], [], [], [] + ] + """ + } + } + + then { + assertAll( + { 
assert process.success }, + { assert snapshot(process.out).match() } ) } } diff --git a/modules/nf-core/beagle5/beagle/tests/main.nf.test.snap b/modules/nf-core/beagle5/beagle/tests/main.nf.test.snap index fc5761c3..03a01280 100644 --- a/modules/nf-core/beagle5/beagle/tests/main.nf.test.snap +++ b/modules/nf-core/beagle5/beagle/tests/main.nf.test.snap @@ -1,44 +1,97 @@ { "test-beagle5-beagle": { "content": [ - "1afe4a7e2a485a60086313a543a48ffd", + "17649c1b1fd157f032bb238afba1df8d", "test.bglout.log", [ "versions.yml:md5,4243bfcec3e194574fd1907ca839c0de" ] ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2025-11-07T11:52:21.575406872" + "timestamp": "2025-11-26T18:29:03.735342674" + }, + "test-beagle5-beagle-ref-map - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bglout.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bglout.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,4243bfcec3e194574fd1907ca839c0de" + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bglout.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bglout.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,4243bfcec3e194574fd1907ca839c0de" + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-26T18:33:48.428917226" }, "test-beagle5-beagle-ref-map": { "content": [ - "1afe4a7e2a485a60086313a543a48ffd", + "9cab6e975108d5f6dbb7773b794711c2", "test.bglout.log", [ "versions.yml:md5,4243bfcec3e194574fd1907ca839c0de" ] ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2025-11-07T12:01:09.103903793" + "timestamp": 
"2025-11-26T18:34:49.85652516" }, "test-beagle5-beagle-ref": { "content": [ - "1afe4a7e2a485a60086313a543a48ffd", + "9cab6e975108d5f6dbb7773b794711c2", "test.bglout.log", [ "versions.yml:md5,4243bfcec3e194574fd1907ca839c0de" ] ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2025-11-07T11:52:29.603582259" + "timestamp": "2025-11-26T18:34:41.457528892" } } \ No newline at end of file From 1372a000c4663f90a877d86047db16308fa79f18 Mon Sep 17 00:00:00 2001 From: Louis Le Nezet Date: Mon, 22 Dec 2025 14:46:26 +0100 Subject: [PATCH 2/5] Move to nf-core sbwf --- subworkflows/local/vcf_impute_beagle5/main.nf | 77 ------ .../local/vcf_impute_beagle5/meta.yml | 90 ------- .../vcf_impute_beagle5/tests/main.nf.test | 203 --------------- .../tests/main.nf.test.snap | 160 ------------ .../vcf_impute_beagle5/tests/nextflow.config | 20 -- .../local/vcf_impute_beagle5/tests/tags.yml | 2 - .../nf-core/vcf_impute_beagle5/main.nf | 121 +++++++++ .../nf-core/vcf_impute_beagle5/meta.yml | 100 ++++++++ .../vcf_impute_beagle5/tests/main.nf.test | 237 ++++++++++++++++++ .../tests/main.nf.test.snap | 206 +++++++++++++++ .../vcf_impute_beagle5/tests/nextflow.config | 25 ++ 11 files changed, 689 insertions(+), 552 deletions(-) delete mode 100644 subworkflows/local/vcf_impute_beagle5/main.nf delete mode 100644 subworkflows/local/vcf_impute_beagle5/meta.yml delete mode 100644 subworkflows/local/vcf_impute_beagle5/tests/main.nf.test delete mode 100644 subworkflows/local/vcf_impute_beagle5/tests/main.nf.test.snap delete mode 100644 subworkflows/local/vcf_impute_beagle5/tests/nextflow.config delete mode 100644 subworkflows/local/vcf_impute_beagle5/tests/tags.yml create mode 100644 subworkflows/nf-core/vcf_impute_beagle5/main.nf create mode 100644 subworkflows/nf-core/vcf_impute_beagle5/meta.yml create mode 100644 subworkflows/nf-core/vcf_impute_beagle5/tests/main.nf.test create mode 100644 
subworkflows/nf-core/vcf_impute_beagle5/tests/main.nf.test.snap create mode 100644 subworkflows/nf-core/vcf_impute_beagle5/tests/nextflow.config diff --git a/subworkflows/local/vcf_impute_beagle5/main.nf b/subworkflows/local/vcf_impute_beagle5/main.nf deleted file mode 100644 index 6f0f6df2..00000000 --- a/subworkflows/local/vcf_impute_beagle5/main.nf +++ /dev/null @@ -1,77 +0,0 @@ -include { BEAGLE5_BEAGLE } from '../../../modules/nf-core/beagle5/beagle' -include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' -include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view' - - -workflow VCF_IMPUTE_BEAGLE5 { - take: - ch_input // channel: [ [id, chr], vcf, tbi ] - ch_panel // channel: [ [id, chr], vcf, tbi ] - ch_map // channel: [ [chr], map] - - main: - ch_versions = channel.empty() - - // Branch input files based on format - ch_input - .branch { _meta, vcf, _tbi -> - bcf: vcf.toString().contains('.bcf') - vcf: vcf.toString().contains('.vcf') - } - .set { ch_input_branched } - - // Convert BCF to VCF if necessary - BCFTOOLS_VIEW( - ch_input_branched.bcf, - [], - [], - [] - ) - ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions.first()) - - // Combine VCF files - ch_ready_vcf = ch_input_branched.vcf - .mix(BCFTOOLS_VIEW.out.vcf.join(BCFTOOLS_VIEW.out.csi)) - - // Prepare input channels for BEAGLE5 by combining VCF, panel, and map files - ch_beagle_input = ch_ready_vcf - .map { meta, vcf, index -> - [ meta.chr, meta, vcf, index ] - } - .combine( - ch_panel.map { meta, vcf, idx -> - [ meta.chr, meta, vcf, idx ] - }, - by: 0 - ) - .combine( - ch_map.map { meta, map -> - [ meta.chr, map ] - }, - by: 0 - ) - .map { _chr, target_meta, vcf, vcf_index, metaP, panel_vcf, panel_vcf_index, map -> - [ target_meta + [ panel_id: metaP.panel_id ], vcf, vcf_index, panel_vcf, panel_vcf_index, map, [], [] ] - } - - - // Run BEAGLE5 imputation - BEAGLE5_BEAGLE(ch_beagle_input) - ch_versions = ch_versions.mix(BEAGLE5_BEAGLE.out.versions.first()) - - // 
Index the imputed VCF files - BCFTOOLS_INDEX(BEAGLE5_BEAGLE.out.vcf) - ch_versions = ch_versions.mix(BCFTOOLS_INDEX.out.versions.first()) - - - ch_vcf_index = BEAGLE5_BEAGLE.out.vcf - .join( - BCFTOOLS_INDEX.out.tbi - .mix(BCFTOOLS_INDEX.out.csi) - ) - .map{ meta, vcf, index -> [meta + [tools: "beagle5"], vcf, index] } - - emit: - vcf_index = ch_vcf_index // channel: [ [id, chr, tools], vcf, index ] - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/local/vcf_impute_beagle5/meta.yml b/subworkflows/local/vcf_impute_beagle5/meta.yml deleted file mode 100644 index bd2129e2..00000000 --- a/subworkflows/local/vcf_impute_beagle5/meta.yml +++ /dev/null @@ -1,90 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json -name: "VCF_IMPUTE_BEAGLE5" -description: | - Subworkflow to impute VCF files using BEAGLE5 software. The subworkflow - takes VCF files, phased reference panel, and genetic maps to perform imputation - and outputs phased and imputed VCF files. 
-keywords: - - VCF - - imputation - - beagle5 - - phasing -components: - - beagle5/beagle - - bcftools/index - - bcftools/view - - bcftools/concat -input: - - ch_input: - description: Channel with input data - structure: - - meta: - type: map - description: | - Metadata map containing sample information - Need to have "id" as sample name and "chr" as chromosome name - - vcf: - type: file - description: Input VCF files - pattern: "*.{vcf,bcf,vcf.gz}" - - index: - type: file - description: Input index file - pattern: "*.{tbi,csi}" - - ch_panel: - description: Channel with phased reference panel data - structure: - - meta: - type: map - description: | - Metadata map that will be combined with the input data map - Need to have the "chr" as chromosome name and "panel_id" as panel name - - vcf: - type: file - description: Reference panel VCF files by chromosomes - pattern: "*.{vcf,bcf,vcf.gz}" - - index: - type: file - description: Reference panel VCF index files - pattern: "*.{tbi,csi}" - - ch_map: - description: Channel with genetic map data - structure: - - meta: - type: map - description: | - Metadata map containing chromosome information - Need to have "chr" as chromosome name - - map: - type: file - description: PLINK format genetic map files - pattern: "*.map" -output: - - vcf_index: - description: Channel with imputed and phased VCF files - structure: - - meta: - type: map - description: | - Metadata map combined with the input data map. - It contains the "id" as sample name and "chr" as chromosome name. - {"tools" : "beagle5"} is added. 
- - vcf: - type: file - description: VCF imputed and phased file by sample - pattern: "*.{vcf,bcf,vcf.gz}" - - index: - type: file - description: VCF index file - pattern: "*.{csi,tbi}" - - - versions: - description: Channel containing software versions file - structure: - - versions.yml: - type: file - description: File containing versions of the software used -authors: - - "@gichas" -maintainers: - - "@gichas" diff --git a/subworkflows/local/vcf_impute_beagle5/tests/main.nf.test b/subworkflows/local/vcf_impute_beagle5/tests/main.nf.test deleted file mode 100644 index a5310888..00000000 --- a/subworkflows/local/vcf_impute_beagle5/tests/main.nf.test +++ /dev/null @@ -1,203 +0,0 @@ -nextflow_workflow { - - name "Test Subworkflow VCF_IMPUTE_BEAGLE5" - script "../main.nf" - - config "./nextflow.config" - - workflow "VCF_IMPUTE_BEAGLE5" - - tag "subworkflows" - tag "subworkflows_local" - tag "subworkflows/vcf_impute_beagle5" - tag "vcf_impute_beagle5" - - tag "beagle5" - tag "beagle5/beagle" - tag "bcftools" - tag "bcftools/index" - - test("Impute with beagle5 one vcf") { - when { - workflow { - """ - input[0] = Channel.of([ - [id: "NA12878", panel_id: "1000GP", chr: "chr22", region: "chr22:16570000-16610000"], - file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bcf", checkIfExist:true), - file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bcf.csi", checkIfExist:true) - ]) - input[1] = Channel.of([ - [panel_id: "1000GP", chr: "chr22"], - file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), - file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true) - ]) - input[2] = Channel.of([ - [chr: "chr22"], [] - ]) - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.versions, - workflow.out.vcf_index.collect{[ - it[0], - file(it[1]).name, - 
file(it[2]).name - ] }, - workflow.out.vcf_index.collect{ path(it[1]).vcf.summary }, - workflow.out.vcf_index.collect{ path(it[1]).vcf.variantsMD5 } - ).match() } - ) - } - } - - test("Impute with beagle5 one vcf with map") { - - when { - workflow { - """ - input[0] = Channel.of([ - [id: "NA12878", panel_id: "1000GP", chr: "chr22", region: "chr22:16570000-16610000"], - file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bcf", checkIfExist:true), - file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bcf.csi", checkIfExist:true) - ]) - input[1] = Channel.of([ - [panel_id: "1000GP", chr: "chr22"], - file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), - file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true) - ]) - input[2] = Channel.of([ - [chr: "chr22"], - file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38_chr22.plink.map", checkIfExist:true) - ]) - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.versions, - workflow.out.vcf_index.collect{[ - it[0], - file(it[1]).name, - file(it[2]).name - ] }, - workflow.out.vcf_index.collect{ path(it[1]).vcf.summary }, - workflow.out.vcf_index.collect{ path(it[1]).vcf.variantsMD5 } - ).match() } - ) - } - } - - test("Impute with beagle5 two vcf") { - - when { - workflow { - """ - input[0] = Channel.of( - [ - [id: "NA12878", panel_id: "1000GP", chr: "chr22", region: "chr22:16570000-16610000"], - file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bcf", checkIfExist:true), - file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bcf.csi", checkIfExist:true) - ], - [ - [id: "NA19401", panel_id: "1000GP", chr: "chr22", region: "chr22:16570000-16610000"], - file(params.pipelines_testdata_base_path + 
"hum_data/individuals/NA19401/NA19401.s.bcf", checkIfExist:true), - file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bcf.csi", checkIfExist:true) - ] - ) - input[1] = Channel.of([ - [panel_id: "1000GP", chr: "chr22"], - file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), - file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true) - ]) - input[2] = Channel.of([ - [chr: "chr22"], [] - ]) - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.versions, - workflow.out.vcf_index.collect{[ - it[0], - file(it[1]).name, - file(it[2]).name - ] }, - workflow.out.vcf_index.collect{ path(it[1]).vcf.summary }, - workflow.out.vcf_index.collect{ path(it[1]).vcf.variantsMD5 } - ).match() } - ) - } - } - - test("Impute with beagle5 multiple chromosomes") { - - when { - workflow { - """ - input[0] = Channel.of( - [ - [id: "NA12878", panel_id: "1000GP", chr: "chr21", region: "chr21:16570000-16610000"], - file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bcf", checkIfExist:true), - file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bcf.csi", checkIfExist:true) - ], - [ - [id: "NA12878", panel_id: "1000GP", chr: "chr22", region: "chr22:16570000-16610000"], - file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bcf", checkIfExist:true), - file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bcf.csi", checkIfExist:true) - ] - ) - input[1] = Channel.of( - [ - [panel_id: "1000GP", chr: "chr21"], - file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExist:true), - file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz.csi", checkIfExist:true) - ], - [ - [panel_id: "1000GP", chr: 
"chr22"], - file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), - file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true) - ] - ) - input[2] = Channel.of( - [ - [chr: "chr21"], - file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38_chr21.plink.map", checkIfExist:true) - ], - [ - [chr: "chr22"], - file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38_chr22.plink.map", checkIfExist:true) - ] - ) - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.versions, - workflow.out.vcf_index.collect{[ - it[0], - file(it[1]).name, - file(it[2]).name - ] }.sort { it[0].chr }, - workflow.out.vcf_index.collect{ path(it[1]).vcf.summary }.sort(), - workflow.out.vcf_index.collect{ path(it[1]).vcf.variantsMD5 }.sort() - ).match() } - ) - } - } -} diff --git a/subworkflows/local/vcf_impute_beagle5/tests/main.nf.test.snap b/subworkflows/local/vcf_impute_beagle5/tests/main.nf.test.snap deleted file mode 100644 index 71d96ebd..00000000 --- a/subworkflows/local/vcf_impute_beagle5/tests/main.nf.test.snap +++ /dev/null @@ -1,160 +0,0 @@ -{ - "Impute with beagle5 multiple chromosomes": { - "content": [ - [ - "versions.yml:md5,3ba3affc995d9a03ef59f8cf9e7fac38", - "versions.yml:md5,753bec56d95889bf214c3c932ba0ee42", - "versions.yml:md5,abf0a598d8e490f400093600b60c148c" - ], - [ - [ - { - "id": "NA12878", - "panel_id": "1000GP", - "chr": "chr21", - "region": "chr21:16570000-16610000", - "tools": "beagle5" - }, - "NA12878_beagle5_imputed.vcf.gz", - "NA12878_beagle5_imputed.vcf.gz.csi" - ], - [ - { - "id": "NA12878", - "panel_id": "1000GP", - "chr": "chr22", - "region": "chr22:16570000-16610000", - "tools": "beagle5" - }, - "NA12878_beagle5_imputed.vcf.gz", - "NA12878_beagle5_imputed.vcf.gz.csi" - ] - ], - [ - "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=836, 
phased=true, phasedAutodetect=true]", - "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=true, phasedAutodetect=true]" - ], - [ - "5a22a956dcb3bc8c9a39b61353d132b9", - "cb8890251b33aebf3a51ba29e14589b3" - ] - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.0" - }, - "timestamp": "2025-11-27T16:15:00.114808128" - }, - "Impute with beagle5 one vcf": { - "content": [ - [ - "versions.yml:md5,3ba3affc995d9a03ef59f8cf9e7fac38", - "versions.yml:md5,753bec56d95889bf214c3c932ba0ee42", - "versions.yml:md5,abf0a598d8e490f400093600b60c148c" - ], - [ - [ - { - "id": "NA12878", - "panel_id": "1000GP", - "chr": "chr22", - "region": "chr22:16570000-16610000", - "tools": "beagle5" - }, - "NA12878_beagle5_imputed.vcf.gz", - "NA12878_beagle5_imputed.vcf.gz.csi" - ] - ], - [ - "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=true, phasedAutodetect=true]" - ], - [ - "5a22a956dcb3bc8c9a39b61353d132b9" - ] - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.0" - }, - "timestamp": "2025-11-27T16:16:24.312538831" - }, - "Impute with beagle5 two vcf": { - "content": [ - [ - "versions.yml:md5,3ba3affc995d9a03ef59f8cf9e7fac38", - "versions.yml:md5,753bec56d95889bf214c3c932ba0ee42", - "versions.yml:md5,abf0a598d8e490f400093600b60c148c" - ], - [ - [ - { - "id": "NA12878", - "panel_id": "1000GP", - "chr": "chr22", - "region": "chr22:16570000-16610000", - "tools": "beagle5" - }, - "NA12878_beagle5_imputed.vcf.gz", - "NA12878_beagle5_imputed.vcf.gz.csi" - ], - [ - { - "id": "NA19401", - "panel_id": "1000GP", - "chr": "chr22", - "region": "chr22:16570000-16610000", - "tools": "beagle5" - }, - "NA19401_beagle5_imputed.vcf.gz", - "NA19401_beagle5_imputed.vcf.gz.csi" - ] - ], - [ - "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=true, phasedAutodetect=true]", - "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=true, phasedAutodetect=true]" - ], - [ - "5a22a956dcb3bc8c9a39b61353d132b9", - 
"26cbea01aa6cbf6f29142b60a376b88f" - ] - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.0" - }, - "timestamp": "2025-11-27T16:14:41.750539148" - }, - "Impute with beagle5 one vcf with map": { - "content": [ - [ - "versions.yml:md5,3ba3affc995d9a03ef59f8cf9e7fac38", - "versions.yml:md5,753bec56d95889bf214c3c932ba0ee42", - "versions.yml:md5,abf0a598d8e490f400093600b60c148c" - ], - [ - [ - { - "id": "NA12878", - "panel_id": "1000GP", - "chr": "chr22", - "region": "chr22:16570000-16610000", - "tools": "beagle5" - }, - "NA12878_beagle5_imputed.vcf.gz", - "NA12878_beagle5_imputed.vcf.gz.csi" - ] - ], - [ - "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=true, phasedAutodetect=true]" - ], - [ - "5a22a956dcb3bc8c9a39b61353d132b9" - ] - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.0" - }, - "timestamp": "2025-11-27T16:16:37.202770941" - } -} \ No newline at end of file diff --git a/subworkflows/local/vcf_impute_beagle5/tests/nextflow.config b/subworkflows/local/vcf_impute_beagle5/tests/nextflow.config deleted file mode 100644 index 8ea209d8..00000000 --- a/subworkflows/local/vcf_impute_beagle5/tests/nextflow.config +++ /dev/null @@ -1,20 +0,0 @@ -process { - withName: BEAGLE5_BEAGLE { - ext.prefix = { "${meta.id}_beagle5_imputed" } - ext.args = {"gp=true ap=true seed=0 chrom=${meta.region}"} - } - - withName: 'VCF_IMPUTE_BEAGLE5:BCFTOOLS_VIEW' { - ext.args = ["--output-type z", "--write-index=csi"].join(' ') - ext.prefix = { "${meta.id}_converted" } - } - - withName: 'VCF_IMPUTE_BEAGLE5:VCF_BCFTOOLS_INDEX' { - ext.args = "--csi" - } - - withName: 'VCF_IMPUTE_BEAGLE5:BCFTOOLS_CONCAT' { - ext.args = ["--output-type z", "--write-index=csi"].join(' ') - ext.prefix = "concatenated_panel" - } -} diff --git a/subworkflows/local/vcf_impute_beagle5/tests/tags.yml b/subworkflows/local/vcf_impute_beagle5/tests/tags.yml deleted file mode 100644 index 703a739a..00000000 --- a/subworkflows/local/vcf_impute_beagle5/tests/tags.yml +++ 
/dev/null @@ -1,2 +0,0 @@ -subworkflows/vcf_impute_beagle5: - - subworkflows/local/vcf_impute_beagle5/** diff --git a/subworkflows/nf-core/vcf_impute_beagle5/main.nf b/subworkflows/nf-core/vcf_impute_beagle5/main.nf new file mode 100644 index 00000000..237c2a18 --- /dev/null +++ b/subworkflows/nf-core/vcf_impute_beagle5/main.nf @@ -0,0 +1,121 @@ +include { BEAGLE5_BEAGLE } from '../../../modules/nf-core/beagle5/beagle' +include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view' +include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_PHASE } from '../../../modules/nf-core/bcftools/index' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_LIGATE } from '../../../modules/nf-core/bcftools/index' + +workflow VCF_IMPUTE_BEAGLE5 { + take: + ch_input // channel (mandatory): [ [id], vcf, tbi ] + ch_panel // channel (mandatory): [ [panel, chr], vcf, tbi ] + ch_chunks // channel (mandatory): [ [panel, chr], regionout ] - same meta as ch_panel, one item per region + ch_map // channel (mandatory): [ [panel, chr], map ] - same meta as ch_panel, map may be [] + + main: + ch_versions = channel.empty() + + // Branch input files based on format + ch_input + .branch { _meta, vcf, _tbi -> + def vcfStr = vcf.toString() + bcf: vcfStr.endsWith('.bcf') || vcfStr.endsWith('.bcf.gz') + vcf: vcfStr.endsWith('.vcf') || vcfStr.endsWith('.vcf.gz') + other: true + } + .set { ch_input_branched } + + ch_input_branched.other.map { _meta, vcf, _tbi -> + error("ERROR: ${vcf.name} in ch_input channel must be in VCF or BCF format.") + } + + // Convert BCF to VCF if necessary + BCFTOOLS_VIEW( + ch_input_branched.bcf, + [], [], [] + ) + ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions.first()) + + // Combine VCF files + ch_ready_vcf = ch_input_branched.vcf.mix( + BCFTOOLS_VIEW.out.vcf.join( + BCFTOOLS_VIEW.out.tbi.mix(BCFTOOLS_VIEW.out.csi), + failOnMismatch: true, + failOnDuplicate: true, + ) + ) + + // Prepare input channels for BEAGLE5 by combining VCF, panel, map and chunk regions (keyed on the [panel, chr] meta) + ch_chunks_counts = ch_chunks +
.groupTuple() + .map { metaPC, regionouts -> + [metaPC, regionouts.size()] + } + + ch_panel_map = ch_panel + .combine(ch_map, by: 0) + .combine(ch_chunks, by: 0) + .combine(ch_chunks_counts, by: 0) + + ch_panel_map.ifEmpty { + error("ERROR: join operation resulted in an empty channel. Please provide a valid ch_panel and ch_map channel as input. The meta maps of ch_panel, ch_map and ch_chunks must match.") + } + + ch_beagle_input = ch_ready_vcf + .combine(ch_panel_map) + .map { metaI, input_vcf, input_index, metaPC, panel_vcf, panel_index, map, regionout, regionsize -> + [ + metaI + metaPC + ["regionout": regionout, "regionsize": regionsize], + input_vcf, input_index, + panel_vcf, panel_index, + map, + [], [], + regionout, + ] + } + + // Run BEAGLE5 imputation + BEAGLE5_BEAGLE(ch_beagle_input) + ch_versions = ch_versions.mix(BEAGLE5_BEAGLE.out.versions.first()) + + // Index the imputed VCF files + BCFTOOLS_INDEX_PHASE(BEAGLE5_BEAGLE.out.vcf) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX_PHASE.out.versions.first()) + + // Ligate all phased files in one and index it + ligate_input = BEAGLE5_BEAGLE.out.vcf + .join( + BCFTOOLS_INDEX_PHASE.out.tbi.mix(BCFTOOLS_INDEX_PHASE.out.csi), + failOnMismatch: true, + failOnDuplicate: true, + ) + .map { meta, vcf, index -> + def keysToKeep = meta.keySet() - ['regionout', 'regionsize'] + [ + groupKey(meta.subMap(keysToKeep), meta.regionsize), + vcf, index, + ] + } + .groupTuple() + .map { groupKeyObj, vcf, index -> + // Extract the actual meta from the groupKey + def meta = groupKeyObj.getGroupTarget() + [meta, vcf, index] + } + + GLIMPSE2_LIGATE(ligate_input) + ch_versions = ch_versions.mix(GLIMPSE2_LIGATE.out.versions.first()) + + BCFTOOLS_INDEX_LIGATE(GLIMPSE2_LIGATE.out.merged_variants) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX_LIGATE.out.versions.first()) + + // Join imputed and index files + ch_vcf_index = GLIMPSE2_LIGATE.out.merged_variants.join( + BCFTOOLS_INDEX_LIGATE.out.tbi.mix(BCFTOOLS_INDEX_LIGATE.out.csi), + failOnMismatch: true, + failOnDuplicate: true, + ) + + emit: +
vcf_index = ch_vcf_index // channel: [ [id, panel, chr], vcf, index ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/vcf_impute_beagle5/meta.yml b/subworkflows/nf-core/vcf_impute_beagle5/meta.yml new file mode 100644 index 00000000..a207e20b --- /dev/null +++ b/subworkflows/nf-core/vcf_impute_beagle5/meta.yml @@ -0,0 +1,100 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: VCF_IMPUTE_BEAGLE5 +description: | + Subworkflow to impute VCF files using BEAGLE5 software. The subworkflow + takes VCF files, a phased reference panel, genetic maps and chunk regions to perform imputation + and outputs phased and imputed VCF files. + Meta map of all channels, except ch_input, will be used to perform join operations. + "regionout" and "regionsize" keys will be added to the meta map to distinguish the different + files before ligation and therefore should not be used. +keywords: + - VCF + - imputation + - beagle5 + - phasing +components: + - beagle5/beagle + - bcftools/index + - bcftools/view + - glimpse2/ligate +input: + - ch_input: + description: Channel with input data + structure: + - meta: + type: map + description: | + Metadata map containing sample information + - vcf: + type: file + description: Input VCF files + pattern: "*.{vcf,bcf}{.gz}?"
+ - index: + type: file + description: Input index file + pattern: "*.{tbi,csi}" + - ch_panel: + description: Channel with phased reference panel data + structure: + - meta: + type: map + description: | + Metadata map that will be combined with the input data map + - vcf: + type: file + description: Reference panel VCF files by chromosomes + pattern: "*.{vcf,bcf,vcf.gz}" + - index: + type: file + description: Reference panel VCF index files + pattern: "*.{tbi,csi}" + - ch_chunks: + description: Channel containing the region to impute + structure: + - meta: + type: map + description: | + Metadata map containing chromosome information + - regionout: + type: string + description: Region to perform the phasing on + pattern: "[chr]+[0-9]+:[0-9]+-[0-9]+" + - ch_map: + description: Channel with genetic map data + structure: + - meta: + type: map + description: | + Metadata map containing chromosome information + - map: + type: file + description: Plink format genetic map files + pattern: "*.map" +output: + - vcf_index: + description: Channel with imputed and phased VCF files + structure: + - meta: + type: map + description: | + Metadata map of the target input file combined with the reference panel map. 
+ - vcf: + type: file + description: VCF imputed and phased file by sample + pattern: "*.{vcf,bcf,vcf.gz}" + - index: + type: file + description: VCF index file + pattern: "*.{tbi,csi}" + - versions: + description: Channel containing software versions file + structure: + - versions.yml: + type: file + description: File containing versions of the software used +authors: + - "@LouisLeNezet" + - "@gichas" +maintainers: + - "@LouisLeNezet" + - "@gichas" diff --git a/subworkflows/nf-core/vcf_impute_beagle5/tests/main.nf.test b/subworkflows/nf-core/vcf_impute_beagle5/tests/main.nf.test new file mode 100644 index 00000000..448d2376 --- /dev/null +++ b/subworkflows/nf-core/vcf_impute_beagle5/tests/main.nf.test @@ -0,0 +1,237 @@ +nextflow_workflow { + + name "Test Subworkflow VCF_IMPUTE_BEAGLE5" + script "../main.nf" + workflow "VCF_IMPUTE_BEAGLE5" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/vcf_impute_beagle5" + + config "./nextflow.config" + + tag "beagle5" + tag "beagle5/beagle" + tag "bcftools" + tag "bcftools/index" + tag "bcftools/view" + tag "glimpse2" + tag "glimpse2/ligate" + + + test("Impute with beagle5 one vcf - panel - whole chromosome - no map - one chromosomes") { + when { + workflow { + """ + input[0] = channel.of([ + [id: "NA12878"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists:true) + ]) + input[1] = channel.of([ + [panel: "1000GP", chr: "chr22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true) + ]) + input[2] = channel.of([[panel: "1000GP", chr: "chr22"], "chr22"]) // chunks: whole chromosome as a single region + input[3] = channel.of([[panel: "1000GP", chr:
"chr22"], []]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcf_index.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString(), + path(it[1]).vcf.summary, + path(it[1]).vcf.header.getGenotypeSamples().sort(), + path(it[1]).vcf.variantsMD5 + ]}, + workflow.out.versions, + workflow.out.versions.collect{ path(it).yaml } + ).match() } + ) + } + } + + test("Impute with beagle5 one bcf gz - panel - map - chunks - two chrosomes") { + setup { + run ("BCFTOOLS_VIEW", alias: "BCFTOOLS_VIEW_BCF" ) { + script "../../../../modules/nf-core/bcftools/view" + process { + """ + input[0] = channel.of([ + [id: "NA12878"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists:true) + ]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + } + when { + params { + bcftools_view_args = "--write-index=csi --output-type b" + } + workflow { + """ + input[0] = BCFTOOLS_VIEW_BCF.out.vcf.join(BCFTOOLS_VIEW_BCF.out.csi) + input[1] = channel.of([ + [panel: "1000GP", chr: "chr22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true) + ],[ + [panel: "1000GP", chr: "chr21"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr21.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr21.vcf.gz.csi", checkIfExists:true) + ]) + input[2] = channel.of( + [[chr: "chr22", panel: "1000GP"], "chr22:16570065-16592216"], + [[chr: "chr22", panel: "1000GP"], "chr22:16592229-16609999"], + [[chr: "chr21", panel: "1000GP"],
"chr21:16570065-16592216"], + [[chr: "chr21", panel: "1000GP"], "chr21:16592229-16609999"] + ) + input[3] = channel.of([ + [panel: "1000GP", chr: "chr22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.plink.map", checkIfExists:true) + ],[ + [panel: "1000GP", chr: "chr21"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr21.plink.map", checkIfExists:true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcf_index.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString(), + path(it[1]).vcf.summary, + path(it[1]).vcf.header.getGenotypeSamples().sort(), + path(it[1]).vcf.variantsMD5 + ]}, + workflow.out.versions.collect{ path(it).yaml } + ).match() } + ) + } + } + + test("homo_sapiens - empty channels - stub") { + options "-stub" + when { + workflow { + """ + input[0] = channel.of([ + [id: "NA12878"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists:true) + ]) + input[1] = channel.of( + [[panel: "1000GP", chr: "chr22"], [], []], + [[panel: "1000GP", chr: "chr21"], [], []] + ) + input[2] = channel.of( + [[chr: "chr22", panel: "1000GP"], "chr22:16570065-16592216"], + [[chr: "chr22", panel: "1000GP"], "chr22:16592229-16609999"], + [[chr: "chr21", panel: "1000GP"], "chr21:16570065-16592216"], + [[chr: "chr21", panel: "1000GP"], "chr21:16592229-16609999"] + ) + input[3] = channel.of( + [[panel: "1000GP", chr: "chr22"], []], + [[panel: "1000GP", chr: "chr21"], []] + ) + """ + } + } + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out, + workflow.out.versions.collect{ path(it).yaml } + ).match() } + ) + } + } + +
test("homo_sapiens - error wrong input format - stub") { + options "-stub" + when { + workflow { + """ + input[0] = channel.of([ + [id: "NA12878"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.minimac.map", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists:true) + ]) + input[1] = channel.of( + [[panel: "1000GP", chr: "chr22"], [], []], + [[panel: "1000GP", chr: "chr21"], [], []] + ) + input[2] = channel.of( + [[chr: "chr22", panel: "1000GP"], "chr22:16570065-16592216"], + [[chr: "chr22", panel: "1000GP"], "chr22:16592229-16609999"], + [[chr: "chr21", panel: "1000GP"], "chr21:16570065-16592216"], + [[chr: "chr21", panel: "1000GP"], "chr21:16592229-16609999"] + ) + input[3] = channel.of( + [[panel: "1000GP", chr: "chr22"], []], + [[panel: "1000GP", chr: "chr21"], []] + ) + """ + } + } + then { + assertAll( + { assert workflow.failed }, + { assert workflow.errorMessage.contains("ERROR: genome.GRCh38.chr22.minimac.map in ch_input channel must be in VCF or BCF format.") } + ) + } + } + + test("homo_sapiens - error empty joint - stub") { + options "-stub" + when { + workflow { + """ + input[0] = channel.of([ + [id: "NA12878"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists:true) + ]) + input[1] = channel.of( + [[panel: "1000GP_1", chr: "chr22"], [], []], // Wrong panel + [[panel: "1000GP", chr: "chr21"], [], []] + ) + input[2] = channel.of( + [[chr: "chr22", panel: "1000GP"], "chr22:16570065-16592216"], + [[chr: "chr22", panel: "1000GP"], "chr22:16592229-16609999"], + [[chr: "chr21", panel: "1000GP"], "chr21:16570065-16592216"], + [[chr: "chr21", panel: "1000GP"], "chr21:16592229-16609999"]
+ ) + input[3] = channel.of( + [[panel: "1000GP", chr: "chr22"], []], + [[panel: "1000GP_1", chr: "chr21"], []] // Wrong panel + ) + """ + } + } + then { + assertAll( + { assert workflow.failed }, + { assert workflow.errorMessage.contains("ERROR: join operation resulted in an empty channel. Please provide a valid ch_panel and ch_map channel as input.") } + ) + } + } +} diff --git a/subworkflows/nf-core/vcf_impute_beagle5/tests/main.nf.test.snap b/subworkflows/nf-core/vcf_impute_beagle5/tests/main.nf.test.snap new file mode 100644 index 00000000..035cee7f --- /dev/null +++ b/subworkflows/nf-core/vcf_impute_beagle5/tests/main.nf.test.snap @@ -0,0 +1,206 @@ +{ + "Impute with beagle5 one vcf - panel - whole chromosome - no map - one chromosomes": { + "content": [ + [ + [ + { + "id": "NA12878", + "panel": "1000GP", + "chr": "chr22" + }, + "NA12878_chr22.ligate.vcf.gz", + "NA12878_chr22.ligate.vcf.gz.csi", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=true, phasedAutodetect=true]", + [ + "NA12878" + ], + "9cab6e975108d5f6dbb7773b794711c2" + ] + ], + [ + "versions.yml:md5,3ba3affc995d9a03ef59f8cf9e7fac38", + "versions.yml:md5,a4740debfa9b03fbd4cd094f1d5ff5d0", + "versions.yml:md5,e4c10555396b9a57ef3aafc9a4707963", + "versions.yml:md5,ff3101d4f934dc550b466d79e092707c" + ], + [ + { + "VCF_IMPUTE_BEAGLE5:BEAGLE5_BEAGLE": { + "beagle": "5.5rev27Feb25.75f" + } + }, + { + "VCF_IMPUTE_BEAGLE5:BCFTOOLS_INDEX_PHASE": { + "bcftools": 1.22 + } + }, + { + "VCF_IMPUTE_BEAGLE5:BCFTOOLS_INDEX_LIGATE": { + "bcftools": 1.22 + } + }, + { + "VCF_IMPUTE_BEAGLE5:GLIMPSE2_LIGATE": { + "glimpse2": "2.0.0" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-12-12T18:12:49.389456693" + }, + "homo_sapiens - empty channels - stub": { + "content": [ + { + "0": [ + [ + { + "id": "NA12878", + "chr": "chr21", + "panel": "1000GP" + }, + "NA12878_chr21.ligate.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", +
"NA12878_chr21.ligate.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "NA12878", + "chr": "chr22", + "panel": "1000GP" + }, + "NA12878_chr22.ligate.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "NA12878_chr22.ligate.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,3ba3affc995d9a03ef59f8cf9e7fac38", + "versions.yml:md5,a4740debfa9b03fbd4cd094f1d5ff5d0", + "versions.yml:md5,e4c10555396b9a57ef3aafc9a4707963", + "versions.yml:md5,ff3101d4f934dc550b466d79e092707c" + ], + "vcf_index": [ + [ + { + "id": "NA12878", + "chr": "chr21", + "panel": "1000GP" + }, + "NA12878_chr21.ligate.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "NA12878_chr21.ligate.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "NA12878", + "chr": "chr22", + "panel": "1000GP" + }, + "NA12878_chr22.ligate.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "NA12878_chr22.ligate.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,3ba3affc995d9a03ef59f8cf9e7fac38", + "versions.yml:md5,a4740debfa9b03fbd4cd094f1d5ff5d0", + "versions.yml:md5,e4c10555396b9a57ef3aafc9a4707963", + "versions.yml:md5,ff3101d4f934dc550b466d79e092707c" + ] + }, + [ + { + "VCF_IMPUTE_BEAGLE5:BEAGLE5_BEAGLE": { + "beagle": "5.5rev27Feb25.75f" + } + }, + { + "VCF_IMPUTE_BEAGLE5:BCFTOOLS_INDEX_PHASE": { + "bcftools": 1.22 + } + }, + { + "VCF_IMPUTE_BEAGLE5:BCFTOOLS_INDEX_LIGATE": { + "bcftools": 1.22 + } + }, + { + "VCF_IMPUTE_BEAGLE5:GLIMPSE2_LIGATE": { + "glimpse2": "2.0.0" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-12-12T18:14:04.06517785" + }, + "Impute with beagle5 one bcf gz - panel - map - chunks - two chrosomes": { + "content": [ + [ + [ + { + "id": "NA12878", + "chr": "chr21", + "panel": "1000GP" + }, + "NA12878_chr21.ligate.vcf.gz", + "NA12878_chr21.ligate.vcf.gz.csi", + "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=836, phased=true, 
phasedAutodetect=true]", + [ + "NA12878" + ], + "70c852b39eb228a54fabc564d7e536e4" + ], + [ + { + "id": "NA12878", + "chr": "chr22", + "panel": "1000GP" + }, + "NA12878_chr22.ligate.vcf.gz", + "NA12878_chr22.ligate.vcf.gz.csi", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=true, phasedAutodetect=true]", + [ + "NA12878" + ], + "9cab6e975108d5f6dbb7773b794711c2" + ] + ], + [ + { + "VCF_IMPUTE_BEAGLE5:BEAGLE5_BEAGLE": { + "beagle": "5.5rev27Feb25.75f" + } + }, + { + "VCF_IMPUTE_BEAGLE5:BCFTOOLS_INDEX_PHASE": { + "bcftools": 1.22 + } + }, + { + "VCF_IMPUTE_BEAGLE5:BCFTOOLS_INDEX_LIGATE": { + "bcftools": 1.22 + } + }, + { + "VCF_IMPUTE_BEAGLE5:BCFTOOLS_VIEW": { + "bcftools": 1.22 + } + }, + { + "VCF_IMPUTE_BEAGLE5:GLIMPSE2_LIGATE": { + "glimpse2": "2.0.0" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-12-12T18:13:48.934880404" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/vcf_impute_beagle5/tests/nextflow.config b/subworkflows/nf-core/vcf_impute_beagle5/tests/nextflow.config new file mode 100644 index 00000000..5b205b5e --- /dev/null +++ b/subworkflows/nf-core/vcf_impute_beagle5/tests/nextflow.config @@ -0,0 +1,25 @@ +process { + withName: BCFTOOLS_VIEW { + ext.args = { "--write-index=tbi --output-type z" } + } + + withName: BEAGLE5_BEAGLE { + ext.prefix = { "${meta.id}_${meta.regionout}" } + } + + withName: GLIMPSE2_LIGATE { + ext.prefix = { "${meta.id}_${meta.chr}.ligate" } + } + + withName: BCFTOOLS_INDEX_PHASE { + ext.args = '--tbi' + } + + withName: BCFTOOLS_INDEX_LIGATE { + ext.args = '--csi' + } + + withName: BCFTOOLS_VIEW_BCF { + ext.args = { params.bcftools_view_args ?: "" } + } +} From 0daa20236a6d6cb227f3135a375d731fd817eb7f Mon Sep 17 00:00:00 2001 From: Louis Le Nezet Date: Mon, 22 Dec 2025 14:47:16 +0100 Subject: [PATCH 3/5] Update sbwf usage in main --- conf/steps/imputation_beagle5.config | 29 +- conf/steps/imputation_glimpse2.config | 4 +- 
workflows/phaseimpute/main.nf | 18 +- workflows/phaseimpute/tests/main.nf.test | 28 - workflows/phaseimpute/tests/main.nf.test.snap | 31 -- workflows/phaseimpute/tests/nextflow.config | 6 + .../phaseimpute/tests/test_bealge5.nf.test | 77 +++ .../tests/test_bealge5.nf.test.snap | 503 ++++++++++++++++++ 8 files changed, 618 insertions(+), 78 deletions(-) create mode 100644 workflows/phaseimpute/tests/test_bealge5.nf.test create mode 100644 workflows/phaseimpute/tests/test_bealge5.nf.test.snap diff --git a/conf/steps/imputation_beagle5.config b/conf/steps/imputation_beagle5.config index c13e8939..a206c84f 100644 --- a/conf/steps/imputation_beagle5.config +++ b/conf/steps/imputation_beagle5.config @@ -16,13 +16,7 @@ process { // Impute the variants with BEAGLE5 withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_BEAGLE5:.*' { publishDir = [ enabled: false ] - tag = {"${meta.id} ${meta.chr}"} - } - - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_BEAGLE5:BEAGLE5_BEAGLE' { - ext.args = { "gp=true ap=true chrom=${meta.chr}" } - ext.prefix = { "${meta.id}.${meta.chr}.beagle5" } - publishDir = [ enabled: false ] + tag = {"${meta.id} ${meta.regionout ?: meta.chr}"} } // Convert BCF to VCF if necessary @@ -32,12 +26,31 @@ process { publishDir = [ enabled: false ] } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_BEAGLE5:BEAGLE5_BEAGLE' { + ext.args = { "gp=true ap=true seed=${params.seed}" } + ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.regionout ? 
meta.regionout.replace(':','_') : meta.chr}.beagle5" } + publishDir = [ enabled: false ] + } + // Index the imputed VCF files - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_BEAGLE5:BCFTOOLS_INDEX_BEAGLE' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_BEAGLE5:BCFTOOLS_INDEX_PHASE' { ext.args = '' publishDir = [ enabled: false ] } + // Ligate all results + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_BEAGLE5:GLIMPSE2_LIGATE' { + ext.args = { "--seed ${params.seed}" } + ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.chr}.ligate" } + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_BEAGLE5:BCFTOOLS_INDEX_LIGATE' { + ext.args = "--tbi" + publishDir = [ enabled: false ] + } + // Concatenate the imputed chromosomes withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_BEAGLE5:.*' { publishDir = [ diff --git a/conf/steps/imputation_glimpse2.config b/conf/steps/imputation_glimpse2.config index 6b82b005..e6a8b513 100644 --- a/conf/steps/imputation_glimpse2.config +++ b/conf/steps/imputation_glimpse2.config @@ -25,7 +25,7 @@ process { publishDir = [ enabled: false ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_1' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_PHASE' { ext.args = "--csi" publishDir = [ enabled: false ] } @@ -36,7 +36,7 @@ process { publishDir = [ enabled: false ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_2' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_LIGATE' { ext.args = "--tbi" publishDir = [ enabled: false ] } diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 334aa19a..57156602 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -63,7 +63,7 @@ include { BAM_IMPUTE_STITCH } from '../../subworkflows/ include { VCF_CONCATENATE_BCFTOOLS as CONCAT_STITCH } from 
'../../subworkflows/local/vcf_concatenate_bcftools' // BEAGLE5 subworkflows -include { VCF_IMPUTE_BEAGLE5 } from '../../subworkflows/local/vcf_impute_beagle5' +include { VCF_IMPUTE_BEAGLE5 } from '../../subworkflows/nf-core/vcf_impute_beagle5' include { VCF_CONCATENATE_BCFTOOLS as CONCAT_BEAGLE5 } from '../../subworkflows/local/vcf_concatenate_bcftools' // MINIMAC4 subworkflows @@ -447,23 +447,23 @@ workflow PHASEIMPUTE { } if (params.tools.split(',').contains("beagle5")) { - // Create input channel combining VCF with regions - ch_input_beagle5 = ch_input_type.vcf - .combine(ch_region) - .map { meta_vcf, vcf, index, meta_region, _region -> - [meta_vcf + meta_region, vcf, index] - } + log.info("Impute with BEAGLE5") + ch_chunks_beagle5 = chunkPrepareChannel(ch_chunks, ch_region, "glimpse1") + .map{ meta, _regionin, regionout -> [meta, regionout]} // Impute with BEAGLE5 VCF_IMPUTE_BEAGLE5( - ch_input_beagle5, + ch_input_type.vcf, ch_panel_phased, + ch_chunks_beagle5, ch_map ) ch_versions = ch_versions.mix(VCF_IMPUTE_BEAGLE5.out.versions) // Concatenate by chromosomes - CONCAT_BEAGLE5(VCF_IMPUTE_BEAGLE5.out.vcf_index) + CONCAT_BEAGLE5(VCF_IMPUTE_BEAGLE5.out.vcf_index.map{ + meta, vcf, index -> [meta + [tools:"beagle5"], vcf, index] + }) ch_versions = ch_versions.mix(CONCAT_BEAGLE5.out.versions) // Add results to input validate diff --git a/workflows/phaseimpute/tests/main.nf.test b/workflows/phaseimpute/tests/main.nf.test index 848a8925..eb5dbf57 100644 --- a/workflows/phaseimpute/tests/main.nf.test +++ b/workflows/phaseimpute/tests/main.nf.test @@ -33,34 +33,6 @@ nextflow_pipeline { } - test("Check test_beagle5") { - tag "test_beagle5" - config "../../../conf/test_beagle5.config" - - when { - params { - publish_dir_mode = "copy" - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/phaseimpute/' - outdir = "$outputDir" - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - 
path("${outputDir}/imputation/") - .list() - .collect { getRecursiveFileNames(it, outputDir) } - .flatten(), - path("$outputDir/imputation/beagle5/concat/NA12878.beagle5.vcf.gz").vcf.summary.replaceAll(", phasedAutodetect=(false|true)", ""), - path("$outputDir/imputation/beagle5/concat/NA12878.beagle5.vcf.gz").vcf.header.getGenotypeSamples().sort() - ).match() - } - ) - } - } - test("Check test_minimac4") { tag "test_minimac4" config "../../../conf/test_minimac4.config" diff --git a/workflows/phaseimpute/tests/main.nf.test.snap b/workflows/phaseimpute/tests/main.nf.test.snap index 4423af94..f3dd4579 100644 --- a/workflows/phaseimpute/tests/main.nf.test.snap +++ b/workflows/phaseimpute/tests/main.nf.test.snap @@ -310,37 +310,6 @@ }, "timestamp": "2025-11-13T16:30:13.091032307" }, - "Check test_beagle5": { - "content": [ - [ - "imputation/beagle5/concat/NA12878.beagle5.vcf.gz", - "imputation/beagle5/concat/NA12878.beagle5.vcf.gz.tbi", - "imputation/beagle5/concat/NA19401.beagle5.vcf.gz", - "imputation/beagle5/concat/NA19401.beagle5.vcf.gz.tbi", - "imputation/beagle5/concat/NA20359.beagle5.vcf.gz", - "imputation/beagle5/concat/NA20359.beagle5.vcf.gz.tbi", - "imputation/beagle5/samples/NA12878.beagle5.vcf.gz", - "imputation/beagle5/samples/NA12878.beagle5.vcf.gz.tbi", - "imputation/beagle5/samples/NA19401.beagle5.vcf.gz", - "imputation/beagle5/samples/NA19401.beagle5.vcf.gz.tbi", - "imputation/beagle5/samples/NA20359.beagle5.vcf.gz", - "imputation/beagle5/samples/NA20359.beagle5.vcf.gz.tbi", - "imputation/csv/impute.csv", - "imputation/stats/NA12878.beagle5.bcftools_stats.txt", - "imputation/stats/NA19401.beagle5.bcftools_stats.txt", - "imputation/stats/NA20359.beagle5.bcftools_stats.txt" - ], - "VcfFile [chromosomes=[chr21, chr22], sampleCount=1, variantCount=1739, phased=true]", - [ - "NA12878" - ] - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.0" - }, - "timestamp": "2025-11-13T16:31:24.717148502" - }, "Check test_sim": { "content": [ [ diff --git 
a/workflows/phaseimpute/tests/nextflow.config b/workflows/phaseimpute/tests/nextflow.config index e9d37248..8d1b2a2e 100644 --- a/workflows/phaseimpute/tests/nextflow.config +++ b/workflows/phaseimpute/tests/nextflow.config @@ -12,4 +12,10 @@ process { ext.suffix = "bcf" publishDir = [ enabled: false ] } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_BEAGLE5:BEAGLE5_BEAGLE' { + ext.args = { "gp=true ap=true seed=${params.seed}" } + ext.prefix = { "${meta.id}.${meta.chr}.beagle5" } + publishDir = [ enabled: false ] + } } diff --git a/workflows/phaseimpute/tests/test_bealge5.nf.test b/workflows/phaseimpute/tests/test_bealge5.nf.test new file mode 100644 index 00000000..57e3f528 --- /dev/null +++ b/workflows/phaseimpute/tests/test_bealge5.nf.test @@ -0,0 +1,77 @@ +nextflow_pipeline { + + name "Test phaseimpute workflow" + script "main.nf" + tag "pipeline" + tag "pipeline/phaseimpute" + + config "./nextflow.config" + + test("Check test_beagle5 - with chunks") { + tag "test_beagle5" + config "../../../conf/test_beagle5.config" + when { + params { + publish_dir_mode = "copy" + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/phaseimpute/' + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + UTILS.getPipelineResults(outputDir, workflow), + ["NA12878.beagle5": UTILS.vcfDetails("$outputDir/imputation/beagle5/concat/NA12878.beagle5.vcf.gz")] + ).match()} + ) + } + } + + test("Check test_beagle5 - with chunks - no map") { + tag "test_beagle5" + config "../../../conf/test_beagle5.config" + when { + params { + publish_dir_mode = "copy" + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/phaseimpute/' + outdir = "$outputDir" + map = null + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + UTILS.getPipelineResults(outputDir, workflow), + ["NA12878.beagle5": 
UTILS.vcfDetails("$outputDir/imputation/beagle5/concat/NA12878.beagle5.vcf.gz")] + ).match()} + ) + } + } + + test("Check test_beagle5 - without chunks") { + tag "test_beagle5" + config "../../../conf/test_beagle5.config" + when { + params { + publish_dir_mode = "copy" + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/phaseimpute/' + outdir = "$outputDir" + chunks = null + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + UTILS.getPipelineResults(outputDir, workflow), + ["NA12878.beagle5": UTILS.vcfDetails("$outputDir/imputation/beagle5/concat/NA12878.beagle5.vcf.gz")] + ).match()} + ) + } + } +} diff --git a/workflows/phaseimpute/tests/test_bealge5.nf.test.snap b/workflows/phaseimpute/tests/test_bealge5.nf.test.snap new file mode 100644 index 00000000..cf842388 --- /dev/null +++ b/workflows/phaseimpute/tests/test_bealge5.nf.test.snap @@ -0,0 +1,503 @@ +{ + "Check test_beagle5 - without chunks": { + "content": [ + [ + 46, + { + "BCFTOOLS_CONCAT": { + "bcftools": 1.21 + }, + "BCFTOOLS_INDEX_LIGATE": { + "bcftools": 1.21 + }, + "BCFTOOLS_INDEX_PHASE": { + "bcftools": 1.21 + }, + "BCFTOOLS_PLUGINSPLIT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS_TOOLS": { + "bcftools": 1.21 + }, + "BEAGLE5_BEAGLE": { + "beagle": "5.5rev27Feb25.75f" + }, + "GLIMPSE2_LIGATE": { + "glimpse2": "2.0.0" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.21 + }, + "VCFCHREXTRACT": { + "bcftools": 1.2 + }, + "Workflow": { + "nf-core/phaseimpute": "v1.2.0dev" + } + }, + [ + "imputation", + "imputation/beagle5", + "imputation/beagle5/concat", + "imputation/beagle5/concat/NA12878.beagle5.vcf.gz", + "imputation/beagle5/concat/NA12878.beagle5.vcf.gz.tbi", + "imputation/beagle5/concat/NA19401.beagle5.vcf.gz", + "imputation/beagle5/concat/NA19401.beagle5.vcf.gz.tbi", + "imputation/beagle5/concat/NA20359.beagle5.vcf.gz", + "imputation/beagle5/concat/NA20359.beagle5.vcf.gz.tbi", + "imputation/beagle5/samples", + 
"imputation/beagle5/samples/NA12878.beagle5.vcf.gz", + "imputation/beagle5/samples/NA12878.beagle5.vcf.gz.tbi", + "imputation/beagle5/samples/NA19401.beagle5.vcf.gz", + "imputation/beagle5/samples/NA19401.beagle5.vcf.gz.tbi", + "imputation/beagle5/samples/NA20359.beagle5.vcf.gz", + "imputation/beagle5/samples/NA20359.beagle5.vcf.gz.tbi", + "imputation/csv", + "imputation/csv/impute.csv", + "imputation/stats", + "imputation/stats/NA12878.beagle5.bcftools_stats.txt", + "imputation/stats/NA19401.beagle5.bcftools_stats.txt", + "imputation/stats/NA20359.beagle5.bcftools_stats.txt", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_phaseimpute_software_mqc_versions.yml" + ], + [ + "NA12878.beagle5.bcftools_stats.txt:md5,6fa8e266447c0ff1c22d6f8ae36bf9bb", + "NA19401.beagle5.bcftools_stats.txt:md5,053686e1dbe0a25cad0efed5cf2ad04a", + "NA20359.beagle5.bcftools_stats.txt:md5,9ffd9717a055f8f6fe1c0092da3ebf30", + "bcftools-stats-subtypes.txt:md5,82c393ee7646f820b76e6bf60e5824c4", + "bcftools_stats_indel-lengths.txt:md5,94d4acd5ae34c2890e554f8c19bf57a6", + "bcftools_stats_vqc_Count_Indels.txt:md5,df541ae8fffdb02f3a568b692185350b", + "bcftools_stats_vqc_Count_SNP.txt:md5,8f6b599e789a39582417cc90a8c1e945", + "bcftools_stats_vqc_Count_Transitions.txt:md5,22207fd2a85d947d0228bd6b53b0a331", + 
"bcftools_stats_vqc_Count_Transversions.txt:md5,163ede7c23d91d4157063b36c7c996d3", + "multiqc_bcftools_stats.txt:md5,7032ae82e0932186d00c3ffa132d962f", + "multiqc_citations.txt:md5,5cbab4ecbe14049d965fd97bd61d252b", + "multiqc_general_stats.txt:md5,401a17402c90373934c85d4b1a08b02c" + ], + [ + + ], + [ + [ + "NA12878.beagle5.vcf.gz", + "1c3989fa496bf014671ce1d91bc00f02" + ], + [ + "NA19401.beagle5.vcf.gz", + "402493e9365b09071880fb6091235214" + ], + [ + "NA20359.beagle5.vcf.gz", + "e7fdcbc30ec4a497ca71e3b4ff9cefd5" + ], + [ + "NA12878.beagle5.vcf.gz", + "1c3989fa496bf014671ce1d91bc00f02" + ], + [ + "NA19401.beagle5.vcf.gz", + "402493e9365b09071880fb6091235214" + ], + [ + "NA20359.beagle5.vcf.gz", + "e7fdcbc30ec4a497ca71e3b4ff9cefd5" + ] + ] + ], + { + "NA12878.beagle5": { + "summary": "VcfFile [chromosomes=[chr21, chr22], sampleCount=1, variantCount=1739, phased=true]", + "samples": [ + "NA12878" + ] + } + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-12-22T14:11:58.397968033" + }, + "Check test_beagle5 - with chunks - no map": { + "content": [ + [ + 46, + { + "BCFTOOLS_CONCAT": { + "bcftools": 1.21 + }, + "BCFTOOLS_INDEX_LIGATE": { + "bcftools": 1.21 + }, + "BCFTOOLS_INDEX_PHASE": { + "bcftools": 1.21 + }, + "BCFTOOLS_PLUGINSPLIT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS_TOOLS": { + "bcftools": 1.21 + }, + "BEAGLE5_BEAGLE": { + "beagle": "5.5rev27Feb25.75f" + }, + "GLIMPSE2_LIGATE": { + "glimpse2": "2.0.0" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.21 + }, + "VCFCHREXTRACT": { + "bcftools": 1.2 + }, + "Workflow": { + "nf-core/phaseimpute": "v1.2.0dev" + } + }, + [ + "imputation", + "imputation/beagle5", + "imputation/beagle5/concat", + "imputation/beagle5/concat/NA12878.beagle5.vcf.gz", + "imputation/beagle5/concat/NA12878.beagle5.vcf.gz.tbi", + "imputation/beagle5/concat/NA19401.beagle5.vcf.gz", + "imputation/beagle5/concat/NA19401.beagle5.vcf.gz.tbi", + "imputation/beagle5/concat/NA20359.beagle5.vcf.gz", + 
"imputation/beagle5/concat/NA20359.beagle5.vcf.gz.tbi", + "imputation/beagle5/samples", + "imputation/beagle5/samples/NA12878.beagle5.vcf.gz", + "imputation/beagle5/samples/NA12878.beagle5.vcf.gz.tbi", + "imputation/beagle5/samples/NA19401.beagle5.vcf.gz", + "imputation/beagle5/samples/NA19401.beagle5.vcf.gz.tbi", + "imputation/beagle5/samples/NA20359.beagle5.vcf.gz", + "imputation/beagle5/samples/NA20359.beagle5.vcf.gz.tbi", + "imputation/csv", + "imputation/csv/impute.csv", + "imputation/stats", + "imputation/stats/NA12878.beagle5.bcftools_stats.txt", + "imputation/stats/NA19401.beagle5.bcftools_stats.txt", + "imputation/stats/NA20359.beagle5.bcftools_stats.txt", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_phaseimpute_software_mqc_versions.yml" + ], + [ + "NA12878.beagle5.bcftools_stats.txt:md5,6fa8e266447c0ff1c22d6f8ae36bf9bb", + "NA19401.beagle5.bcftools_stats.txt:md5,053686e1dbe0a25cad0efed5cf2ad04a", + "NA20359.beagle5.bcftools_stats.txt:md5,9ffd9717a055f8f6fe1c0092da3ebf30", + "bcftools-stats-subtypes.txt:md5,82c393ee7646f820b76e6bf60e5824c4", + "bcftools_stats_indel-lengths.txt:md5,94d4acd5ae34c2890e554f8c19bf57a6", + "bcftools_stats_vqc_Count_Indels.txt:md5,df541ae8fffdb02f3a568b692185350b", + "bcftools_stats_vqc_Count_SNP.txt:md5,8f6b599e789a39582417cc90a8c1e945", + 
"bcftools_stats_vqc_Count_Transitions.txt:md5,22207fd2a85d947d0228bd6b53b0a331", + "bcftools_stats_vqc_Count_Transversions.txt:md5,163ede7c23d91d4157063b36c7c996d3", + "multiqc_bcftools_stats.txt:md5,7032ae82e0932186d00c3ffa132d962f", + "multiqc_citations.txt:md5,5cbab4ecbe14049d965fd97bd61d252b", + "multiqc_general_stats.txt:md5,401a17402c90373934c85d4b1a08b02c" + ], + [ + + ], + [ + [ + "NA12878.beagle5.vcf.gz", + "76d5f8432e9a4ff6d65026c8ae00a088" + ], + [ + "NA19401.beagle5.vcf.gz", + "9dea936d94582ffde15f53d3dee82d68" + ], + [ + "NA20359.beagle5.vcf.gz", + "6f7b0e4b5d3004a19563a44e10dec779" + ], + [ + "NA12878.beagle5.vcf.gz", + "76d5f8432e9a4ff6d65026c8ae00a088" + ], + [ + "NA19401.beagle5.vcf.gz", + "9dea936d94582ffde15f53d3dee82d68" + ], + [ + "NA20359.beagle5.vcf.gz", + "6f7b0e4b5d3004a19563a44e10dec779" + ] + ] + ], + { + "NA12878.beagle5": { + "summary": "VcfFile [chromosomes=[chr21, chr22], sampleCount=1, variantCount=1739, phased=true]", + "samples": [ + "NA12878" + ] + } + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-12-22T14:10:49.494820211" + }, + "Check test_beagle5 - with chunks": { + "content": [ + [ + 46, + { + "BCFTOOLS_CONCAT": { + "bcftools": 1.21 + }, + "BCFTOOLS_INDEX_LIGATE": { + "bcftools": 1.21 + }, + "BCFTOOLS_INDEX_PHASE": { + "bcftools": 1.21 + }, + "BCFTOOLS_PLUGINSPLIT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS_TOOLS": { + "bcftools": 1.21 + }, + "BEAGLE5_BEAGLE": { + "beagle": "5.5rev27Feb25.75f" + }, + "GLIMPSE2_LIGATE": { + "glimpse2": "2.0.0" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.21 + }, + "VCFCHREXTRACT": { + "bcftools": 1.2 + }, + "Workflow": { + "nf-core/phaseimpute": "v1.2.0dev" + } + }, + [ + "imputation", + "imputation/beagle5", + "imputation/beagle5/concat", + "imputation/beagle5/concat/NA12878.beagle5.vcf.gz", + "imputation/beagle5/concat/NA12878.beagle5.vcf.gz.tbi", + "imputation/beagle5/concat/NA19401.beagle5.vcf.gz", + 
"imputation/beagle5/concat/NA19401.beagle5.vcf.gz.tbi", + "imputation/beagle5/concat/NA20359.beagle5.vcf.gz", + "imputation/beagle5/concat/NA20359.beagle5.vcf.gz.tbi", + "imputation/beagle5/samples", + "imputation/beagle5/samples/NA12878.beagle5.vcf.gz", + "imputation/beagle5/samples/NA12878.beagle5.vcf.gz.tbi", + "imputation/beagle5/samples/NA19401.beagle5.vcf.gz", + "imputation/beagle5/samples/NA19401.beagle5.vcf.gz.tbi", + "imputation/beagle5/samples/NA20359.beagle5.vcf.gz", + "imputation/beagle5/samples/NA20359.beagle5.vcf.gz.tbi", + "imputation/csv", + "imputation/csv/impute.csv", + "imputation/stats", + "imputation/stats/NA12878.beagle5.bcftools_stats.txt", + "imputation/stats/NA19401.beagle5.bcftools_stats.txt", + "imputation/stats/NA20359.beagle5.bcftools_stats.txt", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_phaseimpute_software_mqc_versions.yml" + ], + [ + "NA12878.beagle5.bcftools_stats.txt:md5,6fa8e266447c0ff1c22d6f8ae36bf9bb", + "NA19401.beagle5.bcftools_stats.txt:md5,053686e1dbe0a25cad0efed5cf2ad04a", + "NA20359.beagle5.bcftools_stats.txt:md5,9ffd9717a055f8f6fe1c0092da3ebf30", + "bcftools-stats-subtypes.txt:md5,82c393ee7646f820b76e6bf60e5824c4", + "bcftools_stats_indel-lengths.txt:md5,94d4acd5ae34c2890e554f8c19bf57a6", + "bcftools_stats_vqc_Count_Indels.txt:md5,df541ae8fffdb02f3a568b692185350b", + 
"bcftools_stats_vqc_Count_SNP.txt:md5,8f6b599e789a39582417cc90a8c1e945", + "bcftools_stats_vqc_Count_Transitions.txt:md5,22207fd2a85d947d0228bd6b53b0a331", + "bcftools_stats_vqc_Count_Transversions.txt:md5,163ede7c23d91d4157063b36c7c996d3", + "multiqc_bcftools_stats.txt:md5,7032ae82e0932186d00c3ffa132d962f", + "multiqc_citations.txt:md5,5cbab4ecbe14049d965fd97bd61d252b", + "multiqc_general_stats.txt:md5,401a17402c90373934c85d4b1a08b02c" + ], + [ + + ], + [ + [ + "NA12878.beagle5.vcf.gz", + "1c3989fa496bf014671ce1d91bc00f02" + ], + [ + "NA19401.beagle5.vcf.gz", + "402493e9365b09071880fb6091235214" + ], + [ + "NA20359.beagle5.vcf.gz", + "e7fdcbc30ec4a497ca71e3b4ff9cefd5" + ], + [ + "NA12878.beagle5.vcf.gz", + "1c3989fa496bf014671ce1d91bc00f02" + ], + [ + "NA19401.beagle5.vcf.gz", + "402493e9365b09071880fb6091235214" + ], + [ + "NA20359.beagle5.vcf.gz", + "e7fdcbc30ec4a497ca71e3b4ff9cefd5" + ] + ] + ], + { + "NA12878.beagle5": { + "summary": "VcfFile [chromosomes=[chr21, chr22], sampleCount=1, variantCount=1739, phased=true]", + "samples": [ + "NA12878" + ] + } + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-12-22T14:09:40.496448692" + } +} \ No newline at end of file From 631200780121d7478532376c69b6c16ba1f9435a Mon Sep 17 00:00:00 2001 From: Louis Le Nezet Date: Mon, 22 Dec 2025 14:47:39 +0100 Subject: [PATCH 4/5] Update init sbwf --- modules.json | 25 +++++++++++++++---- .../utils_nfcore_phaseimpute_pipeline/main.nf | 4 +-- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/modules.json b/modules.json index be801f2a..3390c894 100644 --- a/modules.json +++ b/modules.json @@ -24,7 +24,12 @@ "bcftools/index": { "branch": "master", "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", - "installed_by": ["bam_impute_stitch", "bam_vcf_impute_glimpse2", "modules"] + "installed_by": [ + "bam_impute_stitch", + "bam_vcf_impute_glimpse2", + "modules", + "vcf_impute_beagle5" + ] }, "bcftools/merge": { "branch": 
"master", @@ -62,12 +67,12 @@ "bcftools/view": { "branch": "master", "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", - "installed_by": ["modules"] + "installed_by": ["modules", "vcf_impute_beagle5"] }, "beagle5/beagle": { "branch": "master", - "git_sha": "359cfb69d521fcb8b56313e9a6ca9d66036aa921", - "installed_by": ["modules"] + "git_sha": "94a767478097e92dca1386ea623d9dae9edc025a", + "installed_by": ["modules", "vcf_impute_beagle5"] }, "gawk": { "branch": "master", @@ -102,7 +107,12 @@ "glimpse2/ligate": { "branch": "master", "git_sha": "94a767478097e92dca1386ea623d9dae9edc025a", - "installed_by": ["bam_impute_stitch", "bam_vcf_impute_glimpse2", "modules"] + "installed_by": [ + "bam_impute_stitch", + "bam_vcf_impute_glimpse2", + "modules", + "vcf_impute_beagle5" + ] }, "glimpse2/phase": { "branch": "master", @@ -239,6 +249,11 @@ "branch": "master", "git_sha": "4b406a74dc0449c0401ed87d5bfff4252fd277fd", "installed_by": ["subworkflows"] + }, + "vcf_impute_beagle5": { + "branch": "master", + "git_sha": "e8db6baba21376a4cf0dfc5ba32224eebf51266c", + "installed_by": ["subworkflows"] } } } diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index 8043c7ae..62d0c6bb 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -554,8 +554,8 @@ def validateInputBatchTools(ch_input, batch_size, extension, tools) { } if (extension ==~ "(bam|cram)?") { - if (tools.contains("beagle5")) { - error "Beagle5 software cannot run with BAM or CRAM alignement files. Please provide variant calling format files (i.e. VCF or BCF)." + if (tools.contains("beagle5") || tools.contains("minimac4")) { + error "Beagle5 and minimac4 softwares cannot run with BAM or CRAM alignement files. Please provide variant calling format files (i.e. VCF or BCF)." 
} } From f1914886093e3f5cacfc6a787052a04308fcf7aa Mon Sep 17 00:00:00 2001 From: Louis Le Nezet Date: Mon, 22 Dec 2025 14:48:57 +0100 Subject: [PATCH 5/5] Update docs --- CHANGELOG.md | 1 + docs/usage.md | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 62fe7266..51a812ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#226](https://github.com/nf-core/phaseimpute/pull/226) - Remove `BEDTOOLS` and update automated methods description. - [#239](https://github.com/nf-core/phaseimpute/pull/239) - Move from local to nf-core sbwf for `STITCH` imputation. Set `--chunks` as optional argument. Update `usage.md`. - [#240](https://github.com/nf-core/phaseimpute/pull/240) - Move from local to nf-core sbwf for `GLIMPSE2` imputation. Update `usage.md`. +- [#243](https://github.com/nf-core/phaseimpute/pull/243) - Move from local to nf-core sbwf for `BEAGLE5` imputation. Update `usage.md`. ### `Fixed` diff --git a/docs/usage.md b/docs/usage.md index ca3f8579..67d0f0a7 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -449,7 +449,7 @@ Optionnaly you can provide the following flags: | `GLIMPSE2` | ✅ | ✅ ¹ | ✅ | ✅ | ❌ | ✅ | ✅ | | `QUILT` | ✅ | ✅ ² | ✅ | ❌ | ✅ ⁴ | ❌⁶ | ✅ | | `STITCH` | ✅ | ✅ ² | ✅ | ❌ | ✅ ³ | ✅ | ✅ | -| `BEAGLE5` | ✅ | ✅ ¹ | ✅ | ✅ | ❌ | ❌⁶ | ❌⁶ | +| `BEAGLE5` | ✅ | ✅ ¹ | ✅ | ✅ | ❌ | ✅ | ✅ | | `MINIMAC4` | ✅ | ✅ ¹ | ✅ | ✅ | ✅ ⁵ | ❌⁶ | ❌⁶ | > ¹ Alignment files as well as variant calling format (i.e. BAM, CRAM, VCF or BCF) @@ -661,6 +661,13 @@ nextflow run nf-core/phaseimpute \ The CSV file provided in `--panel` must be prepared with `--steps panelprep` and must contain four columns [panel, chr, vcf, index]. +You can optionally provide chunks to parallelize the imputation process using `--chunks`. +If not provided the full region per chromosome will be used. 
+See [Chunks section](#samplesheet-chunks) for more information. + +A genetic map can also be provided for better accuracy. +See [Map section](#samplesheet-map) for more information. + ### MINIMAC4 [MINIMAC4](https://github.com/statgen/Minimac4) is a low memory, computationally efficient implementation of the MaCH algorithm for genotype imputation. It is designed to work on phased haplotypes and can handle very large reference panels.