From d6ef004bf4fb6b19db2da8df94eb5cad6463ffa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Afonso=20Guerra-Assun=C3=A7=C3=A3o?= Date: Sat, 24 Jan 2026 16:41:35 +0000 Subject: [PATCH] Use CSI indexes for BAMs --- docs/output.md | 6 +++--- modules/nf-core/pilon/main.nf | 2 +- modules/nf-core/samtools/flagstat/main.nf | 2 +- modules/nf-core/samtools/idxstats/main.nf | 2 +- modules/nf-core/samtools/index/main.nf | 3 +-- subworkflows/local/assemble/main.nf | 2 +- subworkflows/local/bam_sort_stat/main.nf | 4 ++-- subworkflows/local/mapping/map_sr/main.nf | 18 +++++++++--------- .../local/mapping/map_to_assembly/main.nf | 10 +++++----- subworkflows/local/mapping/map_to_ref/main.nf | 6 +++--- .../local/polishing/pilon/polish_pilon/main.nf | 2 +- .../local/polishing/pilon/run_pilon/main.nf | 8 ++++---- .../nf-core/bam_sort_stats_samtools/main.nf | 17 ++++------------- .../nf-core/bam_stats_samtools/main.nf | 8 ++++---- 14 files changed, 40 insertions(+), 50 deletions(-) diff --git a/docs/output.md b/docs/output.md index 6b13ad6e..d3f9fe7c 100644 --- a/docs/output.md +++ b/docs/output.md @@ -271,19 +271,19 @@ The files in the alignment folder have the following base name structure: - `QC/` - `alignments/`: alignments to assemblies - `_.bam` Alignment - - `_.bai` bam index file + - `_.csi` bam index file - `_.stats` comprehensive statistics from alignment file - `_.idxstats` alignment summary statistics - `_.flagstat` number of alignments for each FLAG type - `shortreads/`: folder containing short read mapping for pilon - `_shortreads.bam` Alignment - - `_shortreads.bai` bam index file + - `_shortreads.csi` bam index file - `_shortreads.stats` comprehensive statistics from alignment file - `_shortreads.idxstats` alignment summary statistics - `_shortreads.flagstat` number of alignments for each FLAG type - `reference/`: folder containing alignment of long reads to reference - `_to_reference.bam` Alignment - - `_to_reference.bai` bam index file + - `_to_reference.csi` bam index file - `_to_reference.stats` comprehensive statistics from alignment file - `_to_reference.idxstats` alignment summary statistics - `_to_reference.flagstat` number of alignments for each FLAG type diff --git a/modules/nf-core/pilon/main.nf b/modules/nf-core/pilon/main.nf index 92cac75a..80145c98 100644 --- a/modules/nf-core/pilon/main.nf +++ b/modules/nf-core/pilon/main.nf @@ -9,7 +9,7 @@ process PILON { input: tuple val(meta), path(fasta) - tuple val(meta2), path(bam), path(bai) + tuple val(meta2), path(bam), path(csi) val pilon_mode output: diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf index c23f3a5c..a4119377 100644 --- a/modules/nf-core/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -8,7 +8,7 @@ process SAMTOOLS_FLAGSTAT { 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: - tuple val(meta), path(bam), path(bai) + tuple val(meta), path(bam), path(csi) output: tuple val(meta), path("*.flagstat"), emit: flagstat diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf index e2bb6b20..8b4aa4d5 100644 --- a/modules/nf-core/samtools/idxstats/main.nf +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -8,7 +8,7 @@ process SAMTOOLS_IDXSTATS { 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: - tuple val(meta), path(bam), path(bai) + tuple val(meta), path(bam), path(csi) output: tuple val(meta), path("*.idxstats"), emit: idxstats diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 31175610..1bce9303 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -11,7 +11,6 @@ process SAMTOOLS_INDEX { tuple val(meta), path(input) output: - tuple val(meta), path("*.bai") , optional:true, emit: bai tuple val(meta), path("*.csi") , optional:true, emit: csi tuple val(meta), path("*.crai"), optional:true, emit: crai path "versions.yml" , emit: versions @@ -37,7 +36,7 @@ process SAMTOOLS_INDEX { stub: def args = task.ext.args ?: '' def extension = file(input).getExtension() == 'cram' ? - "crai" : args.contains("-c") ? "csi" : "bai" + "crai" : "csi" """ touch ${input}.${extension} diff --git a/subworkflows/local/assemble/main.nf b/subworkflows/local/assemble/main.nf index 1030beb9..d97b43a2 100644 --- a/subworkflows/local/assemble/main.nf +++ b/subworkflows/local/assemble/main.nf @@ -158,7 +158,7 @@ workflow ASSEMBLE { */ if (params.skip_alignments) { // Sample sheet layout when skipping assembly and mapping - // sample,ontreads,assembly,ref_fasta,ref_gff,assembly_bam,assembly_bai,ref_bam + // sample,ontreads,assembly,ref_fasta,ref_gff,assembly_bam,assembly_csi,ref_bam ch_input .map { row -> [row.meta, row.ref_bam] } .set { ch_ref_bam } diff --git a/subworkflows/local/bam_sort_stat/main.nf b/subworkflows/local/bam_sort_stat/main.nf index 89f8cdf4..c0bd5957 100644 --- a/subworkflows/local/bam_sort_stat/main.nf +++ b/subworkflows/local/bam_sort_stat/main.nf @@ -21,12 +21,12 @@ workflow BAM_INDEX_STATS_SAMTOOLS { SAMTOOLS_INDEX(bam) - BAM_STATS_SAMTOOLS(bam.join(SAMTOOLS_INDEX.out.bai, by: [0]), fasta) + BAM_STATS_SAMTOOLS(bam.join(SAMTOOLS_INDEX.out.csi, by: [0]), fasta) versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions).mix(BAM_STATS_SAMTOOLS.out.versions) emit: - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] diff --git a/subworkflows/local/mapping/map_sr/main.nf b/subworkflows/local/mapping/map_sr/main.nf index 838d7454..fe849877 100644 --- a/subworkflows/local/mapping/map_sr/main.nf +++ b/subworkflows/local/mapping/map_sr/main.nf @@ -14,33 +14,33 @@ workflow MAP_SR { .join(genome_assembly) .set { map_assembly } - ALIGN_SHORT(map_assembly, true, 'bai', false, false) + ALIGN_SHORT(map_assembly, true, 'csi', false, false) versions = ch_versions.mix(ALIGN_SHORT.out.versions) ALIGN_SHORT.out.bam.set { aln_to_assembly_bam } - ALIGN_SHORT.out.index.set { aln_to_assembly_bai } + ALIGN_SHORT.out.index.set { aln_to_assembly_csi } aln_to_assembly_bam - .join(aln_to_assembly_bai) - .set { aln_to_assembly_bam_bai } + .join(aln_to_assembly_csi) + .set { aln_to_assembly_bam_csi } map_assembly .map { meta, _reads, fasta -> [ meta, fasta ] } .set { ch_fasta } - BAM_STATS(aln_to_assembly_bam_bai, ch_fasta) + BAM_STATS(aln_to_assembly_bam_csi, ch_fasta) versions = ch_versions.mix(BAM_STATS.out.versions) aln_to_assembly_bam - .join(aln_to_assembly_bai) - .set { aln_to_assembly_bam_bai } + .join(aln_to_assembly_csi) + .set { aln_to_assembly_bam_csi } emit: aln_to_assembly_bam - aln_to_assembly_bai - aln_to_assembly_bam_bai + aln_to_assembly_csi + aln_to_assembly_bam_csi versions } diff --git a/subworkflows/local/mapping/map_to_assembly/main.nf b/subworkflows/local/mapping/map_to_assembly/main.nf index 2489a8ed..66de7a95 100644 --- a/subworkflows/local/mapping/map_to_assembly/main.nf +++ b/subworkflows/local/mapping/map_to_assembly/main.nf @@ -13,20 +13,20 @@ workflow MAP_TO_ASSEMBLY { .join(genome_assembly) .set { map_assembly } - ALIGN(map_assembly, true, 'bai', false, false) + ALIGN(map_assembly, true, 'csi', false, false) ALIGN.out.bam.set { aln_to_assembly_bam } - ALIGN.out.index.set { aln_to_assembly_bai } + ALIGN.out.index.set { aln_to_assembly_csi } map_assembly .map { meta, _reads, fasta -> [meta, fasta] } .set { ch_fasta } aln_to_assembly_bam - .join(aln_to_assembly_bai) - .set { aln_to_assembly_bam_bai } + .join(aln_to_assembly_csi) + .set { aln_to_assembly_bam_csi } - BAM_STATS(aln_to_assembly_bam_bai, ch_fasta ) + BAM_STATS(aln_to_assembly_bam_csi, ch_fasta ) versions = ch_versions.mix(ALIGN.out.versions).mix(BAM_STATS.out.versions) diff --git a/subworkflows/local/mapping/map_to_ref/main.nf b/subworkflows/local/mapping/map_to_ref/main.nf index 2293e4b8..937c342a 100644 --- a/subworkflows/local/mapping/map_to_ref/main.nf +++ b/subworkflows/local/mapping/map_to_ref/main.nf @@ -13,19 +13,19 @@ workflow MAP_TO_REF { .join(ch_refs) .set { ch_map_ref_in } - ALIGN(ch_map_ref_in, true, 'bai', false, false) + ALIGN(ch_map_ref_in, true, 'csi', false, false) ALIGN.out.bam.set { ch_aln_to_ref_bam } ch_aln_to_ref_bam .join(ALIGN.out.index) - .set { ch_aln_to_ref_bam_bai } + .set { ch_aln_to_ref_bam_csi } ch_map_ref_in .map { meta, _reads, fasta -> [meta, fasta] } .set { ch_fasta } - BAM_STATS(ch_aln_to_ref_bam_bai, ch_fasta) + BAM_STATS(ch_aln_to_ref_bam_csi, ch_fasta) versions = ch_versions.mix(ALIGN.out.versions).mix(BAM_STATS.out.versions) diff --git a/subworkflows/local/polishing/pilon/polish_pilon/main.nf b/subworkflows/local/polishing/pilon/polish_pilon/main.nf index 3b7df47a..c1324ecd 100644 --- a/subworkflows/local/polishing/pilon/polish_pilon/main.nf +++ b/subworkflows/local/polishing/pilon/polish_pilon/main.nf @@ -19,7 +19,7 @@ workflow POLISH_PILON { ch_versions = ch_versions.mix(MAP_SR.out.versions) - RUN_PILON(assembly, MAP_SR.out.aln_to_assembly_bam_bai) + RUN_PILON(assembly, MAP_SR.out.aln_to_assembly_bam_csi) RUN_PILON.out.improved_assembly.set { pilon_polished } diff --git a/subworkflows/local/polishing/pilon/run_pilon/main.nf b/subworkflows/local/polishing/pilon/run_pilon/main.nf index e2c29efb..c4854a31 100644 --- a/subworkflows/local/polishing/pilon/run_pilon/main.nf +++ b/subworkflows/local/polishing/pilon/run_pilon/main.nf @@ -3,15 +3,15 @@ include { PILON } from '../../../../../modules/nf-core/pilon/main' workflow RUN_PILON { take: assembly_in - aln_to_assembly_bam_bai + aln_to_assembly_bam_csi main: assembly_in - .join(aln_to_assembly_bam_bai) + .join(aln_to_assembly_bam_csi) .set { pilon_in } PILON( - pilon_in.map { meta, assembly, _bam, _bai -> [meta, assembly] }, - pilon_in.map { meta, _assembly, bam, bai -> [meta, bam, bai] }, + pilon_in.map { meta, assembly, _bam, _csi -> [meta, assembly] }, + pilon_in.map { meta, _assembly, bam, csi -> [meta, bam, csi] }, "bam", ) versions = PILON.out.versions diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf index b716375b..f347b392 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf +++ b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -22,24 +22,15 @@ workflow BAM_SORT_STATS_SAMTOOLS { ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) SAMTOOLS_SORT.out.bam - .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) - .map { - meta, bam, bai, csi -> - if (bai) { - [ meta, bam, bai ] - } else { - [ meta, bam, csi ] - } - } - .set { ch_bam_bai } - - BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) + .map { meta, bam, csi -> [ meta, bam, csi ] } + .set { ch_bam_csi } + + BAM_STATS_SAMTOOLS ( ch_bam_csi, ch_fasta ) ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) emit: bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf index 44d4c010..aaad51c4 100644 --- a/subworkflows/nf-core/bam_stats_samtools/main.nf +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -8,19 +8,19 @@ include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/m workflow BAM_STATS_SAMTOOLS { take: - ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + ch_bam_csi // channel: [ val(meta), path(bam), path(csi) ] ch_fasta // channel: [ val(meta), path(fasta) ] main: ch_versions = Channel.empty() - SAMTOOLS_STATS ( ch_bam_bai, ch_fasta ) + SAMTOOLS_STATS ( ch_bam_csi, ch_fasta ) ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) - SAMTOOLS_FLAGSTAT ( ch_bam_bai ) + SAMTOOLS_FLAGSTAT ( ch_bam_csi ) ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) - SAMTOOLS_IDXSTATS ( ch_bam_bai ) + SAMTOOLS_IDXSTATS ( ch_bam_csi ) ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) emit: