From 589ff090a1fc5e6233c37a92e5f6eb5f689ef01d Mon Sep 17 00:00:00 2001 From: Thomas Harrison Date: Wed, 9 Apr 2025 15:38:50 -0600 Subject: [PATCH 01/38] Adding in nano plot updates --- modules/nf-core/nanoplot/main.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/nf-core/nanoplot/main.nf b/modules/nf-core/nanoplot/main.nf index ca0d8454..45f8d943 100644 --- a/modules/nf-core/nanoplot/main.nf +++ b/modules/nf-core/nanoplot/main.nf @@ -28,6 +28,8 @@ process NANOPLOT { NanoPlot \\ $args \\ -t $task.cpus \\ + -p $meta.id \\ + --tsv_stats \\ $input_file cat <<-END_VERSIONS > versions.yml "${task.process}": From 79b1303e9c23554728fe99f68eb7c0e1dfd4305c Mon Sep 17 00:00:00 2001 From: Eduard Casas <123982193+eduard-watchmakergenomics@users.noreply.github.com> Date: Thu, 17 Apr 2025 09:38:06 -0600 Subject: [PATCH 02/38] Added RSEQC genebodycoverage --- aws_batch.config | 36 ++++++++++++++++++++++ conf/modules.config | 11 +++++++ modules/local/rseqc_genebodycoverage.nf | 33 ++++++++++++++++++++ subworkflows/local/bedtools_ucsc_bigbed.nf | 1 + workflows/nanoseq.nf | 15 +++++++++ 5 files changed, 96 insertions(+) create mode 100644 aws_batch.config create mode 100644 modules/local/rseqc_genebodycoverage.nf diff --git a/aws_batch.config b/aws_batch.config new file mode 100644 index 00000000..e2eb8984 --- /dev/null +++ b/aws_batch.config @@ -0,0 +1,36 @@ +/* +======================================================================================== + wmg_nextflow/masterworkflow Nextflow AWS Batch config file +======================================================================================== + Default config options for AWS Batch +---------------------------------------------------------------------------------------- +*/ + + +params { + awsqueue = 'nextflow-with-dockerhub-aws-batch-large' + awsregion = 'us-west-2' + run = 'default' + // Max resource options + max_memory = '256.GB' + max_cpus = 256 + max_time = '240.h' + outdir = 
"s3://watchmaker-lts/nanoseq/${params.run}/" +} + + + + +process { + executor = 'awsbatch' + queue = 'nextflow-with-dockerhub-aws-batch-large' +} + +aws { + batch { + cliPath = '/home/ec2-user/miniconda/bin/aws' + } + region = 'us-west-2' +} + +workDir = "s3://watchmaker-lts/nanoseq/work/" diff --git a/conf/modules.config b/conf/modules.config index ba13b442..2616aaed 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -535,6 +535,17 @@ if (params.call_variants) { } if (!params.skip_quantification) { + process { + withName: RSEQC_GENEBODYCOVERAGE { + publishDir = [ + path: { "${params.outdir}/rseqc" }, + mode: 'copy', + enabled: true, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + if (params.quantification_method == "bambu") { process { withName: BAMBU { diff --git a/modules/local/rseqc_genebodycoverage.nf b/modules/local/rseqc_genebodycoverage.nf new file mode 100644 index 00000000..346ff230 --- /dev/null +++ b/modules/local/rseqc_genebodycoverage.nf @@ -0,0 +1,33 @@ +process RSEQC_GENEBODYCOVERAGE { + label 'process_high' + container "quay.io/biocontainers/rseqc:3.0.1--py37h516909a_1" + + input: + tuple path(bam), path(bai), path(bed12) + + output: + path("*.pdf") , emit: pdf + path("*.geneBodyCoverage.txt") , emit: rna_txt_ch + path("versions.yml") , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def name = bam.getName().replaceAll(/\.bam$/, '') + + """ + geneBody_coverage.py \\ + $args \\ + --refgene=$bed12 \\ + --input=$bam \\ + --minimum_length=100 \\ + --out-prefix=${name} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(geneBody_coverage.py --version | sed -e "s/geneBody_coverage.py //g") + END_VERSIONS + """ +} diff --git a/subworkflows/local/bedtools_ucsc_bigbed.nf b/subworkflows/local/bedtools_ucsc_bigbed.nf index 0a8d94b3..33216e77 100644 --- a/subworkflows/local/bedtools_ucsc_bigbed.nf +++ 
b/subworkflows/local/bedtools_ucsc_bigbed.nf @@ -26,6 +26,7 @@ workflow BEDTOOLS_UCSC_BIGBED { emit: bedtools_version + ch_bed12 ch_bigbed bed12tobigbed_version } diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index ba1c377e..d6fc2975 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -105,6 +105,7 @@ include { GET_NANOLYSE_FASTA } from '../modules/local/get_nanolyse_fasta' include { QCAT } from '../modules/local/qcat' include { BAM_RENAME } from '../modules/local/bam_rename' include { BAMBU } from '../modules/local/bambu' +include { RSEQC_GENEBODYCOVERAGE} from '../modules/local/rseqc_genebodycoverage' include { MULTIQC } from '../modules/local/multiqc' /* @@ -387,6 +388,20 @@ workflow NANOSEQ{ ch_featurecounts_gene_multiqc = QUANTIFY_STRINGTIE_FEATURECOUNTS.out.featurecounts_gene_multiqc.ifEmpty([]) ch_featurecounts_transcript_multiqc = QUANTIFY_STRINGTIE_FEATURECOUNTS.out.featurecounts_transcript_multiqc.ifEmpty([]) } + + ch_view_sortbam.subscribe { item -> println "ch_view_sortbam: $item" } + + ch_view_sortbam + .join( BEDTOOLS_UCSC_BIGBED.out.ch_bed12 ) + .map { it -> [ it[3], it[4], it[6] ] } + .set { ch_rseqc } + + ch_rseqc.subscribe { item -> println "ch_rseqc: $item" } + + RSEQC_GENEBODYCOVERAGE ( + ch_rseqc + ) + if (!params.skip_differential_analysis) { /* From 9664705b3029c85f97d1ba0925f1c8297d1123bb Mon Sep 17 00:00:00 2001 From: Eduard Casas <123982193+eduard-watchmakergenomics@users.noreply.github.com> Date: Thu, 17 Apr 2025 11:14:07 -0600 Subject: [PATCH 03/38] Tidying --- workflows/nanoseq.nf | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index d6fc2975..cbba02ca 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -388,18 +388,11 @@ workflow NANOSEQ{ ch_featurecounts_gene_multiqc = QUANTIFY_STRINGTIE_FEATURECOUNTS.out.featurecounts_gene_multiqc.ifEmpty([]) ch_featurecounts_transcript_multiqc = 
QUANTIFY_STRINGTIE_FEATURECOUNTS.out.featurecounts_transcript_multiqc.ifEmpty([]) } - - ch_view_sortbam.subscribe { item -> println "ch_view_sortbam: $item" } - - ch_view_sortbam - .join( BEDTOOLS_UCSC_BIGBED.out.ch_bed12 ) - .map { it -> [ it[3], it[4], it[6] ] } - .set { ch_rseqc } - - ch_rseqc.subscribe { item -> println "ch_rseqc: $item" } RSEQC_GENEBODYCOVERAGE ( - ch_rseqc + ch_view_sortbam + .join( BEDTOOLS_UCSC_BIGBED.out.ch_bed12 ) + .map { it -> [ it[3], it[4], it[6] ] } ) if (!params.skip_differential_analysis) { From ff89141f3103c98a5031f99414f8f051ca2f13ec Mon Sep 17 00:00:00 2001 From: Eduard Casas <123982193+eduard-watchmakergenomics@users.noreply.github.com> Date: Tue, 22 Apr 2025 14:43:40 -0600 Subject: [PATCH 04/38] using our wmg ecr cache --- modules/local/rseqc_genebodycoverage.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/rseqc_genebodycoverage.nf b/modules/local/rseqc_genebodycoverage.nf index 346ff230..c911a823 100644 --- a/modules/local/rseqc_genebodycoverage.nf +++ b/modules/local/rseqc_genebodycoverage.nf @@ -1,6 +1,6 @@ process RSEQC_GENEBODYCOVERAGE { label 'process_high' - container "quay.io/biocontainers/rseqc:3.0.1--py37h516909a_1" + container "912684371407.dkr.ecr.us-west-2.amazonaws.com/quay.io/biocontainers/rseqc:3.0.1--py37h516909a_1" input: tuple path(bam), path(bai), path(bed12) From 9f3cea8f9e5fd57e2c3b6d85fe22e6d5b63d0c32 Mon Sep 17 00:00:00 2001 From: Dave Matten Date: Thu, 1 May 2025 17:19:39 +0200 Subject: [PATCH 05/38] updated process_medium to process_high in fastqc main --- modules/nf-core/fastqc/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 9ae58381..e4d47b97 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -1,6 +1,6 @@ process FASTQC { tag "$meta.id" - label 'process_medium' + label 'process_high' conda "bioconda::fastqc=0.11.9" container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? From 1381f5e07b9f23ee280ca4f0b734f85ceba77bda Mon Sep 17 00:00:00 2001 From: Thomas Harrison Date: Fri, 23 May 2025 14:50:45 -0600 Subject: [PATCH 06/38] first crack at modification --- modules/nf-core/seqtk/main.nf | 58 +++++++++++++++++++++++++++++++++++ workflows/nanoseq.nf | 34 ++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 modules/nf-core/seqtk/main.nf diff --git a/modules/nf-core/seqtk/main.nf b/modules/nf-core/seqtk/main.nf new file mode 100644 index 00000000..83a03555 --- /dev/null +++ b/modules/nf-core/seqtk/main.nf @@ -0,0 +1,58 @@ +process SEQTK_SAMPLE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/seqtk:1.4--he4a0461_1' : + 'biocontainers/seqtk:1.4--he4a0461_1' }" + + input: + tuple val(meta), path(reads), val(sample_size) + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (!(args ==~ /.*\ -s\ ?[0-9]+.*/)) { + args += " -s100" + } + if ( !sample_size ) { + error "SEQTK/SAMPLE must have a sample_size value included" + } + """ + printf "%s\\n" $reads | while read f; + do + seqtk \\ + sample \\ + $args \\ + \$f \\ + $sample_size \\ + | gzip --no-name > ${prefix}_\$(basename \$f) + done + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + echo "" | gzip > ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ 
.*\$//') + END_VERSIONS + """ + +} \ No newline at end of file diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index cbba02ca..9f9f56c6 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -89,6 +89,12 @@ if (!params.skip_quantification) { } } +if (params.downsample_depth) { + if (params.downsample_depth < 1 ) { + exit 1, "Invalid downsampling value: ${params.downsample_depth}. Must be greater than 0." + } +} + //////////////////////////////////////////////////// /* -- CONFIG FILES -- */ //////////////////////////////////////////////////// @@ -112,6 +118,7 @@ include { MULTIQC } from '../modules/local/multiqc' * SUBWORKFLOW: Consisting of a mix of local and nf-core/modules */ + include { INPUT_CHECK } from '../subworkflows/local/input_check' include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' include { QCFASTQ_NANOPLOT_FASTQC } from '../subworkflows/local/qcfastq_nanoplot_fastqc' @@ -135,6 +142,7 @@ include { RNA_FUSIONS_JAFFAL } from '../subworkflows/local/rna_fus * MODULE: Installed directly from nf-core/modules */ include { NANOLYSE } from '../modules/nf-core/nanolyse/main' +include { SEQTK_SAMPLE } from '../modules/nf-core/seqtk/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* @@ -210,6 +218,32 @@ workflow NANOSEQ{ ch_fastq = Channel.empty() } } + // check if we need to do downsampling on ch_fastq if so then do it and update + + if (params.downsample_depth) { + /* + * MODULE: Downsample fastq files using seqtk + */ + ch_fastq + .map { it -> [ it[0], it[1], params.downsample_depth ] } + .set { ch_for_seqtk } + } + + SEQTK_SAMPLE( ch_for_seqtk ) + ch_software_versions = ch_software_versions.mix(SEQTK_SAMPLE.out.versions) + + SEQTK_SAMPLE.out.reads.join(ch_fastq).set{ joined_seqtk} + + // meta, new_reads, barcode, fasta, gtf + + joined_seqtk + .map { it -> [ it[0], it[1], it[3], it[4], it[5], it[6] ] } + .set { ch_fastq } + + + // step one use a map call to filter 
down the params to just meta and fastq + // step two Running seqtk on the the filtered channel + // step three Join the downsampled fastq with the old fastq channel and creat a output channel that the tools expect if (params.run_nanolyse) { ch_fastq From f7dffeb5df9953b06664229b9b2ac63ee74a87f6 Mon Sep 17 00:00:00 2001 From: Thomas Harrison Date: Fri, 23 May 2025 15:05:06 -0600 Subject: [PATCH 07/38] Have to qualify the docker path --- modules/nf-core/seqtk/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/seqtk/main.nf b/modules/nf-core/seqtk/main.nf index 83a03555..74c1163d 100644 --- a/modules/nf-core/seqtk/main.nf +++ b/modules/nf-core/seqtk/main.nf @@ -5,7 +5,7 @@ process SEQTK_SAMPLE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/seqtk:1.4--he4a0461_1' : - 'biocontainers/seqtk:1.4--he4a0461_1' }" + 'quay.io/biocontainers/seqtk:1.4--he4a0461_1' }" input: tuple val(meta), path(reads), val(sample_size) From 9350b887595c29cc80c5be469722826a953543e2 Mon Sep 17 00:00:00 2001 From: Thomas Harrison Date: Fri, 23 May 2025 15:17:21 -0600 Subject: [PATCH 08/38] moving closing bracket down --- workflows/nanoseq.nf | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index 9f9f56c6..eaa6451a 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -221,29 +221,20 @@ workflow NANOSEQ{ // check if we need to do downsampling on ch_fastq if so then do it and update if (params.downsample_depth) { - /* - * MODULE: Downsample fastq files using seqtk - */ + ch_fastq .map { it -> [ it[0], it[1], params.downsample_depth ] } .set { ch_for_seqtk } - } SEQTK_SAMPLE( ch_for_seqtk ) ch_software_versions = ch_software_versions.mix(SEQTK_SAMPLE.out.versions) SEQTK_SAMPLE.out.reads.join(ch_fastq).set{ joined_seqtk} - // meta, 
new_reads, barcode, fasta, gtf - joined_seqtk .map { it -> [ it[0], it[1], it[3], it[4], it[5], it[6] ] } .set { ch_fastq } - - - // step one use a map call to filter down the params to just meta and fastq - // step two Running seqtk on the the filtered channel - // step three Join the downsampled fastq with the old fastq channel and creat a output channel that the tools expect + } if (params.run_nanolyse) { ch_fastq @@ -422,10 +413,10 @@ workflow NANOSEQ{ ch_featurecounts_gene_multiqc = QUANTIFY_STRINGTIE_FEATURECOUNTS.out.featurecounts_gene_multiqc.ifEmpty([]) ch_featurecounts_transcript_multiqc = QUANTIFY_STRINGTIE_FEATURECOUNTS.out.featurecounts_transcript_multiqc.ifEmpty([]) } - + RSEQC_GENEBODYCOVERAGE ( ch_view_sortbam - .join( BEDTOOLS_UCSC_BIGBED.out.ch_bed12 ) + .join( BEDTOOLS_UCSC_BIGBED.out.ch_bed12 ) .map { it -> [ it[3], it[4], it[6] ] } ) From eee1d0517d7ddbab6718999a9d7d43e692205aec Mon Sep 17 00:00:00 2001 From: Eduard Casas <123982193+eduard-watchmakergenomics@users.noreply.github.com> Date: Tue, 3 Jun 2025 14:46:27 -0600 Subject: [PATCH 09/38] Added parameters to publish the files --- modules/nf-core/nanoplot/main.nf | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/nf-core/nanoplot/main.nf b/modules/nf-core/nanoplot/main.nf index 45f8d943..d9b1f634 100644 --- a/modules/nf-core/nanoplot/main.nf +++ b/modules/nf-core/nanoplot/main.nf @@ -27,6 +27,9 @@ process NANOPLOT { """ NanoPlot \\ $args \\ + --store \\ + --raw \\ + --tsv_stats \\ -t $task.cpus \\ -p $meta.id \\ --tsv_stats \\ From 38108f373a71995d1efd109bfd706cae193851e2 Mon Sep 17 00:00:00 2001 From: Eduard Casas <123982193+eduard-watchmakergenomics@users.noreply.github.com> Date: Tue, 3 Jun 2025 14:47:28 -0600 Subject: [PATCH 10/38] Added parameters to publish the files --- modules/nf-core/nanoplot/main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/nf-core/nanoplot/main.nf b/modules/nf-core/nanoplot/main.nf index d9b1f634..dbaa48bc 100644 --- 
a/modules/nf-core/nanoplot/main.nf +++ b/modules/nf-core/nanoplot/main.nf @@ -29,7 +29,6 @@ process NANOPLOT { $args \\ --store \\ --raw \\ - --tsv_stats \\ -t $task.cpus \\ -p $meta.id \\ --tsv_stats \\ From 1a49de03be1ab9d7fa9d1285bd7fc4d4a5a0c113 Mon Sep 17 00:00:00 2001 From: Thomas Harrison Date: Tue, 8 Jul 2025 08:17:15 -0600 Subject: [PATCH 11/38] First commit of restrander --- conf/modules.config | 12 ++++++++++ modules/local/restrander.nf | 36 +++++++++++++++++++++++++++++ workflows/nanoseq.nf | 45 +++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+) create mode 100644 modules/local/restrander.nf diff --git a/conf/modules.config b/conf/modules.config index 2616aaed..40a066c6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -45,6 +45,16 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + + // Publish dir for RESTRANDER + withName: RESTRANDER { + publishDir = [ + path: { "${params.outdir}/restrander" }, + mode: 'copy', + enabled: true, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } } if (!params.skip_demultiplexing) { @@ -467,6 +477,8 @@ if (params.call_variants) { ] } } + + } if (params.structural_variant_caller == 'sniffles') { process { diff --git a/modules/local/restrander.nf b/modules/local/restrander.nf new file mode 100644 index 00000000..70197c9d --- /dev/null +++ b/modules/local/restrander.nf @@ -0,0 +1,36 @@ +process RESTRANDER { + tag "$meta.id" + label 'process_medium' + + + container "${'912684371407.dkr.ecr.us-west-2.amazonaws.com/restrander:1.0'}" + + input: + tuple val(meta), path(reads), path(input_config) + + output: + tuple val(meta), path("*_restrander.fq.gz"), emit: reads + tuple val(meta), path("*_restrander-unknowns.fq.gz"), emit: unknown_reads + tuple val(meta), path("*.restrander.json"), emit: metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + def prefix = task.ext.prefix ?: "${meta.id}" + + // _restrander-unknowns.fq.gz + + script: + """ + /restrander \\ + ${reads} \\ + ${prefix}_restrander.fq.gz \\ + ${input_config} > ${prefix}.restrander.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + restrander: v1.0.1 + END_VERSIONS + """ +} diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index eaa6451a..20a7e4c3 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -143,6 +143,7 @@ include { RNA_FUSIONS_JAFFAL } from '../subworkflows/local/rna_fus */ include { NANOLYSE } from '../modules/nf-core/nanolyse/main' include { SEQTK_SAMPLE } from '../modules/nf-core/seqtk/main' +include { RESTRANDER } from '../modules/local/restrander' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* @@ -256,6 +257,7 @@ workflow NANOSEQ{ * MODULE: DNA contaminant removal using NanoLyse */ NANOLYSE ( ch_fastq_nanolyse, ch_nanolyse_fasta ) + NANOLYSE.out.fastq .join( ch_sample ) .map { it -> [ it[0], it[1], it[3], it[4], it[5], it[6] ]} @@ -263,6 +265,46 @@ workflow NANOSEQ{ 
ch_software_versions = ch_software_versions.mix(NANOLYSE.out.versions.first().ifEmpty(null)) } + // If cDNA then we must run restrander and merge it back with samples + // that restrander cant run on and then bubble dowstream + + /* + 1. We know these are cDNA or direct RNA lets use a branch call to seperate into: + + a. direct RNA where nothing is done + b. cDNA but there is no json config where nothing is done + c. cDNA with json config where we run restrander + + + */ + + if (params.protocol == 'cDNA'){ + ch_fastq.branch{ + config_provided: it[0].restrander_config != null && it[0].restrander_config != '' + no_config: it[0].restrander_config == null || it[0].restrander_config == '' + }.set { ch_fastq_branch } + + ch_fastq_branch.config_provided.map { it -> [ it[0], it[1], it[0].restrander_config] } + .set { ch_fastq_restrander } + + RESTRANDER ( ch_fastq_restrander ) + + // merge restrander fq back with the tuples before restander + // pluck out old fastqs + // merge it back with non-restrandered fastqs + + RESTRANDER.out.reads + .join(ch_fastq_branch.config_provided) + .flatten() + .map { it -> [ it[0], it[1], it[3], it[4], it[5], it[6] ] } + .set { ch_fastq_restrandered } + + ch_fastq_restrandered.mix(ch_fastq_branch.no_config).set { ch_fastq } + + // Also mix in versions and bubble up metrics to somewhere useful + + } + ch_fastqc_multiqc = Channel.empty() if (!params.skip_qc) { @@ -387,6 +429,9 @@ workflow NANOSEQ{ // MULTIPLE_CONDITIONS = ch_sample.map { it -> it[0].split('_')[0..-2].join('_') }.unique().count().val > 1 ch_r_version = Channel.empty() + + + if (params.quantification_method == 'bambu') { ch_sample .map { it -> [ it[2], it[3] ]} From eef5c6e7a104312a183ae8e24054d75e4c9f9d42 Mon Sep 17 00:00:00 2001 From: Thomas Harrison Date: Tue, 8 Jul 2025 08:42:18 -0600 Subject: [PATCH 12/38] squash me --- workflows/nanoseq.nf | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index 20a7e4c3..3cff6508 100644 
--- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -278,15 +278,24 @@ workflow NANOSEQ{ */ + + + ch_fastq.view { "ch_fastq is: ${it}" } if (params.protocol == 'cDNA'){ + + ch_fastq.branch{ config_provided: it[0].restrander_config != null && it[0].restrander_config != '' no_config: it[0].restrander_config == null || it[0].restrander_config == '' }.set { ch_fastq_branch } + ch_fastq_branch.view{ "branch is: ${it}"} + ch_fastq_branch.config_provided.map { it -> [ it[0], it[1], it[0].restrander_config] } .set { ch_fastq_restrander } + + RESTRANDER ( ch_fastq_restrander ) // merge restrander fq back with the tuples before restander From a66497f7130e2bf9affc622229e6707c5cb276af Mon Sep 17 00:00:00 2001 From: Thomas Harrison Date: Tue, 8 Jul 2025 08:44:07 -0600 Subject: [PATCH 13/38] squash me --- workflows/nanoseq.nf | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index 3cff6508..cee7be0f 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -279,8 +279,6 @@ workflow NANOSEQ{ */ - - ch_fastq.view { "ch_fastq is: ${it}" } if (params.protocol == 'cDNA'){ @@ -289,7 +287,7 @@ workflow NANOSEQ{ no_config: it[0].restrander_config == null || it[0].restrander_config == '' }.set { ch_fastq_branch } - ch_fastq_branch.view{ "branch is: ${it}"} + ch_fastq_branch.view() ch_fastq_branch.config_provided.map { it -> [ it[0], it[1], it[0].restrander_config] } .set { ch_fastq_restrander } From 23c3805cf0ed08ac3786902199087435b0da0719 Mon Sep 17 00:00:00 2001 From: Thomas Harrison Date: Tue, 8 Jul 2025 08:45:36 -0600 Subject: [PATCH 14/38] squash me --- workflows/nanoseq.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index cee7be0f..8d5f0c10 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -287,7 +287,9 @@ workflow NANOSEQ{ no_config: it[0].restrander_config == null || it[0].restrander_config == '' }.set { ch_fastq_branch } - 
ch_fastq_branch.view() + ch_fastq_branch.config_provided.view{"Config provided: ${it}"} + + ch_fastq_branch.no_config.view{"No config provided: ${it}"} ch_fastq_branch.config_provided.map { it -> [ it[0], it[1], it[0].restrander_config] } .set { ch_fastq_restrander } From 837a16663752f0eaf6fa57f139ad0de485a7bdfa Mon Sep 17 00:00:00 2001 From: Thomas Harrison Date: Tue, 8 Jul 2025 08:48:18 -0600 Subject: [PATCH 15/38] squash me --- workflows/nanoseq.nf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index 8d5f0c10..642a0ce6 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -196,6 +196,10 @@ workflow NANOSEQ{ INPUT_CHECK ( ch_input, ch_input_path ) .set { ch_sample } + ch_sample.view{"Sample $it"} + + + if (!params.skip_demultiplexing) { /* @@ -287,9 +291,6 @@ workflow NANOSEQ{ no_config: it[0].restrander_config == null || it[0].restrander_config == '' }.set { ch_fastq_branch } - ch_fastq_branch.config_provided.view{"Config provided: ${it}"} - - ch_fastq_branch.no_config.view{"No config provided: ${it}"} ch_fastq_branch.config_provided.map { it -> [ it[0], it[1], it[0].restrander_config] } .set { ch_fastq_restrander } From f8e714c85df2309a67843200c7b65efb842f02ec Mon Sep 17 00:00:00 2001 From: Thomas Harrison Date: Tue, 8 Jul 2025 08:54:02 -0600 Subject: [PATCH 16/38] squash me --- subworkflows/local/input_check.nf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index f38041df..56f35cbb 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -30,6 +30,10 @@ def get_sample_info(LinkedHashMap sample, LinkedHashMap genomeMap) { def meta = [:] meta.id = sample.sample + if(sample.restrander_config && sample.restrander_config != '') { + meta.restrander_config = sample.restrander_config + } + // Resolve fasta and gtf file if using iGenomes def fasta = false def gtf = false From 
10a6a997ba0cd11f1e22cbeff043eeff4de0a463 Mon Sep 17 00:00:00 2001 From: Thomas Harrison Date: Tue, 8 Jul 2025 08:56:40 -0600 Subject: [PATCH 17/38] squash me --- subworkflows/local/input_check.nf | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 56f35cbb..e7e14c3d 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -29,10 +29,8 @@ workflow INPUT_CHECK { def get_sample_info(LinkedHashMap sample, LinkedHashMap genomeMap) { def meta = [:] meta.id = sample.sample + meta.restrander_config = sample.restrander_config - if(sample.restrander_config && sample.restrander_config != '') { - meta.restrander_config = sample.restrander_config - } // Resolve fasta and gtf file if using iGenomes def fasta = false From 8c72143b561d430ddb42597b77a77f1bd3e8887c Mon Sep 17 00:00:00 2001 From: Thomas Harrison Date: Tue, 8 Jul 2025 08:59:37 -0600 Subject: [PATCH 18/38] squash me --- subworkflows/local/input_check.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index e7e14c3d..ad8f9440 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -27,6 +27,7 @@ workflow INPUT_CHECK { // Function to resolve fasta and gtf file if using iGenomes // Returns [ sample, input_file, barcode, fasta, gtf, is_transcripts, annotation_str, nanopolish_fast5 ] def get_sample_info(LinkedHashMap sample, LinkedHashMap genomeMap) { + print(sample) def meta = [:] meta.id = sample.sample meta.restrander_config = sample.restrander_config From acf6ee26fdb4754a363d531d27b8be69a2b06ee5 Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Tue, 22 Jul 2025 16:41:21 +0200 Subject: [PATCH 19/38] Edited check_samplesheet.py so that the restrander config file can be passed through the pipeline. 
--- bin/check_samplesheet.py | 20 ++++++++++---------- modules/local/restrander.nf | 9 ++++----- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index a10f4479..32a54035 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -49,11 +49,11 @@ def read_head(handle, num_lines=10): def check_samplesheet(file_in, updated_path, file_out): """ This function checks that the samplesheet follows the following structure: - group,replicate,barcode,input_file,fasta,gtf - MCF7,1,,MCF7_directcDNA_replicate1.fastq.gz,genome.fa, - MCF7,2,,MCF7_directcDNA_replicate3.fastq.gz,genome.fa,genome.gtf - K562,1,,K562_directcDNA_replicate1.fastq.gz,genome.fa, - K562,2,,K562_directcDNA_replicate4.fastq.gz,,transcripts.fa + group,replicate,barcode,input_file,fasta,gtf,restrander_config + MCF7,1,,MCF7_directcDNA_replicate1.fastq.gz,genome.fa,,restrander_config.json + MCF7,2,,MCF7_directcDNA_replicate3.fastq.gz,genome.fa,genome.gtf, restrander_config.json + K562,1,,K562_directcDNA_replicate1.fastq.gz,genome.fa,, + K562,2,,K562_directcDNA_replicate4.fastq.gz,,transcripts.fa, """ input_extensions = [] @@ -61,7 +61,7 @@ def check_samplesheet(file_in, updated_path, file_out): with open(file_in, "r") as fin: ## Check header MIN_COLS = 3 - HEADER = ["group", "replicate", "barcode", "input_file", "fasta", "gtf"] + HEADER = ["group", "replicate", "barcode", "input_file", "fasta", "gtf", "restrander_config"] header = fin.readline().strip().split(",") if header[: len(HEADER)] != HEADER: print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER))) @@ -80,7 +80,7 @@ def check_samplesheet(file_in, updated_path, file_out): print_error("Invalid number of populated columns (minimum = {})!".format(MIN_COLS), "Line", line) ## Check group name entries - group, replicate, barcode, input_file, fasta, gtf = lspl[: len(HEADER)] + group, replicate, barcode, input_file, fasta, gtf, 
restrander_config = lspl[: len(HEADER)] if group: if group.find(" ") != -1: print_error("Group entry contains spaces!", "Line", line) @@ -177,8 +177,8 @@ def check_samplesheet(file_in, updated_path, file_out): # is_transcripts = '1' # genome = transcriptome - ## Create sample mapping dictionary = {group: {replicate : [ barcode, input_file, genome, gtf, is_transcripts, nanopolish_fast5 ]}} - sample_info = [barcode, input_file, fasta, gtf, is_transcripts, nanopolish_fast5] + ## Create sample mapping dictionary = {group: {replicate : [ barcode, input_file, genome, gtf, is_transcripts, nanopolish_fast5, restrander_config ]}} + sample_info = [barcode, input_file, fasta, gtf, is_transcripts, nanopolish_fast5, restrander_config] if group not in sample_info_dict: sample_info_dict[group] = {} if replicate not in sample_info_dict[group]: @@ -200,7 +200,7 @@ def check_samplesheet(file_in, updated_path, file_out): make_dir(out_dir) with open(file_out, "w") as fout: fout.write( - ",".join(["sample", "barcode", "input_file", "fasta", "gtf", "is_transcripts", "nanopolish_fast5"]) + ",".join(["sample", "barcode", "input_file", "fasta", "gtf", "is_transcripts", "nanopolish_fast5", "restrander_config"]) + "\n" ) for sample in sorted(sample_info_dict.keys()): diff --git a/modules/local/restrander.nf b/modules/local/restrander.nf index 70197c9d..21ae4e1a 100644 --- a/modules/local/restrander.nf +++ b/modules/local/restrander.nf @@ -2,7 +2,6 @@ process RESTRANDER { tag "$meta.id" label 'process_medium' - container "${'912684371407.dkr.ecr.us-west-2.amazonaws.com/restrander:1.0'}" input: @@ -17,16 +16,16 @@ process RESTRANDER { when: task.ext.when == null || task.ext.when - def prefix = task.ext.prefix ?: "${meta.id}" - // _restrander-unknowns.fq.gz script: """ + prefix=\${task.ext.prefix:-${meta.id}} + /restrander \\ ${reads} \\ - ${prefix}_restrander.fq.gz \\ - ${input_config} > ${prefix}.restrander.json + \${prefix}_restrander.fq.gz \\ + ${input_config} > \${prefix}.restrander.json 
cat <<-END_VERSIONS > versions.yml "${task.process}": From 6cba1696271857680783730839e3b6b0fc478081 Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Tue, 22 Jul 2025 17:53:23 +0200 Subject: [PATCH 20/38] Rebuilt the restrander docker image with Multi_Arch support so changed which container the restrander module was pulling (from 1.0 to 1.1). --- modules/local/restrander.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/restrander.nf b/modules/local/restrander.nf index 21ae4e1a..bd03e7db 100644 --- a/modules/local/restrander.nf +++ b/modules/local/restrander.nf @@ -2,7 +2,7 @@ process RESTRANDER { tag "$meta.id" label 'process_medium' - container "${'912684371407.dkr.ecr.us-west-2.amazonaws.com/restrander:1.0'}" + container "${'912684371407.dkr.ecr.us-west-2.amazonaws.com/restrander:1.1'}" input: tuple val(meta), path(reads), path(input_config) From 1850eef517c2b499417c74522ff1056820348eda Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Tue, 22 Jul 2025 18:14:17 +0200 Subject: [PATCH 21/38] Removed /bin/bash ENTRYPOINT from the dockerfile because of a /bin.bash: /bin/bash: cannot execute binary file error. 
Have updated the pulled image version to 1.2 --- modules/local/restrander.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/restrander.nf b/modules/local/restrander.nf index bd03e7db..068c5f89 100644 --- a/modules/local/restrander.nf +++ b/modules/local/restrander.nf @@ -2,7 +2,7 @@ process RESTRANDER { tag "$meta.id" label 'process_medium' - container "${'912684371407.dkr.ecr.us-west-2.amazonaws.com/restrander:1.1'}" + container "${'912684371407.dkr.ecr.us-west-2.amazonaws.com/restrander:1.2'}" input: tuple val(meta), path(reads), path(input_config) From 03cd574782ef66d32afc51b9496b3c61d6bef15c Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Tue, 22 Jul 2025 18:21:15 +0200 Subject: [PATCH 22/38] file prefix issue --- modules/local/restrander.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/restrander.nf b/modules/local/restrander.nf index 068c5f89..cf439f4c 100644 --- a/modules/local/restrander.nf +++ b/modules/local/restrander.nf @@ -16,12 +16,12 @@ process RESTRANDER { when: task.ext.when == null || task.ext.when + + def prefix = task.ext.prefix ?: "${meta.id}" // _restrander-unknowns.fq.gz script: """ - prefix=\${task.ext.prefix:-${meta.id}} - /restrander \\ ${reads} \\ \${prefix}_restrander.fq.gz \\ From 659d2e19b4cac5bb09db20e2910c758bc2afbac3 Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Tue, 22 Jul 2025 18:24:42 +0200 Subject: [PATCH 23/38] file prefix issue --- modules/local/restrander.nf | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/modules/local/restrander.nf b/modules/local/restrander.nf index cf439f4c..57d0ba00 100644 --- a/modules/local/restrander.nf +++ b/modules/local/restrander.nf @@ -16,16 +16,15 @@ process RESTRANDER { when: task.ext.when == null || task.ext.when - - def prefix = task.ext.prefix ?: "${meta.id}" // _restrander-unknowns.fq.gz script: + def 
prefix = task.ext.prefix ?: meta.id """ /restrander \\ ${reads} \\ - \${prefix}_restrander.fq.gz \\ - ${input_config} > \${prefix}.restrander.json + ${prefix}_restrander.fq.gz \\ + ${input_config} > ${prefix}.restrander.json cat <<-END_VERSIONS > versions.yml "${task.process}": From 4e0b087b4a5607fa6c48cf1868bd11dc936b9986 Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Mon, 28 Jul 2025 13:20:20 +0200 Subject: [PATCH 24/38] Added line to see the sample sheet before processing. --- workflows/nanoseq.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index 642a0ce6..93e91c07 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -190,6 +190,8 @@ workflow NANOSEQ{ */ ch_software_versions = Channel.empty() + ch_input.view{"Input file: $it"} + /* * SUBWORKFLOW: Read in samplesheet, validate and stage input files */ From 5cb5df5b326257b1c945fc8cd2f7f6bc545f38af Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Mon, 28 Jul 2025 13:23:12 +0200 Subject: [PATCH 25/38] Tweaking for sample sheet errors. --- workflows/nanoseq.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index 93e91c07..642a0ce6 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -190,8 +190,6 @@ workflow NANOSEQ{ */ ch_software_versions = Channel.empty() - ch_input.view{"Input file: $it"} - /* * SUBWORKFLOW: Read in samplesheet, validate and stage input files */ From 85e706fc25c53456e5bb5280887132f35e0e61b9 Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Mon, 28 Jul 2025 13:30:34 +0200 Subject: [PATCH 26/38] Tweaking for sample sheet errors. 
--- workflows/nanoseq.nf | 3 +++ 1 file changed, 3 insertions(+) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index 642a0ce6..4f0c9174 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -196,6 +196,7 @@ workflow NANOSEQ{ INPUT_CHECK ( ch_input, ch_input_path ) .set { ch_sample } + // TODO: must remove line below ch_sample.view{"Sample $it"} @@ -318,6 +319,8 @@ workflow NANOSEQ{ ch_fastqc_multiqc = Channel.empty() if (!params.skip_qc) { + ch_fastq.view{"Sample $it"} + /* * SUBWORKFLOW: Fastq QC with Nanoplot and fastqc */ From fba0a089db8fa9de76828c28148a2832ae840a14 Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Mon, 28 Jul 2025 13:32:26 +0200 Subject: [PATCH 27/38] Tweaking for sample sheet errors. --- workflows/nanoseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index 4f0c9174..f1809be8 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -319,7 +319,7 @@ workflow NANOSEQ{ ch_fastqc_multiqc = Channel.empty() if (!params.skip_qc) { - ch_fastq.view{"Sample $it"} + ch_fastq.view{"fastq $it"} /* * SUBWORKFLOW: Fastq QC with Nanoplot and fastqc From 62c02d526837c77d49868d0970d1ddbab34e71ea Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Mon, 28 Jul 2025 13:48:34 +0200 Subject: [PATCH 28/38] Tweaking for sample sheet errors. 
--- workflows/nanoseq.nf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index f1809be8..2d819f85 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -304,12 +304,17 @@ workflow NANOSEQ{ // pluck out old fastqs // merge it back with non-restrandered fastqs + ch_fastq_branch.config_provided.view { "FASTQ branch: $it" } + RESTRANDER.out.reads .join(ch_fastq_branch.config_provided) .flatten() .map { it -> [ it[0], it[1], it[3], it[4], it[5], it[6] ] } .set { ch_fastq_restrandered } + // TODO: remove this + ch_fastq_restrandered.view { "Restrandered FASTQ: $it" } + ch_fastq_restrandered.mix(ch_fastq_branch.no_config).set { ch_fastq } // Also mix in versions and bubble up metrics to somewhere useful From 43e03327b261879e7319c3bdc5cc73e1c14da207 Mon Sep 17 00:00:00 2001 From: mark-alence-watchmaker Date: Tue, 29 Jul 2025 13:53:50 +0000 Subject: [PATCH 29/38] Fixing tuple structure propagation issues --- modules/local/restrander.nf | 4 +- modules/nf-core/nanoplot/main.nf | 2 +- subworkflows/local/align_minimap2.nf | 4 +- subworkflows/local/prepare_genome.nf | 14 +++--- workflows/nanoseq.nf | 70 ++++++++++++++-------------- 5 files changed, 47 insertions(+), 47 deletions(-) diff --git a/modules/local/restrander.nf b/modules/local/restrander.nf index 57d0ba00..db338dac 100644 --- a/modules/local/restrander.nf +++ b/modules/local/restrander.nf @@ -9,7 +9,7 @@ process RESTRANDER { output: tuple val(meta), path("*_restrander.fq.gz"), emit: reads - tuple val(meta), path("*_restrander-unknowns.fq.gz"), emit: unknown_reads + // tuple val(meta), path("*_restrander-unknowns.fq.gz"), emit: unknown_reads tuple val(meta), path("*.restrander.json"), emit: metrics path "versions.yml" , emit: versions @@ -19,7 +19,7 @@ process RESTRANDER { // _restrander-unknowns.fq.gz script: - def prefix = task.ext.prefix ?: meta.id + def prefix = task.ext.prefix ?: reads.getBaseName() """ /restrander \\ ${reads} \\ diff --git 
a/modules/nf-core/nanoplot/main.nf b/modules/nf-core/nanoplot/main.nf index dbaa48bc..7ba90a8b 100644 --- a/modules/nf-core/nanoplot/main.nf +++ b/modules/nf-core/nanoplot/main.nf @@ -22,7 +22,7 @@ process NANOPLOT { script: def args = task.ext.args ?: '' - def input_file = ("$ontfile".endsWith(".fastq.gz")) ? "--fastq ${ontfile}" : + def input_file = ("$ontfile".endsWith(".fastq.gz") || "$ontfile".endsWith(".fq.gz") || "$ontfile".endsWith(".fastq") || "$ontfile".endsWith(".fq")) ? "--fastq ${ontfile}" : ("$ontfile".endsWith(".txt")) ? "--summary ${ontfile}" : '' """ NanoPlot \\ diff --git a/subworkflows/local/align_minimap2.nf b/subworkflows/local/align_minimap2.nf index 70693a8a..ed0a94b8 100644 --- a/subworkflows/local/align_minimap2.nf +++ b/subworkflows/local/align_minimap2.nf @@ -21,8 +21,8 @@ workflow ALIGN_MINIMAP2 { ch_index .cross(ch_fastq) { it -> it[-1] } .flatten() - .collate(13) - .map { it -> [ it[7], it[8], it[0], it[1], it[2], it[3], it[4], it[5] ] } // [ sample, fastq, fasta, sizes, gtf, bed, is_transcripts, index ] + .collate(14) + .map { it -> [ it[7], it[8], it[0], it[1], it[2], it[3], it[4], it[5] ] } // [ meta, fastq, fasta, sizes, gtf, bed, is_transcripts, index ] .set { ch_index } /* diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index e6047cc0..189a9642 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -11,10 +11,10 @@ workflow PREPARE_GENOME { ch_fastq main: - // Get unique list of all fasta files + // Get unique list of all fasta files - reference FASTA is at position 2 ch_fastq .filter { it[2] } - .map { it -> [ it[2], it[5].toString() ] } // [ fasta, annotation_str ] + .map { it -> [ it[2], it[6].toString() ] } // [ fasta, annotation_str ] .unique() .set { ch_fastq_sizes } @@ -25,10 +25,10 @@ workflow PREPARE_GENOME { ch_chrom_sizes = GET_CHROM_SIZES.out.sizes samtools_version = GET_CHROM_SIZES.out.versions - // Get unique list of all gtf files + // 
Get unique list of all gtf files - GTF is at position 3 in the tuple ch_fastq .filter { it[3] } - .map { it -> [ it[3], it[5] ] } // [ gtf, annotation_str ] + .map { it -> [ it[3], it[6] ] } // [ gtf, annotation_str ] .unique() .set { ch_fastq_gtf } @@ -44,8 +44,8 @@ workflow PREPARE_GENOME { .map { it -> [ it[1], it[2], it[0] ] } .cross(ch_fastq) { it -> it[-1] } .flatten() - .collate(9) - .map { it -> [ it[5], it[0], it[6], it[1], it[7], it[8] ]} // [ fasta, sizes, gtf, bed, is_transcripts, annotation_str ] + .collate(10) + .map { it -> [ it[5], it[0], it[6], it[1], it[8], it[9] ]} // [ fasta, sizes, gtf, bed, is_transcripts, annotation_str ] .unique() .set { ch_fasta_index } @@ -54,7 +54,7 @@ workflow PREPARE_GENOME { */ ch_fastq .filter { it[2] } - .map { it -> [ it[0], it[2] ] } // [ gtf, annotation_str ] + .map { it -> [ it[0], it[2] ] } // [ meta, fasta ] .unique() .set { ch_fasta } diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index 2d819f85..9317e34f 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -284,42 +284,42 @@ workflow NANOSEQ{ */ - if (params.protocol == 'cDNA'){ - - - ch_fastq.branch{ - config_provided: it[0].restrander_config != null && it[0].restrander_config != '' - no_config: it[0].restrander_config == null || it[0].restrander_config == '' - }.set { ch_fastq_branch } - - - ch_fastq_branch.config_provided.map { it -> [ it[0], it[1], it[0].restrander_config] } - .set { ch_fastq_restrander } - - - - RESTRANDER ( ch_fastq_restrander ) - - // merge restrander fq back with the tuples before restander - // pluck out old fastqs - // merge it back with non-restrandered fastqs - - ch_fastq_branch.config_provided.view { "FASTQ branch: $it" } - - RESTRANDER.out.reads - .join(ch_fastq_branch.config_provided) - .flatten() - .map { it -> [ it[0], it[1], it[3], it[4], it[5], it[6] ] } - .set { ch_fastq_restrandered } - - // TODO: remove this - ch_fastq_restrandered.view { "Restrandered FASTQ: $it" } - - 
ch_fastq_restrandered.mix(ch_fastq_branch.no_config).set { ch_fastq } - - // Also mix in versions and bubble up metrics to somewhere useful - +if (params.protocol == 'cDNA'){ + + ch_fastq.branch{ + config_provided: it[0].restrander_config != null && it[0].restrander_config != '' + no_config: it[0].restrander_config == null || it[0].restrander_config == '' + }.set { ch_fastq_branch } + + ch_fastq_branch.config_provided + .map { it -> [ it[0], it[1], it[0].restrander_config] } + .set { ch_fastq_restrander } + + RESTRANDER ( ch_fastq_restrander ) + + RESTRANDER.out.reads + .join(ch_fastq_branch.config_provided, by: 0) + .map { tuple -> + println "=== DEBUGGING TUPLE STRUCTURE ===" + println "Tuple size: ${tuple.size()}" + tuple.eachWithIndex { item, index -> + println " tuple[$index] = $item (${item.getClass().getSimpleName()})" + } + println "=================================" + + def meta = tuple[0] + def restranded_files = tuple[1] + def main_restranded_file = restranded_files[1] + + def gtf_file = tuple[6].toString().split(';')[1] // Extract GTF from combined string + [ meta, main_restranded_file, tuple[3], gtf_file, tuple[4], tuple[5], tuple[6] ] } + .view { "After RESTRANDER processing: $it" } + .mix(ch_fastq_branch.no_config) + .set { ch_fastq } + + ch_software_versions = ch_software_versions.mix(RESTRANDER.out.versions.first().ifEmpty(null)) +} ch_fastqc_multiqc = Channel.empty() if (!params.skip_qc) { From 46055772fc4838b5b54d6baa7aedb9eae5cc6d83 Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Tue, 5 Aug 2025 12:03:43 +0200 Subject: [PATCH 30/38] Cleaning up the Restrander-related code and adding some comments. 
--- modules/local/restrander.nf | 9 +++------ workflows/nanoseq.nf | 32 ++++++++------------------------ 2 files changed, 11 insertions(+), 30 deletions(-) diff --git a/modules/local/restrander.nf b/modules/local/restrander.nf index db338dac..0ea848d3 100644 --- a/modules/local/restrander.nf +++ b/modules/local/restrander.nf @@ -8,16 +8,13 @@ process RESTRANDER { tuple val(meta), path(reads), path(input_config) output: - tuple val(meta), path("*_restrander.fq.gz"), emit: reads - // tuple val(meta), path("*_restrander-unknowns.fq.gz"), emit: unknown_reads - tuple val(meta), path("*.restrander.json"), emit: metrics - path "versions.yml" , emit: versions + tuple val(meta), path("*_restrander.fq.gz") , emit: reads + tuple val(meta), path("*.restrander.json") , emit: metrics + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when - // _restrander-unknowns.fq.gz - script: def prefix = task.ext.prefix ?: reads.getBaseName() """ diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index 9317e34f..5d08b205 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -270,51 +270,35 @@ workflow NANOSEQ{ ch_software_versions = ch_software_versions.mix(NANOLYSE.out.versions.first().ifEmpty(null)) } - // If cDNA then we must run restrander and merge it back with samples - // that restrander cant run on and then bubble dowstream - - /* - 1. We know these are cDNA or direct RNA lets use a branch call to seperate into: - - a. direct RNA where nothing is done - b. cDNA but there is no json config where nothing is done - c. 
cDNA with json config where we run restrander - - - */ - - if (params.protocol == 'cDNA'){ + // split the fastq channel into two branches - samples with and without restrander_config ch_fastq.branch{ config_provided: it[0].restrander_config != null && it[0].restrander_config != '' no_config: it[0].restrander_config == null || it[0].restrander_config == '' }.set { ch_fastq_branch } + // only run Restrander on the branch with config provided ch_fastq_branch.config_provided .map { it -> [ it[0], it[1], it[0].restrander_config] } .set { ch_fastq_restrander } + /* + * MODULE: Orientate and quality check cDNA reads with Restrander + */ RESTRANDER ( ch_fastq_restrander ) RESTRANDER.out.reads .join(ch_fastq_branch.config_provided, by: 0) - .map { tuple -> - println "=== DEBUGGING TUPLE STRUCTURE ===" - println "Tuple size: ${tuple.size()}" - tuple.eachWithIndex { item, index -> - println " tuple[$index] = $item (${item.getClass().getSimpleName()})" - } - println "=================================" - + .map { tuple -> def meta = tuple[0] def restranded_files = tuple[1] def main_restranded_file = restranded_files[1] - + def gtf_file = tuple[6].toString().split(';')[1] // Extract GTF from combined string [ meta, main_restranded_file, tuple[3], gtf_file, tuple[4], tuple[5], tuple[6] ] } - .view { "After RESTRANDER processing: $it" } + // merge the restranded files with the rest of the fastq files .mix(ch_fastq_branch.no_config) .set { ch_fastq } From d853ab61fe115275318f73e15e3906aeeb771717 Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Tue, 5 Aug 2025 12:10:50 +0200 Subject: [PATCH 31/38] Removing unnecessary .view statements. 
--- workflows/nanoseq.nf | 7 ------- 1 file changed, 7 deletions(-) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index 5d08b205..1dc15f3d 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -196,11 +196,6 @@ workflow NANOSEQ{ INPUT_CHECK ( ch_input, ch_input_path ) .set { ch_sample } - // TODO: must remove line below - ch_sample.view{"Sample $it"} - - - if (!params.skip_demultiplexing) { /* @@ -308,8 +303,6 @@ if (params.protocol == 'cDNA'){ ch_fastqc_multiqc = Channel.empty() if (!params.skip_qc) { - ch_fastq.view{"fastq $it"} - /* * SUBWORKFLOW: Fastq QC with Nanoplot and fastqc */ From 63bcfd495a975b4c45e57df0c45fe18d48c0b82b Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Tue, 5 Aug 2025 12:22:48 +0200 Subject: [PATCH 32/38] More cleaning. --- workflows/nanoseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index 1dc15f3d..8ecea91d 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -113,6 +113,7 @@ include { BAM_RENAME } from '../modules/local/bam_rename' include { BAMBU } from '../modules/local/bambu' include { RSEQC_GENEBODYCOVERAGE} from '../modules/local/rseqc_genebodycoverage' include { MULTIQC } from '../modules/local/multiqc' +include { RESTRANDER } from '../modules/local/restrander' /* * SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -143,7 +144,6 @@ include { RNA_FUSIONS_JAFFAL } from '../subworkflows/local/rna_fus */ include { NANOLYSE } from '../modules/nf-core/nanolyse/main' include { SEQTK_SAMPLE } from '../modules/nf-core/seqtk/main' -include { RESTRANDER } from '../modules/local/restrander' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* From bbe6ea636ac0ffa6950f5b34ea5fd3132d215fa2 Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Mon, 11 Aug 2025 12:30:28 +0200 Subject: [PATCH 33/38] 
Debugging non-restrander run errors. --- workflows/nanoseq.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index 8ecea91d..cb86f2e4 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -314,6 +314,8 @@ if (params.protocol == 'cDNA'){ ch_samtools_multiqc = Channel.empty() if (!params.skip_alignment) { + ch_fastq.view() + /* * SUBWORKFLOW: Make chromosome size file and covert GTF to BED12 */ From 5c0e5cce9244c7f6723797039051842ee5b59390 Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Mon, 11 Aug 2025 13:51:36 +0200 Subject: [PATCH 34/38] Debugging non-restrander run errors. --- workflows/nanoseq.nf | 65 +++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index cb86f2e4..2738eda2 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -265,40 +265,49 @@ workflow NANOSEQ{ ch_software_versions = ch_software_versions.mix(NANOLYSE.out.versions.first().ifEmpty(null)) } -if (params.protocol == 'cDNA'){ + if (params.protocol == 'cDNA'){ - // split the fastq channel into two branches - samples with and without restrander_config - ch_fastq.branch{ - config_provided: it[0].restrander_config != null && it[0].restrander_config != '' - no_config: it[0].restrander_config == null || it[0].restrander_config == '' - }.set { ch_fastq_branch } + // split the fastq channel into two branches - samples with and without restrander_config + ch_fastq.branch{ + config_provided: it[0].restrander_config != null && it[0].restrander_config != '' + no_config: it[0].restrander_config == null || it[0].restrander_config == '' + }.set { ch_fastq_branch } - // only run Restrander on the branch with config provided - ch_fastq_branch.config_provided - .map { it -> [ it[0], it[1], it[0].restrander_config] } - .set { ch_fastq_restrander } + // only run Restrander on the branch with config 
provided + ch_fastq_branch.config_provided + .map { it -> [ it[0], it[1], it[0].restrander_config] } + .set { ch_fastq_restrander } - /* - * MODULE: Orientate and quality check cDNA reads with Restrander - */ - RESTRANDER ( ch_fastq_restrander ) + /* + * MODULE: Orientate and quality check cDNA reads with Restrander + */ + RESTRANDER ( ch_fastq_restrander ) + + RESTRANDER.out.reads + .join(ch_fastq_branch.config_provided, by: 0) + .map { tuple -> + def meta = tuple[0] + def restranded_files = tuple[1] + def main_restranded_file = restranded_files[1] - RESTRANDER.out.reads - .join(ch_fastq_branch.config_provided, by: 0) - .map { tuple -> - def meta = tuple[0] - def restranded_files = tuple[1] - def main_restranded_file = restranded_files[1] - def gtf_file = tuple[6].toString().split(';')[1] // Extract GTF from combined string - [ meta, main_restranded_file, tuple[3], gtf_file, tuple[4], tuple[5], tuple[6] ] + [ meta, main_restranded_file, tuple[3], tuple[4], tuple[5], tuple[6] ] + } + // merge the restranded files with the rest of the fastq files + .mix(ch_fastq_branch.no_config) + .set { ch_fastq } + + ch_software_versions = ch_software_versions.mix(RESTRANDER.out.versions.first().ifEmpty(null)) } - // merge the restranded files with the rest of the fastq files - .mix(ch_fastq_branch.no_config) - .set { ch_fastq } + ch_fastq.view() - ch_software_versions = ch_software_versions.mix(RESTRANDER.out.versions.first().ifEmpty(null)) -} + + def gtf_file = ch_fastq[6].toString().split(';')[1] // Extract GTF from combined string + ch_fastq + .map { it -> [ it[0], it[1], it[2], gtf_file, it[3], it[4], it[5], it[6] ] } + .set { ch_fastq } + + ch_fastq.view() ch_fastqc_multiqc = Channel.empty() if (!params.skip_qc) { @@ -314,7 +323,7 @@ if (params.protocol == 'cDNA'){ ch_samtools_multiqc = Channel.empty() if (!params.skip_alignment) { - ch_fastq.view() + //ch_fastq.view() /* * SUBWORKFLOW: Make chromosome size file and covert GTF to BED12 From 
fc70be5ba0158ff586e2c6a732954c088c41b345 Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Mon, 11 Aug 2025 13:53:33 +0200 Subject: [PATCH 35/38] Debugging non-restrander run errors. --- workflows/nanoseq.nf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index 2738eda2..a23d9978 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -301,10 +301,9 @@ workflow NANOSEQ{ } ch_fastq.view() - - def gtf_file = ch_fastq[6].toString().split(';')[1] // Extract GTF from combined string ch_fastq - .map { it -> [ it[0], it[1], it[2], gtf_file, it[3], it[4], it[5], it[6] ] } + .map { it -> [ it[0], it[1], it[2], it[6].toString().split(';')[1], // Extract GTF from combined string + it[3], it[4], it[5], it[6] ] } .set { ch_fastq } ch_fastq.view() From d5b1ac4aa0b4eceb643cc80f74a774fddbd0f426 Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Tue, 12 Aug 2025 11:26:50 +0200 Subject: [PATCH 36/38] Debugging non-restrander run errors. --- workflows/nanoseq.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index a23d9978..1796752d 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -302,8 +302,8 @@ workflow NANOSEQ{ ch_fastq.view() ch_fastq - .map { it -> [ it[0], it[1], it[2], it[6].toString().split(';')[1], // Extract GTF from combined string - it[3], it[4], it[5], it[6] ] } + .map { it -> [ it[0], it[1], it[2], it[5].toString().split(';')[1], // Extract GTF from combined string + it[3], it[4], it[5] ] } .set { ch_fastq } ch_fastq.view() From 68dcbc3e7cba04f6f9cf2233ec41c92d9d5903e3 Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Tue, 12 Aug 2025 15:03:45 +0200 Subject: [PATCH 37/38] The bug was fixed with the last commit. Now just cleaning up the code. 
--- workflows/nanoseq.nf | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf index 1796752d..e20583e1 100644 --- a/workflows/nanoseq.nf +++ b/workflows/nanoseq.nf @@ -299,15 +299,13 @@ workflow NANOSEQ{ ch_software_versions = ch_software_versions.mix(RESTRANDER.out.versions.first().ifEmpty(null)) } - ch_fastq.view() + // Extract the GTF file from the combined string and add it as its own element in the channel ch_fastq - .map { it -> [ it[0], it[1], it[2], it[5].toString().split(';')[1], // Extract GTF from combined string + .map { it -> [ it[0], it[1], it[2], it[5].toString().split(';')[1], it[3], it[4], it[5] ] } .set { ch_fastq } - ch_fastq.view() - ch_fastqc_multiqc = Channel.empty() if (!params.skip_qc) { @@ -322,8 +320,6 @@ workflow NANOSEQ{ ch_samtools_multiqc = Channel.empty() if (!params.skip_alignment) { - //ch_fastq.view() - /* * SUBWORKFLOW: Make chromosome size file and covert GTF to BED12 */ From cc65ad186ab1aecda9981e044459087c7654c990 Mon Sep 17 00:00:00 2001 From: julietmWM <116255892+julietmWM@users.noreply.github.com> Date: Tue, 12 Aug 2025 16:01:40 +0200 Subject: [PATCH 38/38] Added Restrander information to the usage and output docs. --- docs/output.md | 17 ++++++++++++ docs/usage.md | 71 ++++++++++++++++++++++++++++++-------------------- 2 files changed, 60 insertions(+), 28 deletions(-) diff --git a/docs/output.md b/docs/output.md index 42a1a00c..5cb7af6b 100644 --- a/docs/output.md +++ b/docs/output.md @@ -46,6 +46,23 @@ _Documentation_: _Description_: If you would like to run NanoLyse on the raw FASTQ files you can provide `--run_nanolyse` when running the pipeline. By default, the pipeline will filter lambda phage reads. However, you can provide your own FASTA file of "contaminants" with `--nanolyse_fasta`. The filtered FASTQ files will contain raw reads without the specified reference sequences (default: lambda phage sequences). 
## cDNA Read Orientation + +
+Output files + +- `restrander/<sample>_restrander.fq.gz`: FASTQ file of the stranded reads. The reverse strand reads are replaced with their reverse-complements, ensuring that all reads in the output have the same orientation as the original transcripts. +- `restrander/<sample>-unknowns.*_restrander.fq.gz`: FASTQ file of the reads whose strand could not be inferred. +- `restrander/<sample>.restrander.json`: Restrander output statistics - includes artefact and strand statistics. + +
+ +_Documentation_: +[Restrander](https://github.com/mritchielab/restrander) + +_Description_: +Restrander is a program designed for orienting and quality-checking cDNA sequencing reads. Restrander will run automatically if the protocol is cDNA and a Restrander config file is present in the sample sheet. + ## Read QC
diff --git a/docs/usage.md b/docs/usage.md index 97dd0a5a..2de4586e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,12 +10,13 @@ You will need to create a file with information about the samples in your experi | Column | Description | | ------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `group` | Group identifier for sample. This will be identical for replicate samples from the same experimental group. | -| `replicate` | Integer representing replicate number. Must start from `1..`. | -| `barcode` | Barcode identifier attributed to that sample during multiplexing. Must be an integer. | -| `input_file` | Full path to FastQ file if previously demultiplexed, BAM file if previously aligned, or a path to a directory with subdirectories containing fastq or fast5 files. FastQ file has to be zipped and have the extension ".fastq.gz" or ".fq.gz". BAM file has to have the extension ".bam". | -| `fasta` | Genome fasta file or transcriptome fasta file for alignment. This can either be a local path, or the appropriate key for a genome available in [iGenomes config file](../conf/igenomes.config). Must have the extension ".fasta", ".fasta.gz", ".fa" or ".fa.gz". | -| `gtf` | Annotation gtf file for transcript discovery and quantification and RNA modification detection. This can either be blank or a local path. Must have the extension ".gtf". | +| `group` | Group identifier for sample. This will be identical for replicate samples from the same experimental group. | +| `replicate` | Integer representing replicate number. Must start from `1..`. | +| `barcode` | Barcode identifier attributed to that sample during multiplexing. Must be an integer. 
| +| `input_file` | Full path to FastQ file if previously demultiplexed, BAM file if previously aligned, or a path to a directory with subdirectories containing fastq or fast5 files. FastQ file has to be zipped and have the extension ".fastq.gz" or ".fq.gz". BAM file has to have the extension ".bam". | +| `fasta` | Genome fasta file or transcriptome fasta file for alignment. This can either be a local path, or the appropriate key for a genome available in [iGenomes config file](../conf/igenomes.config). Must have the extension ".fasta", ".fasta.gz", ".fa" or ".fa.gz". | +| `gtf` | Annotation gtf file for transcript discovery and quantification and RNA modification detection. This can either be blank or a local path. Must have the extension ".gtf". | +| `restrander_config` | Restrander .json config file that provides the template-switching oligo (TSO) and reverse transcription primer (RTP) sequences. Different configurations are used for different library preparation protocols. This can either be blank or a file path. If blank, Restrander will not run for the sample. 
| ### Skip demultiplexing @@ -26,13 +27,13 @@ As shown in the examples below, the accepted samplesheet format is different dep ##### Example `samplesheet.csv` for non-demultiplexed fastq inputs ```bash -group,replicate,barcode,input_file,fasta,gtf -WT_MOUSE,1,1,,mm10, -WT_HUMAN,1,2,,hg19, -WT_POMBE,1,3,,/path/to/local/genome.fa, -WT_DENOVO,1,4,,,/path/to/local/transcriptome.fa -WT_LOCAL,2,5,,/path/to/local/genome.fa,/path/to/local/transcriptome.gtf -WT_UNKNOWN,3,6,,, +group,replicate,barcode,input_file,fasta,gtf,restrander_config +WT_MOUSE,1,1,,mm10,, +WT_HUMAN,1,2,,hg19,, +WT_POMBE,1,3,,/path/to/local/genome.fa,, +WT_DENOVO,1,4,,,/path/to/local/transcriptome.fa, +WT_LOCAL,2,5,,/path/to/local/genome.fa,/path/to/local/transcriptome.gtf, +WT_UNKNOWN,3,6,,,, ``` ##### Example command for non-demultiplexed fastq inputs @@ -52,11 +53,11 @@ nextflow run nf-core/nanoseq \ ##### Example `samplesheet.csv` for demultiplexed fastq inputs ```bash -group,replicate,barcode,input_file,fasta,gtf -WT,1,,SAM101A1.fastq.gz,hg19, -WT,2,,SAM101A2.fastq.gz,hg19, -KO,1,,SAM101A3.fastq.gz,hg19, -KO,2,,SAM101A4.fastq.gz,hg19, +group,replicate,barcode,input_file,fasta,gtf,restrander_config +WT,1,,SAM101A1.fastq.gz,hg19,, +WT,2,,SAM101A2.fastq.gz,hg19,, +KO,1,,SAM101A3.fastq.gz,hg19,, +KO,2,,SAM101A4.fastq.gz,hg19,, ``` ##### Example command for demultiplexed fastq inputs @@ -74,11 +75,11 @@ nextflow run nf-core/nanoseq \ ##### Example `samplesheet.csv` for BAM inputs ```bash -group,replicate,barcode,input_file,fasta,gtf -WT,1,,SAM101A1.bam,hg19, -WT,2,,SAM101A2.bam,hg19, -KO,1,,SAM101A3.bam,hg19, -KO,2,,SAM101A4.bam,hg19, +group,replicate,barcode,input_file,fasta,gtf,restrander_config +WT,1,,SAM101A1.bam,hg19,, +WT,2,,SAM101A2.bam,hg19,, +KO,1,,SAM101A3.bam,hg19,, +KO,2,,SAM101A4.bam,hg19,, ``` ##### Example command for BAM inputs @@ -97,11 +98,11 @@ nextflow run nf-core/nanoseq \ ##### Example `samplesheet.csv` for FAST5 and FASTQ input directories ```bash 
-group,replicate,barcode,input_file,fasta,gtf -WT,1,,/full/path/to/SAM101A1/,hg19.fasta,hg19.gtf -WT,2,,/full/path/to/SAM101A2/,hg19.fasta,hg19.gtf -KO,1,,/full/path/to/SAM101A3/,hg19.fasta,hg19.gtf -KO,2,,/full/path/to/SAM101A4/,hg19.fasta,hg19.gtf +group,replicate,barcode,input_file,fasta,gtf,restrander_config +WT,1,,/full/path/to/SAM101A1/,hg19.fasta,hg19.gtf, +WT,2,,/full/path/to/SAM101A2/,hg19.fasta,hg19.gtf, +KO,1,,/full/path/to/SAM101A3/,hg19.fasta,hg19.gtf, +KO,2,,/full/path/to/SAM101A4/,hg19.fasta,hg19.gtf, ``` ##### Each of the FAST5 and FASTQ input directory should have the following structure: @@ -128,6 +129,20 @@ nextflow run nf-core/nanoseq \ -profile ``` +### Using Restrander + +Restrander is a program used for orienting and quality-checking cDNA sequencing reads. Restrander will automatically run if the protocol is cDNA and a Restrander config file is present in the sample sheet. Examples of Restrander configuration files for several protocols can be found in the [README](https://github.com/jakob-schuster/restrander-vignette?tab=readme-ov-file#configuration-files) for the Restrander vignette. The sample sheet can have a mix of samples with and without Restrander config files. + +##### Example `samplesheet.csv` for using Restrander + +```bash +group,replicate,barcode,input_file,fasta,gtf,restrander_config +WT,1,1,/full/path/to/SAM101A1/,hg19,hg19.gtf, +WT,2,2,/full/path/to/SAM101A2/,hg19,hg19.gtf, +KO,1,3,/full/path/to/SAM101A3/,hg19,hg19.gtf,PCB109.json +KO,2,4,/full/path/to/SAM101A4/,hg19,hg19.gtf,PCB109.json +``` + ## Running the pipeline The typical command for running the pipeline is as follows: