From 589ff090a1fc5e6233c37a92e5f6eb5f689ef01d Mon Sep 17 00:00:00 2001
From: Thomas Harrison <thomas.harrison@watchmakergenomics.com>
Date: Wed, 9 Apr 2025 15:38:50 -0600
Subject: [PATCH 01/38] Adding in nano plot updates

---
 modules/nf-core/nanoplot/main.nf | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modules/nf-core/nanoplot/main.nf b/modules/nf-core/nanoplot/main.nf
index ca0d8454..45f8d943 100644
--- a/modules/nf-core/nanoplot/main.nf
+++ b/modules/nf-core/nanoplot/main.nf
@@ -28,6 +28,8 @@ process NANOPLOT {
     NanoPlot \\
         $args \\
         -t $task.cpus \\
+        -p $meta.id \\
+        --tsv_stats \\
         $input_file
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

From 79b1303e9c23554728fe99f68eb7c0e1dfd4305c Mon Sep 17 00:00:00 2001
From: Eduard Casas
 <123982193+eduard-watchmakergenomics@users.noreply.github.com>
Date: Thu, 17 Apr 2025 09:38:06 -0600
Subject: [PATCH 02/38] Added RSEQC genebodycoverage

---
 aws_batch.config                           | 36 ++++++++++++++++++++++
 conf/modules.config                        | 11 +++++++
 modules/local/rseqc_genebodycoverage.nf    | 33 ++++++++++++++++++++
 subworkflows/local/bedtools_ucsc_bigbed.nf |  1 +
 workflows/nanoseq.nf                       | 15 +++++++++
 5 files changed, 96 insertions(+)
 create mode 100644 aws_batch.config
 create mode 100644 modules/local/rseqc_genebodycoverage.nf

diff --git a/aws_batch.config b/aws_batch.config
new file mode 100644
index 00000000..e2eb8984
--- /dev/null
+++ b/aws_batch.config
@@ -0,0 +1,36 @@
+/*
+========================================================================================
+    wmg_nextflow/masterworkflow Nextflow AWS Batch config file
+========================================================================================
+    Default config options for AWS Batch
+----------------------------------------------------------------------------------------
+*/
+
+
+params {
+    awsqueue = 'nextflow-with-dockerhub-aws-batch-large'
+    awsregion = 'us-west-2'
+    run = 'default'
+    // Max resource options
+    max_memory                 = '256.GB'
+    max_cpus                   = 256
+    max_time                   = '240.h'
+    outdir = "s3://watchmaker-lts/nanoseq/${params.run}/"
+}
+
+
+
+
+process {
+    executor = 'awsbatch'
+    queue = 'nextflow-with-dockerhub-aws-batch-large'
+}
+
+aws {
+    batch {
+        cliPath = '/home/ec2-user/miniconda/bin/aws'
+    }
+    region = 'us-west-2'
+}
+
+workDir = "s3://watchmaker-lts/nanoseq/work/"
diff --git a/conf/modules.config b/conf/modules.config
index ba13b442..2616aaed 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -535,6 +535,17 @@ if (params.call_variants) {
 }
 
 if (!params.skip_quantification) {
+    process {
+            withName: RSEQC_GENEBODYCOVERAGE {
+                publishDir = [
+                    path: { "${params.outdir}/rseqc" },
+                    mode: 'copy',
+                    enabled: true,
+                    saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+                ]
+            }
+        }
+
     if (params.quantification_method == "bambu") {
         process {
             withName: BAMBU {
diff --git a/modules/local/rseqc_genebodycoverage.nf b/modules/local/rseqc_genebodycoverage.nf
new file mode 100644
index 00000000..346ff230
--- /dev/null
+++ b/modules/local/rseqc_genebodycoverage.nf
@@ -0,0 +1,33 @@
+process RSEQC_GENEBODYCOVERAGE {
+    label 'process_high'
+    container "quay.io/biocontainers/rseqc:3.0.1--py37h516909a_1"
+
+    input:
+    tuple path(bam), path(bai), path(bed12)
+
+    output:
+    path("*.pdf")                  , emit: pdf
+    path("*.geneBodyCoverage.txt") , emit: rna_txt_ch
+    path("versions.yml")           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def name = bam.getName().replaceAll(/\.bam$/, '')
+
+    """
+    geneBody_coverage.py \\
+        $args \\
+        --refgene=$bed12 \\
+        --input=$bam  \\
+        --minimum_length=100 \\
+        --out-prefix=${name}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        rseqc: \$(geneBody_coverage.py --version | sed -e "s/geneBody_coverage.py //g")
+    END_VERSIONS
+    """
+}
diff --git a/subworkflows/local/bedtools_ucsc_bigbed.nf b/subworkflows/local/bedtools_ucsc_bigbed.nf
index 0a8d94b3..33216e77 100644
--- a/subworkflows/local/bedtools_ucsc_bigbed.nf
+++ b/subworkflows/local/bedtools_ucsc_bigbed.nf
@@ -26,6 +26,7 @@ workflow BEDTOOLS_UCSC_BIGBED {
 
     emit:
     bedtools_version
+    ch_bed12
     ch_bigbed
     bed12tobigbed_version
 }
diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index ba1c377e..d6fc2975 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -105,6 +105,7 @@ include { GET_NANOLYSE_FASTA    } from '../modules/local/get_nanolyse_fasta'
 include { QCAT                  } from '../modules/local/qcat'
 include { BAM_RENAME            } from '../modules/local/bam_rename'
 include { BAMBU                 } from '../modules/local/bambu'
+include { RSEQC_GENEBODYCOVERAGE} from '../modules/local/rseqc_genebodycoverage'
 include { MULTIQC               } from '../modules/local/multiqc'
 
 /*
@@ -387,6 +388,20 @@ workflow NANOSEQ{
             ch_featurecounts_gene_multiqc       = QUANTIFY_STRINGTIE_FEATURECOUNTS.out.featurecounts_gene_multiqc.ifEmpty([])
             ch_featurecounts_transcript_multiqc = QUANTIFY_STRINGTIE_FEATURECOUNTS.out.featurecounts_transcript_multiqc.ifEmpty([])
         }
+
+        ch_view_sortbam.subscribe { item -> println "ch_view_sortbam: $item" }
+
+        ch_view_sortbam
+            .join( BEDTOOLS_UCSC_BIGBED.out.ch_bed12 )           
+            .map { it -> [ it[3], it[4], it[6] ] }
+            .set { ch_rseqc }
+
+        ch_rseqc.subscribe { item -> println "ch_rseqc: $item" }
+        
+        RSEQC_GENEBODYCOVERAGE (
+            ch_rseqc
+            )
+
         if (!params.skip_differential_analysis) {
 
             /*

From 9664705b3029c85f97d1ba0925f1c8297d1123bb Mon Sep 17 00:00:00 2001
From: Eduard Casas
 <123982193+eduard-watchmakergenomics@users.noreply.github.com>
Date: Thu, 17 Apr 2025 11:14:07 -0600
Subject: [PATCH 03/38] Tidying

---
 workflows/nanoseq.nf | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index d6fc2975..cbba02ca 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -388,18 +388,11 @@ workflow NANOSEQ{
             ch_featurecounts_gene_multiqc       = QUANTIFY_STRINGTIE_FEATURECOUNTS.out.featurecounts_gene_multiqc.ifEmpty([])
             ch_featurecounts_transcript_multiqc = QUANTIFY_STRINGTIE_FEATURECOUNTS.out.featurecounts_transcript_multiqc.ifEmpty([])
         }
-
-        ch_view_sortbam.subscribe { item -> println "ch_view_sortbam: $item" }
-
-        ch_view_sortbam
-            .join( BEDTOOLS_UCSC_BIGBED.out.ch_bed12 )           
-            .map { it -> [ it[3], it[4], it[6] ] }
-            .set { ch_rseqc }
-
-        ch_rseqc.subscribe { item -> println "ch_rseqc: $item" }
         
         RSEQC_GENEBODYCOVERAGE (
-            ch_rseqc
+            ch_view_sortbam
+                .join( BEDTOOLS_UCSC_BIGBED.out.ch_bed12 )           
+                .map { it -> [ it[3], it[4], it[6] ] }
             )
 
         if (!params.skip_differential_analysis) {

From ff89141f3103c98a5031f99414f8f051ca2f13ec Mon Sep 17 00:00:00 2001
From: Eduard Casas
 <123982193+eduard-watchmakergenomics@users.noreply.github.com>
Date: Tue, 22 Apr 2025 14:43:40 -0600
Subject: [PATCH 04/38] using our wmg ecr cache

---
 modules/local/rseqc_genebodycoverage.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/rseqc_genebodycoverage.nf b/modules/local/rseqc_genebodycoverage.nf
index 346ff230..c911a823 100644
--- a/modules/local/rseqc_genebodycoverage.nf
+++ b/modules/local/rseqc_genebodycoverage.nf
@@ -1,6 +1,6 @@
 process RSEQC_GENEBODYCOVERAGE {
     label 'process_high'
-    container "quay.io/biocontainers/rseqc:3.0.1--py37h516909a_1"
+    container "912684371407.dkr.ecr.us-west-2.amazonaws.com/quay.io/biocontainers/rseqc:3.0.1--py37h516909a_1"
 
     input:
     tuple path(bam), path(bai), path(bed12)

From 9f3cea8f9e5fd57e2c3b6d85fe22e6d5b63d0c32 Mon Sep 17 00:00:00 2001
From: Dave Matten <dave.matten@watchmakergenomics.com>
Date: Thu, 1 May 2025 17:19:39 +0200
Subject: [PATCH 05/38] updated process_medium to process_high in fastqc main

---
 modules/nf-core/fastqc/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
index 9ae58381..e4d47b97 100644
--- a/modules/nf-core/fastqc/main.nf
+++ b/modules/nf-core/fastqc/main.nf
@@ -1,6 +1,6 @@
 process FASTQC {
     tag "$meta.id"
-    label 'process_medium'
+    label 'process_high'
 
     conda "bioconda::fastqc=0.11.9"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?

From 1381f5e07b9f23ee280ca4f0b734f85ceba77bda Mon Sep 17 00:00:00 2001
From: Thomas Harrison <thomas.harrison@watchmakergenomics.com>
Date: Fri, 23 May 2025 14:50:45 -0600
Subject: [PATCH 06/38] first crack at modification

---
 modules/nf-core/seqtk/main.nf | 58 +++++++++++++++++++++++++++++++++++
 workflows/nanoseq.nf          | 34 ++++++++++++++++++++
 2 files changed, 92 insertions(+)
 create mode 100644 modules/nf-core/seqtk/main.nf

diff --git a/modules/nf-core/seqtk/main.nf b/modules/nf-core/seqtk/main.nf
new file mode 100644
index 00000000..83a03555
--- /dev/null
+++ b/modules/nf-core/seqtk/main.nf
@@ -0,0 +1,58 @@
+process SEQTK_SAMPLE {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/seqtk:1.4--he4a0461_1' :
+        'biocontainers/seqtk:1.4--he4a0461_1' }"
+
+    input:
+    tuple val(meta), path(reads), val(sample_size)
+
+    output:
+    tuple val(meta), path("*.fastq.gz"), emit: reads
+    path "versions.yml"                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args   = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    if (!(args ==~ /.*\ -s\ ?[0-9]+.*/)) {
+        args += " -s100"
+    }
+    if ( !sample_size ) {
+        error "SEQTK/SAMPLE must have a sample_size value included"
+    }
+    """
+    printf "%s\\n" $reads | while read f;
+    do
+        seqtk \\
+            sample \\
+            $args \\
+            \$f \\
+            $sample_size \\
+            | gzip --no-name > ${prefix}_\$(basename \$f)
+    done
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    echo "" | gzip > ${prefix}.fastq.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
+    END_VERSIONS
+    """
+
+}
\ No newline at end of file
diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index cbba02ca..9f9f56c6 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -89,6 +89,12 @@ if (!params.skip_quantification) {
     }
 }
 
+if (params.downsample_depth) {
+    if (params.downsample_depth < 1 ) {
+        exit 1, "Invalid downsampling value: ${params.downsample_depth}. Must be greater than 0."
+    }
+}
+
 ////////////////////////////////////////////////////
 /* --          CONFIG FILES                    -- */
 ////////////////////////////////////////////////////
@@ -112,6 +118,7 @@ include { MULTIQC               } from '../modules/local/multiqc'
  * SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
  */
 
+
 include { INPUT_CHECK                      } from '../subworkflows/local/input_check'
 include { PREPARE_GENOME                   } from '../subworkflows/local/prepare_genome'
 include { QCFASTQ_NANOPLOT_FASTQC          } from '../subworkflows/local/qcfastq_nanoplot_fastqc'
@@ -135,6 +142,7 @@ include { RNA_FUSIONS_JAFFAL               } from '../subworkflows/local/rna_fus
  * MODULE: Installed directly from nf-core/modules
  */
 include { NANOLYSE                    } from '../modules/nf-core/nanolyse/main'
+include { SEQTK_SAMPLE                } from '../modules/nf-core/seqtk/main'
 include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
 
 /*
@@ -210,6 +218,32 @@ workflow NANOSEQ{
             ch_fastq = Channel.empty()
         }
     }
+    // check if we need to do downsampling on ch_fastq if so then do it and update
+
+    if (params.downsample_depth) {
+        /*
+         * MODULE: Downsample fastq files using seqtk
+         */
+         ch_fastq
+            .map { it -> [ it[0], it[1], params.downsample_depth ] }
+            .set { ch_for_seqtk }
+        }
+
+        SEQTK_SAMPLE( ch_for_seqtk )
+        ch_software_versions = ch_software_versions.mix(SEQTK_SAMPLE.out.versions)
+
+        SEQTK_SAMPLE.out.reads.join(ch_fastq).set{ joined_seqtk}
+
+        // meta, new_reads, barcode, fasta, gtf
+
+        joined_seqtk
+            .map { it -> [ it[0], it[1], it[3], it[4], it[5], it[6] ] }
+            .set { ch_fastq }
+
+
+    // step one use a map call to filter down the params to just meta and fastq
+    // step two Running seqtk on the the filtered channel
+    // step three Join the downsampled fastq with the old fastq channel and creat a output channel that the tools expect
 
     if (params.run_nanolyse) {
         ch_fastq

From f7dffeb5df9953b06664229b9b2ac63ee74a87f6 Mon Sep 17 00:00:00 2001
From: Thomas Harrison <thomas.harrison@watchmakergenomics.com>
Date: Fri, 23 May 2025 15:05:06 -0600
Subject: [PATCH 07/38] Have to qualify the docker path

---
 modules/nf-core/seqtk/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/nf-core/seqtk/main.nf b/modules/nf-core/seqtk/main.nf
index 83a03555..74c1163d 100644
--- a/modules/nf-core/seqtk/main.nf
+++ b/modules/nf-core/seqtk/main.nf
@@ -5,7 +5,7 @@ process SEQTK_SAMPLE {
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/seqtk:1.4--he4a0461_1' :
-        'biocontainers/seqtk:1.4--he4a0461_1' }"
+        'quay.io/biocontainers/seqtk:1.4--he4a0461_1' }"
 
     input:
     tuple val(meta), path(reads), val(sample_size)

From 9350b887595c29cc80c5be469722826a953543e2 Mon Sep 17 00:00:00 2001
From: Thomas Harrison <thomas.harrison@watchmakergenomics.com>
Date: Fri, 23 May 2025 15:17:21 -0600
Subject: [PATCH 08/38] moving closing bracket down

---
 workflows/nanoseq.nf | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index 9f9f56c6..eaa6451a 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -221,29 +221,20 @@ workflow NANOSEQ{
     // check if we need to do downsampling on ch_fastq if so then do it and update
 
     if (params.downsample_depth) {
-        /*
-         * MODULE: Downsample fastq files using seqtk
-         */
+
          ch_fastq
             .map { it -> [ it[0], it[1], params.downsample_depth ] }
             .set { ch_for_seqtk }
-        }
 
         SEQTK_SAMPLE( ch_for_seqtk )
         ch_software_versions = ch_software_versions.mix(SEQTK_SAMPLE.out.versions)
 
         SEQTK_SAMPLE.out.reads.join(ch_fastq).set{ joined_seqtk}
 
-        // meta, new_reads, barcode, fasta, gtf
-
         joined_seqtk
             .map { it -> [ it[0], it[1], it[3], it[4], it[5], it[6] ] }
             .set { ch_fastq }
-
-
-    // step one use a map call to filter down the params to just meta and fastq
-    // step two Running seqtk on the the filtered channel
-    // step three Join the downsampled fastq with the old fastq channel and creat a output channel that the tools expect
+    }
 
     if (params.run_nanolyse) {
         ch_fastq
@@ -422,10 +413,10 @@ workflow NANOSEQ{
             ch_featurecounts_gene_multiqc       = QUANTIFY_STRINGTIE_FEATURECOUNTS.out.featurecounts_gene_multiqc.ifEmpty([])
             ch_featurecounts_transcript_multiqc = QUANTIFY_STRINGTIE_FEATURECOUNTS.out.featurecounts_transcript_multiqc.ifEmpty([])
         }
-        
+
         RSEQC_GENEBODYCOVERAGE (
             ch_view_sortbam
-                .join( BEDTOOLS_UCSC_BIGBED.out.ch_bed12 )           
+                .join( BEDTOOLS_UCSC_BIGBED.out.ch_bed12 )
                 .map { it -> [ it[3], it[4], it[6] ] }
             )
 

From eee1d0517d7ddbab6718999a9d7d43e692205aec Mon Sep 17 00:00:00 2001
From: Eduard Casas
 <123982193+eduard-watchmakergenomics@users.noreply.github.com>
Date: Tue, 3 Jun 2025 14:46:27 -0600
Subject: [PATCH 09/38] Added parameters to publish the files

---
 modules/nf-core/nanoplot/main.nf | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/modules/nf-core/nanoplot/main.nf b/modules/nf-core/nanoplot/main.nf
index 45f8d943..d9b1f634 100644
--- a/modules/nf-core/nanoplot/main.nf
+++ b/modules/nf-core/nanoplot/main.nf
@@ -27,6 +27,9 @@ process NANOPLOT {
     """
     NanoPlot \\
         $args \\
+        --store \\
+        --raw \\
+        --tsv_stats \\
         -t $task.cpus \\
         -p $meta.id \\
         --tsv_stats \\

From 38108f373a71995d1efd109bfd706cae193851e2 Mon Sep 17 00:00:00 2001
From: Eduard Casas
 <123982193+eduard-watchmakergenomics@users.noreply.github.com>
Date: Tue, 3 Jun 2025 14:47:28 -0600
Subject: [PATCH 10/38] Added parameters to publish the files

---
 modules/nf-core/nanoplot/main.nf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/nf-core/nanoplot/main.nf b/modules/nf-core/nanoplot/main.nf
index d9b1f634..dbaa48bc 100644
--- a/modules/nf-core/nanoplot/main.nf
+++ b/modules/nf-core/nanoplot/main.nf
@@ -29,7 +29,6 @@ process NANOPLOT {
         $args \\
         --store \\
         --raw \\
-        --tsv_stats \\
         -t $task.cpus \\
         -p $meta.id \\
         --tsv_stats \\

From 1a49de03be1ab9d7fa9d1285bd7fc4d4a5a0c113 Mon Sep 17 00:00:00 2001
From: Thomas Harrison <thomas.harrison@watchmakergenomics.com>
Date: Tue, 8 Jul 2025 08:17:15 -0600
Subject: [PATCH 11/38] First commit of restrander

---
 conf/modules.config         | 12 ++++++++++
 modules/local/restrander.nf | 36 +++++++++++++++++++++++++++++
 workflows/nanoseq.nf        | 45 +++++++++++++++++++++++++++++++++++++
 3 files changed, 93 insertions(+)
 create mode 100644 modules/local/restrander.nf

diff --git a/conf/modules.config b/conf/modules.config
index 2616aaed..40a066c6 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -45,6 +45,16 @@ process {
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
+
+    // Publish dir for RESTRANDER
+    withName: RESTRANDER {
+        publishDir = [
+            path: { "${params.outdir}/restrander" },
+            mode: 'copy',
+            enabled: true,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
 }
 
 if (!params.skip_demultiplexing) {
@@ -467,6 +477,8 @@ if (params.call_variants) {
                 ]
             }
         }
+
+
     }
     if (params.structural_variant_caller == 'sniffles') {
         process {
diff --git a/modules/local/restrander.nf b/modules/local/restrander.nf
new file mode 100644
index 00000000..70197c9d
--- /dev/null
+++ b/modules/local/restrander.nf
@@ -0,0 +1,36 @@
+process RESTRANDER {
+    tag "$meta.id"
+    label 'process_medium'
+
+
+    container "${'912684371407.dkr.ecr.us-west-2.amazonaws.com/restrander:1.0'}"
+
+    input:
+    tuple val(meta), path(reads), path(input_config)
+
+    output:
+    tuple val(meta), path("*_restrander.fq.gz"), emit: reads
+    tuple val(meta), path("*_restrander-unknowns.fq.gz"), emit: unknown_reads
+    tuple val(meta), path("*.restrander.json"), emit: metrics
+    path "versions.yml"                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    // _restrander-unknowns.fq.gz
+
+    script:
+    """
+    /restrander \\
+        ${reads} \\
+        ${prefix}_restrander.fq.gz \\
+        ${input_config} > ${prefix}.restrander.json
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        restrander: v1.0.1
+    END_VERSIONS
+    """
+}
diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index eaa6451a..20a7e4c3 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -143,6 +143,7 @@ include { RNA_FUSIONS_JAFFAL               } from '../subworkflows/local/rna_fus
  */
 include { NANOLYSE                    } from '../modules/nf-core/nanolyse/main'
 include { SEQTK_SAMPLE                } from '../modules/nf-core/seqtk/main'
+include { RESTRANDER                   } from '../modules/local/restrander'
 include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
 
 /*
@@ -256,6 +257,7 @@ workflow NANOSEQ{
          * MODULE: DNA contaminant removal using NanoLyse
          */
         NANOLYSE ( ch_fastq_nanolyse, ch_nanolyse_fasta )
+
         NANOLYSE.out.fastq
             .join( ch_sample )
             .map { it -> [ it[0], it[1], it[3], it[4], it[5], it[6] ]}
@@ -263,6 +265,46 @@ workflow NANOSEQ{
         ch_software_versions = ch_software_versions.mix(NANOLYSE.out.versions.first().ifEmpty(null))
     }
 
+    // If cDNA then we must run restrander and merge it back with samples
+        // that restrander cant run on and then bubble dowstream
+
+        /*
+            1. We know these are cDNA or direct RNA lets use a branch call to seperate into:
+
+                a. direct RNA where nothing is done
+                b. cDNA but there is no json config where nothing is done
+                c. cDNA with json config where we run restrander
+
+
+        */
+
+    if (params.protocol == 'cDNA'){
+        ch_fastq.branch{
+            config_provided: it[0].restrander_config != null && it[0].restrander_config != ''
+            no_config: it[0].restrander_config == null || it[0].restrander_config == ''
+        }.set { ch_fastq_branch }
+
+        ch_fastq_branch.config_provided.map { it -> [ it[0], it[1], it[0].restrander_config] }
+            .set { ch_fastq_restrander }
+
+        RESTRANDER ( ch_fastq_restrander )
+
+        // merge restrander fq back with the tuples before restander
+        // pluck out old fastqs
+        // merge it back with non-restrandered fastqs
+
+        RESTRANDER.out.reads
+            .join(ch_fastq_branch.config_provided)
+            .flatten()
+            .map { it -> [ it[0], it[1], it[3], it[4], it[5], it[6] ] }
+            .set { ch_fastq_restrandered }
+
+        ch_fastq_restrandered.mix(ch_fastq_branch.no_config).set { ch_fastq }
+
+        // Also mix in versions and bubble up metrics to somewhere useful
+
+    }
+
     ch_fastqc_multiqc = Channel.empty()
     if (!params.skip_qc) {
 
@@ -387,6 +429,9 @@ workflow NANOSEQ{
         //  MULTIPLE_CONDITIONS = ch_sample.map { it -> it[0].split('_')[0..-2].join('_') }.unique().count().val > 1
 
         ch_r_version = Channel.empty()
+
+
+
         if (params.quantification_method == 'bambu') {
             ch_sample
                 .map { it -> [ it[2], it[3] ]}

From eef5c6e7a104312a183ae8e24054d75e4c9f9d42 Mon Sep 17 00:00:00 2001
From: Thomas Harrison <thomas.harrison@watchmakergenomics.com>
Date: Tue, 8 Jul 2025 08:42:18 -0600
Subject: [PATCH 12/38] squash me

---
 workflows/nanoseq.nf | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index 20a7e4c3..3cff6508 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -278,15 +278,24 @@ workflow NANOSEQ{
 
         */
 
+
+
+    ch_fastq.view { "ch_fastq is: ${it}" }
     if (params.protocol == 'cDNA'){
+
+
         ch_fastq.branch{
             config_provided: it[0].restrander_config != null && it[0].restrander_config != ''
             no_config: it[0].restrander_config == null || it[0].restrander_config == ''
         }.set { ch_fastq_branch }
 
+        ch_fastq_branch.view{ "branch is: ${it}"}
+
         ch_fastq_branch.config_provided.map { it -> [ it[0], it[1], it[0].restrander_config] }
             .set { ch_fastq_restrander }
 
+
+
         RESTRANDER ( ch_fastq_restrander )
 
         // merge restrander fq back with the tuples before restander

From a66497f7130e2bf9affc622229e6707c5cb276af Mon Sep 17 00:00:00 2001
From: Thomas Harrison <thomas.harrison@watchmakergenomics.com>
Date: Tue, 8 Jul 2025 08:44:07 -0600
Subject: [PATCH 13/38] squash me

---
 workflows/nanoseq.nf | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index 3cff6508..cee7be0f 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -279,8 +279,6 @@ workflow NANOSEQ{
         */
 
 
-
-    ch_fastq.view { "ch_fastq is: ${it}" }
     if (params.protocol == 'cDNA'){
 
 
@@ -289,7 +287,7 @@ workflow NANOSEQ{
             no_config: it[0].restrander_config == null || it[0].restrander_config == ''
         }.set { ch_fastq_branch }
 
-        ch_fastq_branch.view{ "branch is: ${it}"}
+        ch_fastq_branch.view()
 
         ch_fastq_branch.config_provided.map { it -> [ it[0], it[1], it[0].restrander_config] }
             .set { ch_fastq_restrander }

From 23c3805cf0ed08ac3786902199087435b0da0719 Mon Sep 17 00:00:00 2001
From: Thomas Harrison <thomas.harrison@watchmakergenomics.com>
Date: Tue, 8 Jul 2025 08:45:36 -0600
Subject: [PATCH 14/38] squash me

---
 workflows/nanoseq.nf | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index cee7be0f..8d5f0c10 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -287,7 +287,9 @@ workflow NANOSEQ{
             no_config: it[0].restrander_config == null || it[0].restrander_config == ''
         }.set { ch_fastq_branch }
 
-        ch_fastq_branch.view()
+        ch_fastq_branch.config_provided.view{"Config provided: ${it}"}
+
+        ch_fastq_branch.no_config.view{"No config provided: ${it}"}
 
         ch_fastq_branch.config_provided.map { it -> [ it[0], it[1], it[0].restrander_config] }
             .set { ch_fastq_restrander }

From 837a16663752f0eaf6fa57f139ad0de485a7bdfa Mon Sep 17 00:00:00 2001
From: Thomas Harrison <thomas.harrison@watchmakergenomics.com>
Date: Tue, 8 Jul 2025 08:48:18 -0600
Subject: [PATCH 15/38] squash me

---
 workflows/nanoseq.nf | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index 8d5f0c10..642a0ce6 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -196,6 +196,10 @@ workflow NANOSEQ{
     INPUT_CHECK ( ch_input, ch_input_path )
         .set { ch_sample }
 
+    ch_sample.view{"Sample $it"}
+
+
+
     if (!params.skip_demultiplexing) {
 
         /*
@@ -287,9 +291,6 @@ workflow NANOSEQ{
             no_config: it[0].restrander_config == null || it[0].restrander_config == ''
         }.set { ch_fastq_branch }
 
-        ch_fastq_branch.config_provided.view{"Config provided: ${it}"}
-
-        ch_fastq_branch.no_config.view{"No config provided: ${it}"}
 
         ch_fastq_branch.config_provided.map { it -> [ it[0], it[1], it[0].restrander_config] }
             .set { ch_fastq_restrander }

From f8e714c85df2309a67843200c7b65efb842f02ec Mon Sep 17 00:00:00 2001
From: Thomas Harrison <thomas.harrison@watchmakergenomics.com>
Date: Tue, 8 Jul 2025 08:54:02 -0600
Subject: [PATCH 16/38] squash me

---
 subworkflows/local/input_check.nf | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
index f38041df..56f35cbb 100644
--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@@ -30,6 +30,10 @@ def get_sample_info(LinkedHashMap sample, LinkedHashMap genomeMap) {
     def meta = [:]
     meta.id  = sample.sample
 
+    if(sample.restrander_config && sample.restrander_config != '') {
+        meta.restrander_config = sample.restrander_config
+    }
+
     // Resolve fasta and gtf file if using iGenomes
     def fasta = false
     def gtf   = false

From 10a6a997ba0cd11f1e22cbeff043eeff4de0a463 Mon Sep 17 00:00:00 2001
From: Thomas Harrison <thomas.harrison@watchmakergenomics.com>
Date: Tue, 8 Jul 2025 08:56:40 -0600
Subject: [PATCH 17/38] squash me

---
 subworkflows/local/input_check.nf | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
index 56f35cbb..e7e14c3d 100644
--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@@ -29,10 +29,8 @@ workflow INPUT_CHECK {
 def get_sample_info(LinkedHashMap sample, LinkedHashMap genomeMap) {
     def meta = [:]
     meta.id  = sample.sample
+    meta.restrander_config = sample.restrander_config
 
-    if(sample.restrander_config && sample.restrander_config != '') {
-        meta.restrander_config = sample.restrander_config
-    }
 
     // Resolve fasta and gtf file if using iGenomes
     def fasta = false

From 8c72143b561d430ddb42597b77a77f1bd3e8887c Mon Sep 17 00:00:00 2001
From: Thomas Harrison <thomas.harrison@watchmakergenomics.com>
Date: Tue, 8 Jul 2025 08:59:37 -0600
Subject: [PATCH 18/38] squash me

---
 subworkflows/local/input_check.nf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
index e7e14c3d..ad8f9440 100644
--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@@ -27,6 +27,7 @@ workflow INPUT_CHECK {
 // Function to resolve fasta and gtf file if using iGenomes
 // Returns [ sample, input_file, barcode, fasta, gtf, is_transcripts, annotation_str, nanopolish_fast5 ]
 def get_sample_info(LinkedHashMap sample, LinkedHashMap genomeMap) {
+    print(sample)
     def meta = [:]
     meta.id  = sample.sample
     meta.restrander_config = sample.restrander_config

From acf6ee26fdb4754a363d531d27b8be69a2b06ee5 Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Tue, 22 Jul 2025 16:41:21 +0200
Subject: [PATCH 19/38] Edited check_samplesheet.py so that the restrander
 config file can be passed through the pipeline.

---
 bin/check_samplesheet.py    | 20 ++++++++++----------
 modules/local/restrander.nf |  9 ++++-----
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index a10f4479..32a54035 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -49,11 +49,11 @@ def read_head(handle, num_lines=10):
 def check_samplesheet(file_in, updated_path, file_out):
     """
     This function checks that the samplesheet follows the following structure:
-    group,replicate,barcode,input_file,fasta,gtf
-    MCF7,1,,MCF7_directcDNA_replicate1.fastq.gz,genome.fa,
-    MCF7,2,,MCF7_directcDNA_replicate3.fastq.gz,genome.fa,genome.gtf
-    K562,1,,K562_directcDNA_replicate1.fastq.gz,genome.fa,
-    K562,2,,K562_directcDNA_replicate4.fastq.gz,,transcripts.fa
+    group,replicate,barcode,input_file,fasta,gtf,restrander_config
+    MCF7,1,,MCF7_directcDNA_replicate1.fastq.gz,genome.fa,,restrander_config.json
+    MCF7,2,,MCF7_directcDNA_replicate3.fastq.gz,genome.fa,genome.gtf, restrander_config.json
+    K562,1,,K562_directcDNA_replicate1.fastq.gz,genome.fa,,
+    K562,2,,K562_directcDNA_replicate4.fastq.gz,,transcripts.fa,
     """
 
     input_extensions = []
@@ -61,7 +61,7 @@ def check_samplesheet(file_in, updated_path, file_out):
     with open(file_in, "r") as fin:
         ## Check header
         MIN_COLS = 3
-        HEADER = ["group", "replicate", "barcode", "input_file", "fasta", "gtf"]
+        HEADER = ["group", "replicate", "barcode", "input_file", "fasta", "gtf", "restrander_config"]
         header = fin.readline().strip().split(",")
         if header[: len(HEADER)] != HEADER:
             print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER)))
@@ -80,7 +80,7 @@ def check_samplesheet(file_in, updated_path, file_out):
                 print_error("Invalid number of populated columns (minimum = {})!".format(MIN_COLS), "Line", line)
 
             ## Check group name entries
-            group, replicate, barcode, input_file, fasta, gtf = lspl[: len(HEADER)]
+            group, replicate, barcode, input_file, fasta, gtf, restrander_config = lspl[: len(HEADER)]
             if group:
                 if group.find(" ") != -1:
                     print_error("Group entry contains spaces!", "Line", line)
@@ -177,8 +177,8 @@ def check_samplesheet(file_in, updated_path, file_out):
                 #    is_transcripts = '1'
                 #    genome = transcriptome
 
-            ## Create sample mapping dictionary = {group: {replicate : [ barcode, input_file, genome, gtf, is_transcripts, nanopolish_fast5 ]}}
-            sample_info = [barcode, input_file, fasta, gtf, is_transcripts, nanopolish_fast5]
+            ## Create sample mapping dictionary = {group: {replicate : [ barcode, input_file, genome, gtf, is_transcripts, nanopolish_fast5, restrander_config ]}}
+            sample_info = [barcode, input_file, fasta, gtf, is_transcripts, nanopolish_fast5, restrander_config]
             if group not in sample_info_dict:
                 sample_info_dict[group] = {}
             if replicate not in sample_info_dict[group]:
@@ -200,7 +200,7 @@ def check_samplesheet(file_in, updated_path, file_out):
         make_dir(out_dir)
         with open(file_out, "w") as fout:
             fout.write(
-                ",".join(["sample", "barcode", "input_file", "fasta", "gtf", "is_transcripts", "nanopolish_fast5"])
+                ",".join(["sample", "barcode", "input_file", "fasta", "gtf", "is_transcripts", "nanopolish_fast5", "restrander_config"])
                 + "\n"
             )
             for sample in sorted(sample_info_dict.keys()):
diff --git a/modules/local/restrander.nf b/modules/local/restrander.nf
index 70197c9d..21ae4e1a 100644
--- a/modules/local/restrander.nf
+++ b/modules/local/restrander.nf
@@ -2,7 +2,6 @@ process RESTRANDER {
     tag "$meta.id"
     label 'process_medium'
 
-
     container "${'912684371407.dkr.ecr.us-west-2.amazonaws.com/restrander:1.0'}"
 
     input:
@@ -17,16 +16,16 @@ process RESTRANDER {
     when:
     task.ext.when == null || task.ext.when
 
-    def prefix = task.ext.prefix ?: "${meta.id}"
-
     // _restrander-unknowns.fq.gz
 
     script:
     """
+    prefix=\${task.ext.prefix:-${meta.id}}
+
     /restrander \\
         ${reads} \\
-        ${prefix}_restrander.fq.gz \\
-        ${input_config} > ${prefix}.restrander.json
+        \${prefix}_restrander.fq.gz \\
+        ${input_config} > \${prefix}.restrander.json
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

From 6cba1696271857680783730839e3b6b0fc478081 Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Tue, 22 Jul 2025 17:53:23 +0200
Subject: [PATCH 20/38] Rebuilt the Restrander Docker image with multi-arch
 support and changed the image tag the restrander module pulls (from 1.0 to
 1.1).

---
 modules/local/restrander.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/restrander.nf b/modules/local/restrander.nf
index 21ae4e1a..bd03e7db 100644
--- a/modules/local/restrander.nf
+++ b/modules/local/restrander.nf
@@ -2,7 +2,7 @@ process RESTRANDER {
     tag "$meta.id"
     label 'process_medium'
 
-    container "${'912684371407.dkr.ecr.us-west-2.amazonaws.com/restrander:1.0'}"
+    container "${'912684371407.dkr.ecr.us-west-2.amazonaws.com/restrander:1.1'}"
 
     input:
     tuple val(meta), path(reads), path(input_config)

From 1850eef517c2b499417c74522ff1056820348eda Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Tue, 22 Jul 2025 18:14:17 +0200
Subject: [PATCH 21/38] Removed /bin/bash ENTRYPOINT from the Dockerfile
 because of a "/bin/bash: /bin/bash: cannot execute binary file" error.
 Updated the pulled image version to 1.2.

---
 modules/local/restrander.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/restrander.nf b/modules/local/restrander.nf
index bd03e7db..068c5f89 100644
--- a/modules/local/restrander.nf
+++ b/modules/local/restrander.nf
@@ -2,7 +2,7 @@ process RESTRANDER {
     tag "$meta.id"
     label 'process_medium'
 
-    container "${'912684371407.dkr.ecr.us-west-2.amazonaws.com/restrander:1.1'}"
+    container "${'912684371407.dkr.ecr.us-west-2.amazonaws.com/restrander:1.2'}"
 
     input:
     tuple val(meta), path(reads), path(input_config)

From 03cd574782ef66d32afc51b9496b3c61d6bef15c Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Tue, 22 Jul 2025 18:21:15 +0200
Subject: [PATCH 22/38] Fix file prefix issue: define prefix in Groovy instead of bash

---
 modules/local/restrander.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/restrander.nf b/modules/local/restrander.nf
index 068c5f89..cf439f4c 100644
--- a/modules/local/restrander.nf
+++ b/modules/local/restrander.nf
@@ -16,12 +16,12 @@ process RESTRANDER {
     when:
     task.ext.when == null || task.ext.when
 
+
+    def prefix = task.ext.prefix ?: "${meta.id}"
     // _restrander-unknowns.fq.gz
 
     script:
     """
-    prefix=\${task.ext.prefix:-${meta.id}}
-
     /restrander \\
         ${reads} \\
         \${prefix}_restrander.fq.gz \\

From 659d2e19b4cac5bb09db20e2910c758bc2afbac3 Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Tue, 22 Jul 2025 18:24:42 +0200
Subject: [PATCH 23/38] Fix file prefix issue: define prefix inside the script block and interpolate it

---
 modules/local/restrander.nf | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/modules/local/restrander.nf b/modules/local/restrander.nf
index cf439f4c..57d0ba00 100644
--- a/modules/local/restrander.nf
+++ b/modules/local/restrander.nf
@@ -16,16 +16,15 @@ process RESTRANDER {
     when:
     task.ext.when == null || task.ext.when
 
-
-    def prefix = task.ext.prefix ?: "${meta.id}"
     // _restrander-unknowns.fq.gz
 
     script:
+    def prefix = task.ext.prefix ?: meta.id
     """
     /restrander \\
         ${reads} \\
-        \${prefix}_restrander.fq.gz \\
-        ${input_config} > \${prefix}.restrander.json
+        ${prefix}_restrander.fq.gz \\
+        ${input_config} > ${prefix}.restrander.json
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

From 4e0b087b4a5607fa6c48cf1868bd11dc936b9986 Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Mon, 28 Jul 2025 13:20:20 +0200
Subject: [PATCH 24/38] Added line to see the sample sheet before processing.

---
 workflows/nanoseq.nf | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index 642a0ce6..93e91c07 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -190,6 +190,8 @@ workflow NANOSEQ{
      */
     ch_software_versions = Channel.empty()
 
+    ch_input.view{"Input file: $it"}
+
     /*
      * SUBWORKFLOW: Read in samplesheet, validate and stage input files
      */

From 5cb5df5b326257b1c945fc8cd2f7f6bc545f38af Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Mon, 28 Jul 2025 13:23:12 +0200
Subject: [PATCH 25/38] Tweaking for sample sheet errors.

---
 workflows/nanoseq.nf | 2 --
 1 file changed, 2 deletions(-)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index 93e91c07..642a0ce6 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -190,8 +190,6 @@ workflow NANOSEQ{
      */
     ch_software_versions = Channel.empty()
 
-    ch_input.view{"Input file: $it"}
-
     /*
      * SUBWORKFLOW: Read in samplesheet, validate and stage input files
      */

From 85e706fc25c53456e5bb5280887132f35e0e61b9 Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Mon, 28 Jul 2025 13:30:34 +0200
Subject: [PATCH 26/38] Tweaking for sample sheet errors.

---
 workflows/nanoseq.nf | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index 642a0ce6..4f0c9174 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -196,6 +196,7 @@ workflow NANOSEQ{
     INPUT_CHECK ( ch_input, ch_input_path )
         .set { ch_sample }
 
+    // TODO: must remove line below
     ch_sample.view{"Sample $it"}
 
 
@@ -318,6 +319,8 @@ workflow NANOSEQ{
     ch_fastqc_multiqc = Channel.empty()
     if (!params.skip_qc) {
 
+        ch_fastq.view{"Sample $it"}
+
         /*
          * SUBWORKFLOW: Fastq QC with Nanoplot and fastqc
          */

From fba0a089db8fa9de76828c28148a2832ae840a14 Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Mon, 28 Jul 2025 13:32:26 +0200
Subject: [PATCH 27/38] Tweaking for sample sheet errors.

---
 workflows/nanoseq.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index 4f0c9174..f1809be8 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -319,7 +319,7 @@ workflow NANOSEQ{
     ch_fastqc_multiqc = Channel.empty()
     if (!params.skip_qc) {
 
-        ch_fastq.view{"Sample $it"}
+        ch_fastq.view{"fastq $it"}
 
         /*
          * SUBWORKFLOW: Fastq QC with Nanoplot and fastqc

From 62c02d526837c77d49868d0970d1ddbab34e71ea Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Mon, 28 Jul 2025 13:48:34 +0200
Subject: [PATCH 28/38] Tweaking for sample sheet errors.

---
 workflows/nanoseq.nf | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index f1809be8..2d819f85 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -304,12 +304,17 @@ workflow NANOSEQ{
         // pluck out old fastqs
         // merge it back with non-restrandered fastqs
 
+        ch_fastq_branch.config_provided.view { "FASTQ branch: $it" }
+
         RESTRANDER.out.reads
             .join(ch_fastq_branch.config_provided)
             .flatten()
             .map { it -> [ it[0], it[1], it[3], it[4], it[5], it[6] ] }
             .set { ch_fastq_restrandered }
 
+        // TODO: remove this
+        ch_fastq_restrandered.view { "Restrandered FASTQ: $it" }
+
         ch_fastq_restrandered.mix(ch_fastq_branch.no_config).set { ch_fastq }
 
         // Also mix in versions and bubble up metrics to somewhere useful

From 43e03327b261879e7319c3bdc5cc73e1c14da207 Mon Sep 17 00:00:00 2001
From: mark-alence-watchmaker <mark.alence@watchmakergenomics.com>
Date: Tue, 29 Jul 2025 13:53:50 +0000
Subject: [PATCH 29/38] Fixing tuple structure propagation issues

---
 modules/local/restrander.nf          |  4 +-
 modules/nf-core/nanoplot/main.nf     |  2 +-
 subworkflows/local/align_minimap2.nf |  4 +-
 subworkflows/local/prepare_genome.nf | 14 +++---
 workflows/nanoseq.nf                 | 70 ++++++++++++++--------------
 5 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/modules/local/restrander.nf b/modules/local/restrander.nf
index 57d0ba00..db338dac 100644
--- a/modules/local/restrander.nf
+++ b/modules/local/restrander.nf
@@ -9,7 +9,7 @@ process RESTRANDER {
 
     output:
     tuple val(meta), path("*_restrander.fq.gz"), emit: reads
-    tuple val(meta), path("*_restrander-unknowns.fq.gz"), emit: unknown_reads
+    // tuple val(meta), path("*_restrander-unknowns.fq.gz"), emit: unknown_reads
     tuple val(meta), path("*.restrander.json"), emit: metrics
     path "versions.yml"                , emit: versions
 
@@ -19,7 +19,7 @@ process RESTRANDER {
     // _restrander-unknowns.fq.gz
 
     script:
-    def prefix = task.ext.prefix ?: meta.id
+    def prefix = task.ext.prefix ?: reads.getBaseName()
     """
     /restrander \\
         ${reads} \\
diff --git a/modules/nf-core/nanoplot/main.nf b/modules/nf-core/nanoplot/main.nf
index dbaa48bc..7ba90a8b 100644
--- a/modules/nf-core/nanoplot/main.nf
+++ b/modules/nf-core/nanoplot/main.nf
@@ -22,7 +22,7 @@ process NANOPLOT {
 
     script:
     def args = task.ext.args ?: ''
-    def input_file = ("$ontfile".endsWith(".fastq.gz")) ? "--fastq ${ontfile}" :
+    def input_file = ("$ontfile".endsWith(".fastq.gz") || "$ontfile".endsWith(".fq.gz") || "$ontfile".endsWith(".fastq") || "$ontfile".endsWith(".fq")) ? "--fastq ${ontfile}" :
         ("$ontfile".endsWith(".txt")) ? "--summary ${ontfile}" : ''
     """
     NanoPlot \\
diff --git a/subworkflows/local/align_minimap2.nf b/subworkflows/local/align_minimap2.nf
index 70693a8a..ed0a94b8 100644
--- a/subworkflows/local/align_minimap2.nf
+++ b/subworkflows/local/align_minimap2.nf
@@ -21,8 +21,8 @@ workflow ALIGN_MINIMAP2 {
     ch_index
         .cross(ch_fastq) { it -> it[-1] }
         .flatten()
-        .collate(13)
-        .map { it -> [ it[7], it[8], it[0], it[1], it[2], it[3], it[4], it[5] ] } // [ sample, fastq, fasta, sizes, gtf, bed, is_transcripts, index ]
+        .collate(14)
+        .map { it -> [ it[7], it[8], it[0], it[1], it[2], it[3], it[4], it[5] ] } // [ meta, fastq, fasta, sizes, gtf, bed, is_transcripts, index ]
         .set { ch_index }
 
     /*
diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf
index e6047cc0..189a9642 100644
--- a/subworkflows/local/prepare_genome.nf
+++ b/subworkflows/local/prepare_genome.nf
@@ -11,10 +11,10 @@ workflow PREPARE_GENOME {
     ch_fastq
 
     main:
-    // Get unique list of all fasta files
+    // Get unique list of all fasta files - reference FASTA is at position 2
     ch_fastq
         .filter { it[2] }
-        .map { it -> [ it[2], it[5].toString() ] }  // [ fasta, annotation_str ]
+        .map { it -> [ it[2], it[6].toString() ] }  // [ fasta, annotation_str ]
         .unique()
         .set { ch_fastq_sizes }
 
@@ -25,10 +25,10 @@ workflow PREPARE_GENOME {
     ch_chrom_sizes = GET_CHROM_SIZES.out.sizes
     samtools_version = GET_CHROM_SIZES.out.versions
 
-    // Get unique list of all gtf files
+    // Get unique list of all gtf files - GTF is at position 3 in the tuple
     ch_fastq
         .filter { it[3] }
-        .map { it -> [ it[3], it[5] ] }  // [ gtf, annotation_str ]
+        .map { it -> [ it[3], it[6] ] }  // [ gtf, annotation_str ]
         .unique()
         .set { ch_fastq_gtf }
 
@@ -44,8 +44,8 @@ workflow PREPARE_GENOME {
         .map { it -> [ it[1], it[2], it[0] ] }
         .cross(ch_fastq) { it -> it[-1] }
         .flatten()
-        .collate(9)
-        .map { it -> [ it[5], it[0], it[6], it[1], it[7], it[8] ]} // [ fasta, sizes, gtf, bed, is_transcripts, annotation_str ]
+        .collate(10)
+        .map { it -> [ it[5], it[0], it[6], it[1], it[8], it[9] ]} // [ fasta, sizes, gtf, bed, is_transcripts, annotation_str ]
         .unique()
         .set { ch_fasta_index }
 
@@ -54,7 +54,7 @@ workflow PREPARE_GENOME {
      */
     ch_fastq
         .filter { it[2] }
-        .map { it -> [ it[0], it[2] ] }  // [ gtf, annotation_str ]
+        .map { it -> [ it[0], it[2] ] }  // [ meta, fasta ]
         .unique()
         .set { ch_fasta }
 
diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index 2d819f85..9317e34f 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -284,42 +284,42 @@ workflow NANOSEQ{
         */
 
 
-    if (params.protocol == 'cDNA'){
-
-
-        ch_fastq.branch{
-            config_provided: it[0].restrander_config != null && it[0].restrander_config != ''
-            no_config: it[0].restrander_config == null || it[0].restrander_config == ''
-        }.set { ch_fastq_branch }
-
-
-        ch_fastq_branch.config_provided.map { it -> [ it[0], it[1], it[0].restrander_config] }
-            .set { ch_fastq_restrander }
-
-
-
-        RESTRANDER ( ch_fastq_restrander )
-
-        // merge restrander fq back with the tuples before restander
-        // pluck out old fastqs
-        // merge it back with non-restrandered fastqs
-
-        ch_fastq_branch.config_provided.view { "FASTQ branch: $it" }
-
-        RESTRANDER.out.reads
-            .join(ch_fastq_branch.config_provided)
-            .flatten()
-            .map { it -> [ it[0], it[1], it[3], it[4], it[5], it[6] ] }
-            .set { ch_fastq_restrandered }
-
-        // TODO: remove this
-        ch_fastq_restrandered.view { "Restrandered FASTQ: $it" }
-
-        ch_fastq_restrandered.mix(ch_fastq_branch.no_config).set { ch_fastq }
-
-        // Also mix in versions and bubble up metrics to somewhere useful
-
+if (params.protocol == 'cDNA'){
+
+    ch_fastq.branch{
+        config_provided: it[0].restrander_config != null && it[0].restrander_config != ''
+        no_config: it[0].restrander_config == null || it[0].restrander_config == ''
+    }.set { ch_fastq_branch }
+
+    ch_fastq_branch.config_provided
+        .map { it -> [ it[0], it[1], it[0].restrander_config] }
+        .set { ch_fastq_restrander }
+
+    RESTRANDER ( ch_fastq_restrander )
+
+    RESTRANDER.out.reads
+        .join(ch_fastq_branch.config_provided, by: 0)
+        .map { tuple -> 
+        println "=== DEBUGGING TUPLE STRUCTURE ==="
+        println "Tuple size: ${tuple.size()}"
+        tuple.eachWithIndex { item, index ->
+            println "  tuple[$index] = $item (${item.getClass().getSimpleName()})"
+        }
+        println "================================="
+        
+        def meta = tuple[0]
+        def restranded_files = tuple[1]
+        def main_restranded_file = restranded_files[1]
+        
+        def gtf_file = tuple[6].toString().split(';')[1]  // Extract GTF from combined string
+        [ meta, main_restranded_file, tuple[3], gtf_file, tuple[4], tuple[5], tuple[6] ]
     }
+    .view { "After RESTRANDER processing: $it" }
+    .mix(ch_fastq_branch.no_config)
+    .set { ch_fastq }
+
+    ch_software_versions = ch_software_versions.mix(RESTRANDER.out.versions.first().ifEmpty(null))
+}
 
     ch_fastqc_multiqc = Channel.empty()
     if (!params.skip_qc) {

From 46055772fc4838b5b54d6baa7aedb9eae5cc6d83 Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Tue, 5 Aug 2025 12:03:43 +0200
Subject: [PATCH 30/38] Cleaning up the Restrander-related code and adding some
 comments.

---
 modules/local/restrander.nf |  9 +++------
 workflows/nanoseq.nf        | 32 ++++++++------------------------
 2 files changed, 11 insertions(+), 30 deletions(-)

diff --git a/modules/local/restrander.nf b/modules/local/restrander.nf
index db338dac..0ea848d3 100644
--- a/modules/local/restrander.nf
+++ b/modules/local/restrander.nf
@@ -8,16 +8,13 @@ process RESTRANDER {
     tuple val(meta), path(reads), path(input_config)
 
     output:
-    tuple val(meta), path("*_restrander.fq.gz"), emit: reads
-    // tuple val(meta), path("*_restrander-unknowns.fq.gz"), emit: unknown_reads
-    tuple val(meta), path("*.restrander.json"), emit: metrics
-    path "versions.yml"                , emit: versions
+    tuple val(meta), path("*_restrander.fq.gz") , emit: reads
+    tuple val(meta), path("*.restrander.json")  , emit: metrics
+    path "versions.yml"                         , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
-    // _restrander-unknowns.fq.gz
-
     script:
     def prefix = task.ext.prefix ?: reads.getBaseName()
     """
diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index 9317e34f..5d08b205 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -270,51 +270,35 @@ workflow NANOSEQ{
         ch_software_versions = ch_software_versions.mix(NANOLYSE.out.versions.first().ifEmpty(null))
     }
 
-    // If cDNA then we must run restrander and merge it back with samples
-        // that restrander cant run on and then bubble dowstream
-
-        /*
-            1. We know these are cDNA or direct RNA lets use a branch call to seperate into:
-
-                a. direct RNA where nothing is done
-                b. cDNA but there is no json config where nothing is done
-                c. cDNA with json config where we run restrander
-
-
-        */
-
-
 if (params.protocol == 'cDNA'){
 
+    // split the fastq channel into two branches - samples with and without restrander_config
     ch_fastq.branch{
         config_provided: it[0].restrander_config != null && it[0].restrander_config != ''
         no_config: it[0].restrander_config == null || it[0].restrander_config == ''
     }.set { ch_fastq_branch }
 
+    // only run Restrander on the branch with config provided
     ch_fastq_branch.config_provided
         .map { it -> [ it[0], it[1], it[0].restrander_config] }
         .set { ch_fastq_restrander }
 
+    /*
+     * MODULE: Orientate and quality check cDNA reads with Restrander
+     */
     RESTRANDER ( ch_fastq_restrander )
 
     RESTRANDER.out.reads
         .join(ch_fastq_branch.config_provided, by: 0)
-        .map { tuple -> 
-        println "=== DEBUGGING TUPLE STRUCTURE ==="
-        println "Tuple size: ${tuple.size()}"
-        tuple.eachWithIndex { item, index ->
-            println "  tuple[$index] = $item (${item.getClass().getSimpleName()})"
-        }
-        println "================================="
-        
+        .map { tuple ->
         def meta = tuple[0]
         def restranded_files = tuple[1]
         def main_restranded_file = restranded_files[1]
-        
+
         def gtf_file = tuple[6].toString().split(';')[1]  // Extract GTF from combined string
         [ meta, main_restranded_file, tuple[3], gtf_file, tuple[4], tuple[5], tuple[6] ]
     }
-    .view { "After RESTRANDER processing: $it" }
+    // merge the restranded files with the rest of the fastq files
     .mix(ch_fastq_branch.no_config)
     .set { ch_fastq }
 

From d853ab61fe115275318f73e15e3906aeeb771717 Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Tue, 5 Aug 2025 12:10:50 +0200
Subject: [PATCH 31/38] Removing unnecessary .view statements.

---
 workflows/nanoseq.nf | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index 5d08b205..1dc15f3d 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -196,11 +196,6 @@ workflow NANOSEQ{
     INPUT_CHECK ( ch_input, ch_input_path )
         .set { ch_sample }
 
-    // TODO: must remove line below
-    ch_sample.view{"Sample $it"}
-
-
-
     if (!params.skip_demultiplexing) {
 
         /*
@@ -308,8 +303,6 @@ if (params.protocol == 'cDNA'){
     ch_fastqc_multiqc = Channel.empty()
     if (!params.skip_qc) {
 
-        ch_fastq.view{"fastq $it"}
-
         /*
          * SUBWORKFLOW: Fastq QC with Nanoplot and fastqc
          */

From 63bcfd495a975b4c45e57df0c45fe18d48c0b82b Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Tue, 5 Aug 2025 12:22:48 +0200
Subject: [PATCH 32/38] More cleaning.

---
 workflows/nanoseq.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index 1dc15f3d..8ecea91d 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -113,6 +113,7 @@ include { BAM_RENAME            } from '../modules/local/bam_rename'
 include { BAMBU                 } from '../modules/local/bambu'
 include { RSEQC_GENEBODYCOVERAGE} from '../modules/local/rseqc_genebodycoverage'
 include { MULTIQC               } from '../modules/local/multiqc'
+include { RESTRANDER            } from '../modules/local/restrander'
 
 /*
  * SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
@@ -143,7 +144,6 @@ include { RNA_FUSIONS_JAFFAL               } from '../subworkflows/local/rna_fus
  */
 include { NANOLYSE                    } from '../modules/nf-core/nanolyse/main'
 include { SEQTK_SAMPLE                } from '../modules/nf-core/seqtk/main'
-include { RESTRANDER                   } from '../modules/local/restrander'
 include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
 
 /*

From bbe6ea636ac0ffa6950f5b34ea5fd3132d215fa2 Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Mon, 11 Aug 2025 12:30:28 +0200
Subject: [PATCH 33/38] Debugging non-restrander run errors.

---
 workflows/nanoseq.nf | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index 8ecea91d..cb86f2e4 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -314,6 +314,8 @@ if (params.protocol == 'cDNA'){
     ch_samtools_multiqc = Channel.empty()
     if (!params.skip_alignment) {
 
+        ch_fastq.view()
+
         /*
          * SUBWORKFLOW: Make chromosome size file and covert GTF to BED12
          */

From 5c0e5cce9244c7f6723797039051842ee5b59390 Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Mon, 11 Aug 2025 13:51:36 +0200
Subject: [PATCH 34/38] Debugging non-restrander run errors.

---
 workflows/nanoseq.nf | 65 +++++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 28 deletions(-)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index cb86f2e4..2738eda2 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -265,40 +265,49 @@ workflow NANOSEQ{
         ch_software_versions = ch_software_versions.mix(NANOLYSE.out.versions.first().ifEmpty(null))
     }
 
-if (params.protocol == 'cDNA'){
+    if (params.protocol == 'cDNA'){
 
-    // split the fastq channel into two branches - samples with and without restrander_config
-    ch_fastq.branch{
-        config_provided: it[0].restrander_config != null && it[0].restrander_config != ''
-        no_config: it[0].restrander_config == null || it[0].restrander_config == ''
-    }.set { ch_fastq_branch }
+        // split the fastq channel into two branches - samples with and without restrander_config
+        ch_fastq.branch{
+            config_provided: it[0].restrander_config != null && it[0].restrander_config != ''
+            no_config: it[0].restrander_config == null || it[0].restrander_config == ''
+        }.set { ch_fastq_branch }
 
-    // only run Restrander on the branch with config provided
-    ch_fastq_branch.config_provided
-        .map { it -> [ it[0], it[1], it[0].restrander_config] }
-        .set { ch_fastq_restrander }
+        // only run Restrander on the branch with config provided
+        ch_fastq_branch.config_provided
+            .map { it -> [ it[0], it[1], it[0].restrander_config] }
+            .set { ch_fastq_restrander }
 
-    /*
-     * MODULE: Orientate and quality check cDNA reads with Restrander
-     */
-    RESTRANDER ( ch_fastq_restrander )
+        /*
+        * MODULE: Orientate and quality check cDNA reads with Restrander
+        */
+        RESTRANDER ( ch_fastq_restrander )
+
+        RESTRANDER.out.reads
+            .join(ch_fastq_branch.config_provided, by: 0)
+            .map { tuple ->
+            def meta = tuple[0]
+            def restranded_files = tuple[1]
+            def main_restranded_file = restranded_files[1]
 
-    RESTRANDER.out.reads
-        .join(ch_fastq_branch.config_provided, by: 0)
-        .map { tuple ->
-        def meta = tuple[0]
-        def restranded_files = tuple[1]
-        def main_restranded_file = restranded_files[1]
 
-        def gtf_file = tuple[6].toString().split(';')[1]  // Extract GTF from combined string
-        [ meta, main_restranded_file, tuple[3], gtf_file, tuple[4], tuple[5], tuple[6] ]
+            [ meta, main_restranded_file, tuple[3], tuple[4], tuple[5], tuple[6] ]
+        }
+        // merge the restranded files with the rest of the fastq files
+        .mix(ch_fastq_branch.no_config)
+        .set { ch_fastq }
+
+        ch_software_versions = ch_software_versions.mix(RESTRANDER.out.versions.first().ifEmpty(null))
     }
-    // merge the restranded files with the rest of the fastq files
-    .mix(ch_fastq_branch.no_config)
-    .set { ch_fastq }
+    ch_fastq.view()
 
-    ch_software_versions = ch_software_versions.mix(RESTRANDER.out.versions.first().ifEmpty(null))
-}
+
+    def gtf_file = ch_fastq[6].toString().split(';')[1]  // Extract GTF from combined string
+    ch_fastq
+        .map { it -> [ it[0], it[1], it[2], gtf_file, it[3], it[4], it[5], it[6] ] }
+        .set { ch_fastq }
+
+    ch_fastq.view()
 
     ch_fastqc_multiqc = Channel.empty()
     if (!params.skip_qc) {
@@ -314,7 +323,7 @@ if (params.protocol == 'cDNA'){
     ch_samtools_multiqc = Channel.empty()
     if (!params.skip_alignment) {
 
-        ch_fastq.view()
+        //ch_fastq.view()
 
         /*
          * SUBWORKFLOW: Make chromosome size file and covert GTF to BED12

From fc70be5ba0158ff586e2c6a732954c088c41b345 Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Mon, 11 Aug 2025 13:53:33 +0200
Subject: [PATCH 35/38] Debugging non-restrander run errors.

---
 workflows/nanoseq.nf | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index 2738eda2..a23d9978 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -301,10 +301,9 @@ workflow NANOSEQ{
     }
     ch_fastq.view()
 
-
-    def gtf_file = ch_fastq[6].toString().split(';')[1]  // Extract GTF from combined string
     ch_fastq
-        .map { it -> [ it[0], it[1], it[2], gtf_file, it[3], it[4], it[5], it[6] ] }
+        .map { it -> [ it[0], it[1], it[2], it[6].toString().split(';')[1], // Extract GTF from combined string
+         it[3], it[4], it[5], it[6] ] }
         .set { ch_fastq }
 
     ch_fastq.view()

From d5b1ac4aa0b4eceb643cc80f74a774fddbd0f426 Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Tue, 12 Aug 2025 11:26:50 +0200
Subject: [PATCH 36/38] Debugging non-restrander run errors.

---
 workflows/nanoseq.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index a23d9978..1796752d 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -302,8 +302,8 @@ workflow NANOSEQ{
     ch_fastq.view()
 
     ch_fastq
-        .map { it -> [ it[0], it[1], it[2], it[6].toString().split(';')[1], // Extract GTF from combined string
-         it[3], it[4], it[5], it[6] ] }
+        .map { it -> [ it[0], it[1], it[2], it[5].toString().split(';')[1], // Extract GTF from combined string
+         it[3], it[4], it[5] ] }
         .set { ch_fastq }
 
     ch_fastq.view()

From 68dcbc3e7cba04f6f9cf2233ec41c92d9d5903e3 Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Tue, 12 Aug 2025 15:03:45 +0200
Subject: [PATCH 37/38] The bug was fixed with the last commit. Now just
 cleaning up the code.

---
 workflows/nanoseq.nf | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/workflows/nanoseq.nf b/workflows/nanoseq.nf
index 1796752d..e20583e1 100644
--- a/workflows/nanoseq.nf
+++ b/workflows/nanoseq.nf
@@ -299,15 +299,13 @@ workflow NANOSEQ{
 
         ch_software_versions = ch_software_versions.mix(RESTRANDER.out.versions.first().ifEmpty(null))
     }
-    ch_fastq.view()
 
+    // Extract the GTF file from the combined string and add it as its own element in the channel
     ch_fastq
-        .map { it -> [ it[0], it[1], it[2], it[5].toString().split(';')[1], // Extract GTF from combined string
+        .map { it -> [ it[0], it[1], it[2], it[5].toString().split(';')[1],
          it[3], it[4], it[5] ] }
         .set { ch_fastq }
 
-    ch_fastq.view()
-
     ch_fastqc_multiqc = Channel.empty()
     if (!params.skip_qc) {
 
@@ -322,8 +320,6 @@ workflow NANOSEQ{
     ch_samtools_multiqc = Channel.empty()
     if (!params.skip_alignment) {
 
-        //ch_fastq.view()
-
         /*
          * SUBWORKFLOW: Make chromosome size file and covert GTF to BED12
          */

From cc65ad186ab1aecda9981e044459087c7654c990 Mon Sep 17 00:00:00 2001
From: julietmWM <116255892+julietmWM@users.noreply.github.com>
Date: Tue, 12 Aug 2025 16:01:40 +0200
Subject: [PATCH 38/38] Added Restrander information to the usage and output
 docs.

---
 docs/output.md | 17 ++++++++++++
 docs/usage.md  | 71 ++++++++++++++++++++++++++++++--------------------
 2 files changed, 60 insertions(+), 28 deletions(-)

diff --git a/docs/output.md b/docs/output.md
index 42a1a00c..5cb7af6b 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -46,6 +46,23 @@ _Documentation_:
 _Description_:
 If you would like to run NanoLyse on the raw FASTQ files you can provide `--run_nanolyse` when running the pipeline. By default, the pipeline will filter lambda phage reads. However, you can provide your own FASTA file of "contaminants" with `--nanolyse_fasta`. The filtered FASTQ files will contain raw reads without the specified reference sequences (default: lambda phage sequences).
 
+## cDNA Read Orientation
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `restrander/<SAMPLE>_restrander.fq.gz`: FASTQ file of the stranded reads. The reverse strand reads are replaced with their reverse-complements, ensuring that all reads in the output have the same orientation as the original transcripts.
+- `restrander/<SAMPLE>-unknowns.*_restrander.fq.gz`: FASTQ file of the reads whose strand could not be inferred.
+- `restrander/<SAMPLE>.restrander.json`: Restrander output statistics - includes artefact and strand statistics.
+
+</details>
+
+_Documentation_:
+[Restrander](https://github.com/mritchielab/restrander)
+
+_Description_:
+Restrander is a program designed for orienting and quality-checking cDNA sequencing reads. Restrander will run automatically if the protocol is cDNA and a Restrander config file is present in the sample sheet.
+
 ## Read QC
 
 <details markdown="1">
diff --git a/docs/usage.md b/docs/usage.md
index 97dd0a5a..2de4586e 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -10,12 +10,13 @@ You will need to create a file with information about the samples in your experi
 
 | Column       | Description                                                                                                                                                                                                                                                                               |
 | ------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `group`      | Group identifier for sample. This will be identical for replicate samples from the same experimental group.                                                                                                                                                                               |
-| `replicate`  | Integer representing replicate number. Must start from `1..<number of replicates>`.                                                                                                                                                                                                       |
-| `barcode`    | Barcode identifier attributed to that sample during multiplexing. Must be an integer.                                                                                                                                                                                                     |
-| `input_file` | Full path to FastQ file if previously demultiplexed, BAM file if previously aligned, or a path to a directory with subdirectories containing fastq or fast5 files. FastQ file has to be zipped and have the extension ".fastq.gz" or ".fq.gz". BAM file has to have the extension ".bam". |
-| `fasta`      | Genome fasta file or transcriptome fasta file for alignment. This can either be a local path, or the appropriate key for a genome available in [iGenomes config file](../conf/igenomes.config). Must have the extension ".fasta", ".fasta.gz", ".fa" or ".fa.gz".                         |
-| `gtf`        | Annotation gtf file for transcript discovery and quantification and RNA modification detection. This can either be blank or a local path. Must have the extension ".gtf".                                                                                                                 |
+| `group`             | Group identifier for sample. This will be identical for replicate samples from the same experimental group.                                                                                                                                                                               |
+| `replicate`         | Integer representing replicate number. Must start from `1..<number of replicates>`.                                                                                                                                                                                                       |
+| `barcode`           | Barcode identifier attributed to that sample during multiplexing. Must be an integer.                                                                                                                                                                                                     |
+| `input_file`        | Full path to FastQ file if previously demultiplexed, BAM file if previously aligned, or a path to a directory with subdirectories containing fastq or fast5 files. FastQ file has to be zipped and have the extension ".fastq.gz" or ".fq.gz". BAM file has to have the extension ".bam". |
+| `fasta`             | Genome fasta file or transcriptome fasta file for alignment. This can either be a local path, or the appropriate key for a genome available in [iGenomes config file](../conf/igenomes.config). Must have the extension ".fasta", ".fasta.gz", ".fa" or ".fa.gz".                         |
+| `gtf`               | Annotation gtf file for transcript discovery and quantification and RNA modification detection. This can either be blank or a local path. Must have the extension ".gtf".                                                                                                                 |
+| `restrander_config` | Restrander .json config file that provides the template-switching oligo (TSO) and reverse transcription primer (RTP) sequences. Different configurations are used for different library preparation protocols. This can either be blank or a file path. If blank, Restrander will not run for the sample.                               |
 
 ### Skip demultiplexing
 
@@ -26,13 +27,13 @@ As shown in the examples below, the accepted samplesheet format is different dep
 ##### Example `samplesheet.csv` for non-demultiplexed fastq inputs
 
 ```bash
-group,replicate,barcode,input_file,fasta,gtf
-WT_MOUSE,1,1,,mm10,
-WT_HUMAN,1,2,,hg19,
-WT_POMBE,1,3,,/path/to/local/genome.fa,
-WT_DENOVO,1,4,,,/path/to/local/transcriptome.fa
-WT_LOCAL,2,5,,/path/to/local/genome.fa,/path/to/local/transcriptome.gtf
-WT_UNKNOWN,3,6,,,
+group,replicate,barcode,input_file,fasta,gtf,restrander_config
+WT_MOUSE,1,1,,mm10,,
+WT_HUMAN,1,2,,hg19,,
+WT_POMBE,1,3,,/path/to/local/genome.fa,,
+WT_DENOVO,1,4,,,/path/to/local/transcriptome.fa,
+WT_LOCAL,2,5,,/path/to/local/genome.fa,/path/to/local/transcriptome.gtf,
+WT_UNKNOWN,3,6,,,,
 ```
 
 ##### Example command for non-demultiplexed fastq inputs
@@ -52,11 +53,11 @@ nextflow run nf-core/nanoseq \
 ##### Example `samplesheet.csv` for demultiplexed fastq inputs
 
 ```bash
-group,replicate,barcode,input_file,fasta,gtf
-WT,1,,SAM101A1.fastq.gz,hg19,
-WT,2,,SAM101A2.fastq.gz,hg19,
-KO,1,,SAM101A3.fastq.gz,hg19,
-KO,2,,SAM101A4.fastq.gz,hg19,
+group,replicate,barcode,input_file,fasta,gtf,restrander_config
+WT,1,,SAM101A1.fastq.gz,hg19,,
+WT,2,,SAM101A2.fastq.gz,hg19,,
+KO,1,,SAM101A3.fastq.gz,hg19,,
+KO,2,,SAM101A4.fastq.gz,hg19,,
 ```
 
 ##### Example command for demultiplexed fastq inputs
@@ -74,11 +75,11 @@ nextflow run nf-core/nanoseq \
 ##### Example `samplesheet.csv` for BAM inputs
 
 ```bash
-group,replicate,barcode,input_file,fasta,gtf
-WT,1,,SAM101A1.bam,hg19,
-WT,2,,SAM101A2.bam,hg19,
-KO,1,,SAM101A3.bam,hg19,
-KO,2,,SAM101A4.bam,hg19,
+group,replicate,barcode,input_file,fasta,gtf,restrander_config
+WT,1,,SAM101A1.bam,hg19,,
+WT,2,,SAM101A2.bam,hg19,,
+KO,1,,SAM101A3.bam,hg19,,
+KO,2,,SAM101A4.bam,hg19,,
 ```
 
 ##### Example command for BAM inputs
@@ -97,11 +98,11 @@ nextflow run nf-core/nanoseq \
 ##### Example `samplesheet.csv` for FAST5 and FASTQ input directories
 
 ```bash
-group,replicate,barcode,input_file,fasta,gtf
-WT,1,,/full/path/to/SAM101A1/,hg19.fasta,hg19.gtf
-WT,2,,/full/path/to/SAM101A2/,hg19.fasta,hg19.gtf
-KO,1,,/full/path/to/SAM101A3/,hg19.fasta,hg19.gtf
-KO,2,,/full/path/to/SAM101A4/,hg19.fasta,hg19.gtf
+group,replicate,barcode,input_file,fasta,gtf,restrander_config
+WT,1,,/full/path/to/SAM101A1/,hg19.fasta,hg19.gtf,
+WT,2,,/full/path/to/SAM101A2/,hg19.fasta,hg19.gtf,
+KO,1,,/full/path/to/SAM101A3/,hg19.fasta,hg19.gtf,
+KO,2,,/full/path/to/SAM101A4/,hg19.fasta,hg19.gtf,
 ```
 
 ##### Each of the FAST5 and FASTQ input directory should have the following structure:
@@ -128,6 +129,20 @@ nextflow run nf-core/nanoseq \
     -profile <docker/singularity/institute>
 ```
 
+### Using Restrander
+
+Restrander is a program used for orienting and quality-checking cDNA sequencing reads. Restrander will automatically run if the protocol is cDNA and a Restrander config file is present in the sample sheet. Examples of Restrander configuration files for several protocols can be found in the [README](https://github.com/jakob-schuster/restrander-vignette?tab=readme-ov-file#configuration-files) for the Restrander vignette. The sample sheet can have a mix of samples with and without Restrander config files.
+
+##### Example `samplesheet.csv` for using Restrander
+
+```bash
+group,replicate,barcode,input_file,fasta,gtf,restrander_config
+WT,1,1,/full/path/to/SAM101A1/,hg19,hg19.gtf,
+WT,2,2,/full/path/to/SAM101A2/,hg19,hg19.gtf,
+KO,1,3,/full/path/to/SAM101A3/,hg19,hg19.gtf,PCB109.json
+KO,2,4,/full/path/to/SAM101A4/,hg19,hg19.gtf,PCB109.json
+```
+
 ## Running the pipeline
 
 The typical command for running the pipeline is as follows: