diff --git a/modules/local/cramino/main.nf b/modules/local/cramino/main.nf index b6ab067..eff34a0 100644 --- a/modules/local/cramino/main.nf +++ b/modules/local/cramino/main.nf @@ -1,6 +1,6 @@ process CRAMINO { tag "$meta.id" - label 'process_single' + label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/nf-core/ascat/main.nf b/modules/nf-core/ascat/main.nf index fa82efe..fb34f18 100644 --- a/modules/nf-core/ascat/main.nf +++ b/modules/nf-core/ascat/main.nf @@ -42,7 +42,7 @@ process ASCAT { def penalty = args.penalty ? "$args.penalty" : "NULL" def gc_input = gc_file ? "$gc_file" : "NULL" def rt_input = rt_file ? "$rt_file" : "NULL" - def pdf_plots = args.pdf_plots ? "$args.pdf_plots" : "NULL" + def pdf_plots = args.pdf_plots ? ("$args.pdf_plots" == true ? "TRUE": "FALSE") : "NULL" def minCounts_arg = args.minCounts ? ",minCounts = $args.minCounts" : "" def bed_file_arg = bed_file ? ",BED_file = '$bed_file'": "" diff --git a/nextflow.config b/nextflow.config index 5e93950..bdeccd8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -55,7 +55,7 @@ params { ascat_penalty = 150 ascat_purity = null ascat_longread_bins = 2000 - ascat_pdf_plots = "FALSE" + ascat_pdf_plots = false ascat_allelecounter_flags = "-f 0" ascat_chroms = null // Only use if running on a subset of chromosomes (c(1:22, 'X', 'Y')) diff --git a/nextflow_schema.json b/nextflow_schema.json index 56fefa9..23fa710 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -166,6 +166,10 @@ "ascat_rt_files": { "type": "string", "description": "path to (zip) of RT files" + }, + "ascat_pdf_plots": { + "type": "boolean", + "description": "Boolean for ASCAT production of pdf plots (entered as string)" } } }, diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index d084d29..35fb56b 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "InProgress", "datePublished": "2025-12-23T12:58:53+00:00", - "description": "# IntGenomicsLab/lrsomatic\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/IntGenomicsLab/lrsomatic)\n[![GitHub Actions CI Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.17751829-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.17751829)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with 
docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/IntGenomicsLab/lrsomatic)\n\n## Introduction\n\n**IntGenomicsLab/lrsomatic** is a robust bioinformatics pipeline designed for processing and analyzing **somatic DNA sequencing** data for long-read sequencing technologies from **Oxford Nanopore** and **PacBio**. It supports both canonical base DNA and modified base calling, including specialized applications such as **Fiber-seq**.\n\nThis **end-to-end pipeline** handles the entire workflow \u2014 **from raw read processing and alignment, to comprehensive somatic variant calling**, including single nucleotide variants, indels, structural variants, copy number alterations, and modified bases.\n\nIt can be run in both **matched tumour-normal** and **tumour-only mode**, offering flexibility depending on the users study design.\n\nDeveloped using **Nextflow DSL2**, it offers high portability and scalability across diverse computing environments. By leveraging Docker or Singularity containers, installation is streamlined and results are highly reproducible. Each process runs in an isolated container, simplifying dependency management and updates. Where applicable, pipeline components are sourced from **nf-core/modules**, promoting reuse, interoperability, and consistency within the broader Nextflow and nf-core ecosystems.\n\n## Pipeline summary\n\n**1) Pre-processing:**\n\na. Raw read QC ([`cramino`](https://github.com/wdecoster/cramino))\n\nb. Alignment to the reference genome ([`minimap2`](https://github.com/lh3/minimap2))\n\nc. Post alignment QC ([`cramino`](https://github.com/wdecoster/cramino), [`samtools idxstats`](https://github.com/samtools/samtools), [`samtools flagstats`](https://github.com/samtools/samtools), [`samtools stats`](https://github.com/samtools/samtools))\n\nd. Specific for calling modified base calling ([`Modkit`](https://github.com/nanoporetech/modkit), [`Fibertools`](https://github.com/fiberseq/fibertools-rs))\n\n**2i) Matched mode: small variant calling:**\n\na. Calling Germline SNPs ([`Clair3`](https://github.com/HKU-BAL/Clair3))\n\nb. Phasing and Haplotagging the SNPs in the normal and tumour BAM ([`LongPhase`](https://github.com/twolinin/longphase))\n\nc. Calling somatic SNVs ([`ClairS`](https://github.com/HKU-BAL/ClairS))\n\n**2ii) Tumour only mode: small variant calling:**\n\na. Calling Germline SNPs and somatic SNVs ([`ClairS-TO`](https://github.com/HKU-BAL/ClairS-TO))\n\nb. Phasing and Haplotagging germline SNPs in tumour BAM ([`LongPhase`](https://github.com/twolinin/longphase))\n\n**3) Large variant calling:**\n\na. Somatic structural variant calling ([`Severus`](https://github.com/KolmogorovLab/Severus))\n\nb. Copy number alterion calling; long read version of ([`ASCAT`](https://github.com/VanLoo-lab/ascat))\n\n**4) Annotation:**\n\na. Small variant annotation ([`VEP`](https://github.com/Ensembl/ensembl-vep))\n\nb. Structural variant annotation ([`VEP`](https://github.com/Ensembl/ensembl-vep))\n\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst prepare a samplesheet with your input data that looks as follows:\n\n```csv\nsample,bam_tumor,bam_normal,platform,sex,fiber\nsample1,tumour.bam,normal.bam,ont,female,n\nsample2,tumour.bam,,ont,female,y\nsample3,tumour.bam,,pb,male,n\nsample4,tumour.bam,normal.bam,pb,male,y\n```\n\nEach row represents a sample. The bam files should always be unaligned bam files. All fields except for `bam_normal` are required. If `bam_normal` is empty, the pipeline will run in tumour only mode. `platform` should be either `ont` or `pb` for Oxford Nanopore Sequencing or PacBio sequencing, respectively. `sex` refers to the biological sex of the sample and should be either `female` or `male`. Finally, `fiber` specifies whether your sample is Fiber-seq data or not and should have either `y` for Yes or `n` for No.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run IntGenomicsLab/lrsomatic \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\nMore detail is given in our [usage documentation](/docs/usage.md)\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\n## Credits\n\nIntGenomicsLab/lr_somatic was originally written by Luuk Harbers, Robert Forsyth, Alexandra Pan\u010d\u00edkov\u00e1, Marios Eftychiou, Ruben Cools, Laurens Lambrechts, and Jonas Demeulemeester.\n\n## Pipeline output\n\nThis pipeline produces a series of different output files. The main output is an aligned and phased tumour bam file. This bam file can be used by any typical downstream tool that uses bam files as input. Furthermore, we have sample-specific QC outputs from `cramino` (fastq), `cramino` (bam), `mosdepth`, `samtools` (stats/flagstat/idxstats), and optionally `fibertools`. Finally, we have a `multiqc` report from that combines the output from `mosdepth` and `samtools` into one html report.\n\nBesides QC and the aligned and phased bam file, we have output from (structural) variant and copy number callers, of which some are optional. The output from these variant callers can be found in their respective folders. For small and structural variant callers (`clairS`, `clairS-TO`, and `severus`) these will contain, among others, `vcf` files with called variants. 
For `ascat` these contain files with final copy number information and plots of the copy number profiles.\n\nExample output directory structure:\n\n```\n\u251c\u2500\u2500 Sample 1\n\u2502 \u251c\u2500\u2500 ascat\n\u2502 \u251c\u2500\u2500 bamfiles\n\u2502 \u251c\u2500\u2500 qc\n\u2502 \u2502 \u251c\u2500\u2500 tumor\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u251c\u2500\u2500 variants\n\u2502 \u2502 \u251c\u2500\u2500clairS-TO\n\u2502 \u2502 \u251c\u2500\u2500severus\n\u2502 \u251c\u2500\u2500 vep\n\u2502 \u2502 \u251c\u2500\u2500 germline\n\u2502 \u2502 \u251c\u2500\u2500 somatic\n\u2502 \u2502 \u251c\u2500\u2500 SVs\n\u2502\n\u251c\u2500\u2500 Sample 2\n\u2502 \u251c\u2500\u2500 ascat\n\u2502 \u251c\u2500\u2500 bamfiles\n\u2502 \u251c\u2500\u2500 qc\n\u2502 \u2502 \u251c\u2500\u2500 tumor\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u2502 \u251c\u2500\u2500 normal\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u251c\u2500\u2500 variants\n\u2502 \u2502 \u251c\u2500\u2500 clair3\n\u2502 \u2502 \u251c\u2500\u2500 clairS\n\u2502 \u2502 \u251c\u2500\u2500 severus\n\u2502 \u251c\u2500\u2500 vep\n\u2502 \u2502 \u251c\u2500\u2500 germline\n\u2502 \u2502 \u251c\u2500\u2500 somatic\n\u2502 \u2502 \u251c\u2500\u2500 SVs\n\u251c\u2500\u2500 pipeline_info\n```\n\nmore detail is given in our [output documentation](/docs/output.md)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use IntGenomicsLab/lrsomatic for your analysis, please cite it using the following doi: [10.5281/zenodo.17751829](https://doi.org/10.5281/zenodo.17751829)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "# IntGenomicsLab/lrsomatic\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/IntGenomicsLab/lrsomatic)\n[![GitHub Actions CI Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.17751829-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.17751829)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/IntGenomicsLab/lrsomatic)\n\n## Introduction\n\n**IntGenomicsLab/lrsomatic** is a robust bioinformatics pipeline designed for processing and analyzing **somatic DNA sequencing** data for long-read sequencing technologies from **Oxford Nanopore** and **PacBio**. It supports both canonical base DNA and modified base calling, including specialized applications such as **Fiber-seq**.\n\nThis **end-to-end pipeline** handles the entire workflow \u2014 **from raw read processing and alignment, to comprehensive somatic variant calling**, including single nucleotide variants, indels, structural variants, copy number alterations, and modified bases.\n\nIt can be run in both **matched tumour-normal** and **tumour-only mode**, offering flexibility depending on the users study design.\n\nDeveloped using **Nextflow DSL2**, it offers high portability and scalability across diverse computing environments. By leveraging Docker or Singularity containers, installation is streamlined and results are highly reproducible. Each process runs in an isolated container, simplifying dependency management and updates. Where applicable, pipeline components are sourced from **nf-core/modules**, promoting reuse, interoperability, and consistency within the broader Nextflow and nf-core ecosystems.\n\n## Pipeline summary\n\n![image](./assets/lrsomatic_1.0.png)\n\n**1) Pre-processing:**\n\na. Raw read QC ([`cramino`](https://github.com/wdecoster/cramino))\n\nb. Alignment to the reference genome ([`minimap2`](https://github.com/lh3/minimap2))\n\nc. 
Post alignment QC ([`cramino`](https://github.com/wdecoster/cramino), [`samtools idxstats`](https://github.com/samtools/samtools), [`samtools flagstats`](https://github.com/samtools/samtools), [`samtools stats`](https://github.com/samtools/samtools))\n\nd. Specific for calling modified base calling ([`Modkit`](https://github.com/nanoporetech/modkit), [`Fibertools`](https://github.com/fiberseq/fibertools-rs))\n\n**2i) Matched mode: small variant calling:**\n\na. Calling Germline SNPs ([`Clair3`](https://github.com/HKU-BAL/Clair3))\n\nb. Phasing and Haplotagging the SNPs in the normal and tumour BAM ([`LongPhase`](https://github.com/twolinin/longphase))\n\nc. Calling somatic SNVs ([`ClairS`](https://github.com/HKU-BAL/ClairS))\n\n**2ii) Tumour only mode: small variant calling:**\n\na. Calling Germline SNPs and somatic SNVs ([`ClairS-TO`](https://github.com/HKU-BAL/ClairS-TO))\n\nb. Phasing and Haplotagging germline SNPs in tumour BAM ([`LongPhase`](https://github.com/twolinin/longphase))\n\n**3) Large variant calling:**\n\na. Somatic structural variant calling ([`Severus`](https://github.com/KolmogorovLab/Severus))\n\nb. Copy number alterion calling; long read version of ([`ASCAT`](https://github.com/VanLoo-lab/ascat))\n\n**4) Annotation:**\n\na. Small variant annotation ([`VEP`](https://github.com/Ensembl/ensembl-vep))\n\nb. Structural variant annotation ([`VEP`](https://github.com/Ensembl/ensembl-vep))\n\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst prepare a samplesheet with your input data that looks as follows:\n\n```csv\nsample,bam_tumor,bam_normal,platform,sex,fiber\nsample1,tumour.bam,normal.bam,ont,female,n\nsample2,tumour.bam,,ont,female,y\nsample3,tumour.bam,,pb,male,n\nsample4,tumour.bam,normal.bam,pb,male,y\n```\n\nEach row represents a sample. The bam files should always be unaligned bam files. All fields except for `bam_normal` are required. If `bam_normal` is empty, the pipeline will run in tumour only mode. `platform` should be either `ont` or `pb` for Oxford Nanopore Sequencing or PacBio sequencing, respectively. `sex` refers to the biological sex of the sample and should be either `female` or `male`. Finally, `fiber` specifies whether your sample is Fiber-seq data or not and should have either `y` for Yes or `n` for No.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run IntGenomicsLab/lrsomatic \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\nMore detail is given in our [usage documentation](/docs/usage.md)\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\n## Credits\n\nIntGenomicsLab/lr_somatic was originally written by Luuk Harbers, Robert Forsyth, Alexandra Pan\u010d\u00edkov\u00e1, Marios Eftychiou, Ruben Cools, Laurens Lambrechts, and Jonas Demeulemeester.\n\n## Pipeline output\n\nThis pipeline produces a series of different output files. The main output is an aligned and phased tumour bam file. 
This bam file can be used by any typical downstream tool that uses bam files as input. Furthermore, we have sample-specific QC outputs from `cramino` (fastq), `cramino` (bam), `mosdepth`, `samtools` (stats/flagstat/idxstats), and optionally `fibertools`. Finally, we have a `multiqc` report from that combines the output from `mosdepth` and `samtools` into one html report.\n\nBesides QC and the aligned and phased bam file, we have output from (structural) variant and copy number callers, of which some are optional. The output from these variant callers can be found in their respective folders. For small and structural variant callers (`clairS`, `clairS-TO`, and `severus`) these will contain, among others, `vcf` files with called variants. For `ascat` these contain files with final copy number information and plots of the copy number profiles.\n\nExample output directory structure:\n\n```\n\u251c\u2500\u2500 Sample 1\n\u2502 \u251c\u2500\u2500 ascat\n\u2502 \u251c\u2500\u2500 bamfiles\n\u2502 \u251c\u2500\u2500 qc\n\u2502 \u2502 \u251c\u2500\u2500 tumor\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u251c\u2500\u2500 variants\n\u2502 \u2502 \u251c\u2500\u2500clairS-TO\n\u2502 \u2502 \u251c\u2500\u2500severus\n\u2502 \u251c\u2500\u2500 vep\n\u2502 \u2502 \u251c\u2500\u2500 germline\n\u2502 \u2502 \u251c\u2500\u2500 somatic\n\u2502 \u2502 \u251c\u2500\u2500 SVs\n\u2502\n\u251c\u2500\u2500 Sample 2\n\u2502 \u251c\u2500\u2500 ascat\n\u2502 \u251c\u2500\u2500 bamfiles\n\u2502 \u251c\u2500\u2500 qc\n\u2502 \u2502 \u251c\u2500\u2500 tumor\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u2502 \u251c\u2500\u2500 normal\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u251c\u2500\u2500 variants\n\u2502 \u2502 \u251c\u2500\u2500 clair3\n\u2502 \u2502 \u251c\u2500\u2500 clairS\n\u2502 \u2502 \u251c\u2500\u2500 severus\n\u2502 \u251c\u2500\u2500 vep\n\u2502 \u2502 \u251c\u2500\u2500 germline\n\u2502 \u2502 \u251c\u2500\u2500 somatic\n\u2502 \u2502 \u251c\u2500\u2500 SVs\n\u251c\u2500\u2500 pipeline_info\n```\n\nmore detail is given in our [output documentation](/docs/output.md)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use IntGenomicsLab/lrsomatic for your analysis, please cite it using the following doi: [10.5281/zenodo.17751829](https://doi.org/10.5281/zenodo.17751829)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven 
Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
             "hasPart": [
                 {
                     "@id": "main.nf"
diff --git a/subworkflows/local/prepare_reference_files.nf b/subworkflows/local/prepare_reference_files.nf
index d79448c..6bd1195 100644
--- a/subworkflows/local/prepare_reference_files.nf
+++ b/subworkflows/local/prepare_reference_files.nf
@@ -39,9 +39,11 @@ workflow PREPARE_REFERENCE_FILES {
         ch_prepared_fasta = [ [:], fasta ]
     }
 
+    // if clair3 model is specified, then download that
+    // otherwise use info in bam header and download that
     basecall_meta.map { meta, basecall_model_meta, kinetics_meta ->
-        def id_new = basecall_model_meta ?: meta.clair3_model
+        def id_new = basecall_model_meta ? clair3_modelMap.get(basecall_model_meta) : basecall_model_meta
         def meta_new = [id: id_new]
         def model = (!meta.clair3_model || meta.clair3_model.toString().trim() in ['', '[]']) ? clair3_modelMap.get(basecall_model_meta) : meta.clair3_model
         def download_prefix = ( basecall_model_meta == 'hifi_revio' ? "https://www.bio8.cs.hku.hk/clair3/clair3_models/" : "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3" )
@@ -49,13 +51,13 @@ workflow PREPARE_REFERENCE_FILES {
             return [ meta_new, url ]
         }
         .unique()
-        .set{ model_urls }
+        .set{ clair3_model_urls }
 
     //
     // MODULE: Download model
     //
 
-    WGET ( model_urls )
+    WGET ( clair3_model_urls )
 
     ch_versions = ch_versions.mix(WGET.out.versions)
@@ -69,7 +71,7 @@ workflow PREPARE_REFERENCE_FILES {
 
     ch_versions = ch_versions.mix(UNTAR.out.versions)
 
-    UNTAR.out.untar.set { downloaded_model_files }
+    UNTAR.out.untar.set { downloaded_clair3_models }
 
     //
     // MODULE: Index the fasta
@@ -126,7 +128,7 @@ workflow PREPARE_REFERENCE_FILES {
         loci_files
         gc_file
         rt_file
-        downloaded_model_files
+        downloaded_clair3_models
 
         versions = ch_versions
 }
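Reviewer note (not part of the patch): the download channel's `meta.id` is now the *mapped* Clair3 model name rather than the raw basecaller string, which is what lets `tumor_normal_happhase.nf` later match BAMs to model directories on the same value. A minimal standalone Groovy sketch of the two lookups involved, using only map entries that appear in `workflows/lrsomatic.nf`; the closure names and the `'my_local_model'` value are illustrative placeholders:

```groovy
// Sketch only -- mirrors the ternaries above, not pipeline code.
def clair3_modelMap = [
    'dna_r10.4.1_e8.2_400bps_sup@v4.2.0': 'r1041_e82_400bps_sup_v420',
    'hifi_revio'                        : 'hifi',
]

// basecall model read from the BAM header -> channel key / download id
def modelId = { basecallModel ->
    basecallModel ? clair3_modelMap.get(basecallModel) : basecallModel
}

// a user-supplied clair3_model still wins over the header-derived one
def chooseModel = { basecallModel, userModel ->
    def unset = !userModel || userModel.toString().trim() in ['', '[]']
    unset ? clair3_modelMap.get(basecallModel) : userModel
}

assert modelId('hifi_revio') == 'hifi'
assert chooseModel('dna_r10.4.1_e8.2_400bps_sup@v4.2.0', null) == 'r1041_e82_400bps_sup_v420'
assert chooseModel('dna_r10.4.1_e8.2_400bps_sup@v4.2.0', 'my_local_model') == 'my_local_model'
```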
diff --git a/subworkflows/local/tumor_normal_happhase.nf b/subworkflows/local/tumor_normal_happhase.nf
index 922da7c..81d08f9 100644
--- a/subworkflows/local/tumor_normal_happhase.nf
+++ b/subworkflows/local/tumor_normal_happhase.nf
@@ -13,7 +13,7 @@ workflow TUMOR_NORMAL_HAPPHASE {
         fai
         clair3_modelMap
         clairs_modelMap
-        downloaded_model_files
+        downloaded_clair3_models
 
     main:
 
@@ -33,34 +33,42 @@ workflow TUMOR_NORMAL_HAPPHASE {
     // Get normal bams and add platform/model info for Clair3 usage
     // remove type from so that information can be merged easier later
-    downloaded_model_files
+    downloaded_clair3_models
         .map{ meta, file ->
-            def basecall_model = meta.id
-            return [basecall_model, meta, file]
+            def clair3_model = meta.id
+            return [meta, clair3_model, file]
         }
-        .set{downloaded_model_files}
+        .set{downloaded_clair3_models}
 
-        mixed_bams.normal
+    mixed_bams.normal
         .map{ meta, bam, bai ->
-            def basecall_model = (!meta.clair3_model || meta.clair3_model.toString().trim() in ['', '[]']) ? meta.basecall_model : meta.clair3_model
             def new_meta = [id: meta.id,
                             paired_data: meta.paired_data,
                             platform: meta.platform,
                             sex: meta.sex,
                             fiber: meta.fiber,
-                            basecall_model: basecall_model,
-                            clairS_model: meta.clairS_model]
-            return [ basecall_model, new_meta, bam, bai ]
+                            clair3_model: meta.clair3_model,
+                            clairS_model: meta.clairS_model,
+                            clairSTO_model: meta.clairSTO_model,
+                            kinetics: meta.kinetics]
+            return [ new_meta, meta.clair3_model, bam, bai ]
         }
         .set { normal_bams_model }
 
     normal_bams_model
-        .combine(downloaded_model_files,by:0)
-        .map{ basecall_model, meta, bam, bai, meta2, model ->
+        .combine(downloaded_clair3_models,by:1)
+        .map {clair3_model, meta_bam, bam, bai, meta_model, model ->
+            def platform = (meta_bam.platform == 'pb') ? 'hifi' : meta_bam.platform
+            return [meta_bam, bam, bai, model, platform]
+        }
+        .set{ normal_bams }
+
+        /*
+        .map{ basecall_model, meta, bam, bai, meta2, model ->
             def platform = (meta.platform == "pb") ? "hifi" : "ont"
             return [meta, bam, bai, model, platform]
         }
-        .set{ normal_bams }
+        */
 
     // normal_bams -> meta: [id, paired_data, platform, sex, fiber, basecall_model]
     //                bam: list of concatenated aligned bams
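Reviewer note (not part of the patch): both tuples now carry the Clair3 model name at index 1, so `combine(..., by: 1)` emits the shared key first, then the remaining elements of the BAM tuple, then those of the model tuple, which is exactly the order the `.map` above destructures. A self-contained sketch with placeholder ids and paths (everything below is made up except the tuple shapes and the `'pb' -> 'hifi'` platform rewrite, which follow the hunk above):

```nextflow
// Sketch only -- placeholder data, not pipeline code.
workflow {
    // [meta, clair3_model, bam, bai]: model name at index 1
    bams = channel.of(
        [[id: 'sampleA', platform: 'pb'], 'hifi', 'sampleA_normal.bam', 'sampleA_normal.bam.bai']
    )
    // [meta, clair3_model, model_dir]: same key at index 1
    models = channel.of(
        [[id: 'hifi'], 'hifi', 'clair3_models/hifi']
    )

    bams
        .combine(models, by: 1)
        // emitted as: key, rest of `bams`, rest of `models`
        .map { clair3_model, meta_bam, bam, bai, meta_model, model_dir ->
            def platform = (meta_bam.platform == 'pb') ? 'hifi' : meta_bam.platform
            [meta_bam, bam, bai, model_dir, platform]
        }
        .view()
}
```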
@@ -73,14 +81,15 @@ workflow TUMOR_NORMAL_HAPPHASE {
     // remove type from so that information can be merged easier later
     mixed_bams.tumor
         .map{ meta, bam, bai ->
-            def basecall_model = (!meta.clair3_model || meta.clair3_model.toString().trim() in ['', '[]']) ? meta.basecall_model : meta.clair3_model
             def new_meta = [id: meta.id,
                             paired_data: meta.paired_data,
                             platform: meta.platform,
                             sex: meta.sex,
                             fiber: meta.fiber,
-                            basecall_model: basecall_model,
-                            clairS_model: meta.clairS_model]
+                            clair3_model: meta.clair3_model,
+                            clairS_model: meta.clairS_model,
+                            clairSTO_model: meta.clairSTO_model,
+                            kinetics: meta.kinetics]
             return[new_meta, bam, bai]
         }
         .set{ tumor_bams }
@@ -162,6 +171,7 @@ workflow TUMOR_NORMAL_HAPPHASE {
     // Add phased vcf to tumour bams and type information
     // mix with the normal bams
+
     tumor_bams
         .join(LONGPHASE_PHASE.out.snv_vcf)
         .map { meta, bam, bai, vcf ->
@@ -211,13 +221,11 @@ workflow TUMOR_NORMAL_HAPPHASE {
     )
     ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions)
-
     // Add index to channel
     mixed_bams_vcf
         .join(mixed_hapbams)
         .join(SAMTOOLS_INDEX.out.bai)
         .set{ mixed_hapbams }
-
     // mixed_hapbams -> meta: [id, paired_data, platform, sex, type, fiber, basecall_model]
     //                  bams: haplotagged aligned bams
     //                  bais: indexes for bam files
@@ -230,8 +238,10 @@ workflow TUMOR_NORMAL_HAPPHASE {
                             platform: meta.platform,
                             sex: meta.sex,
                             fiber: meta.fiber,
-                            basecall_model: meta.basecall_model,
-                            clairS_model: meta.clairS_model]
+                            clair3_model: meta.clair3_model,
+                            clairS_model: meta.clairS_model,
+                            clairSTO_model: meta.clairSTO_model,
+                            kinetics: meta.kinetics]
             return[new_meta, [[type: meta.type], hapbam], [[type: meta.type], hapbai]]
         }
         .groupTuple(size: 2)
@@ -245,7 +255,6 @@ workflow TUMOR_NORMAL_HAPPHASE {
         }
         .join(LONGPHASE_PHASE.out.snv_vcf)
         .set{tumor_normal_severus}
-
     // tumor_normal_severus -> meta: [id, paired_data, platform, sex, fiber, basecall_model]
     //                         tumor_bam: haplotagged aligned bam for tumor
     //                         tumor_bai: indexes for tumor bam files
@@ -256,11 +265,9 @@ workflow TUMOR_NORMAL_HAPPHASE {
 
     // Get ClairS input channel
     tumor_normal_severus
         .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, vcf ->
-            def model = (!meta.clairS_model || meta.clairS_model.toString().trim() in ['', '[]']) ? clairs_modelMap.get(meta.basecall_model.toString().trim()) : meta.clairS_model
-            return[meta , tumor_bam, tumor_bai, normal_bam, normal_bai, model]
+            return[meta , tumor_bam, tumor_bai, normal_bam, normal_bai, meta.clairS_model]
         }
         .set { clairs_input }
-
     //
     // MODULE: CLAIRS
     //
diff --git a/subworkflows/local/tumor_only_happhase.nf b/subworkflows/local/tumor_only_happhase.nf
index 2b5b351..16b8ce9 100644
--- a/subworkflows/local/tumor_only_happhase.nf
+++ b/subworkflows/local/tumor_only_happhase.nf
@@ -25,8 +25,7 @@ workflow TUMOR_ONLY_HAPPHASE {
 
     tumor_bams
         .map{ meta, bam, bai ->
-            def clairSTO_model = (!meta.clairSTO_model || meta.clairSTO_model.toString().trim() in ['', '[]']) ? clairSTO_modelMap.get(meta.basecall_model.toString().trim()) : meta.clairSTO_model
-            return [meta, bam, bai, clairSTO_model]
+            return [meta, bam, bai, meta.clairSTO_model]
         }
         .set{ tumor_bams }
 
@@ -168,7 +167,10 @@ workflow TUMOR_ONLY_HAPPHASE {
                             platform: meta.platform,
                             sex: meta.sex,
                             fiber: meta.fiber,
-                            basecall_model: meta.basecall_model]
+                            clair3_model: meta.clair3_model,
+                            clairS_model: meta.clairS_model,
+                            clairSTO_model: meta.clairSTO_model,
+                            kinetics: meta.kinetics]
             return [new_meta, hap_bam, hap_bai, [], [], vcf]
         }
         .set{ tumor_only_severus }
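Reviewer note (not part of the patch): with the Clair3/ClairS/ClairS-TO model names now resolved once in `workflows/lrsomatic.nf` and carried in the meta map, both subworkflows reduce to a plain field read (`meta.clairS_model`, `meta.clairSTO_model`) instead of re-deriving the model at each call site. Illustrative shape of the meta map a tumour-only PacBio sample would carry after this change (field values are examples only; the model names come from the maps in `workflows/lrsomatic.nf`):

```groovy
// Sketch only -- example meta map, not pipeline code.
def meta = [
    id            : 'sample3',
    paired_data   : false,
    type          : 'tumor',
    platform      : 'pb',
    sex           : 'male',
    fiber         : false,
    clair3_model  : 'hifi',             // clair3_modelMap['hifi_revio']
    clairS_model  : 'hifi_revio_ssrs',  // clairs_modelMap['hifi_revio']
    clairSTO_model: 'hifi_revio_ssrs',
    kinetics      : true,
]

// the channel map in TUMOR_ONLY_HAPPHASE now simply forwards the field:
def clairsto_input = [meta, 'sample3_tumor.bam', 'sample3_tumor.bam.bai', meta.clairSTO_model]
assert clairsto_input.last() == 'hifi_revio_ssrs'
```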
diff --git a/tests/default.nf.test b/tests/default.nf.test
index d4daad1..160dc01 100644
--- a/tests/default.nf.test
+++ b/tests/default.nf.test
@@ -19,6 +19,30 @@ nextflow_pipeline {
             def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore')
             assertAll(
                 { assert workflow.success},
+                { //files exist
+                    assert file("$launchDir/output/sample1/variants/clair3/merge_output.vcf.gz").exists()
+                    assert file("$launchDir/output/sample1/variants/clair3/merge_output.vcf.gz.tbi").exists()
+                    assert file("$launchDir/output/sample1/variants/clairs/indel.vcf.gz").exists()
+                    assert file("$launchDir/output/sample1/variants/clairs/indel.vcf.gz.tbi").exists()
+                    assert file("$launchDir/output/sample1/variants/clairs/snvs.vcf.gz").exists()
+                    assert file("$launchDir/output/sample1/variants/clairs/snvs.vcf.gz.tbi").exists()
+                    assert file("$launchDir/output/sample1/variants/severus/somatic_SVs/severus_somatic.vcf.gz").exists()
+                    assert file("$launchDir/output/sample2/variants/clair3/merge_output.vcf.gz").exists()
+                    assert file("$launchDir/output/sample2/variants/clair3/merge_output.vcf.gz.tbi").exists()
+                    assert file("$launchDir/output/sample2/variants/clairs/indel.vcf.gz").exists()
+                    assert file("$launchDir/output/sample2/variants/clairs/indel.vcf.gz.tbi").exists()
+                    assert file("$launchDir/output/sample2/variants/clairs/snvs.vcf.gz").exists()
+                    assert file("$launchDir/output/sample2/variants/clairs/snvs.vcf.gz.tbi").exists()
+                    assert file("$launchDir/output/sample2/variants/severus/somatic_SVs/severus_somatic.vcf.gz").exists()
+                    assert file("$launchDir/output/sample1/bamfiles/sample1_normal.bam").exists()
+                    assert file("$launchDir/output/sample1/bamfiles/sample1_tumor.bam").exists()
+                    assert file("$launchDir/output/sample1/bamfiles/sample1_normal.bam.bai").exists()
+                    assert file("$launchDir/output/sample1/bamfiles/sample1_tumor.bam.bai").exists()
+                    assert file("$launchDir/output/sample3/variants/clairsto/indel.vcf.gz").exists()
+                    assert file("$launchDir/output/sample3/variants/clairsto/snv.vcf.gz").exists()
+                    assert file("$launchDir/output/sample3/variants/clairsto/somatic.vcf.gz").exists()
+                    assert file("$launchDir/output/sample3/variants/clairsto/germline.vcf.gz").exists()
+                },
                 { assert snapshot(
                     // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions
                     removeNextflowVersion("$outputDir/pipeline_info/lrsomatic_software_mqc_versions.yml"),
diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap
index 9640193..6a1cd3c 100644
--- a/tests/default.nf.test.snap
+++ b/tests/default.nf.test.snap
@@ -309,10 +309,10 @@
             "sample1.stats:md5,da6ea076dd90b4e35ab2e23b8e6ead25",
             "breakpoints_double.csv:md5,57e4f0d5509db44179e7c5044c6bc259",
             "read_qual.txt:md5,1ad9d1900f8dcb291c97adc65c9d341c",
-            "sample2_normal.bam:md5,7e6cdc6c51235a205548be160d798d38",
-            "sample2_normal.bam.bai:md5,7ad774be4cf80ae968f94d760bddf3c8",
-            "sample2_tumor.bam:md5,9938db44490835ec0e11069b4da37547",
-            "sample2_tumor.bam.bai:md5,202852a62fc5bf4cf1c196676068606a",
+            "sample2_normal.bam:md5,554b89692e84b9ddd0615649e2b15820",
+            "sample2_normal.bam.bai:md5,af193f1922d90b8741212d2bf690c418",
+            "sample2_tumor.bam:md5,26c4e52c12aa0e874fe52ae3b729beba",
+            "sample2_tumor.bam.bai:md5,e9e64e13328aa3621e7976f8e3f29a78",
             "sample2.mosdepth.global.dist.txt:md5,6cdc97a81a603db702cb5a113b8bc62a",
             "sample2.mosdepth.summary.txt:md5,864370930ec1d695d942f4960bcf8fc6",
             "sample2.flagstat:md5,cce0bb7ca79e14d8369ccc714adf4be3",
@@ -323,7 +323,7 @@
             "sample2.flagstat:md5,83e7d7d922941691d2b023f0bd9655aa",
             "sample2.idxstats:md5,fe8a5d1263481ea7902d575b4d95f655",
             "sample2.stats:md5,defe74842396209b6cff4b32994287c7",
-            "breakpoints_double.csv:md5,c5a59c9ea2486f7bb9d5e40fea8f916d",
+            "breakpoints_double.csv:md5,b71bba578c126b9217765d854b21028a",
             "read_qual.txt:md5,27edf87814aec6fa18546c8606aae4ed",
             "sample3_tumor.bam:md5,2308beb1b4be1f0e1d6c8e52bd4f9266",
             "sample3_tumor.bam.bai:md5,840eb3ad5ed3216a97c6a58563d4dcb1",
@@ -337,9 +337,9 @@
            ]
        ],
        "meta": {
-            "nf-test": "0.9.2",
-            "nextflow": "25.10.0"
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.2"
        },
-        "timestamp": "2026-01-07T12:24:56.969201207"
+        "timestamp": "2026-01-09T17:08:56.493545818"
    }
}
\ No newline at end of file
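Reviewer note (not part of the patch): the new existence checks are a useful guard against silently dropped outputs alongside the md5 snapshot. If the list keeps growing, the same check could be written as a loop over relative paths inside the `assertAll(` block, which keeps one assert per file but is easier to extend; a sketch of that alternative (paths taken from the asserts above, list deliberately abbreviated):

```groovy
// Sketch only -- an equivalent, more compact form of the per-file asserts.
{ //files exist
    def expected = [
        'sample1/variants/clair3/merge_output.vcf.gz',
        'sample1/variants/clairs/snvs.vcf.gz',
        'sample1/bamfiles/sample1_tumor.bam',
        'sample2/variants/severus/somatic_SVs/severus_somatic.vcf.gz',
        'sample3/variants/clairsto/somatic.vcf.gz',
        // ...remaining paths from the block above
    ]
    expected.each { rel -> assert file("$launchDir/output/$rel").exists() }
}
```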
diff --git a/workflows/lrsomatic.nf b/workflows/lrsomatic.nf
index b33d50f..a7eb186 100644
--- a/workflows/lrsomatic.nf
+++ b/workflows/lrsomatic.nf
@@ -65,7 +65,7 @@ workflow LRSOMATIC {
         'dna_r10.4.1_e8.2_400bps_sup@v4.2.0': 'r1041_e82_400bps_sup_v420',
         'dna_r10.4.1_e8.2_400bps_sup@v4.1.0': 'r1041_e82_400bps_sup_v410',
         'dna_r10.4.1_e8.2_260bps_sup@v4.0.0': 'r1041_e82_260bps_sup_v400',
-        'hifi_revio' : 'hifi_revio'
+        'hifi_revio' : 'hifi'
     ]
 
     def clairs_modelMap = [
@@ -75,7 +75,7 @@ workflow LRSOMATIC {
         'dna_r10.4.1_e8.2_400bps_sup@v4.3.0': 'ont_r10_dorado_sup_5khz_ssrs',
         'dna_r10.4.1_e8.2_400bps_sup@v5.0.0': 'ont_r10_dorado_sup_5khz_ssrs',
         'dna_r10.4.1_e8.2_400bps_sup@v5.2.0': 'ont_r10_dorado_sup_5khz_ssrs',
-        'hifi_revio' : 'hifi_revio_ss'
+        'hifi_revio' : 'hifi_revio_ssrs'
     ]
 
 
@@ -112,7 +112,19 @@ workflow LRSOMATIC {
     ch_samplesheet
         .join(basecall_meta)
         .map { meta, bam, basecall_model_meta, kinetics_meta ->
-            def meta_new = meta + [ basecall_model: basecall_model_meta, kinetics: kinetics_meta]
+            def chosen_clair3_model = meta.clair3_model ?: clair3_modelMap.get(basecall_model_meta)
+            def chosen_clairSTO_model = meta.clairSTO_model ?: clairs_modelMap.get(basecall_model_meta)
+            def chosen_clairS_model = meta.clairS_model ?: clairs_modelMap.get(basecall_model_meta)
+            def meta_new =[ id: meta.id,
+                            paired_data: meta.paired_data,
+                            type: meta.type,
+                            platform: meta.platform,
+                            sex: meta.sex,
+                            fiber: meta.fiber,
+                            clair3_model: chosen_clair3_model,
+                            clairS_model: chosen_clairS_model,
+                            clairSTO_model: chosen_clairSTO_model,
+                            kinetics: kinetics_meta]
             return[ meta_new, bam ]
         }
         .groupTuple()
@@ -122,6 +134,22 @@
         .set{ch_samplesheet}
 
+    //
+    // SUBWORKFLOW: PREPARE_REFERENCE_FILES
+    //
+
+    PREPARE_REFERENCE_FILES (
+        params.fasta,
+        params.ascat_allele_files,
+        params.ascat_loci_files,
+        params.ascat_gc_file,
+        params.ascat_rt_file,
+        basecall_meta,
+        clair3_modelMap
+    )
+
+    downloaded_clair3_models = PREPARE_REFERENCE_FILES.out.downloaded_clair3_models
+
     // ch_samplesheet -> meta: [id, paired_data, platform, sex, type, fiber, basecall_model]
     //                   bam: list of unaligned bams
@@ -142,7 +170,6 @@ workflow LRSOMATIC {
         .mix ( ch_split.single )
         .set { ch_cat_ubams }
-
     // ch_cat_ubams -> meta: [id, paired_data, platform, sex, type, fiber, basecall_model]
     //                 bam: list of concatenated unaligned bams
@@ -156,20 +183,6 @@ workflow LRSOMATIC {
         CRAMINO_PRE ( ch_cat_ubams )
     }
 
-    //
-    // SUBWORKFLOW: PREPARE_REFERENCE_FILES
-    //
-
-    PREPARE_REFERENCE_FILES (
-        params.fasta,
-        params.ascat_allele_files,
-        params.ascat_loci_files,
-        params.ascat_gc_file,
-        params.ascat_rt_file,
-        basecall_meta,
-        clair3_modelMap
-    )
-
     vep_cache = channel.empty()
 
     if (!params.skip_vep) {
@@ -201,8 +214,6 @@ workflow LRSOMATIC {
     ch_fasta = PREPARE_REFERENCE_FILES.out.prepped_fasta
     ch_fai = PREPARE_REFERENCE_FILES.out.prepped_fai
-    downloaded_model_files = PREPARE_REFERENCE_FILES.out.downloaded_model_files
-
     // ASCAT files
     allele_files = PREPARE_REFERENCE_FILES.out.allele_files
     loci_files = PREPARE_REFERENCE_FILES.out.loci_files
@@ -353,7 +364,7 @@ workflow LRSOMATIC {
         ch_fai,
         clair3_modelMap,
         clairs_modelMap,
-        downloaded_model_files
+        downloaded_clair3_models
     )
 
     ch_versions = ch_versions.mix(TUMOR_NORMAL_HAPPHASE.out.versions)
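Reviewer note (not part of the patch): resolving the models up front with the Elvis operator is tidier than the per-subworkflow ternaries it replaces, with one behavioural nuance worth double-checking: `?:` only falls back on Groovy-falsy values (null, empty string), whereas the removed checks also treated the literal string `'[]'` as unset. A minimal Groovy sketch of the new behaviour, using map entries from the hunk above (the `resolve` closure is illustrative only):

```groovy
// Sketch only -- illustrates the Elvis fallback used to build meta_new above.
def clair3_modelMap = ['hifi_revio': 'hifi']
def clairs_modelMap = ['hifi_revio': 'hifi_revio_ssrs']

def basecall_model_meta = 'hifi_revio'   // as read from the BAM header

def resolve = { userValue, fallback -> userValue ?: fallback }

assert resolve(null, clair3_modelMap.get(basecall_model_meta)) == 'hifi'
assert resolve('', clairs_modelMap.get(basecall_model_meta))   == 'hifi_revio_ssrs'
// note: a literal '[]' is truthy in Groovy, so it would now be kept as-is
assert resolve('[]', clair3_modelMap.get(basecall_model_meta)) == '[]'
```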