From 1ee6ce380db772c4ff40bbe365e6ebd82cbf5d22 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Wed, 19 Mar 2025 09:26:32 +0100 Subject: [PATCH 01/34] version bump to 1.1.0dev --- .nf-core.yml | 2 +- CHANGELOG.md | 12 +++++++++++- nextflow.config | 2 +- ro-crate-metadata.json | 36 ++++++++++++++++++------------------ 4 files changed, 31 insertions(+), 21 deletions(-) diff --git a/.nf-core.yml b/.nf-core.yml index f7103eb4..b1854eb2 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -20,4 +20,4 @@ template: skip_features: - multiqc - igenomes - version: 1.0.1 + version: 1.1.0dev diff --git a/CHANGELOG.md b/CHANGELOG.md index b6879797..0c551b5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0.1 'Aluminium Pigeon' - [2025-03.18] +## v1.1.0 + +### `Added` + +### `Fixed` + +### `Dependencies` + +### `Deprecated` + +## v1.0.1 'Aluminium Pigeon' - [2025-03-19] Bugfix release diff --git a/nextflow.config b/nextflow.config index 3293c406..1b4c472d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -290,7 +290,7 @@ manifest { description = """Assemble genomes from long ONT or pacbio HiFi reads""" mainScript = 'main.nf' nextflowVersion = '!>=24.04.2' - version = '1.0.1' + version = '1.1.0dev' doi = '10.5281/zenodo.14986998' } diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index bb89ad4b..48261909 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -21,8 +21,8 @@ { "@id": "./", "@type": "Dataset", - "creativeWorkStatus": "Stable", - "datePublished": "2025-03-18T13:18:08+00:00", + "creativeWorkStatus": "InProgress", + "datePublished": "2025-03-19T08:25:11+00:00", "description": "

\n \n \n \"nf-core/genomeassembler\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14986998-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14986998)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/genomeassembler** is a bioinformatics pipeline that carries out genome assembly, polishing and scaffolding from long reads (ONT or pacbio). Assembly can be done via `flye` or `hifiasm`, polishing can be carried out with `medaka` (ONT), or `pilon` (requires short-reads), and scaffolding can be done using `LINKS`, `Longstitch`, or `RagTag` (if a reference is available). Quality control includes `BUSCO`, `QUAST` and `merqury` (requires short-reads).\nCurrently, this pipeline does not implement phasing of polyploid genomes or HiC scaffolding.\n\n\n \n \"nf-core/genomeassembler\"\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,ontreads,hifireads,ref_fasta,ref_gff,shortread_F,shortread_R,paired\nsampleName,ontreads.fa.gz,hifireads.fa.gz,assembly.fasta.gz,reference.fasta,reference.gff,short_F1.fastq,short_F2.fastq,true\n```\n\nEach row represents one genome to be assembled. `sample` should contain the name of the sample, `ontreads` should contain a path to ONT reads (fastq.gz), `hifireads` a path to HiFi reads (fastq.gz), `ref_fasta` and `ref_gff` contain reference genome fasta and annotations. `shortread_F` and `shortread_R` contain paths to short-read data, `paired` indicates if short-reads are paired. 
Columns can be omitted if they contain no data, with the exception of `shortread_R`, which needs to be present if `shortread_F` is there, even if it is empty.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/genomeassembler \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/genomeassembler/usage) and the [parameter documentation](https://nf-co.re/genomeassembler/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/genomeassembler/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/genomeassembler/output).\n\n## Credits\n\nnf-core/genomeassembler was originally written by [Niklas Schandry](https://github.com/nschan), of the Faculty of Biology of the Ludwig-Maximilians University (LMU) in Munich, Germany.\n\nI thank the following people for their extensive assistance and constructive reviews during the development of this pipeline:\n\n- [Mahesh Binzer-Panchal](https://github.com/mahesh-panchal)\n- [Matthias H\u00f6rtenhuber](https://github.com/mashehu)\n- [Louis Le N\u00e9zet](https://github.com/LouisLeNezet)\n- [J\u00falia Mir Pedrol](https://github.com/mirpedrol)\n- [Daniel Straub](https://github.com/d4straub)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or 
help, don't hesitate to get in touch on the [Slack `#genomeassembler` channel](https://nfcore.slack.com/channels/genomeassembler) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/genomeassembler for your analysis, please cite it using the following doi: [10.5281/zenodo.14986998](https://doi.org/10.5281/zenodo.14986998)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { @@ -105,7 +105,7 @@ }, "mentions": [ { - "@id": "#b6b9b72e-48fc-4472-a0e9-8030921b5a17" + "@id": "#a74a52d1-f86a-42f5-b7ef-1224396e1e3c" } ], "name": "nf-core/genomeassembler" @@ -134,17 +134,17 @@ ], "creator": [ { - "@id": "https://orcid.org/0000-0002-7860-3560" + "@id": "https://orcid.org/0000-0003-3099-7860" }, { - "@id": "https://orcid.org/0000-0003-1675-0677" + "@id": "https://orcid.org/0000-0002-7860-3560" }, { - "@id": "https://orcid.org/0000-0003-3099-7860" + "@id": "https://orcid.org/0000-0003-1675-0677" } ], "dateCreated": "", - "dateModified": "2025-03-18T14:18:08Z", + "dateModified": "2025-03-19T09:25:11Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", @@ -170,10 +170,10 @@ }, "url": [ "https://github.com/nf-core/genomeassembler", - "https://nf-co.re/genomeassembler/1.0.1/" + "https://nf-co.re/genomeassembler/dev/" ], "version": [ - "1.0.1" + "1.1.0dev" ] }, { @@ -189,11 +189,11 @@ "version": "!>=24.04.2" }, { - "@id": "#b6b9b72e-48fc-4472-a0e9-8030921b5a17", + "@id": 
"#a74a52d1-f86a-42f5-b7ef-1224396e1e3c", "@type": "TestSuite", "instance": [ { - "@id": "#10b3c45d-045e-40b5-898c-73a2f6edcb1b" + "@id": "#9c9d46ea-11b2-4050-9976-134db40769ea" } ], "mainEntity": { @@ -202,7 +202,7 @@ "name": "Test suite for nf-core/genomeassembler" }, { - "@id": "#10b3c45d-045e-40b5-898c-73a2f6edcb1b", + "@id": "#9c9d46ea-11b2-4050-9976-134db40769ea", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/genomeassembler", "resource": "repos/nf-core/genomeassembler/actions/workflows/ci.yml", @@ -340,6 +340,12 @@ "name": "nf-core", "url": "https://nf-co.re/" }, + { + "@id": "https://orcid.org/0000-0003-3099-7860", + "@type": "Person", + "email": "niklas@bio.lmu.de", + "name": "Niklas Schandry" + }, { "@id": "https://orcid.org/0000-0002-7860-3560", "@type": "Person", @@ -351,12 +357,6 @@ "@type": "Person", "email": "mahesh.binzer-panchal@nbis.se", "name": "Mahesh Binzer-Panchal" - }, - { - "@id": "https://orcid.org/0000-0003-3099-7860", - "@type": "Person", - "email": "niklas@bio.lmu.de", - "name": "Niklas Schandry" } ] } \ No newline at end of file From cf8d283dc721d9013176dd26d34a6328ed7d2a1b Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Mon, 24 Mar 2025 16:10:20 +0100 Subject: [PATCH 02/34] refactor qc into subworkflow --- .nf-core.yml | 2 +- CHANGELOG.md | 12 +++- nextflow.config | 2 +- ro-crate-metadata.json | 36 +++++------ subworkflows/local/assemble/main.nf | 49 +++------------ .../polishing/medaka/polish_medaka/main.nf | 41 ++----------- .../polishing/pilon/polish_pilon/main.nf | 44 ++----------- subworkflows/local/qc/main.nf | 61 +++++++++++++++++++ subworkflows/local/scaffolding/links/main.nf | 45 ++------------ .../local/scaffolding/longstitch/main.nf | 45 +++----------- subworkflows/local/scaffolding/ragtag/main.nf | 43 ++----------- 11 files changed, 130 insertions(+), 250 deletions(-) create mode 100644 subworkflows/local/qc/main.nf diff --git a/.nf-core.yml b/.nf-core.yml index f7103eb4..b1854eb2 100644 
--- a/.nf-core.yml +++ b/.nf-core.yml @@ -20,4 +20,4 @@ template: skip_features: - multiqc - igenomes - version: 1.0.1 + version: 1.1.0dev diff --git a/CHANGELOG.md b/CHANGELOG.md index b6879797..0c551b5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0.1 'Aluminium Pigeon' - [2025-03.18] +## v1.1.0 + +### `Added` + +### `Fixed` + +### `Dependencies` + +### `Deprecated` + +## v1.0.1 'Aluminium Pigeon' - [2025-03-19] Bugfix release diff --git a/nextflow.config b/nextflow.config index 3293c406..1b4c472d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -290,7 +290,7 @@ manifest { description = """Assemble genomes from long ONT or pacbio HiFi reads""" mainScript = 'main.nf' nextflowVersion = '!>=24.04.2' - version = '1.0.1' + version = '1.1.0dev' doi = '10.5281/zenodo.14986998' } diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index bb89ad4b..48261909 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -21,8 +21,8 @@ { "@id": "./", "@type": "Dataset", - "creativeWorkStatus": "Stable", - "datePublished": "2025-03-18T13:18:08+00:00", + "creativeWorkStatus": "InProgress", + "datePublished": "2025-03-19T08:25:11+00:00", "description": "

\n \n \n \"nf-core/genomeassembler\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14986998-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14986998)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/genomeassembler** is a bioinformatics pipeline that carries out genome assembly, polishing and scaffolding from long reads (ONT or pacbio). Assembly can be done via `flye` or `hifiasm`, polishing can be carried out with `medaka` (ONT), or `pilon` (requires short-reads), and scaffolding can be done using `LINKS`, `Longstitch`, or `RagTag` (if a reference is available). Quality control includes `BUSCO`, `QUAST` and `merqury` (requires short-reads).\nCurrently, this pipeline does not implement phasing of polyploid genomes or HiC scaffolding.\n\n\n \n \"nf-core/genomeassembler\"\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,ontreads,hifireads,ref_fasta,ref_gff,shortread_F,shortread_R,paired\nsampleName,ontreads.fa.gz,hifireads.fa.gz,assembly.fasta.gz,reference.fasta,reference.gff,short_F1.fastq,short_F2.fastq,true\n```\n\nEach row represents one genome to be assembled. `sample` should contain the name of the sample, `ontreads` should contain a path to ONT reads (fastq.gz), `hifireads` a path to HiFi reads (fastq.gz), `ref_fasta` and `ref_gff` contain reference genome fasta and annotations. `shortread_F` and `shortread_R` contain paths to short-read data, `paired` indicates if short-reads are paired. 
Columns can be omitted if they contain no data, with the exception of `shortread_R`, which needs to be present if `shortread_F` is there, even if it is empty.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/genomeassembler \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/genomeassembler/usage) and the [parameter documentation](https://nf-co.re/genomeassembler/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/genomeassembler/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/genomeassembler/output).\n\n## Credits\n\nnf-core/genomeassembler was originally written by [Niklas Schandry](https://github.com/nschan), of the Faculty of Biology of the Ludwig-Maximilians University (LMU) in Munich, Germany.\n\nI thank the following people for their extensive assistance and constructive reviews during the development of this pipeline:\n\n- [Mahesh Binzer-Panchal](https://github.com/mahesh-panchal)\n- [Matthias H\u00f6rtenhuber](https://github.com/mashehu)\n- [Louis Le N\u00e9zet](https://github.com/LouisLeNezet)\n- [J\u00falia Mir Pedrol](https://github.com/mirpedrol)\n- [Daniel Straub](https://github.com/d4straub)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or 
help, don't hesitate to get in touch on the [Slack `#genomeassembler` channel](https://nfcore.slack.com/channels/genomeassembler) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/genomeassembler for your analysis, please cite it using the following doi: [10.5281/zenodo.14986998](https://doi.org/10.5281/zenodo.14986998)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { @@ -105,7 +105,7 @@ }, "mentions": [ { - "@id": "#b6b9b72e-48fc-4472-a0e9-8030921b5a17" + "@id": "#a74a52d1-f86a-42f5-b7ef-1224396e1e3c" } ], "name": "nf-core/genomeassembler" @@ -134,17 +134,17 @@ ], "creator": [ { - "@id": "https://orcid.org/0000-0002-7860-3560" + "@id": "https://orcid.org/0000-0003-3099-7860" }, { - "@id": "https://orcid.org/0000-0003-1675-0677" + "@id": "https://orcid.org/0000-0002-7860-3560" }, { - "@id": "https://orcid.org/0000-0003-3099-7860" + "@id": "https://orcid.org/0000-0003-1675-0677" } ], "dateCreated": "", - "dateModified": "2025-03-18T14:18:08Z", + "dateModified": "2025-03-19T09:25:11Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", @@ -170,10 +170,10 @@ }, "url": [ "https://github.com/nf-core/genomeassembler", - "https://nf-co.re/genomeassembler/1.0.1/" + "https://nf-co.re/genomeassembler/dev/" ], "version": [ - "1.0.1" + "1.1.0dev" ] }, { @@ -189,11 +189,11 @@ "version": "!>=24.04.2" }, { - "@id": "#b6b9b72e-48fc-4472-a0e9-8030921b5a17", + "@id": 
"#a74a52d1-f86a-42f5-b7ef-1224396e1e3c", "@type": "TestSuite", "instance": [ { - "@id": "#10b3c45d-045e-40b5-898c-73a2f6edcb1b" + "@id": "#9c9d46ea-11b2-4050-9976-134db40769ea" } ], "mainEntity": { @@ -202,7 +202,7 @@ "name": "Test suite for nf-core/genomeassembler" }, { - "@id": "#10b3c45d-045e-40b5-898c-73a2f6edcb1b", + "@id": "#9c9d46ea-11b2-4050-9976-134db40769ea", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/genomeassembler", "resource": "repos/nf-core/genomeassembler/actions/workflows/ci.yml", @@ -340,6 +340,12 @@ "name": "nf-core", "url": "https://nf-co.re/" }, + { + "@id": "https://orcid.org/0000-0003-3099-7860", + "@type": "Person", + "email": "niklas@bio.lmu.de", + "name": "Niklas Schandry" + }, { "@id": "https://orcid.org/0000-0002-7860-3560", "@type": "Person", @@ -351,12 +357,6 @@ "@type": "Person", "email": "mahesh.binzer-panchal@nbis.se", "name": "Mahesh Binzer-Panchal" - }, - { - "@id": "https://orcid.org/0000-0003-3099-7860", - "@type": "Person", - "email": "niklas@bio.lmu.de", - "name": "Niklas Schandry" } ] } \ No newline at end of file diff --git a/subworkflows/local/assemble/main.nf b/subworkflows/local/assemble/main.nf index bd7c0eef..6f700d6a 100644 --- a/subworkflows/local/assemble/main.nf +++ b/subworkflows/local/assemble/main.nf @@ -2,13 +2,10 @@ include { FLYE } from '../../../modules/nf-core/flye/main' include { HIFIASM } from '../../../modules/nf-core/hifiasm/main' include { HIFIASM as HIFIASM_ONT } from '../../../modules/nf-core/hifiasm/main' include { GFA_2_FA } from '../../../modules/local/gfa2fa/main' -include { MAP_TO_ASSEMBLY } from '../mapping/map_to_assembly/main' include { MAP_TO_REF } from '../mapping/map_to_ref/main' -include { RUN_QUAST } from '../qc/quast/main' -include { RUN_BUSCO } from '../qc/busco/main' -include { MERQURY_QC } from '../qc/merqury/main' include { RUN_LIFTOFF } from '../liftoff/main' include { RAGTAG_SCAFFOLD } from '../../../modules/local/ragtag/main' +include { QC } from 
'../qc/main' workflow ASSEMBLE { @@ -25,9 +22,6 @@ workflow ASSEMBLE { Channel.empty().set { ch_ref_bam } Channel.empty().set { ch_assembly_bam } Channel.empty().set { ch_assembly } - Channel.empty().set { assembly_quast_reports } - Channel.empty().set { assembly_busco_reports } - Channel.empty().set { assembly_merqury_reports } Channel.empty().set { flye_inputs } Channel.empty().set { hifiasm_inputs } Channel.empty().set { longreads } @@ -172,49 +166,20 @@ workflow ASSEMBLE { } } } - if (params.quast) { + if (params.quast) { if (params.use_ref) { MAP_TO_REF(longreads, ch_refs) MAP_TO_REF.out.ch_aln_to_ref_bam.set { ch_ref_bam } } - - MAP_TO_ASSEMBLY(longreads, ch_assembly) - MAP_TO_ASSEMBLY.out.aln_to_assembly_bam.set { ch_assembly_bam } - - RUN_QUAST(ch_assembly, ch_input, ch_ref_bam, ch_assembly_bam) - RUN_QUAST.out.quast_tsv.set { assembly_quast_reports } - - ch_versions = ch_versions.mix(MAP_TO_ASSEMBLY.out.versions).mix(RUN_QUAST.out.versions) - } } /* QC on initial assembly */ - if (params.busco) { - RUN_BUSCO(ch_assembly) - RUN_BUSCO.out.batch_summary.set { assembly_busco_reports } - ch_versions = ch_versions.mix(RUN_BUSCO.out.versions) - } - - if (params.short_reads) { - MERQURY_QC(ch_assembly, meryl_kmers) - MERQURY_QC.out.stats - .join( - MERQURY_QC.out.spectra_asm_hist - ) - .join( - MERQURY_QC.out.spectra_cn_hist - ) - .join( - MERQURY_QC.out.assembly_qv - ) - .set { assembly_merqury_reports } - - ch_versions = ch_versions.mix(MERQURY_QC.out.versions) - } + QC(ch_input, longreads, ch_assembly, ch_ref_bam, meryl_kmers) + ch_versions = ch_versions.mix(QC.out.versions) if (params.lift_annotations) { RUN_LIFTOFF(ch_assembly, ch_input) @@ -225,8 +190,8 @@ workflow ASSEMBLE { assembly = ch_assembly ref_bam = ch_ref_bam longreads - assembly_quast_reports - assembly_busco_reports - assembly_merqury_reports + assembly_quast_reports = QC.out.quast_out + assembly_busco_reports = QC.out.busco_out + assembly_merqury_reports = QC.out.merqury_report_files versions = 
ch_versions } diff --git a/subworkflows/local/polishing/medaka/polish_medaka/main.nf b/subworkflows/local/polishing/medaka/polish_medaka/main.nf index cdf52e23..bcfc5205 100644 --- a/subworkflows/local/polishing/medaka/polish_medaka/main.nf +++ b/subworkflows/local/polishing/medaka/polish_medaka/main.nf @@ -1,9 +1,6 @@ include { RUN_MEDAKA } from '../run_medaka/main' -include { MAP_TO_ASSEMBLY } from '../../../mapping/map_to_assembly/main' -include { RUN_BUSCO } from '../../../qc/busco/main' -include { RUN_QUAST } from '../../../qc/quast/main' +include { QC } from '../../../qc/main.nf' include { RUN_LIFTOFF } from '../../../liftoff/main' -include { MERQURY_QC } from '../../../qc/merqury/main' workflow POLISH_MEDAKA { take: @@ -24,35 +21,9 @@ workflow POLISH_MEDAKA { ch_versions = ch_versions.mix(RUN_MEDAKA.out.versions) - MAP_TO_ASSEMBLY(in_reads, polished_assembly) + QC(ch_input, in_reads, polished_assembly, ch_aln_to_ref, meryl_kmers) - ch_versions = ch_versions.mix(MAP_TO_ASSEMBLY.out.versions) - - RUN_QUAST(polished_assembly, ch_input, ch_aln_to_ref, MAP_TO_ASSEMBLY.out.aln_to_assembly_bam) - RUN_QUAST.out.quast_tsv.set { quast_out } - - ch_versions = ch_versions.mix(RUN_QUAST.out.versions) - - RUN_BUSCO(polished_assembly) - RUN_BUSCO.out.batch_summary.set { busco_out } - - ch_versions = ch_versions.mix(RUN_BUSCO.out.versions) - - if (params.short_reads) { - MERQURY_QC(polished_assembly, meryl_kmers) - MERQURY_QC.out.stats - .join( - MERQURY_QC.out.spectra_asm_hist - ) - .join( - MERQURY_QC.out.spectra_cn_hist - ) - .join( - MERQURY_QC.out.assembly_qv - ) - .set { merqury_report_files } - ch_versions = ch_versions.mix(MERQURY_QC.out.versions) - } + ch_versions = ch_versions.mix(QC.out.versions) if (params.lift_annotations) { RUN_LIFTOFF(polished_assembly, ch_input) @@ -63,8 +34,8 @@ workflow POLISH_MEDAKA { emit: polished_assembly - quast_out - busco_out - merqury_report_files + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + merqury_report_files 
= QC.out.merqury_report_files versions } diff --git a/subworkflows/local/polishing/pilon/polish_pilon/main.nf b/subworkflows/local/polishing/pilon/polish_pilon/main.nf index cde1ec4a..e4384c37 100644 --- a/subworkflows/local/polishing/pilon/polish_pilon/main.nf +++ b/subworkflows/local/polishing/pilon/polish_pilon/main.nf @@ -1,10 +1,7 @@ include { RUN_PILON } from '../run_pilon/main' include { MAP_SR } from '../../../mapping/map_sr/main' -include { MAP_TO_ASSEMBLY } from '../../../mapping/map_to_assembly/main' -include { RUN_BUSCO } from '../../../qc/busco/main' -include { RUN_QUAST } from '../../../qc/quast/main' include { RUN_LIFTOFF } from '../../../liftoff/main' -include { MERQURY_QC } from '../../../qc/merqury/main' +include { QC } from '../../../qc/main.nf' workflow POLISH_PILON { take: @@ -17,9 +14,6 @@ workflow POLISH_PILON { main: Channel.empty().set { ch_versions } - Channel.empty().set { quast_out } - Channel.empty().set { busco_out } - Channel.empty().set { merqury_report_files } MAP_SR(shortreads, assembly) @@ -31,35 +25,9 @@ workflow POLISH_PILON { ch_versions = ch_versions.mix(RUN_PILON.out.versions) - MAP_TO_ASSEMBLY(in_reads, pilon_polished) + QC(ch_input, in_reads, pilon_polished, ch_aln_to_ref, meryl_kmers) - ch_versions = ch_versions.mix(MAP_TO_ASSEMBLY.out.versions) - - RUN_QUAST(pilon_polished, ch_input, ch_aln_to_ref, MAP_TO_ASSEMBLY.out.aln_to_assembly_bam) - RUN_QUAST.out.quast_tsv.set { quast_out } - - ch_versions = ch_versions.mix(RUN_QUAST.out.versions) - - RUN_BUSCO(pilon_polished) - RUN_BUSCO.out.batch_summary.set { busco_out } - - ch_versions = ch_versions.mix(RUN_BUSCO.out.versions) - - if (params.short_reads) { - MERQURY_QC(pilon_polished, meryl_kmers) - MERQURY_QC.out.stats - .join( - MERQURY_QC.out.spectra_asm_hist - ) - .join( - MERQURY_QC.out.spectra_cn_hist - ) - .join( - MERQURY_QC.out.assembly_qv - ) - .set { merqury_report_files } - ch_versions = ch_versions.mix(MERQURY_QC.out.versions) - } + ch_versions = 
ch_versions.mix(QC.out.versions) if (params.lift_annotations) { RUN_LIFTOFF(pilon_polished, ch_input) @@ -70,8 +38,8 @@ workflow POLISH_PILON { emit: pilon_polished - quast_out - busco_out - merqury_report_files + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + merqury_report_files = QC.out.merqury_report_files versions } diff --git a/subworkflows/local/qc/main.nf b/subworkflows/local/qc/main.nf new file mode 100644 index 00000000..bba6b31d --- /dev/null +++ b/subworkflows/local/qc/main.nf @@ -0,0 +1,61 @@ +include { MAP_TO_ASSEMBLY } from '../mapping/map_to_assembly/main' +include { RUN_BUSCO } from './busco/main.nf' +include { RUN_QUAST } from './quast/main.nf' +include { MERQURY_QC } from './merqury/main.nf' + +workflow QC { + take: + inputs + in_reads + scaffolds + aln_to_ref + meryl_kmers + + main: + Channel.empty().set { ch_versions } + Channel.empty().set { quast_out } + Channel.empty().set { busco_out } + Channel.empty().set { merqury_report_files } + Channel.empty().set { map_to_assembly } + + if (params.quast) { + MAP_TO_ASSEMBLY(in_reads, scaffolds) + MAP_TO_ASSEMBLY.out.aln_to_assembly_bam.set { map_to_assembly } + ch_versions = ch_versions.mix(MAP_TO_ASSEMBLY.out.versions) + } + + RUN_QUAST(scaffolds, inputs, aln_to_ref, map_to_assembly) + RUN_QUAST.out.quast_tsv.set { quast_out } + + ch_versions = ch_versions.mix(RUN_QUAST.out.versions) + + RUN_BUSCO(scaffolds) + RUN_BUSCO.out.batch_summary.set { busco_out } + + ch_versions = ch_versions.mix(RUN_BUSCO.out.versions) + + if (params.short_reads) { + MERQURY_QC(scaffolds, meryl_kmers) + MERQURY_QC.out.stats + .join( + MERQURY_QC.out.spectra_asm_hist + ) + .join( + MERQURY_QC.out.spectra_cn_hist + ) + .join( + MERQURY_QC.out.assembly_qv + ) + .set { merqury_report_files } + + ch_versions = ch_versions.mix(MERQURY_QC.out.versions) + } + + versions = ch_versions + + emit: + quast_out + busco_out + merqury_report_files + versions +} diff --git a/subworkflows/local/scaffolding/links/main.nf 
b/subworkflows/local/scaffolding/links/main.nf index 8119427c..902cf20a 100644 --- a/subworkflows/local/scaffolding/links/main.nf +++ b/subworkflows/local/scaffolding/links/main.nf @@ -1,9 +1,6 @@ include { LINKS } from '../../../../modules/local/links/main' -include { MAP_TO_ASSEMBLY } from '../../mapping/map_to_assembly/main' -include { RUN_QUAST } from '../../qc/quast/main' -include { RUN_BUSCO } from '../../qc/busco/main' +include { QC } from '../../qc/main' include { RUN_LIFTOFF } from '../../liftoff/main' -include { MERQURY_QC } from '../../qc/merqury/main' workflow RUN_LINKS { take: @@ -16,9 +13,6 @@ workflow RUN_LINKS { main: Channel.empty().set { ch_versions } - Channel.empty().set { quast_out } - Channel.empty().set { busco_out } - Channel.empty().set { merqury_report_files } assembly .join(in_reads) @@ -29,36 +23,9 @@ workflow RUN_LINKS { ch_versions = ch_versions.mix(LINKS.out.versions) - MAP_TO_ASSEMBLY(in_reads, scaffolds) + QC(inputs, in_reads, scaffolds, ch_aln_to_ref, meryl_kmers) - ch_versions = ch_versions.mix(MAP_TO_ASSEMBLY.out.versions) - - RUN_QUAST(scaffolds, inputs, ch_aln_to_ref, MAP_TO_ASSEMBLY.out.aln_to_assembly_bam) - RUN_QUAST.out.quast_tsv.set { quast_out } - - ch_versions = ch_versions.mix(RUN_QUAST.out.versions) - - RUN_BUSCO(scaffolds) - RUN_BUSCO.out.batch_summary.set { busco_out } - - ch_versions = ch_versions.mix(RUN_BUSCO.out.versions) - - if (params.short_reads) { - MERQURY_QC(scaffolds, meryl_kmers) - MERQURY_QC.out.stats - .join( - MERQURY_QC.out.spectra_asm_hist - ) - .join( - MERQURY_QC.out.spectra_cn_hist - ) - .join( - MERQURY_QC.out.assembly_qv - ) - .set { merqury_report_files } - - ch_versions = ch_versions.mix(MERQURY_QC.out.versions) - } + ch_versions = ch_versions.mix(QC.out.versions) if (params.lift_annotations) { RUN_LIFTOFF(scaffolds, inputs) @@ -69,8 +36,8 @@ workflow RUN_LINKS { emit: scaffolds - quast_out - busco_out - merqury_report_files + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + 
merqury_report_files = QC.out.merqury_report_files versions } diff --git a/subworkflows/local/scaffolding/longstitch/main.nf b/subworkflows/local/scaffolding/longstitch/main.nf index c7fcc16b..bb8dd6a7 100644 --- a/subworkflows/local/scaffolding/longstitch/main.nf +++ b/subworkflows/local/scaffolding/longstitch/main.nf @@ -1,9 +1,6 @@ include { LONGSTITCH } from '../../../../modules/local/longstitch/main' -include { MAP_TO_ASSEMBLY } from '../../mapping/map_to_assembly/main' -include { RUN_QUAST } from '../../qc/quast/main' -include { RUN_BUSCO } from '../../qc/busco/main' +include { QC } from '../../qc/main' include { RUN_LIFTOFF } from '../../liftoff/main' -include { MERQURY_QC } from '../../qc/merqury/main' workflow RUN_LONGSTITCH { take: @@ -17,9 +14,7 @@ workflow RUN_LONGSTITCH { main: Channel.empty().set { ch_versions } - Channel.empty().set { quast_out } - Channel.empty().set { busco_out } - Channel.empty().set { merqury_report_files } + assembly .join(in_reads) .join(genome_size) @@ -30,36 +25,10 @@ workflow RUN_LONGSTITCH { ch_versions = ch_versions.mix(LONGSTITCH.out.versions) - MAP_TO_ASSEMBLY(in_reads, scaffolds) - - ch_versions = ch_versions.mix(MAP_TO_ASSEMBLY.out.versions) - - RUN_QUAST(scaffolds, inputs, ch_aln_to_ref, MAP_TO_ASSEMBLY.out.aln_to_assembly_bam) - RUN_QUAST.out.quast_tsv.set { quast_out } - - ch_versions = ch_versions.mix(RUN_QUAST.out.versions) + QC(inputs, in_reads, scaffolds, ch_aln_to_ref, meryl_kmers) - RUN_BUSCO(scaffolds) - RUN_BUSCO.out.batch_summary.set { busco_out } + ch_versions = ch_versions.mix(QC.out.versions) - ch_versions = ch_versions.mix(RUN_BUSCO.out.versions) - - if (params.short_reads) { - MERQURY_QC(scaffolds, meryl_kmers) - MERQURY_QC.out.stats - .join( - MERQURY_QC.out.spectra_asm_hist - ) - .join( - MERQURY_QC.out.spectra_cn_hist - ) - .join( - MERQURY_QC.out.assembly_qv - ) - .set { merqury_report_files } - - ch_versions = ch_versions.mix(MERQURY_QC.out.versions) - } if (params.lift_annotations) { 
RUN_LIFTOFF(LONGSTITCH.out.ntlLinks_arks_scaffolds, inputs) ch_versions = ch_versions.mix(RUN_LIFTOFF.out.versions) @@ -69,8 +38,8 @@ workflow RUN_LONGSTITCH { emit: scaffolds - quast_out - busco_out - merqury_report_files + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + merqury_report_files = QC.out.merqury_report_files versions } diff --git a/subworkflows/local/scaffolding/ragtag/main.nf b/subworkflows/local/scaffolding/ragtag/main.nf index 83574b93..9c62ff3f 100644 --- a/subworkflows/local/scaffolding/ragtag/main.nf +++ b/subworkflows/local/scaffolding/ragtag/main.nf @@ -1,9 +1,6 @@ include { RAGTAG_SCAFFOLD } from '../../../../modules/local/ragtag/main' -include { MAP_TO_ASSEMBLY } from '../../mapping/map_to_assembly/main' -include { RUN_QUAST } from '../../qc/quast/main' -include { RUN_BUSCO } from '../../qc/busco/main' +include { QC } from '../../qc/main' include { RUN_LIFTOFF } from '../../liftoff/main' -include { MERQURY_QC } from '../../qc/merqury/main' workflow RUN_RAGTAG { @@ -32,37 +29,9 @@ workflow RUN_RAGTAG { ch_versions = ch_versions.mix(RAGTAG_SCAFFOLD.out.versions) - MAP_TO_ASSEMBLY(in_reads, ragtag_scaffold_fasta) + QC(inputs, in_reads, ragtag_scaffold_fasta, ch_aln_to_ref, meryl_kmers) - ch_versions = ch_versions.mix(MAP_TO_ASSEMBLY.out.versions) - - - RUN_QUAST(ragtag_scaffold_fasta, inputs, ch_aln_to_ref, MAP_TO_ASSEMBLY.out.aln_to_assembly_bam) - RUN_QUAST.out.quast_tsv.set { quast_out } - - ch_versions = ch_versions.mix(RUN_QUAST.out.versions) - - RUN_BUSCO(ragtag_scaffold_fasta) - RUN_BUSCO.out.batch_summary.set { busco_out } - - ch_versions = ch_versions.mix(RUN_BUSCO.out.versions) - - if (params.short_reads) { - MERQURY_QC(ragtag_scaffold_fasta, meryl_kmers) - MERQURY_QC.out.stats - .join( - MERQURY_QC.out.spectra_asm_hist - ) - .join( - MERQURY_QC.out.spectra_cn_hist - ) - .join( - MERQURY_QC.out.assembly_qv - ) - .set { merqury_report_files } - - ch_versions = ch_versions.mix(MERQURY_QC.out.versions) - } + ch_versions = 
ch_versions.mix(QC.out.versions) if (params.lift_annotations) { RUN_LIFTOFF(RAGTAG_SCAFFOLD.out.corrected_assembly, inputs) @@ -74,8 +43,8 @@ workflow RUN_RAGTAG { emit: ragtag_scaffold_fasta ragtag_scaffold_agp - quast_out - busco_out - merqury_report_files + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + merqury_report_files = QC.out.merqury_report_files versions } From f78a29e5014e0c4f4d5720d03d247f1db9013fb0 Mon Sep 17 00:00:00 2001 From: nvnieuwk Date: Tue, 25 Mar 2025 08:27:00 +0000 Subject: [PATCH 03/34] update parameter schema --- nextflow_schema.json | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index e546e810..be119508 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -27,8 +27,7 @@ "type": "string", "format": "directory-path", "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open", - "default": "null" + "fa_icon": "fas fa-folder-open" }, "email": { "type": "string", @@ -231,6 +230,9 @@ }, "pacbio_primers": { "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.fn?a(sta)?$", "description": "file containing pacbio primers for trimming with lima" } } @@ -359,6 +361,7 @@ }, "busco_db": { "type": "string", + "format": "directory-path", "description": "Path to busco db (optional)" }, "busco_lineage": { From 402239697b0a455ea108da616260e686d39a7aea Mon Sep 17 00:00:00 2001 From: nvnieuwk Date: Tue, 25 Mar 2025 08:31:16 +0000 Subject: [PATCH 04/34] update samplesheet schema --- assets/schema_input.json | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 7e60400d..0271e948 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,51 +10,50 @@ "sample": { "type": "string", "pattern": "^\\S+$", - 
"errorMessage": "Sample name must be provided and cannot contain spaces", + "errorMessage": "Sample name must be provided, has to be a string value and cannot contain spaces", "meta": ["id"] }, "ontreads": { "type": "string", "format": "file-path", + "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "ONT reads cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "errorMessage": "ONT reads cannot contain spaces, has to exist and must have extension '.fq.gz' or '.fastq.gz'" }, "hifireads": { "type": "string", "format": "file-path", + "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "HiFi reads cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "errorMessage": "HiFi reads cannot contain spaces, has to exist and must have extension '.fq.gz' or '.fastq.gz'" }, "ref_fasta": { "type": "string", "format": "file-path", - "pattern": "^\\S+\\.f(ast)?a", - "errorMessage": "Reference fasta cannot contain spaces and must have extension '.fa' or '.fasta'" + "exists": true, + "pattern": "^\\S+\\.f(n|ast)?a", + "errorMessage": "Reference fasta cannot contain spaces, has to exist and must have extension '.fa', '.fna' or '.fasta'" }, "ref_gff": { "type": "string", "format": "file-path", + "exists": true, "pattern": "^\\S+\\.gff(3)?", - "errorMessage": "Reference gff cannot contain spaces and must have extension '.gff' or '.gff3'" + "errorMessage": "Reference gff cannot contain spaces, has to exist and must have extension '.gff' or '.gff3'" }, "shortread_F": { "type": "string", "format": "file-path", + "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "shortread_F cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "errorMessage": "shortread_F cannot contain spaces, has to exist and must have extension '.fq.gz' or '.fastq.gz'" }, "shortread_R": { - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - 
"maxLength": 0 - } - ], - "errorMessage": "shortread_R cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "shortread_R cannot contain spaces, has to exist and must have extension '.fq.gz' or '.fastq.gz'" }, "paired": { "type": "boolean", From c80f18bc54586e476dd6f2d1c3176a78f254a1ab Mon Sep 17 00:00:00 2001 From: nvnieuwk Date: Tue, 25 Mar 2025 08:32:28 +0000 Subject: [PATCH 05/34] bump changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c551b5c..67b104ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +[]() - Updated the input validation to be more strict. This should prevent some down the line errors in the pipeline + ### `Dependencies` ### `Deprecated` From 4f0baf65fb6d33865a97cad2122f396e6b98bbda Mon Sep 17 00:00:00 2001 From: nvnieuwk Date: Tue, 25 Mar 2025 08:33:26 +0000 Subject: [PATCH 06/34] add PR number --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 67b104ec..01d286c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` -[]() - Updated the input validation to be more strict. This should prevent some down the line errors in the pipeline +[#133](https://github.com/nf-core/genomeassembler/pull/133) - Updated the input validation to be more strict. 
This should prevent some down the line errors in the pipeline ### `Dependencies` From a72138a389698fd7f20dfcbc8a0791cdca7ab77e Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Tue, 25 Mar 2025 13:39:16 +0100 Subject: [PATCH 07/34] update changelog, use harshil alignment in emit blocks --- CHANGELOG.md | 4 +++- subworkflows/local/assemble/main.nf | 12 ++++++------ .../local/polishing/medaka/polish_medaka/main.nf | 6 +++--- .../local/polishing/pilon/polish_pilon/main.nf | 6 +++--- subworkflows/local/scaffolding/links/main.nf | 6 +++--- subworkflows/local/scaffolding/longstitch/main.nf | 6 +++--- subworkflows/local/scaffolding/ragtag/main.nf | 6 +++--- 7 files changed, 24 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c551b5c..2de6318a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,12 +3,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.1.0 +## v1.1.0dev ### `Added` ### `Fixed` +[#131](https://github.com/nf-core/genomeassembler/pull/131) Refactored QC steps into subworkflow. 
+ ### `Dependencies` ### `Deprecated` diff --git a/subworkflows/local/assemble/main.nf b/subworkflows/local/assemble/main.nf index 6f700d6a..10835031 100644 --- a/subworkflows/local/assemble/main.nf +++ b/subworkflows/local/assemble/main.nf @@ -187,11 +187,11 @@ workflow ASSEMBLE { } emit: - assembly = ch_assembly - ref_bam = ch_ref_bam + assembly = ch_assembly + ref_bam = ch_ref_bam longreads - assembly_quast_reports = QC.out.quast_out - assembly_busco_reports = QC.out.busco_out - assembly_merqury_reports = QC.out.merqury_report_files - versions = ch_versions + assembly_quast_reports = QC.out.quast_out + assembly_busco_reports = QC.out.busco_out + assembly_merqury_reports = QC.out.merqury_report_files + versions = ch_versions } diff --git a/subworkflows/local/polishing/medaka/polish_medaka/main.nf b/subworkflows/local/polishing/medaka/polish_medaka/main.nf index bcfc5205..e4d459fa 100644 --- a/subworkflows/local/polishing/medaka/polish_medaka/main.nf +++ b/subworkflows/local/polishing/medaka/polish_medaka/main.nf @@ -34,8 +34,8 @@ workflow POLISH_MEDAKA { emit: polished_assembly - quast_out = QC.out.quast_out - busco_out = QC.out.busco_out - merqury_report_files = QC.out.merqury_report_files + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + merqury_report_files = QC.out.merqury_report_files versions } diff --git a/subworkflows/local/polishing/pilon/polish_pilon/main.nf b/subworkflows/local/polishing/pilon/polish_pilon/main.nf index e4384c37..3b7df47a 100644 --- a/subworkflows/local/polishing/pilon/polish_pilon/main.nf +++ b/subworkflows/local/polishing/pilon/polish_pilon/main.nf @@ -38,8 +38,8 @@ workflow POLISH_PILON { emit: pilon_polished - quast_out = QC.out.quast_out - busco_out = QC.out.busco_out - merqury_report_files = QC.out.merqury_report_files + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + merqury_report_files = QC.out.merqury_report_files versions } diff --git a/subworkflows/local/scaffolding/links/main.nf 
b/subworkflows/local/scaffolding/links/main.nf index 902cf20a..21edf95d 100644 --- a/subworkflows/local/scaffolding/links/main.nf +++ b/subworkflows/local/scaffolding/links/main.nf @@ -36,8 +36,8 @@ workflow RUN_LINKS { emit: scaffolds - quast_out = QC.out.quast_out - busco_out = QC.out.busco_out - merqury_report_files = QC.out.merqury_report_files + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + merqury_report_files = QC.out.merqury_report_files versions } diff --git a/subworkflows/local/scaffolding/longstitch/main.nf b/subworkflows/local/scaffolding/longstitch/main.nf index bb8dd6a7..8756225d 100644 --- a/subworkflows/local/scaffolding/longstitch/main.nf +++ b/subworkflows/local/scaffolding/longstitch/main.nf @@ -38,8 +38,8 @@ workflow RUN_LONGSTITCH { emit: scaffolds - quast_out = QC.out.quast_out - busco_out = QC.out.busco_out - merqury_report_files = QC.out.merqury_report_files + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + merqury_report_files = QC.out.merqury_report_files versions } diff --git a/subworkflows/local/scaffolding/ragtag/main.nf b/subworkflows/local/scaffolding/ragtag/main.nf index 9c62ff3f..286edf8a 100644 --- a/subworkflows/local/scaffolding/ragtag/main.nf +++ b/subworkflows/local/scaffolding/ragtag/main.nf @@ -43,8 +43,8 @@ workflow RUN_RAGTAG { emit: ragtag_scaffold_fasta ragtag_scaffold_agp - quast_out = QC.out.quast_out - busco_out = QC.out.busco_out - merqury_report_files = QC.out.merqury_report_files + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + merqury_report_files = QC.out.merqury_report_files versions } From 5c22ffedaad108029e87f66a3187d15931054b27 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Wed, 9 Apr 2025 11:22:42 +0200 Subject: [PATCH 08/34] Ragtag patch for consensus assemblies (#136) Replace local ragtag with the nf-core module Use ragtag patch to create consensus assemblies --- CHANGELOG.md | 4 +- conf/modules.config | 11 +- modules.json | 10 + 
modules/local/ragtag/environment.yml | 7 - modules/local/ragtag/main.nf | 58 ----- modules/nf-core/ragtag/patch/environment.yml | 5 + modules/nf-core/ragtag/patch/main.nf | 109 +++++++++ modules/nf-core/ragtag/patch/meta.yml | 156 +++++++++++++ .../nf-core/ragtag/patch/tests/main.nf.test | 89 ++++++++ .../ragtag/patch/tests/main.nf.test.snap | 215 ++++++++++++++++++ .../nf-core/ragtag/scaffold/environment.yml | 5 + modules/nf-core/ragtag/scaffold/main.nf | 82 +++++++ modules/nf-core/ragtag/scaffold/meta.yml | 106 +++++++++ .../ragtag/scaffold/tests/main.nf.test | 81 +++++++ .../ragtag/scaffold/tests/main.nf.test.snap | 132 +++++++++++ subworkflows/local/assemble/main.nf | 13 +- subworkflows/local/scaffolding/ragtag/main.nf | 12 +- 17 files changed, 1018 insertions(+), 77 deletions(-) delete mode 100644 modules/local/ragtag/environment.yml delete mode 100644 modules/local/ragtag/main.nf create mode 100644 modules/nf-core/ragtag/patch/environment.yml create mode 100644 modules/nf-core/ragtag/patch/main.nf create mode 100644 modules/nf-core/ragtag/patch/meta.yml create mode 100644 modules/nf-core/ragtag/patch/tests/main.nf.test create mode 100644 modules/nf-core/ragtag/patch/tests/main.nf.test.snap create mode 100644 modules/nf-core/ragtag/scaffold/environment.yml create mode 100644 modules/nf-core/ragtag/scaffold/main.nf create mode 100644 modules/nf-core/ragtag/scaffold/meta.yml create mode 100644 modules/nf-core/ragtag/scaffold/tests/main.nf.test create mode 100644 modules/nf-core/ragtag/scaffold/tests/main.nf.test.snap diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e815b88..6c314656 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,10 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` -[#131](https://github.com/nf-core/genomeassembler/pull/131) Refactored QC steps into subworkflow. +[#131](https://github.com/nf-core/genomeassembler/pull/131) - Refactored QC steps into subworkflow. 
[#133](https://github.com/nf-core/genomeassembler/pull/133) - Updated the input validation to be more strict. This should prevent some down the line errors in the pipeline +[#136](https://github.com/nf-core/genomeassembler/pull/136) - Switched to using ragtag `patch` instead of `scaffold` for `flye_on_hifiasm` + ### `Dependencies` ### `Deprecated` diff --git a/conf/modules.config b/conf/modules.config index 02772c24..589079c6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -178,13 +178,13 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: '.*ASSEMBLE:.*RAGTAG_SCAFFOLD' { + withName: '.*ASSEMBLE:.*RAGTAG_PATCH' { publishDir = [ path: { "${params.outdir}/${meta.id}/assembly/ragtag/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - ext.prefix = { "${meta.id}_assembly_scaffold" } + ext.prefix = { "${meta.id}_assembly_patch" } } /* @@ -228,7 +228,14 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] ext.prefix = { "${meta.id}_ragtag" } + ext.args = [ + "-C", + "-u", + "-r", + "-w" + ].join(" ").trim() } + withName: LINKS { publishDir = [ path: { "${params.outdir}/${meta.id}/scaffold/links/" }, diff --git a/modules.json b/modules.json index f15bcf2d..7e2d1228 100644 --- a/modules.json +++ b/modules.json @@ -68,6 +68,16 @@ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, + "ragtag/patch": { + "branch": "master", + "git_sha": "7d163aded9221aef68d8c11cb7a04354a232d89c", + "installed_by": ["modules"] + }, + "ragtag/scaffold": { + "branch": "master", + "git_sha": "7d163aded9221aef68d8c11cb7a04354a232d89c", + "installed_by": ["modules"] + }, "samtools/fastq": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", diff --git a/modules/local/ragtag/environment.yml b/modules/local/ragtag/environment.yml deleted file mode 100644 index 756805f7..00000000 --- a/modules/local/ragtag/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -channels: - - conda-forge - - bioconda -dependencies: - - "bioconda::ragtag=2.1.0" diff --git a/modules/local/ragtag/main.nf b/modules/local/ragtag/main.nf deleted file mode 100644 index 9756114c..00000000 --- a/modules/local/ragtag/main.nf +++ /dev/null @@ -1,58 +0,0 @@ -process RAGTAG_SCAFFOLD { - tag "${meta.id}" - label 'process_high' - conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 
'https://depot.galaxyproject.org/singularity/ragtag:2.1.0--pyhb7b1952_0' - : 'biocontainers/ragtag:2.1.0--pyhb7b1952_0'}" - - input: - tuple val(meta), path(assembly), path(reference) - - output: - tuple val(meta), path("*.fasta"), emit: corrected_assembly - tuple val(meta), path("*.agp"), emit: corrected_agp - tuple val(meta), path("*.stats"), emit: corrected_stats - path "versions.yml", emit: versions - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - if [[ ${assembly} == *.gz ]] - then - zcat ${assembly} > assembly.fa - else - mv ${assembly} assembly.fa - fi - - ragtag.py scaffold ${reference} assembly.fa \\ - -o "${prefix}" \\ - -t ${task.cpus} \\ - -f 5000 \\ - -w \\ - -C \\ - -u \\ - -r - - mv ${prefix}/ragtag.scaffold.fasta ${prefix}.fasta - mv ${prefix}/ragtag.scaffold.agp ${prefix}.agp - mv ${prefix}/ragtag.scaffold.stats ${prefix}.stats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - RagTag: \$(echo \$(ragtag.py -v | sed 's/v//')) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.fasta - touch ${prefix}.agp - touch ${prefix}.stats - cat <<-END_VERSIONS > versions.yml - "${task.process}": - RagTag: \$(echo \$(ragtag.py -v | sed 's/v//')) - END_VERSIONS - """ -} diff --git a/modules/nf-core/ragtag/patch/environment.yml b/modules/nf-core/ragtag/patch/environment.yml new file mode 100644 index 00000000..83cefc79 --- /dev/null +++ b/modules/nf-core/ragtag/patch/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::ragtag=2.1.0" diff --git a/modules/nf-core/ragtag/patch/main.nf b/modules/nf-core/ragtag/patch/main.nf new file mode 100644 index 00000000..2f7f66bf --- /dev/null +++ b/modules/nf-core/ragtag/patch/main.nf @@ -0,0 +1,109 @@ +process RAGTAG_PATCH { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/ragtag:2.1.0--pyhb7b1952_0' + : 'biocontainers/ragtag:2.1.0--pyhb7b1952_0'}" + + input: + tuple val(meta), path(target, name: 'target/*') + tuple val(meta2), path(query, name: 'query/*') + tuple val(meta3), path(exclude) + tuple val(meta4), path(skip) + + output: + tuple val(meta), path("*.patch.fasta"), emit: patch_fasta + tuple val(meta), path("*.patch.agp"), emit: patch_agp + tuple val(meta), path("*.comps.fasta"), emit: patch_components_fasta + tuple val(meta), path("*.ragtag.patch.asm.*"), emit: assembly_alignments, optional: true + tuple val(meta), path("*.ctg.agp"), emit: target_splits_agp + tuple val(meta), path("*.ctg.fasta"), emit: target_splits_fasta + tuple val(meta), path("*.rename.agp"), emit: qry_rename_agp, optional: true + tuple val(meta), path("*.rename.fasta"), emit: qry_rename_fasta, optional: true + tuple val(meta), path("*.patch.err"), emit: stderr + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: "" + def arg_exclude = exclude ? "-e ${exclude}" : "" + def arg_skip = skip ? "-j ${skip}" : "" + """ + if [[ ${target} == *.gz ]] + then + zcat ${target} > target.fa + else + ln -s ${target} target.fa + fi + + if [[ ${query} == *.gz ]] + then + zcat ${query} > query.fa + else + ln -s ${query} query.fa + fi + + tail -F ${prefix}/ragtag.patch.err >&2 & + tailpid=\$! 
+ ragtag.py patch target.fa query.fa \\ + -o "${prefix}" \\ + -t ${task.cpus} \\ + ${arg_exclude} \\ + ${arg_skip} \\ + ${args} # \\ + # 2> >( tee ${prefix}.stderr.log >&2 ) \\ + # | tee ${prefix}.stdout.log + + kill -TERM "\$tailpid" + + mv ${prefix}/ragtag.patch.agp ${prefix}.patch.agp + mv ${prefix}/ragtag.patch.fasta ${prefix}.patch.fasta + mv ${prefix}/ragtag.patch.comps.fasta ${prefix}.comps.fasta + mv ${prefix}/ragtag.patch.ctg.agp ${prefix}.ctg.agp + mv ${prefix}/ragtag.patch.ctg.fasta ${prefix}.ctg.fasta + if [ -f ${prefix}/ragtag.patch.rename.agp ]; then + mv ${prefix}/ragtag.patch.rename.agp ${prefix}.rename.agp + fi + + if [ -f ${prefix}/ragtag.patch.rename.fasta ]; then + mv ${prefix}/ragtag.patch.rename.fasta ${prefix}.rename.fasta + fi + mv ${prefix}/ragtag.patch.err ${prefix}.patch.err + # Move the assembly files from prefix folder, and add prefix + for alignment_file in \$(ls ${prefix}/ragtag.patch.asm.*); + do + mv "\$alignment_file" "\${alignment_file/${prefix}\\//${prefix}_}" + done + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ragtag: \$(echo \$(ragtag.py -v | sed 's/v//')) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def _args = task.ext.args ?: "" + def _arg_exclude = exclude ? "-e ${exclude}" : "" + def _arg_skip = skip ? 
"-j ${skip}" : "" + """ + touch ${prefix}.patch.agp + touch ${prefix}.patch.fasta + touch ${prefix}.comps.fasta + touch ${prefix}.ctg.agp + touch ${prefix}.ctg.fasta + touch ${prefix}.rename.agp + touch ${prefix}.rename.fasta + touch ${prefix}.ragtag.patch.asm.1 + touch ${prefix}.patch.err + + cat <<-END_VERSIONS > versions.yml + ragtag: \$(echo \$(ragtag.py -v | sed 's/v//')) + END_VERSIONS + """ +} diff --git a/modules/nf-core/ragtag/patch/meta.yml b/modules/nf-core/ragtag/patch/meta.yml new file mode 100644 index 00000000..d74ee3d2 --- /dev/null +++ b/modules/nf-core/ragtag/patch/meta.yml @@ -0,0 +1,156 @@ +name: "ragtag_patch" +description: "Homology-based assembly patching: Make continuous joins and fill gaps + in 'target.fa' using sequences from 'query.fa'" + +keywords: + - assembly + - consensus + - ragtag + - patch +tools: + - "ragtag": + description: "Fast reference-guided genome assembly scaffolding" + homepage: "https://github.com/malonge/RagTag/wiki" + documentation: "https://github.com/malonge/RagTag/wiki" + tool_dev_url: "https://github.com/malonge/RagTag" + doi: "10.1186/s13059-022-02823-7" + licence: ["MIT"] + identifier: biotools:ragtag +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - target: + type: file + description: Target assembly + pattern: "*.{fasta,fasta.gz}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - query: + type: file + description: Query assembly + pattern: "*.{fasta,fasta.gz}" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - exclude: + type: file + description: list of target sequences to ignore + pattern: "*.txt" + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - skip: + type: file + description: list of query sequences to ignore + pattern: "*.txt" +output: + - patch_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.patch.fasta": + type: file + description: FASTA file containing the patched assembly + pattern: "*.patch.fasta" + - patch_agp: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.patch.agp": + type: file + description: AGP file defining how ragtag.patch.fasta is built + pattern: "*.patch.agp" + - patch_components_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.comps.fasta": + type: file + description: The split target assembly and the renamed query assembly combined + into one FASTA file. This file contains all components in ragtag.patch.agp + pattern: "*.comps.fasta" + - assembly_alignments: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.ragtag.patch.asm.*": + type: file + description: Assembly alignment files + pattern: "*.ragtag.patch.asm.*" + - target_splits_agp: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.ctg.agp": + type: file + description: An AGP file defining how the target assembly was split at gaps + pattern: "*.ctg.agp" + - target_splits_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.ctg.fasta": + type: file + description: FASTA file containing the target assembly split at gaps + pattern: "*.ctg.fasta" + - qry_rename_agp: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - "*.rename.agp": + type: file + description: An AGP file defining the new names for query sequences + pattern: "*.rename.agp" + - qry_rename_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.rename.fasta": + type: file + description: A FASTA file with the original query sequence, but with new names + pattern: "*.rename.fasta" + - stderr: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.patch.err": + type: file + description: Standard error logging for all external RagTag commands + pattern: "*.patch.err" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@nschan" +maintainers: + - "@nschan" diff --git a/modules/nf-core/ragtag/patch/tests/main.nf.test b/modules/nf-core/ragtag/patch/tests/main.nf.test new file mode 100644 index 00000000..a7c0fee8 --- /dev/null +++ b/modules/nf-core/ragtag/patch/tests/main.nf.test @@ -0,0 +1,89 @@ +nextflow_process { + + name "Test Process RAGTAG_PATCH" + script "../main.nf" + process "RAGTAG_PATCH" + + tag "modules" + tag "modules_nfcore" + tag "ragtag" + tag "ragtag/patch" + + +test("A. 
thaliana Col-0 test data - ragtag - patch") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[1] = [ + [], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[2] = [ + [], + [] + ] + input[3] = [ + [], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.patch_fasta, + process.out.patch_agp, + process.out.patch_components_fasta, + process.out.target_splits_agp, + process.out.target_splits_fasta, + process.out.versions + ).match() + }, + ) + } + + } + test("A. thaliana Col-0 test data - ragtag - patch - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[1] = [ + [], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[2] = [ + [], + [] + ] + input[3] = [ + [], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/ragtag/patch/tests/main.nf.test.snap b/modules/nf-core/ragtag/patch/tests/main.nf.test.snap new file mode 100644 index 00000000..b1444692 --- /dev/null +++ b/modules/nf-core/ragtag/patch/tests/main.nf.test.snap @@ -0,0 +1,215 @@ +{ + "A. 
thaliana Col-0 test data - ragtag - patch - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.patch.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.patch.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.comps.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.ragtag.patch.asm.1:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.ctg.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.ctg.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test" + }, + "test.rename.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test" + }, + "test.rename.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test" + }, + "test.patch.err:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + "versions.yml:md5,cecbb39907d607affa6522e395b78a1f" + ], + "assembly_alignments": [ + [ + { + "id": "test" + }, + "test.ragtag.patch.asm.1:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "patch_agp": [ + [ + { + "id": "test" + }, + "test.patch.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "patch_components_fasta": [ + [ + { + "id": "test" + }, + "test.comps.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "patch_fasta": [ + [ + { + "id": "test" + }, + "test.patch.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "qry_rename_agp": [ + [ + { + "id": "test" + }, + "test.rename.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "qry_rename_fasta": [ + [ + { + "id": "test" + }, + "test.rename.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stderr": [ + [ + { + "id": "test" + }, + "test.patch.err:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "target_splits_agp": [ + [ + { + "id": "test" + }, + 
"test.ctg.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "target_splits_fasta": [ + [ + { + "id": "test" + }, + "test.ctg.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,cecbb39907d607affa6522e395b78a1f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-04-04T14:10:01.648597527" + }, + "A. thaliana Col-0 test data - ragtag - patch": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.patch.fasta:md5,5cf615df690061ab15e4fee62abf3ebc" + ] + ], + [ + [ + { + "id": "test" + }, + "test.patch.agp:md5,7878fd4e42ecb2bfccd7565d5ed6b625" + ] + ], + [ + [ + { + "id": "test" + }, + "test.comps.fasta:md5,65bd2563dfc2564d5edf0e8d24257032" + ] + ], + [ + [ + { + "id": "test" + }, + "test.ctg.agp:md5,ac3460a377daaf3e3ce37f499e561968" + ] + ], + [ + [ + { + "id": "test" + }, + "test.ctg.fasta:md5,5cf615df690061ab15e4fee62abf3ebc" + ] + ], + [ + "versions.yml:md5,4c0992a27edf294209711ce4f181eb5a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-04-04T14:25:42.121285998" + } +} \ No newline at end of file diff --git a/modules/nf-core/ragtag/scaffold/environment.yml b/modules/nf-core/ragtag/scaffold/environment.yml new file mode 100644 index 00000000..83cefc79 --- /dev/null +++ b/modules/nf-core/ragtag/scaffold/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::ragtag=2.1.0" diff --git a/modules/nf-core/ragtag/scaffold/main.nf b/modules/nf-core/ragtag/scaffold/main.nf new file mode 100644 index 00000000..c3930c12 --- /dev/null +++ b/modules/nf-core/ragtag/scaffold/main.nf @@ -0,0 +1,82 @@ +process RAGTAG_SCAFFOLD { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/ragtag:2.1.0--pyhb7b1952_0' + : 'biocontainers/ragtag:2.1.0--pyhb7b1952_0'}" + + input: + tuple val(meta), path(assembly, name: 'assembly/*') + tuple val(meta2), path(reference, name: 'reference/*') + tuple val(meta3), path(exclude) + tuple val(meta4), path(skip), path(hard_skip) + + output: + tuple val(meta), path("*.fasta"), emit: corrected_assembly + tuple val(meta), path("*.agp"), emit: corrected_agp + tuple val(meta), path("*.stats"), emit: corrected_stats + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def arg_exclude = exclude ? "-e ${exclude}" : "" + def arg_skip = skip ? "-j ${skip}" : "" + def arg_hard_skip = hard_skip ? "-J ${hard_skip}" : "" + """ + if [[ ${assembly} == *.gz ]] + then + zcat ${assembly} > assembly.fa + else + ln -s ${assembly} assembly.fa + fi + + if [[ ${reference} == *.gz ]] + then + zcat ${reference} > reference.fa + else + ln -s ${reference} reference.fa + fi + + ragtag.py scaffold reference.fa assembly.fa \\ + -o "${prefix}" \\ + -t ${task.cpus} \\ + -C \\ + ${arg_exclude} \\ + ${arg_skip} \\ + ${arg_hard_skip} \\ + ${args} \\ + 2> >( tee ${prefix}.stderr.log >&2 ) \\ + | tee ${prefix}.stdout.log + + mv ${prefix}/ragtag.scaffold.fasta ${prefix}.fasta + mv ${prefix}/ragtag.scaffold.agp ${prefix}.agp + mv ${prefix}/ragtag.scaffold.stats ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ragtag: \$(echo \$(ragtag.py -v | sed 's/v//')) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def _args = task.ext.args ?: '' + def _arg_exclude = exclude ? "-e ${exclude}" : "" + def _arg_skip = skip ? "-j ${skip}" : "" + def _arg_hard_skip = hard_skip ? 
"-J ${hard_skip}" : "" + """ + touch ${prefix}.fasta + touch ${prefix}.agp + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + ragtag: \$(echo \$(ragtag.py -v | sed 's/v//')) + END_VERSIONS + """ +} diff --git a/modules/nf-core/ragtag/scaffold/meta.yml b/modules/nf-core/ragtag/scaffold/meta.yml new file mode 100644 index 00000000..62eb0e49 --- /dev/null +++ b/modules/nf-core/ragtag/scaffold/meta.yml @@ -0,0 +1,106 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: ragtag_scaffold +description: | + Scaffolding is the process of ordering and orienting draft assembly (query) + sequences into longer sequences. Gaps (stretches of "N" characters) are placed + between adjacent query sequences to indicate the presence of unknown sequence. + RagTag uses whole-genome alignments to a reference assembly to scaffold query sequences. + RagTag does not alter input query sequence in any way and only orders and orients sequences, joining them with gaps. +keywords: + - scaffolding + - ragtag + - assembly + - genome +tools: + - "ragtag": + description: "Fast reference-guided genome assembly scaffolding" + homepage: "https://github.com/malonge/RagTag/wiki" + documentation: "https://github.com/malonge/RagTag/wiki" + tool_dev_url: "https://github.com/malonge/RagTag" + doi: "10.1186/s13059-022-02823-7" + licence: ["MIT"] + identifier: biotools:ragtag + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - assembly: + type: file + description: Assembly to be scaffolded + pattern: "*.{fasta,fasta.gz,fa,fa.gz}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - reference: + type: file + description: Reference assembly + pattern: "*.{fasta,fasta.gz,fa,fa.gz}" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - exclude: + type: file + description: list of target sequences to ignore + pattern: "*.txt" + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - skip: + type: file + description: list of query sequences to leave unplaced + pattern: "*.txt" + - hard_skip: + type: file + description: list of query headers to leave unplaced and exclude from 'chr0' + ('-C') + pattern: "*.txt" +output: + - corrected_assembly: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.fasta": + type: file + description: FASTA file containing the patched assembly + pattern: "*.fasta" + - corrected_agp: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.agp": + type: file + description: agp file defining how corrected_assembly is built + pattern: "*.agp" + - corrected_stats: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.stats": + type: file + description: Statistics on the scaffold + pattern: "*.stats" + + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@nschan" +maintainers: + - "@nschan" diff --git a/modules/nf-core/ragtag/scaffold/tests/main.nf.test b/modules/nf-core/ragtag/scaffold/tests/main.nf.test new file mode 100644 index 00000000..51b42642 --- /dev/null +++ b/modules/nf-core/ragtag/scaffold/tests/main.nf.test @@ -0,0 +1,81 @@ +nextflow_process { + + name "Test Process RAGTAG_SCAFFOLD" + script "../main.nf" + process "RAGTAG_SCAFFOLD" + + tag "modules" + tag "modules_nfcore" + tag "ragtag" + tag "ragtag/scaffold" + + test("A. 
thaliana Col-0 test data - ragtag - scaffold") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[1] = [ + [], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[2] = [ + [], + [] + ] + input[3] = [ + [], + [], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + test("A. thaliana Col-0 test data - ragtag - scaffold - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[2] = [ + [], + [] + ] + input[3] = [ + [], + [], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/ragtag/scaffold/tests/main.nf.test.snap b/modules/nf-core/ragtag/scaffold/tests/main.nf.test.snap new file mode 100644 index 00000000..e4faf0b0 --- /dev/null +++ b/modules/nf-core/ragtag/scaffold/tests/main.nf.test.snap @@ -0,0 +1,132 @@ +{ + "A. 
thaliana Col-0 test data - ragtag - scaffold": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,70b661fab5364a1c389972a771f97905" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.agp:md5,40fbf2d081c32880d8ce8187c529a80b" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.stats:md5,209e973e4bac1653b8d5fddb7fa13b63" + ] + ], + "3": [ + "versions.yml:md5,48710c1720f668d8ba3397f99892959e" + ], + "corrected_agp": [ + [ + { + "id": "test" + }, + "test.agp:md5,40fbf2d081c32880d8ce8187c529a80b" + ] + ], + "corrected_assembly": [ + [ + { + "id": "test" + }, + "test.fasta:md5,70b661fab5364a1c389972a771f97905" + ] + ], + "corrected_stats": [ + [ + { + "id": "test" + }, + "test.stats:md5,209e973e4bac1653b8d5fddb7fa13b63" + ] + ], + "versions": [ + "versions.yml:md5,48710c1720f668d8ba3397f99892959e" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-04-04T13:37:54.181644032" + }, + "A. thaliana Col-0 test data - ragtag - scaffold - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,cecbb39907d607affa6522e395b78a1f" + ], + "corrected_agp": [ + [ + { + "id": "test" + }, + "test.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "corrected_assembly": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "corrected_stats": [ + [ + { + "id": "test" + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,cecbb39907d607affa6522e395b78a1f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-04-04T13:38:21.635495713" + } +} \ No newline at end of file diff --git 
a/subworkflows/local/assemble/main.nf b/subworkflows/local/assemble/main.nf index 10835031..c2e1e412 100644 --- a/subworkflows/local/assemble/main.nf +++ b/subworkflows/local/assemble/main.nf @@ -4,7 +4,7 @@ include { HIFIASM as HIFIASM_ONT } from '../../../modules/nf-core/hifiasm/main' include { GFA_2_FA } from '../../../modules/local/gfa2fa/main' include { MAP_TO_REF } from '../mapping/map_to_ref/main' include { RUN_LIFTOFF } from '../liftoff/main' -include { RAGTAG_SCAFFOLD } from '../../../modules/local/ragtag/main' +include { RAGTAG_PATCH } from '../../../modules/nf-core/ragtag/patch/main' include { QC } from '../qc/main' @@ -120,11 +120,16 @@ workflow ASSEMBLE { .join( GFA_2_FA.out.contigs_fasta ) + .multiMap { meta, flye_fasta, hifiasm_fasta -> + target: [meta, flye_fasta] + query: [meta, hifiasm_fasta] + } .set { ragtag_in } - RAGTAG_SCAFFOLD(ragtag_in) + + RAGTAG_PATCH(ragtag_in.target, ragtag_in.query, [[], []], [[], []] ) // takes: meta, assembly (flye), reference (hifi) - RAGTAG_SCAFFOLD.out.corrected_assembly.set { ch_assembly } - ch_versions = ch_versions.mix(FLYE.out.versions).mix(RAGTAG_SCAFFOLD.out.versions) + RAGTAG_PATCH.out.patch_fasta.set { ch_assembly } + ch_versions = ch_versions.mix(FLYE.out.versions).mix(RAGTAG_PATCH.out.versions).mix(HIFIASM.out.versions) } } /* diff --git a/subworkflows/local/scaffolding/ragtag/main.nf b/subworkflows/local/scaffolding/ragtag/main.nf index 286edf8a..518afb87 100644 --- a/subworkflows/local/scaffolding/ragtag/main.nf +++ b/subworkflows/local/scaffolding/ragtag/main.nf @@ -1,4 +1,4 @@ -include { RAGTAG_SCAFFOLD } from '../../../../modules/local/ragtag/main' +include { RAGTAG_SCAFFOLD } from '../../../../modules/nf-core/ragtag/scaffold/main' include { QC } from '../../qc/main' include { RUN_LIFTOFF } from '../../liftoff/main' @@ -14,14 +14,16 @@ workflow RUN_RAGTAG { main: Channel.empty().set { ch_versions } - Channel.empty().set { quast_out } - Channel.empty().set { busco_out } - Channel.empty().set { 
merqury_report_files } + assembly .join(references) + .multiMap { meta, assembly_fasta, reference_fasta -> + assembly: [meta, assembly_fasta] + reference: [meta, reference_fasta] + } .set { ragtag_in } - RAGTAG_SCAFFOLD(ragtag_in) + RAGTAG_SCAFFOLD(ragtag_in.assembly, ragtag_in.reference, [[], []], [[], [], []]) RAGTAG_SCAFFOLD.out.corrected_assembly.set { ragtag_scaffold_fasta } From 3b82c96ba517f7aac36076e28016aa78c0051336 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Wed, 9 Apr 2025 15:50:25 +0200 Subject: [PATCH 09/34] fix medaka singularity container url, closes #139 (#140) --- modules/local/medaka/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/medaka/main.nf b/modules/local/medaka/main.nf index 3d001421..7117c7da 100644 --- a/modules/local/medaka/main.nf +++ b/modules/local/medaka/main.nf @@ -6,7 +6,7 @@ process MEDAKA { conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://depot.galaxyproject.org/singularity/biocontainers/medaka:1.11.3--py310h87e71ce_0' + ? 
'https://depot.galaxyproject.org/singularity/medaka:1.11.3--py310h87e71ce_0' : 'biocontainers/medaka:1.11.3--py310h87e71ce_0'}" input: From 7cc2b53685c2edcf197cd24583f98898429d78e4 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Mon, 14 Apr 2025 13:21:31 +0200 Subject: [PATCH 10/34] update ragtag module (#138) * update ragtag module * update changelog --- CHANGELOG.md | 2 ++ modules.json | 2 +- modules/nf-core/ragtag/patch/main.nf | 6 +++--- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c314656..d87cb8f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +[#138](https://github.com/nf-core/genomeassembler/pull/138) - Switched to RagTag nf-core module + [#131](https://github.com/nf-core/genomeassembler/pull/131) - Refactored QC steps into subworkflow. [#133](https://github.com/nf-core/genomeassembler/pull/133) - Updated the input validation to be more strict. 
This should prevent some down the line errors in the pipeline diff --git a/modules.json b/modules.json index 7e2d1228..230b56a1 100644 --- a/modules.json +++ b/modules.json @@ -70,7 +70,7 @@ }, "ragtag/patch": { "branch": "master", - "git_sha": "7d163aded9221aef68d8c11cb7a04354a232d89c", + "git_sha": "62775d90df7565c82bd4ceedca70149529820cff", "installed_by": ["modules"] }, "ragtag/scaffold": { diff --git a/modules/nf-core/ragtag/patch/main.nf b/modules/nf-core/ragtag/patch/main.nf index 2f7f66bf..4e8cf455 100644 --- a/modules/nf-core/ragtag/patch/main.nf +++ b/modules/nf-core/ragtag/patch/main.nf @@ -55,9 +55,9 @@ process RAGTAG_PATCH { -t ${task.cpus} \\ ${arg_exclude} \\ ${arg_skip} \\ - ${args} # \\ - # 2> >( tee ${prefix}.stderr.log >&2 ) \\ - # | tee ${prefix}.stdout.log + ${args} \\ + 2> >( tee ${prefix}.stderr.log >&2 ) \\ + | tee ${prefix}.stdout.log kill -TERM "\$tailpid" From feaeef7f16dac3cc6284ed5618e4ba92f10e7505 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Mon, 14 Apr 2025 13:51:42 +0200 Subject: [PATCH 11/34] remove duplicated lines from fastqc module (#147) --- modules/nf-core/fastqc/main.nf | 4 ---- 1 file changed, 4 deletions(-) diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 26d47863..033f4154 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,13 +2,10 @@ process FASTQC { tag "${meta.id}" label 'process_medium' - conda "${moduleDir}/environment.yml" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" - 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : - 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) @@ -50,7 +47,6 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ From 55c845d23f8c601310e6c3de50f77dcbde7aed0b Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Mon, 14 Apr 2025 15:09:42 +0200 Subject: [PATCH 12/34] check if assembly is skipped in initialization, closes #143 (#145) * check if assembly is skipped in initialization, closes #143 * update changelog --- CHANGELOG.md | 2 ++ .../main.nf | 36 ++++++++++--------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d87cb8f5..943fba50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [#136](https://github.com/nf-core/genomeassembler/pull/136) - Switched to using ragtag `patch` instead of `scaffold` for `flye_on_hifiasm` +[#145](https://github.com/nf-core/genomeassembler/pull/145) - Fixed `--skip_assembly` input validation bug. 
+ ### `Dependencies` ### `Deprecated` diff --git a/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf b/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf index ad5059aa..0f8cf813 100644 --- a/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf @@ -43,7 +43,7 @@ workflow PIPELINE_INITIALISATION { version, true, outdir, - workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1, ) // @@ -52,7 +52,7 @@ workflow PIPELINE_INITIALISATION { UTILS_NFSCHEMA_PLUGIN( workflow, validate_params, - null + null, ) // @@ -67,8 +67,7 @@ workflow PIPELINE_INITIALISATION { // Channel.empty().set { ch_refs } - Channel - .fromPath(params.input) + Channel.fromPath(params.input) .splitCsv(header: true) .map { it -> [meta: [id: it.sample], ontreads: it.ontreads, hifireads: it.hifireads, ref_fasta: it.ref_fasta, ref_gff: it.ref_gff, shortread_F: it.shortread_F, shortread_R: it.shortread_R, paired: it.paired] } .set { ch_samplesheet } @@ -79,10 +78,12 @@ workflow PIPELINE_INITIALISATION { } // check for assembler / read combination def hifi_only = params.hifi && !params.ont ? 
true : false - if (params.assembler == "flye") { - if (params.hifi) { - if (!hifi_only) { - error('Cannot combine hifi and ont reads with flye') + if (!params.skip_assembly) { + if (params.assembler == "flye") { + if (params.hifi) { + if (!hifi_only) { + error('Cannot combine hifi and ont reads with flye') + } } } } @@ -96,10 +97,11 @@ workflow PIPELINE_INITIALISATION { if (params.scaffold_longstitch) { // If genomesize is not provided, and if ONT is not used in combination with jellyfish // Throw an error - if ( !params.genome_size && (!params.ont && !params.jellyfish) ) { + if (!params.genome_size && (!params.ont && !params.jellyfish)) { error("Scaffolding with longstitch requires genome size.\n Either provide a genome size with --genome_size or estimate from ONT reads using jellyfish and genomescope") } } + emit: samplesheet = ch_samplesheet refs = ch_refs @@ -119,7 +121,7 @@ workflow PIPELINE_COMPLETION { plaintext_email // boolean: Send plain-text email instead of HTML outdir // path: Path to output directory where results will be published monochrome_logs // boolean: Disable ANSI colour codes in log output - hook_url // string: hook URL for notifications + hook_url // string: hook URL for notifications main: summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") @@ -136,7 +138,7 @@ workflow PIPELINE_COMPLETION { plaintext_email, outdir, monochrome_logs, - [] + [], ) } @@ -179,10 +181,10 @@ def toolCitationText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "." 
- ].join(' ').trim() + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + ".", + ].join(' ').trim() return citation_text } @@ -192,8 +194,8 @@ def toolBibliographyText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ - "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", - ].join(' ').trim() + "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • " + ].join(' ').trim() return reference_text } From 4b678470758ad59055b44602f3a890b181abe835 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Mon, 14 Apr 2025 15:11:19 +0200 Subject: [PATCH 13/34] make collect reads accept files, closes #141 (#142) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * make fix reads accept files, closes #141 * update changelog --------- Co-authored-by: Louis Le Nézet <58640615+LouisLeNezet@users.noreply.github.com> --- CHANGELOG.md | 2 ++ docs/usage.md | 2 +- modules/local/collect_reads/main.nf | 8 ++++---- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 943fba50..0a2897bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [#138](https://github.com/nf-core/genomeassembler/pull/138) - Switched to RagTag nf-core module +[#142](https://github.com/nf-core/genomeassembler/pull/142) - Switch `--collect` to accept a glob pattern instead of a folder, consistent with input validation. + [#131](https://github.com/nf-core/genomeassembler/pull/131) - Refactored QC steps into subworkflow. [#133](https://github.com/nf-core/genomeassembler/pull/133) - Updated the input validation to be more strict. This should prevent some down the line errors in the pipeline diff --git a/docs/usage.md b/docs/usage.md index 69ae6a96..c87412f5 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -56,7 +56,7 @@ Further columns _can_ be: ### Multiple runs of the same sample -For ONT reads, a folder containing several fastq files can be provided, which will be concatenated into a single file if `--collect` is used. Generally we recommend to provide all reads in a single file. +For ONT reads, a glob pattern can be provided, matching files will be concatenated into a single file if `--collect` is used. Generally we recommend to provide all reads in a single file. 
## Running the pipeline diff --git a/modules/local/collect_reads/main.nf b/modules/local/collect_reads/main.nf index c7d2ef82..ee38cb3b 100644 --- a/modules/local/collect_reads/main.nf +++ b/modules/local/collect_reads/main.nf @@ -8,17 +8,17 @@ process COLLECT_READS { : 'community.wave.seqera.io/library/coreutils_grep_gzip_lbzip2_pruned:838ba80435a629f8'}" input: - tuple val(meta), path(read_directory) + tuple val(meta), path(reads) output: - tuple val(meta), path("*.fastq"), emit: combined_reads + tuple val(meta), path("*_all_reads.fq.gz"), emit: combined_reads path "versions.yml", emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}" """ - gunzip -c ${read_directory}/*.gz > ${prefix}_all_reads.fastq + zcat ${reads} | gzip > ${prefix}_all_reads.fq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": gzip: \$(echo \$(gzip --version | head -n1 | sed 's/gzip //')) @@ -28,7 +28,7 @@ process COLLECT_READS { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}_all_reads.fastq + touch ${prefix}_all_reads.fq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": gzip: \$(echo \$(gzip --version | head -n1 | sed 's/gzip //')) From af4e02fc10818ab27ab752fba6722e7db1f15a6c Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Mon, 14 Apr 2025 15:32:38 +0200 Subject: [PATCH 14/34] add hifiasm_ont_hifiasm_on_hifiasm profile, code, test, docs (#146) * add hifiasm_ont_hifiasm_on_hifiasm profile, code, test, docs --- .github/workflows/ci.yml | 3 +- CHANGELOG.md | 2 + conf/modules.config | 2 +- configs/hifi_ont_hifiasm_on_hifiasm.config | 12 ++++ docs/usage.md | 19 ++--- nextflow.config | 17 ++--- nextflow_schema.json | 4 +- subworkflows/local/assemble/main.nf | 83 ++++++++++++++-------- 8 files changed, 90 insertions(+), 52 deletions(-) create mode 100644 configs/hifi_ont_hifiasm_on_hifiasm.config diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a268e192..bfe73cf6 100644 --- a/.github/workflows/ci.yml +++ 
b/.github/workflows/ci.yml @@ -35,7 +35,8 @@ jobs: - "ont_flye" - "ont_hifiasm" - "hifiont_hifiasm" - - "hifiont_flyehifiasm" + - "hifiont_flye_on_hifiasm" + - "hifiont_hifiasm_on_hifiasm" profile: - "conda" - "docker" diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a2897bf..877cf201 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +[#144](https://github.com/nf-core/genomeassembler/issues/144) - Added hifiasm_on_hifiasm assembly strategy + ### `Fixed` [#138](https://github.com/nf-core/genomeassembler/pull/138) - Switched to RagTag nf-core module diff --git a/conf/modules.config b/conf/modules.config index 589079c6..f895f8d4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -166,7 +166,7 @@ process { withName: HIFIASM_ONT { ext.args = { [ params.hifiasm_args, "--ont" ].join(" ").trim() } publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/hifiasm/" }, + path: { "${params.outdir}/${meta.id}/assembly/hifiasm_ont/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] diff --git a/configs/hifi_ont_hifiasm_on_hifiasm.config b/configs/hifi_ont_hifiasm_on_hifiasm.config new file mode 100644 index 00000000..9e548e42 --- /dev/null +++ b/configs/hifi_ont_hifiasm_on_hifiasm.config @@ -0,0 +1,12 @@ +/* + Use this config to: + assemble HIFI reads with hifiasm + assemble ONT reads with hifiasm --ont + scaffold the ONT assembly onto the HiFi assembly +*/ + +params { + hifi = true + ont = true + assembler = "hifiasm_on_hifiasm" +} diff --git a/docs/usage.md b/docs/usage.md index c87412f5..bfe3eeed 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -15,14 +15,15 @@ This pipeline can perform assembly, polishing, scaffolding and annotation lift-o To ease configuration, there are a couple of pre-defined profiles for various combinations of read sources and assemblers (named readtype_assembler) -| ONT | HiFI  | Assembly-strategy  | Profile name | -| --- | ----- | -------------------------------------------------- | --------------------- | -| Yes | No  | flye | `ont_flye` | -| No | Yes  | flye | `hifi_flye` | -| Yes | No | hifiasm | `ont_hifiasm` | -| No | Yes  | hifiasm | `hifi_hifiasm` | -| Yes | Yes  | hifiasm --ul | `hifiont_hifiasm` | -| Yes | Yes  | Scaffolding of ONT assemblies onto HiFi assemblies | `hifiont_flyehifiasm` | +| ONT | HiFI  | Assembly-strategy  | Profile name | +| --- | ----- | ---------------------------------------------------------------------- | ---------------------------- | +| Yes | No  | flye | `ont_flye` | +| No | Yes  | flye | `hifi_flye` | +| Yes | No | hifiasm | `ont_hifiasm` | +| No | Yes  | hifiasm | `hifi_hifiasm` | +| Yes | Yes  | hifiasm --ul | `hifiont_hifiasm` | +| Yes | Yes  | Scaffolding of ONT assemblies (flye) onto HiFi assemblies (hifiasm) | `hifiont_flye_on_hifiasm` | +| Yes | Yes  | Scaffolding of ONT assemblies (hifiasm) onto HiFi assemblies (hifiasm) | `hifiont_hifiasm_on_hifiasm` | ## Samplesheet input @@ -38,7 +39,7 @@ The largest samplesheet format is: ```csv 
title="samplesheet.csv" sample,ontreads,hifireads,ref_fasta,ref_gff,shortread_F,shortread_R,paired -Sample1,sample1ont.fq.gz,sample1hifi.fq.gz,ref.fa,ref.gff,sample1_r1.fq.gz,sample1_r2,fq.gz +Sample1,sample1ont.fq.gz,sample1hifi.fq.gz,ref.fa,ref.gff,sample1_r1.fq.gz,sample1_r2.fq.gz,true ``` The samplesheet _must_ contain a column name `sample` [string]. diff --git a/nextflow.config b/nextflow.config index 1b4c472d..ae6f3284 100644 --- a/nextflow.config +++ b/nextflow.config @@ -206,14 +206,15 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } - hifi_flye { includeConfig 'configs/hifi_flye.config' } // Hifi-reads with flye - hifi_hifiasm { includeConfig 'configs/hifi_hifiasm.config' } // hifi-reads with hifiasm - ont_flye { includeConfig 'configs/ont_flye.config' } // ont-reads with flye - ont_hifiasm { includeConfig 'configs/ont_hifiasm.config' } // ont-reads with hifiasm - hifiont_hifiasm { includeConfig 'configs/hifi_ont_hifiasm_ul.config' } // ont and hifi reads with hifiasm --ul - hifiont_flyehifiasm { includeConfig 'configs/hifi_ont_flye_on_hifiasm.config' } // ont and hifi reads. ONT via flye, Hifi via hifiasm, scaffold flye on hifiasm + test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } + hifi_flye { includeConfig 'configs/hifi_flye.config' } // Hifi-reads with flye + hifi_hifiasm { includeConfig 'configs/hifi_hifiasm.config' } // hifi-reads with hifiasm + ont_flye { includeConfig 'configs/ont_flye.config' } // ont-reads with flye + ont_hifiasm { includeConfig 'configs/ont_hifiasm.config' } // ont-reads with hifiasm + hifiont_hifiasm { includeConfig 'configs/hifi_ont_hifiasm_ul.config' } // ont and hifi reads with hifiasm --ul + hifiont_flye_on_hifiasm { includeConfig 'configs/hifi_ont_flye_on_hifiasm.config' } // ont and hifi reads. 
ONT via flye, Hifi via hifiasm, scaffold flye on hifiasm + hifiont_hifiasm_on_hifiasm { includeConfig 'configs/hifi_ont_hifiasm_on_hifiasm.config' } // ont and hifi reads. ONT via hifiasm, Hifi via hifiasm, scaffold ONT on HiFi } // Load nf-core custom profiles from different Institutions diff --git a/nextflow_schema.json b/nextflow_schema.json index be119508..636070bc 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -245,8 +245,8 @@ "properties": { "assembler": { "type": "string", - "description": "Assembler to use. Valid choices are: `'hifiasm'`, `'flye'`, or `'flye_on_hifiasm'`. `flye_on_hifiasm` will scaffold flye assembly (ont) on hifiasm (hifi) assembly using ragtag", - "enum": ["flye", "hifiasm", "flye_on_hifiasm"], + "description": "Assembler to use. Valid choices are: `'hifiasm'`, `'flye'`, `'flye_on_hifiasm'` or `hifiasm_on_hifiasm`. `flye_on_hifiasm` will scaffold flye assembly (ont) on hifiasm (hifi) assembly using ragtag. `hifiasm_on_hifiasm` will scaffold hifiasm (ont) onto hifiasm (HiFi) using ragtag", + "enum": ["flye", "hifiasm", "flye_on_hifiasm", "hifiasm_on_hifiasm"], "default": "flye" }, "genome_size": { diff --git a/subworkflows/local/assemble/main.nf b/subworkflows/local/assemble/main.nf index c2e1e412..9668d8c9 100644 --- a/subworkflows/local/assemble/main.nf +++ b/subworkflows/local/assemble/main.nf @@ -1,7 +1,8 @@ include { FLYE } from '../../../modules/nf-core/flye/main' include { HIFIASM } from '../../../modules/nf-core/hifiasm/main' include { HIFIASM as HIFIASM_ONT } from '../../../modules/nf-core/hifiasm/main' -include { GFA_2_FA } from '../../../modules/local/gfa2fa/main' +include { GFA_2_FA as GFA_2_FA_HIFI } from '../../../modules/local/gfa2fa/main' +include { GFA_2_FA as GFA_2_FA_ONT} from '../../../modules/local/gfa2fa/main' include { MAP_TO_REF } from '../mapping/map_to_ref/main' include { RUN_LIFTOFF } from '../liftoff/main' include { RAGTAG_PATCH } from '../../../modules/nf-core/ragtag/patch/main' @@ -68,10 
+69,10 @@ workflow ASSEMBLE { .join(ont_reads) .set { hifiasm_inputs } HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []]) - GFA_2_FA(HIFIASM.out.processed_contigs) - GFA_2_FA.out.contigs_fasta.set { ch_assembly } + GFA_2_FA_HIFI(HIFIASM.out.processed_contigs) + GFA_2_FA_HIFI.out.contigs_fasta.set { ch_assembly } - ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA.out.versions) + ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA_HIFI.out.versions) } // ONT reads only if (!params.hifi && params.ont) { @@ -79,10 +80,9 @@ workflow ASSEMBLE { .map { meta, ontreads -> [meta, ontreads, []] } .set { hifiasm_inputs } HIFIASM_ONT(hifiasm_inputs, [[], [], []], [[], [], []]) - GFA_2_FA(HIFIASM_ONT.out.processed_contigs) - GFA_2_FA.out.contigs_fasta.set { ch_assembly } - - ch_versions = ch_versions.mix(HIFIASM_ONT.out.versions).mix(GFA_2_FA.out.versions) + GFA_2_FA_ONT(HIFIASM_ONT.out.processed_contigs) + GFA_2_FA_ONT.out.contigs_fasta.set { ch_assembly } + ch_versions = ch_versions.mix(HIFIASM_ONT.out.versions).mix(GFA_2_FA_ONT.out.versions) } // HiFI reads only if (params.hifi && !params.ont) { @@ -91,45 +91,66 @@ workflow ASSEMBLE { .set { hifiasm_inputs } HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []]) - GFA_2_FA(HIFIASM.out.processed_contigs) - GFA_2_FA.out.contigs_fasta.set { ch_assembly } + GFA_2_FA_HIFI(HIFIASM.out.processed_contigs) + GFA_2_FA_HIFI.out.contigs_fasta.set { ch_assembly } - ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA.out.versions) + ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA_HIFI.out.versions) } } - if (params.assembler == "flye_on_hifiasm") { + if (params.assembler == "flye_on_hifiasm" | params.assembler == "hifiasm_on_hifiasm") { // Run hifiasm hifi_reads .map { meta, hifireads -> [meta, hifireads, []] } .set { hifiasm_inputs } HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []]) - GFA_2_FA(HIFIASM.out.processed_contigs) + GFA_2_FA_HIFI(HIFIASM.out.processed_contigs) - 
ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA.out.versions) + ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA_HIFI.out.versions) + if(params.assembler == "flye_on_hifiasm") { // Run flye - ont_reads - .join(genome_size) - .map { meta, reads, genomesize -> [[id: meta.id, genome_size: genomesize], reads]} - .set { flye_inputs } + ont_reads + .join(genome_size) + .map { meta, reads, genomesize -> [[id: meta.id, genome_size: genomesize], reads]} + .set { flye_inputs } - FLYE(flye_inputs, params.flye_mode) - FLYE.out.fasta - .map { meta, assembly -> [[id: meta.id], assembly] } - .join( - GFA_2_FA.out.contigs_fasta - ) - .multiMap { meta, flye_fasta, hifiasm_fasta -> - target: [meta, flye_fasta] - query: [meta, hifiasm_fasta] - } - .set { ragtag_in } + FLYE(flye_inputs, params.flye_mode) + FLYE.out.fasta + .map { meta, assembly -> [[id: meta.id], assembly] } + .join( + GFA_2_FA_HIFI.out.contigs_fasta + ) + .multiMap { meta, flye_fasta, hifiasm_fasta -> + target: [meta, flye_fasta] + query: [meta, hifiasm_fasta] + } + .set { ragtag_in } + ch_versions = ch_versions.mix(FLYE.out.versions) + } + if(params.assembler == "hifiasm_on_hifiasm") { + // Run hifiasm --ont + ont_reads + .map { meta, ontreads -> [meta, ontreads, []] } + .set { hifiasm_inputs } + HIFIASM_ONT(hifiasm_inputs,[[], [], []], [[], [], []]) + GFA_2_FA_ONT(HIFIASM_ONT.out.processed_contigs) + GFA_2_FA_ONT.out.contigs_fasta + .join( + GFA_2_FA_HIFI.out.contigs_fasta + ) + .multiMap { meta, ont_assembly, hifi_assembly -> + target: [meta, ont_assembly] + query: [meta, hifi_assembly] + } + .set { ragtag_in } + ch_versions = ch_versions.mix(HIFIASM_ONT.out.versions).mix(GFA_2_FA_ONT.out.versions) + } RAGTAG_PATCH(ragtag_in.target, ragtag_in.query, [[], []], [[], []] ) - // takes: meta, assembly (flye), reference (hifi) + // takes: meta, assembly (ont), reference (hifi) RAGTAG_PATCH.out.patch_fasta.set { ch_assembly } - ch_versions = 
ch_versions.mix(FLYE.out.versions).mix(RAGTAG_PATCH.out.versions).mix(HIFIASM.out.versions) + ch_versions = ch_versions.mix(RAGTAG_PATCH.out.versions) } } /* From e5043a0c0718a12f2fafab9c973373f53b120f00 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Mon, 14 Apr 2025 17:01:59 +0200 Subject: [PATCH 15/34] switch to LINKS nf-core module (#148) * switch to LINKS nf-core module * update changelog * update LINKS args --- CHANGELOG.md | 2 + conf/modules.config | 4 +- modules.json | 5 + modules/local/links/environment.yml | 6 - modules/local/links/main.nf | 45 --- modules/nf-core/links/environment.yml | 6 + modules/nf-core/links/main.nf | 85 +++++ modules/nf-core/links/meta.yml | 175 ++++++++++ modules/nf-core/links/tests/main.nf.test | 163 +++++++++ modules/nf-core/links/tests/main.nf.test.snap | 323 ++++++++++++++++++ modules/nf-core/links/tests/nextflow.config | 5 + subworkflows/local/scaffolding/links/main.nf | 10 +- 12 files changed, 774 insertions(+), 55 deletions(-) delete mode 100644 modules/local/links/environment.yml delete mode 100644 modules/local/links/main.nf create mode 100644 modules/nf-core/links/environment.yml create mode 100644 modules/nf-core/links/main.nf create mode 100644 modules/nf-core/links/meta.yml create mode 100644 modules/nf-core/links/tests/main.nf.test create mode 100644 modules/nf-core/links/tests/main.nf.test.snap create mode 100644 modules/nf-core/links/tests/nextflow.config diff --git a/CHANGELOG.md b/CHANGELOG.md index 877cf201..90afe04a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [#145](https://github.com/nf-core/genomeassembler/pull/145) - Fixed `--skip_assembly` input validation bug. 
+[#148](https://github.com/nf-core/genomeassembler/pull/148) - Switched to LINKS nf-core module + ### `Dependencies` ### `Deprecated` diff --git a/conf/modules.config b/conf/modules.config index f895f8d4..19737fe7 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -217,7 +217,6 @@ process { /* ---------- Scaffolding - None of the tools used has a core module. ---------- */ // RagTag @@ -243,7 +242,10 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] ext.prefix = { "${meta.id}_links" } + ext.args = ["-t 40,200", "-d 500,2000,5000"].join(" ").trim() } + + // No nf-core module yet. withName: LONGSTITCH { publishDir = [ path: { "${params.outdir}/${meta.id}/scaffold/longstitch/" }, diff --git a/modules.json b/modules.json index 230b56a1..0838f782 100644 --- a/modules.json +++ b/modules.json @@ -36,6 +36,11 @@ "git_sha": "1c4249137bdcd4392317e34123c00b5049c58d45", "installed_by": ["modules"] }, + "links": { + "branch": "master", + "git_sha": "e29af567bb2a1095fd23f284f777d31eba92310e", + "installed_by": ["modules"] + }, "merqury/merqury": { "branch": "master", "git_sha": "42140b76b12c18dbde34bfa7f2ef09afae8b054f", diff --git a/modules/local/links/environment.yml b/modules/local/links/environment.yml deleted file mode 100644 index 862ed92f..00000000 --- a/modules/local/links/environment.yml +++ /dev/null @@ -1,6 +0,0 @@ -channels: - - conda-forge - - bioconda - -dependencies: - - bioconda::links=2.0.1 diff --git a/modules/local/links/main.nf b/modules/local/links/main.nf deleted file mode 100644 index 9b3a2374..00000000 --- a/modules/local/links/main.nf +++ /dev/null @@ -1,45 +0,0 @@ -process LINKS { - tag "${meta.id}" - label 'process_high' - - conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 
'https://depot.galaxyproject.org/singularity/links:2.0.1--h4ac6f70_5' - : 'biocontainers/links:2.0.1--h4ac6f70_5'}" - - input: - tuple val(meta), path(assembly), path(reads) - - output: - tuple val(meta), path("*.scaffolds.fa"), emit: scaffolds - tuple val(meta), path("*.scaffolds"), emit: scaffold_csv - tuple val(meta), path("*.gv"), emit: graph - tuple val(meta), path("*.log"), emit: log - path "versions.yml", emit: versions - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - echo "${reads}" > readfile.fof - LINKS -f ${assembly} -s readfile.fof -j 3 -b ${prefix} -t 40,200 -d 500,2000,5000 - sed -i 's/\\(scaffold[0-9]*\\).*/\\1/' ${prefix}.scaffolds.fa - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - LINKS: \$(echo \$(LINKS | grep -o 'LINKS v.*' | sed 's/LINKS v//')) - END_VERSIONS - """ - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.scaffolds.fa - touch ${prefix}.scaffolds - touch ${prefix}.gv - touch ${prefix}.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - LINKS: \$(echo \$(LINKS | grep -o 'LINKS v.*' | sed 's/LINKS v//')) - END_VERSIONS - """ -} diff --git a/modules/nf-core/links/environment.yml b/modules/nf-core/links/environment.yml new file mode 100644 index 00000000..9b3fd0b6 --- /dev/null +++ b/modules/nf-core/links/environment.yml @@ -0,0 +1,6 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::links=2.0.1" diff --git a/modules/nf-core/links/main.nf b/modules/nf-core/links/main.nf new file mode 100644 index 00000000..c55bc661 --- /dev/null +++ b/modules/nf-core/links/main.nf @@ -0,0 +1,85 @@ +process LINKS { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/links:2.0.1--h4ac6f70_5': + 'biocontainers/links:2.0.1--h4ac6f70_5' }" + + input: + tuple val(meta), path(assembly) + tuple val(meta2), path(reads) + + output: + tuple val(meta), path("*.log"), emit: log + tuple val(meta), path("*.pairing_distribution.csv"), emit: pairing_distribution, optional: true + tuple val(meta), path("*.pairing_issues"), emit: pairing_issues + tuple val(meta), path("*.scaffolds"), emit: scaffolds_csv + tuple val(meta), path("*.scaffolds.fa"), emit: scaffolds_fasta + tuple val(meta), path("*.bloom"), emit: bloom + tuple val(meta), path("*.gv"), emit: scaffolds_graph + tuple val(meta), path("*.assembly_correspondence.tsv"), emit: assembly_correspondence + tuple val(meta), path("*.simplepair_checkpoint.tsv"), emit: simplepair_checkpoint, optional: true + tuple val(meta), path("*.tigpair_checkpoint.tsv"), emit: tigpair_checkpoint + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + // LINKS does not support more than 4 threads: cap task.cpus numerically (comparing the interpolated string with 4 is not a numeric comparison) + def nthreads = Math.min(task.cpus as int, 4)
+ def args = task.ext.args ?: "" + """ + if [[ ${assembly} == *.gz ]]; + then + gzip -dc ${assembly} > assembly.fa + else + ln -s ${assembly} assembly.fa + fi + + for read_file in ${reads}; + do + if [[ \$read_file == *.gz ]]; + then + gzip -dc \$read_file > \$(basename \$read_file .gz) + echo \$(basename \$read_file .gz) >> readfile.fof + else + echo \$read_file >> readfile.fof + fi + done + + LINKS -f assembly.fa \\ + -s readfile.fof \\ + -j $nthreads \\ + -b ${prefix} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + LINKS: \$(echo \$(LINKS | grep -o 'LINKS v.*' | sed 's/LINKS v//')) + END_VERSIONS + """ + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.log + touch ${prefix}.pairing_distribution.csv + touch ${prefix}.pairing_issues + touch ${prefix}.scaffolds + touch ${prefix}.scaffolds.fa + touch ${prefix}.bloom + touch ${prefix}.gv + touch ${prefix}.assembly_correspondence.tsv + touch ${prefix}.simplepair_checkpoint.tsv + touch ${prefix}.tigpair_checkpoint.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + LINKS: \$(echo \$(LINKS | grep -o 'LINKS v.*' | sed 's/LINKS v//')) + END_VERSIONS + """ + } diff --git a/modules/nf-core/links/meta.yml b/modules/nf-core/links/meta.yml new file mode 100644 index 00000000..852cf2bb --- /dev/null +++ b/modules/nf-core/links/meta.yml @@ -0,0 +1,175 @@ +--- +name: "links" +description: | + LINKS is a genomics application for scaffolding genome assemblies with long reads, + such as those produced by Oxford Nanopore Technologies Ltd. + It can be used to scaffold high-quality draft genome assemblies with any long sequences + (eg. ONT reads, PacBio reads, other draft genomes, etc). + It is also used to scaffold contig pairs linked by ARCS/ARKS. + This module is for LINKS >=2.0.0 and does not support MPET input.
+keywords: + - scaffold + - long-reads + - genomics +tools: + - "links": + description: "Long Interval Nucleotide K-mer Scaffolder" + homepage: "https://www.bcgsc.ca/resources/software/links" + documentation: "https://github.com/bcgsc/LINKS" + tool_dev_url: "https://github.com/bcgsc/LINKS" + doi: "10.1186/s13742-015-0076-3" + licence: ["GPL v3"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - assembly: + type: file + description: (Multi-)fasta file containing the draft assembly + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - reads: + type: file + description: fastq file(s) containing the long reads to be used for scaffolding + pattern: "*.{fq,fastq,fq.gz,fastq.gz}" + +output: + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.log": + type: file + description: text file; Logs execution time / errors / pairing stats. + pattern: "*.log" + - pairing_distribution: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.pairing_distribution.csv": + type: file + description: | + comma-separated file; 1st column is the calculated distance + for each pair (template) with reads that assembled logically + within the same contig. 2nd column is the number of pairs at + that distance. + pattern: "*.pairing_distribution.csv" + - pairing_issues: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.pairing_issues": + type: file + description: | + text file; Lists all pairing issues encountered between contig + pairs and illogical/out-of-bounds pairing. + pattern: "*.pairing_issues" + - scaffolds_csv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1']` + - "*.scaffolds": + type: file + description: comma-separated file; containing the new scaffold(s) + pattern: "*.scaffolds" + - scaffolds_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.scaffolds.fa": + type: file + description: fasta file of the new scaffold sequence + pattern: "*.scaffolds.fa" + - bloom: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.bloom": + type: file + description: | + Bloom filter created by shredding the -f input + into k-mers of size -k + pattern: "*.bloom" + - scaffolds_graph: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.gv": + type: file + description: | + scaffold graph (for visualizing merges), can be rendered + in neato, graphviz, etc + pattern: "*.gv" + - assembly_correspondence: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.assembly_correspondence.tsv": + type: file + description: | + correspondence file lists the scaffold ID, + contig ID, original_name, #linking kmer pairs, + links ratio, gap or overlap + pattern: "*.assembly_correspondence.tsv" + - simplepair_checkpoint: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.simplepair_checkpoint.tsv": + type: file + description: checkpoint file, contains info to rebuild datastructure for .gv graph + pattern: "*.simplepair_checkpoint.tsv" + - tigpair_checkpoint: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.tigpair_checkpoint.tsv": + type: file + description: | + if -b BASENAME.tigpair_checkpoint.tsv is present, + LINKS will skip the kmer pair extraction and contig pairing stages. + Delete this file to force LINKS to start at the beginning.
+ This file can be used to: + 1) quickly test parameters (-l min. links / -a min. links ratio), + 2) quickly recover from crash, + 3) explore very large kmer spaces, + 4) scaffold with output of ARCS + pattern: "*.tigpair_checkpoint.tsv" + - versions: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@nschan" +maintainers: + - "@nschan" diff --git a/modules/nf-core/links/tests/main.nf.test b/modules/nf-core/links/tests/main.nf.test new file mode 100644 index 00000000..4b449ab2 --- /dev/null +++ b/modules/nf-core/links/tests/main.nf.test @@ -0,0 +1,163 @@ +nextflow_process { + name "Test Process LINKS" + script "../main.nf" + process "LINKS" + + tag "modules" + tag "modules_nfcore" + tag "links" + + test("LINKS - LINKS test data") { + config './nextflow.config' + + when { + params { + module_args = "-d 1000,2000,3000,4000,5000,6000,7000,8000,9000,10000,12000,14000,16000,18000,20000" + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/scaffolding/LINKS/contigs.fasta', checkIfExists: true), + ] + input[1] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/scaffolding/LINKS/genome.fasta', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log[0][1]).name, + process.out.pairing_issues, + process.out.scaffolds_csv, + process.out.scaffolds_fasta, + process.out.bloom, + file(process.out.scaffolds_graph[0][1]).name, + process.out.assembly_correspondence, + process.out.tigpair_checkpoint, + process.out.versions + ).match() + } + ) + } + + } + + test("LINKS - longstitch test data 1") { + config './nextflow.config' + + when { + params { + module_args = "-d 1000,2000,3000,4000,5000,6000,7000,8000,9000,10000,12000,14000,16000,18000,20000" + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 
'genomics/scaffolding/longstitch/test_scaffolds1.fa', checkIfExists: true), + ] + input[1] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/scaffolding/longstitch/test_reads1.fa.gz', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log[0][1]).name, + file(process.out.pairing_issues[0][1]).name, + process.out.scaffolds_csv, + process.out.scaffolds_fasta, + process.out.bloom, + file(process.out.scaffolds_graph[0][1]).name, + file(process.out.assembly_correspondence[0][1]).name, + file(process.out.tigpair_checkpoint[0][1]).name, + process.out.versions + ).match() + } + ) + } + + } + test("LINKS - longstitch test data 2") { + config './nextflow.config' + + when { + params { + module_args = "-d 1000,2000,3000,4000,5000,6000,7000,8000,9000,10000,12000,14000,16000,18000,20000" + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/scaffolding/longstitch/test_scaffolds2.fa', checkIfExists: true), + ] + input[1] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/scaffolding/longstitch/test_reads2.fa.gz', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log[0][1]).name, + file(process.out.pairing_issues[0][1]).name, + process.out.scaffolds_csv, + process.out.scaffolds_fasta, + process.out.bloom, + file(process.out.scaffolds_graph[0][1]).name, + file(process.out.assembly_correspondence[0][1]).name, + file(process.out.tigpair_checkpoint[0][1]).name, + process.out.versions + ).match() + } + ) + } + + } + test("LINKS - stub") { + + options "-stub" + config './nextflow.config' + + when { + params { + module_args = "-d 1000,2000,3000,4000,5000,6000,7000,8000,9000,10000,12000,14000,16000,18000,20000" + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 
'genomics/scaffolding/LINKS/contigs.fasta', checkIfExists: true), + ] + input[1] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/scaffolding/LINKS/genome.fasta', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/links/tests/main.nf.test.snap b/modules/nf-core/links/tests/main.nf.test.snap new file mode 100644 index 00000000..0e2507a3 --- /dev/null +++ b/modules/nf-core/links/tests/main.nf.test.snap @@ -0,0 +1,323 @@ +{ + "LINKS - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.pairing_distribution.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + "versions.yml:md5,f58863e433b849b1ef0dfc19cb57656b" + ], + "2": [ + [ + { + "id": "test" + }, + "test.pairing_issues:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.scaffolds:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.scaffolds.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.bloom:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test" + }, + "test.gv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test" + }, + "test.assembly_correspondence.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test" + }, + "test.simplepair_checkpoint.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test" + }, + "test.tigpair_checkpoint.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "assembly_correspondence": [ + [ + { + "id": "test" + }, + "test.assembly_correspondence.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bloom": [ + [ + { + "id": "test" + }, + "test.bloom:md5,d41d8cd98f00b204e9800998ecf8427e" 
+ ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pairing_distribution": [ + [ + { + "id": "test" + }, + "test.pairing_distribution.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pairing_issues": [ + [ + { + "id": "test" + }, + "test.pairing_issues:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "scaffolds_csv": [ + [ + { + "id": "test" + }, + "test.scaffolds:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "scaffolds_fasta": [ + [ + { + "id": "test" + }, + "test.scaffolds.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "scaffolds_graph": [ + [ + { + "id": "test" + }, + "test.gv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "simplepair_checkpoint": [ + [ + { + "id": "test" + }, + "test.simplepair_checkpoint.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tigpair_checkpoint": [ + [ + { + "id": "test" + }, + "test.tigpair_checkpoint.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f58863e433b849b1ef0dfc19cb57656b" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-04-11T11:49:53.947870525" + }, + "LINKS - longstitch test data 1": { + "content": [ + "test.log", + "test.pairing_issues", + [ + [ + { + "id": "test" + }, + "test.scaffolds:md5,e12db5e5e6a1b5e26d2b50b6256c960d" + ] + ], + [ + [ + { + "id": "test" + }, + "test.scaffolds.fa:md5,afe56607a3d2c3b1e2d605bdf7ca545f" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bloom:md5,b912b31cfadaf81e722f5441ac639f60" + ] + ], + "test.gv", + "test.assembly_correspondence.tsv", + "test.tigpair_checkpoint.tsv", + [ + "versions.yml:md5,f58863e433b849b1ef0dfc19cb57656b" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-04-11T13:39:00.252899964" + }, + "LINKS - longstitch test data 2": { + "content": [ + "test.log", + "test.pairing_issues", + [ + [ + { + "id": "test" + }, + 
"test.scaffolds:md5,00a943691b987adebe0ab40efced9c7e" + ] + ], + [ + [ + { + "id": "test" + }, + "test.scaffolds.fa:md5,ed43c629e8d440e3cf6fb8b21742557c" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bloom:md5,695262bb4beda52665d2f7ec476a4e7b" + ] + ], + "test.gv", + "test.assembly_correspondence.tsv", + "test.tigpair_checkpoint.tsv", + [ + "versions.yml:md5,f58863e433b849b1ef0dfc19cb57656b" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-04-11T13:39:36.483298325" + }, + "LINKS - LINKS test data": { + "content": [ + "test.log", + [ + [ + { + "id": "test" + }, + "test.pairing_issues:md5,eb8b04b4ec170a319b40d2ee16a5cf96" + ] + ], + [ + [ + { + "id": "test" + }, + "test.scaffolds:md5,afe5339405b830e97095058080550064" + ] + ], + [ + [ + { + "id": "test" + }, + "test.scaffolds.fa:md5,4aa442ee4b05e1608daf7b5b033a4203" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bloom:md5,96f54f577d1c589251ea0cfe624b898d" + ] + ], + "test.gv", + [ + [ + { + "id": "test" + }, + "test.assembly_correspondence.tsv:md5,0efc40db474ba8d5b334ad48add4bd9d" + ] + ], + [ + [ + { + "id": "test" + }, + "test.tigpair_checkpoint.tsv:md5,9208c8fe686b5989eaec1485a74cf44e" + ] + ], + [ + "versions.yml:md5,f58863e433b849b1ef0dfc19cb57656b" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-04-11T11:30:01.790074303" + } +} \ No newline at end of file diff --git a/modules/nf-core/links/tests/nextflow.config b/modules/nf-core/links/tests/nextflow.config new file mode 100644 index 00000000..1279535c --- /dev/null +++ b/modules/nf-core/links/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'LINKS' { + ext.args = params.module_args + } +} diff --git a/subworkflows/local/scaffolding/links/main.nf b/subworkflows/local/scaffolding/links/main.nf index 21edf95d..4493e4c4 100644 --- a/subworkflows/local/scaffolding/links/main.nf +++ b/subworkflows/local/scaffolding/links/main.nf @@ -1,4 +1,4 @@ -include { LINKS } 
from '../../../../modules/local/links/main' +include { LINKS } from '../../../../modules/nf-core/links/main' include { QC } from '../../qc/main' include { RUN_LIFTOFF } from '../../liftoff/main' @@ -16,10 +16,14 @@ workflow RUN_LINKS { assembly .join(in_reads) + .multiMap { meta, assembly_fa, reads -> + assembly: [meta, assembly_fa] + reads: [meta, reads] + } .set { links_in } - LINKS(links_in) - LINKS.out.scaffolds.set { scaffolds } + LINKS(links_in.assembly, links_in.reads) + LINKS.out.scaffolds_fasta.set { scaffolds } ch_versions = ch_versions.mix(LINKS.out.versions) From 5cf860501020791bc856f2ec87b8426e21c36a5b Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Thu, 17 Apr 2025 08:40:25 +0200 Subject: [PATCH 16/34] enable mapping to ref for hifiasm_on_hifiasm (#149) This fixes an oversight where the mapping to reference was skipped with hifiasm_on_hifiasm, because the reads were not specified for this strategy. --- subworkflows/local/assemble/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/assemble/main.nf b/subworkflows/local/assemble/main.nf index 9668d8c9..db6a3204 100644 --- a/subworkflows/local/assemble/main.nf +++ b/subworkflows/local/assemble/main.nf @@ -174,7 +174,7 @@ workflow ASSEMBLE { .map { meta, reads -> [[id: meta.id], reads] } .set { longreads } } - if (params.assembler == "hifiasm" || params.assembler == "flye_on_hifiasm") { + if (params.assembler == "hifiasm" || params.assembler == "flye_on_hifiasm" || params.assembler == "hifiasm_on_hifiasm") { hifiasm_inputs .map { meta, long_reads, _ultralong -> [meta, long_reads] } .set { longreads } From 71a1c855af96a60d6f253d9ce29aae9fa7af0bd1 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Tue, 22 Apr 2025 16:36:12 +0200 Subject: [PATCH 17/34] nf-core/modules update (#151) * modules update * subworkflow update after module update --- modules.json | 42 +- modules/nf-core/busco/busco/environment.yml | 5 +- modules/nf-core/busco/busco/main.nf | 84 ++- 
modules/nf-core/busco/busco/meta.yml | 33 + .../nf-core/busco/busco/tests/main.nf.test | 89 ++- .../busco/busco/tests/main.nf.test.snap | 79 ++- .../busco/tests/nextflow.augustus.config | 5 - .../nf-core/busco/busco/tests/nextflow.config | 2 +- .../busco/busco/tests/nextflow.metaeuk.config | 5 - .../nf-core/busco/busco/tests/old_test.yml | 624 ------------------ modules/nf-core/busco/busco/tests/tags.yml | 2 - modules/nf-core/fastqc/environment.yml | 2 + modules/nf-core/fastqc/main.nf | 2 +- modules/nf-core/flye/environment.yml | 2 + modules/nf-core/flye/tests/tags.yml | 2 - modules/nf-core/hifiasm/environment.yml | 4 +- modules/nf-core/hifiasm/main.nf | 175 +++-- modules/nf-core/hifiasm/meta.yml | 117 ++-- modules/nf-core/hifiasm/tests/main.nf.test | 268 +++++++- .../nf-core/hifiasm/tests/main.nf.test.snap | 283 +++++--- modules/nf-core/hifiasm/tests/nextflow.config | 2 +- modules/nf-core/liftoff/environment.yml | 2 + modules/nf-core/lima/environment.yml | 2 + modules/nf-core/lima/tests/tags.yml | 2 - .../nf-core/merqury/merqury/environment.yml | 2 + .../merqury/merqury/merqury-merqury.diff | 1 - .../nf-core/merqury/merqury/tests/tags.yml | 2 - modules/nf-core/meryl/count/environment.yml | 2 + modules/nf-core/meryl/count/tests/tags.yml | 2 - .../nf-core/meryl/unionsum/environment.yml | 2 + modules/nf-core/meryl/unionsum/tests/tags.yml | 2 - .../minimap2/align/minimap2-align.diff | 1 - modules/nf-core/minimap2/align/tests/tags.yml | 2 - modules/nf-core/pilon/environment.yml | 2 + modules/nf-core/pilon/tests/tags.yml | 2 - .../nf-core/porechop/porechop/environment.yml | 3 + modules/nf-core/porechop/porechop/main.nf | 7 +- .../nf-core/porechop/porechop/tests/tags.yml | 2 - modules/nf-core/samtools/fastq/main.nf | 15 + .../nf-core/samtools/fastq/tests/main.nf.test | 52 ++ .../samtools/fastq/tests/main.nf.test.snap | 148 +++++ modules/nf-core/samtools/fastq/tests/tags.yml | 2 - .../nf-core/samtools/flagstat/tests/tags.yml | 2 - 
.../nf-core/samtools/idxstats/tests/tags.yml | 2 - modules/nf-core/samtools/index/tests/tags.yml | 2 - modules/nf-core/samtools/sort/tests/tags.yml | 3 - modules/nf-core/samtools/stats/tests/tags.yml | 2 - modules/nf-core/trimgalore/environment.yml | 2 + modules/nf-core/trimgalore/tests/tags.yml | 2 - subworkflows/local/assemble/main.nf | 20 +- .../bam_sort_stats_samtools/tests/tags.yml | 2 - .../nf-core/bam_stats_samtools/tests/tags.yml | 2 - 52 files changed, 1081 insertions(+), 1040 deletions(-) delete mode 100644 modules/nf-core/busco/busco/tests/nextflow.augustus.config delete mode 100644 modules/nf-core/busco/busco/tests/nextflow.metaeuk.config delete mode 100644 modules/nf-core/busco/busco/tests/old_test.yml delete mode 100644 modules/nf-core/busco/busco/tests/tags.yml delete mode 100644 modules/nf-core/flye/tests/tags.yml delete mode 100644 modules/nf-core/lima/tests/tags.yml delete mode 100644 modules/nf-core/merqury/merqury/tests/tags.yml delete mode 100644 modules/nf-core/meryl/count/tests/tags.yml delete mode 100644 modules/nf-core/meryl/unionsum/tests/tags.yml delete mode 100644 modules/nf-core/minimap2/align/tests/tags.yml delete mode 100644 modules/nf-core/pilon/tests/tags.yml delete mode 100644 modules/nf-core/porechop/porechop/tests/tags.yml delete mode 100644 modules/nf-core/samtools/fastq/tests/tags.yml delete mode 100644 modules/nf-core/samtools/flagstat/tests/tags.yml delete mode 100644 modules/nf-core/samtools/idxstats/tests/tags.yml delete mode 100644 modules/nf-core/samtools/index/tests/tags.yml delete mode 100644 modules/nf-core/samtools/sort/tests/tags.yml delete mode 100644 modules/nf-core/samtools/stats/tests/tags.yml delete mode 100644 modules/nf-core/trimgalore/tests/tags.yml delete mode 100644 subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml delete mode 100644 subworkflows/nf-core/bam_stats_samtools/tests/tags.yml diff --git a/modules.json b/modules.json index 0838f782..e121a80e 100644 --- a/modules.json +++ b/modules.json 
@@ -7,33 +7,33 @@ "nf-core": { "busco/busco": { "branch": "master", - "git_sha": "d34caf3c0d3cf5b9bae0fae6107bab0933c96f37", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "08108058ea36a63f141c25c4e75f9f872a5b2296", + "git_sha": "b1966f36ec9de31927b2603d8f499960b2a4c294", "installed_by": ["modules"] }, "flye": { "branch": "master", - "git_sha": "ce1a66562156776bb0dd1c1bb5640d368dadd4e6", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "hifiasm": { "branch": "master", - "git_sha": "ea8610177512dff284c7d3dc0c8dcbcc12fb4cf0", + "git_sha": "971d801da83f2e50ab5935f93e40375adaa79b51", "installed_by": ["modules"] }, "liftoff": { "branch": "master", - "git_sha": "32b1f7f4e3d5c6e68af8f1d732f61c3d2aa6e67e", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"], "patch": "modules/nf-core/liftoff/liftoff.diff" }, "lima": { "branch": "master", - "git_sha": "1c4249137bdcd4392317e34123c00b5049c58d45", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "links": { @@ -43,34 +43,34 @@ }, "merqury/merqury": { "branch": "master", - "git_sha": "42140b76b12c18dbde34bfa7f2ef09afae8b054f", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"], "patch": "modules/nf-core/merqury/merqury/merqury-merqury.diff" }, "meryl/count": { "branch": "master", - "git_sha": "42140b76b12c18dbde34bfa7f2ef09afae8b054f", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "meryl/unionsum": { "branch": "master", - "git_sha": "42140b76b12c18dbde34bfa7f2ef09afae8b054f", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "minimap2/align": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"], "patch": 
"modules/nf-core/minimap2/align/minimap2-align.diff" }, "pilon": { "branch": "master", - "git_sha": "2816479e1de5c04f0907f74a53a2096ceaff558e", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "porechop/porechop": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "dbf496251becaa54933305bb494b880253a84ee6", "installed_by": ["modules"] }, "ragtag/patch": { @@ -85,37 +85,37 @@ }, "samtools/fastq": { "branch": "master", - "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", - "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/idxstats": { "branch": "master", - "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/index": { "branch": "master", - "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/sort": { "branch": "master", - "git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/stats": { "branch": "master", - "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["bam_stats_samtools", "modules"] }, "trimgalore": { "branch": "master", - "git_sha": "8d3e71002c5008e3f68a691ad8cd32c346356258", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] } } @@ -124,12 +124,12 @@ "nf-core": { "bam_sort_stats_samtools": { "branch": "master", - "git_sha": 
"763d4b5c05ffda3ac1ac969dc67f7458cfb2eb1d", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["subworkflows"] }, "bam_stats_samtools": { "branch": "master", - "git_sha": "763d4b5c05ffda3ac1ac969dc67f7458cfb2eb1d", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["bam_sort_stats_samtools", "subworkflows"] }, "utils_nextflow_pipeline": { diff --git a/modules/nf-core/busco/busco/environment.yml b/modules/nf-core/busco/busco/environment.yml index 53e5e90e..ba8a40c0 100644 --- a/modules/nf-core/busco/busco/environment.yml +++ b/modules/nf-core/busco/busco/environment.yml @@ -1,6 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::busco=5.8.2 + - bioconda::busco=5.8.3 + - bioconda::sepp=4.5.5 diff --git a/modules/nf-core/busco/busco/main.nf b/modules/nf-core/busco/busco/main.nf index 609cae95..05ac4295 100644 --- a/modules/nf-core/busco/busco/main.nf +++ b/modules/nf-core/busco/busco/main.nf @@ -1,55 +1,65 @@ process BUSCO_BUSCO { - tag "$meta.id" + tag "${meta.id}" label 'process_medium' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/busco:5.8.2--pyhdfd78af_0': - 'biocontainers/busco:5.8.2--pyhdfd78af_0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c6/c607f319867d96a38c8502f751458aa78bbd18fe4c7c4fa6b9d8350e6ba11ebe/data' + : 'community.wave.seqera.io/library/busco_sepp:f2dbc18a2f7a5b64'}" input: tuple val(meta), path(fasta, stageAs:'tmp_input/*') val mode // Required: One of genome, proteins, or transcriptome val lineage // Required: lineage for checking against, or "auto/auto_prok/auto_euk" for enabling auto-lineage - path busco_lineages_path // Recommended: busco lineages file - downloads if not set - path config_file // Optional: busco configuration file + path busco_lineages_path // Recommended: BUSCO lineages file - downloads if not set + path config_file // Optional: BUSCO configuration file + val clean_intermediates // Optional: Remove intermediate files output: - tuple val(meta), path("*-busco.batch_summary.txt") , emit: batch_summary - tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt , optional: true - tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json , optional: true - tuple val(meta), path("*-busco/*/run_*/full_table.tsv") , emit: full_table , optional: true - tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv") , emit: missing_busco_list , optional: true - tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa") , emit: single_copy_proteins , optional: true - tuple val(meta), path("*-busco/*/run_*/busco_sequences") , emit: seq_dir , optional: true - tuple val(meta), path("*-busco/*/translated_proteins") , emit: translated_dir , optional: true - tuple val(meta), path("*-busco") , emit: busco_dir - tuple val(meta), path("busco_downloads/lineages/*") , emit: downloaded_lineages , optional: true + tuple val(meta), path("*-busco.batch_summary.txt") , emit: batch_summary + tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt , optional: true + tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json, optional: true + tuple 
val(meta), path("*-busco.log") , emit: log , optional: true + tuple val(meta), path("*-busco/*/run_*/full_table.tsv") , emit: full_table , optional: true + tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv") , emit: missing_busco_list , optional: true + tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa") , emit: single_copy_proteins, optional: true + tuple val(meta), path("*-busco/*/run_*/busco_sequences") , emit: seq_dir , optional: true + tuple val(meta), path("*-busco/*/translated_proteins") , emit: translated_dir , optional: true + tuple val(meta), path("*-busco") , emit: busco_dir + tuple val(meta), path("busco_downloads/lineages/*") , emit: downloaded_lineages , optional: true + tuple val(meta), path("*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.faa"), emit: single_copy_faa , optional: true + tuple val(meta), path("*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.fna"), emit: single_copy_fna , optional: true - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - if ( mode !in [ 'genome', 'proteins', 'transcriptome' ] ) { - error "Mode must be one of 'genome', 'proteins', or 'transcriptome'." + if (mode !in ['genome', 'proteins', 'transcriptome']) { + error("Mode must be one of 'genome', 'proteins', or 'transcriptome'.") } def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" def busco_config = config_file ? "--config ${config_file}" : '' - def busco_lineage = lineage in [ 'auto', 'auto_prok', 'auto_euk'] + def busco_lineage = lineage in ['auto', 'auto_prok', 'auto_euk'] ? lineage.replaceFirst('auto', '--auto-lineage').replaceAll('_', '-') : "--lineage_dataset ${lineage}" def busco_lineage_dir = busco_lineages_path ? 
"--download_path ${busco_lineages_path}" : '' + def intermediate_files = [ + './*-busco/*/auto_lineage', + './*-busco/*/**/{miniprot,hmmer,.bbtools}_output', + './*-busco/*/prodigal_output/predicted_genes/tmp/', + ] + def clean_cmd = clean_intermediates ? "rm -fr ${intermediate_files.join(' ')}" : '' """ - # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute) - # Check for container variable initialisation script and source it. - if [ -f "/usr/local/env-activate.sh" ]; then - set +u # Otherwise, errors out because of various unbound variables - . "/usr/local/env-activate.sh" - set -u + # Fix Augustus for Apptainer + ENV_AUGUSTUS=/opt/conda/etc/conda/activate.d/augustus.sh + set +u + if [ -z "\${AUGUSTUS_CONFIG_PATH}" ] && [ -f "\${ENV_AUGUSTUS}" ]; then + source "\${ENV_AUGUSTUS}" fi + set -u # If the augustus config directory is not writable, then copy to writeable area if [ ! -w "\${AUGUSTUS_CONFIG_PATH}" ]; then @@ -74,21 +84,25 @@ process BUSCO_BUSCO { cd .. busco \\ - --cpu $task.cpus \\ + --cpu ${task.cpus} \\ --in "\$INPUT_SEQS" \\ --out ${prefix}-busco \\ - --mode $mode \\ - $busco_lineage \\ - $busco_lineage_dir \\ - $busco_config \\ - $args + --mode ${mode} \\ + ${busco_lineage} \\ + ${busco_lineage_dir} \\ + ${busco_config} \\ + ${args} # clean up rm -rf "\$INPUT_SEQS" + ${clean_cmd} + # find and remove broken symlinks from the cleanup + find . -xtype l -delete # Move files to avoid staging/publishing issues mv ${prefix}-busco/batch_summary.txt ${prefix}-busco.batch_summary.txt mv ${prefix}-busco/*/short_summary.*.{json,txt} . || echo "Short summaries were not available: No genes were found." 
+ mv ${prefix}-busco/logs/busco.log ${prefix}-busco.log cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -97,11 +111,11 @@ process BUSCO_BUSCO { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" - def fasta_name = files(fasta).first().name - '.gz' + def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" + def fasta_name = files(fasta).first().name - '.gz' """ touch ${prefix}-busco.batch_summary.txt - mkdir -p ${prefix}-busco/$fasta_name/run_${lineage}/busco_sequences + mkdir -p ${prefix}-busco/${fasta_name}/run_${lineage}/busco_sequences cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/busco/busco/meta.yml b/modules/nf-core/busco/busco/meta.yml index 8f719e08..0222e490 100644 --- a/modules/nf-core/busco/busco/meta.yml +++ b/modules/nf-core/busco/busco/meta.yml @@ -40,6 +40,9 @@ input: - - config_file: type: file description: Path to BUSCO config file. + - - clean_intermediates: + type: boolean + description: Flag to remove intermediate files. output: - batch_summary: - meta: @@ -71,6 +74,16 @@ output: type: file description: Short Busco summary in JSON format pattern: "short_summary.*.json" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*-busco.log": + type: file + description: BUSCO main log + pattern: "*-busco.log" - full_table: - meta: type: map @@ -142,6 +155,26 @@ output: type: directory description: Lineages downloaded by BUSCO when running the analysis, for example bacteria_odb12 pattern: "busco_downloads/lineages/*" + - single_copy_faa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - "*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.faa": + type: file + description: Single copy .faa sequence files + pattern: "*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.faa" + - single_copy_fna: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.fna": + type: file + description: Single copy .fna sequence files + pattern: "*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.fna" - versions: - versions.yml: type: file diff --git a/modules/nf-core/busco/busco/tests/main.nf.test b/modules/nf-core/busco/busco/tests/main.nf.test index 55954a73..411ceb86 100644 --- a/modules/nf-core/busco/busco/tests/main.nf.test +++ b/modules/nf-core/busco/busco/tests/main.nf.test @@ -14,6 +14,9 @@ nextflow_process { config './nextflow.config' when { + params { + busco_args = '--tar' + } process { """ input[0] = [ @@ -24,6 +27,7 @@ nextflow_process { input[2] = 'bacteria_odb12' // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues input[3] = [] // Download busco lineage input[4] = [] // No config + input[5] = false // Clean intermediates """ } } @@ -59,7 +63,7 @@ nextflow_process { assert contains('fragmented_busco_sequences.tar.gz') } - with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + with(path(process.out.log[0][1]).text) { assert contains('DEBUG:busco.run_BUSCO') assert contains('Results from dataset') assert contains('how to cite BUSCO') @@ -75,6 +79,9 @@ nextflow_process { config './nextflow.config' when { + params { + busco_args = '--tar' + } process { """ input[0] = [ @@ -88,6 +95,7 @@ nextflow_process { input[2] = 'bacteria_odb12' input[3] = [] input[4] = [] + input[5] = false """ } } @@ -143,7 +151,7 @@ nextflow_process { assert contains('fragmented_busco_sequences.tar.gz') } - 
with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + with(path(process.out.log[0][1]).text) { assert contains('DEBUG:busco.run_BUSCO') assert contains('Results from dataset') assert contains('how to cite BUSCO') @@ -157,9 +165,12 @@ nextflow_process { test("test_busco_eukaryote_metaeuk") { - config './nextflow.metaeuk.config' + config './nextflow.config' when { + params { + busco_args = '--tar --metaeuk' + } process { """ input[0] = [ @@ -170,6 +181,7 @@ nextflow_process { input[2] = 'eukaryota_odb10' input[3] = [] input[4] = [] + input[5] = false """ } } @@ -205,7 +217,7 @@ nextflow_process { assert contains('fragmented_busco_sequences.tar.gz') } - with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + with(path(process.out.log[0][1]).text) { assert contains('DEBUG:busco.run_BUSCO') assert contains('Results from dataset') assert contains('how to cite BUSCO') @@ -220,9 +232,12 @@ nextflow_process { test("test_busco_eukaryote_augustus") { - config './nextflow.augustus.config' + config './nextflow.config' when { + params { + busco_args = '--tar --augustus' + } process { """ input[0] = [ @@ -233,6 +248,7 @@ nextflow_process { input[2] = 'eukaryota_odb10' input[3] = [] input[4] = [] + input[5] = false """ } } @@ -245,7 +261,7 @@ nextflow_process { process.out.versions[0] ).match() - with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + with(path(process.out.log[0][1]).text) { assert contains('DEBUG:busco.run_BUSCO') assert contains('Augustus did not recognize any genes') @@ -266,6 +282,9 @@ nextflow_process { config './nextflow.config' when { + params { + busco_args = '--tar' + } process { """ input[0] = [ @@ -276,6 +295,7 @@ nextflow_process { input[2] = 'bacteria_odb12' input[3] = [] input[4] = [] + input[5] = false """ } } @@ -311,7 +331,7 @@ nextflow_process { assert contains('fragmented_busco_sequences.tar.gz') } - with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + 
with(path(process.out.log[0][1]).text) { assert contains('DEBUG:busco.run_BUSCO') assert contains('Results from dataset') assert contains('how to cite BUSCO') @@ -328,6 +348,9 @@ nextflow_process { config './nextflow.config' when { + params { + busco_args = '--tar' + } process { """ input[0] = [ @@ -338,6 +361,7 @@ nextflow_process { input[2] = 'bacteria_odb12' input[3] = [] input[4] = [] + input[5] = false """ } } @@ -375,7 +399,7 @@ nextflow_process { assert contains('fragmented_busco_sequences.tar.gz') } - with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + with(path(process.out.log[0][1]).text) { assert contains('DEBUG:busco.run_BUSCO') assert contains('Results from dataset') assert contains('how to cite BUSCO') @@ -384,6 +408,53 @@ nextflow_process { } + test("test_busco_cleanup") { + + config './nextflow.config' + + when { + params { + busco_args = '--tar' + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'bacteria_odb12' + input[3] = [] + input[4] = [] + input[5] = true + """ + } + } + + then { + assert process.success + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(path(process.out.log[0][1]).text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + + assert path("${process.out.busco_dir[0][1]}/*/auto_lineage").exists() == false + assert path("${process.out.busco_dir[0][1]}/*/**/{miniprot,hmmer,.bbtools}_output").exists() == false + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + } + test("minimal-stub") { options '-stub' @@ -399,6 +470,7 @@ nextflow_process { input[2] = 'bacteria_odb12' input[3] = [] 
input[4] = [] + input[5] = false """ } } @@ -413,5 +485,4 @@ nextflow_process { ) } } - } \ No newline at end of file diff --git a/modules/nf-core/busco/busco/tests/main.nf.test.snap b/modules/nf-core/busco/busco/tests/main.nf.test.snap index 0f44373f..1026524b 100644 --- a/modules/nf-core/busco/busco/tests/main.nf.test.snap +++ b/modules/nf-core/busco/busco/tests/main.nf.test.snap @@ -10,75 +10,88 @@ ] ], [ - "versions.yml:md5,c6e638f981761c13cd9ff7663cf707e6" + "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.2" + "nextflow": "24.10.3" }, - "timestamp": "2024-12-13T15:30:45.505241761" + "timestamp": "2025-03-12T10:50:57.218573431" }, "test_busco_eukaryote_augustus": { "content": [ "test-eukaryota_odb10-busco.batch_summary.txt:md5,3ea3bdc423a461dae514d816bdc61c89", - "versions.yml:md5,c6e638f981761c13cd9ff7663cf707e6" + "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.3" }, - "timestamp": "2024-12-11T13:07:45.550722277" + "timestamp": "2025-03-12T10:44:25.359421247" }, "test_busco_genome_single_fasta": { "content": [ "test-bacteria_odb12-busco.batch_summary.txt:md5,e3e503e1540b633d95c273c465945740", - "full_table.tsv:md5,e2a08fdd9b2596322e70c5549d1affc7", - "missing_busco_list.tsv:md5,25417462f2c484f1942c86b21bcf77d0", - "versions.yml:md5,c6e638f981761c13cd9ff7663cf707e6" + "full_table.tsv:md5,086f2ecdc90d47745c828c9b25357039", + "missing_busco_list.tsv:md5,9919aee2da9d30a3985aede354850a46", + "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.3" }, - "timestamp": "2024-12-11T12:43:40.359736221" + "timestamp": "2025-03-12T10:41:46.251404188" }, "test_busco_genome_multi_fasta": { "content": [ [ - "full_table.tsv:md5,5e7df014f2804789f0d98ae2e09734ad", - "full_table.tsv:md5,e2a08fdd9b2596322e70c5549d1affc7" + 
"full_table.tsv:md5,5a6bf59055e2040e74797a1e36c8e374", + "full_table.tsv:md5,086f2ecdc90d47745c828c9b25357039" ], [ - "missing_busco_list.tsv:md5,d902f10173b463f81e4892ef64f63c50", - "missing_busco_list.tsv:md5,25417462f2c484f1942c86b21bcf77d0" + "missing_busco_list.tsv:md5,a55eee6869fad9176d812e59886232fb", + "missing_busco_list.tsv:md5,9919aee2da9d30a3985aede354850a46" ], - "versions.yml:md5,c6e638f981761c13cd9ff7663cf707e6" + "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.3" }, - "timestamp": "2024-12-11T17:17:42.175675858" + "timestamp": "2025-03-12T10:42:28.126899794" }, "test_busco_eukaryote_metaeuk": { "content": [ "test-eukaryota_odb10-busco.batch_summary.txt:md5,ff6d8277e452a83ce9456bbee666feb6", - "full_table.tsv:md5,9bfa9ef7d54ca6ad8bcf8e87729720b1", - "missing_busco_list.tsv:md5,325b529e5a8af2a392d747b4eddc150c", - "versions.yml:md5,c6e638f981761c13cd9ff7663cf707e6" + "full_table.tsv:md5,cfb55ab2ce590d2def51926324691aa8", + "missing_busco_list.tsv:md5,77e3d4503b2c13db0d611723fc83ab7e", + "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.3" }, - "timestamp": "2024-12-11T13:07:17.335085326" + "timestamp": "2025-03-12T10:43:59.997031348" + }, + "test_busco_cleanup": { + "content": [ + "test-bacteria_odb12-busco.batch_summary.txt:md5,e3e503e1540b633d95c273c465945740", + "full_table.tsv:md5,086f2ecdc90d47745c828c9b25357039", + "missing_busco_list.tsv:md5,9919aee2da9d30a3985aede354850a46", + "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-03-12T10:50:48.928173488" }, "test_busco_transcriptome": { "content": [ "test-bacteria_odb12-busco.batch_summary.txt:md5,6cd69d8a66b5f8b7fd4a9de758e7a739", - "full_table.tsv:md5,73a3a90c2fa8fef41cafed5a607fab66", - 
"missing_busco_list.tsv:md5,4778855c345f4e409750c9bbd38c5a0c", + "full_table.tsv:md5,4efc19f8d2cc7ea9e73425f09cb3ed97", + "missing_busco_list.tsv:md5,55f0322d494e5c165508712be63062bf", [ "9767721at2.faa:md5,1731738ca153959391f8302fd5a3679f", "9778364at2.faa:md5,7a19a6b6696ae53efce30457b4dd1ab2", @@ -125,25 +138,25 @@ "9814755at2.faa:md5,9b4c4648d250c2e6d04acb78f9cf6df0" ], "single_copy_proteins.faa:md5,14124def13668c6d9b0d589207754b31", - "versions.yml:md5,c6e638f981761c13cd9ff7663cf707e6" + "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.3" }, - "timestamp": "2024-12-11T12:45:43.336777509" + "timestamp": "2025-03-12T10:45:08.029718703" }, "test_busco_protein": { "content": [ "test-bacteria_odb12-busco.batch_summary.txt:md5,44d4cdebd61a3c8e8981ddf1829f83b3", - "full_table.tsv:md5,696bae3f377fd5dbaf19f1c522088d93", - "missing_busco_list.tsv:md5,d902f10173b463f81e4892ef64f63c50", - "versions.yml:md5,c6e638f981761c13cd9ff7663cf707e6" + "full_table.tsv:md5,350f9b1b6c37cfcf41be84e93ef41931", + "missing_busco_list.tsv:md5,a55eee6869fad9176d812e59886232fb", + "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.3" }, - "timestamp": "2024-12-11T12:45:16.960592213" + "timestamp": "2025-03-12T10:44:44.094048564" } } \ No newline at end of file diff --git a/modules/nf-core/busco/busco/tests/nextflow.augustus.config b/modules/nf-core/busco/busco/tests/nextflow.augustus.config deleted file mode 100644 index 84daa69d..00000000 --- a/modules/nf-core/busco/busco/tests/nextflow.augustus.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: 'BUSCO_BUSCO' { - ext.args = '--tar --augustus' - } -} diff --git a/modules/nf-core/busco/busco/tests/nextflow.config b/modules/nf-core/busco/busco/tests/nextflow.config index 1ec3fec0..db73a7ee 100644 --- a/modules/nf-core/busco/busco/tests/nextflow.config +++ 
b/modules/nf-core/busco/busco/tests/nextflow.config @@ -1,5 +1,5 @@ process { withName: 'BUSCO_BUSCO' { - ext.args = '--tar' + ext.args = params.busco_args } } diff --git a/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config b/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config deleted file mode 100644 index c1418445..00000000 --- a/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: 'BUSCO_BUSCO' { - ext.args = '--tar --metaeuk' - } -} diff --git a/modules/nf-core/busco/busco/tests/old_test.yml b/modules/nf-core/busco/busco/tests/old_test.yml deleted file mode 100644 index 75177f5d..00000000 --- a/modules/nf-core/busco/busco/tests/old_test.yml +++ /dev/null @@ -1,624 +0,0 @@ -- name: busco test_busco_genome_single_fasta - command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_single_fasta -c ./tests/config/nextflow.config - tags: - - busco - files: - - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt - contains: - - "BUSCO version" - - "The lineage dataset is" - - "BUSCO was run in mode" - - "Complete BUSCOs" - - "Missing BUSCOs" - - "Dependencies and versions" - - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt - md5sum: bc2440f8a68d7fbf931ff911c1c3fdfa - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log - md5sum: 9caf1a1434414c78562eb0bbb9c0e53f - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log - contains: - - "# hmmsearch :: search profile(s) against a sequence database" - - "# target sequence database:" - - "Internal pipeline statistics summary:" - - "[ok]" - - path: 
output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log - md5sum: 538510cfc7483498210f01e53fe035ad - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log - md5sum: 61050b0706addc9498b2088a2d6efa9a - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint - contains: - - "Tool: prodigal" - - "Completed" - - "jobs" - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa - md5sum: 836e9a80d33d8b89168f07ddc13ee991 - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna - md5sum: 20eeb75f86842e6e136f02bca8b73a9f - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa - md5sum: 836e9a80d33d8b89168f07ddc13ee991 - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna - md5sum: 20eeb75f86842e6e136f02bca8b73a9f - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log - md5sum: 538510cfc7483498210f01e53fe035ad - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log - md5sum: 61050b0706addc9498b2088a2d6efa9a - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint - contains: - - "Tool: bbtools" - - "Completed" - - "jobs" - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz - - path: 
output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv - md5sum: c56edab1dc1522e993c25ae2b730799f - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv - md5sum: b533ef30270f27160acce85a22d01bf5 - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json - contains: - - "one_line_summary" - - "mode" - - "lineage_dataset" - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt - contains: - - "# BUSCO version is:" - - "Results:" - - "busco:" - - path: output/busco/test-bacteria_odb10-busco/logs/busco.log - contains: - - "DEBUG:busco.run_BUSCO" - - "Results from dataset" - - "how to cite BUSCO" - - path: output/busco/versions.yml - -- name: busco test_busco_genome_multi_fasta - command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_multi_fasta -c ./tests/config/nextflow.config - tags: - - busco - files: - - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.txt - contains: - - "BUSCO version" - - "The lineage dataset is" - - "BUSCO was run in mode" - - "Complete BUSCOs" - - "Missing BUSCOs" - - "Dependencies and versions" - - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt - contains: - - "BUSCO version" - - "The lineage dataset is" - - "BUSCO was run in mode" - - "Complete BUSCOs" - - "Missing BUSCOs" - - "Dependencies and versions" - - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt - md5sum: 8c64c1a28b086ef2ee444f99cbed5f7d - - path: 
output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_err.log - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_out.log - md5sum: 8f047bdb33264d22a83920bc2c63f29a - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_err.log - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_out.log - contains: - - "# hmmsearch :: search profile(s) against a sequence database" - - "# target sequence database:" - - "Internal pipeline statistics summary:" - - "[ok]" - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_err.log - md5sum: c1fdc6977332f53dfe7f632733bb4585 - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_out.log - md5sum: 50752acb1c5a20be886bfdfc06635bcb - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/.checkpoint - contains: - - "Tool: prodigal" - - "Completed" - - "jobs" - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.faa - md5sum: 8166471fc5f08c82fd5643ab42327f9d - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.fna - md5sum: ddc508a18f60e7f3314534df50cdf8ca - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa - md5sum: 8166471fc5f08c82fd5643ab42327f9d - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna - md5sum: ddc508a18f60e7f3314534df50cdf8ca - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log - md5sum: c1fdc6977332f53dfe7f632733bb4585 - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log - md5sum: 50752acb1c5a20be886bfdfc06635bcb - - path: 
output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.faa - md5sum: e56fd59c38248dc21ac94355dca98121 - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.fna - md5sum: b365f84bf99c68357952e0b98ed7ce42 - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_err.log - md5sum: e5f14d7925ba14a0f9850542f3739894 - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_out.log - md5sum: d41971bfc1b621d4ffd2633bc47017ea - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/.bbtools_output/.checkpoint - contains: - - "Tool: bbtools" - - "Completed" - - "jobs" - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/full_table.tsv - md5sum: c9651b88b10871abc260ee655898e828 - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/hmmer_output.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/missing_busco_list.tsv - md5sum: 9939309df2da5419de88c32d1435c779 - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.txt - contains: - - "# BUSCO version is:" - - "Results:" - - "busco:" - - path: 
output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log - md5sum: 9caf1a1434414c78562eb0bbb9c0e53f - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log - contains: - - "# hmmsearch :: search profile(s) against a sequence database" - - "# target sequence database:" - - "Internal pipeline statistics summary:" - - "[ok]" - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log - md5sum: 538510cfc7483498210f01e53fe035ad - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log - md5sum: 61050b0706addc9498b2088a2d6efa9a - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint - contains: - - "Tool: prodigal" - - "Completed" - - "jobs" - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa - md5sum: 836e9a80d33d8b89168f07ddc13ee991 - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna - md5sum: 20eeb75f86842e6e136f02bca8b73a9f - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa - md5sum: 836e9a80d33d8b89168f07ddc13ee991 - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna - md5sum: 20eeb75f86842e6e136f02bca8b73a9f - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log - md5sum: 538510cfc7483498210f01e53fe035ad - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log - md5sum: 61050b0706addc9498b2088a2d6efa9a - - path: 
output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint - contains: - - "Tool: bbtools" - - "Completed" - - "jobs" - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv - md5sum: c56edab1dc1522e993c25ae2b730799f - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv - md5sum: b533ef30270f27160acce85a22d01bf5 - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt - contains: - - "# BUSCO version is:" - - "Results:" - - "busco:" - - path: output/busco/test-bacteria_odb10-busco/logs/busco.log - contains: - - "DEBUG:busco.run_BUSCO" - - "Results from dataset" - - "how to cite BUSCO" - - path: output/busco/versions.yml - -- name: busco test_busco_eukaryote_metaeuk - command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_eukaryote_metaeuk -c ./tests/config/nextflow.config - tags: - - busco - files: - - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt - contains: - - "BUSCO version" - - "The lineage dataset is" - - "BUSCO was run in mode" - - "Complete BUSCOs" - - "Missing BUSCOs" - - 
"Dependencies and versions" - - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt - md5sum: ff6d8277e452a83ce9456bbee666feb6 - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log - md5sum: e63debaa653f18f7405d936050abc093 - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint - contains: - - "Tool: bbtools" - - "Completed" - - "jobs" - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv - md5sum: bd880e90b9e5620a58943a3e0f9ff16b - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint - contains: - - "Tool: metaeuk" - - "Completed" - - "jobs" - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas - - path: 
output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa - md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2 - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv - md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988 - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt - contains: - - "# BUSCO version is:" - - "Results:" - - "busco:" - - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log - contains: - - "DEBUG:busco.run_BUSCO" - - "Results from dataset" - - "how to cite BUSCO" - - path: output/busco/versions.yml - -- name: busco test_busco_eukaryote_augustus - command: nextflow run ./tests/modules/nf-core/busco 
-entry test_busco_eukaryote_augustus -c ./tests/config/nextflow.config - tags: - - busco - files: - - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt - contains: - - "BUSCO version" - - "The lineage dataset is" - - "BUSCO was run in mode" - - "Complete BUSCOs" - - "Missing BUSCOs" - - "Dependencies and versions" - - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt - md5sum: ff6d8277e452a83ce9456bbee666feb6 - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log - md5sum: e63debaa653f18f7405d936050abc093 - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log - contains: - - "metaeuk" - - "easy-predict" - - "Compute score and coverage" - - "Time for processing:" - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log - contains: - - "metaeuk" - - "easy-predict" - - "Compute score and coverage" - - "Time for processing:" - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint - contains: - - "Tool: bbtools" - - "Completed" - - "jobs" - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz - - path: 
output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv - md5sum: bd880e90b9e5620a58943a3e0f9ff16b - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint - contains: - - "Tool: metaeuk" - - "Completed" - - "jobs" - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa - md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2 - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv - - path: 
output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv - md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988 - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt - contains: - - "# BUSCO version is:" - - "Results:" - - "busco:" - - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log - contains: - - "DEBUG:busco.run_BUSCO" - - "Results from dataset" - - "how to cite BUSCO" - - path: output/busco/versions.yml - -- name: busco test_busco_protein - command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_protein -c ./tests/config/nextflow.config - tags: - - busco - files: - - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.txt - contains: - - "BUSCO version" - - "The lineage dataset is" - - "BUSCO was run in mode" - - "Complete BUSCOs" - - "Missing BUSCOs" - - "Dependencies and versions" - - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt - md5sum: 7a65e6cbb6c56a2ea4e739ae0aa3297d - - path: output/busco/test-bacteria_odb10-busco/logs/busco.log - contains: - - "DEBUG:busco.run_BUSCO" - - "Results from dataset" - - "how to cite BUSCO" - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_err.log - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_out.log - contains: - - "# hmmsearch :: search profile(s) against a sequence database" - - "# target sequence database:" - - "Internal pipeline statistics summary:" - - "[ok]" - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz - - path: 
output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/full_table.tsv - md5sum: 0e34f1011cd83ea1d5d5103ec62b8922 - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/hmmer_output.tar.gz - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/missing_busco_list.tsv - md5sum: 9939309df2da5419de88c32d1435c779 - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.txt - contains: - - "# BUSCO version is:" - - "Results:" - - "busco:" - - path: output/busco/versions.yml - -- name: busco test_busco_transcriptome - command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_transcriptome -c ./tests/config/nextflow.config - tags: - - busco - files: - - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt - contains: - - "BUSCO version" - - "The lineage dataset is" - - "BUSCO was run in mode" - - "Complete BUSCOs" - - "Missing BUSCOs" - - "Dependencies and versions" - - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt - md5sum: 46118ecf60d1b87d22b96d80f4f03632 - - path: output/busco/test-bacteria_odb10-busco/logs/busco.log - contains: - - "DEBUG:busco.run_BUSCO" - - "Results from dataset" - - "how to cite BUSCO" - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/.checkpoint - contains: - - "Tool: makeblastdb" - - "Completed" - 
- "jobs" - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ndb - md5sum: 3788c017fe5e6f0f58224e9cdd21822b - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nhr - md5sum: 8ecd2ce392bb5e25ddbe1d85f879582e - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nin - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.njs - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.not - md5sum: 0c340e376c7e85d19f82ec1a833e6a6e - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nsq - md5sum: 532d5c0a7ea00fe95ca3c97cb3be6198 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ntf - md5sum: de1250813f0c7affc6d12dac9d0fb6bb - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nto - md5sum: ff74bd41f9cc9b011c63a32c4f7693bf - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_err.log - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_out.log - contains: - - "# hmmsearch :: search profile(s) against a sequence database" - - "# target sequence database:" - - "Internal pipeline statistics summary:" - - "[ok]" - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_err.log - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_out.log - contains: - - "Building a new DB" - - "Adding sequences from FASTA" - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_err.log - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_out.log - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/.checkpoint - contains: - - "Tool: tblastn" - - "Completed" - - "jobs" - - path: 
output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/coordinates.tsv - md5sum: cc30eed321944af293452bdbcfc24292 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_101.temp - md5sum: 73e9c65fc83fedc58f57f09b08f08238 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_119.temp - md5sum: 7fa4cc7955ec0cc36330a221c579b975 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_129.temp - md5sum: 6f1601c875d019e3f6f1f98ed8e988d4 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_138.temp - md5sum: 3f8e034686cd240c2330650d791bcae2 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_143.temp - md5sum: df3dfa8e9ba30ed70cf75b5e7abf2179 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_172.temp - md5sum: 7d463e0e6cf7169bc9077d8dc776dda1 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_178.temp - md5sum: 2288edf7fa4f88f51b4cf4d94086f77e - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_188.temp - md5sum: 029906abbad6d87fc57830dd548cac24 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_195.temp - md5sum: 4937f3b348774a31b1160a00297c29cc - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_210.temp - md5sum: afcb20ba4c466479d6b91c8c62251e1f - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_232.temp - md5sum: 2e1e823ce017345bd998191a39fa9924 - - path: 
output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_268.temp - md5sum: 08c2d82c34ecffbe1c638b410349412e - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_29.temp - md5sum: cd9b63cf93524284781535c888313764 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_44.temp - md5sum: d1929b742b24ebe379bf4801ca882dca - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_58.temp - md5sum: 69215765b010c05336538cb322c900b3 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_72.temp - md5sum: 6feaa1cc3b0899a147ea9d466878f3e3 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_80.temp - md5sum: 13625eae14e860a96ce17cd4e37e9d01 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_81.temp - md5sum: e14b2484649b0dbc8926815c207b806d - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_93.temp - md5sum: 6902c93691df00e690faea914c71839e - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_97.temp - md5sum: 0a0d9d38a83acbd5ad43c29cdf429988 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/tblastn.tsv - contains: - - "TBLASTN" - - "BLAST processed" - - "queries" - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz - - path: 
output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/full_table.tsv - md5sum: 24df25199e13c88bd892fc3e7b541ca0 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/hmmer_output.tar.gz - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/missing_busco_list.tsv - md5sum: e7232e2b8cca4fdfdd9e363b39ebbc81 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.txt - contains: - - "# BUSCO version is:" - - "Results:" - - "busco:" - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/single_copy_proteins.faa - md5sum: e04b9465733577ae6e4bccb7aa01e720 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1024388at2.faa - md5sum: 7333c39a20258f20c7019ea0cd83157c - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1054741at2.faa - md5sum: ebb481e77a824685fbe04d8a2f3a0d7d - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1093223at2.faa - md5sum: 34621c7d499034e8f8e6b92fd4020a93 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1151822at2.faa - md5sum: aa89ca381c1c70c9c4e1380351ca7c2a - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/143460at2.faa - md5sum: f2e91d78b8dd3722840378789f29e8c8 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1491686at2.faa - md5sum: 73c25aef5c9cba7f4151804941b146ea - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1504821at2.faa - md5sum: 
cda556018d1f84ebe517e89f6fc107d0 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1574817at2.faa - md5sum: a9096c9fb8b25c78a72871ab0463acdc - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1592033at2.faa - md5sum: e463d25ce186c0cebfd749474f3a4c64 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1623045at2.faa - md5sum: f2cfd241590c6d8377286d6135480937 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1661836at2.faa - md5sum: 586569546fb9861502468e3d9ba2775c - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1674344at2.faa - md5sum: 24c658bee14ad84b062d81ad96642eb8 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1698718at2.faa - md5sum: 0b8e26ddf5149bbd8805be7af125208d - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1990650at2.faa - md5sum: 159320712ee01fb2ccb31a25df44eead - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/223233at2.faa - md5sum: 812629c0b06ac3d18661c2ca78de0c08 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/402899at2.faa - md5sum: f7ff4e1591342d30b77392a2e84b57d9 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/505485at2.faa - md5sum: 7b34a24fc49c540d46fcf96ff5129564 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/665824at2.faa - md5sum: 4cff2df64f6bcaff8bc19c234c8bcccd - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/776861at2.faa - md5sum: 613af7a3fea30ea2bece66f603b9284a - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/874197at2.faa - md5sum: a7cd1b13c9ef91c7ef4e31614166f197 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/932854at2.faa 
- md5sum: fe313ffd5efdb0fed887a04fba352552 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/95696at2.faa - md5sum: 4e1f30a2fea4dfbf9bb7fae2700622a0 - - path: output/busco/versions.yml diff --git a/modules/nf-core/busco/busco/tests/tags.yml b/modules/nf-core/busco/busco/tests/tags.yml deleted file mode 100644 index 7c4d2835..00000000 --- a/modules/nf-core/busco/busco/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -busco/busco: - - "modules/nf-core/busco/busco/**" diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml index 691d4c76..f9f54ee9 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/fastqc/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 033f4154..23e16634 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -29,7 +29,7 @@ process FASTQC { // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label - def memory_in_mb = task.memory ? task.memory.toUnit('MB').toFloat() / task.cpus : null + def memory_in_mb = task.memory ? task.memory.toUnit('MB') / task.cpus : null // FastQC memory value allowed range (100 - 10000) def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 
100 : memory_in_mb) diff --git a/modules/nf-core/flye/environment.yml b/modules/nf-core/flye/environment.yml index 87b97eb8..cbad0b1c 100644 --- a/modules/nf-core/flye/environment.yml +++ b/modules/nf-core/flye/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/flye/tests/tags.yml b/modules/nf-core/flye/tests/tags.yml deleted file mode 100644 index 31103d13..00000000 --- a/modules/nf-core/flye/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -flye: - - modules/nf-core/flye/** diff --git a/modules/nf-core/hifiasm/environment.yml b/modules/nf-core/hifiasm/environment.yml index 6aea679a..04d7b0d4 100644 --- a/modules/nf-core/hifiasm/environment.yml +++ b/modules/nf-core/hifiasm/environment.yml @@ -1,6 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::hifiasm=0.24.0 + - bioconda::hifiasm=0.25.0 diff --git a/modules/nf-core/hifiasm/main.nf b/modules/nf-core/hifiasm/main.nf index 9b3e4ad6..e85e26e1 100644 --- a/modules/nf-core/hifiasm/main.nf +++ b/modules/nf-core/hifiasm/main.nf @@ -4,118 +4,107 @@ process HIFIASM { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hifiasm:0.24.0--h5ca1c30_0' : - 'biocontainers/hifiasm:0.24.0--h5ca1c30_0' }" + 'https://depot.galaxyproject.org/singularity/hifiasm:0.25.0--h5ca1c30_0' : + 'biocontainers/hifiasm:0.25.0--h5ca1c30_0' }" input: tuple val(meta) , path(long_reads) , path(ul_reads) tuple val(meta1), path(paternal_kmer_dump), path(maternal_kmer_dump) tuple val(meta2), path(hic_read1) , path(hic_read2) + tuple val(meta3), path(bin_files) output: - tuple val(meta), path("*.r_utg.gfa") , emit: raw_unitigs - tuple val(meta), path("*.ec.bin") , emit: corrected_reads - tuple val(meta), path("*.ovlp.source.bin") , emit: source_overlaps - tuple val(meta), path("*.ovlp.reverse.bin"), emit: reverse_overlaps - tuple val(meta), path("*.bp.p_ctg.gfa") , emit: processed_contigs, optional: true - tuple val(meta), path("*.p_utg.gfa") , emit: processed_unitigs, optional: true - tuple val(meta), path("*.asm.p_ctg.gfa") , emit: primary_contigs , optional: true - tuple val(meta), path("*.asm.a_ctg.gfa") , emit: alternate_contigs, optional: true - tuple val(meta), path("*.hap1.p_ctg.gfa") , emit: paternal_contigs , optional: true - tuple val(meta), path("*.hap2.p_ctg.gfa") , emit: maternal_contigs , optional: true - tuple val(meta), path("*.log") , emit: log - path "versions.yml" , emit: versions + tuple val(meta), path("*.r_utg.gfa") , emit: raw_unitigs + tuple val(meta), path("*.bin") , emit: bin_files , optional: true + tuple val(meta), path("*.p_utg.gfa") , emit: processed_unitigs, optional: true + tuple val(meta), path("${prefix}.{p_ctg,bp.p_ctg,hic.p_ctg}.gfa"), emit: primary_contigs , optional: true + tuple val(meta), path("${prefix}.{a_ctg,hic.a_ctg}.gfa") , emit: alternate_contigs, optional: true + tuple val(meta), path("${prefix}.*.hap1.p_ctg.gfa") , emit: hap1_contigs , optional: true + tuple val(meta), path("${prefix}.*.hap2.p_ctg.gfa") , emit: hap2_contigs , optional: true + tuple val(meta), path("*.ec.fa.gz") , emit: corrected_reads , optional: 
true + tuple val(meta), path("*.ovlp.paf.gz") , emit: read_overlaps , optional: true + tuple val(meta), path("${prefix}.stderr.log") , emit: log + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def ultralong = ul_reads ? "--ul ${ul_reads}" : "" + prefix = task.ext.prefix ?: "${meta.id}" - if ((paternal_kmer_dump) && (maternal_kmer_dump) && (hic_read1) && (hic_read2)) { - error "Hifiasm Trio-binning and Hi-C integrated should not be used at the same time" - } else if ((paternal_kmer_dump) && !(maternal_kmer_dump)) { - error "Hifiasm Trio-binning requires maternal data" - } else if (!(paternal_kmer_dump) && (maternal_kmer_dump)) { - error "Hifiasm Trio-binning requires paternal data" - } else if ((paternal_kmer_dump) && (maternal_kmer_dump)) { - """ - hifiasm \\ - $args \\ - -o ${prefix}.asm \\ - -t $task.cpus \\ - -1 $paternal_kmer_dump \\ - -2 $maternal_kmer_dump \\ - $ultralong \\ - $long_reads \\ - 2> >( tee ${prefix}.stderr.log >&2 ) + def long_reads_sorted = long_reads instanceof List ? long_reads.sort{ it.name } : long_reads + def ul_reads_sorted = ul_reads instanceof List ? ul_reads.sort{ it.name } : ul_reads + def ultralong = ul_reads ? 
"--ul ${ul_reads_sorted}" : "" + if([paternal_kmer_dump, maternal_kmer_dump].any() && [hic_read1, hic_read2].any()) { + log.error("ERROR: hifiasm trio binning mode and Hi-C phasing can not be used at the same time.") + } - cat <<-END_VERSIONS > versions.yml - "${task.process}": - hifiasm: \$(hifiasm --version 2>&1) - END_VERSIONS - """ - } else if ((hic_read1) && !(hic_read2)) { - error "Hifiasm Hi-C integrated requires paired-end data (only R1 specified here)" - } else if (!(hic_read1) && (hic_read2)) { - error "Hifiasm Hi-C integrated requires paired-end data (only R2 specified here)" - } else if ((hic_read1) && (hic_read2)) { - """ - hifiasm \\ - $args \\ - -o ${prefix}.asm \\ - -t $task.cpus \\ - --h1 $hic_read1 \\ - --h2 $hic_read2 \\ - $ultralong \\ - $long_reads \\ - 2> >( tee ${prefix}.stderr.log >&2 ) + def input_trio = "" + if([paternal_kmer_dump, maternal_kmer_dump].any()) { + if(![paternal_kmer_dump, maternal_kmer_dump].every()) { + log.error("ERROR: Either the maternal or paternal kmer dump is missing!") + } else { + input_trio = "-1 ${paternal_kmer_dump} -2 ${maternal_kmer_dump}" + } + } + def input_hic = "" + if([hic_read1, hic_read2].any()) { + if(![hic_read1, hic_read2].every()) { + log.error("ERROR: Either the forward or reverse Hi-C reads are missing!") + } else { + input_hic = "--h1 ${hic_read1} --h2 ${hic_read2}" + } + } + """ + hifiasm \\ + $args \\ + -t ${task.cpus} \\ + ${input_trio} \\ + ${input_hic} \\ + ${ultralong} \\ + -o ${prefix} \\ + ${long_reads_sorted} \\ + 2> >( tee ${prefix}.stderr.log >&2 ) - cat <<-END_VERSIONS > versions.yml - "${task.process}": - hifiasm: \$(hifiasm --version 2>&1) - END_VERSIONS - """ - } else { // Phasing with Hi-C data is not supported yet - """ - hifiasm \\ - $args \\ - -o ${prefix}.asm \\ - -t $task.cpus \\ - $ultralong \\ - $long_reads \\ - 2> >( tee ${prefix}.stderr.log >&2 ) + if [ -f ${prefix}.ec.fa ]; then + gzip ${prefix}.ec.fa + fi - cat <<-END_VERSIONS > versions.yml - "${task.process}": - 
hifiasm: \$(hifiasm --version 2>&1) - END_VERSIONS - """ - } - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.asm.r_utg.gfa - touch ${prefix}.asm.ec.bin - touch ${prefix}.asm.ovlp.source.bin - touch ${prefix}.asm.ovlp.reverse.bin - touch ${prefix}.asm.bp.p_ctg.gfa - touch ${prefix}.asm.p_utg.gfa - touch ${prefix}.asm.p_ctg.gfa - touch ${prefix}.asm.a_ctg.gfa - touch ${prefix}.asm.hap1.p_ctg.gfa - touch ${prefix}.asm.hap2.p_ctg.gfa - touch ${prefix}.stderr.log + if [ -f ${prefix}.ovlp.paf ]; then + gzip ${prefix}.ovlp.paf + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hifiasm: \$(hifiasm --version 2>&1) + END_VERSIONS + """ - cat <<-END_VERSIONS > versions.yml - "${task.process}": - hifiasm: \$(hifiasm --version 2>&1) - END_VERSIONS - """ + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.r_utg.gfa + touch ${prefix}.ec.bin + touch ${prefix}.ovlp.source.bin + touch ${prefix}.ovlp.reverse.bin + touch ${prefix}.hic.tlb.bin + touch ${prefix}.hic.lk.bin + touch ${prefix}.bp.p_ctg.gfa + touch ${prefix}.p_utg.gfa + touch ${prefix}.p_ctg.gfa + touch ${prefix}.a_ctg.gfa + touch ${prefix}.hap1.p_ctg.gfa + touch ${prefix}.hap2.p_ctg.gfa + echo "" | gzip > ${prefix}.ec.fa.gz + echo "" | gzip > ${prefix}.ovlp.paf.gz + touch ${prefix}.stderr.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hifiasm: \$(hifiasm --version 2>&1) + END_VERSIONS + """ } diff --git a/modules/nf-core/hifiasm/meta.yml b/modules/nf-core/hifiasm/meta.yml index b255571f..fcd211db 100644 --- a/modules/nf-core/hifiasm/meta.yml +++ b/modules/nf-core/hifiasm/meta.yml @@ -25,9 +25,11 @@ input: - long_reads: type: file description: Long reads PacBio HiFi reads or ONT reads (requires ext.arg '--ont'). + ontologies: [] - ul_reads: type: file description: ONT long reads to use with --ul. 
+ ontologies: [] - - meta1: type: map description: | @@ -36,10 +38,12 @@ input: type: file description: Yak kmer dump file for paternal reads (can be used for haplotype resolution). It can have an arbitrary extension. + ontologies: [] - maternal_kmer_dump: type: file description: Yak kmer dump file for maternal reads (can be used for haplotype resolution). It can have an arbitrary extension. + ontologies: [] - - meta2: type: map description: | @@ -47,9 +51,19 @@ input: - hic_read1: type: file description: Hi-C data Forward reads. + ontologies: [] - hic_read2: type: file description: Hi-C data Reverse reads. + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing information about the input bin files + - bin_files: + type: file + description: bin files produced during a previous Hifiasm run + ontologies: [] output: - raw_unitigs: - meta: @@ -61,116 +75,133 @@ output: type: file description: Raw unitigs pattern: "*.r_utg.gfa" - - corrected_reads: + - bin_files: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.ec.bin": + - "*.bin": type: file - description: Corrected reads - pattern: "*.ec.bin" - - source_overlaps: - - meta: - type: map description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*.ovlp.source.bin": - type: file - description: Source overlaps - pattern: "*.ovlp.source.bin" - - reverse_overlaps: + Binary files containing processed data for hifiasm, including + error-corrected reads, read overlaps, and Hi-C alignments. Can + be re-used as an input for subsequent re-runs of hifiasm with new + inputs or modified parameters in order to save recomputation of + initial results, which are the most computationally-expensive + steps. + pattern: "*.bin" + - processed_unitigs: - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - "*.ovlp.reverse.bin": + - "*.p_utg.gfa": type: file - description: Reverse overlaps - pattern: "*.ovlp.reverse.bin" - - processed_contigs: + description: Processed unitigs + pattern: "*.p_utg.gfa" + - primary_contigs: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.bp.p_ctg.gfa": + - ${prefix}.{p_ctg,bp.p_ctg,hic.p_ctg}.gfa: type: file - description: Processed contigs - pattern: "*.bp.p_ctg.gfa" - - processed_unitigs: + description: Contigs representing the primary assembly + pattern: "${prefix}.{p_ctg,bp.p_ctg,hic.p_ctg}.gfa" + - alternate_contigs: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.p_utg.gfa": + - ${prefix}.{a_ctg,hic.a_ctg}.gfa: type: file - description: Processed unitigs - pattern: "*.p_utg.gfa" - - primary_contigs: + description: Contigs representing the alternative assembly + pattern: "${prefix}.{a_ctg,hic.a_ctg}.gfa" + - hap1_contigs: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.asm.p_ctg.gfa": + pattern: "${prefix}.*.hap1.p_ctg.gfa" + - ${prefix}.*.hap1.p_ctg.gfa: type: file - description: Primary contigs - pattern: "*.asm.p_ctg.gfa" - - alternate_contigs: + description: | + Contigs for the first haplotype. How the haplotypes are represented + depends on the input mode; in standard HiFi-only mode, these + are partially-phased parental contigs. In Hi-C mode, they + are fully phased parental contigs, but the phasing is not maintained + between contigs. In trio mode, they are fully phased paternal contigs + all originating from a single parental haplotype. + pattern: "${prefix}.*.hap1.p_ctg.gfa" + - hap2_contigs: - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - "*.asm.a_ctg.gfa": + pattern: "${prefix}.*.hap2.p_ctg.gfa" + - ${prefix}.*.hap2.p_ctg.gfa: type: file - description: Alternative contigs - pattern: "*.asm.a_ctg.gfa" - - paternal_contigs: + description: | + Contigs for the second haplotype. How the haplotypes are represented + depends on the input mode; in standard HiFi-only mode, these + are partially-phased parental contigs. In Hi-C mode, they + are fully phased parental contigs, but the phasing is not maintained + between contigs. In trio mode, they are fully phased paternal contigs + all originating from a single parental haplotype. + pattern: "${prefix}.*.hap2.p_ctg.gfa" + - corrected_reads: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.hap1.p_ctg.gfa": + - "*.ec.fa.gz": type: file - description: Paternal contigs - pattern: "*.hap1.p_ctg.gfa" - - maternal_contigs: + description: | + If option --write-ec specified, a gzipped fasta file containing the error corrected + reads produced by the hifiasm error correction module + pattern: "*.ec.fa.gz" + - read_overlaps: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.hap2.p_ctg.gfa": + - "*.ovlp.paf.gz": type: file - description: Maternal contigs - pattern: "*.hap2.p_ctg.gfa" + description: | + If option --write-paf specified, a gzipped paf file describing the overlaps + among all error-corrected reads + pattern: "*.ovlp.paf.gz" - log: - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - "*.log": + pattern: "*.stderr.log" + - ${prefix}.stderr.log: type: file description: Stderr log - pattern: "*.log" + pattern: "*.stderr.log" - versions: - versions.yml: type: file description: File containing software versions pattern: "versions.yml" + ontologies: [] authors: - "@sidorov-si" - "@scorreard" - "@mbeavitt" - "@schmytzi" + - "@prototaxites" maintainers: - "@sidorov-si" - "@scorreard" diff --git a/modules/nf-core/hifiasm/tests/main.nf.test b/modules/nf-core/hifiasm/tests/main.nf.test index 12c69bd9..53edf404 100644 --- a/modules/nf-core/hifiasm/tests/main.nf.test +++ b/modules/nf-core/hifiasm/tests/main.nf.test @@ -9,8 +9,12 @@ nextflow_process { tag "modules_nfcore" tag "hifiasm" - test("homo_sapiens pacbio hifi [fastq, [,], [,] ]") { + test("homo_sapiens pacbio hifi [fastq, [,], [,], [,]]") { when { + params { + extra_output = "--write-ec --write-paf" + } + process { """ input[0] = [ @@ -28,30 +32,177 @@ nextflow_process { [], [] ] + input[3] = [ + [], + [] + ] """ } } then { + def bin_files = process.out.bin_files.get(0).get(1).collect { bin -> file(bin).name } + def expected_bin_files = [ + "test.ec.bin", + "test.ovlp.reverse.bin", + "test.ovlp.source.bin" + ] + assertAll( { assert process.success }, - { assert file(process.out.corrected_reads.get(0).get(1)).exists() }, - { assert file(process.out.source_overlaps.get(0).get(1)).exists() }, - { assert file(process.out.reverse_overlaps.get(0).get(1)).exists() }, + { assertContainsInAnyOrder(bin_files, expected_bin_files) }, { assert file(process.out.log.get(0).get(1)).exists() }, { assert snapshot( process.out.raw_unitigs, - process.out.processed_contigs, process.out.processed_unitigs, - process.out.paternal_contigs, - process.out.maternal_contigs, + process.out.primary_contigs, + process.out.hap1_contigs, + process.out.hap2_contigs, + process.out.fasta, + process.out.paf, process.out.versions ).match() } ) } } - test("homo_sapiens pacbio hifi [fastq, [yak, yak], 
[,] ]") { + test("homo_sapiens pacbio hifi [fastq, [,], [,], [bin] ]") { + + setup { + run("HIFIASM", alias: "HIFIASM_INITIAL") { + script "../main.nf" + process { + """ + input[0] = [ + [ id : 'test'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + [] + ] + input[1] = [ + [], + [], + [] + ] + input[2] = [ + [], + [], + [] + ] + input[3] = [ + [], + [] + ] + """ + } + } + } + when { + params { + extra_output = "" + } + + process { + """ + input[0] = [ + [ id : 'test'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + [] + ] + input[1] = [ + [], + [], + [] + ] + input[2] = [ + [], + [], + [] + ] + input[3] = HIFIASM_INITIAL.out.bin_files + """ + } + } + then { + assertAll( + { assert process.success }, + { assert file(process.out.log.get(0).get(1)).exists() }, + { assert file(process.out.log.get(0).get(1)).readLines().first().contains("Reads has been loaded.") }, + { assert snapshot( + process.out.raw_unitigs, + process.out.processed_unitigs, + process.out.primary_contigs, + process.out.hap1_contigs, + process.out.hap2_contigs, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens pacbio hifi [fastq x2, [,], [,], [,] ]") { + + when { + params { + extra_output = "" + } + + process { + """ + input[0] = Channel.of([ + [ id : 'test'], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq', checkIfExists: true), + ], + [] + ]) + input[1] = [ + [], + [], + [] + ] + input[2] = [ + [], + [], + [] + ] + input[3] = [ + [], + [] + ] + """ + } + } + then { + def bin_files = process.out.bin_files.get(0).get(1).collect { bin -> file(bin).name } + def expected_bin_files = [ + "test.ec.bin", + "test.ovlp.reverse.bin", + "test.ovlp.source.bin" + ] + + 
assertAll( + { assert process.success }, + { assertContainsInAnyOrder(bin_files, expected_bin_files) }, + { assert file(process.out.log.get(0).get(1)).exists() }, + { assert snapshot( + process.out.raw_unitigs, + process.out.processed_unitigs, + process.out.primary_contigs, + process.out.hap1_contigs, + process.out.hap2_contigs, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens pacbio hifi [fastq, [yak, yak], [,], [,] ]") { + when { + params { + extra_output = "" + } + process { """ input[0] = [ @@ -69,29 +220,47 @@ nextflow_process { [], [] ] + input[3] = [ + [], + [] + ] """ } } then { + def bin_files = process.out.bin_files.get(0).get(1).collect { bin -> file(bin).name } + def expected_bin_files = [ + "test.ec.bin", + "test.ovlp.reverse.bin", + "test.ovlp.source.bin", + "test.hap1.phase.bin", + "test.hap2.phase.bin" + ] + assertAll( { assert process.success }, - { assert file(process.out.corrected_reads.get(0).get(1)).exists() }, - { assert file(process.out.source_overlaps.get(0).get(1)).exists() }, - { assert file(process.out.reverse_overlaps.get(0).get(1)).exists() }, + { assertContainsInAnyOrder(bin_files, expected_bin_files) }, + { assert file(process.out.hap1_contigs.get(0).get(1)).exists() }, + { assert file(process.out.hap1_contigs.get(0).get(1)).name == "test.dip.hap1.p_ctg.gfa" }, + { assert file(process.out.hap2_contigs.get(0).get(1)).exists() }, + { assert file(process.out.hap2_contigs.get(0).get(1)).name == "test.dip.hap2.p_ctg.gfa" }, { assert file(process.out.log.get(0).get(1)).exists() }, - { assert file(process.out.maternal_contigs.get(0).get(1)).length() == 0}, { assert snapshot( process.out.raw_unitigs, process.out.processed_unitigs, - process.out.paternal_contigs, + process.out.hap1_contigs, process.out.versions ).match() } ) } } - test("homo_sapiens pacbio hifi [fastq, [,], [fastq, fastq] ]") { + test("homo_sapiens pacbio hifi [fastq, [,], [fastq, fastq], [,] ]") { when { + params { + extra_output = "" + } + process { 
""" input[0] = [ @@ -109,29 +278,49 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + input[3] = [ + [], + [] + ] """ } } then { + def bin_files = process.out.bin_files.get(0).get(1).collect { bin -> file(bin).name } + def expected_bin_files = [ + "test.ec.bin", + "test.ovlp.reverse.bin", + "test.ovlp.source.bin", + "test.hic.lk.bin", + "test.hic.tlb.bin" + ] + assertAll( { assert process.success }, - { assert file(process.out.corrected_reads.get(0).get(1)).exists() }, - { assert file(process.out.source_overlaps.get(0).get(1)).exists() }, - { assert file(process.out.reverse_overlaps.get(0).get(1)).exists() }, + { assertContainsInAnyOrder(bin_files, expected_bin_files) }, + { assert file(process.out.hap1_contigs.get(0).get(1)).exists() }, + { assert file(process.out.hap1_contigs.get(0).get(1)).name == "test.hic.hap1.p_ctg.gfa" }, + { assert file(process.out.hap2_contigs.get(0).get(1)).exists() }, + { assert file(process.out.hap2_contigs.get(0).get(1)).name == "test.hic.hap2.p_ctg.gfa" }, { assert file(process.out.log.get(0).get(1)).exists() }, - { assert file(process.out.maternal_contigs.get(0).get(1)).length() == 0}, { assert snapshot( process.out.raw_unitigs, process.out.processed_unitigs, - process.out.paternal_contigs, + process.out.primary_contigs, + process.out.alternate_contigs, + process.out.hap1_contigs, process.out.versions ).match() } ) } } - test("homo_sapiens pacbio hifi [fastq, [yak, yak], [fastq, fastq] ]") { + test("homo_sapiens pacbio hifi [fastq, [yak, yak], [fastq, fastq], [,] ]") { when { + params { + extra_output = "" + } + process { """ input[0] = [ [ id : 'test'], @@ -148,7 +337,12 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz, checkIfExists: true), 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz, checkIfExists: true) ] + input[3] = [ + [], + [] + ] """ + } } then { assertAll( @@ -157,8 +351,13 @@ nextflow_process { } } - test("homo_sapiens pacbio hifi [fastq, [yak, ], [,] ]") { + test("homo_sapiens pacbio hifi [fastq, [yak, ], [,], [,] ]") { when { + params { + extra_output = "" + } + + process { """ input[0] = [ [ id : 'test'], @@ -171,7 +370,12 @@ nextflow_process { [] ] input[2] = [] + input[3] = [ + [], + [] + ] """ + } } then { assertAll( @@ -180,8 +384,13 @@ nextflow_process { } } - test("homo_sapiens pacbio hifi [fastq, [,], [, fastq] ]") { + test("homo_sapiens pacbio hifi [fastq, [,], [, fastq], [,] ]") { when { + params { + extra_output = "" + } + + process { """ input[0] = [ [ id : 'test'], @@ -194,7 +403,12 @@ nextflow_process { [], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + input[3] = [ + [], + [] + ] """ + } } then { assertAll( @@ -203,9 +417,13 @@ nextflow_process { } } - test("homo_sapiens pacbio hifi [fastq, [,], [,] ] - stub") { + test("homo_sapiens pacbio hifi [fastq, [,], [,], [,] ] - stub") { options "-stub" when { + params { + extra_output = "" + } + process { """ input[0] = [ @@ -223,6 +441,10 @@ nextflow_process { [], [] ] + input[3] = [ + [], + [] + ] """ } } diff --git a/modules/nf-core/hifiasm/tests/main.nf.test.snap b/modules/nf-core/hifiasm/tests/main.nf.test.snap index e9a0de59..1e2d86f4 100644 --- a/modules/nf-core/hifiasm/tests/main.nf.test.snap +++ b/modules/nf-core/hifiasm/tests/main.nf.test.snap @@ -1,12 +1,12 @@ { - "homo_sapiens pacbio hifi [fastq, [,], [,] ]": { + "homo_sapiens pacbio hifi [fastq, [,], [fastq, fastq], [,] ]": { "content": [ [ [ { "id": "test" }, - "test.asm.bp.r_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" + "test.hic.r_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" ] ], [ @@ -14,7 +14,7 @@ { "id": "test" }, - 
"test.asm.bp.p_ctg.gfa:md5,c3225425a73a8d6d46536dab72380650" + "test.hic.p_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" ] ], [ @@ -22,15 +22,38 @@ { "id": "test" }, - "test.asm.bp.p_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" + "test.hic.p_ctg.gfa:md5,6d0ec1ec65b1d2b295ac94b6fef25a27" ] ], + [ + + ], + [ + [ + { + "id": "test" + }, + "test.hic.hap1.p_ctg.gfa:md5,f67a8fdfa756961360732c79d189054d" + ] + ], + [ + "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-15T14:24:48.67437687" + }, + "homo_sapiens pacbio hifi [fastq, [,], [,], [bin] ]": { + "content": [ [ [ { "id": "test" }, - "test.asm.bp.hap1.p_ctg.gfa:md5,78a86b6d5741a680e22a427ca9faa74a" + "test.bp.r_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" ] ], [ @@ -38,27 +61,67 @@ { "id": "test" }, - "test.asm.bp.hap2.p_ctg.gfa:md5,ac2116fd2f22c67d4c304cbf9b9f7793" + "test.bp.p_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" ] ], [ - "versions.yml:md5,a2ea36e18c39850b1680302d9f6c950f" + [ + { + "id": "test" + }, + "test.bp.p_ctg.gfa:md5,c3225425a73a8d6d46536dab72380650" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bp.hap1.p_ctg.gfa:md5,78a86b6d5741a680e22a427ca9faa74a" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bp.hap2.p_ctg.gfa:md5,ac2116fd2f22c67d4c304cbf9b9f7793" + ] + ], + [ + "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.5" }, - "timestamp": "2024-11-28T10:51:30.175326435" + "timestamp": "2025-04-15T14:24:20.599937477" }, - "homo_sapiens pacbio hifi [fastq, [,], [fastq, fastq] ]": { + "homo_sapiens pacbio hifi [fastq x2, [,], [,], [,] ]": { "content": [ [ [ { "id": "test" }, - "test.asm.hic.r_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" + "test.bp.r_utg.gfa:md5,e6e38ac76f6b73142c3fe403f71d27b0" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bp.p_utg.gfa:md5,e6e38ac76f6b73142c3fe403f71d27b0" + ] + ], + [ + [ + { + 
"id": "test" + }, + "test.bp.p_ctg.gfa:md5,43890a1832d8f26de263e57dc5e3b8de" ] ], [ @@ -66,7 +129,7 @@ { "id": "test" }, - "test.asm.hic.p_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" + "test.bp.hap1.p_ctg.gfa:md5,7d7ea2bed472de263f6ec3521959b0d9" ] ], [ @@ -74,20 +137,20 @@ { "id": "test" }, - "test.asm.hic.hap1.p_ctg.gfa:md5,f67a8fdfa756961360732c79d189054d" + "test.bp.hap2.p_ctg.gfa:md5,ce096a66c9bba039c6a22ba9e9409d01" ] ], [ - "versions.yml:md5,a2ea36e18c39850b1680302d9f6c950f" + "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.5" }, - "timestamp": "2024-11-28T10:52:15.111066189" + "timestamp": "2025-04-15T14:24:28.744387853" }, - "homo_sapiens pacbio hifi [fastq, [,], [,] ] - stub": { + "homo_sapiens pacbio hifi [fastq, [,], [,], [,] ] - stub": { "content": [ { "0": [ @@ -95,7 +158,7 @@ { "id": "test" }, - "test.asm.r_utg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.r_utg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -103,26 +166,24 @@ { "id": "test" }, - "test.asm.ec.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "test.ec.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.hic.lk.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.hic.tlb.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.ovlp.reverse.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.ovlp.source.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] ], "10": [ - [ - { - "id": "test" - }, - "test.stderr.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "11": [ - "versions.yml:md5,a2ea36e18c39850b1680302d9f6c950f" + "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" ], "2": [ [ { "id": "test" }, - "test.asm.ovlp.source.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.p_utg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "3": [ @@ -130,7 +191,10 @@ { "id": "test" }, - "test.asm.ovlp.reverse.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "test.bp.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"test.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] ], "4": [ @@ -138,31 +202,21 @@ { "id": "test" }, - "test.asm.bp.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.a_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "5": [ - [ - { - "id": "test" - }, - "test.asm.p_utg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + ], "6": [ - [ - { - "id": "test" - }, - "test.asm.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + ], "7": [ [ { "id": "test" }, - "test.asm.a_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.ec.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "8": [ @@ -170,7 +224,7 @@ { "id": "test" }, - "test.asm.hap1.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.ovlp.paf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "9": [ @@ -178,7 +232,7 @@ { "id": "test" }, - "test.asm.hap2.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.stderr.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "alternate_contigs": [ @@ -186,55 +240,54 @@ { "id": "test" }, - "test.asm.a_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.a_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "corrected_reads": [ + "bin_files": [ [ { "id": "test" }, - "test.asm.ec.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "test.ec.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.hic.lk.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.hic.tlb.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.ovlp.reverse.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.ovlp.source.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] ], - "log": [ + "corrected_reads": [ [ { "id": "test" }, - "test.stderr.log:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.ec.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "maternal_contigs": [ - [ - { - "id": "test" - }, - "test.asm.hap2.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + "hap1_contigs": [ + ], - "paternal_contigs": [ - [ - { - "id": "test" - }, - "test.asm.hap1.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + 
"hap2_contigs": [ + ], - "primary_contigs": [ + "log": [ [ { "id": "test" }, - "test.asm.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.stderr.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "processed_contigs": [ + "primary_contigs": [ [ { "id": "test" }, - "test.asm.bp.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "test.bp.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] ], "processed_unitigs": [ @@ -242,7 +295,7 @@ { "id": "test" }, - "test.asm.p_utg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.p_utg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "raw_unitigs": [ @@ -250,44 +303,90 @@ { "id": "test" }, - "test.asm.r_utg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.r_utg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "reverse_overlaps": [ + "read_overlaps": [ [ { "id": "test" }, - "test.asm.ovlp.reverse.bin:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "source_overlaps": [ - [ - { - "id": "test" - }, - "test.asm.ovlp.source.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.ovlp.paf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "versions": [ - "versions.yml:md5,a2ea36e18c39850b1680302d9f6c950f" + "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-15T14:25:06.074724035" + }, + "homo_sapiens pacbio hifi [fastq, [,], [,], [,]]": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.bp.r_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bp.p_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bp.p_ctg.gfa:md5,c3225425a73a8d6d46536dab72380650" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bp.hap1.p_ctg.gfa:md5,78a86b6d5741a680e22a427ca9faa74a" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bp.hap2.p_ctg.gfa:md5,ac2116fd2f22c67d4c304cbf9b9f7793" + ] + ], + null, + null, 
+ [ + "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-11-28T10:53:06.003871811" + "timestamp": "2025-04-15T14:24:12.033205578" }, - "homo_sapiens pacbio hifi [fastq, [yak, yak], [,] ]": { + "homo_sapiens pacbio hifi [fastq, [yak, yak], [,], [,] ]": { "content": [ [ [ { "id": "test" }, - "test.asm.dip.r_utg.gfa:md5,68361ac3e8babd51f3891d1637ca0fdc" + "test.dip.r_utg.gfa:md5,68361ac3e8babd51f3891d1637ca0fdc" ] ], [ @@ -295,7 +394,7 @@ { "id": "test" }, - "test.asm.dip.p_utg.gfa:md5,68361ac3e8babd51f3891d1637ca0fdc" + "test.dip.p_utg.gfa:md5,68361ac3e8babd51f3891d1637ca0fdc" ] ], [ @@ -303,17 +402,17 @@ { "id": "test" }, - "test.asm.dip.hap1.p_ctg.gfa:md5,eed5da5f3dd415dbb711edb61a09802f" + "test.dip.hap1.p_ctg.gfa:md5,eed5da5f3dd415dbb711edb61a09802f" ] ], [ - "versions.yml:md5,a2ea36e18c39850b1680302d9f6c950f" + "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.5" }, - "timestamp": "2024-11-28T10:51:45.301359171" + "timestamp": "2025-04-15T14:24:37.330378652" } -} +} \ No newline at end of file diff --git a/modules/nf-core/hifiasm/tests/nextflow.config b/modules/nf-core/hifiasm/tests/nextflow.config index 8dc2ae48..ead4b539 100644 --- a/modules/nf-core/hifiasm/tests/nextflow.config +++ b/modules/nf-core/hifiasm/tests/nextflow.config @@ -1,3 +1,3 @@ process { - ext.args = "-f0" + ext.args = "-f0 ${params.extra_output}" } diff --git a/modules/nf-core/liftoff/environment.yml b/modules/nf-core/liftoff/environment.yml index 94c10a3d..bdac6d51 100644 --- a/modules/nf-core/liftoff/environment.yml +++ b/modules/nf-core/liftoff/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/lima/environment.yml 
b/modules/nf-core/lima/environment.yml index 7c137245..2e56e30d 100644 --- a/modules/nf-core/lima/environment.yml +++ b/modules/nf-core/lima/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/lima/tests/tags.yml b/modules/nf-core/lima/tests/tags.yml deleted file mode 100644 index bf24addd..00000000 --- a/modules/nf-core/lima/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -lima: - - modules/nf-core/lima/** diff --git a/modules/nf-core/merqury/merqury/environment.yml b/modules/nf-core/merqury/merqury/environment.yml index a62b4b92..84dc78d9 100644 --- a/modules/nf-core/merqury/merqury/environment.yml +++ b/modules/nf-core/merqury/merqury/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/merqury/merqury/merqury-merqury.diff b/modules/nf-core/merqury/merqury/merqury-merqury.diff index 60b70cc5..a0a65d32 100644 --- a/modules/nf-core/merqury/merqury/merqury-merqury.diff +++ b/modules/nf-core/merqury/merqury/merqury-merqury.diff @@ -35,7 +35,6 @@ Changes in 'merqury/merqury/main.nf': when: -'modules/nf-core/merqury/merqury/tests/tags.yml' is unchanged 'modules/nf-core/merqury/merqury/tests/main.nf.test' is unchanged 'modules/nf-core/merqury/merqury/tests/main.nf.test.snap' is unchanged ************************************************************ diff --git a/modules/nf-core/merqury/merqury/tests/tags.yml b/modules/nf-core/merqury/merqury/tests/tags.yml deleted file mode 100644 index af157f18..00000000 --- a/modules/nf-core/merqury/merqury/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -merqury/merqury: - - "modules/nf-core/merqury/merqury/**" diff --git a/modules/nf-core/meryl/count/environment.yml 
b/modules/nf-core/meryl/count/environment.yml index e37d7901..deebca1f 100644 --- a/modules/nf-core/meryl/count/environment.yml +++ b/modules/nf-core/meryl/count/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/meryl/count/tests/tags.yml b/modules/nf-core/meryl/count/tests/tags.yml deleted file mode 100644 index b25bfa60..00000000 --- a/modules/nf-core/meryl/count/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -meryl/count: - - "modules/nf-core/meryl/count/**" diff --git a/modules/nf-core/meryl/unionsum/environment.yml b/modules/nf-core/meryl/unionsum/environment.yml index e37d7901..deebca1f 100644 --- a/modules/nf-core/meryl/unionsum/environment.yml +++ b/modules/nf-core/meryl/unionsum/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/meryl/unionsum/tests/tags.yml b/modules/nf-core/meryl/unionsum/tests/tags.yml deleted file mode 100644 index 707dd781..00000000 --- a/modules/nf-core/meryl/unionsum/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -meryl/unionsum: - - "modules/nf-core/meryl/unionsum/**" diff --git a/modules/nf-core/minimap2/align/minimap2-align.diff b/modules/nf-core/minimap2/align/minimap2-align.diff index 2275aff7..5cf98414 100644 --- a/modules/nf-core/minimap2/align/minimap2-align.diff +++ b/modules/nf-core/minimap2/align/minimap2-align.diff @@ -15,7 +15,6 @@ Changes in 'minimap2/align/main.nf': val bam_index_extension val cigar_paf_format -'modules/nf-core/minimap2/align/tests/tags.yml' is unchanged 'modules/nf-core/minimap2/align/tests/main.nf.test' is unchanged 'modules/nf-core/minimap2/align/tests/main.nf.test.snap' is unchanged ************************************************************ diff --git 
a/modules/nf-core/minimap2/align/tests/tags.yml b/modules/nf-core/minimap2/align/tests/tags.yml deleted file mode 100644 index 39dba374..00000000 --- a/modules/nf-core/minimap2/align/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -minimap2/align: - - "modules/nf-core/minimap2/align/**" diff --git a/modules/nf-core/pilon/environment.yml b/modules/nf-core/pilon/environment.yml index a67d2869..eca24d42 100644 --- a/modules/nf-core/pilon/environment.yml +++ b/modules/nf-core/pilon/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/pilon/tests/tags.yml b/modules/nf-core/pilon/tests/tags.yml deleted file mode 100644 index 0d94efba..00000000 --- a/modules/nf-core/pilon/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -pilon: - - "modules/nf-core/pilon/**" diff --git a/modules/nf-core/porechop/porechop/environment.yml b/modules/nf-core/porechop/porechop/environment.yml index 4defeb33..109cf8bd 100644 --- a/modules/nf-core/porechop/porechop/environment.yml +++ b/modules/nf-core/porechop/porechop/environment.yml @@ -1,5 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - bioconda::porechop=0.2.4 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/porechop/porechop/main.nf b/modules/nf-core/porechop/porechop/main.nf index 1ff02a12..34daf3e8 100644 --- a/modules/nf-core/porechop/porechop/main.nf +++ b/modules/nf-core/porechop/porechop/main.nf @@ -3,9 +3,10 @@ process PORECHOP_PORECHOP { label 'process_medium' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/porechop:0.2.4--py39h7cff6ad_2' : - 'biocontainers/porechop:0.2.4--py39h7cff6ad_2' }" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/2b/2bce1f10c51906a66c4c4d3a7485394f67e304177192ad1cce6cf586a3a18bae/data' : + 'community.wave.seqera.io/library/porechop_pigz:d1655e5b5bad786c' }" + input: tuple val(meta), path(reads) diff --git a/modules/nf-core/porechop/porechop/tests/tags.yml b/modules/nf-core/porechop/porechop/tests/tags.yml deleted file mode 100644 index 743645c2..00000000 --- a/modules/nf-core/porechop/porechop/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -porechop/porechop: - - "modules/nf-core/porechop/porechop/**" diff --git a/modules/nf-core/samtools/fastq/main.nf b/modules/nf-core/samtools/fastq/main.nf index 136744d5..696d668f 100644 --- a/modules/nf-core/samtools/fastq/main.nf +++ b/modules/nf-core/samtools/fastq/main.nf @@ -41,4 +41,19 @@ process SAMTOOLS_FASTQ { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def output = ( interleave && ! meta.single_end ) ? "touch ${prefix}_interleaved.fastq" : + meta.single_end ? 
"echo | gzip > ${prefix}_1.fastq.gz && echo | gzip > ${prefix}_singleton.fastq.gz" : + "echo | gzip > ${prefix}_1.fastq.gz && echo | gzip > ${prefix}_2.fastq.gz && echo | gzip > ${prefix}_singleton.fastq.gz" + """ + ${output} + echo | gzip > ${prefix}_other.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/samtools/fastq/tests/main.nf.test b/modules/nf-core/samtools/fastq/tests/main.nf.test index f6ac1123..971ea1d4 100644 --- a/modules/nf-core/samtools/fastq/tests/main.nf.test +++ b/modules/nf-core/samtools/fastq/tests/main.nf.test @@ -64,4 +64,56 @@ nextflow_process { ) } } + + test("bam - stub") { + + options "-stub" + + when { + process { + """ + interleave = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = interleave + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("bam_interleave - stub") { + + options "-stub" + + when { + process { + """ + interleave = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = interleave + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } } diff --git a/modules/nf-core/samtools/fastq/tests/main.nf.test.snap b/modules/nf-core/samtools/fastq/tests/main.nf.test.snap index 10e5cd3d..ff63f9ae 100644 --- a/modules/nf-core/samtools/fastq/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/fastq/tests/main.nf.test.snap @@ -27,6 +27,89 @@ }, "timestamp": "2024-02-12T18:18:23.988269" }, + "bam - stub": { 
+ "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test_singleton.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test_other.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "4": [ + "versions.yml:md5,11e074d69900de5a7dfdbe1fb4e789fd" + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "interleaved": [ + + ], + "other": [ + [ + { + "id": "test", + "single_end": false + }, + "test_other.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "singleton": [ + [ + { + "id": "test", + "single_end": false + }, + "test_singleton.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,11e074d69900de5a7dfdbe1fb4e789fd" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-03-05T12:50:58.986886415" + }, "bam_fastq": { "content": [ [ @@ -135,5 +218,70 @@ "nextflow": "23.04.3" }, "timestamp": "2024-02-12T18:18:30.859468" + }, + "bam_interleave - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_interleaved.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test_other.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "4": [ + "versions.yml:md5,11e074d69900de5a7dfdbe1fb4e789fd" + ], + "fastq": [ + + ], + "interleaved": [ + [ + { + "id": "test", + "single_end": false + }, + "test_interleaved.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "other": [ + [ + { + "id": "test", + 
"single_end": false + }, + "test_other.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "singleton": [ + + ], + "versions": [ + "versions.yml:md5,11e074d69900de5a7dfdbe1fb4e789fd" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-03-05T12:51:10.155471004" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/fastq/tests/tags.yml b/modules/nf-core/samtools/fastq/tests/tags.yml deleted file mode 100644 index d14a8534..00000000 --- a/modules/nf-core/samtools/fastq/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -samtools/fastq: - - "modules/nf-core/samtools/fastq/**" diff --git a/modules/nf-core/samtools/flagstat/tests/tags.yml b/modules/nf-core/samtools/flagstat/tests/tags.yml deleted file mode 100644 index 2d2b7255..00000000 --- a/modules/nf-core/samtools/flagstat/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -samtools/flagstat: - - modules/nf-core/samtools/flagstat/** diff --git a/modules/nf-core/samtools/idxstats/tests/tags.yml b/modules/nf-core/samtools/idxstats/tests/tags.yml deleted file mode 100644 index d3057c61..00000000 --- a/modules/nf-core/samtools/idxstats/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -samtools/idxstats: - - modules/nf-core/samtools/idxstats/** diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml deleted file mode 100644 index e0f58a7a..00000000 --- a/modules/nf-core/samtools/index/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -samtools/index: - - modules/nf-core/samtools/index/** diff --git a/modules/nf-core/samtools/sort/tests/tags.yml b/modules/nf-core/samtools/sort/tests/tags.yml deleted file mode 100644 index cd63ea20..00000000 --- a/modules/nf-core/samtools/sort/tests/tags.yml +++ /dev/null @@ -1,3 +0,0 @@ -samtools/sort: - - modules/nf-core/samtools/sort/** - - tests/modules/nf-core/samtools/sort/** diff --git a/modules/nf-core/samtools/stats/tests/tags.yml b/modules/nf-core/samtools/stats/tests/tags.yml deleted 
file mode 100644 index 7c28e30f..00000000 --- a/modules/nf-core/samtools/stats/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -samtools/stats: - - modules/nf-core/samtools/stats/** diff --git a/modules/nf-core/trimgalore/environment.yml b/modules/nf-core/trimgalore/environment.yml index b1efd94c..568b9e72 100644 --- a/modules/nf-core/trimgalore/environment.yml +++ b/modules/nf-core/trimgalore/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/trimgalore/tests/tags.yml b/modules/nf-core/trimgalore/tests/tags.yml deleted file mode 100644 index e9937691..00000000 --- a/modules/nf-core/trimgalore/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -trimgalore: - - modules/nf-core/trimgalore/** diff --git a/subworkflows/local/assemble/main.nf b/subworkflows/local/assemble/main.nf index db6a3204..1030beb9 100644 --- a/subworkflows/local/assemble/main.nf +++ b/subworkflows/local/assemble/main.nf @@ -68,8 +68,8 @@ workflow ASSEMBLE { hifi_reads .join(ont_reads) .set { hifiasm_inputs } - HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []]) - GFA_2_FA_HIFI(HIFIASM.out.processed_contigs) + HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []], [[], []]) + GFA_2_FA_HIFI(HIFIASM.out.processed_unitigs) GFA_2_FA_HIFI.out.contigs_fasta.set { ch_assembly } ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA_HIFI.out.versions) @@ -79,8 +79,8 @@ workflow ASSEMBLE { ont_reads .map { meta, ontreads -> [meta, ontreads, []] } .set { hifiasm_inputs } - HIFIASM_ONT(hifiasm_inputs, [[], [], []], [[], [], []]) - GFA_2_FA_ONT(HIFIASM_ONT.out.processed_contigs) + HIFIASM_ONT(hifiasm_inputs, [[], [], []], [[], [], []], [[], []]) + GFA_2_FA_ONT(HIFIASM_ONT.out.processed_unitigs) GFA_2_FA_ONT.out.contigs_fasta.set { ch_assembly } ch_versions = ch_versions.mix(HIFIASM_ONT.out.versions).mix(GFA_2_FA_ONT.out.versions) } @@ -89,9 
+89,9 @@ workflow ASSEMBLE { hifi_reads .map { meta, ontreads -> [meta, ontreads, []] } .set { hifiasm_inputs } - HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []]) + HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []], [[], []]) - GFA_2_FA_HIFI(HIFIASM.out.processed_contigs) + GFA_2_FA_HIFI(HIFIASM.out.processed_unitigs) GFA_2_FA_HIFI.out.contigs_fasta.set { ch_assembly } ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA_HIFI.out.versions) @@ -102,9 +102,9 @@ workflow ASSEMBLE { hifi_reads .map { meta, hifireads -> [meta, hifireads, []] } .set { hifiasm_inputs } - HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []]) + HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []], [[], []]) - GFA_2_FA_HIFI(HIFIASM.out.processed_contigs) + GFA_2_FA_HIFI(HIFIASM.out.processed_unitigs) ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA_HIFI.out.versions) @@ -133,8 +133,8 @@ workflow ASSEMBLE { ont_reads .map { meta, ontreads -> [meta, ontreads, []] } .set { hifiasm_inputs } - HIFIASM_ONT(hifiasm_inputs,[[], [], []], [[], [], []]) - GFA_2_FA_ONT(HIFIASM_ONT.out.processed_contigs) + HIFIASM_ONT(hifiasm_inputs,[[], [], []], [[], [], []], [[], []]) + GFA_2_FA_ONT(HIFIASM_ONT.out.processed_unitigs) GFA_2_FA_ONT.out.contigs_fasta .join( GFA_2_FA_HIFI.out.contigs_fasta diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml deleted file mode 100644 index 30b69d6a..00000000 --- a/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/bam_sort_stats_samtools: - - subworkflows/nf-core/bam_sort_stats_samtools/** diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml deleted file mode 100644 index ec2f2d68..00000000 --- a/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/bam_stats_samtools: - - 
subworkflows/nf-core/bam_stats_samtools/** From a3fe2ed382b61ace3d5eebb63ecc7f7bb7baee23 Mon Sep 17 00:00:00 2001 From: nf-core bot Date: Tue, 6 May 2025 08:34:12 -0500 Subject: [PATCH 18/34] Important! Template update for nf-core/tools v3.2.1 (#153) * Template update for nf-core/tools version 3.2.1 * update changelog * fix merge problem * fix nf_core_version * fix linting * fix linting * fix linting * fix ci * clean CI * more memory for HIFIASM_ONT test * more memory for HIFIASM tests * change test limits * move CI disk space cleanup * fix CI test names --------- Co-authored-by: Niklas Schandry --- .github/workflows/awsfulltest.yml | 42 +++++-------------- .github/workflows/ci.yml | 5 ++- .nf-core.yml | 6 +-- CHANGELOG.md | 2 + conf/test.config | 8 +++- modules/local/medaka/environment.yml | 5 --- nextflow.config | 4 +- ro-crate-metadata.json | 42 ++++++------------- .../main.nf | 18 ++++---- 9 files changed, 50 insertions(+), 82 deletions(-) delete mode 100644 modules/local/medaka/environment.yml diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 8dc44375..8f68e19e 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -4,43 +4,23 @@ name: nf-core AWS full size tests # It runs the -profile 'test_full' on AWS batch on: - pull_request: - branches: - - main - - master workflow_dispatch: pull_request_review: types: [submitted] + release: + types: [published] + jobs: run-platform: name: Run AWS full tests - # run only if the PR is approved by at least 2 reviewers and against the master branch or manually triggered - if: github.repository == 'nf-core/genomeassembler' && github.event.review.state == 'approved' && github.event.pull_request.base.ref == 'master' || github.event_name == 'workflow_dispatch' + # run only if the PR is approved by at least 2 reviewers and against the master/main branch or manually triggered + if: github.repository == 'nf-core/genomeassembler' && github.event.review.state 
== 'approved' && (github.event.pull_request.base.ref == 'master' || github.event.pull_request.base.ref == 'main') || github.event_name == 'workflow_dispatch' runs-on: ubuntu-latest steps: - - name: Get PR reviews - uses: octokit/request-action@v2.x - if: github.event_name != 'workflow_dispatch' - id: check_approvals - continue-on-error: true - with: - route: GET /repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/reviews?per_page=100 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Check for approvals - if: ${{ failure() && github.event_name != 'workflow_dispatch' }} - run: | - echo "No review approvals found. At least 2 approvals are required to run this action automatically." - exit 1 - - - name: Check for enough approvals (>=2) - id: test_variables - if: github.event_name != 'workflow_dispatch' + - name: Set revision variable + id: revision run: | - JSON_RESPONSE='${{ steps.check_approvals.outputs.data }}' - CURRENT_APPROVALS_COUNT=$(echo $JSON_RESPONSE | jq -c '[.[] | select(.state | contains("APPROVED")) ] | length') - test $CURRENT_APPROVALS_COUNT -ge 2 || exit 1 # At least 2 approvals are required + echo "revision=${{ (github.event_name == 'workflow_dispatch' || github.event_name == 'release') && github.sha || 'dev' }}" >> "$GITHUB_OUTPUT" - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 @@ -51,12 +31,12 @@ jobs: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - revision: ${{ github.sha }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/genomeassembler/work-${{ github.sha }} + revision: ${{ steps.revision.outputs.revision }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/genomeassembler/work-${{ steps.revision.outputs.revision }} parameters: | { "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/genomeassembler/results-${{ 
github.sha }}" + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/genomeassembler/results-${{ steps.revision.outputs.revision }}" } profiles: test_full diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bfe73cf6..5584fa4c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -86,9 +86,10 @@ jobs: echo $(realpath $CONDA)/condabin >> $GITHUB_PATH echo $(realpath python) >> $GITHUB_PATH - - name: "Run pipeline with test data ${{ matrix.ASSEMBLER }}" + - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.ASSEMBLER }} | ${{ matrix.profile }}" + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} run: | - nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.ASSEMBLER }},test,${{matrix.profile}} --outdir ./results_${{matrix.profile}}_${{ matrix.ASSEMBLER }} + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.ASSEMBLER }},test,${{matrix.profile}} --outdir ./results_${{matrix.profile}}_${{ matrix.ASSEMBLER }} - name: Clean up Disk space uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 diff --git a/.nf-core.yml b/.nf-core.yml index b1854eb2..8d3b34cb 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,13 +1,13 @@ lint: + multiqc_config: false files_exist: - conf/igenomes.config - conf/igenomes_ignored.config - - assets/multiqc_config.yml - assets/sendmail_template.txt + - assets/multiqc_config.yml files_unchanged: - .github/CONTRIBUTING.md - multiqc_config: false -nf_core_version: 3.2.0 +nf_core_version: 3.2.1 repository_type: pipeline template: author: Niklas Schandry diff --git a/CHANGELOG.md b/CHANGELOG.md index 90afe04a..fdbca982 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +[#153](https://github.com/nf-core/genomeassembler/issues/153) - Switched to nf-core template 3.2.1 + [#144](https://github.com/nf-core/genomeassembler/issues/144) - Added hifiasm_on_hifiasm 
assembly strategy ### `Fixed` diff --git a/conf/test.config b/conf/test.config index 1f8df199..47a69de4 100644 --- a/conf/test.config +++ b/conf/test.config @@ -17,7 +17,13 @@ process { time: '1.h' ] } - +/* +process { + withName: 'HIFIASM.*' { + memory = '15.GB' + } +} +*/ params { config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' diff --git a/modules/local/medaka/environment.yml b/modules/local/medaka/environment.yml deleted file mode 100644 index 37fc99d1..00000000 --- a/modules/local/medaka/environment.yml +++ /dev/null @@ -1,5 +0,0 @@ -channels: - - conda-forge - - bioconda -dependencies: - - bioconda::medaka=2.0.1 diff --git a/nextflow.config b/nextflow.config index ae6f3284..6138feb2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -232,6 +232,8 @@ podman.registry = 'quay.io' singularity.registry = 'quay.io' charliecloud.registry = 'quay.io' + + // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -297,7 +299,7 @@ manifest { // Nextflow plugins plugins { - id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.2.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet } validation { diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 48261909..66380e58 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -22,8 +22,8 @@ "@id": "./", "@type": "Dataset", "creativeWorkStatus": "InProgress", - "datePublished": "2025-03-19T08:25:11+00:00", - "description": "

    \n \n \n \"nf-core/genomeassembler\"\n \n

    \n\n[![GitHub Actions CI Status](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14986998-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14986998)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/genomeassembler** is a bioinformatics pipeline that carries out genome assembly, polishing and scaffolding from long reads (ONT or pacbio). Assembly can be done via `flye` or `hifiasm`, polishing can be carried out with `medaka` (ONT), or `pilon` (requires short-reads), and scaffolding can be done using `LINKS`, `Longstitch`, or `RagTag` (if a reference is available). Quality control includes `BUSCO`, `QUAST` and `merqury` (requires short-reads).\nCurrently, this pipeline does not implement phasing of polyploid genomes or HiC scaffolding.\n\n\n \n \"nf-core/genomeassembler\"\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,ontreads,hifireads,ref_fasta,ref_gff,shortread_F,shortread_R,paired\nsampleName,ontreads.fa.gz,hifireads.fa.gz,assembly.fasta.gz,reference.fasta,reference.gff,short_F1.fastq,short_F2.fastq,true\n```\n\nEach row represents one genome to be assembled. `sample` should contain the name of the sample, `ontreads` should contain a path to ONT reads (fastq.gz), `hifireads` a path to HiFi reads (fastq.gz), `ref_fasta` and `ref_gff` contain reference genome fasta and annotations. `shortread_F` and `shortread_R` contain paths to short-read data, `paired` indicates if short-reads are paired. 
Columns can be omitted if they contain no data, with the exception of `shortread_R`, which needs to be present if `shortread_F` is there, even if it is empty.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/genomeassembler \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/genomeassembler/usage) and the [parameter documentation](https://nf-co.re/genomeassembler/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/genomeassembler/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/genomeassembler/output).\n\n## Credits\n\nnf-core/genomeassembler was originally written by [Niklas Schandry](https://github.com/nschan), of the Faculty of Biology of the Ludwig-Maximilians University (LMU) in Munich, Germany.\n\nI thank the following people for their extensive assistance and constructive reviews during the development of this pipeline:\n\n- [Mahesh Binzer-Panchal](https://github.com/mahesh-panchal)\n- [Matthias H\u00f6rtenhuber](https://github.com/mashehu)\n- [Louis Le N\u00e9zet](https://github.com/LouisLeNezet)\n- [J\u00falia Mir Pedrol](https://github.com/mirpedrol)\n- [Daniel Straub](https://github.com/d4straub)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or 
help, don't hesitate to get in touch on the [Slack `#genomeassembler` channel](https://nfcore.slack.com/channels/genomeassembler) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/genomeassembler for your analysis, please cite it using the following doi: [10.5281/zenodo.14986998](https://doi.org/10.5281/zenodo.14986998)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "datePublished": "2025-04-30T12:26:07+00:00", + "description": "

    \n \n \n \"nf-core/genomeassembler\"\n \n

    \n\n[![GitHub Actions CI Status](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/genomeassembler** is a bioinformatics pipeline that ...\n\n\n\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\n\nNow, you can run the pipeline using:\n\n\n\n```bash\nnextflow run nf-core/genomeassembler \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/genomeassembler/usage) and the [parameter documentation](https://nf-co.re/genomeassembler/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/genomeassembler/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/genomeassembler/output).\n\n## Credits\n\nnf-core/genomeassembler was originally written by Niklas Schandry.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing 
guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#genomeassembler` channel](https://nfcore.slack.com/channels/genomeassembler) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -105,7 +105,7 @@ }, "mentions": [ { - "@id": "#a74a52d1-f86a-42f5-b7ef-1224396e1e3c" + "@id": "#c5ce6267-6257-4d17-b43b-adb516de81e9" } ], "name": "nf-core/genomeassembler" @@ -144,37 +144,19 @@ } ], "dateCreated": "", - "dateModified": "2025-03-19T09:25:11Z", + "dateModified": "2025-04-30T12:26:07Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", - "keywords": [ - "nf-core", - "nextflow", - "genome-assembly" - ], - "license": [ - "MIT" - ], - "maintainer": [ - { - "@id": "https://orcid.org/0000-0003-3099-7860" - } - ], - "name": [ - "nf-core/genomeassembler" - ], + "keywords": ["nf-core", "nextflow", "genome-assembly"], + "license": ["MIT"], + "name": ["nf-core/genomeassembler"], "programmingLanguage": { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" }, "sdPublisher": { "@id": "https://nf-co.re/" }, - "url": [ - "https://github.com/nf-core/genomeassembler", - "https://nf-co.re/genomeassembler/dev/" - ], - "version": [ - "1.1.0dev" - ] + "url": ["https://github.com/nf-core/genomeassembler", "https://nf-co.re/genomeassembler/dev/"], + "version": ["1.1.0dev"] }, { 
"@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", @@ -189,11 +171,11 @@ "version": "!>=24.04.2" }, { - "@id": "#a74a52d1-f86a-42f5-b7ef-1224396e1e3c", + "@id": "#c5ce6267-6257-4d17-b43b-adb516de81e9", "@type": "TestSuite", "instance": [ { - "@id": "#9c9d46ea-11b2-4050-9976-134db40769ea" + "@id": "#788bf1cf-1341-4537-9ac5-0b09c41ca3da" } ], "mainEntity": { @@ -202,7 +184,7 @@ "name": "Test suite for nf-core/genomeassembler" }, { - "@id": "#9c9d46ea-11b2-4050-9976-134db40769ea", + "@id": "#788bf1cf-1341-4537-9ac5-0b09c41ca3da", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/genomeassembler", "resource": "repos/nf-core/genomeassembler/actions/workflows/ci.yml", diff --git a/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf b/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf index 0f8cf813..580914ca 100644 --- a/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf @@ -121,7 +121,7 @@ workflow PIPELINE_COMPLETION { plaintext_email // boolean: Send plain-text email instead of HTML outdir // path: Path to output directory where results will be published monochrome_logs // boolean: Disable ANSI colour codes in log output - hook_url // string: hook URL for notifications + hook_url // string: hook URL for notifications main: summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") @@ -138,7 +138,7 @@ workflow PIPELINE_COMPLETION { plaintext_email, outdir, monochrome_logs, - [], + [] ) } @@ -171,7 +171,7 @@ def validateInputSamplesheet(input) { error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. 
single-end or paired-end: ${metas[0].id}") } - return [metas[0], fastqs] + return [ metas[0], fastqs ] } // // Generate methods description for MultiQC @@ -181,10 +181,10 @@ def toolCitationText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - ".", - ].join(' ').trim() + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "." + ].join(' ').trim() return citation_text } @@ -194,8 +194,8 @@ def toolBibliographyText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ - "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • " - ].join(' ').trim() + "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", + ].join(' ').trim() return reference_text } From 463fa0a200c0758ca1d6b7617b5d377faf66bca7 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Wed, 7 May 2025 15:36:05 +0200 Subject: [PATCH 19/34] fix GFA_2_FA* outputs (#150) --- conf/modules.config | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index 19737fe7..6c0c27b2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -178,6 +178,20 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + withName: GFA_2_FA_HIFI { + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/hifiasm/fasta" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: GFA_2_FA_ONT { + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/hifiasm_ont/fasta" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } withName: '.*ASSEMBLE:.*RAGTAG_PATCH' { publishDir = [ path: { "${params.outdir}/${meta.id}/assembly/ragtag/" }, From 2c68e58c9b6c5b38e2404138a86035f76d454b3a Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Thu, 8 May 2025 11:20:07 +0200 Subject: [PATCH 20/34] Module updates (#154) * newer quast * fix busco inputs * modules update * update changelog * avoid capturing python locale warning in quast version * correct minimap2_align inputs * separate configs for easier maintainance, change minimap2 preset to lr:hq for ONT reads * update changelog --- CHANGELOG.md | 8 +- conf/modules.config | 611 +----------------- conf/modules/QC/alignments.config | 81 +++ conf/modules/QC/busco.config | 57 ++ conf/modules/QC/jellyfish-genomescope.config | 37 ++ conf/modules/QC/merqury.config | 51 ++ conf/modules/QC/meryl.config | 16 + conf/modules/QC/quast.config | 51 ++ conf/modules/assembly.config | 60 ++ conf/modules/hifi-prep.config | 16 + conf/modules/liftoff.config | 50 ++ 
conf/modules/ont-prep.config | 23 + conf/modules/polishing.config | 31 + conf/modules/report.config | 9 + conf/modules/scaffolding.config | 33 + conf/modules/trimgalore.config | 9 + conf/old.modules.config | 597 +++++++++++++++++ modules.json | 6 +- modules/local/quast/environment.yml | 2 +- modules/local/quast/main.nf | 10 +- modules/nf-core/hifiasm/main.nf | 4 +- .../nf-core/hifiasm/tests/main.nf.test.snap | 30 +- modules/nf-core/links/tests/main.nf.test | 66 +- modules/nf-core/links/tests/main.nf.test.snap | 160 ++--- .../nf-core/minimap2/align/environment.yml | 5 +- modules/nf-core/minimap2/align/main.nf | 4 +- .../minimap2/align/minimap2-align.diff | 2 +- .../minimap2/align/tests/main.nf.test.snap | 122 ++-- subworkflows/local/qc/busco/main.nf | 2 +- 29 files changed, 1348 insertions(+), 805 deletions(-) create mode 100644 conf/modules/QC/alignments.config create mode 100644 conf/modules/QC/busco.config create mode 100644 conf/modules/QC/jellyfish-genomescope.config create mode 100644 conf/modules/QC/merqury.config create mode 100644 conf/modules/QC/meryl.config create mode 100644 conf/modules/QC/quast.config create mode 100644 conf/modules/assembly.config create mode 100644 conf/modules/hifi-prep.config create mode 100644 conf/modules/liftoff.config create mode 100644 conf/modules/ont-prep.config create mode 100644 conf/modules/polishing.config create mode 100644 conf/modules/report.config create mode 100644 conf/modules/scaffolding.config create mode 100644 conf/modules/trimgalore.config create mode 100644 conf/old.modules.config diff --git a/CHANGELOG.md b/CHANGELOG.md index fdbca982..4daf67d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,10 +9,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [#153](https://github.com/nf-core/genomeassembler/issues/153) - Switched to nf-core template 3.2.1 -[#144](https://github.com/nf-core/genomeassembler/issues/144) - Added hifiasm_on_hifiasm assembly strategy 
+[#144](https://github.com/nf-core/genomeassembler/issues/144) - Added `hifiasm_on_hifiasm` assembly strategy ### `Fixed` +[#154](https://github.com/nf-core/genomeassembler/pull/154) - Module maintainance: + +- updated `hifiasm`, `minimap2`, `links` nf-core modules +- updated container in local `quast` module +- separated `modules.config` into several files for easier navigation and maintainance + [#138](https://github.com/nf-core/genomeassembler/pull/138) - Switched to RagTag nf-core module [#142](https://github.com/nf-core/genomeassembler/pull/142) - Switch `--collect` to accept a glob pattern instead of a folder, consistent with input validation. diff --git a/conf/modules.config b/conf/modules.config index 6c0c27b2..efacbbdd 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -17,581 +17,38 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - /* - ---------- - Reads in - ONT - ---------- - */ - // nanoq: local module; QC - withName: NANOQ { - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/nanoq" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // only local module - withName: COLLECT { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/collect" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // porechop: nf-core module - withName: PORECHOP { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/porechop" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // Genomescope / jellyfish: local modules - withName: COUNT { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/count/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - withName: DUMP { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/dump/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: STATS { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/stats/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: HISTO { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/histo/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: GENOMESCOPE { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/genomescope/genomescope/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - /* - ---------- - Reads in - HiFi - all nf-core - ---------- - */ - withName: LIMA { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/lima/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: TO_FASTQ { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/lima/fastq/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - /* - ---------- - Reads in - Short reads - all nf-core - ---------- - */ - withName: TRIMGALORE { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/trimgalore" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: MERYL_COUNT { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/meryl/count/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - withName: MERYL_UNIONSUM { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/meryl/unionsum/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - /* - ---------- - ASSEMBLY - ---------- - */ - // FLYE: nf-core - withName: FLYE { - ext.args = { - [ - meta.genome_size ? "--genome-size ${meta.genome_size}" : '', - params.flye_args - ].join(" ").trim() - } - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/flye/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // HIFIASM: - /* updated nf-core module */ - - withName: HIFIASM { - ext.args = { [ params.hifiasm_args ].join(" ").trim() } - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/hifiasm/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: HIFIASM_ONT { - ext.args = { [ params.hifiasm_args, "--ont" ].join(" ").trim() } - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/hifiasm_ont/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: GFA_2_FA { - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/hifiasm/fasta" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: GFA_2_FA_HIFI { - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/hifiasm/fasta" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: GFA_2_FA_ONT { - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/hifiasm_ont/fasta" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - withName: '.*ASSEMBLE:.*RAGTAG_PATCH' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/ragtag/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_assembly_patch" } - } - - /* - ---------- - Polishing - ---------- - */ - // Medaka: local; nf-core module cant deal with gzipped input - withName: MEDAKA { - ext.args1 = { } // args mini_align - ext.args2 = { [params.medaka_model ? "--model ${params.medaka_model}" : ''].join(" ").trim() } // args medaka_inference - ext.args3 = { } // args medaka sequence - ext.prefix = { "${meta.id}_medaka" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/polish/medaka" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // Pilon: nf-core - withName: PILON { - ext.prefix = { "${meta.id}_pilon" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/polish/pilon" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - /* - ---------- - Scaffolding - ---------- - */ - // RagTag - withName: '.*SCAFFOLD:.*RAGTAG_SCAFFOLD' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/ragtag/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_ragtag" } - ext.args = [ - "-C", - "-u", - "-r", - "-w" - ].join(" ").trim() - } - - withName: LINKS { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/links/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_links" } - ext.args = ["-t 40,200", "-d 500,2000,5000"].join(" ").trim() - } - - // No nf-core module yet. 
- withName: LONGSTITCH { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/longstitch/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_longstitch" } - } - /* - -------- - Annotations - liftoff nf-core module - -------- - gff file goes with fasta file - */ - - withName: '.*ASSEMBLE:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_assembly" } - } - - withName: '.*PILON:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/polish/pilon/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_pilon" } - } - - withName: '.*MEDAKA:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/polish/medaka" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_medaka" } - } - - withName: '.*RAGTAG:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/ragtag/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_ragtag" } - } - - withName: '.*LONGSTITCH:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/longstitch" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_longstitch" } - } - - withName: '.*LINKS:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/links" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - ext.prefix = { "${meta.id}_links" } - } - - /* - ---------- - QC - ---------- - */ - - // BUSCO: nf-core - withName: '.*:ASSEMBLE:.*:BUSCO' { - ext.prefix = { "${meta.id}_assembly-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*PILON:.*:BUSCO' { - ext.prefix = { "${meta.id}_pilon-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*MEDAKA:.*:BUSCO' { - ext.prefix = { "${meta.id}_medaka-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LINKS:.*:BUSCO' { - ext.prefix = { "${meta.id}_links-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LONGSTITCH:.*:BUSCO' { - ext.prefix = { "${meta.id}_longstitch-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - // avoid catching ragtag from ont_on_hifi assembly - withName: '.*:SCAFFOLD:.*RAGTAG:.*:BUSCO' { - ext.prefix = { "${meta.id}_ragtag-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - // QUAST: Prefer to keep the local module since it can deal with the inputs I have - withName: '.*:ASSEMBLE:.*:QUAST' { - ext.prefix = { "${meta.id}_assembly" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*PILON:.*:QUAST' { - ext.prefix = { "${meta.id}_pilon" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*MEDAKA:.*:QUAST' { - ext.prefix = { "${meta.id}_medaka" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LINKS:.*:QUAST' { - ext.prefix = { "${meta.id}_links" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LONGSTITCH:.*:QUAST' { - ext.prefix = { "${meta.id}_longstitch" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - // avoid catching ragtag from ont_on_hifi assembly - withName: '.*:SCAFFOLD:.*RAGTAG:.*:QUAST' { - ext.prefix = { "${meta.id}_ragtag" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - // MERQURY: nf-core - withName: '.*:ASSEMBLE:.*:MERQURY' { - ext.prefix = { "${meta.id}_assembly" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*PILON:.*:MERQURY' { - ext.prefix = { "${meta.id}_pilon" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*MEDAKA:.*:MERQURY' { - ext.prefix = { "${meta.id}_medaka" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LINKS:.*:MERQURY' { - ext.prefix = { "${meta.id}_links" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LONGSTITCH:.*:MERQURY' { - ext.prefix = { "${meta.id}_longstitch" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - // avoid catching ragtag from ont_on_hifi assembly - withName: '.*:SCAFFOLD:.*RAGTAG:.*:MERQURY' { - ext.prefix = { "${meta.id}_ragtag" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // Refence - withName: '.*MAP_TO_REF.*' { - ext.prefix = { "${meta.id}_to_reference" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/reference/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - // Assembly mappings - withName: '.*ASSEMBLE:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_assembly" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - withName: '.*MEDAKA:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_medaka" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - withName: '.*PILON:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_pilon" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - withName: '.*LONGSTITCH:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_longstitch" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - withName: '.*LINKS:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_links" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - withName: '.*RAGTAG:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_ragtag" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - // Pilon mapping - withName: '.*PILON:MAP_SR.*' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/shortreads/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - ext.prefix = { "${meta.id}_shortreads" } - ext.args = { "-ax sr " } - } - /* - -------- - Report - */ - withName: REPORT { - publishDir = [ - path: { "${params.outdir}/report/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } } + +// Read preparation +includeConfig 'modules/ont-prep.config' +includeConfig 'modules/hifi-prep.config' +includeConfig 'modules/trimgalore.config' + +// Assembly +includeConfig 'modules/assembly.config' + +// Polishing +includeConfig 'modules/polishing.config' + +// Scaffolding +includeConfig 'modules/scaffolding.config' + +// Annotation liftoff +includeConfig 'modules/liftoff.config' + +// QC +// BUSCO +includeConfig 'modules/QC/busco.config' +// QUAST +includeConfig 'modules/QC/quast.config' +// merqury +includeConfig 'modules/QC/merqury.config' +// alignments +includeConfig 'modules/QC/alignments.config' +// Meryl +includeConfig 'modules/QC/meryl.config' +// Jellyfish, Genomescope +includeConfig 'modules/QC/jellyfish-genomescope.config' + +// Report +includeConfig 'modules/report.config' diff --git a/conf/modules/QC/alignments.config b/conf/modules/QC/alignments.config new file mode 100644 index 00000000..f9536164 --- /dev/null +++ b/conf/modules/QC/alignments.config @@ -0,0 +1,81 @@ +process { + // Refence + withName: '.*MAP_TO_REF.*' { + ext.prefix = { "${meta.id}_to_reference" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/reference/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? 
"-ax lr:hq" : "-ax map-hifi" + } + } + // Assembly mappings + withName: '.*ASSEMBLE:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_assembly" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? "-ax lr:hq" : "-ax map-hifi" + } + } + withName: '.*MEDAKA:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_medaka" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? "-ax lr:hq" : "-ax map-hifi" + } + } + withName: '.*PILON:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_pilon" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? "-ax lr:hq" : "-ax map-hifi" + } + } + withName: '.*LONGSTITCH:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_longstitch" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? 
"-ax lr:hq" : "-ax map-hifi" + } + } + withName: '.*LINKS:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_links" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? "-ax lr:hq" : "-ax map-hifi" + } + } + withName: '.*RAGTAG:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_ragtag" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? "-ax lr:hq" : "-ax map-hifi" + } + } +} diff --git a/conf/modules/QC/busco.config b/conf/modules/QC/busco.config new file mode 100644 index 00000000..4972756f --- /dev/null +++ b/conf/modules/QC/busco.config @@ -0,0 +1,57 @@ +process { + withName: '.*:ASSEMBLE:.*:BUSCO' { + ext.prefix = { "${meta.id}_assembly-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*PILON:.*:BUSCO' { + ext.prefix = { "${meta.id}_pilon-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*MEDAKA:.*:BUSCO' { + ext.prefix = { "${meta.id}_medaka-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: '.*LINKS:.*:BUSCO' { + ext.prefix = { "${meta.id}_links-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*LONGSTITCH:.*:BUSCO' { + ext.prefix = { "${meta.id}_longstitch-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // avoid catching ragtag from ont_on_hifi assembly + withName: '.*:SCAFFOLD:.*RAGTAG:.*:BUSCO' { + ext.prefix = { "${meta.id}_ragtag-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/QC/jellyfish-genomescope.config b/conf/modules/QC/jellyfish-genomescope.config new file mode 100644 index 00000000..18f70cd9 --- /dev/null +++ b/conf/modules/QC/jellyfish-genomescope.config @@ -0,0 +1,37 @@ +process { + withName: COUNT { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/count/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: DUMP { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/dump/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: STATS { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/stats/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: HISTO { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/histo/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: GENOMESCOPE { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/genomescope/genomescope/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/QC/merqury.config b/conf/modules/QC/merqury.config new file mode 100644 index 00000000..06a31325 --- /dev/null +++ b/conf/modules/QC/merqury.config @@ -0,0 +1,51 @@ +process { + withName: '.*:ASSEMBLE:.*:MERQURY' { + ext.prefix = { "${meta.id}_assembly" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*PILON:.*:MERQURY' { + ext.prefix = { "${meta.id}_pilon" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*MEDAKA:.*:MERQURY' { + ext.prefix = { "${meta.id}_medaka" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*LINKS:.*:MERQURY' { + ext.prefix = { "${meta.id}_links" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*LONGSTITCH:.*:MERQURY' { + ext.prefix = { "${meta.id}_longstitch" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + // avoid catching ragtag from ont_on_hifi assembly + withName: '.*:SCAFFOLD:.*RAGTAG:.*:MERQURY' { + ext.prefix = { "${meta.id}_ragtag" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/QC/meryl.config b/conf/modules/QC/meryl.config new file mode 100644 index 00000000..41452a69 --- /dev/null +++ b/conf/modules/QC/meryl.config @@ -0,0 +1,16 @@ +process { + withName: MERYL_COUNT { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/meryl/count/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: MERYL_UNIONSUM { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/meryl/unionsum/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/QC/quast.config b/conf/modules/QC/quast.config new file mode 100644 index 00000000..b97a7494 --- /dev/null +++ b/conf/modules/QC/quast.config @@ -0,0 +1,51 @@ +process { + withName: '.*:ASSEMBLE:.*:QUAST' { + ext.prefix = { "${meta.id}_assembly" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*PILON:.*:QUAST' { + ext.prefix = { "${meta.id}_pilon" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*MEDAKA:.*:QUAST' { + ext.prefix = { "${meta.id}_medaka" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: '.*LINKS:.*:QUAST' { + ext.prefix = { "${meta.id}_links" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*LONGSTITCH:.*:QUAST' { + ext.prefix = { "${meta.id}_longstitch" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // avoid catching ragtag from ont_on_hifi assembly + withName: '.*:SCAFFOLD:.*RAGTAG:.*:QUAST' { + ext.prefix = { "${meta.id}_ragtag" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/assembly.config b/conf/modules/assembly.config new file mode 100644 index 00000000..eb18c7ef --- /dev/null +++ b/conf/modules/assembly.config @@ -0,0 +1,60 @@ +process { + withName: FLYE { + ext.args = { + [ + meta.genome_size ? "--genome-size ${meta.genome_size}" : '', + params.flye_args + ].join(" ").trim() + } + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/flye/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: HIFIASM { + ext.args = { [ params.hifiasm_args ].join(" ").trim() } + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/hifiasm/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: HIFIASM_ONT { + ext.args = { [ params.hifiasm_args, "--ont" ].join(" ").trim() } + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/hifiasm_ont/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: GFA_2_FA { + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/hifiasm/fasta" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: GFA_2_FA_HIFI { + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/hifiasm/fasta" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: GFA_2_FA_ONT { + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/hifiasm_ont/fasta" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*ASSEMBLE:.*RAGTAG_PATCH' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/ragtag/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_assembly_patch" } + } +} diff --git a/conf/modules/hifi-prep.config b/conf/modules/hifi-prep.config new file mode 100644 index 00000000..ec84a420 --- /dev/null +++ b/conf/modules/hifi-prep.config @@ -0,0 +1,16 @@ +process { + withName: LIMA { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/lima/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: TO_FASTQ { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/lima/fastq/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } +} diff --git a/conf/modules/liftoff.config b/conf/modules/liftoff.config new file mode 100644 index 00000000..3f470e90 --- /dev/null +++ b/conf/modules/liftoff.config @@ -0,0 +1,50 @@ +process { + withName: '.*ASSEMBLE:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_assembly" } + } + withName: '.*PILON:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/polish/pilon/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_pilon" } + } + withName: '.*MEDAKA:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/polish/medaka" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_medaka" } + } + withName: '.*RAGTAG:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/ragtag/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_ragtag" } + } + withName: '.*LONGSTITCH:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/longstitch" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_longstitch" } + } + withName: '.*LINKS:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/links" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + ext.prefix = { "${meta.id}_links" } + } +} diff --git a/conf/modules/ont-prep.config b/conf/modules/ont-prep.config new file mode 100644 index 00000000..6b824273 --- /dev/null +++ b/conf/modules/ont-prep.config @@ -0,0 +1,23 @@ +process { + withName: NANOQ { + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/nanoq" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: COLLECT { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/collect" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: PORECHOP { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/porechop" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/polishing.config b/conf/modules/polishing.config new file mode 100644 index 00000000..8c39f50e --- /dev/null +++ b/conf/modules/polishing.config @@ -0,0 +1,31 @@ +process { + withName: MEDAKA { + ext.args1 = { } // args mini_align + ext.args2 = { [params.medaka_model ? "--model ${params.medaka_model}" : ''].join(" ").trim() } // args medaka_inference + ext.args3 = { } // args medaka sequence + ext.prefix = { "${meta.id}_medaka" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/polish/medaka" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // Pilon mapping + withName: '.*PILON:MAP_SR.*' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/shortreads/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + ext.prefix = { "${meta.id}_shortreads" } + ext.args = { "-ax sr " } + } + withName: PILON { + ext.prefix = { "${meta.id}_pilon" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/polish/pilon" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/report.config b/conf/modules/report.config new file mode 100644 index 00000000..b405d0f6 --- /dev/null +++ b/conf/modules/report.config @@ -0,0 +1,9 @@ +process { + withName: REPORT { + publishDir = [ + path: { "${params.outdir}/report/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/scaffolding.config b/conf/modules/scaffolding.config new file mode 100644 index 00000000..878b8bfa --- /dev/null +++ b/conf/modules/scaffolding.config @@ -0,0 +1,33 @@ +process { + withName: '.*SCAFFOLD:.*RAGTAG_SCAFFOLD' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/ragtag/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_ragtag" } + ext.args = [ + "-C", + "-u", + "-r", + "-w" + ].join(" ").trim() + } + withName: LINKS { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/links/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_links" } + ext.args = ["-t 40,200", "-d 500,2000,5000"].join(" ").trim() + } + withName: LONGSTITCH { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/longstitch/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + ext.prefix = { "${meta.id}_longstitch" } + } +} diff --git a/conf/modules/trimgalore.config b/conf/modules/trimgalore.config new file mode 100644 index 00000000..dc899e99 --- /dev/null +++ b/conf/modules/trimgalore.config @@ -0,0 +1,9 @@ +process { + withName: TRIMGALORE { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/trimgalore" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/old.modules.config b/conf/old.modules.config new file mode 100644 index 00000000..6c0c27b2 --- /dev/null +++ b/conf/old.modules.config @@ -0,0 +1,597 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + // General catch-all + publishDir = [ + path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + /* + ---------- + Reads in + ONT + ---------- + */ + // nanoq: local module; QC + withName: NANOQ { + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/nanoq" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + // only local module + withName: COLLECT { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/collect" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // porechop: nf-core module + withName: PORECHOP { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/porechop" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // Genomescope / jellyfish: local modules + withName: COUNT { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/count/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: DUMP { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/dump/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: STATS { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/stats/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: HISTO { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/histo/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: GENOMESCOPE { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/genomescope/genomescope/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + /* + ---------- + Reads in + HiFi + all nf-core + ---------- + */ + withName: LIMA { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/lima/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: TO_FASTQ { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/lima/fastq/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + /* + ---------- + Reads in + Short reads + all nf-core + ---------- + */ + withName: TRIMGALORE { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/trimgalore" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: MERYL_COUNT { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/meryl/count/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: MERYL_UNIONSUM { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/meryl/unionsum/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + /* + ---------- + ASSEMBLY + ---------- + */ + // FLYE: nf-core + withName: FLYE { + ext.args = { + [ + meta.genome_size ? "--genome-size ${meta.genome_size}" : '', + params.flye_args + ].join(" ").trim() + } + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/flye/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // HIFIASM: + /* updated nf-core module */ + + withName: HIFIASM { + ext.args = { [ params.hifiasm_args ].join(" ").trim() } + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/hifiasm/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: HIFIASM_ONT { + ext.args = { [ params.hifiasm_args, "--ont" ].join(" ").trim() } + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/hifiasm_ont/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: GFA_2_FA { + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/hifiasm/fasta" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: GFA_2_FA_HIFI { + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/hifiasm/fasta" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: GFA_2_FA_ONT { + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/hifiasm_ont/fasta" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*ASSEMBLE:.*RAGTAG_PATCH' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/ragtag/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_assembly_patch" } + } + + /* + ---------- + Polishing + ---------- + */ + // Medaka: local; nf-core module cant deal with gzipped input + withName: MEDAKA { + ext.args1 = { } // args mini_align + ext.args2 = { [params.medaka_model ? "--model ${params.medaka_model}" : ''].join(" ").trim() } // args medaka_inference + ext.args3 = { } // args medaka sequence + ext.prefix = { "${meta.id}_medaka" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/polish/medaka" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // Pilon: nf-core + withName: PILON { + ext.prefix = { "${meta.id}_pilon" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/polish/pilon" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + /* + ---------- + Scaffolding + ---------- + */ + // RagTag + withName: '.*SCAFFOLD:.*RAGTAG_SCAFFOLD' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/ragtag/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_ragtag" } + ext.args = [ + "-C", + "-u", + "-r", + "-w" + ].join(" ").trim() + } + + withName: LINKS { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/links/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_links" } + ext.args = ["-t 40,200", "-d 500,2000,5000"].join(" ").trim() + } + + // No nf-core module yet. + withName: LONGSTITCH { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/longstitch/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_longstitch" } + } + /* + -------- + Annotations + liftoff nf-core module + -------- + gff file goes with fasta file + */ + + withName: '.*ASSEMBLE:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_assembly" } + } + + withName: '.*PILON:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/polish/pilon/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_pilon" } + } + + withName: '.*MEDAKA:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/polish/medaka" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + ext.prefix = { "${meta.id}_medaka" } + } + + withName: '.*RAGTAG:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/ragtag/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_ragtag" } + } + + withName: '.*LONGSTITCH:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/longstitch" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_longstitch" } + } + + withName: '.*LINKS:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/links" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_links" } + } + + /* + ---------- + QC + ---------- + */ + + // BUSCO: nf-core + withName: '.*:ASSEMBLE:.*:BUSCO' { + ext.prefix = { "${meta.id}_assembly-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*PILON:.*:BUSCO' { + ext.prefix = { "${meta.id}_pilon-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*MEDAKA:.*:BUSCO' { + ext.prefix = { "${meta.id}_medaka-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: '.*LINKS:.*:BUSCO' { + ext.prefix = { "${meta.id}_links-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*LONGSTITCH:.*:BUSCO' { + ext.prefix = { "${meta.id}_longstitch-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // avoid catching ragtag from ont_on_hifi assembly + withName: '.*:SCAFFOLD:.*RAGTAG:.*:BUSCO' { + ext.prefix = { "${meta.id}_ragtag-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + // QUAST: Prefer to keep the local module since it can deal with the inputs I have + withName: '.*:ASSEMBLE:.*:QUAST' { + ext.prefix = { "${meta.id}_assembly" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*PILON:.*:QUAST' { + ext.prefix = { "${meta.id}_pilon" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*MEDAKA:.*:QUAST' { + ext.prefix = { "${meta.id}_medaka" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: '.*LINKS:.*:QUAST' { + ext.prefix = { "${meta.id}_links" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*LONGSTITCH:.*:QUAST' { + ext.prefix = { "${meta.id}_longstitch" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // avoid catching ragtag from ont_on_hifi assembly + withName: '.*:SCAFFOLD:.*RAGTAG:.*:QUAST' { + ext.prefix = { "${meta.id}_ragtag" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + // MERQURY: nf-core + withName: '.*:ASSEMBLE:.*:MERQURY' { + ext.prefix = { "${meta.id}_assembly" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*PILON:.*:MERQURY' { + ext.prefix = { "${meta.id}_pilon" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*MEDAKA:.*:MERQURY' { + ext.prefix = { "${meta.id}_medaka" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*LINKS:.*:MERQURY' { + ext.prefix = { "${meta.id}_links" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: '.*LONGSTITCH:.*:MERQURY' { + ext.prefix = { "${meta.id}_longstitch" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // avoid catching ragtag from ont_on_hifi assembly + withName: '.*:SCAFFOLD:.*RAGTAG:.*:MERQURY' { + ext.prefix = { "${meta.id}_ragtag" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // Refence + withName: '.*MAP_TO_REF.*' { + ext.prefix = { "${meta.id}_to_reference" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/reference/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" + } + } + // Assembly mappings + withName: '.*ASSEMBLE:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_assembly" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" + } + } + withName: '.*MEDAKA:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_medaka" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? 
"-ax map-ont" : "-ax map-hifi" + } + } + withName: '.*PILON:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_pilon" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" + } + } + withName: '.*LONGSTITCH:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_longstitch" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" + } + } + withName: '.*LINKS:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_links" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" + } + } + withName: '.*RAGTAG:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_ragtag" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? 
"-ax map-ont" : "-ax map-hifi" + } + } + // Pilon mapping + withName: '.*PILON:MAP_SR.*' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/shortreads/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_shortreads" } + ext.args = { "-ax sr " } + } + /* + -------- + Report + */ + withName: REPORT { + publishDir = [ + path: { "${params.outdir}/report/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/modules.json b/modules.json index e121a80e..f1341e7b 100644 --- a/modules.json +++ b/modules.json @@ -22,7 +22,7 @@ }, "hifiasm": { "branch": "master", - "git_sha": "971d801da83f2e50ab5935f93e40375adaa79b51", + "git_sha": "c457b50bf9187031f65b0fb090dc022e8814c729", "installed_by": ["modules"] }, "liftoff": { @@ -38,7 +38,7 @@ }, "links": { "branch": "master", - "git_sha": "e29af567bb2a1095fd23f284f777d31eba92310e", + "git_sha": "d0fcd22f005a2508bc3dc9ee32c206acadc11b28", "installed_by": ["modules"] }, "merqury/merqury": { @@ -59,7 +59,7 @@ }, "minimap2/align": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "a532706a19b3d83f14b1d48a6a815ed33eb48b0c", "installed_by": ["modules"], "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, diff --git a/modules/local/quast/environment.yml b/modules/local/quast/environment.yml index 2c14403d..50e43eac 100644 --- a/modules/local/quast/environment.yml +++ b/modules/local/quast/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::quast=5.2.0 + - bioconda::quast=5.3.0 diff --git a/modules/local/quast/main.nf b/modules/local/quast/main.nf index 5da08e19..118c1b3a 100644 --- a/modules/local/quast/main.nf +++ b/modules/local/quast/main.nf @@ -3,9 +3,9 @@ process QUAST { label 'process_medium' conda "${moduleDir}/environment.yml" - container 
"${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://depot.galaxyproject.org/singularity/quast:5.2.0--py39pl5321h2add14b_1' - : 'biocontainers/quast:5.2.0--py39pl5321heaaa4ec_4'}" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a5/a515d04307ea3e0178af75132105cd36c87d0116c6f9daecf81650b973e870fd/data' : + 'community.wave.seqera.io/library/quast:5.3.0--755a216045b6dbdd' }" input: tuple val(meta), path(consensus), path(fasta), path(gff), path(ref_bam), path(bam) @@ -44,7 +44,7 @@ process QUAST { cat <<-END_VERSIONS > versions.yml "${task.process}": - quast: \$(quast.py --version 2>&1 | sed 's/^.*QUAST v//; s/ .*\$//') + quast: \$(quast.py --version 2>&1 | sed 's/^.*QUAST v//; s/ .*\$//' | tail -n1) END_VERSIONS """ stub: @@ -55,7 +55,7 @@ process QUAST { cat <<-END_VERSIONS > versions.yml "${task.process}": - quast: \$(quast.py --version 2>&1 | sed 's/^.*QUAST v//; s/ .*\$//') + quast: \$(quast.py --version 2>&1 | sed 's/^.*QUAST v//; s/ .*\$//' | tail -n1) END_VERSIONS """ } diff --git a/modules/nf-core/hifiasm/main.nf b/modules/nf-core/hifiasm/main.nf index e85e26e1..7330920e 100644 --- a/modules/nf-core/hifiasm/main.nf +++ b/modules/nf-core/hifiasm/main.nf @@ -96,8 +96,8 @@ process HIFIASM { touch ${prefix}.p_utg.gfa touch ${prefix}.p_ctg.gfa touch ${prefix}.a_ctg.gfa - touch ${prefix}.hap1.p_ctg.gfa - touch ${prefix}.hap2.p_ctg.gfa + touch ${prefix}.bp.hap1.p_ctg.gfa + touch ${prefix}.bp.hap2.p_ctg.gfa echo "" | gzip > ${prefix}.ec.fa.gz echo "" | gzip > ${prefix}.ovlp.paf.gz touch ${prefix}.stderr.log diff --git a/modules/nf-core/hifiasm/tests/main.nf.test.snap b/modules/nf-core/hifiasm/tests/main.nf.test.snap index 1e2d86f4..cf8a7eba 100644 --- a/modules/nf-core/hifiasm/tests/main.nf.test.snap +++ b/modules/nf-core/hifiasm/tests/main.nf.test.snap @@ -206,10 +206,20 @@ ] ], "5": [ - + 
[ + { + "id": "test" + }, + "test.bp.hap1.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ], "6": [ - + [ + { + "id": "test" + }, + "test.bp.hap2.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ], "7": [ [ @@ -266,10 +276,20 @@ ] ], "hap1_contigs": [ - + [ + { + "id": "test" + }, + "test.bp.hap1.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ], "hap2_contigs": [ - + [ + { + "id": "test" + }, + "test.bp.hap2.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ], "log": [ [ @@ -323,7 +343,7 @@ "nf-test": "0.9.2", "nextflow": "24.10.5" }, - "timestamp": "2025-04-15T14:25:06.074724035" + "timestamp": "2025-04-17T15:56:54.410648332" }, "homo_sapiens pacbio hifi [fastq, [,], [,], [,]]": { "content": [ diff --git a/modules/nf-core/links/tests/main.nf.test b/modules/nf-core/links/tests/main.nf.test index 4b449ab2..bbffb1dd 100644 --- a/modules/nf-core/links/tests/main.nf.test +++ b/modules/nf-core/links/tests/main.nf.test @@ -7,7 +7,7 @@ nextflow_process { tag "modules_nfcore" tag "links" - test("LINKS - LINKS test data") { + test("LINKS - sarscov2 test data - contigs") { config './nextflow.config' when { @@ -18,11 +18,11 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.modules_testdata_base_path + 'genomics/scaffolding/LINKS/contigs.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), ] input[1] = [ [ id:'test'], - file(params.modules_testdata_base_path + 'genomics/scaffolding/LINKS/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), ] """ } @@ -48,7 +48,7 @@ nextflow_process { } - test("LINKS - longstitch test data 1") { + test("LINKS - sarscov2 test data - scaffolds") { config './nextflow.config' when { @@ -59,11 +59,11 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.modules_testdata_base_path + 
'genomics/scaffolding/longstitch/test_scaffolds1.fa', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/scaffolds.fasta', checkIfExists: true), ] input[1] = [ [ id:'test'], - file(params.modules_testdata_base_path + 'genomics/scaffolding/longstitch/test_reads1.fa.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), ] """ } @@ -74,53 +74,13 @@ nextflow_process { { assert process.success }, { assert snapshot( file(process.out.log[0][1]).name, - file(process.out.pairing_issues[0][1]).name, - process.out.scaffolds_csv, - process.out.scaffolds_fasta, - process.out.bloom, - file(process.out.scaffolds_graph[0][1]).name, - file(process.out.assembly_correspondence[0][1]).name, - file(process.out.tigpair_checkpoint[0][1]).name, - process.out.versions - ).match() - } - ) - } - - } - test("LINKS - longstitch test data 2") { - config './nextflow.config' - - when { - params { - module_args = "-d 1000,2000,3000,4000,5000,6000,7000,8000,9000,10000,12000,14000,16000,18000,20000" - } - process { - """ - input[0] = [ - [ id:'test' ], - file(params.modules_testdata_base_path + 'genomics/scaffolding/longstitch/test_scaffolds2.fa', checkIfExists: true), - ] - input[1] = [ - [ id:'test'], - file(params.modules_testdata_base_path + 'genomics/scaffolding/longstitch/test_reads2.fa.gz', checkIfExists: true), - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - file(process.out.log[0][1]).name, - file(process.out.pairing_issues[0][1]).name, + process.out.pairing_issues, process.out.scaffolds_csv, process.out.scaffolds_fasta, process.out.bloom, file(process.out.scaffolds_graph[0][1]).name, - file(process.out.assembly_correspondence[0][1]).name, - file(process.out.tigpair_checkpoint[0][1]).name, + process.out.assembly_correspondence, + process.out.tigpair_checkpoint, process.out.versions ).match() } @@ -140,12 +100,12 
@@ nextflow_process { process { """ input[0] = [ - [ id:'test' ], - file(params.modules_testdata_base_path + 'genomics/scaffolding/LINKS/contigs.fasta', checkIfExists: true), + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), ] input[1] = [ - [ id:'test' ], - file(params.modules_testdata_base_path + 'genomics/scaffolding/LINKS/genome.fasta', checkIfExists: true), + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), ] """ } diff --git a/modules/nf-core/links/tests/main.nf.test.snap b/modules/nf-core/links/tests/main.nf.test.snap index 0e2507a3..8c19d398 100644 --- a/modules/nf-core/links/tests/main.nf.test.snap +++ b/modules/nf-core/links/tests/main.nf.test.snap @@ -1,4 +1,66 @@ { + "LINKS - sarscov2 test data - scaffolds": { + "content": [ + "test.log", + [ + [ + { + "id": "test" + }, + "test.pairing_issues:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + [ + { + "id": "test" + }, + "test.scaffolds:md5,095cc323b3af3a7873c8b80cf3736a1f" + ] + ], + [ + [ + { + "id": "test" + }, + "test.scaffolds.fa:md5,b8c7938abbc3d2f9b5c3d709d43b4a60" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bloom:md5,23737e49d9a2f070b312da844201b494" + ] + ], + "test.gv", + [ + [ + { + "id": "test" + }, + "test.assembly_correspondence.tsv:md5,a65d30663dce705d382df52ab87ca8a4" + ] + ], + [ + [ + { + "id": "test" + }, + "test.tigpair_checkpoint.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + "versions.yml:md5,f58863e433b849b1ef0dfc19cb57656b" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-04-25T14:13:53.050775593" + }, "LINKS - stub": { "content": [ { @@ -176,89 +238,7 @@ }, "timestamp": "2025-04-11T11:49:53.947870525" }, - "LINKS - longstitch test data 1": { - "content": [ - "test.log", - "test.pairing_issues", - [ - [ - { - "id": "test" - }, - 
"test.scaffolds:md5,e12db5e5e6a1b5e26d2b50b6256c960d" - ] - ], - [ - [ - { - "id": "test" - }, - "test.scaffolds.fa:md5,afe56607a3d2c3b1e2d605bdf7ca545f" - ] - ], - [ - [ - { - "id": "test" - }, - "test.bloom:md5,b912b31cfadaf81e722f5441ac639f60" - ] - ], - "test.gv", - "test.assembly_correspondence.tsv", - "test.tigpair_checkpoint.tsv", - [ - "versions.yml:md5,f58863e433b849b1ef0dfc19cb57656b" - ] - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-04-11T13:39:00.252899964" - }, - "LINKS - longstitch test data 2": { - "content": [ - "test.log", - "test.pairing_issues", - [ - [ - { - "id": "test" - }, - "test.scaffolds:md5,00a943691b987adebe0ab40efced9c7e" - ] - ], - [ - [ - { - "id": "test" - }, - "test.scaffolds.fa:md5,ed43c629e8d440e3cf6fb8b21742557c" - ] - ], - [ - [ - { - "id": "test" - }, - "test.bloom:md5,695262bb4beda52665d2f7ec476a4e7b" - ] - ], - "test.gv", - "test.assembly_correspondence.tsv", - "test.tigpair_checkpoint.tsv", - [ - "versions.yml:md5,f58863e433b849b1ef0dfc19cb57656b" - ] - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-04-11T13:39:36.483298325" - }, - "LINKS - LINKS test data": { + "LINKS - sarscov2 test data - contigs": { "content": [ "test.log", [ @@ -266,7 +246,7 @@ { "id": "test" }, - "test.pairing_issues:md5,eb8b04b4ec170a319b40d2ee16a5cf96" + "test.pairing_issues:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], [ @@ -274,7 +254,7 @@ { "id": "test" }, - "test.scaffolds:md5,afe5339405b830e97095058080550064" + "test.scaffolds:md5,41c129edd1e66140fcfb7efce81197ad" ] ], [ @@ -282,7 +262,7 @@ { "id": "test" }, - "test.scaffolds.fa:md5,4aa442ee4b05e1608daf7b5b033a4203" + "test.scaffolds.fa:md5,8abc4f609d0ad415f900b0046b38a72b" ] ], [ @@ -290,7 +270,7 @@ { "id": "test" }, - "test.bloom:md5,96f54f577d1c589251ea0cfe624b898d" + "test.bloom:md5,23737e49d9a2f070b312da844201b494" ] ], "test.gv", @@ -299,7 +279,7 @@ { "id": "test" }, - 
"test.assembly_correspondence.tsv:md5,0efc40db474ba8d5b334ad48add4bd9d" + "test.assembly_correspondence.tsv:md5,b36e951b0a1bb4b1c1ccd50925392e3d" ] ], [ @@ -307,7 +287,7 @@ { "id": "test" }, - "test.tigpair_checkpoint.tsv:md5,9208c8fe686b5989eaec1485a74cf44e" + "test.tigpair_checkpoint.tsv:md5,168f2075f524a86216118c7230ad65e9" ] ], [ @@ -318,6 +298,6 @@ "nf-test": "0.9.2", "nextflow": "24.10.3" }, - "timestamp": "2025-04-11T11:30:01.790074303" + "timestamp": "2025-04-25T14:07:49.212617595" } } \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml index 60677e65..17886061 100644 --- a/modules/nf-core/minimap2/align/environment.yml +++ b/modules/nf-core/minimap2/align/environment.yml @@ -5,6 +5,5 @@ channels: - bioconda dependencies: - - bioconda::htslib=1.20 - - bioconda::minimap2=2.28 - - bioconda::samtools=1.20 + - bioconda::minimap2=2.29 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf index 5a6d3319..50e3ecf9 100644 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -5,8 +5,8 @@ process MINIMAP2_ALIGN { // Note: the versions here need to match the versions used in the mulled container below and minimap2/index conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' : - 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/66/66dc96eff11ab80dfd5c044e9b3425f52d818847b9c074794cf0c02bfa781661/data' : + 'community.wave.seqera.io/library/minimap2_samtools:33bb43c18d22e29c' }" input: tuple val(meta), path(reads), path(reference) diff --git a/modules/nf-core/minimap2/align/minimap2-align.diff b/modules/nf-core/minimap2/align/minimap2-align.diff index 5cf98414..78677563 100644 --- a/modules/nf-core/minimap2/align/minimap2-align.diff +++ b/modules/nf-core/minimap2/align/minimap2-align.diff @@ -5,7 +5,7 @@ Changes in 'minimap2/align/main.nf': --- modules/nf-core/minimap2/align/main.nf +++ modules/nf-core/minimap2/align/main.nf @@ -9,8 +9,7 @@ - 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' }" + 'community.wave.seqera.io/library/minimap2_samtools:33bb43c18d22e29c' }" input: - tuple val(meta), path(reads) diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap index 12264a85..89f20336 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test.snap +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -4,20 +4,20 @@ [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:MT192765.1\tLN:29829", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", - "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" ], "5d426b9a5f5b2c54f1d7f1e4c238ae94", 
"test.bam.bai", [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-07-25T09:03:00.827260362" + "timestamp": "2025-04-22T14:48:23.829797899" }, "sarscov2 - bam, fasta, true, 'bai', false, false - stub": { "content": [ @@ -44,7 +44,7 @@ ] ], "3": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ], "bam": [ [ @@ -68,15 +68,15 @@ ], "versions": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-07-23T11:21:37.92353539" + "timestamp": "2025-04-22T14:48:54.665655242" }, "sarscov2 - fastq, fasta, true, 'bai', false, false - stub": { "content": [ @@ -103,7 +103,7 @@ ] ], "3": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ], "bam": [ [ @@ -127,15 +127,15 @@ ], "versions": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-06-03T11:29:44.669021368" + "timestamp": "2025-04-22T14:48:38.492212433" }, "sarscov2 - fastq, fasta, false, [], false, false - stub": { "content": [ @@ -156,7 +156,7 @@ ], "3": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ], "bam": [ @@ -174,15 +174,15 @@ ] ], "versions": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": 
"2024-06-03T11:15:52.738781039" + "timestamp": "2025-04-22T14:48:43.879647142" }, "sarscov2 - fastq, fasta, true, [], false, false - stub": { "content": [ @@ -203,7 +203,7 @@ ], "3": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ], "bam": [ [ @@ -221,92 +221,92 @@ ], "versions": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-06-03T11:15:23.033808223" + "timestamp": "2025-04-22T14:48:33.262333471" }, "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { "content": [ [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:MT192765.1\tLN:29829", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz test_2.fastq.gz", - "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz test_2.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "1bc392244f228bf52cf0b5a8f6a654c9", [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-07-23T11:18:18.964586894" + "timestamp": "2025-04-22T14:48:07.571731983" }, "sarscov2 - fastq, fasta, true, [], false, false": { "content": [ [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:MT192765.1\tLN:29829", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", - "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + 
"@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "f194745c0ccfcb2a9c0aee094a08750", [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-07-23T11:17:48.667488325" + "timestamp": "2025-04-22T14:47:56.497792473" }, "sarscov2 - fastq, fasta, true, 'bai', false, false": { "content": [ [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:MT192765.1\tLN:29829", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", - "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" ], "f194745c0ccfcb2a9c0aee094a08750", "test.bam.bai", [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-07-23T11:18:02.517416733" + "timestamp": "2025-04-22T14:48:01.888544427" }, "sarscov2 - bam, fasta, true, [], false, false": { "content": [ [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:MT192765.1\tLN:29829", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", - "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "5d426b9a5f5b2c54f1d7f1e4c238ae94", [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + 
"versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-07-25T09:02:49.64829488" + "timestamp": "2025-04-22T14:48:18.376062313" }, "sarscov2 - bam, fasta, true, [], false, false - stub": { "content": [ @@ -327,7 +327,7 @@ ], "3": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ], "bam": [ [ @@ -345,15 +345,15 @@ ], "versions": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-07-23T11:21:22.162291795" + "timestamp": "2025-04-22T14:48:49.268693724" }, "sarscov2 - fastq, [], true, false, false": { "content": [ @@ -459,18 +459,18 @@ "@SQ\tSN:ERR5069949.3258358\tLN:151", "@SQ\tSN:ERR5069949.1476386\tLN:151", "@SQ\tSN:ERR5069949.2415814\tLN:150", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz", - "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "16c1c651f8ec67383bcdee3c55aed94f", [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-07-23T11:18:34.246998277" + "timestamp": "2025-04-22T14:48:12.942360555" } } \ No newline at end of file diff --git a/subworkflows/local/qc/busco/main.nf b/subworkflows/local/qc/busco/main.nf index 59d30544..93d93c5e 100644 --- a/subworkflows/local/qc/busco/main.nf +++ 
b/subworkflows/local/qc/busco/main.nf @@ -11,7 +11,7 @@ workflow RUN_BUSCO { Channel.empty().set { short_summary_json } if (params.busco) { - BUSCO(assembly, 'genome', params.busco_lineage, params.busco_db ? file(params.busco_db, checkIfExists: true) : [], []) + BUSCO(assembly, 'genome', params.busco_lineage, params.busco_db ? file(params.busco_db, checkIfExists: true) : [], [], true) BUSCO.out.batch_summary.set { batch_summary } BUSCO.out.short_summaries_txt.set { short_summary_txt } BUSCO.out.short_summaries_json.set { short_summary_json } From 45daf6399db1648c2b03ca7f0150d82d4a5aa55b Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Thu, 8 May 2025 14:18:55 +0200 Subject: [PATCH 21/34] pre-release version bump (#155) * version bump * add codename --- .nf-core.yml | 2 +- CHANGELOG.md | 2 +- nextflow.config | 2 +- ro-crate-metadata.json | 44 +++++++++++++++++++++++++++++------------- 4 files changed, 34 insertions(+), 16 deletions(-) diff --git a/.nf-core.yml b/.nf-core.yml index 8d3b34cb..e4c21935 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -20,4 +20,4 @@ template: skip_features: - multiqc - igenomes - version: 1.1.0dev + version: 1.1.0 diff --git a/CHANGELOG.md b/CHANGELOG.md index 4daf67d8..7fe24abd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## v1.1.0dev +## v1.1.0 'Brass Pigeon' - [2025-05-08] ### `Added` diff --git a/nextflow.config b/nextflow.config index 6138feb2..78947c2c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -293,7 +293,7 @@ manifest { description = """Assemble genomes from long ONT or pacbio HiFi reads""" mainScript = 'main.nf' nextflowVersion = '!>=24.04.2' - version = '1.1.0dev' + version = '1.1.0' doi = '10.5281/zenodo.14986998' } diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 66380e58..e2d1702f 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -21,9 +21,9 @@ { "@id": "./", "@type": "Dataset", - "creativeWorkStatus": "InProgress", - "datePublished": "2025-04-30T12:26:07+00:00", - "description": "

    \n \n \n \"nf-core/genomeassembler\"\n \n

    \n\n[![GitHub Actions CI Status](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/genomeassembler** is a bioinformatics pipeline that ...\n\n\n\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\n\nNow, you can run the pipeline using:\n\n\n\n```bash\nnextflow run nf-core/genomeassembler \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/genomeassembler/usage) and the [parameter documentation](https://nf-co.re/genomeassembler/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/genomeassembler/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/genomeassembler/output).\n\n## Credits\n\nnf-core/genomeassembler was originally written by Niklas Schandry.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing 
guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#genomeassembler` channel](https://nfcore.slack.com/channels/genomeassembler) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "creativeWorkStatus": "Stable", + "datePublished": "2025-05-08T11:11:40+00:00", + "description": "

    \n \n \n \"nf-core/genomeassembler\"\n \n

    \n\n[![GitHub Actions CI Status](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14986998-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14986998)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/genomeassembler** is a bioinformatics pipeline that carries out genome assembly, polishing and scaffolding from long reads (ONT or pacbio). Assembly can be done via `flye` or `hifiasm`, polishing can be carried out with `medaka` (ONT), or `pilon` (requires short-reads), and scaffolding can be done using `LINKS`, `Longstitch`, or `RagTag` (if a reference is available). Quality control includes `BUSCO`, `QUAST` and `merqury` (requires short-reads).\nCurrently, this pipeline does not implement phasing of polyploid genomes or HiC scaffolding.\n\n\n \n \"nf-core/genomeassembler\"\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,ontreads,hifireads,ref_fasta,ref_gff,shortread_F,shortread_R,paired\nsampleName,ontreads.fa.gz,hifireads.fa.gz,assembly.fasta.gz,reference.fasta,reference.gff,short_F1.fastq,short_F2.fastq,true\n```\n\nEach row represents one genome to be assembled. `sample` should contain the name of the sample, `ontreads` should contain a path to ONT reads (fastq.gz), `hifireads` a path to HiFi reads (fastq.gz), `ref_fasta` and `ref_gff` contain reference genome fasta and annotations. `shortread_F` and `shortread_R` contain paths to short-read data, `paired` indicates if short-reads are paired. 
Columns can be omitted if they contain no data, with the exception of `shortread_R`, which needs to be present if `shortread_F` is there, even if it is empty.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/genomeassembler \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/genomeassembler/usage) and the [parameter documentation](https://nf-co.re/genomeassembler/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/genomeassembler/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/genomeassembler/output).\n\n## Credits\n\nnf-core/genomeassembler was originally written by [Niklas Schandry](https://github.com/nschan), of the Faculty of Biology of the Ludwig-Maximilians University (LMU) in Munich, Germany.\n\nI thank the following people for their extensive assistance and constructive reviews during the development of this pipeline:\n\n- [Mahesh Binzer-Panchal](https://github.com/mahesh-panchal)\n- [Matthias H\u00f6rtenhuber](https://github.com/mashehu)\n- [Louis Le N\u00e9zet](https://github.com/LouisLeNezet)\n- [J\u00falia Mir Pedrol](https://github.com/mirpedrol)\n- [Daniel Straub](https://github.com/d4straub)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or 
help, don't hesitate to get in touch on the [Slack `#genomeassembler` channel](https://nfcore.slack.com/channels/genomeassembler) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/genomeassembler for your analysis, please cite it using the following doi: [10.5281/zenodo.14986998](https://doi.org/10.5281/zenodo.14986998)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -105,7 +105,7 @@ }, "mentions": [ { - "@id": "#c5ce6267-6257-4d17-b43b-adb516de81e9" + "@id": "#76569809-bfd0-4c21-9633-985c5289977e" } ], "name": "nf-core/genomeassembler" @@ -144,19 +144,37 @@ } ], "dateCreated": "", - "dateModified": "2025-04-30T12:26:07Z", + "dateModified": "2025-05-08T13:11:40Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", - "keywords": ["nf-core", "nextflow", "genome-assembly"], - "license": ["MIT"], - "name": ["nf-core/genomeassembler"], + "keywords": [ + "nf-core", + "nextflow", + "genome-assembly" + ], + "license": [ + "MIT" + ], + "maintainer": [ + { + "@id": "https://orcid.org/0000-0003-3099-7860" + } + ], + "name": [ + "nf-core/genomeassembler" + ], "programmingLanguage": { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" }, "sdPublisher": { "@id": "https://nf-co.re/" }, - "url": ["https://github.com/nf-core/genomeassembler", "https://nf-co.re/genomeassembler/dev/"], - "version": ["1.1.0dev"] + "url": [ + 
"https://github.com/nf-core/genomeassembler", + "https://nf-co.re/genomeassembler/1.1.0/" + ], + "version": [ + "1.1.0" + ] }, { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", @@ -171,11 +189,11 @@ "version": "!>=24.04.2" }, { - "@id": "#c5ce6267-6257-4d17-b43b-adb516de81e9", + "@id": "#76569809-bfd0-4c21-9633-985c5289977e", "@type": "TestSuite", "instance": [ { - "@id": "#788bf1cf-1341-4537-9ac5-0b09c41ca3da" + "@id": "#6dc2c760-4b37-499d-8854-cbbf1a482d14" } ], "mainEntity": { @@ -184,7 +202,7 @@ "name": "Test suite for nf-core/genomeassembler" }, { - "@id": "#788bf1cf-1341-4537-9ac5-0b09c41ca3da", + "@id": "#6dc2c760-4b37-499d-8854-cbbf1a482d14", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/genomeassembler", "resource": "repos/nf-core/genomeassembler/actions/workflows/ci.yml", From 0a3b4b75e0db6a578bd9063d6edbfd1da0f6be46 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Wed, 14 May 2025 15:13:53 +0200 Subject: [PATCH 22/34] Docs update (#157) * update usage --- docs/usage.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index bfe3eeed..6a2a8eb9 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -39,7 +39,7 @@ The largest samplesheet format is: ```csv title="samplesheet.csv" sample,ontreads,hifireads,ref_fasta,ref_gff,shortread_F,shortread_R,paired -Sample1,sample1ont.fq.gz,sample1hifi.fq.gz,ref.fa,ref.gff,sample1_r1.fq.gz,sample1_r2.fq.gz,true +Sample1,/path/reads/sample1ont.fq.gz,/path/reads/sample1hifi.fq.gz,/path/references/ref.fa,/path/references/ref.gff,/path/reads/sample1_r1.fq.gz,/path/reads/sample1_r2.fq.gz,true ``` The samplesheet _must_ contain a column name `sample` [string]. @@ -55,6 +55,9 @@ Further columns _can_ be: - `shortread_R`: shortread reverse file (paired end) - `paired`: [true/false] true if the reads are paired end, false if they are single-end. The `shortreads_R` column should exist if `paired` is `false` but can be empty. 
+> [!INFO] +> It is strongly recommended to provide all paths as absolute paths + ### Multiple runs of the same sample For ONT reads, a glob pattern can be provided, matching files will be concatenated into a single file if `--collect` is used. Generally we recommend to provide all reads in a single file. From 81eba874435ee3c2a7accd4394b4f3d7a5c6672b Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Thu, 15 May 2025 11:39:31 +0200 Subject: [PATCH 23/34] Report update (#158) * include tabular quast * update report env * add tables for busco * update changelog --- CHANGELOG.md | 2 ++ assets/report/report.qmd | 38 +++++++++++++++++++++++++--- modules/local/report/environment.yml | 6 +++-- modules/local/report/main.nf | 4 +-- 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7fe24abd..f3774764 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [#144](https://github.com/nf-core/genomeassembler/issues/144) - Added `hifiasm_on_hifiasm` assembly strategy +[#158](https://github.com/nf-core/genomeassembler/pull/158) - Added tables for QUAST and BUSCO to report, (using `gt`, added `gt` to container and env) + ### `Fixed` [#154](https://github.com/nf-core/genomeassembler/pull/154) - Module maintainance: diff --git a/assets/report/report.qmd b/assets/report/report.qmd index 1373b025..16cb578c 100644 --- a/assets/report/report.qmd +++ b/assets/report/report.qmd @@ -1,6 +1,6 @@ --- title: "nf-core/genomeassembler report" -author: "Niklas Schandry" +author: "" format: dashboard editor: source nav-buttons: @@ -316,7 +316,31 @@ for (i in 1:length(unique(quast_stats$sample))) { paste0('\n'), paste0(':::'), paste0('\n\n'), - paste0('### Plots \n\n'), + paste0('### {.tabset}'), + paste0('\n\n'), + paste0('#### Tables \n\n'), + quast_stats %>% + filter(sample == unique(quast_stats$sample)[i]) %>% + dplyr::select(sample, stage, stat, value) %>% + 
pivot_wider(names_from = "stat", values_from = "value") %>% + #knitr::kable(format = 'html', caption = glue::glue('QUAST statistics')) + gt::gt() %>% + gt::cols_nanoplot(columns = starts_with("# contigs ("), + new_col_name = "Contigs_by_size", + new_col_label = gt::md("*# Contigs by size*")) %>% + gt::cols_nanoplot(columns = starts_with("Total length ("), + new_col_name = "Total_length", + new_col_label = gt::md("*Total length*")) %>% + gt::tab_footnote( + footnote = "Breaks are: contigs >= 0, 1kb, 5kb, 10kb, 25kb, 50kb", + locations = gt::cells_column_labels(columns = c(Contigs_by_size, Total_length))) %>% + gt::cols_align(align = "center", + columns = c(Contigs_by_size, Total_length)) %>% + gt::cols_move(Contigs_by_size, "Largest contig") %>% + gt::cols_move(Total_length, "Total length") %>% + gt::as_raw_html(), + paste0('\n\n'), + paste0('#### Plots \n\n'), knitr::knit_child(glue::glue('quast_files/length/_{ unique(quast_stats$sample)[i] }_quast.Rmd'), envir = globalenv(), quiet = TRUE), @@ -464,7 +488,15 @@ for (i in 1:length(unique(busco_reports$sample))) { paste0('\n'), paste0(':::'), paste0('\n\n'), - paste('### { .flow }'), + paste('###'), + paste0('\n\n'), + busco_reports %>% + filter(sample == cur_sample) %>% + dplyr::select(sample, stage, Var, value) %>% + mutate(Var = str_replace_all(Var, "_", " ")) %>% + pivot_wider(names_from = "Var", values_from = "value") %>% + gt::gt() %>% + gt::as_raw_html(), paste0('\n\n'), knitr::knit_child(glue::glue('busco_files/orthologs/_{ unique(busco_reports$sample)[i] }_orthologs.Rmd'), envir = globalenv(), diff --git a/modules/local/report/environment.yml b/modules/local/report/environment.yml index 0cc7b3a4..9f3973ea 100644 --- a/modules/local/report/environment.yml +++ b/modules/local/report/environment.yml @@ -2,11 +2,13 @@ channels: - conda-forge - bioconda dependencies: - - conda-forge::quarto=1.5.57 + - conda-forge::quarto=1.7.31 + - conda-forge::r-gt=1.0.0 - conda-forge::r-plotly=4.10.4 - 
conda-forge::r-quarto=1.4.4 - conda-forge::r-tidyverse=2.0.0 - pip - pip: - - quarto-cli==1.5.56 + - pip==25.1.1 + - quarto-cli==1.7.31 - quarto==0.1.0 diff --git a/modules/local/report/main.nf b/modules/local/report/main.nf index 6995f7de..67194df9 100644 --- a/modules/local/report/main.nf +++ b/modules/local/report/main.nf @@ -3,8 +3,8 @@ process REPORT { label 'process_low' conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/90/90fef3481ee574cada2330e149cebab7290724e6b9869d84af66d0fc3d04168e/data' - : 'community.wave.seqera.io/library/quarto_r-plotly_r-quarto_r-tidyjson_pruned:2712f84a83ca0d9a'}" + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/76/76fb9a394fb9f3cb587f6dc7f3b3b3830e0dad350ea198dd244dff63cf1f88e2/data' + : 'community.wave.seqera.io/library/quarto_r-gt_r-plotly_r-quarto_pruned:10c4e357d0099e14'}" input: path qmdir_files, stageAs: "*" From 2ff59f6e45856f44b26ab58b4cef60e27db11f6f Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Mon, 19 May 2025 15:28:45 +0200 Subject: [PATCH 24/34] update report containers (#161) --- modules/local/report/environment.yml | 2 +- modules/local/report/main.nf | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/modules/local/report/environment.yml b/modules/local/report/environment.yml index 9f3973ea..ff0aceb2 100644 --- a/modules/local/report/environment.yml +++ b/modules/local/report/environment.yml @@ -6,9 +6,9 @@ dependencies: - conda-forge::r-gt=1.0.0 - conda-forge::r-plotly=4.10.4 - conda-forge::r-quarto=1.4.4 + - conda-forge::r-tidyjson=0.3.2 - conda-forge::r-tidyverse=2.0.0 - pip - pip: - - pip==25.1.1 - quarto-cli==1.7.31 - quarto==0.1.0 diff --git a/modules/local/report/main.nf b/modules/local/report/main.nf index 67194df9..85ba0a14 100644 --- a/modules/local/report/main.nf +++ 
b/modules/local/report/main.nf @@ -3,9 +3,12 @@ process REPORT { label 'process_low' conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/76/76fb9a394fb9f3cb587f6dc7f3b3b3830e0dad350ea198dd244dff63cf1f88e2/data' - : 'community.wave.seqera.io/library/quarto_r-gt_r-plotly_r-quarto_pruned:10c4e357d0099e14'}" - + ? 'community.wave.seqera.io/library/quarto_r-gt_r-plotly_r-quarto_pruned:6e20dd9b9b77f359' + : 'community.wave.seqera.io/library/quarto_r-gt_r-plotly_r-quarto_pruned:be4a8863b7b76cf7'}" + /* wave builds: + https://wave.seqera.io/view/builds/bd-6e20dd9b9b77f359_1 singularity + https://wave.seqera.io/view/builds/bd-be4a8863b7b76cf7_1 docker + */ input: path qmdir_files, stageAs: "*" path funct_files, stageAs: "functions/*" From 841bdffbdbe34985c2fa6a39fff156a514f1b515 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Thu, 22 May 2025 16:28:18 +0200 Subject: [PATCH 25/34] add prefix to singularity container for report (#162) --- modules/local/report/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/report/main.nf b/modules/local/report/main.nf index 85ba0a14..a519ed29 100644 --- a/modules/local/report/main.nf +++ b/modules/local/report/main.nf @@ -3,7 +3,7 @@ process REPORT { label 'process_low' conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'community.wave.seqera.io/library/quarto_r-gt_r-plotly_r-quarto_pruned:6e20dd9b9b77f359' + ? 
'https://community.wave.seqera.io/library/quarto_r-gt_r-plotly_r-quarto_pruned:6e20dd9b9b77f359' : 'community.wave.seqera.io/library/quarto_r-gt_r-plotly_r-quarto_pruned:be4a8863b7b76cf7'}" /* wave builds: https://wave.seqera.io/view/builds/bd-6e20dd9b9b77f359_1 singularity From 342e0f6154cce7ad199a5f6da3b99dc929431536 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Mon, 2 Jun 2025 13:15:43 +0200 Subject: [PATCH 26/34] Report: singularity image link (#163) * correct singularity https link --- modules/local/report/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/report/main.nf b/modules/local/report/main.nf index a519ed29..841e61b4 100644 --- a/modules/local/report/main.nf +++ b/modules/local/report/main.nf @@ -3,7 +3,7 @@ process REPORT { label 'process_low' conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community.wave.seqera.io/library/quarto_r-gt_r-plotly_r-quarto_pruned:6e20dd9b9b77f359' + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/89/8967e1cb830fdc77ec5b84541a50c74a0a05eaaae557314490809de2fc91e4af/data' : 'community.wave.seqera.io/library/quarto_r-gt_r-plotly_r-quarto_pruned:be4a8863b7b76cf7'}" /* wave builds: https://wave.seqera.io/view/builds/bd-6e20dd9b9b77f359_1 singularity From c396932d19f3a6bc4ecfcbd73dfd1560abaceff5 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Thu, 26 Jun 2025 14:02:37 +0200 Subject: [PATCH 27/34] Improve reference input check (#166) * add prefix to singularity container for report * add files exist check for references, closes #165 --- .../local/utils_nfcore_genomeassembler_pipeline/main.nf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf b/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf index 580914ca..8acd6fab 100644 --- a/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf @@ -73,9 +73,13 @@ workflow PIPELINE_INITIALISATION { .set { ch_samplesheet } if (params.use_ref) { ch_samplesheet - .map { it -> [it.meta, it.ref_fasta] } + .map { it -> [it.meta, file(it.ref_fasta, checkIfExists: true)] } .set { ch_refs } } + if (params.lift_annotations) { + ch_samplesheet + .map { it -> [it.meta, file(it.ref_gff, checkIfExists: true)] } + } // check for assembler / read combination def hifi_only = params.hifi && !params.ont ? true : false if (!params.skip_assembly) { @@ -102,6 +106,7 @@ workflow PIPELINE_INITIALISATION { } } + emit: samplesheet = ch_samplesheet refs = ch_refs From bc85ab46030d853f0fad04fe28687abd5282f288 Mon Sep 17 00:00:00 2001 From: nf-core bot Date: Tue, 1 Jul 2025 09:36:28 +0200 Subject: [PATCH 28/34] Important! 
Template update for nf-core/tools v3.3.1 (#164) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Template update for nf-core/tools version 3.2.1 * Template update for nf-core/tools version 3.3.1 * merge template 3.3.1 - fix linting * update pre-commit * merge template 3.3.1 - fix linting * pre-commit config? * pre-commit config? * reinstall links * try larger runner * smaller run, disable bloom filter for hifiasm test * updated test snapshot * updated test snapshot * update nftignore * update nftignore * update nftignore * update nftignore * update nftignore * update nftignore * update nftignore * update nftignore * update nftignore * Update .github/actions/nf-test/action.yml Co-authored-by: Matthias Hörtenhuber * Update docs/output.md Co-authored-by: Matthias Hörtenhuber * remove .nf-test.log --------- Co-authored-by: Niklas Schandry Co-authored-by: Matthias Hörtenhuber --- .editorconfig | 37 ----- .github/CONTRIBUTING.md | 3 +- .github/actions/get-shards/action.yml | 69 +++++++++ .github/actions/nf-test/action.yml | 111 ++++++++++++++ .github/workflows/awsfulltest.yml | 4 +- .github/workflows/awstest.yml | 2 +- .github/workflows/clean-up.yml | 2 +- .github/workflows/download_pipeline.yml | 20 +-- .../{fix-linting.yml => fix_linting.yml} | 4 +- .github/workflows/linting.yml | 15 +- .github/workflows/linting_comment.yml | 4 +- .github/workflows/nf-test.yml | 140 ++++++++++++++++++ .github/workflows/release-announcements.yml | 2 +- ...mment.yml => template-version-comment.yml} | 2 +- .gitignore | 2 + .nf-core.yml | 2 +- .pre-commit-config.yaml | 28 +++- .prettierignore | 2 + .prettierrc.yml | 5 + CHANGELOG.md | 2 + README.md | 5 +- conf/base.config | 7 +- conf/test.config | 1 + modules.json | 2 +- nextflow.config | 22 ++- nf-test.config | 24 +++ ro-crate-metadata.json | 21 +-- tests/.nftignore | 6 + tests/default.nf.test | 35 +++++ tests/default.nf.test.snap | 89 +++++++++++ tests/nextflow.config | 12 ++ 31 files changed, 
586 insertions(+), 94 deletions(-) delete mode 100644 .editorconfig create mode 100644 .github/actions/get-shards/action.yml create mode 100644 .github/actions/nf-test/action.yml rename .github/workflows/{fix-linting.yml => fix_linting.yml} (96%) create mode 100644 .github/workflows/nf-test.yml rename .github/workflows/{template_version_comment.yml => template-version-comment.yml} (95%) create mode 100644 nf-test.config create mode 100644 tests/.nftignore create mode 100644 tests/default.nf.test create mode 100644 tests/default.nf.test.snap create mode 100644 tests/nextflow.config diff --git a/.editorconfig b/.editorconfig deleted file mode 100644 index 6d9b74cc..00000000 --- a/.editorconfig +++ /dev/null @@ -1,37 +0,0 @@ -root = true - -[*] -charset = utf-8 -end_of_line = lf -insert_final_newline = true -trim_trailing_whitespace = true -indent_size = 4 -indent_style = space - -[*.{md,yml,yaml,html,css,scss,js}] -indent_size = 2 - -# These files are edited and tested upstream in nf-core/modules -[/modules/nf-core/**] -charset = unset -end_of_line = unset -insert_final_newline = unset -trim_trailing_whitespace = unset -indent_style = unset -[/subworkflows/nf-core/**] -charset = unset -end_of_line = unset -insert_final_newline = unset -trim_trailing_whitespace = unset -indent_style = unset - -[/assets/email*] -indent_size = unset - -# ignore python and markdown -[*.{py,md}] -indent_style = unset - -# ignore ro-crate metadata files -[**/ro-crate-metadata.json] -insert_final_newline = unset diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index c2103669..1f9ed4d6 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -78,7 +78,8 @@ If you wish to contribute a new step, please use the following coding standards: 5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core pipelines schema build` tool). 6. Add sanity checks and validation for all relevant parameters. 7. 
Perform local tests to validate that the new code works as expected. -8. If applicable, add a new test command in `.github/workflow/ci.yml`. +8. If applicable, add a new test in the `tests` directory. +9. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. ### Default values diff --git a/.github/actions/get-shards/action.yml b/.github/actions/get-shards/action.yml new file mode 100644 index 00000000..34085279 --- /dev/null +++ b/.github/actions/get-shards/action.yml @@ -0,0 +1,69 @@ +name: "Get number of shards" +description: "Get the number of nf-test shards for the current CI job" +inputs: + max_shards: + description: "Maximum number of shards allowed" + required: true + paths: + description: "Component paths to test" + required: false + tags: + description: "Tags to pass as argument for nf-test --tag parameter" + required: false +outputs: + shard: + description: "Array of shard numbers" + value: ${{ steps.shards.outputs.shard }} + total_shards: + description: "Total number of shards" + value: ${{ steps.shards.outputs.total_shards }} +runs: + using: "composite" + steps: + - name: Install nf-test + uses: nf-core/setup-nf-test@v1 + with: + version: ${{ env.NFT_VER }} + - name: Get number of shards + id: shards + shell: bash + run: | + # Run nf-test with dynamic parameter + nftest_output=$(nf-test test \ + --profile +docker \ + $(if [ -n "${{ inputs.tags }}" ]; then echo "--tag ${{ inputs.tags }}"; fi) \ + --dry-run \ + --ci \ + --changed-since HEAD^) || { + echo "nf-test command failed with exit code $?" + echo "Full output: $nftest_output" + exit 1 + } + echo "nf-test dry-run output: $nftest_output" + + # Default values for shard and total_shards + shard="[]" + total_shards=0 + + # Check if there are related tests + if echo "$nftest_output" | grep -q 'No tests to execute'; then + echo "No related tests found." 
+ else + # Extract the number of related tests + number_of_shards=$(echo "$nftest_output" | sed -n 's|.*Executed \([0-9]*\) tests.*|\1|p') + if [[ -n "$number_of_shards" && "$number_of_shards" -gt 0 ]]; then + shards_to_run=$(( $number_of_shards < ${{ inputs.max_shards }} ? $number_of_shards : ${{ inputs.max_shards }} )) + shard=$(seq 1 "$shards_to_run" | jq -R . | jq -c -s .) + total_shards="$shards_to_run" + else + echo "Unexpected output format. Falling back to default values." + fi + fi + + # Write to GitHub Actions outputs + echo "shard=$shard" >> $GITHUB_OUTPUT + echo "total_shards=$total_shards" >> $GITHUB_OUTPUT + + # Debugging output + echo "Final shard array: $shard" + echo "Total number of shards: $total_shards" diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml new file mode 100644 index 00000000..ddfac57b --- /dev/null +++ b/.github/actions/nf-test/action.yml @@ -0,0 +1,111 @@ +name: "nf-test Action" +description: "Runs nf-test with common setup steps" +inputs: + profile: + description: "Profile to use" + required: true + shard: + description: "Shard number for this CI job" + required: true + total_shards: + description: "Total number of test shards(NOT the total number of matrix jobs)" + required: true + paths: + description: "Test paths" + required: true + tags: + description: "Tags to pass as argument for nf-test --tag parameter" + required: false +runs: + using: "composite" + steps: + - name: Setup Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ env.NXF_VERSION }}" + + - name: Set up Python + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + with: + python-version: "3.13" + + - name: Install nf-test + uses: nf-core/setup-nf-test@v1 + with: + version: "${{ env.NFT_VER }}" + install-pdiff: true + + - name: Setup apptainer + if: contains(inputs.profile, 'singularity') + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + if: contains(inputs.profile, 
'singularity') + shell: bash + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Conda setup + if: contains(inputs.profile, 'conda') + uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3 + with: + auto-update-conda: true + conda-solver: libmamba + conda-remove-defaults: true + + # TODO Skip failing conda tests and document their failures + # https://github.com/nf-core/modules/issues/7017 + - name: Run nf-test + shell: bash + env: + NFT_WORKDIR: ${{ env.NFT_WORKDIR }} + run: | + nf-test test \ + --profile=+${{ inputs.profile }} \ + $(if [ -n "${{ inputs.tags }}" ]; then echo "--tag ${{ inputs.tags }}"; fi) \ + --ci \ + --changed-since HEAD^ \ + --verbose \ + --tap=test.tap \ + --shard ${{ inputs.shard }}/${{ inputs.total_shards }} + + # Save the absolute path of the test.tap file to the output + echo "tap_file_path=$(realpath test.tap)" >> $GITHUB_OUTPUT + + - name: Generate test summary + if: always() + shell: bash + run: | + # Add header if it doesn't exist (using a token file to track this) + if [ ! 
-f ".summary_header" ]; then + echo "# 🚀 nf-test results" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Status | Test Name | Profile | Shard |" >> $GITHUB_STEP_SUMMARY + echo "|:------:|-----------|---------|-------|" >> $GITHUB_STEP_SUMMARY + touch .summary_header + fi + + if [ -f test.tap ]; then + while IFS= read -r line; do + if [[ $line =~ ^ok ]]; then + test_name="${line#ok }" + # Remove the test number from the beginning + test_name="${test_name#* }" + echo "| ✅ | ${test_name} | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + elif [[ $line =~ ^not\ ok ]]; then + test_name="${line#not ok }" + # Remove the test number from the beginning + test_name="${test_name#* }" + echo "| ❌ | ${test_name} | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + fi + done < test.tap + else + echo "| ⚠️ | No test results found | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + fi + + - name: Clean up + if: always() + shell: bash + run: | + sudo rm -rf /home/ubuntu/tests/ diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 8f68e19e..998de5b9 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,7 +14,7 @@ jobs: run-platform: name: Run AWS full tests # run only if the PR is approved by at least 2 reviewers and against the master/main branch or manually triggered - if: github.repository == 'nf-core/genomeassembler' && github.event.review.state == 'approved' && (github.event.pull_request.base.ref == 'master' || github.event.pull_request.base.ref == 'main') || github.event_name == 'workflow_dispatch' + if: github.repository == 'nf-core/genomeassembler' && github.event.review.state == 'approved' && (github.event.pull_request.base.ref == 'master' || github.event.pull_request.base.ref == 'main') || github.event_name == 'workflow_dispatch' 
|| github.event_name == 'release' runs-on: ubuntu-latest steps: - name: Set revision variable @@ -40,7 +40,7 @@ jobs: } profiles: test_full - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: Seqera Platform debug log file path: | diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 1e25cad0..9e625149 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -25,7 +25,7 @@ jobs: } profiles: test - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: Seqera Platform debug log file path: | diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index 0b6b1f27..ac030fd5 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 + - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." 
diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index ab06316e..999bcc38 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -12,14 +12,6 @@ on: required: true default: "dev" pull_request: - types: - - opened - - edited - - synchronize - branches: - - main - - master - pull_request_target: branches: - main - master @@ -52,9 +44,9 @@ jobs: - name: Disk space cleanup uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: - python-version: "3.12" + python-version: "3.13" architecture: "x64" - name: Setup Apptainer @@ -120,6 +112,7 @@ jobs: echo "IMAGE_COUNT_AFTER=$image_count" >> "$GITHUB_OUTPUT" - name: Compare container image counts + id: count_comparison run: | if [ "${{ steps.count_initial.outputs.IMAGE_COUNT_INITIAL }}" -ne "${{ steps.count_afterwards.outputs.IMAGE_COUNT_AFTER }}" ]; then initial_count=${{ steps.count_initial.outputs.IMAGE_COUNT_INITIAL }} @@ -132,3 +125,10 @@ jobs: else echo "The pipeline can be downloaded successfully!" 
fi + + - name: Upload Nextflow logfile for debugging purposes + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + with: + name: nextflow_logfile.txt + path: .nextflow.log* + include-hidden-files: true diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix_linting.yml similarity index 96% rename from .github/workflows/fix-linting.yml rename to .github/workflows/fix_linting.yml index aff1a510..52d91ad3 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix_linting.yml @@ -32,9 +32,9 @@ jobs: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} # Install and run pre-commit - - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: - python-version: "3.12" + python-version: "3.13" - name: Install pre-commit run: pip install pre-commit diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index dbd52d5a..f2d7d1dd 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -3,9 +3,6 @@ name: nf-core linting # It runs the `nf-core pipelines lint` and markdown lint tests to ensure # that the code meets the nf-core guidelines. 
on: - push: - branches: - - dev pull_request: release: types: [published] @@ -17,9 +14,9 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - name: Set up Python 3.12 - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: - python-version: "3.12" + python-version: "3.13" - name: Install pre-commit run: pip install pre-commit @@ -36,13 +33,13 @@ jobs: - name: Install Nextflow uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: - python-version: "3.12" + python-version: "3.13" architecture: "x64" - name: read .nf-core.yml - uses: pietrobolcato/action-read-yaml@1.1.0 + uses: pietrobolcato/action-read-yaml@9f13718d61111b69f30ab4ac683e67a56d254e1d # 1.1.0 id: read_yml with: config: ${{ github.workspace }}/.nf-core.yml @@ -74,7 +71,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 95b6b6af..7e8050fb 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@20319c5641d495c8a52e688b7dc5fada6c3a9fbc # v8 + uses: dawidd6/action-download-artifact@4c1e823582f43b179e2cbb49c3eade4e41f992e2 # v10 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: 
marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 + uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml new file mode 100644 index 00000000..c5d931da --- /dev/null +++ b/.github/workflows/nf-test.yml @@ -0,0 +1,140 @@ +name: Run nf-test +on: + push: + paths-ignore: + - "docs/**" + - "**/meta.yml" + - "**/*.md" + - "**/*.png" + - "**/*.svg" + pull_request: + paths-ignore: + - "docs/**" + - "**/meta.yml" + - "**/*.md" + - "**/*.png" + - "**/*.svg" + release: + types: [published] + workflow_dispatch: + +# Cancel if a newer run is started +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NFT_VER: "0.9.2" + NFT_WORKDIR: "~" + NXF_ANSI_LOG: false + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity + +jobs: + nf-test-changes: + name: nf-test-changes + runs-on: # use self-hosted runners + - runs-on=${{ github.run_id }}-nf-test-changes + - runner=4cpu-linux-x64 + outputs: + shard: ${{ steps.set-shards.outputs.shard }} + total_shards: ${{ steps.set-shards.outputs.total_shards }} + steps: + - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner + run: | + ls -la ./ + rm -rf ./* || true + rm -rf ./.??* || true + ls -la ./ + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + with: + fetch-depth: 0 + + - name: get number of shards + id: set-shards + uses: ./.github/actions/get-shards + env: + NFT_VER: ${{ env.NFT_VER }} + with: + max_shards: 7 + + - name: debug + run: | + echo ${{ steps.set-shards.outputs.shard }} + echo ${{ steps.set-shards.outputs.total_shards }} + + nf-test: + name: "${{
matrix.profile }} | ${{ matrix.NXF_VER }} | ${{ matrix.shard }}/${{ needs.nf-test-changes.outputs.total_shards }}" + needs: [nf-test-changes] + if: ${{ needs.nf-test-changes.outputs.total_shards != '0' }} + runs-on: # use self-hosted runners + - runs-on=${{ github.run_id }}-nf-test + - runner=4cpu-linux-x64 + strategy: + fail-fast: false + matrix: + shard: ${{ fromJson(needs.nf-test-changes.outputs.shard) }} + profile: [conda, docker, singularity] + isMain: + - ${{ github.base_ref == 'master' || github.base_ref == 'main' }} + # Exclude conda and singularity on dev + exclude: + - isMain: false + profile: "conda" + - isMain: false + profile: "singularity" + NXF_VER: + - "24.04.2" + - "latest-everything" + env: + NXF_ANSI_LOG: false + TOTAL_SHARDS: ${{ needs.nf-test-changes.outputs.total_shards }} + + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + with: + fetch-depth: 0 + + - name: Run nf-test + uses: ./.github/actions/nf-test + env: + NFT_WORKDIR: ${{ env.NFT_WORKDIR }} + with: + profile: ${{ matrix.profile }} + shard: ${{ matrix.shard }} + total_shards: ${{ env.TOTAL_SHARDS }} + confirm-pass: + needs: [nf-test] + if: always() + runs-on: # use self-hosted runners + - runs-on=${{ github.run_id }}-confirm-pass + - runner=2cpu-linux-x64 + steps: + - name: One or more tests failed + if: ${{ contains(needs.*.result, 'failure') }} + run: exit 1 + + - name: One or more tests cancelled + if: ${{ contains(needs.*.result, 'cancelled') }} + run: exit 1 + + - name: All tests ok + if: ${{ contains(needs.*.result, 'success') }} + run: exit 0 + + - name: debug-print + if: always() + run: | + echo "::group::DEBUG: `needs` Contents" + echo "DEBUG: toJSON(needs) = ${{ toJSON(needs) }}" + echo "DEBUG: toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" + echo "::endgroup::" + + - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner + if: always() + run: | + ls -la ./ + rm -rf ./* || true + rm -rf ./.??* || true + ls -la ./ diff --git
a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index 76a9e67e..4abaf484 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -30,7 +30,7 @@ jobs: bsky-post: runs-on: ubuntu-latest steps: - - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 + - uses: zentered/bluesky-post-action@4aa83560bb3eac05dbad1e5f221ee339118abdd2 # v0.2.0 with: post: | Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! diff --git a/.github/workflows/template_version_comment.yml b/.github/workflows/template-version-comment.yml similarity index 95% rename from .github/workflows/template_version_comment.yml rename to .github/workflows/template-version-comment.yml index 537529bc..beb5c77f 100644 --- a/.github/workflows/template_version_comment.yml +++ b/.github/workflows/template-version-comment.yml @@ -14,7 +14,7 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} - name: Read template version from .nf-core.yml - uses: nichmor/minimal-read-yaml@v0.0.2 + uses: nichmor/minimal-read-yaml@1f7205277e25e156e1f63815781db80a6d490b8f # v0.0.2 id: read_yml with: config: ${{ github.workspace }}/.nf-core.yml diff --git a/.gitignore b/.gitignore index a42ce016..f232546a 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ testing/ testing* *.pyc null/ +.nf-test/ +.nf-test.log diff --git a/.nf-core.yml b/.nf-core.yml index e4c21935..14b85bf0 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -7,7 +7,7 @@ lint: - assets/multiqc_config.yml files_unchanged: - .github/CONTRIBUTING.md -nf_core_version: 3.2.1 +nf_core_version: 3.3.1 repository_type: pipeline template: author: Niklas Schandry diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1dec8650..4c219f64 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,10 +4,26 @@ repos: hooks: - id: prettier 
additional_dependencies: - - prettier@3.2.5 - - - repo: https://github.com/editorconfig-checker/editorconfig-checker.python - rev: "3.1.2" + - prettier@3.5.0 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 hooks: - - id: editorconfig-checker - alias: ec + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + exclude: | + (?x)^( + .*ro-crate-metadata.json$| + modules/nf-core/.*| + subworkflows/nf-core/.*| + .*\.snap$| + .*svg$ + )$ + - id: end-of-file-fixer + exclude: | + (?x)^( + .*ro-crate-metadata.json$| + modules/nf-core/.*| + subworkflows/nf-core/.*| + .*\.snap$| + .*svg$ + )$ diff --git a/.prettierignore b/.prettierignore index edd29f01..105b03c5 100644 --- a/.prettierignore +++ b/.prettierignore @@ -11,3 +11,5 @@ testing* *.pyc bin/ ro-crate-metadata.json +*.svg +tests/ diff --git a/.prettierrc.yml b/.prettierrc.yml index c81f9a76..07dbd8bb 100644 --- a/.prettierrc.yml +++ b/.prettierrc.yml @@ -1 +1,6 @@ printWidth: 120 +tabWidth: 4 +overrides: + - files: "*.{md,yml,yaml,html,css,scss,js,cff}" + options: + tabWidth: 2 diff --git a/CHANGELOG.md b/CHANGELOG.md index f3774764..6123d5da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +[#164](https://github.com/nf-core/genomeassembler/issues/164) - Switched to nf-core template 3.3.1 + [#153](https://github.com/nf-core/genomeassembler/issues/153) - Switched to nf-core template 3.2.1 [#144](https://github.com/nf-core/genomeassembler/issues/144) - Added `hifiasm_on_hifiasm` assembly strategy diff --git a/README.md b/README.md index 932cdde3..b01a4e21 100644 --- a/README.md +++ b/README.md @@ -9,13 +9,14 @@ [![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS 
CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14986998-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14986998) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.04.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) +[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.1) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction diff --git a/conf/base.config b/conf/base.config index 508c751f..10a24838 100644 --- a/conf/base.config +++ b/conf/base.config @@ -14,8 +14,8 @@ process { memory = { 6.GB * task.attempt } time = { 4.h * task.attempt } - errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } - maxRetries = 3 // Increased number of retries + errorStrategy = { task.exitStatus in ((130..145) + 104 + 175) ? 
'retry' : 'finish' } + maxRetries = 1 maxErrors = '-1' withLabel:process_single { @@ -54,4 +54,7 @@ process { errorStrategy = 'retry' maxRetries = 2 } + withLabel: process_gpu { + ext.use_gpu = { workflow.profile.contains('gpu') } + } } diff --git a/conf/test.config b/conf/test.config index 47a69de4..b8875c0f 100644 --- a/conf/test.config +++ b/conf/test.config @@ -35,4 +35,5 @@ params { hifi = true ont = true assembler = "flye_on_hifiasm" + hifiasm_args = "-f 0" } diff --git a/modules.json b/modules.json index f1341e7b..21003454 100644 --- a/modules.json +++ b/modules.json @@ -38,7 +38,7 @@ }, "links": { "branch": "master", - "git_sha": "d0fcd22f005a2508bc3dc9ee32c206acadc11b28", + "git_sha": "bd049fd0244ed914f2d10bed580b49fb44eba914", "installed_by": ["modules"] }, "merqury/merqury": { diff --git a/nextflow.config b/nextflow.config index 78947c2c..068f2858 100644 --- a/nextflow.config +++ b/nextflow.config @@ -205,7 +205,21 @@ profiles { executor.name = 'local' executor.cpus = 4 executor.memory = 8.GB + process { + resourceLimits = [ + memory: 8.GB, + cpus : 4, + time : 1.h + ] + } } + + gpu { + docker.runOptions = '-u $(id -u):$(id -g) --gpus all' + apptainer.runOptions = '--nv' + singularity.runOptions = '--nv' + } + test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } hifi_flye { includeConfig 'configs/hifi_flye.config' } // Hifi-reads with flye @@ -217,8 +231,12 @@ profiles { hifiont_hifiasm_on_hifiasm { includeConfig 'configs/hifi_ont_hifiasm_on_hifiasm.config' } // ont and hifi reads. ONT via hifiasm, Hifi via hifiasm, scaffold ONT on HiFi } -// Load nf-core custom profiles from different Institutions -includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? 
"${params.custom_config_base}/nfcore_custom.config" : "/dev/null" +// Load nf-core custom profiles from different institutions + +// If params.custom_config_base is set AND either the NXF_OFFLINE environment variable is not set or params.custom_config_base is a local path, the nfcore_custom.config file from the specified base path is included. +// Load nf-core/genomeassembler custom profiles from different institutions. +includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" + // Load nf-core/genomeassembler custom profiles from different institutions. includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/genomeassembler.config" : "/dev/null" diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 00000000..889df760 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,24 @@ +config { + // location for all nf-test tests + testsDir "." 
+ + // nf-test directory including temporary files for each test + workDir System.getenv("NFT_WORKDIR") ?: ".nf-test" + + // location of an optional nextflow.config file specific for executing tests + configFile "tests/nextflow.config" + + // ignore tests coming from the nf-core/modules repo + ignore 'modules/nf-core/**/*', 'subworkflows/nf-core/**/*' + + // run all tests with defined profile(s) from the main nextflow.config + profile "test" + + // list of filenames or patterns that should trigger a full test run + triggers 'nextflow.config', 'nf-test.config', 'conf/test.config', 'tests/nextflow.config', 'tests/.nftignore' + + // load the necessary plugins + plugins { + load "nft-utils@0.0.3" + } +} diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index e2d1702f..741020af 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -22,8 +22,8 @@ "@id": "./", "@type": "Dataset", "creativeWorkStatus": "Stable", - "datePublished": "2025-05-08T11:11:40+00:00", - "description": "

    \n \n \n \"nf-core/genomeassembler\"\n \n

    \n\n[![GitHub Actions CI Status](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14986998-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14986998)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/genomeassembler** is a bioinformatics pipeline that carries out genome assembly, polishing and scaffolding from long reads (ONT or pacbio). Assembly can be done via `flye` or `hifiasm`, polishing can be carried out with `medaka` (ONT), or `pilon` (requires short-reads), and scaffolding can be done using `LINKS`, `Longstitch`, or `RagTag` (if a reference is available). Quality control includes `BUSCO`, `QUAST` and `merqury` (requires short-reads).\nCurrently, this pipeline does not implement phasing of polyploid genomes or HiC scaffolding.\n\n\n \n \"nf-core/genomeassembler\"\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,ontreads,hifireads,ref_fasta,ref_gff,shortread_F,shortread_R,paired\nsampleName,ontreads.fa.gz,hifireads.fa.gz,assembly.fasta.gz,reference.fasta,reference.gff,short_F1.fastq,short_F2.fastq,true\n```\n\nEach row represents one genome to be assembled. `sample` should contain the name of the sample, `ontreads` should contain a path to ONT reads (fastq.gz), `hifireads` a path to HiFi reads (fastq.gz), `ref_fasta` and `ref_gff` contain reference genome fasta and annotations. `shortread_F` and `shortread_R` contain paths to short-read data, `paired` indicates if short-reads are paired. 
Columns can be omitted if they contain no data, with the exception of `shortread_R`, which needs to be present if `shortread_F` is there, even if it is empty.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/genomeassembler \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/genomeassembler/usage) and the [parameter documentation](https://nf-co.re/genomeassembler/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/genomeassembler/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/genomeassembler/output).\n\n## Credits\n\nnf-core/genomeassembler was originally written by [Niklas Schandry](https://github.com/nschan), of the Faculty of Biology of the Ludwig-Maximilians University (LMU) in Munich, Germany.\n\nI thank the following people for their extensive assistance and constructive reviews during the development of this pipeline:\n\n- [Mahesh Binzer-Panchal](https://github.com/mahesh-panchal)\n- [Matthias H\u00f6rtenhuber](https://github.com/mashehu)\n- [Louis Le N\u00e9zet](https://github.com/LouisLeNezet)\n- [J\u00falia Mir Pedrol](https://github.com/mirpedrol)\n- [Daniel Straub](https://github.com/d4straub)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or 
help, don't hesitate to get in touch on the [Slack `#genomeassembler` channel](https://nfcore.slack.com/channels/genomeassembler) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/genomeassembler for your analysis, please cite it using the following doi: [10.5281/zenodo.14986998](https://doi.org/10.5281/zenodo.14986998)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "datePublished": "2025-06-03T11:01:26+00:00", + "description": "

    \n \n \n \"nf-core/genomeassembler\"\n \n

    \n\n[![GitHub Actions CI Status](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14986998-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14986998)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.04.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/genomeassembler** is a bioinformatics pipeline that carries out genome assembly, polishing and scaffolding from long reads (ONT or pacbio). Assembly can be done via `flye` or `hifiasm`, polishing can be carried out with `medaka` (ONT), or `pilon` (requires short-reads), and scaffolding can be done using `LINKS`, `Longstitch`, or `RagTag` (if a reference is available). Quality control includes `BUSCO`, `QUAST` and `merqury` (requires short-reads).\nCurrently, this pipeline does not implement phasing of polyploid genomes or HiC scaffolding.\n\n\n \n \"nf-core/genomeassembler\"\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,ontreads,hifireads,ref_fasta,ref_gff,shortread_F,shortread_R,paired\nsampleName,ontreads.fa.gz,hifireads.fa.gz,assembly.fasta.gz,reference.fasta,reference.gff,short_F1.fastq,short_F2.fastq,true\n```\n\nEach row represents one genome to be assembled. `sample` should contain the name of the sample, `ontreads` should contain a path to ONT reads (fastq.gz), `hifireads` a path to HiFi reads (fastq.gz), `ref_fasta` and `ref_gff` contain reference genome fasta and annotations. 
`shortread_F` and `shortread_R` contain paths to short-read data, `paired` indicates if short-reads are paired. Columns can be omitted if they contain no data, with the exception of `shortread_R`, which needs to be present if `shortread_F` is there, even if it is empty.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/genomeassembler \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/genomeassembler/usage) and the [parameter documentation](https://nf-co.re/genomeassembler/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/genomeassembler/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/genomeassembler/output).\n\n## Credits\n\nnf-core/genomeassembler was originally written by [Niklas Schandry](https://github.com/nschan), of the Faculty of Biology of the Ludwig-Maximilians University (LMU) in Munich, Germany.\n\nI thank the following people for their extensive assistance and constructive reviews during the development of this pipeline:\n\n- [Mahesh Binzer-Panchal](https://github.com/mahesh-panchal)\n- [Matthias H\u00f6rtenhuber](https://github.com/mashehu)\n- [Louis Le N\u00e9zet](https://github.com/LouisLeNezet)\n- [J\u00falia Mir Pedrol](https://github.com/mirpedrol)\n- [Daniel Straub](https://github.com/d4straub)\n\n## Contributions and Support\n\nIf you would like to contribute to 
this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#genomeassembler` channel](https://nfcore.slack.com/channels/genomeassembler) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/genomeassembler for your analysis, please cite it using the following doi: [10.5281/zenodo.14986998](https://doi.org/10.5281/zenodo.14986998)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -105,7 +105,7 @@ }, "mentions": [ { - "@id": "#76569809-bfd0-4c21-9633-985c5289977e" + "@id": "#cb94731f-386a-4fae-91e7-9fbf9fde0a63" } ], "name": "nf-core/genomeassembler" @@ -144,7 +144,7 @@ } ], "dateCreated": "", - "dateModified": "2025-05-08T13:11:40Z", + "dateModified": "2025-06-03T11:01:26Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", @@ -154,11 +154,6 @@ "license": [ "MIT" ], - "maintainer": [ - { - "@id": "https://orcid.org/0000-0003-3099-7860" - } - ], "name": [ "nf-core/genomeassembler" ], @@ -189,11 +184,11 @@ "version": "!>=24.04.2" }, { - "@id": "#76569809-bfd0-4c21-9633-985c5289977e", + "@id": "#cb94731f-386a-4fae-91e7-9fbf9fde0a63", "@type": "TestSuite", "instance": [ { - "@id": "#6dc2c760-4b37-499d-8854-cbbf1a482d14" + "@id": "#590a10e6-d2a9-410c-914b-653c68ca442d" } ], "mainEntity": { @@ -202,10 +197,10 @@ "name": "Test suite for 
nf-core/genomeassembler" }, { - "@id": "#6dc2c760-4b37-499d-8854-cbbf1a482d14", + "@id": "#590a10e6-d2a9-410c-914b-653c68ca442d", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/genomeassembler", - "resource": "repos/nf-core/genomeassembler/actions/workflows/ci.yml", + "resource": "repos/nf-core/genomeassembler/actions/workflows/nf-test.yml", "runsOn": { "@id": "https://w3id.org/ro/terms/test#GithubService" }, diff --git a/tests/.nftignore b/tests/.nftignore new file mode 100644 index 00000000..e8128b21 --- /dev/null +++ b/tests/.nftignore @@ -0,0 +1,6 @@ +.DS_Store +fastqc/*_fastqc.{html,zip} +pipeline_info/*.{html,json,txt,yml} +*/*/*/*.{log,bin,gz,gff3,fasta,agp} +*/*/*.{log,bin,gz,gff3,txt} +*/*/*/*.assembly_info.txt diff --git a/tests/default.nf.test b/tests/default.nf.test new file mode 100644 index 00000000..67fb220f --- /dev/null +++ b/tests/default.nf.test @@ -0,0 +1,35 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + tag "pipeline" + + test("-profile test") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_genomeassembler_software_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git 
a/tests/default.nf.test.snap b/tests/default.nf.test.snap new file mode 100644 index 00000000..e4bfbb0b --- /dev/null +++ b/tests/default.nf.test.snap @@ -0,0 +1,89 @@ +{ + "-profile test": { + "content": [ + 6, + { + "FLYE": { + "flye": "2.9.5-b1801" + }, + "GFA_2_FA_HIFI": { + "awk": "1.3.4", + "gzip": 1.13 + }, + "HIFIASM": { + "hifiasm": "0.25.0-r726" + }, + "LIFTOFF": { + "liftoff": "v1.6.3" + }, + "NANOQ": { + "nanoq": "0.10.0" + }, + "RAGTAG_PATCH": { + "ragtag": "2.1.0" + }, + "Workflow": { + "nf-core/genomeassembler": "v1.1.0" + } + }, + [ + "Col-0_2MB", + "Col-0_2MB/QC", + "Col-0_2MB/QC/nanoq", + "Col-0_2MB/QC/nanoq/Col-0_2MB_report.json", + "Col-0_2MB/QC/nanoq/Col-0_2MB_stats.json", + "Col-0_2MB/assembly", + "Col-0_2MB/assembly/Col-0_2MB_assembly.gff3", + "Col-0_2MB/assembly/Col-0_2MB_assembly.unmapped.txt", + "Col-0_2MB/assembly/flye", + "Col-0_2MB/assembly/flye/Col-0_2MB.assembly.fasta.gz", + "Col-0_2MB/assembly/flye/Col-0_2MB.assembly_graph.gfa.gz", + "Col-0_2MB/assembly/flye/Col-0_2MB.assembly_graph.gv.gz", + "Col-0_2MB/assembly/flye/Col-0_2MB.assembly_info.txt", + "Col-0_2MB/assembly/flye/Col-0_2MB.flye.log", + "Col-0_2MB/assembly/flye/Col-0_2MB.params.json", + "Col-0_2MB/assembly/hifiasm", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.bp.hap1.p_ctg.gfa", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.bp.hap2.p_ctg.gfa", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.bp.p_ctg.gfa", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.bp.p_utg.gfa", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.bp.r_utg.gfa", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.ec.bin", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.ovlp.reverse.bin", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.ovlp.source.bin", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.stderr.log", + "Col-0_2MB/assembly/hifiasm/fasta", + "Col-0_2MB/assembly/hifiasm/fasta/Col-0_2MB.bp.p_utg.fa.gz", + "Col-0_2MB/assembly/ragtag", + "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.comps.fasta", + "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.ctg.agp", + 
"Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.ctg.fasta", + "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.patch.agp", + "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.patch.err", + "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.patch.fasta", + "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.rename.agp", + "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.rename.fasta", + "pipeline_info", + "pipeline_info/nf_core_genomeassembler_software_versions.yml", + "pipeline_info/nf_core_pipeline_software_versions.yml" + ], + [ + "Col-0_2MB_report.json:md5,25d7ae5780b2f565cb46df7c9e09388a", + "Col-0_2MB_stats.json:md5,d41d8cd98f00b204e9800998ecf8427e", + "Col-0_2MB.params.json:md5,afa91c041bce5e190f4a699d11b69db6", + "Col-0_2MB.bp.hap1.p_ctg.gfa:md5,46ee70869884ad585165bd48081414e9", + "Col-0_2MB.bp.hap2.p_ctg.gfa:md5,7792865547989d6d284f640425c4e36c", + "Col-0_2MB.bp.p_ctg.gfa:md5,8fe65466d76815ffe1663ff6d8f2e8d1", + "Col-0_2MB.bp.p_utg.gfa:md5,ba2c77ebdb2ad3e6060f5574e890c6eb", + "Col-0_2MB.bp.r_utg.gfa:md5,ba2c77ebdb2ad3e6060f5574e890c6eb", + "Col-0_2MB.bp.p_utg.fa.gz:md5,812a3a16dc68bb409deb69f0aef7e6a8", + "Col-0_2MB_assembly_patch.patch.err:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-06-26T16:28:52.273158206" + } +} \ No newline at end of file diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 00000000..0c642001 --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,12 @@ +/* +======================================================================================== + Nextflow config file for running nf-test tests +======================================================================================== +*/ + +// TODO nf-core: Specify any additional parameters here +// Or any resources requirements +params.modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' 
+params.pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/genomeassembler' + +aws.client.anonymous = true // fixes S3 access issues on self-hosted runners From 7d0d953ac5733ddc564426890cd654c890119b60 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Tue, 1 Jul 2025 15:31:33 +0200 Subject: [PATCH 29/34] Awk regex (#167) * update awk regex * update snapshot --- modules/local/gfa2fa/main.nf | 4 ++-- tests/default.nf.test.snap | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/gfa2fa/main.nf b/modules/local/gfa2fa/main.nf index a53144a3..b5a67867 100644 --- a/modules/local/gfa2fa/main.nf +++ b/modules/local/gfa2fa/main.nf @@ -20,7 +20,7 @@ process GFA_2_FA { | gzip > \$outfile cat <<-END_VERSIONS > versions.yml "${task.process}": - awk: \$(echo \$(awk --version | head -n1 | sed 's/mawk //; s/ .*//')) + awk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') gzip: \$(echo \$(gzip --version | head -n1 | sed 's/gzip //')) END_VERSIONS """ @@ -31,7 +31,7 @@ process GFA_2_FA { touch \$outfile cat <<-END_VERSIONS > versions.yml "${task.process}": - awk: \$(echo \$(awk --version | head -n1 | sed 's/mawk //; s/ .*//')) + awk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') gzip: \$(echo \$(gzip --version | head -n1 | sed 's/gzip //')) END_VERSIONS """ diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index e4bfbb0b..18cb6989 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -7,7 +7,7 @@ "flye": "2.9.5-b1801" }, "GFA_2_FA_HIFI": { - "awk": "1.3.4", + "awk": "mawk 1.3.4 20240123", "gzip": 1.13 }, "HIFIASM": { From 6ae152a581a7a9bc489f17d63de275e87df49cb5 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Tue, 1 Jul 2025 16:07:13 +0200 Subject: [PATCH 30/34] add mawk to gfa2fa env (#168) * add prefix to singularity container for report * include gawk in gfa2fa env * include gawk in gfa2fa env --- modules/local/gfa2fa/environment.yml | 1 + 1 file 
changed, 1 insertion(+) diff --git a/modules/local/gfa2fa/environment.yml b/modules/local/gfa2fa/environment.yml index ae4fa457..810a2818 100644 --- a/modules/local/gfa2fa/environment.yml +++ b/modules/local/gfa2fa/environment.yml @@ -8,3 +8,4 @@ dependencies: - conda-forge::lbzip2=2.5 - conda-forge::sed=4.8 - conda-forge::tar=1.34 + - conda-forge::mawk=1.3.4 From 76433e728a4d22445609855676767ef5fa459f65 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Thu, 3 Jul 2025 09:28:59 +0200 Subject: [PATCH 31/34] Gfa2fa env (#169) * add prefix to singularity container for report * include gawk in gfa2fa env * include gawk in gfa2fa env * mawk version * mawk version in stub * update CHANGELOG * Update CHANGELOG.md Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com> * [automated] Fix code linting --------- Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com> Co-authored-by: nf-core-bot --- CHANGELOG.md | 6 ++++-- modules/local/gfa2fa/environment.yml | 2 +- modules/local/gfa2fa/main.nf | 4 ++-- tests/default.nf.test.snap | 6 +++--- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6123d5da..aadac002 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,11 +17,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` -[#154](https://github.com/nf-core/genomeassembler/pull/154) - Module maintainance: +[#169](https://github.com/nf-core/genomeassembler/pull/169) - Module mainencance: gfa2fa container and conda env now report the same version of `mawk`. 
+ +[#154](https://github.com/nf-core/genomeassembler/pull/154) - Module maintenance: - updated `hifiasm`, `minimap2`, `links` nf-core modules - updated container in local `quast` module -- separated `modules.config` into several files for easier navigation and maintainance +- separated `modules.config` into several files for easier navigation and maintenance [#138](https://github.com/nf-core/genomeassembler/pull/138) - Switched to RagTag nf-core module diff --git a/modules/local/gfa2fa/environment.yml b/modules/local/gfa2fa/environment.yml index 810a2818..2e1fcd06 100644 --- a/modules/local/gfa2fa/environment.yml +++ b/modules/local/gfa2fa/environment.yml @@ -8,4 +8,4 @@ dependencies: - conda-forge::lbzip2=2.5 - conda-forge::sed=4.8 - conda-forge::tar=1.34 - - conda-forge::mawk=1.3.4 + - bioconda::mawk=1.3.4 diff --git a/modules/local/gfa2fa/main.nf b/modules/local/gfa2fa/main.nf index b5a67867..b258f8d6 100644 --- a/modules/local/gfa2fa/main.nf +++ b/modules/local/gfa2fa/main.nf @@ -20,7 +20,7 @@ process GFA_2_FA { | gzip > \$outfile cat <<-END_VERSIONS > versions.yml "${task.process}": - awk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + awk: \$(mawk -Wversion | sed '1!d; s/.*Awk //; s/,.*//; s/ [0-9]*\$//') gzip: \$(echo \$(gzip --version | head -n1 | sed 's/gzip //')) END_VERSIONS """ @@ -31,7 +31,7 @@ process GFA_2_FA { touch \$outfile cat <<-END_VERSIONS > versions.yml "${task.process}": - awk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + awk: \$(mawk -Wversion | sed '1!d; s/.*Awk //; s/,.*//; s/ [0-9]*\$//') gzip: \$(echo \$(gzip --version | head -n1 | sed 's/gzip //')) END_VERSIONS """ diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 18cb6989..73ab05f2 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -7,7 +7,7 @@ "flye": "2.9.5-b1801" }, "GFA_2_FA_HIFI": { - "awk": "mawk 1.3.4 20240123", + "awk": "mawk 1.3.4", "gzip": 1.13 }, "HIFIASM": { @@ -82,8 +82,8 @@ ], "meta": { "nf-test": "0.9.2", - 
"nextflow": "24.10.3" + "nextflow": "24.10.5" }, - "timestamp": "2025-06-26T16:28:52.273158206" + "timestamp": "2025-07-02T11:25:42.487154678" } } \ No newline at end of file From 0d30fd103f66560a30b8933a7c4726160f5711b8 Mon Sep 17 00:00:00 2001 From: nf-core bot Date: Fri, 11 Jul 2025 11:06:47 +0200 Subject: [PATCH 32/34] Important! Template update for nf-core/tools v3.3.2 (#170) * Template update for nf-core/tools version 3.2.1 * Template update for nf-core/tools version 3.3.1 * Template update for nf-core/tools version 3.3.2 * fix nf-core.yml * Update nextflow.config Co-authored-by: Maxime U Garcia * restore nf-test.config * restore ci * remove duplicated line * add nftignore * add tests/nextflow.config * exlcude svg from pre-commit * bump CI NXF_VER * bump CI NXF_VER * end of file * fix linting * pull request template * pull request template --------- Co-authored-by: Niklas Schandry Co-authored-by: Maxime U Garcia --- .github/CONTRIBUTING.md | 1 - .github/actions/nf-test/action.yml | 2 - .github/workflows/ci.yml | 2 +- .github/workflows/linting.yml | 2 +- .github/workflows/linting_comment.yml | 2 +- .github/workflows/nf-test.yml | 45 ++++++++++--------- .github/workflows/release-announcements.yml | 2 +- .nf-core.yml | 2 +- .pre-commit-config.yaml | 2 +- CHANGELOG.md | 2 + README.md | 5 ++- conf/base.config | 1 + modules.json | 2 +- modules/nf-core/fastqc/meta.yml | 23 ++++++---- nextflow.config | 5 ++- ro-crate-metadata.json | 16 +++---- .../tests/nextflow.config | 2 +- tests/nextflow.config | 2 - 18 files changed, 63 insertions(+), 55 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 1f9ed4d6..e01c8db1 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -79,7 +79,6 @@ If you wish to contribute a new step, please use the following coding standards: 6. Add sanity checks and validation for all relevant parameters. 7. Perform local tests to validate that the new code works as expected. 8. 
If applicable, add a new test in the `tests` directory. -9. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. ### Default values diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml index ddfac57b..bf44d961 100644 --- a/.github/actions/nf-test/action.yml +++ b/.github/actions/nf-test/action.yml @@ -54,8 +54,6 @@ runs: conda-solver: libmamba conda-remove-defaults: true - # TODO Skip failing conda tests and document their failures - # https://github.com/nf-core/modules/issues/7017 - name: Run nf-test shell: bash env: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5584fa4c..9a5a5254 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,7 +27,7 @@ jobs: strategy: matrix: NXF_VER: - - "24.04.2" + - "24.10.5" - "latest-everything" ASSEMBLER: - "hifi_flye" diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index f2d7d1dd..8b0f88c3 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -13,7 +13,7 @@ jobs: steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - - name: Set up Python 3.12 + - name: Set up Python 3.13 uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: "3.13" diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 7e8050fb..d43797d9 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@4c1e823582f43b179e2cbb49c3eade4e41f992e2 # v10 + uses: dawidd6/action-download-artifact@ac66b43f0e6a346234dd65d4d0c8fbb31cb316e5 # v11 with: workflow: linting.yml workflow_conclusion: completed diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index c5d931da..e7b58449 100644 --- 
a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -1,12 +1,5 @@ name: Run nf-test on: - push: - paths-ignore: - - "docs/**" - - "**/meta.yml" - - "**/*.md" - - "**/*.png" - - "**/*.svg" pull_request: paths-ignore: - "docs/**" @@ -35,8 +28,8 @@ jobs: nf-test-changes: name: nf-test-changes runs-on: # use self-hosted runners - - runs-on=$-nf-test-changes - - runner4cpu-linux-x64 + - runs-on=${{ github.run_id }}-nf-test-changes + - runner=4cpu-linux-x64 outputs: shard: ${{ steps.set-shards.outputs.shard }} total_shards: ${{ steps.set-shards.outputs.total_shards }} @@ -69,7 +62,7 @@ jobs: needs: [nf-test-changes] if: ${{ needs.nf-test-changes.outputs.total_shards != '0' }} runs-on: # use self-hosted runners - - runs-on=$-nf-test + - runs-on=${{ github.run_id }}-nf-test - runner=4cpu-linux-x64 strategy: fail-fast: false @@ -85,7 +78,7 @@ jobs: - isMain: false profile: "singularity" NXF_VER: - - "24.04.2" + - "24.10.5" - "latest-everything" env: NXF_ANSI_LOG: false @@ -97,21 +90,39 @@ jobs: fetch-depth: 0 - name: Run nf-test + id: run_nf_test uses: ./.github/actions/nf-test + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} env: NFT_WORKDIR: ${{ env.NFT_WORKDIR }} with: profile: ${{ matrix.profile }} shard: ${{ matrix.shard }} total_shards: ${{ env.TOTAL_SHARDS }} + + - name: Report test status + if: ${{ always() }} + run: | + if [[ "${{ steps.run_nf_test.outcome }}" == "failure" ]]; then + echo "::error::Test with ${{ matrix.NXF_VER }} failed" + # Add to workflow summary + echo "## ❌ Test failed: ${{ matrix.profile }} | ${{ matrix.NXF_VER }} | Shard ${{ matrix.shard }}/${{ env.TOTAL_SHARDS }}" >> $GITHUB_STEP_SUMMARY + if [[ "${{ matrix.NXF_VER }}" == "latest-everything" ]]; then + echo "::warning::Test with latest-everything failed but will not cause workflow failure. Please check if the error is expected or if it needs fixing." 
+ fi + if [[ "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then + exit 1 + fi + fi + confirm-pass: needs: [nf-test] if: always() runs-on: # use self-hosted runners - - runs-on=$-confirm-pass + - runs-on=${{ github.run_id }}-confirm-pass - runner=2cpu-linux-x64 steps: - - name: One or more tests failed + - name: One or more tests failed (excluding latest-everything) if: ${{ contains(needs.*.result, 'failure') }} run: exit 1 @@ -130,11 +141,3 @@ jobs: echo "DEBUG: toJSON(needs) = ${{ toJSON(needs) }}" echo "DEBUG: toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" echo "::endgroup::" - - - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner - if: always() - run: | - ls -la ./ - rm -rf ./* || true - rm -rf ./.??* || true - ls -la ./ diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index 4abaf484..0f732495 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -30,7 +30,7 @@ jobs: bsky-post: runs-on: ubuntu-latest steps: - - uses: zentered/bluesky-post-action@4aa83560bb3eac05dbad1e5f221ee339118abdd2 # v0.2.0 + - uses: zentered/bluesky-post-action@6461056ea355ea43b977e149f7bf76aaa572e5e8 # v0.3.0 with: post: | Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
diff --git a/.nf-core.yml b/.nf-core.yml index 14b85bf0..377951a7 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -7,7 +7,7 @@ lint: - assets/multiqc_config.yml files_unchanged: - .github/CONTRIBUTING.md -nf_core_version: 3.3.1 +nf_core_version: 3.3.2 repository_type: pipeline template: author: Niklas Schandry diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4c219f64..402f0a1c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,7 @@ repos: hooks: - id: prettier additional_dependencies: - - prettier@3.5.0 + - prettier@3.6.2 - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: diff --git a/CHANGELOG.md b/CHANGELOG.md index aadac002..02c6831d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +[#170](https://github.com/nf-core/genomeassembler/issues/170) - Switched to nf-core template 3.3.2 + [#164](https://github.com/nf-core/genomeassembler/issues/164) - Switched to nf-core template 3.3.1 [#153](https://github.com/nf-core/genomeassembler/issues/153) - Switched to nf-core template 3.2.1 diff --git a/README.md b/README.md index b01a4e21..5719717b 100644 --- a/README.md +++ b/README.md @@ -9,13 +9,14 @@ [![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14986998-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14986998) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) 
-[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.04.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) -[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.1) +[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) +[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction diff --git a/conf/base.config b/conf/base.config index 10a24838..da8ec780 100644 --- a/conf/base.config +++ b/conf/base.config @@ -56,5 +56,6 @@ process { } withLabel: process_gpu { ext.use_gpu = { workflow.profile.contains('gpu') } + accelerator = { workflow.profile.contains('gpu') ? 1 : null } } } diff --git a/modules.json b/modules.json index 21003454..daa2af82 100644 --- a/modules.json +++ b/modules.json @@ -12,7 +12,7 @@ }, "fastqc": { "branch": "master", - "git_sha": "b1966f36ec9de31927b2603d8f499960b2a4c294", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", "installed_by": ["modules"] }, "flye": { diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 2b2e62b8..c8d9d025 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -29,9 +29,10 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. 
+ ontologies: [] output: - - html: - - meta: + html: + - - meta: type: map description: | Groovy Map containing sample information @@ -40,8 +41,9 @@ output: type: file description: FastQC report pattern: "*_{fastqc.html}" - - zip: - - meta: + ontologies: [] + zip: + - - meta: type: map description: | Groovy Map containing sample information @@ -50,11 +52,14 @@ output: type: file description: FastQC report archive pattern: "*_{fastqc.zip}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@drpatelh" - "@grst" diff --git a/nextflow.config b/nextflow.config index 068f2858..17bb4d91 100644 --- a/nextflow.config +++ b/nextflow.config @@ -310,14 +310,15 @@ manifest { homePage = 'https://github.com/nf-core/genomeassembler' description = """Assemble genomes from long ONT or pacbio HiFi reads""" mainScript = 'main.nf' - nextflowVersion = '!>=24.04.2' + defaultBranch = 'master' + nextflowVersion = '!>=24.10.5' version = '1.1.0' doi = '10.5281/zenodo.14986998' } // Nextflow plugins plugins { - id 'nf-schema@2.2.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.4.2' // Validation of pipeline parameters and creation of an input channel from a sample sheet } validation { diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 741020af..8545a1f2 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -22,8 +22,8 @@ "@id": "./", "@type": "Dataset", "creativeWorkStatus": "Stable", - "datePublished": "2025-06-03T11:01:26+00:00", - "description": "

    \n \n \n \"nf-core/genomeassembler\"\n \n

    \n\n[![GitHub Actions CI Status](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14986998-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14986998)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.04.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/genomeassembler** is a bioinformatics pipeline that carries out genome assembly, polishing and scaffolding from long reads (ONT or pacbio). Assembly can be done via `flye` or `hifiasm`, polishing can be carried out with `medaka` (ONT), or `pilon` (requires short-reads), and scaffolding can be done using `LINKS`, `Longstitch`, or `RagTag` (if a reference is available). Quality control includes `BUSCO`, `QUAST` and `merqury` (requires short-reads).\nCurrently, this pipeline does not implement phasing of polyploid genomes or HiC scaffolding.\n\n\n \n \"nf-core/genomeassembler\"\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,ontreads,hifireads,ref_fasta,ref_gff,shortread_F,shortread_R,paired\nsampleName,ontreads.fa.gz,hifireads.fa.gz,assembly.fasta.gz,reference.fasta,reference.gff,short_F1.fastq,short_F2.fastq,true\n```\n\nEach row represents one genome to be assembled. `sample` should contain the name of the sample, `ontreads` should contain a path to ONT reads (fastq.gz), `hifireads` a path to HiFi reads (fastq.gz), `ref_fasta` and `ref_gff` contain reference genome fasta and annotations. 
`shortread_F` and `shortread_R` contain paths to short-read data, `paired` indicates if short-reads are paired. Columns can be omitted if they contain no data, with the exception of `shortread_R`, which needs to be present if `shortread_F` is there, even if it is empty.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/genomeassembler \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/genomeassembler/usage) and the [parameter documentation](https://nf-co.re/genomeassembler/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/genomeassembler/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/genomeassembler/output).\n\n## Credits\n\nnf-core/genomeassembler was originally written by [Niklas Schandry](https://github.com/nschan), of the Faculty of Biology of the Ludwig-Maximilians University (LMU) in Munich, Germany.\n\nI thank the following people for their extensive assistance and constructive reviews during the development of this pipeline:\n\n- [Mahesh Binzer-Panchal](https://github.com/mahesh-panchal)\n- [Matthias H\u00f6rtenhuber](https://github.com/mashehu)\n- [Louis Le N\u00e9zet](https://github.com/LouisLeNezet)\n- [J\u00falia Mir Pedrol](https://github.com/mirpedrol)\n- [Daniel Straub](https://github.com/d4straub)\n\n## Contributions and Support\n\nIf you would like to contribute to 
this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#genomeassembler` channel](https://nfcore.slack.com/channels/genomeassembler) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/genomeassembler for your analysis, please cite it using the following doi: [10.5281/zenodo.14986998](https://doi.org/10.5281/zenodo.14986998)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "datePublished": "2025-07-08T11:38:27+00:00", + "description": "

    \n \n \n \"nf-core/genomeassembler\"\n \n

    \n\n[![GitHub Actions CI Status](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14986998-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14986998)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/genomeassembler** is a bioinformatics pipeline that carries out genome assembly, polishing and scaffolding from long reads (ONT or pacbio). Assembly can be done via `flye` or `hifiasm`, polishing can be carried out with `medaka` (ONT), or `pilon` (requires short-reads), and scaffolding can be done using `LINKS`, `Longstitch`, or `RagTag` (if a reference is available). Quality control includes `BUSCO`, `QUAST` and `merqury` (requires short-reads).\nCurrently, this pipeline does not implement phasing of polyploid genomes or HiC scaffolding.\n\n\n \n \"nf-core/genomeassembler\"\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,ontreads,hifireads,ref_fasta,ref_gff,shortread_F,shortread_R,paired\nsampleName,ontreads.fa.gz,hifireads.fa.gz,assembly.fasta.gz,reference.fasta,reference.gff,short_F1.fastq,short_F2.fastq,true\n```\n\nEach row represents one genome to be assembled. `sample` should contain the name of the sample, `ontreads` should contain a path to ONT reads (fastq.gz), `hifireads` a path to HiFi reads (fastq.gz), `ref_fasta` and `ref_gff` contain reference genome fasta and annotations. `shortread_F` and `shortread_R` contain paths to short-read data, `paired` indicates if short-reads are paired. Columns can be omitted if they contain no data, with the exception of `shortread_R`, which needs to be present if `shortread_F` is there, even if it is empty.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/genomeassembler \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/genomeassembler/usage) and the [parameter documentation](https://nf-co.re/genomeassembler/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/genomeassembler/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/genomeassembler/output).\n\n## Credits\n\nnf-core/genomeassembler was originally written by [Niklas Schandry](https://github.com/nschan), of the Faculty of Biology of the Ludwig-Maximilians University (LMU) in Munich, Germany.\n\nI thank the following people for their extensive assistance and constructive reviews during the development of this pipeline:\n\n- [Mahesh Binzer-Panchal](https://github.com/mahesh-panchal)\n- [Matthias H\u00f6rtenhuber](https://github.com/mashehu)\n- [Louis Le N\u00e9zet](https://github.com/LouisLeNezet)\n- [J\u00falia Mir Pedrol](https://github.com/mirpedrol)\n- [Daniel Straub](https://github.com/d4straub)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#genomeassembler` channel](https://nfcore.slack.com/channels/genomeassembler) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/genomeassembler for your analysis, please cite it using the following doi: [10.5281/zenodo.14986998](https://doi.org/10.5281/zenodo.14986998)\n\nAn extensive list of references 
for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -105,7 +105,7 @@ }, "mentions": [ { - "@id": "#cb94731f-386a-4fae-91e7-9fbf9fde0a63" + "@id": "#8601bd11-94fa-4298-805d-fb2c3f6a7eab" } ], "name": "nf-core/genomeassembler" @@ -144,7 +144,7 @@ } ], "dateCreated": "", - "dateModified": "2025-06-03T11:01:26Z", + "dateModified": "2025-07-08T11:38:27Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", @@ -181,14 +181,14 @@ "url": { "@id": "https://www.nextflow.io/" }, - "version": "!>=24.04.2" + "version": "!>=24.10.5" }, { - "@id": "#cb94731f-386a-4fae-91e7-9fbf9fde0a63", + "@id": "#8601bd11-94fa-4298-805d-fb2c3f6a7eab", "@type": "TestSuite", "instance": [ { - "@id": "#590a10e6-d2a9-410c-914b-653c68ca442d" + "@id": "#a9fa96cd-308f-4e32-b912-de8b760cb8e2" } ], "mainEntity": { @@ -197,7 +197,7 @@ "name": "Test suite for nf-core/genomeassembler" }, { - "@id": "#590a10e6-d2a9-410c-914b-653c68ca442d", + "@id": "#a9fa96cd-308f-4e32-b912-de8b760cb8e2", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/genomeassembler", "resource": "repos/nf-core/genomeassembler/actions/workflows/nf-test.yml", diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config index 0907ac58..09ef842a 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config 
@@ -1,5 +1,5 @@ plugins { - id "nf-schema@2.1.0" + id "nf-schema@2.4.2" } validation { diff --git a/tests/nextflow.config b/tests/nextflow.config index 0c642001..f5eb58fd 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -4,8 +4,6 @@ ======================================================================================== */ -// TODO nf-core: Specify any additional parameters here -// Or any resources requirements params.modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' params.pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/genomeassembler' From 389439583e8c003f1fc401c645069018c8988df0 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Fri, 11 Jul 2025 12:02:53 +0200 Subject: [PATCH 33/34] Update dev with review comments (#172) * add prefix to singularity container for report * maxime comments --- CHANGELOG.md | 2 +- conf/old.modules.config | 597 ---------------------------------------- conf/test.config | 8 +- nextflow.config | 3 +- 4 files changed, 3 insertions(+), 607 deletions(-) delete mode 100644 conf/old.modules.config diff --git a/CHANGELOG.md b/CHANGELOG.md index 02c6831d..b7af38b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## v1.1.0 'Brass Pigeon' - [2025-05-08] +## v1.1.0 'Brass Pigeon' - [2025-07-11] ### `Added` diff --git a/conf/old.modules.config b/conf/old.modules.config deleted file mode 100644 index 6c0c27b2..00000000 --- a/conf/old.modules.config +++ /dev/null @@ -1,597 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Config file for defining DSL2 per module options and publishing paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.prefix = File name prefix for output files. ----------------------------------------------------------------------------------------- -*/ - -process { - // General catch-all - publishDir = [ - path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - /* - ---------- - Reads in - ONT - ---------- - */ - // nanoq: local module; QC - withName: NANOQ { - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/nanoq" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // only local module - withName: COLLECT { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/collect" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // porechop: nf-core module - withName: PORECHOP { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/porechop" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - // Genomescope / jellyfish: local modules - withName: COUNT { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/count/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: DUMP { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/dump/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: STATS { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/stats/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: HISTO { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/histo/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: GENOMESCOPE { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/genomescope/genomescope/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - /* - ---------- - Reads in - HiFi - all nf-core - ---------- - */ - withName: LIMA { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/lima/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: TO_FASTQ { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/lima/fastq/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - /* - ---------- - Reads in - Short reads - all nf-core - ---------- - */ - withName: TRIMGALORE { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/trimgalore" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - withName: MERYL_COUNT { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/meryl/count/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: MERYL_UNIONSUM { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/meryl/unionsum/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - /* - ---------- - ASSEMBLY - ---------- - */ - // FLYE: nf-core - withName: FLYE { - ext.args = { - [ - meta.genome_size ? "--genome-size ${meta.genome_size}" : '', - params.flye_args - ].join(" ").trim() - } - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/flye/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // HIFIASM: - /* updated nf-core module */ - - withName: HIFIASM { - ext.args = { [ params.hifiasm_args ].join(" ").trim() } - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/hifiasm/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: HIFIASM_ONT { - ext.args = { [ params.hifiasm_args, "--ont" ].join(" ").trim() } - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/hifiasm_ont/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: GFA_2_FA { - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/hifiasm/fasta" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: GFA_2_FA_HIFI { - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/hifiasm/fasta" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - withName: GFA_2_FA_ONT { - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/hifiasm_ont/fasta" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*ASSEMBLE:.*RAGTAG_PATCH' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/ragtag/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_assembly_patch" } - } - - /* - ---------- - Polishing - ---------- - */ - // Medaka: local; nf-core module cant deal with gzipped input - withName: MEDAKA { - ext.args1 = { } // args mini_align - ext.args2 = { [params.medaka_model ? "--model ${params.medaka_model}" : ''].join(" ").trim() } // args medaka_inference - ext.args3 = { } // args medaka sequence - ext.prefix = { "${meta.id}_medaka" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/polish/medaka" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // Pilon: nf-core - withName: PILON { - ext.prefix = { "${meta.id}_pilon" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/polish/pilon" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - /* - ---------- - Scaffolding - ---------- - */ - // RagTag - withName: '.*SCAFFOLD:.*RAGTAG_SCAFFOLD' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/ragtag/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_ragtag" } - ext.args = [ - "-C", - "-u", - "-r", - "-w" - ].join(" ").trim() - } - - withName: LINKS { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/links/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - ext.prefix = { "${meta.id}_links" } - ext.args = ["-t 40,200", "-d 500,2000,5000"].join(" ").trim() - } - - // No nf-core module yet. - withName: LONGSTITCH { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/longstitch/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_longstitch" } - } - /* - -------- - Annotations - liftoff nf-core module - -------- - gff file goes with fasta file - */ - - withName: '.*ASSEMBLE:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_assembly" } - } - - withName: '.*PILON:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/polish/pilon/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_pilon" } - } - - withName: '.*MEDAKA:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/polish/medaka" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_medaka" } - } - - withName: '.*RAGTAG:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/ragtag/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_ragtag" } - } - - withName: '.*LONGSTITCH:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/longstitch" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - ext.prefix = { "${meta.id}_longstitch" } - } - - withName: '.*LINKS:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/links" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_links" } - } - - /* - ---------- - QC - ---------- - */ - - // BUSCO: nf-core - withName: '.*:ASSEMBLE:.*:BUSCO' { - ext.prefix = { "${meta.id}_assembly-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*PILON:.*:BUSCO' { - ext.prefix = { "${meta.id}_pilon-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*MEDAKA:.*:BUSCO' { - ext.prefix = { "${meta.id}_medaka-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LINKS:.*:BUSCO' { - ext.prefix = { "${meta.id}_links-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LONGSTITCH:.*:BUSCO' { - ext.prefix = { "${meta.id}_longstitch-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - // avoid catching ragtag from ont_on_hifi assembly - withName: '.*:SCAFFOLD:.*RAGTAG:.*:BUSCO' { - ext.prefix = { "${meta.id}_ragtag-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - // QUAST: Prefer to keep the local module since it can deal with the inputs I have - withName: '.*:ASSEMBLE:.*:QUAST' { - ext.prefix = { "${meta.id}_assembly" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*PILON:.*:QUAST' { - ext.prefix = { "${meta.id}_pilon" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*MEDAKA:.*:QUAST' { - ext.prefix = { "${meta.id}_medaka" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LINKS:.*:QUAST' { - ext.prefix = { "${meta.id}_links" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LONGSTITCH:.*:QUAST' { - ext.prefix = { "${meta.id}_longstitch" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - // avoid catching ragtag from ont_on_hifi assembly - withName: '.*:SCAFFOLD:.*RAGTAG:.*:QUAST' { - ext.prefix = { "${meta.id}_ragtag" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - // MERQURY: nf-core - withName: '.*:ASSEMBLE:.*:MERQURY' { - ext.prefix = { "${meta.id}_assembly" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*PILON:.*:MERQURY' { - ext.prefix = { "${meta.id}_pilon" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*MEDAKA:.*:MERQURY' { - ext.prefix = { "${meta.id}_medaka" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LINKS:.*:MERQURY' { - ext.prefix = { "${meta.id}_links" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LONGSTITCH:.*:MERQURY' { - ext.prefix = { "${meta.id}_longstitch" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - // avoid catching ragtag from ont_on_hifi assembly - withName: '.*:SCAFFOLD:.*RAGTAG:.*:MERQURY' { - ext.prefix = { "${meta.id}_ragtag" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // Refence - withName: '.*MAP_TO_REF.*' { - ext.prefix = { "${meta.id}_to_reference" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/reference/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - // Assembly mappings - withName: '.*ASSEMBLE:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_assembly" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - withName: '.*MEDAKA:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_medaka" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - withName: '.*PILON:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_pilon" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - withName: '.*LONGSTITCH:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_longstitch" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - withName: '.*LINKS:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_links" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - withName: '.*RAGTAG:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_ragtag" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - // Pilon mapping - withName: '.*PILON:MAP_SR.*' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/shortreads/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - ext.prefix = { "${meta.id}_shortreads" } - ext.args = { "-ax sr " } - } - /* - -------- - Report - */ - withName: REPORT { - publishDir = [ - path: { "${params.outdir}/report/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } -} diff --git a/conf/test.config b/conf/test.config index b8875c0f..be3dc0e5 100644 --- a/conf/test.config +++ b/conf/test.config @@ -17,13 +17,7 @@ process { time: '1.h' ] } -/* -process { - withName: 'HIFIASM.*' { - memory = '15.GB' - } -} -*/ + params { config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' diff --git a/nextflow.config b/nextflow.config index 17bb4d91..45e4f254 100644 --- a/nextflow.config +++ b/nextflow.config @@ -239,8 +239,7 @@ includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !pa // Load nf-core/genomeassembler custom profiles from different institutions. -includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/genomeassembler.config" : "/dev/null" - +includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? 
"${params.custom_config_base}/pipeline/genomeassembler.config" : "/dev/null" // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled // Set to your registry if you have a mirror of containers From 77843addd02da8f535863a90827f677e954323b5 Mon Sep 17 00:00:00 2001 From: Niklas Schandry Date: Mon, 21 Jul 2025 10:43:05 +0200 Subject: [PATCH 34/34] Comments lundin (#174) * add prefix to singularity container for report * maxime comments * comments Daniel Lundin * update release date * Revert "comments Daniel Lundin" This reverts commit 376973f040ef2836a17796ae2d62e19bd74cb76e. * Daniel Lundin comments --- CHANGELOG.md | 2 +- modules/local/collect_reads/main.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7af38b8..5de49257 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## v1.1.0 'Brass Pigeon' - [2025-07-11] +## v1.1.0 'Brass Pigeon' - [2025-07-21] ### `Added` diff --git a/modules/local/collect_reads/main.nf b/modules/local/collect_reads/main.nf index ee38cb3b..4dbdc87a 100644 --- a/modules/local/collect_reads/main.nf +++ b/modules/local/collect_reads/main.nf @@ -18,7 +18,7 @@ process COLLECT_READS { def prefix = task.ext.prefix ?: "${meta.id}" """ - zcat ${reads} | gzip > ${prefix}_all_reads.fq.gz + cat ${reads} > ${prefix}_all_reads.fq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": gzip: \$(echo \$(gzip --version | head -n1 | sed 's/gzip //')) @@ -28,7 +28,7 @@ process COLLECT_READS { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}_all_reads.fq.gz + touch ${prefix}_all_reads.fq; gzip ${prefix}_all_reads.fq cat <<-END_VERSIONS > versions.yml "${task.process}": gzip: \$(echo \$(gzip --version | head -n1 | sed 's/gzip //'))