diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index e01c8db1..1f9ed4d6 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -79,6 +79,7 @@ If you wish to contribute a new step, please use the following coding standards: 6. Add sanity checks and validation for all relevant parameters. 7. Perform local tests to validate that the new code works as expected. 8. If applicable, add a new test in the `tests` directory. +9. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. ### Default values diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 9ce539c1..00000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,95 +0,0 @@ -name: nf-core CI -# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors -on: - push: - branches: - - dev - pull_request: - release: - types: [published] - workflow_dispatch: - -env: - NXF_ANSI_LOG: false - NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity - NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity - -concurrency: - group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" - cancel-in-progress: true - -jobs: - test: - name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.ASSEMBLER }} | ${{ matrix.profile }})" - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/genomeassembler') }}" - runs-on: ubuntu-latest - strategy: - matrix: - NXF_VER: - - "25.04.0" - - "latest-everything" - ASSEMBLER: - - "hifi_flye" - - "hifi_hifiasm" - - "ont_flye" - - "ont_hifiasm" - - "hifiont_hifiasm" - - "hifiont_flye_on_hifiasm" - - "hifiont_hifiasm_on_hifiasm" - profile: - - "conda" - - "docker" - - "singularity" - isMaster: - - ${{ github.base_ref == 'master' }} - # 
Exclude conda and singularity on dev - exclude: - - isMaster: false - profile: "conda" - - isMaster: false - profile: "singularity" - - steps: - - name: Check out pipeline code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - with: - fetch-depth: 0 - - - name: Set up Nextflow - uses: nf-core/setup-nextflow@v2 - with: - version: "${{ matrix.NXF_VER }}" - - - name: Set up Apptainer - if: matrix.profile == 'singularity' - uses: eWaterCycle/setup-apptainer@main - - - name: Set up Singularity - if: matrix.profile == 'singularity' - run: | - mkdir -p $NXF_SINGULARITY_CACHEDIR - mkdir -p $NXF_SINGULARITY_LIBRARYDIR - - - name: Set up Miniconda - if: matrix.profile == 'conda' - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 - with: - miniconda-version: "latest" - auto-update-conda: true - conda-solver: libmamba - channels: conda-forge,bioconda - - - name: Set up Conda - if: matrix.profile == 'conda' - run: | - echo $(realpath $CONDA)/condabin >> $GITHUB_PATH - echo $(realpath python) >> $GITHUB_PATH - - - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.ASSEMBLER }} | ${{ matrix.profile }}" - continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} - run: | - nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.ASSEMBLER }},test,${{matrix.profile}} --outdir ./results_${{matrix.profile}}_${{ matrix.ASSEMBLER }} - - - name: Clean up Disk space - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index c98d76ec..fd742d1b 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -78,7 +78,7 @@ jobs: - isMain: false profile: "singularity" NXF_VER: - - "25.04.0" + - "25.10.0" - "latest-everything" env: NXF_ANSI_LOG: false diff --git a/.gitignore b/.gitignore index f232546a..ecdcfbc3 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ testing* null/ .nf-test/ 
.nf-test.log +schema.md diff --git a/.nf-core.yml b/.nf-core.yml index 1e279baf..587f3004 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -7,6 +7,9 @@ lint: files_unchanged: - assets/sendmail_template.txt - .github/CONTRIBUTING.md + - assets/nf-core-genomeassembler_logo_light.png + - docs/images/nf-core-genomeassembler_logo_light.png + - docs/images/nf-core-genomeassembler_logo_dark.png nf_core_version: 3.5.1 repository_type: pipeline template: @@ -20,4 +23,4 @@ template: skip_features: - multiqc - igenomes - version: 1.1.0 + version: 2.0.0dev diff --git a/.prettierignore b/.prettierignore index 0e2ed66e..dd749d43 100644 --- a/.prettierignore +++ b/.prettierignore @@ -14,5 +14,3 @@ bin/ ro-crate-metadata.json modules/nf-core/ subworkflows/nf-core/ -*.svg -tests/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 5de49257..440f8a8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,53 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v2.0.0 'Saffron Vulture' - [2026-xx-xx] + +This is a major release, with breaking changes. +v2.0.0 of genomeassembler is a large refactor of the pipeline to facilitate sample-level parameteristation. This allows to either parameterise the _pipeline_ using `params`, or parameterise _samples_ via the `input` samplesheet. In case both types of parameterisations are used, sample parameters will take priority. + +Since this workflow follows a sample-centric implementation, nextflow will always render the full pipeline dag, but depending on configuration samples may not travel through the whole pipeline. This may also cause terminal output to show task instances that will never become an active process. 
+ +In addition, v2.0.0 contains these changes: + +### `Added` + +- fastplong for long-read trimming and qc +- fastp for short-read trimming and qc +- migration to nf-test +- increased flexibility of the scaffolding strategy +- added option to group samples +- `dorado polish` added as an alternative to `medaka` for ONT polishing. This is an **experimental feature**, due to `dorado` being under active development. +- HiC scaffolding subworkflow: + - mapping with `bwamem2` or `minimap2` + - duplicate removal with `picard` + - scaffolding with `yahs` +- Switched to the versions topic, requires nextflow >=25.10.0 + +### `Fixed` + +### `Dependencies` + +- `fastplong` +- `fastp` +- `dorado` +- `bwamem2` +- `picard` +- `yahs` + +### `Deprecated` + +The following tools are no longer used: + +- `nanoq` +- `porechop` +- `lima` +- `trimgalor` + +The following param is no longer implemented: + +- `dump`, used to dump jellyfish output. + ## v1.1.0 'Brass Pigeon' - [2025-07-21] ### `Added` @@ -72,3 +119,5 @@ Initial release of nf-core/genomeassembler, created with the [nf-core](https://n ### `Dependencies` ### `Deprecated` + +Codenames for v1.x are various types of metallic pigeons, v2.x are vultures of different colors. diff --git a/CITATIONS.md b/CITATIONS.md index b8cc1329..6f442e2e 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -12,19 +12,11 @@ ### Preprocessing -- [lima](https://github.com/pacificbiosciences/barcoding/) +- [fastp](https://github.com/OpenGene/fastp/) and [fastplong](https://github.com/OpenGene/fastplong/) -- [nanoq](https://github.com/esteinig/nanoq) + > Shifu Chen. 2023. Ultrafast one-pass FASTQ data preprocessing, quality control, and deduplication using fastp. iMeta 2: e107. https://doi.org/10.1002/imt2.107 - > Steinig and Coin (2022). Nanoq: ultra-fast quality control for nanopore reads. 
Journal of Open Source Software, 7(69), 2991, https://doi.org/10.21105/joss.02991 - -- [porechop](https://github.com/rrwick/Porechop) - - > Wick RR, Judd LM, Gorrie CL, Holt KE. Completing bacterial genome assemblies with multiplex MinION sequencing. Microb Genom. 2017;3(10):e000132. Published 2017 Sep 14. doi:10.1099/mgen.0.000132 - -- [TrimGalore](https://github.com/FelixKrueger/TrimGalore) - - > Felix Krueger, Frankie James, Phil Ewels, Ebrahim Afyounian, Michael Weinstein, Benjamin Schuster-Boeckler, Gert Hulselmans, & sclamons. (2023). FelixKrueger/TrimGalore. Zenodo. https://doi.org/10.5281/zenodo.7598955 + > Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu; fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics, Volume 34, Issue 17, 1 September 2018, Pages i884–i890, https://doi.org/10.1093/bioinformatics/bty560 ### Assembly @@ -52,6 +44,8 @@ - [medaka](https://github.com/nanoporetech/medaka) +- [dorado](https://github.com/nanoporetech/dorado) + ### Scaffolding - [LINKS](https://github.com/bcgsc/LINKS) @@ -62,6 +56,10 @@ > Coombe L, Li JX, Lo T, Wong J, Nikolic V, Warren RL and Birol I. LongStitch: high-quality genome assembly correction and scaffolding using long reads. BMC Bioinformatics 22, 534 (2021). https://doi.org/10.1186/s12859-021-04451-7 +- [yahs](https://github.com/c-zhou/yahs) + + > Chenxi Zhou, Shane A. McCarthy, Richard Durbin. YaHS: yet another Hi-C scaffolding tool. Bioinformatics, 39(1), btac808. + - [RagTag](https://github.com/malonge/RagTag) > Alonge, Michael, et al. "Automated assembly scaffolding elevates a new tomato system for high-throughput genome editing." Genome Biology (2022). https://doi.org/10.1186/s13059-022-02823-7 @@ -108,6 +106,16 @@ > Petr Danecek, James K Bonfield, Jennifer Liddle, John Marshall, Valeriu Ohan, Martin O Pollard, Andrew Whitwham, Thomas Keane, Shane A McCarthy, Robert M Davies, Heng Li (2021) Twelve years of SAMtools and BCFtools. 
GigaScience, Volume 10, Issue 2, February 2021, giab008, https://doi.org/10.1093/gigascience/giab008 +- [bwa-mem2](https://github.com/bwa-mem2/bwa-mem2) + + > Vasimuddin Md, Sanchit Misra, Heng Li, Srinivas Aluru. Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. IEEE Parallel and Distributed Processing Symposium (IPDPS), 2019. + + > Li H. (2013) Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. arXiv:1303.3997v2 [q-bio.GN]. (if you use the BWA-MEM algorithm or the fastmap command, or want to cite the whole BWA package) + +- [picard](https://broadinstitute.github.io/picard/) + + > “Picard Toolkit.” 2019. Broad Institute, GitHub Repository. https://broadinstitute.github.io/picard/; Broad Institute + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/README.md b/README.md index 1295272c..4ff35167 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ [![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14986998) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) -[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) [![nf-core template 
version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) @@ -21,12 +21,12 @@ ## Introduction -**nf-core/genomeassembler** is a bioinformatics pipeline that carries out genome assembly, polishing and scaffolding from long reads (ONT or pacbio). Assembly can be done via `flye` or `hifiasm`, polishing can be carried out with `medaka` (ONT), or `pilon` (requires short-reads), and scaffolding can be done using `LINKS`, `Longstitch`, or `RagTag` (if a reference is available). Quality control includes `BUSCO`, `QUAST` and `merqury` (requires short-reads). -Currently, this pipeline does not implement phasing of polyploid genomes or HiC scaffolding. +**nf-core/genomeassembler** is a bioinformatics pipeline that carries out genome assembly, polishing and scaffolding from long reads (ONT or pacbio). Assembly can be done via `flye` or `hifiasm`, or combinations of both, polishing can be carried out with `medaka` (ONT), `dorado` (ONT only, experimental) or `pilon` (requires short-reads), and scaffolding can be done using `LINKS`, `Longstitch`, both using long-reads, `yahs` if HiC reads are availble, or `RagTag` if a reference is available. Quality control includes `BUSCO`, `QUAST` and `merqury` (requires short-reads). +Currently, this pipeline does not implement phasing of polyploid genomes. 
- nf-core/genomeassembler + nf-core/genomeassembler ## Usage @@ -34,31 +34,18 @@ Currently, this pipeline does not implement phasing of polyploid genomes or HiC > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. -First, prepare a samplesheet with your input data that looks as follows: +nf-core/genomeassembler can be set up via pipeline parameters, or via a samplesheet, or a combination of both. For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/genomeassembler/usage) and the [parameter documentation](https://nf-co.re/genomeassembler/parameters). -`samplesheet.csv`: - -```csv -sample,ontreads,hifireads,ref_fasta,ref_gff,shortread_F,shortread_R,paired -sampleName,ontreads.fa.gz,hifireads.fa.gz,assembly.fasta.gz,reference.fasta,reference.gff,short_F1.fastq,short_F2.fastq,true -``` - -Each row represents one genome to be assembled. `sample` should contain the name of the sample, `ontreads` should contain a path to ONT reads (fastq.gz), `hifireads` a path to HiFi reads (fastq.gz), `ref_fasta` and `ref_gff` contain reference genome fasta and annotations. `shortread_F` and `shortread_R` contain paths to short-read data, `paired` indicates if short-reads are paired. Columns can be omitted if they contain no data, with the exception of `shortread_R`, which needs to be present if `shortread_F` is there, even if it is empty. - -Now, you can run the pipeline using: +The pipeline can be run with a test-profile via: ```bash nextflow run nf-core/genomeassembler \ - -profile \ - --input samplesheet.csv \ - --outdir + -profile test, \ ``` > [!WARNING] > Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files). -For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/genomeassembler/usage) and the [parameter documentation](https://nf-co.re/genomeassembler/parameters). - ## Pipeline output To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/genomeassembler/results) tab on the nf-core website pipeline page. @@ -67,12 +54,14 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/genomeassembler was originally written by [Niklas Schandry](https://github.com/nschan), of the Faculty of Biology of the Ludwig-Maximilians University (LMU) in Munich, Germany. +nf-core/genomeassembler was written, and is currently maintained by [Niklas Schandry](https://github.com/nschan), of the Faculty of Biology of the Ludwig-Maximilians University (LMU) in Munich, Germany, with funding support from the German Research Foundation (Deutsche Forschungsgemeinschaft [DFG], via Transregional Research Center TRR356 grant 491090170-A05 to Niklas Schandry). 
-I thank the following people for their extensive assistance and constructive reviews during the development of this pipeline: +I thank the following people for constructive reviews and discussion during the development of this pipeline: +- [Jim Downie](https://github.com/prototaxites) - [Mahesh Binzer-Panchal](https://github.com/mahesh-panchal) - [Matthias Hörtenhuber](https://github.com/mashehu) +- [Evangelos Karatzas](https://github.com/vagkaratzas) - [Louis Le Nézet](https://github.com/LouisLeNezet) - [Júlia Mir Pedrol](https://github.com/mirpedrol) - [Daniel Straub](https://github.com/d4straub) diff --git a/assets/nf-core-genomeassembler_logo_light.png b/assets/nf-core-genomeassembler_logo_light.png index 9a4851b0..e17ba526 100644 Binary files a/assets/nf-core-genomeassembler_logo_light.png and b/assets/nf-core-genomeassembler_logo_light.png differ diff --git a/assets/report/_brand.yml b/assets/report/_brand.yml new file mode 100644 index 00000000..40b25dac --- /dev/null +++ b/assets/report/_brand.yml @@ -0,0 +1,38 @@ +color: + palette: + green: "#24B064" + brown: "#3F2B29" + yellow: "#ECDC86" + white: "#f9f9f9" + gray-dark: "#343a40" + gray-100: "#f8f9fa" + gray-200: "#e9ecef" + gray-300: "#dee2e6" + gray-400: "#ced4da" + gray-500: "#adb5bd" + gray-600: "#6c757d" + gray-700: "#495057" + gray-800: "#343a40" + gray-900: "#212529" + background: white + foreground: gray-dark + primary: green + secondary: yellow + tertiary: brown + light: "#f8f9fa" + dark: "#212529" + +logo: + images: + banner: "nf-core-logo.png" + apple: "nf-core-logo-square.png" + small: apple + medium: banner + large: banner + +typography: + fonts: + - family: Maven Pro + source: google + base: Maven Pro + headings: Maven Pro diff --git a/assets/report/functions/plot_merqury.R b/assets/report/functions/plot_merqury.R index 55bc78b9..d212db90 100644 --- a/assets/report/functions/plot_merqury.R +++ b/assets/report/functions/plot_merqury.R @@ -1,93 +1,94 @@ -plot_merqury_stats <- function(data, 
samplename) { - data %>% - filter(sample == paste(samplename)) %>% - ggplot(aes(x = stage, y = assembly * 100 / total)) + - geom_line(aes(group = sample)) + - geom_point(size = 7, color = "black", fill = "white", pch = 21) + - labs( - y = "k-mer completeness [%]", - x = "Stage", - color = "k-mers copy number", - fill = "k-mers copy number", - title = glue::glue("k-mer completeness {samplename} assemblies") - ) +plot_merqury_stats <- function(data, groupname) { + data %>% + filter(group == paste(groupname)) %>% + ggplot(aes(x = stage, y = assembly*100/total, color = sample, fill = sample)) + + geom_line(aes(group = sample)) + + geom_point(size = 7, color = "black", pch=21) + + labs( + y = "k-mer completeness [%]", + x = "Stage", + color = "k-mers copy number", + fill = "k-mers copy number", + title = glue::glue("k-mer completeness {groupname} assemblies") + ) + + fill_scale_plots + + color_scale_plots } -plot_merqury_multiplicity <- function(data, samplename) { - y_max <- data %>% - filter(sample == paste(samplename), Assembly != "read-only") %$% - max(Count) - x_max <- data %>% - filter(sample == paste(samplename), Assembly != "read-only") %$% - quantile(Count, .95) - data %>% - filter(sample == paste(samplename)) %>% - mutate(Assembly = case_when(Assembly == "read-only" ~ "Reads", TRUE ~ "Assembly")) %>% - ggplot(aes(x = kmer_multiplicity, y = Count)) + - geom_line(aes(color = Assembly)) + - # geom_area(aes(fill = Assembly), alpha = 0.15,stat = "identity") + - facet_grid(~stage) + - coord_cartesian( - xlim = c(0, x_max * 1.1), - ylim = c(0, y_max * 1.05), - expand = TRUE, - default = FALSE, - clip = "on" - ) + - labs( - x = "kmer multiplicity", - y = "Count", - color = "k-mers from", - fill = "k-mers from", - title = glue::glue("k-mer multiplicity across {samplename} assemblies") - ) + - theme(legend.position = "bottom") + - color_scale_plots + - fill_scale_plots +plot_merqury_multiplicity <- function(data, groupname) { + y_max <- data %>% + filter(group == 
paste(groupname), Assembly != "read-only") %$% + max(Count) + x_max <- data %>% + filter(group == paste(groupname), Assembly != "read-only") %$% + quantile(Count, .9) + data %>% + filter(group == paste(groupname)) %>% + mutate(Assembly = case_when(Assembly == "read-only" ~ "Reads", TRUE ~ "Assembly")) %>% + ggplot(aes(x = kmer_multiplicity, y = Count)) + + geom_line(aes(color = Assembly)) + + facet_grid(sample ~ stage) + + coord_cartesian( + xlim = c(0, x_max * 1.05), + ylim = c(0, y_max * 1.05), + expand = TRUE, + default = FALSE, + clip = "on" + ) + + labs( + x = "kmer multiplicity", + y = "Count", + color = "k-mers from", + fill = "k-mers from", + title = glue::glue("k-mer multiplicity across {groupname} assemblies") + ) + + theme(legend.position = "bottom") + + color_scale_plots + + fill_scale_plots } -plot_merqury_copynumber <- function(data, samplename) { - y_max <- data %>% - filter(sample == paste(samplename), Copies != "read-only") %$% - max(Count) - x_max <- data %>% - filter(sample == paste(samplename), Copies != "read-only") %$% - quantile(Count, .965) - data %>% - filter(sample == paste(samplename)) %>% - ggplot(aes(x = kmer_multiplicity, y = Count)) + - geom_line(aes(color = Copies)) + - # geom_area(aes(fill = Copies), alpha = 0.15, stat = "identity") + - facet_grid(~stage) + - coord_cartesian( - xlim = c(0, x_max * 1.1), - ylim = c(0, y_max * 1.05), - expand = TRUE, - default = FALSE, - clip = "on" - ) + - labs( - x = "kmer multiplicity", - y = "Count", - color = "k-mers copy number", - fill = "k-mers copy number", - title = glue::glue("k-mer copy number across {samplename} assemblies") - ) + - theme(legend.position = "bottom") + - color_scale_plots + - fill_scale_plots +plot_merqury_copynumber <- function(data, groupname) { + y_max <- data %>% + filter(group == paste(groupname), Copies != "read-only") %$% + max(Count) + x_max <- data %>% + filter(group == paste(groupname), Copies != "read-only") %$% + quantile(Count, .9) + data %>% + filter(group == 
paste(groupname)) %>% + ggplot(aes(x = kmer_multiplicity, y = Count)) + + geom_line(aes(color = Copies)) + + facet_grid(sample ~ stage) + + coord_cartesian( + xlim = c(0, x_max * 1.05), + ylim = c(0, y_max * 1.05), + expand = TRUE, + default = FALSE, + clip = "on" + ) + + labs( + x = "kmer multiplicity", + y = "Count", + color = "k-mers copy number", + fill = "k-mers copy number", + title = glue::glue("k-mer copy number across {groupname} assemblies") + ) + + theme(legend.position = "bottom") + + color_scale_plots + + fill_scale_plots } -plot_merqury_qv <- function(data, samplename) { - data %>% - filter(sample == paste(samplename)) %>% - ggplot(aes(x = stage, y = QV)) + - geom_line(aes(group = sample)) + - geom_point( - pch = 21, - color = "black", - fill = "white", - size = 5 - ) + - labs(y = "QV", x = "Stage", title = "QV across assembly stages") +plot_merqury_qv <- function(data, groupname) { + data %>% + filter(group == paste(groupname)) %>% + ggplot(aes(x = stage, y = QV, color = sample, fill = sample)) + + geom_line(aes(group = sample)) + + geom_point( + pch = 21, + color = "black", + size = 5 + ) + + labs(y = "QV", x = "Stage", title = "QV across assembly stages") + + fill_scale_plots + + color_scale_plots } diff --git a/assets/report/functions/read_busco.R b/assets/report/functions/read_busco.R index 11834f74..f0753ace 100644 --- a/assets/report/functions/read_busco.R +++ b/assets/report/functions/read_busco.R @@ -1,85 +1,83 @@ # read busco short summary tsv read_busco_report <- function(file) { - assembly <- read_lines(file, - skip = 2L, - n_max = 1L + assembly <- read_lines(file, + skip = 2L, + n_max = 1L) %>% + str_extract('(?<=input_seqs/).+?(?=\\.fa)') + bind_rows( + read_tsv( + file, + skip = 9L, + col_names = c("empty", "count", "variable"), + trim_ws = T, + n_max = 6L, + col_select = c(2, 3), + show_col_types = FALSE ) %>% - str_extract("(?<=input_seqs/).+?(?=\\.fa)") - bind_rows( - read_tsv( - file, - skip = 9L, - col_names = c("empty", "count", 
"variable"), - trim_ws = T, - n_max = 6L, - col_select = c(2, 3), - show_col_types = FALSE - ) %>% - magrittr::set_colnames(c("count", "stat")) %>% - mutate( - percent = 100 * count / count[stat == "Total BUSCO groups searched"], - percent = round(percent, 3) - ), - read_tsv( - file, - skip = 17L, - col_names = c("empty", "count", "variable"), - trim_ws = T, - n_max = 6L, - col_select = c(2, 3), - show_col_types = FALSE - ) %>% - magrittr::set_colnames(c("count", "stat")) %>% - dplyr::filter(!stat == "Percent gaps") %>% - mutate( - count = case_when( - str_detect(count, "MB") ~ count %>% str_extract("[0-9]+") %>% as.double() %>% - { - . * 1e6 - }, - str_detect(count, "KB") ~ count %>% str_extract("[0-9]+") %>% as.double() %>% - { - . * 1e3 - }, - TRUE ~ count %>% str_extract("[0-9]+") %>% as.double() - ), - percent = NA_real_ - ) - ) %>% - mutate(assembly = assembly) %>% - mutate( - BUSCO = stat %>% - str_extract("\\(.\\)") %>% - fct_relevel(c("(C)", "(S)", "(D)", "(F)", "(M)")), - BUSCO = case_when( - str_detect(BUSCO, "(C)") ~ "Complete", - str_detect(BUSCO, "(S)") ~ "Single Copy", - str_detect(BUSCO, "(D)") ~ "Duplicated", - str_detect(BUSCO, "(F)") ~ "Fragmented", - str_detect(BUSCO, "(M)") ~ "Missing", - ) - ) + magrittr::set_colnames(c("count", "stat")) %>% + mutate( + percent = 100 * count / count[stat == "Total BUSCO groups searched"], + percent = round(percent, 3) + ), + read_tsv( + file, + skip = 17L, + col_names = c("empty", "count", "variable"), + trim_ws = T, + n_max = 6L, + col_select = c(2, 3), + show_col_types = FALSE + ) %>% + magrittr::set_colnames(c("count", "stat")) %>% + dplyr::filter(!stat %in% c("Scaffold N50", "Contigs N50", "Percent gaps", "Number of scaffolds", "Percent gaps")) %>% + mutate( + count = case_when( + str_detect(count, "MB") ~ count %>% str_extract("[0-9]+") %>% as.double() %>% { + . * 1e6 + }, + str_detect(count, "KB") ~ count %>% str_extract("[0-9]+") %>% as.double() %>% { + . 
* 1e3 + }, + TRUE ~ count %>% str_extract("[0-9]+") %>% as.double() + ), + percent = NA_real_ + ) + ) %>% + mutate(assembly = assembly) %>% + mutate( + BUSCO = stat %>% + str_extract("\\(.\\)") %>% + fct_relevel(c("(C)", "(S)", "(D)", "(F)", "(M)")), + BUSCO = case_when( + str_detect(BUSCO, "(C)") ~ "Complete", + str_detect(BUSCO, "(S)") ~ "Single Copy", + str_detect(BUSCO, "(D)") ~ "Duplicated", + str_detect(BUSCO, "(F)") ~ "Fragmented", + str_detect(BUSCO, "(M)") ~ "Missing", + ) + ) } ## Read busco batch summary -read_busco_batch <- \(x) { - read_tsv(x, show_col_types = F) %>% - set_colnames(colnames(.) %>% str_replace_all(" ", "_")) %>% - mutate( - sample = str_extract(x %>% basename(), - ".+?(?=_[assembly|links|longstitch|ragtag|medaka|pilon])"), - stage = case_when( - str_detect(x, "_ragtag") ~ "RagTag", - str_detect(x, "_medaka") ~ "medaka", - str_detect(x, "_pilon") ~ "pilon", - str_detect(x, "_longstitch") ~ "longstitch", - str_detect(x, "_links") ~ "LINKS", - str_detect(x, "assembly") ~ "Assembly", - TRUE ~ "Unknown"), - Percent_gaps = Percent_gaps %>% str_remove("%") %>% as.numeric() - ) %>% - dplyr::select(-Dataset) %>% - pivot_longer(Complete:Number_of_scaffolds, names_to = "Var") -} +read_busco_batch <- \(x) {read_tsv(x, show_col_types = F) %>% + set_colnames(colnames(.) %>% str_replace_all(" ", "_")) %>% + mutate( + # Get sample name by matching filename to samples in groups, reverse sort by length to hopefully catch + # the correct name first in case there is partial overlap between sample names. 
+ sample = basename(x) %>% + str_extract(groups$sample[rev(order(nchar(groups$sample)))] %>% paste(collapse = "|")),, + stage = case_when( + str_detect(x, "ragtag") ~ "RagTag", + str_detect(x, "medaka") ~ "medaka", + str_detect(x, "dorado") ~ "dorado", + str_detect(x, "pilon") ~ "pilon", + str_detect(x, "longstitch") ~ "longstitch", + str_detect(x, "links") ~ "LINKS", + str_detect(x, "yahs") ~ "HiC", + str_detect(x, "assembl[ey]") ~ "Assembly", + ), + Percent_gaps = Percent_gaps %>% str_remove("%") %>% as.numeric) %>% + dplyr::select(-Dataset) %>% + pivot_longer(Complete:Number_of_scaffolds, names_to = "Var")} diff --git a/assets/report/functions/read_fastplong.R b/assets/report/functions/read_fastplong.R new file mode 100644 index 00000000..3979a32b --- /dev/null +++ b/assets/report/functions/read_fastplong.R @@ -0,0 +1,33 @@ +library(magrittr) +library(tidyjson) +library(dplyr) +library(readr) + +# Read a nanoq json report +read_fastplong <- function(file) { + group_name <- file %>% str_extract('(?<=fastplong/).+?(?=_(ont|hifi)\\.fastplong\\.json)') + read_type <- file %>% + str_extract('(?<=fastplong/).*') %>% + str_extract('_(ont|hifi)\\.') %>% + str_remove_all("_|\\.") |> + str_replace("ont", "ONT") |> + str_replace("hifi", "PB HiFi") + read_json(file) %>% + enter_object("summary") %>% + tidyjson::spread_all() %>% + as_tibble() %>% + select(-document.id,-fastplong_version) %>% + pivot_longer(everything(), + names_to = c("stage", "stat"), + names_pattern = "(.*)\\.(.*)") %>% + mutate(group = group_name) %>% + mutate( + stat = stat %>% + str_replace_all("_", " ") %>% + str_to_title(), + stage = stage %>% + str_replace_all("_", " ") %>% + str_to_title(), + read_type = read_type + ) +} diff --git a/assets/report/functions/read_genomescope.R b/assets/report/functions/read_genomescope.R index 40745392..babcb9e4 100644 --- a/assets/report/functions/read_genomescope.R +++ b/assets/report/functions/read_genomescope.R @@ -1,43 +1,44 @@ +## WIP read_genomescope <- 
function(path) { - rbind( - read_table(path, skip = 3, n_max = 1) %>% dplyr::select(-X4), - read_table( - path, - skip = 5, - n_max = 3, - col_names = c( - "property", - "property2", - "property3", - "min", - "bp", - "max", - "bp2" - ) - ) %>% - dplyr::select(-starts_with("bp")) %>% - mutate(property = glue::glue("{property} {property2} {property3}")) %>% - dplyr::select(-property2, -property3), - read_table( - path, - skip = 8, - n_max = 1, - c("property", "property2", "min", "max") - ) %>% - mutate(property = glue::glue("{property} {property2}")) %>% - dplyr::select(-property2), - read_table( - path, - skip = 9, - n_max = 1, - c("property", "property2", "property3", "min", "max") - ) %>% - mutate(property = glue::glue("{property} {property2} {property3}")) %>% - dplyr::select(-property2, -property3) + rbind( + read_table(path, skip = 3, n_max = 1) %>% dplyr::select(-X4), + read_table( + path, + skip = 5, + n_max = 3, + col_names = c( + "property", + "property2", + "property3", + "min", + "bp", + "max", + "bp2" + ) ) %>% - mutate( - min = str_extract(min, "[0-9\\.]+") %>% as.numeric(), - max = str_extract(max, "[0-9\\.]+") %>% as.numeric() - ) %>% - mutate(sample = str_extract(path %>% basename(), ".+?(?=_genomescope)")) + dplyr::select(-starts_with("bp")) %>% + mutate(property = glue::glue("{property} {property2} {property3}")) %>% + dplyr::select(-property2, -property3), + read_table( + path, + skip = 8, + n_max = 1, + c("property", "property2", "min", "max") + ) %>% + mutate(property = glue::glue("{property} {property2}")) %>% + dplyr::select(-property2), + read_table( + path, + skip = 9, + n_max = 1, + c("property", "property2", "property3", "min", "max") + ) %>% + mutate(property = glue::glue("{property} {property2} {property3}")) %>% + dplyr::select(-property2, -property3) + ) %>% + mutate( + min = str_extract(min, "[0-9\\.]+") %>% as.numeric(), + max = str_extract(max, "[0-9\\.]+") %>% as.numeric() + ) %>% + mutate(sample = str_extract(path %>% basename(), 
".+?(?=_genomescope)")) } diff --git a/assets/report/functions/read_nanoq.R b/assets/report/functions/read_nanoq.R index 3dcee647..e8e9dfd7 100644 --- a/assets/report/functions/read_nanoq.R +++ b/assets/report/functions/read_nanoq.R @@ -1,28 +1,29 @@ +library(magrittr) library(tidyjson) +library(dplyr) +library(readr) # Read a nanoq json report read_nanoq <- function(file) { - bind_rows( - read_json(file) %>% - tidyjson::spread_all() %>% - as_tibble() %>% - select(-document.id) %>% - pivot_longer(everything(), values_to = "val", names_to = "stat"), - read_json(file) %>% - enter_object(top_lengths) %>% - gather_array() %>% - unnest(cols = c(..JSON)) %>% - mutate(stat = "longest") %>% - dplyr::select(4, val = 3) - ) %>% - mutate( - sample = str_extract(file, "(?<=nanoq/).+?(?=_report)"), - stage = "Reads" - ) %>% - mutate( - stat = stat %>% - str_replace_all("_", " ") %>% - str_to_title() %>% - str_replace("Reads", "N Reads") - ) + bind_rows( + read_json(file) %>% + tidyjson::spread_all() %>% + as_tibble() %>% + select(-document.id) %>% + pivot_longer(everything(), values_to = "val", names_to = "stat"), + read_json(file) %>% + enter_object(top_lengths) %>% + gather_array() %>% + unnest(cols = c(..JSON)) %>% + mutate(stat = "longest") %>% + dplyr::select(4, val = 3) + ) %>% + mutate(sample = str_extract(file, '(?<=nanoq/).+?(?=_report)'), + stage = "Reads") %>% + mutate( + stat = stat %>% + str_replace_all("_", " ") %>% + str_to_title() %>% + str_replace("Reads", "N Reads") + ) } diff --git a/assets/report/functions/read_quast.R b/assets/report/functions/read_quast.R index a6cdc24e..13d87397 100644 --- a/assets/report/functions/read_quast.R +++ b/assets/report/functions/read_quast.R @@ -1,21 +1,21 @@ read_quast_report <- function(file) { - assembly <- read_tsv( - file, - skip = 0L, - col_names = c("stat", "value"), - trim_ws = T, - n_max = 1L, - show_col_types = FALSE - ) %$% - value - read_tsv( - file, - skip = 1L, - col_names = c("stat", "value"), - col_types = 
"cd", - trim_ws = T, - n_max = 36L, - show_col_types = FALSE - ) %>% - mutate(assembly = assembly) + assembly = read_tsv( + file, + skip = 0L, + col_names = c("stat", "value"), + trim_ws = T, + n_max = 1L, + show_col_types = FALSE + ) %$% + value + read_tsv( + file, + skip = 1L, + col_names = c("stat", "value"), + col_types = "cd", + trim_ws = T, + n_max = 36L, + show_col_types = FALSE + ) %>% + mutate(assembly = assembly) } diff --git a/assets/report/nf-core-logo-square.png b/assets/report/nf-core-logo-square.png new file mode 100644 index 00000000..17313637 Binary files /dev/null and b/assets/report/nf-core-logo-square.png differ diff --git a/assets/report/nf-core-logo.png b/assets/report/nf-core-logo.png new file mode 100644 index 00000000..91ddb58d Binary files /dev/null and b/assets/report/nf-core-logo.png differ diff --git a/assets/report/report.qmd b/assets/report/report.qmd index 16cb578c..5ee6fb59 100644 --- a/assets/report/report.qmd +++ b/assets/report/report.qmd @@ -1,19 +1,25 @@ --- title: "nf-core/genomeassembler report" -author: "" -format: dashboard -editor: source -nav-buttons: - - icon: github - - href: https://github.com/nf-core/genomeassembler +format: + html: + page-layout: full + toc: true + toc_float: true + toc-location: left + other-links: + - text: "Github repo" + icon: github + href: https://github.com/nf-core/genomeassembler + - text: "Website" + icon: small + href: https://nf-co.re/genomeassembler params: - nanoq: false + fastplong: false busco: false quast: false jellyfish: false merqury: false --- - ```{r load libraries and functions} #| message: false #| output: false @@ -23,545 +29,199 @@ params: library(tidyverse) library(magrittr) library(plotly) + +#library(gt) # Load functions -list.files("functions", full.names = T, pattern = ".R") %>% +list.files("functions", + full.names = T, + pattern = ".R") |> map(\(x) source(x)) # Set default ggplot theme theme_set(theme_bw(base_size = 14, - base_family = "Arial")) + base_family = 
gt::google_font("Maven Pro"))) theme_update(strip.background = element_blank(), - axis.text.x = element_text(angle = 70, hjust = 1)) + axis.text.x = element_text(angle = 70, hjust = 1)) ## Colors, these come from the khroma package ("muted") ### For <=9 stages: -color_scale_plots <- scale_color_manual(values = c("#CC6677", "#332288", "#DDCC77", "#117733", "#88CCEE", "#882255", "#44AA99", "#999933", "#AA4499"), na.value = "#DDDDDD") -fill_scale_plots <- scale_fill_manual(values = c("#CC6677", "#332288", "#DDCC77", "#117733", "#88CCEE", "#882255", "#44AA99", "#999933", "#AA4499"), na.value = "#DDDDDD") +colors_9 <- c( + "#CC6677", + "#332288", + "#DDCC77", + "#117733", + "#88CCEE", + "#882255", + "#44AA99", + "#999933", + "#AA4499" +) +color_scale_plots <- scale_color_manual(values = colors_9, na.value = "#DDDDDD") +fill_scale_plots <- scale_fill_manual(values = colors_9, na.value = "#DDDDDD") + + # Base directory containing reports -data_base = "data/" +data_base <- "data/" + +# groups +groups <- yaml::read_yaml("groups.yml") |> + map_dfr(\(row) + data.frame(sample = pluck(row, 1, "id"), group = pluck(row, 1, "group", .default = "null"))) |> + mutate(group = case_when(group %in% c("","null") ~ sample, TRUE ~ group)) ``` # About This report displays the main information gathered from various QC steps. -# nanoq {.tabset} +These may include: + + - Read Quality control: + - [fastplong](https://github.com/OpenGene/fastplong): QC, adaptor removal, barcode removal and trimming for long reads + - [fastp](https://github.com/OpenGene/fastp) QC, adaptor removal, barcode removal and trimming for short reads + - Assembly Quality control for each stage + - [QUAST (QUality ASsessment Tool)](https://github.com/ablab/quast): Computes various statistics of assemblies + - [BUSCO](https://busco.ezlab.org/): BUSCO provides a quantitative assessment of the completeness in terms of expected gene content of a genome assembly, transcriptome, or annotated gene set. 
+ - [merqury](https://github.com/marbl/merqury): k-mer spectrum based analysis of genomes, completeness, quality and ploidy + - [genomescope](https://github.com/schatzlab/genomescope): genome analysis from unassembled reads + +# Reads + +## fastplong {.tabset} -::: {.content-visible unless-profile="nanoq"} -nanoq was not included in the pipeline run, no ONT reads were included. +::: {.content-visible unless-profile="fastplong"} +[fastplong](https://github.com/OpenGene/fastplong) was not included in the pipeline run. ::: -```{r nanoq read inputs} -#| eval: !expr params$nanoq +```{r fastplong read inputs} +#| eval: !expr params$fastplong #| include: false #| message: false #| output: false -# Parse nanoq reports into table -nanoq_reports <- list.files(paste0(data_base, "nanoq"), - pattern = "report.json", - full.names = T) %>% - map_dfr(\(x) read_nanoq(x)) +# Parse fastplong reports into table +# Note that for these reports the sample name is the group +fastplong_reports <- list.files(paste0(data_base, "fastplong"), + pattern = ".json", + full.names = T) |> + map_dfr(\(x) read_fastplong(x)) |> + left_join(groups, by = join_by(group)) ``` -```{r} -#| eval: !expr params$nanoq -#| include: false - -# For each sample, we create one plot chunk that will be saved into nanoq files -# This is an rmd chunk in plain text. 
- -dir.create("nanoq_files") -for (i in 1:length(unique(nanoq_reports$sample))) { -paste0('```{r}\n - #| title: "Nanoq read statistics" - p <- nanoq_reports %>% - filter(stat %in% c("Median Length", "Longest", "Median Quality","Bases")) %>% - filter(sample == "', unique(nanoq_reports$sample)[i], '") %>% - mutate(stat=fct_relevel(stat,c("Bases","Longest","Median Length","Median Quality"))) %>% - ggplot(aes(x = sample, y = val)) + - geom_line() + - geom_point(size = 5, pch=21, aes(fill=stage)) + - facet_wrap(~stat, scales = "free_y", ncol=2) + - fill_scale_plots + - scale_y_continuous(labels = function(x) format(x,scientific=-1,trim=T, digits = 3, drop0trailing=T), n.breaks = 4) + - theme(axis.title.x = element_blank(), - axis.title.y = element_blank(), - legend.position = "none", - legend.title = element_blank(), - panel.grid.minor = element_blank()) - ggplotly(p)\n```') %>% - write_lines(glue::glue("nanoq_files/_{ unique(nanoq_reports$sample)[i] }_nanoq.Rmd")) -} -``` +:::: {.content-visible when-profile="fastplong" } -::: {.content-visible when-profile="nanoq"} +[fastplong](https://github.com/OpenGene/fastplong) is a tool for trimming and quality control of long reads. If groups were provided in the sample sheet, the results are reported for each group, otherwise for each sample. -::: {.panel-tabset .flow} +::: {.panel-tabset} -```{r nanoq add subplots} -#| eval: !expr params$nanoq +```{r fastplong add subplots} +#| eval: !expr params$fastplong #| results: asis -# This loop creates one tab per sample. 
-## Each tab contains 3 valueboxes -## Below the valueboxes, the sample-specific plot code generated above is inserted - -for (i in 1:length(unique(nanoq_reports$sample))) { - cat(paste0('## ', unique(nanoq_reports$sample)[i], '\n\n'), - paste0('### { width = 30% }', '\n\n'), - paste0('::: {.valuebox icon="magic" color="primary" title="Total bases sequenced"}','\n'), - paste0(nanoq_reports %>% - filter(stat == "Bases") %>% - filter(sample == unique(nanoq_reports$sample)[i],) %$% - sum(val) %>% - format(scientific=-1,trim=T, digits = 3, drop0trailing=T),'\n'), - paste0(':::', '\n\n'), - paste0('::: {.valuebox icon="collection" color="secondary" title="Number of reads"}', '\n'), - paste0(nanoq_reports %>% - filter(stat == "N Reads") %>% - filter(sample == unique(nanoq_reports$sample)[i]) %$% - min(val) %>% - paste(" bases"), '\n'), - paste0(':::', '\n\n'), - paste0('::: {.valuebox icon="chevron-double-up" color="success" title="Longest read"}', '\n'), - paste0(nanoq_reports %>% - filter(stat == "Longest") %>% - filter(sample == unique(nanoq_reports$sample)[i]) %$% - max(val) %>% - paste0(" bases"),'\n'), - paste0(':::', '\n\n'), - paste0('### ', '\n\n'), - knitr::knit_child(glue::glue('nanoq_files/_{ unique(nanoq_reports$sample)[i] }_nanoq.Rmd'), - envir = globalenv(), - quiet = TRUE), - paste0('\n\n'), - sep = "" - ) -} +source("scripts/_fastplong_page.R") ``` +::: + +:::: + + ```{r} -#| eval: !expr params$nanoq +#| eval: !expr params$fastplong +#| include: false # Clean up the intermediate files -unlink("nanoq_files", recursive = T) +unlink("fastplong_files", recursive = T) ``` -::: - -::: +# Assemblies -# QUAST {.tabset} +## QUAST {.tabset} ::: {.content-visible unless-profile="quast"} QUAST was not included in the pipeline run. ::: -::: {.content-visible when-profile="quast"} -QUAST reports assembly statistics, taking into account the reference, if provided. 
+::: {.content-visible when-profile="quast" .panel-tabset .flow} -```{r message = F} -#| eval: !expr params$quast -# This chunk parses the quast reports from data/quast -quast_stats <- list.files(paste0(data_base, "quast"), - pattern = "report.tsv", - full.names = T) %>% - map_dfr(\(x) { - read_quast_report(x) %>% - mutate(sample = str_extract(x %>% basename(), - ".+?(?=_[assembly|links|longstitch|ragtag|medaka|pilon])"), - stage = case_when( - str_detect(x, "_ragtag") ~ "RagTag", - str_detect(x, "_medaka") ~ "medaka", - str_detect(x, "_pilon") ~ "pilon", - str_detect(x, "_longstitch") ~ "longstitch", - str_detect(x, "_links") ~ "LINKS", - str_detect(x, "assembly") ~ "Assembly", - TRUE ~ "Unknown") - ) - } - ) -``` +QUAST reports assembly statistics, taking into account the reference, if provided. -```{r quast write length plots} +```{r prepare quast, message = F} #| eval: !expr params$quast #| include: false -# This creates code that will generate the length plot based on the contents of the quast report. 
-dir.create("quast_files") -dir.create("quast_files/length") -for (i in 1:length(unique(quast_stats$sample))) { -paste0('```{r}\n - p <- quast_stats %>% - filter(sample == "', unique(quast_stats$sample)[i], '") %>% - filter(str_detect(stat, "[L].*[59]0")) %>% - mutate(stat = fct_relevel(stat, "L50","L90","LG50","LG90")) %>% - ggplot(aes(x=stat, y=value)) + - geom_point(aes(fill = stage), - size = 5, - pch = 21, - alpha = 0.8, - position = position_dodge(width = 0.4)) + - facet_wrap(~ sample, scales = "free_y") + - fill_scale_plots + - labs(title = "QUAST: L(G) 50 and 90") + - theme(panel.border = element_rect(fill = NA)) - ggplotly(p) \n```') %>% - write_lines(glue::glue("quast_files/length/_{ unique(quast_stats$sample)[i] }_quast.Rmd")) -} +#| warning: false +# This script prepares things required to render the page +source("scripts/_quast.R") ``` -```{r quast contig plots} -#| eval: !expr params$quast -#| include: false -# This creates code that will generate the contig plots based on the contents of the quast report. 
- -dir.create("quast_files/contigs") -for (i in 1:length(unique(quast_stats$sample))) { -paste0('```{r}\n - p <- quast_stats %>% - filter(sample == "', unique(quast_stats$sample)[i], '") %>% - filter(str_detect(stat, "# contigs \\\\(")) %>% - filter(!str_detect(stat, ">= 0")) %>% - mutate(stat = stat %>% str_remove_all("# contigs ") %>% str_remove_all("[()]") %>% fct_inorder()) %>% - ggplot(aes(x=stat, y=value)) + - geom_point(aes(fill = stage), - size = 5, - pch = 21, - alpha = 0.8, - position = position_dodge(width = 0.4)) + - facet_wrap(~ sample, scales = "free_y") + - fill_scale_plots + - theme(axis.title.x = element_blank(), - axis.title.y = element_blank()) + - labs(title = "QUAST: Number of contigs by size") -ggplotly(p) - p <- quast_stats %>% - filter(sample == "', unique(quast_stats$sample)[i], '") %>% - filter(str_detect(stat, "Total length")) %>% - filter(!str_detect(stat, ">= 0")) %>% - mutate(stat = stat %>% str_remove_all("Total length ") %>% str_remove_all("[()]") %>% fct_inorder()) %>% - ggplot(aes(x = stat, y = value)) + - geom_point( - aes(fill = stage), - size = 5, - pch = 21, - height = 0, - width = 0.2, - alpha = 0.8, - position = position_dodge(width = 0.4) - ) + - facet_wrap( ~ sample, scales = "free_y") + - fill_scale_plots + - theme(axis.title.x = element_blank(), - axis.title.y = element_blank()) + - labs(title = "QUAST: Aggregated length") + - scale_y_continuous( - labels = function(x) - format( - x, - scientific = -1, - trim = T, - digits = 3, - drop0trailing = T - ) - ) -ggplotly(p) - \n```') %>% - write_lines(glue::glue("quast_files/contigs/_{ unique(quast_stats$sample)[i] }_quast.Rmd")) -} -``` +:::: {.panel-tabset .flow} -::: {.panel-tabset .flow} -```{r quast add length subplots} +```{r quast page} #| eval: !expr params$quast #| results: asis -# This generates the tab-page for each sample -# Per sample there are 3 value boxes -# Below the value boxes there are two plots, one showing the length and one showing the contig statistics - 
-for (i in 1:length(unique(quast_stats$sample))) { - cat(paste0('## ', unique(quast_stats$sample)[i], '\n'), - paste0('### { width=30% }\n\n'), - paste0('::: {.valuebox icon="arrow-up-right-circle" color="primary" title="Total length"}\n'), - quast_stats %>% - filter(sample == unique(quast_stats$sample)[i]) %>% - filter(stat == "Total length (>= 0 bp)") %$% - max(value) %>% - format( - scientific = -1, - trim = T, - digits = 3, - drop0trailing = T - ) %>% - paste("bp"), - paste0('\n'), - paste0(':::'), - paste0('\n\n'), - paste0('::: {.valuebox icon="percent" color="success" title="GC Content"}\n'), - quast_stats %>% - filter(sample == unique(quast_stats$sample)[i]) %>% - filter(stat == "GC (%)") %$% - mean(value) %>% - round(2) %>% - paste(" %"), - paste0('\n'), - paste0(':::'), - paste0('\n\n'), - paste0('::: {.valuebox icon="emoji-heart-eyes" color="info" title="Lowest L90"}\n'), - quast_stats %>% - filter(sample == unique(quast_stats$sample)[i]) %>% - filter(stat == "L90") %>% - filter(value == min(value)) %>% - unique() %$% - glue::glue("{unique(value)}, at stage(s): {paste(stage, collapse = ', ')}"), - paste0('\n'), - paste0(':::'), - paste0('\n\n'), - paste0('### {.tabset}'), - paste0('\n\n'), - paste0('#### Tables \n\n'), - quast_stats %>% - filter(sample == unique(quast_stats$sample)[i]) %>% - dplyr::select(sample, stage, stat, value) %>% - pivot_wider(names_from = "stat", values_from = "value") %>% - #knitr::kable(format = 'html', caption = glue::glue('QUAST statistics')) - gt::gt() %>% - gt::cols_nanoplot(columns = starts_with("# contigs ("), - new_col_name = "Contigs_by_size", - new_col_label = gt::md("*# Contigs by size*")) %>% - gt::cols_nanoplot(columns = starts_with("Total length ("), - new_col_name = "Total_length", - new_col_label = gt::md("*Total length*")) %>% - gt::tab_footnote( - footnote = "Breaks are: contigs >= 0, 1kb, 5kb, 10kb, 25kb, 50kb", - locations = gt::cells_column_labels(columns = c(Contigs_by_size, Total_length))) %>% - 
gt::cols_align(align = "center", - columns = c(Contigs_by_size, Total_length)) %>% - gt::cols_move(Contigs_by_size, "Largest contig") %>% - gt::cols_move(Total_length, "Total length") %>% - gt::as_raw_html(), - paste0('\n\n'), - paste0('#### Plots \n\n'), - knitr::knit_child(glue::glue('quast_files/length/_{ unique(quast_stats$sample)[i] }_quast.Rmd'), - envir = globalenv(), - quiet = TRUE), - paste0('\n\n\n'), - knitr::knit_child(glue::glue('quast_files/contigs/_{ unique(quast_stats$sample)[i] }_quast.Rmd'), - envir = globalenv(), - quiet = TRUE), - paste0('\n\n\n'), - sep = "") -} +# This produces the contents of the page. +source("scripts/_quast_page.R") ``` ```{r unlink quast} #| eval: !expr params$quast +#| include: false # Remove temporary files, write out collected report (mainly for debugging) unlink("quast_files/contigs", recursive = T) unlink("quast_files/length", recursive = T) +unlink("quast_files/NL_plots", recursive = T) write_csv(quast_stats,"quast_files/reports.csv") ``` -::: + +:::: + ::: -# BUSCO +## BUSCO ::: {.content-visible unless-profile="busco"} + BUSCO was not included in the pipeline run. + ::: -```{r} +::: {.content-visible when-profile="busco"} + +BUSCO assess assembly quality based on the presence / absence of expected single-copy orthologs. 
+ +```{r busco data} #| eval: !expr params$busco #| warning: false #| message: false #| echo: false # Parse the reports from busco -busco_reports <- list.files(paste0(data_base, "busco"), - full.names = T, - pattern = "batch_summary") %>% - map_dfr(\(x) read_busco_batch(x)) +source("scripts/_busco.R") ``` +:::: {.panel-tabset .flow} -```{r} -#| eval: !expr params$busco -#| include: false -# This creates code that will generate the plots based on BUSCO results - -dir.create("busco_files") -dir.create("busco_files/orthologs") -for (i in 1:length(unique(busco_reports$sample))) { -paste0('```{r}\n - p <- busco_reports %>% - filter(sample == "', unique(busco_reports$sample)[i], '") %>% - filter(Var %in% c("Complete","Single","Duplicated","Fragmented")) %>% - ggplot(aes(y = value, x = Var)) + - geom_point( - aes(fill = stage), - size = 6, - pch = 21, - height = 0, - alpha = 0.8, - position = position_dodge(width = 0.4) - ) + - facet_wrap( ~ sample, nrow = 3) + - fill_scale_plots + - labs( y = "% of Single Copy Orthologs", - title = "BUSCO: Conserved Orthologs") + - coord_cartesian(clip = "on") + - theme( - panel.border = element_rect(fill = NA), - legend.position = "bottom", - axis.title.y = element_text(angle = 90), - axis.title.x = element_blank() - ) - ggplotly(p) - \n```') %>% - write_lines(glue::glue("busco_files/orthologs/_{ unique(busco_reports$sample)[i] }_orthologs.Rmd")) -} -``` - -::: {.content-visible when-profile="busco"} -BUSCO assess assembly quality based on the presence / absence of expected single-copy orthologs. 
- -::: {.panel-tabset .flow} -```{r busco orthologs add subplots and valueboxes} +```{r busco page} #| eval: !expr params$busco #| results: asis # -# This generates the tab-page for each sample -# Per sample there are 3 value boxes -# Below the value boxes there are one plots, showing the BUSCO statistics - -for (i in 1:length(unique(busco_reports$sample))) { - cur_sample <- unique(busco_reports$sample)[i] - # The BUSCO valueboxes contain information on which stage of the assembly had the highest quality, this requires some variables. - completeness_val <- busco_reports %>% - filter(sample == cur_sample) %>% - filter(Var == "Complete") %>% - filter(value == max(value)) %$% - value %>% - unique() - completeness_stage <- busco_reports %>% - filter(sample == cur_sample) %>% - filter(Var == "Complete") %>% - filter(value == max(value)) %$% - stage - frag_val <- busco_reports %>% - filter(sample == cur_sample) %>% - filter(Var == "Fragmented") %>% - filter(value == max(value)) %$% - value %>% - unique() - frag_stage <- busco_reports %>% - filter(sample == cur_sample) %>% - filter(Var == "Fragmented") %>% - filter(value == max(value)) %$% - stage - missing_val <- busco_reports %>% - filter(sample == cur_sample) %>% - filter(Var == "Missing") %>% - filter(value == max(value)) %$% - value %>% - unique() - missing_stage <- busco_reports %>% - filter(sample == cur_sample) %>% - filter(Var == "Missing") %>% - filter(value == max(value)) %$% - stage - cat(paste('## ', unique(busco_reports$sample)[i]), - paste0('\n\n'), - paste0('### {.fill} \n\n'), - paste0('::: {.valuebox icon="percent" color="success" title="Max. BUSCO Completeness" }\n'), - paste0('\n'), - glue::glue("{unique(completeness_val)}%,\nat stage(s): {paste(unique(completeness_stage), collapse = ', ')}"), - paste0('\n'), - paste0(':::'), - paste0('\n'), - paste0('::: {.valuebox icon="heartbreak" color="warning" title="Max. 
BUSCO Fragmented"}\n'), - glue::glue("{unique(frag_val)}%,\nat stage(s): {paste(unique(frag_stage), collapse = ', ')}"), - paste0('\n'), - paste0('\n'), - paste0(':::'), - paste0('\n'), - paste0('::: {.valuebox icon="person-walking" color="danger" title="Max. BUSCOs Missing"}\n'), - glue::glue("{unique(missing_val)}%, at stage(s): {paste(unique(missing_stage), collapse = ', ')}"), - paste0('\n'), - paste0(':::'), - paste0('\n\n'), - paste('###'), - paste0('\n\n'), - busco_reports %>% - filter(sample == cur_sample) %>% - dplyr::select(sample, stage, Var, value) %>% - mutate(Var = str_replace_all(Var, "_", " ")) %>% - pivot_wider(names_from = "Var", values_from = "value") %>% - gt::gt() %>% - gt::as_raw_html(), - paste0('\n\n'), - knitr::knit_child(glue::glue('busco_files/orthologs/_{ unique(busco_reports$sample)[i] }_orthologs.Rmd'), - envir = globalenv(), - quiet = TRUE), - paste0('\n\n\n'), - sep = "") -} +# This generates the tab-page for each group + +source("scripts/_busco_page.R") ``` -::: + +:::: + ::: -```{r} +```{r include = F} #| eval: !expr params$busco # Delete temporary files unlink("busco_files/orthologs", recursive = T) -``` -```{r} #| eval: !expr params$busco # Export large report table, mainly for debugging write_csv(busco_reports,"busco_files/reports.csv") ``` -# genomescope - -::: {.content-visible unless-profile="jellyfish"} -jellyfish / genomescope was not included in the pipeline run. -::: - -```{r} -#| eval: !expr params$jellyfish -#| message: false -#| echo: false -#| output: false -#| warning: false -# Parse the genomescope statistics -genomescope_out <- list.files(paste0(data_base, "genomescope"), full.names = T, pattern = "genomescope.txt") %>% - map_dfr(\(x) read_genomescope(x)) -``` - -::: {.content-visible when-profile="jellyfish"} -Jellyfish and genomescope are used to infer genome size from the initial ONT reads. 
- -```{r} -#| eval: !expr params$jellyfish -#| output: asis -# Since genomescope produces plots, I am simply including those here instead of recreating them, the proper QC for kmers comes with merqury. -img_files <- list.files(paste0(data_base,"genomescope"), full.names = T, pattern = "plot.png") -dir.create("genomescope_files") -for (file in img_files) { - file.copy(from = file, - to = paste0("genomescope_files/", file %>% basename(), sep ="")) - -} -img_files <- list.files("genomescope_files", full.names = T, pattern = "plot.png") - -cat(":::{.panel-tabset}\n", - glue::glue('## <% basename(), ".+?(?=_plot.png)")>>\n ![](<>){width=50% fig-align="centre"}\n\n\n', .open = "<<", .close = ">>"), - ":::\n", - sep = "" -) -``` -::: - -# merqury +## merqury ::: {.content-visible unless-profile="merqury"} meryl and merqury were not included in the pipeline run. @@ -573,267 +233,26 @@ meryl and merqury were not included in the pipeline run. #| message: false #| output: false -# Here the merqury stats are parsed and the assembly stage is extracted -merqury_stats <- list.files(paste0(data_base, "merqury"), full.names = T, pattern = "stats") %>% - lapply(\(x) { - read_tsv(x, col_names = c("sample_stage","all","assembly","total","percent"), show_col_types = FALSE) %>% - mutate(sample = str_extract(x %>% basename(), - ".+?(?=_[assembly|links|longstitch|ragtag|medaka|pilon])"), - stage = case_when( - str_detect(x, "_ragtag") ~ "RagTag", - str_detect(x, "_medaka") ~ "medaka", - str_detect(x, "_pilon") ~ "pilon", - str_detect(x, "_longstitch") ~ "longstitch", - str_detect(x, "_links") ~ "LINKS", - str_detect(x, "assembly") ~ "Assembly", - TRUE ~ "Unknown" - ) - ) - } - ) %>% - bind_rows() -# This parses the assembly stats -merqury_asm_hists <- list.files(paste0(data_base, "/merqury"), full.names = T, pattern = "asm.hist") %>% - lapply(\(x) { - read_tsv(x, col_names = T, show_col_types = FALSE) %>% - mutate( - sample = str_extract(x %>% basename(), - 
".+?(?=_[assembly|links|longstitch|ragtag|medaka|pilon])"), - stage = case_when( - str_detect(x, "_ragtag") ~ "RagTag", - str_detect(x, "_medaka") ~ "medaka", - str_detect(x, "_pilon") ~ "pilon", - str_detect(x, "_longstitch") ~ "longstitch", - str_detect(x, "_links") ~ "LINKS", - str_detect(x, "assembly") ~ "Assembly", - TRUE ~ "Unknown"), - Assembly = as.factor(Assembly), - stage = as.factor(stage), - sample = as.factor(sample), - kmer_multiplicity = as.integer(kmer_multiplicity), - Count = as.integer(Count) - ) - } - ) %>% - bind_rows() -# This parses the copy number file -merqury_cn_hists <- list.files(paste0(data_base, "merqury"), full.names = T, pattern = "cn.hist") %>% - lapply(\(x) { - read_tsv(x, col_names = T, show_col_types = FALSE) %>% - mutate( - sample = str_extract(x %>% basename(), - ".+?(?=_[assembly|links|longstitch|ragtag|medaka|pilon])"), - stage = case_when( - str_detect(x, "_ragtag") ~ "RagTag", - str_detect(x, "_medaka") ~ "medaka", - str_detect(x, "_pilon") ~ "pilon", - str_detect(x, "_longstitch") ~ "longstitch", - str_detect(x, "_links") ~ "LINKS", - str_detect(x, "assembly") ~ "Assembly", - TRUE ~ "Unknown"), - Copies = as.factor(Copies), - stage = as.factor(stage), - sample = as.factor(sample), - kmer_multiplicity = as.integer(kmer_multiplicity), - Count = as.integer(Count) - ) - } - ) %>% - bind_rows() -# This parses the qv file -merqury_qv <- - list.files(paste0(data_base, "merqury"), full.names = T, pattern = ".qv") %>% - lapply(\(x) { - read_tsv(x, - col_names = c("Assembly", "kmers_assembly_unique", "kmers_assembly_shared", "QV", "error_rate"), - show_col_types = FALSE) %>% - mutate( - sample = str_extract(x %>% basename(), - ".+?(?=_[assembly|links|longstitch|ragtag|medaka|pilon])"), - stage = case_when( - str_detect(x, "_ragtag") ~ "RagTag", - str_detect(x, "_medaka") ~ "medaka", - str_detect(x, "_pilon") ~ "pilon", - str_detect(x, "_longstitch") ~ "longstitch", - str_detect(x, "_links") ~ "LINKS", - str_detect(x, "assembly") ~ 
"Assembly", - TRUE ~ "Unknown"), - stage = as.factor(stage), - sample = as.factor(sample), - kmers_assembly_shared = as.integer(kmers_assembly_shared), - kmers_assembly_unique = as.integer(kmers_assembly_unique), - QV = as.double(QV), - error_rate = as.double(error_rate) - ) - } - ) %>% - bind_rows() -dir.create("merqury_files") -``` - -```{r merqury qv} -#| eval: !expr params$merqury -#| include: false -# This generates QV-plots from merqury; the plot function is stuffed into plot_merqury -dir.create("merqury_files/qv_plots/") -for (i in 1:length(unique(merqury_qv$sample))) { - cur_sample <- unique(merqury_qv$sample)[i] - paste0('```{r} -p <- merqury_qv %>% - plot_merqury_qv("', cur_sample,'") -ggplotly(p)\n```') %>% - write_lines(glue::glue("merqury_files/qv_plots/_{ cur_sample }_qv_plt.Rmd")) -} -``` - -```{r merqury completeness} -#| eval: !expr params$merqury -#| include: false -# This generates stat-plots from merqury; the plot function is stuffed into plot_merqury - -dir.create("merqury_files/stat_plots/") -for (i in 1:length(unique(merqury_stats$sample))) { - cur_sample <- unique(merqury_stats$sample)[i] - paste0('```{r} -p <- merqury_stats %>% - plot_merqury_stats("', cur_sample,'") -ggplotly(p)\n```') %>% - write_lines(glue::glue("merqury_files/stat_plots/_{ cur_sample }_completeness_plt.Rmd")) -} -``` - -```{r merqury asm} -#| eval: !expr params$merqury -#| include: false -# This generates assembly plots from merqury; the plot function is stuffed into plot_merqury - -dir.create("merqury_files/asm_plots/") -for (i in 1:length(unique(merqury_asm_hists$sample))) { - cur_sample <- unique(merqury_asm_hists$sample)[i] - paste0('```{r} -p <- merqury_asm_hists %>% - plot_merqury_multiplicity("', cur_sample,'") -ggplotly(p)\n```') %>% - write_lines(glue::glue("merqury_files/asm_plots/_{ cur_sample }_asm_plt.Rmd")) -} -``` - -```{r merqury cn} -#| eval: !expr params$merqury -#| include: false -# This generates copy-number from merqury; the plot function is stuffed 
into plot_merqury - -dir.create("merqury_files/cn_plots/") -for (i in 1:length(unique(merqury_cn_hists$sample))) { - cur_sample <- unique(merqury_cn_hists$sample)[i] - paste0('```{r} -p <- merqury_cn_hists %>% - plot_merqury_copynumber("', cur_sample,'") -ggplotly(p)\n```') %>% - write_lines(glue::glue("merqury_files/cn_plots/_{ cur_sample }_cn_plt.Rmd")) -} +source("scripts/_merqury.R") ``` ::: {.content-visible when-profile="merqury"} + merqury compares k-mer spectra between assemblies and short read libraries to assess assembly quality and completeness. -::: {.panel-tabset .flow} +:::: {.panel-tabset .flow} + ```{r merqury add plots and valueboxes} #| eval: !expr params$merqury #| results: asis -# This generates the tab-page for each sample -# Per sample there are 3 value boxes -# Below the value boxes there is a tabset of plots, each tab contains one of the plot-types produced above. -# Those are: Completeness, k-mer specatr, QV and CN - -for (i in 1:length(unique(merqury_stats$sample))) { - cur_sample <- unique(merqury_stats$sample)[i] - highest_val <- merqury_stats %>% - filter(sample == cur_sample) %>% - filter(percent == max(percent)) %$% - percent %>% - unique() - highest_stage <- merqury_stats %>% - filter(sample == cur_sample) %>% - filter(percent == highest_val) %$% - stage %>% - unique() - lowest_val <- merqury_stats %>% - filter(sample == cur_sample) %>% - filter(percent == min(percent)) %$% - percent %>% - unique() - lowest_stage <- merqury_stats %>% - filter(sample == cur_sample) %>% - filter(percent == lowest_val) %$% - stage %>% - unique() - highest_qv <- merqury_qv %>% - filter(sample == cur_sample) %>% - filter(QV == max(QV)) %$% - QV %>% - unique() - highest_qv_stage <- merqury_qv %>% - filter(sample == cur_sample) %>% - filter(QV == max(QV)) %$% - stage %>% - unique() - - cat(paste('## ', cur_sample), - paste0('\n\n'), - paste0('### Valueboxes'), - paste0('\n\n'), - paste0('::: {.valuebox icon="exclude" color="primary" title="Merqury QV" 
}\n'), - glue::glue("QV: {unique(highest_qv) %>% round(2)}, at stage(s): {paste(highest_qv_stage, collapse = ', ')}"), - paste0('\n'), - paste0(':::'), - paste0('\n\n'), - paste0('::: {.valuebox icon="percent" color="success" title="Highest k-mer completeness" }\n'), - glue::glue("{unique(highest_val) %>% round(2)}%, at stage(s): {paste(highest_stage, collapse = ', ')}"), - paste0('\n'), - paste0(':::'), - paste0('\n\n'), - paste0('::: {.valuebox icon="heartbreak" color="warning" title="Lowest k-mer completeness" }\n'), - glue::glue("{unique(lowest_val) %>% round(2)}%, at stage(s): {paste(lowest_stage, collapse = ', ')}"), - paste0('\n'), - paste0(':::'), - paste0('\n\n'), - paste0('\n\n'), - paste0('### Plots { .tabset }'), - paste0('\n\n'), - paste0('#### Completeness \n'), - paste0('\n'), - knitr::knit_child(glue::glue('merqury_files/stat_plots/_{ cur_sample }_completeness_plt.Rmd'), - envir = globalenv(), - quiet = TRUE), - paste0('\n'), - paste0('#### QV \n'), - paste0('\n'), - paste0('QV is defined as:\n', expression(10*-log10(error_rate))), - paste0('\n'), - knitr::knit_child(glue::glue('merqury_files/qv_plots/_{ cur_sample }_qv_plt.Rmd'), - envir = globalenv(), - quiet = TRUE), - paste0('\n'), - paste0('#### Spectra \n'), - paste0('\n'), - knitr::knit_child(glue::glue('merqury_files/asm_plots/_{ cur_sample }_asm_plt.Rmd'), - envir = globalenv(), - quiet = TRUE), - paste0('\n'), - paste0('#### Copy Number \n'), - paste0('\n'), - knitr::knit_child(glue::glue('merqury_files/cn_plots/_{ cur_sample }_cn_plt.Rmd'), - envir = globalenv(), - quiet = TRUE), - paste0('\n\n\n'), - sep = "") -} +source("scripts/_merqury_page.R") ``` -::: + +:::: + ::: -```{r} +```{r include = F} #| eval: !expr params$merqury # Delete files. unlink("merqury_files/cn_plots") @@ -841,22 +260,64 @@ unlink("merqury_files/asm_plots") unlink("merqury_files") ``` +## genomescope + +::: {.content-visible unless-profile="jellyfish"} +jellyfish / genomescope was not included in the pipeline run. 
+::: + +```{r} +#| eval: !expr params$jellyfish +#| message: false +#| echo: false +#| output: false +#| warning: false +# Parse the genomescope statistics +genomescope_out <- list.files(paste0(data_base, "genomescope"), full.names = T, pattern = "genomescope.txt") |> + map_dfr(\(x) read_genomescope(x)) |> + left_join(groups, by = join_by(sample)) +``` + +::: {.content-visible when-profile="jellyfish"} + +Below are the genomescope estimates based on the provided QC reads: + +```{r echo = F} +#| eval: !expr params$jellyfish +#| output: asis +source("scripts/_genomescope_page.R") +``` + +::: + + # Software versions The pipeline was run using the following software versions: ```{r} +#| message: false +#| echo: false +#| output: asis versions <- yaml::read_yaml("software_versions.yml") lapply(1:length(versions), \(process) { - proc = versions[[process]] - proc_name = names(versions[process]) - tools <- lapply(1:length(proc), \(tool) { - tool_name = proc[tool] %>% names - tool_version = proc[[tool]] %>% as.character() - return(tibble(Process = proc_name,Tool = tool_name, Version = tool_version)) - }) %>% + proc = versions[[process]] + proc_name = names(versions[process]) + tools <- lapply(1:length(proc), \(tool) { + tool_name = proc[tool] |> names() + tool_version = proc[[tool]] |> as.character() + return(tibble(Process = proc_name,Tool = tool_name, Version = tool_version)) + }) |> bind_rows() - }) %>% -bind_rows() %>% -knitr::kable() +}) |> + bind_rows() |> + gt::gt() |> + gt::fmt_auto() |> + gt::opt_stylize(color = "gray") |> + gt::opt_table_font( + font = list( + gt::google_font(name = "Maven Pro"), + "rounded-sans" + )) |> + gt::as_raw_html() ``` diff --git a/assets/report/scripts/_busco.R b/assets/report/scripts/_busco.R new file mode 100644 index 00000000..09fb97ce --- /dev/null +++ b/assets/report/scripts/_busco.R @@ -0,0 +1,43 @@ +# Parse BUSCO reports + +busco_reports <- list.files(paste0(data_base, "busco"), + full.names = T, + pattern = "batch_summary") |> + 
map_dfr(\(x) read_busco_batch(x)) |> + left_join(groups, by = join_by(sample)) + +dir.create("busco_files") +dir.create("busco_files/orthologs") + +# Create BUSCO plot +for (i in 1:length(unique(busco_reports$group))) { + cur_group <- unique(busco_reports$group)[i] + group_size <- busco_reports |> filter(group == cur_group) |> _$sample |> unique() |> length() + plt_height <- case_when(group_size < 5 ~ 7, TRUE ~ group_size+3) + paste0('```{r echo = F, fig.height = ',plt_height,'} + p <- busco_reports |> + filter(group == "', unique(busco_reports$group)[i], '") |> + filter(Var %in% c("Complete","Single","Duplicated","Fragmented")) |> + ggplot(aes(y = value, x = Var)) + + geom_point( + aes(fill = stage), + size = 6, + pch = 21, + alpha = 0.8, + position = position_dodge(width = 0.4) + ) + + facet_wrap( ~ sample, nrow = 3) + + fill_scale_plots + + labs( y = "% of Single Copy Orthologs", + title = "BUSCO: Conserved Orthologs") + + coord_cartesian(clip = "on") + + theme( + panel.border = element_rect(fill = NA), + legend.position = "bottom", + axis.title.y = element_text(angle = 90), + axis.title.x = element_blank() + ) + ggplotly(p) + \n```') |> + write_lines(glue::glue("busco_files/orthologs/_{ unique(busco_reports$group)[i] }_orthologs.Rmd")) +} diff --git a/assets/report/scripts/_busco_page.R b/assets/report/scripts/_busco_page.R new file mode 100644 index 00000000..0cd134d2 --- /dev/null +++ b/assets/report/scripts/_busco_page.R @@ -0,0 +1,33 @@ +# This generates a tab-page for each sample + +for (i in 1:length(unique(busco_reports$group))) { + cur_group <- unique(busco_reports$group)[i] + cat( + paste0('### ', cur_group, '\n\n'), + paste0('::: {.panel-tabset} \n\n'), + paste0('#### Tabular \n\n'), + busco_reports |> + filter(group == cur_group) |> + dplyr::select(sample, stage, Var, value) |> + mutate(Var = str_replace_all(Var, "_", " ") |> str_replace_all("percent", "(%)")) |> + pivot_wider(names_from = "Var", values_from = "value", id_cols = c(sample,stage)) |> + 
dplyr::arrange(factor(stage, levels = c("Assembly","medaka", "pilon", "dorado","hic","links","longstitch","ragtag")), sample) |> + gt::gt() |> + gt::fmt_auto() |> + gt::opt_stylize(color = "gray") |> + gt::opt_table_font( + font = list( + gt::google_font(name = "Maven Pro"), + "rounded-sans" + )) |> + gt::as_raw_html(), + paste0('\n\n'), + paste('#### Plot'), + paste0('\n\n'), + knitr::knit_child(glue::glue('busco_files/orthologs/_{ unique(busco_reports$group)[i] }_orthologs.Rmd'), + envir = globalenv(), + quiet = TRUE), + paste0('\n\n\n'), + paste0(':::\n\n'), + sep = "") +} diff --git a/assets/report/scripts/_fastplong_page.R b/assets/report/scripts/_fastplong_page.R new file mode 100644 index 00000000..297bdba0 --- /dev/null +++ b/assets/report/scripts/_fastplong_page.R @@ -0,0 +1,41 @@ +# This loop creates one tab per group +for (i in 1:length(unique(fastplong_reports$group))) { + cat(paste0('### ', unique(fastplong_reports$group)[i] , '\n\n'), + paste0('\n\n'), + paste0('Read filtering and QC results for ', unique(fastplong_reports$group)[i]), + paste0('\n\n'), + fastplong_reports |> + filter(group == unique(fastplong_reports$group)[i]) |> + dplyr::select(-sample) |> + unique() |> + pivot_wider(id_cols = c("stat","group","read_type"), names_from = stage, values_from = value) |> + mutate(Filtered = `Before Filtering` - `After Filtering` |> round(), + Filtered = case_when(!str_detect(stat, "Rate|Length|Content") ~ Filtered, + TRUE ~ NA_real_), + Filtered_Perc = Filtered / `Before Filtering`) |> + dplyr::select(-group) |> + dplyr::arrange( + stat |> fct_relevel( + "Total Reads", + "Total Bases", + "Read Mean Length", + "Q30 Rate", + "Q20 Rate", + "Q30 Bases", + "Q20 Bases" + ), + read_type + ) |> + gt::gt() |> + gt::cols_label(stat = "", read_type = "Read Type", Filtered_Perc = "% filtered") |> + gt::fmt_auto() |> + gt::fmt_percent(Filtered_Perc) |> + gt::tab_footnote( + footnote = "Due to read splitting it is possible that the number of reads after filtering is 
larger than before.", + locations = gt::cells_column_labels(columns = c(Filtered))) |> + gt::opt_stylize(color = "gray") |> + gt::as_raw_html(), + paste0('\n\n'), + sep = "" + ) +} diff --git a/assets/report/scripts/_genomescope_page.R b/assets/report/scripts/_genomescope_page.R new file mode 100644 index 00000000..73bb61b6 --- /dev/null +++ b/assets/report/scripts/_genomescope_page.R @@ -0,0 +1,21 @@ +# Since genomescope produces plots, I am simply including those here instead of recreating them, the proper QC for kmers comes with merqury. +img_files <- list.files(paste0(data_base,"genomescope"), full.names = T, pattern = "plot.png") +dir.create("genomescope_files") +for (file in img_files) { + file.copy(from = file, + to = paste0("genomescope_files/", file |> basename(), sep ="")) + +} + +img_files <- data.frame(file = list.files("genomescope_files/", full.names = T, pattern = "plot.png")) |> + mutate(group = str_extract(file |> basename(), ".+?(?=_plot.png)")) + + +cat(":::{.panel-tabset}\n\n") +for(grp in unique(img_files$group)) { + cat(glue::glue('## {grp}\n\n\n')) + cat(glue::glue('![](< filter(group == grp) %$% file>>){fig-align="centre"}\n\n\n', + .open = "<<", + .close = ">>")) +} +cat(":::\n") diff --git a/assets/report/scripts/_merqury.R b/assets/report/scripts/_merqury.R new file mode 100644 index 00000000..62138dcc --- /dev/null +++ b/assets/report/scripts/_merqury.R @@ -0,0 +1,154 @@ +# Here the merqury stats are parsed and the assembly stage is extracted +merqury_stats <- list.files(paste0(data_base, "merqury"), full.names = T, pattern = "stats") |> + lapply(\(x) { + read_tsv(x, col_names = c("sample_stage","all","assembly","total","percent"), show_col_types = FALSE) |> + mutate( # Get sample name by matching filename to samples in groups, reverse sort by length to hopefully catch + # the correct name first in case there is partial overlap between sample names. 
+ sample = basename(x) |> + str_extract(groups$sample[rev(order(nchar(groups$sample)))] |> paste(collapse = "|")), + stage = case_when( + str_detect(x, "_ragtag") ~ "RagTag", + str_detect(x, "_medaka") ~ "medaka", + str_detect(x, "_dorado") ~ "dorado", + str_detect(x, "_pilon") ~ "pilon", + str_detect(x, "_longstitch") ~ "longstitch", + str_detect(x, "_links") ~ "LINKS", + str_detect(x, "_yahs") ~ "HiC", + str_detect(x, "assembl[ey]") ~ "Assembly", + TRUE ~ "Unknown")) }) |> + bind_rows() |> + left_join(groups, by = join_by(sample)) + +# This parses the assembly stats +merqury_asm_hists <- list.files(paste0(data_base, "/merqury"), full.names = T, pattern = "asm.hist") |> + lapply(\(x) { + read_tsv(x, col_names = T, show_col_types = FALSE) |> + mutate( + sample = str_extract(x |> basename(), + groups$sample[rev(order(nchar(groups$sample)))] |> paste(collapse = "|")), + stage = case_when( + str_detect(x, "_ragtag") ~ "RagTag", + str_detect(x, "_medaka") ~ "medaka", + str_detect(x, "_dorado") ~ "dorado", + str_detect(x, "_pilon") ~ "pilon", + str_detect(x, "_longstitch") ~ "longstitch", + str_detect(x, "_links") ~ "LINKS", + str_detect(x, "assembl[ey]") ~ "Assembly", + TRUE ~ "Unknown"), + Assembly = as.factor(Assembly), + stage = as.factor(stage), + sample = as.factor(sample), + kmer_multiplicity = as.integer(kmer_multiplicity), + Count = as.integer(Count)) + }) |> + bind_rows() |> + left_join(groups, by = join_by(sample)) + +# This parses the copy number file +merqury_cn_hists <- list.files(paste0(data_base, "merqury"), full.names = T, pattern = "cn.hist") |> + lapply(\(x) { + read_tsv(x, col_names = T, show_col_types = FALSE) |> + mutate( + sample = str_extract(x |> basename(), + groups$sample[rev(order(nchar(groups$sample)))] |> paste(collapse = "|")), + stage = case_when( + str_detect(x, "_ragtag") ~ "RagTag", + str_detect(x, "_medaka") ~ "medaka", + str_detect(x, "_dorado") ~ "dorado", + str_detect(x, "_pilon") ~ "pilon", + str_detect(x, "_longstitch") ~ 
"longstitch", + str_detect(x, "_links") ~ "LINKS", + str_detect(x, "assembl[ey]") ~ "Assembly", + TRUE ~ "Unknown"), + Copies = as.factor(Copies), + stage = as.factor(stage), + sample = as.factor(sample), + kmer_multiplicity = as.integer(kmer_multiplicity), + Count = as.integer(Count)) + }) |> + bind_rows() |> + left_join(groups, by = join_by(sample)) + +# This parses the qv file +merqury_qv <- list.files(paste0(data_base, "merqury"), full.names = T, pattern = ".qv") |> + lapply(\(x) { + read_tsv(x, + col_names = c("Assembly", "kmers_assembly_unique", "kmers_assembly_shared", "QV", "error_rate"), + show_col_types = FALSE) |> + mutate( + sample = str_extract(x |> basename(), + groups$sample[rev(order(nchar(groups$sample)))] |> paste(collapse = "|")), + stage = case_when( + str_detect(x, "_ragtag") ~ "RagTag", + str_detect(x, "_medaka") ~ "medaka", + str_detect(x, "_dorado") ~ "dorado", + str_detect(x, "_pilon") ~ "pilon", + str_detect(x, "_longstitch") ~ "longstitch", + str_detect(x, "_links") ~ "LINKS", + str_detect(x, "assembl[ey]") ~ "Assembly", + TRUE ~ "Unknown"), + stage = as.factor(stage), + sample = as.factor(sample), + kmers_assembly_shared = as.integer(kmers_assembly_shared), + kmers_assembly_unique = as.integer(kmers_assembly_unique), + QV = as.double(QV), + error_rate = as.double(error_rate)) + } + ) |> + bind_rows() |> + left_join(groups, by = join_by(sample)) +dir.create("merqury_files") + +# This generates QV-plots from merqury; the plot function is stuffed into plot_merqury +dir.create("merqury_files/qv_plots/") +for (i in 1:length(unique(merqury_qv$group))) { + cur_group <- unique(merqury_qv$group)[i] + group_size <- merqury_qv |> filter(group == cur_group) |> _$sample |> unique() |> length() + plt_height <- case_when(group_size < 5 ~ 7, TRUE ~ group_size+3) + cur_group <- unique(merqury_qv$group)[i] + paste0('```{r echo = F, fig.height = ',plt_height,'} +p <- merqury_qv |> + plot_merqury_qv("', cur_group,'") +ggplotly(p)\n```') |> + 
write_lines(glue::glue("merqury_files/qv_plots/_{ cur_group }_qv_plt.Rmd")) +} +# This generates stat-plots from merqury; the plot function is stuffed into plot_merqury + +dir.create("merqury_files/stat_plots/") +for (i in 1:length(unique(merqury_stats$group))) { + cur_group <- unique(merqury_stats$group)[i] + group_size <- merqury_stats |> filter(group == cur_group) |> _$sample |> unique() |> length() + plt_height <- case_when(group_size < 5 ~ 7, TRUE ~ group_size+3) + cur_group <- unique(merqury_stats$group)[i] + paste0('```{r echo = F, fig.height = ',plt_height,'} +p <- merqury_stats |> + plot_merqury_stats("', cur_group,'") +ggplotly(p)\n```') |> + write_lines(glue::glue("merqury_files/stat_plots/_{ cur_group }_completeness_plt.Rmd")) +} + +# This generates assembly plots from merqury; the plot function is stuffed into plot_merqury + +dir.create("merqury_files/asm_plots/") +for (i in 1:length(unique(merqury_asm_hists$group))) { + cur_group <- unique(merqury_asm_hists$group)[i] + group_size <- merqury_asm_hists |> filter(group == cur_group) |> _$sample |> unique() |> length() + plt_height <- case_when(group_size < 5 ~ 7, TRUE ~ group_size+3) + paste0('```{r echo = F, fig.height = ',plt_height,'} +p <- merqury_asm_hists |> + plot_merqury_multiplicity("', cur_group,'") +print(p)\n```') |> + write_lines(glue::glue("merqury_files/asm_plots/_{ cur_group }_asm_plt.Rmd")) +} + +dir.create("merqury_files/cn_plots/") +for (i in 1:length(unique(merqury_cn_hists$group))) { + cur_group <- unique(merqury_cn_hists$group)[i] + group_size <- merqury_cn_hists |> filter(group == cur_group) |> _$sample |> unique() |> length() + plt_height <- case_when(group_size < 5 ~ 7, TRUE ~ group_size+3) + paste0('```{r echo = F, fig.height = ',plt_height,'} +p <- merqury_cn_hists |> + plot_merqury_copynumber("', cur_group,'") +print(p)\n```') |> + write_lines(glue::glue("merqury_files/cn_plots/_{ cur_group }_cn_plt.Rmd")) +} diff --git a/assets/report/scripts/_merqury_page.R 
b/assets/report/scripts/_merqury_page.R new file mode 100644 index 00000000..4a753393 --- /dev/null +++ b/assets/report/scripts/_merqury_page.R @@ -0,0 +1,41 @@ +# This generates the tab-page for each sample +# Per sample there are 3 value boxes +# Below the value boxes there is a tabset of plots, each tab contains one of the plot-types produced above. +# Those are: Completeness, k-mer specatr, QV and CN + +for (i in 1:length(unique(merqury_stats$group))) { + cur_group <- unique(merqury_stats$group)[i] + cat( + paste0('### ', cur_group, '\n\n'), + paste0('merqury creates assembly statistics, through comparisons of the k-mer spectrum of short-reads to the k-mer spectrum of an assembly.\n\n'), + paste0('::: {.panel-tabset} \n\n'), + paste0('#### Completeness \n'), + paste0('\n'), + knitr::knit_child(glue::glue('merqury_files/stat_plots/_{ cur_group }_completeness_plt.Rmd'), + envir = globalenv(), + quiet = TRUE), + paste0('\n'), + paste0('#### QV \n'), + paste0('\n'), + paste0('QV is defined as:\n', expression(10*-log10(error_rate))), + paste0('\n'), + knitr::knit_child(glue::glue('merqury_files/qv_plots/_{ cur_group }_qv_plt.Rmd'), + envir = globalenv(), + quiet = TRUE), + paste0('\n'), + paste0('#### Spectra \n'), + paste0('\n'), + knitr::knit_child(glue::glue('merqury_files/asm_plots/_{ cur_group }_asm_plt.Rmd'), + envir = globalenv(), + quiet = TRUE), + paste0('\n'), + paste0('#### Copy Number \n'), + paste0('\n'), + knitr::knit_child(glue::glue('merqury_files/cn_plots/_{ cur_group }_cn_plt.Rmd'), + envir = globalenv(), + quiet = TRUE), + paste0('\n\n\n'), + paste0(':::'), + paste0('\n\n\n'), + sep = "") +} diff --git a/assets/report/scripts/_quast.R b/assets/report/scripts/_quast.R new file mode 100644 index 00000000..53f3f7c7 --- /dev/null +++ b/assets/report/scripts/_quast.R @@ -0,0 +1,158 @@ +# This file parses QUAST outputs and creates the templates for plotting. 
+ +# Parse the quast reports from data/quast +quast_stats <- list.files(paste0(data_base, "quast"), + pattern = "report.tsv", + full.names = T) |> + map_dfr(\(x) { + read_quast_report(x) |> + mutate( + # Get sample name by matching filename to samples in groups, reverse sort by length to hopefully catch + # the correct name first in case there is partial overlap between sample names. + sample = basename(x) |> + str_extract(groups$sample[rev(order(nchar(groups$sample)))] |> paste(collapse = "|")), + stage = case_when( + str_detect(x, "_ragtag") ~ "RagTag", + str_detect(x, "_medaka") ~ "medaka", + str_detect(x, "_dorado") ~ "dorado", + str_detect(x, "_pilon") ~ "pilon", + str_detect(x, "_longstitch") ~ "longstitch", + str_detect(x, "_links") ~ "LINKS", + str_detect(x, "_yahs") ~ "HiC", + str_detect(x, "assembl[ey]") ~ "Assembly", + TRUE ~ "Unknown")) }) |> + left_join(groups, by = join_by(sample)) |> + mutate(stage = stage |> fct_relevel("Assembly", "medaka", "dorado", "pilon", "longstitch", "LINKS", "HiC", "RagTag") ) |> + dplyr::arrange(sample, stage) + +# This creates code that will generate the length plot based on the contents of the quast report. 
+dir.create("quast_files") +dir.create("quast_files/length") +for (i in 1:length(unique(quast_stats$group))) { + cur_group <- unique(quast_stats$group)[i] + group_size <- quast_stats |> filter(group == cur_group) |> _$sample |> unique() |> length() + plt_height <- case_when(group_size < 5 ~ 7, TRUE ~ group_size+3) + paste0('```{r echo = F, fig.height = ',plt_height,'} + quast_stats |> + filter(group == "', unique(quast_stats$group)[i], '") |> + filter(str_detect(stat, "[L].*[59]0")) |> + mutate(stat = fct_relevel(stat, "L50","L90","LG50","LG90")) |> + ggplot(aes(x = stat, y = value)) + + geom_point( + aes(fill = stage), + size = 5, + pch = 21, + height = 0, + width = 0.2, + alpha = 0.8, + position = position_dodge(width = 0.4) + ) + + facet_wrap(~ sample, scales = "free_y") + + fill_scale_plots + + theme_bw(base_size = 14) + + theme( + axis.title.x = element_blank(), + strip.background = element_blank(), + legend.position = "bottom", + axis.text.x = element_text(angle = 60, hjust = 1) + ) + + scale_y_continuous( + labels = function(x) + format( + x, + scientific = -1, + trim = T, + digits = 3, + drop0trailing = T + ) + ) + + labs(y = "Aggregated length of contigs in bin")\n```') |> + write_lines(glue::glue("quast_files/length/_{ unique(quast_stats$group)[i] }_quast.Rmd")) +} + +dir.create("quast_files/contigs") +for (i in 1:length(unique(quast_stats$group))) { + cur_group <- unique(quast_stats$group)[i] + group_size <- quast_stats |> filter(group == cur_group) |> _$sample |> unique() |> length() + plt_height <- case_when(group_size < 5 ~ 7, TRUE ~ group_size+3) + paste0('```{r echo = F, fig.height = ',plt_height,'} + quast_stats |> + filter(group == "', unique(quast_stats$group)[i], '") |> + filter(str_detect(stat, "# contigs \\\\(")) |> + filter(!str_detect(stat, ">= 0")) |> + mutate(stat = stat |> str_remove_all("# contigs ") |> str_remove_all("[()]") |> fct_inorder()) |> + ggplot(aes(x = stat, y = value)) + + geom_point( + aes(fill = stage), + size = 5, + pch = 
21, + alpha = 0.8, + position = position_dodge(width = 0.4) + ) + + facet_wrap(~ sample, scales = "free_y") + + fill_scale_plots + + theme_bw(base_size = 14) + + theme( + axis.title.x = element_blank(), + axis.title.y = element_blank(), + strip.background = element_blank(), + legend.position = "bottom", + axis.text.x = element_text(angle = 60, hjust = 1) + ) + \n```') |> + write_lines(glue::glue("quast_files/contigs/_{ unique(quast_stats$group)[i] }_quast.Rmd")) +} + +# This creates code that will generate the contig plots based on the contents of the quast report. +dir.create("quast_files/NL_plots") +for (i in 1:length(unique(quast_stats$group))) { + cur_group <- unique(quast_stats$group)[i] + group_size <- quast_stats |> filter(group == cur_group) |> _$sample |> unique() |> length() + plt_height <- case_when(group_size < 5 ~ 7, TRUE ~ group_size+3) + paste0('```{r echo = F, fig.height = ',plt_height,'} + quast_stats |> + filter(group == "', unique(quast_stats$group)[i], '") |> + filter(str_detect(stat, "[N].*[59]0")) |> + ggplot(aes(y = stat, x = value)) + + geom_point( + aes(fill = stage), + size = 5, + pch = 21, + alpha = 0.8, + position = position_dodge(width = 0.4) + ) + + facet_wrap(~sample, scales = "free") + + theme_bw(base_size = 14) + + theme( + axis.title = element_blank(), + strip.background = element_blank(), + legend.position = "bottom", + axis.text.x = element_text(angle = 60, hjust = 1) + ) + + fill_scale_plots + \n```') |> + write_lines(glue::glue("quast_files/NL_plots/_{ unique(quast_stats$group)[i] }_N_quast.Rmd")) + paste0('```{r echo = F, fig.height = ',plt_height,'} + quast_stats |> + filter(group == "', unique(quast_stats$group)[i], '") |> + filter(str_detect(stat, "[L].*[59]0")) |> + ggplot(aes(y = stat, x = value)) + + geom_point( + aes(fill = stage), + size = 5, + pch = 21, + alpha = 0.8, + position = position_dodge(width = 0.4) + ) + + facet_wrap(~sample, scales = "free") + + theme_bw(base_size = 14) + + theme( + axis.title = 
element_blank(), + strip.background = element_blank(), + legend.position = "bottom", + axis.text.x = element_text(angle = 60, hjust = 1) + ) + + fill_scale_plots + \n```') |> + write_lines(glue::glue("quast_files/NL_plots/_{ unique(quast_stats$group)[i] }_L_quast.Rmd")) +} diff --git a/assets/report/scripts/_quast_page.R b/assets/report/scripts/_quast_page.R new file mode 100644 index 00000000..23b688da --- /dev/null +++ b/assets/report/scripts/_quast_page.R @@ -0,0 +1,131 @@ +# This generates a tab-page for each sample + +for (i in 1:length(unique(quast_stats$group))) { + cat(paste0('### ', unique(quast_stats$group)[i], '\n\n'), + paste0('::: {.panel-tabset} \n\n'), + paste0('#### Tabular \n\n'), + paste0('::::: {.panel-tabset} \n\n'), + paste0('##### Overview \n\n'), + quast_stats |> + filter(group == unique(quast_stats$group)[i]) |> + dplyr::select(sample, stage, stat, value) |> + pivot_wider(names_from = "stat", values_from = "value",id_cols = c(sample, stage)) |> + dplyr::arrange(stage, sample) |> + dplyr::select( + sample, + stage, + `# contigs`, + `Largest contig`, + starts_with("# contigs ("), + `Total length`, + `Reference length` , + starts_with("Total length ("), + `GC (%)` + ) |> + gt::gt() |> + gt::cols_nanoplot(columns = starts_with("# contigs ("), + new_col_name = "Contigs_by_size", + new_col_label = gt::md("*# Contigs by size*")) |> + gt::cols_nanoplot(columns = starts_with("Total length ("), + new_col_name = "Total_length", + new_col_label = gt::md("*Total length*")) |> + gt::tab_footnote( + footnote = "Breaks are: contigs >= 0, 1kb, 5kb, 10kb, 25kb, 50kb", + locations = gt::cells_column_labels(columns = c(Contigs_by_size, Total_length))) |> + gt::cols_align(align = "center", columns = c(Contigs_by_size, Total_length)) |> + gt::cols_move(Contigs_by_size, "Largest contig") |> + gt::cols_move(Total_length, "Total length") |> + gt::fmt_auto() |> + gt::fmt_scientific(columns = c("Largest contig", "Total length", "Reference length")) |> + 
gt::opt_stylize(color = "gray") |> + gt::opt_table_font( + font = list( + gt::google_font(name = "Maven Pro"), + "rounded-sans" + )) |> + gt::as_raw_html() + , + paste0('\n\n'), + paste0('##### N/L 50/90 \n\n'), + paste0('N50: length of a contig, such that all the contigs of at least the same length together cover at least 50% of the assembly.
N90: same as N50 but the contigs cover 90% of the assembly.
NG 50/90: Similar to N50/90, but measures coverage of the reference.
L measures the number of contigs required to cover 50 (or 90) % of the assembly length.
LG measures the number of contigs to cover the given percentage of the reference.\n\n'), + quast_stats |> + filter(group == unique(quast_stats$group)[i]) |> + filter(str_detect(stat, "[NLG].*[59]0")) |> + dplyr::select(sample, stage, stat, value) |> + pivot_wider(names_from = "stat", values_from = "value",id_cols = c(sample, stage)) |> + dplyr::arrange(stage, sample) |> + gt::gt() |> + gt::fmt_auto() |> + gt::fmt_scientific(columns = starts_with("N")) |> + gt::opt_stylize(color = "gray") |> + gt::opt_table_font( + font = list( + gt::google_font(name = "Maven Pro"), + "rounded-sans" + )) |> + gt::as_raw_html() + , + paste0('\n\n'), + paste0('##### Comparison to ref \n\n'), + quast_stats |> + filter(group == unique(quast_stats$group)[i]) |> + filter( + stat %in% c( + "Reference mapped (%)", + "Reference properly paired (%)", + "Reference avg. coverage depth", + "Reference coverage >= 1x (%)", + "# misassemblies", + "# misassembled contigs", + "Misassembled contigs length", + "# local misassemblies" + ) + ) |> + dplyr::select(sample, stage, stat, value) |> + pivot_wider( + names_from = "stat", + values_from = "value", + id_cols = c(sample, stage) + ) |> + dplyr::arrange(stage, sample) |> + gt::gt() |> + gt::fmt_auto() |> + gt::opt_stylize(color = "gray") |> + gt::opt_table_font( + font = list( + gt::google_font(name = "Maven Pro"), + "rounded-sans" + )) |> + gt::as_raw_html(), + paste0(':::::'), # tables tabset + paste0('\n\n'), + paste0('#### Visual'), + paste0('\n\n'), + paste0('::::: {.panel-tabset} \n\n'), + paste0('\n\n'), + paste0('##### Contigs by size\n'), + knitr::knit_child(glue::glue('quast_files/contigs/_{ unique(quast_stats$group)[i] }_quast.Rmd'), + envir = globalenv(), + quiet = TRUE), + paste0('\n\n'), + paste0('##### N 50 / 90\n'), + paste0('\n\n'), + paste0('N50: length of a contig, such that all the contigs of at least the same length together cover at least 50% of the assembly.
N90: same as N50 but the contigs cover 90% of the assembly.
NG 50/90: Similar to N50/90, but measures coverage of the reference.\n\n'), + knitr::knit_child(glue::glue('quast_files/NL_plots/_{ unique(quast_stats$group)[i] }_N_quast.Rmd'), + envir = globalenv(), + quiet = TRUE), + paste0('\n\n'), + paste0('##### L 50 / 90'), + paste0('\n\n'), + paste0('L measures the number of contigs required to cover 50 (or 90) % of the assembly length, LG measures the number of contigs to cover the given percentage of the reference.\n\n'), + knitr::knit_child(glue::glue('quast_files/NL_plots/_{ unique(quast_stats$group)[i] }_L_quast.Rmd'), + envir = globalenv(), + quiet = TRUE), + paste0('\n\n'), + paste0('\n\n'), + paste0(':::::'), # plots tabset + paste0('\n\n'), + paste0(':::'), # group tabsets + paste0('\n\n'), + sep = "") +} diff --git a/conf/modules.config b/conf/modules.config index efacbbdd..c84cc2a2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -14,15 +14,14 @@ process { // General catch-all publishDir = [ path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } // Read preparation includeConfig 'modules/ont-prep.config' includeConfig 'modules/hifi-prep.config' -includeConfig 'modules/trimgalore.config' +includeConfig 'modules/fastp.config' // Assembly includeConfig 'modules/assembly.config' diff --git a/conf/modules/QC/alignments.config b/conf/modules/QC/alignments.config index f9536164..b4c5ae9a 100644 --- a/conf/modules/QC/alignments.config +++ b/conf/modules/QC/alignments.config @@ -4,11 +4,17 @@ process { ext.prefix = { "${meta.id}_to_reference" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/alignments/reference/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + mode: params.publish_dir_mode + ] + } + withName: '.*MAP_TO_REF.*:ALIGN' { + ext.prefix = { "${meta.id}_to_reference" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/reference/" }, + mode: params.publish_dir_mode ] ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? "-ax lr:hq" : "-ax map-hifi" + (meta.qc_reads == 'ont' ? "-ax lr:hq" : "-ax map-hifi") } } // Assembly mappings @@ -16,66 +22,124 @@ process { ext.prefix = { "${meta.id}_assembly" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode + ] + } + withName: '.*ASSEMBLE:.*MAP_TO_ASSEMBLY.*:ALIGN' { + ext.prefix = { "${meta.id}_assembly" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode ] ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? "-ax lr:hq" : "-ax map-hifi" + (meta.qc_reads == 'ont' ? "-ax lr:hq" : "-ax map-hifi") } } withName: '.*MEDAKA:.*MAP_TO_ASSEMBLY.*' { ext.prefix = { "${meta.id}_medaka" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode + ] + } + withName: '.*MEDAKA:.*MAP_TO_ASSEMBLY.*:ALIGN' { + ext.prefix = { "${meta.id}_medaka" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode ] ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? "-ax lr:hq" : "-ax map-hifi" + (meta.qc_reads == 'ont' ? 
"-ax lr:hq" : "-ax map-hifi") } } withName: '.*PILON:.*MAP_TO_ASSEMBLY.*' { ext.prefix = { "${meta.id}_pilon" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode + ] + } + withName: '.*PILON:.*MAP_TO_ASSEMBLY.*:ALIGN' { + ext.prefix = { "${meta.id}_pilon" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode ] ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? "-ax lr:hq" : "-ax map-hifi" + (meta.qc_reads == 'ont' ? "-ax lr:hq" : "-ax map-hifi") } } withName: '.*LONGSTITCH:.*MAP_TO_ASSEMBLY.*' { ext.prefix = { "${meta.id}_longstitch" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode + ] + } + withName: '.*LONGSTITCH:.*MAP_TO_ASSEMBLY.*:ALIGN' { + ext.prefix = { "${meta.id}_longstitch" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode ] ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? "-ax lr:hq" : "-ax map-hifi" + (meta.qc_reads == 'ont' ? "-ax lr:hq" : "-ax map-hifi") } } withName: '.*LINKS:.*MAP_TO_ASSEMBLY.*' { ext.prefix = { "${meta.id}_links" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + mode: params.publish_dir_mode + ] + } + withName: '.*LINKS:.*MAP_TO_ASSEMBLY.*:ALIGN' { + ext.prefix = { "${meta.id}_links" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode + ] + ext.args = { + (meta.qc_reads == 'ont' ? "-ax lr:hq" : "-ax map-hifi") + } + } + withName: '.*HIC:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_hic" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode + ] + } + withName: '.*HIC:.*MAP_TO_ASSEMBLY.*:ALIGN' { + ext.prefix = { "${meta.id}_hic" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode ] ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? "-ax lr:hq" : "-ax map-hifi" + (meta.qc_reads == 'ont' ? "-ax lr:hq" : "-ax map-hifi") } } withName: '.*RAGTAG:.*MAP_TO_ASSEMBLY.*' { ext.prefix = { "${meta.id}_ragtag" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode + ] + } + withName: '.*RAGTAG:.*MAP_TO_ASSEMBLY.*:ALIGN' { + ext.prefix = { "${meta.id}_ragtag" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode ] ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? "-ax lr:hq" : "-ax map-hifi" + (meta.qc_reads == 'ont' ? 
"-ax lr:hq" : "-ax map-hifi") + } + } + withName: '.*SAMTOOLS:.*INDEX.*' { + ext.args = { + "-c -m ${params.csi_index_size}" } } } diff --git a/conf/modules/QC/busco.config b/conf/modules/QC/busco.config index 4972756f..f13505c5 100644 --- a/conf/modules/QC/busco.config +++ b/conf/modules/QC/busco.config @@ -4,8 +4,7 @@ process { publishDir = [ path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + pattern: "*{-busco,_summary}*" ] } withName: '.*PILON:.*:BUSCO' { @@ -13,8 +12,7 @@ process { publishDir = [ path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + pattern: "*{-busco,_summary}*" ] } withName: '.*MEDAKA:.*:BUSCO' { @@ -22,8 +20,7 @@ process { publishDir = [ path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + pattern: "*{-busco,_summary}*" ] } withName: '.*LINKS:.*:BUSCO' { @@ -31,8 +28,7 @@ process { publishDir = [ path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + pattern: "*{-busco,_summary}*" ] } withName: '.*LONGSTITCH:.*:BUSCO' { @@ -40,8 +36,7 @@ process { publishDir = [ path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + pattern: "*{-busco,_summary}*" ] } // avoid catching ragtag from ont_on_hifi assembly @@ -50,8 +45,15 @@ process { publishDir = [ path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + pattern: "*{-busco,_summary}*" + ] + } + withName: '.*:SCAFFOLD:.*HIC:QC.*:BUSCO' { + ext.prefix = { "${meta.id}_yahs-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*" ] } } diff --git a/conf/modules/QC/jellyfish-genomescope.config b/conf/modules/QC/jellyfish-genomescope.config index 18f70cd9..61ccb062 100644 --- a/conf/modules/QC/jellyfish-genomescope.config +++ b/conf/modules/QC/jellyfish-genomescope.config @@ -2,36 +2,31 @@ process { withName: COUNT { publishDir = [ path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/count/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } withName: DUMP { publishDir = [ path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/dump/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } withName: STATS { publishDir = [ path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/stats/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } withName: HISTO { publishDir = [ path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/histo/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + mode: params.publish_dir_mode ] } withName: GENOMESCOPE { publishDir = [ path: { "${params.outdir}/${meta.id}/reads/genomescope/genomescope/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } } diff --git a/conf/modules/QC/merqury.config b/conf/modules/QC/merqury.config index 06a31325..1d09663c 100644 --- a/conf/modules/QC/merqury.config +++ b/conf/modules/QC/merqury.config @@ -3,40 +3,42 @@ process { ext.prefix = { "${meta.id}_assembly" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } withName: '.*PILON:.*:MERQURY' { ext.prefix = { "${meta.id}_pilon" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } withName: '.*MEDAKA:.*:MERQURY' { ext.prefix = { "${meta.id}_medaka" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } withName: '.*LINKS:.*:MERQURY' { ext.prefix = { "${meta.id}_links" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } withName: '.*LONGSTITCH:.*:MERQURY' { ext.prefix = { "${meta.id}_longstitch" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + mode: params.publish_dir_mode + ] + } + withName: '.*:SCAFFOLD:HIC:.*:MERQURY' { + ext.prefix = { "${meta.id}_yahs" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode ] } // avoid catching ragtag from ont_on_hifi assembly @@ -44,8 +46,7 @@ process { ext.prefix = { "${meta.id}_ragtag" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } } diff --git a/conf/modules/QC/meryl.config b/conf/modules/QC/meryl.config index 41452a69..322e71cb 100644 --- a/conf/modules/QC/meryl.config +++ b/conf/modules/QC/meryl.config @@ -2,15 +2,13 @@ process { withName: MERYL_COUNT { publishDir = [ path: { "${params.outdir}/${meta.id}/reads/meryl/count/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } withName: MERYL_UNIONSUM { publishDir = [ path: { "${params.outdir}/${meta.id}/reads/meryl/unionsum/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } } diff --git a/conf/modules/QC/quast.config b/conf/modules/QC/quast.config index b97a7494..63523f6d 100644 --- a/conf/modules/QC/quast.config +++ b/conf/modules/QC/quast.config @@ -3,40 +3,42 @@ process { ext.prefix = { "${meta.id}_assembly" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/QUAST" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } withName: '.*PILON:.*:QUAST' { ext.prefix = { "${meta.id}_pilon" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/QUAST" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + mode: params.publish_dir_mode ] } withName: '.*MEDAKA:.*:QUAST' { ext.prefix = { "${meta.id}_medaka" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/QUAST" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } withName: '.*LINKS:.*:QUAST' { ext.prefix = { "${meta.id}_links" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } withName: '.*LONGSTITCH:.*:QUAST' { ext.prefix = { "${meta.id}_longstitch" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode + ] + } + withName: '.*SCAFFOLD:HIC.*:QUAST' { + ext.prefix = { "${meta.id}_yahs" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, + mode: params.publish_dir_mode ] } // avoid catching ragtag from ont_on_hifi assembly @@ -44,8 +46,7 @@ process { ext.prefix = { "${meta.id}_ragtag" } publishDir = [ path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } } diff --git a/conf/modules/assembly.config b/conf/modules/assembly.config index eb18c7ef..1c3885a8 100644 --- a/conf/modules/assembly.config +++ b/conf/modules/assembly.config @@ -3,57 +3,56 @@ process { ext.args = { [ meta.genome_size ? "--genome-size ${meta.genome_size}" : '', - params.flye_args + // If flye is both for ONT and HiFi, we need to detect the type of read based on the read suffix (fastplong) + // This is presumably prone to fail in certain weird situations + (meta.assembler_ont == "flye" && meta.assembler_hifi == "flye" && strategy == "scaffold") ? 
+ (reads ==~ ".*${meta.id}_ont.*" ? meta.assembler_ont_args : meta.assembler_hifi_args) : + (meta.assembler_ont == "flye") ? meta.assembler_ont_args : "", + (meta.assembler_hifi == "flye") ? meta.assembler_hifi_args : "", + meta.flye_args ].join(" ").trim() } publishDir = [ path: { "${params.outdir}/${meta.id}/assembly/flye/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } withName: HIFIASM { - ext.args = { [ params.hifiasm_args ].join(" ").trim() } + ext.args = { [ meta.assembler_hifi_args, meta.hifiasm_args ].join(" ").trim() } publishDir = [ path: { "${params.outdir}/${meta.id}/assembly/hifiasm/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } withName: HIFIASM_ONT { - ext.args = { [ params.hifiasm_args, "--ont" ].join(" ").trim() } + ext.args = { [ meta.assembler_ont_args, meta.hifiasm_args, "--ont" ].join(" ").trim() } publishDir = [ path: { "${params.outdir}/${meta.id}/assembly/hifiasm_ont/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } withName: GFA_2_FA { publishDir = [ path: { "${params.outdir}/${meta.id}/assembly/hifiasm/fasta" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } withName: GFA_2_FA_HIFI { publishDir = [ path: { "${params.outdir}/${meta.id}/assembly/hifiasm/fasta" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } withName: GFA_2_FA_ONT { publishDir = [ path: { "${params.outdir}/${meta.id}/assembly/hifiasm_ont/fasta" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + mode: params.publish_dir_mode ] } withName: '.*ASSEMBLE:.*RAGTAG_PATCH' { publishDir = [ path: { "${params.outdir}/${meta.id}/assembly/ragtag/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] ext.prefix = { "${meta.id}_assembly_patch" } } diff --git a/conf/modules/fastp.config b/conf/modules/fastp.config new file mode 100644 index 00000000..7d9f5b72 --- /dev/null +++ b/conf/modules/fastp.config @@ -0,0 +1,18 @@ +process { + withName: FASTP { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/fastp" }, + mode: params.publish_dir_mode + ] + } +} + +process { + withName: FASTP_HIC { + ext.prefix = {"${meta.id}_hicreads"} + publishDir = [ + path: { "${params.outdir}/${meta.id}/hic_reads/fastp" }, + mode: params.publish_dir_mode + ] + } +} diff --git a/conf/modules/hifi-prep.config b/conf/modules/hifi-prep.config index ec84a420..2c61f5ab 100644 --- a/conf/modules/hifi-prep.config +++ b/conf/modules/hifi-prep.config @@ -1,16 +1,20 @@ process { - withName: LIMA { + withName: FASTPLONG_HIFI { publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/lima/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + path: { "${params.outdir}/${meta.id}/reads/fastplong/hifi/" }, + mode: params.publish_dir_mode ] + ext.args = { + [ + meta.hifi_fastplong_args + ].join(" ").trim() + } + ext.prefix = { "${meta.id}_hifi" } } withName: TO_FASTQ { publishDir = [ path: { "${params.outdir}/${meta.id}/reads/lima/fastq/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + mode: params.publish_dir_mode ] } } diff --git a/conf/modules/liftoff.config b/conf/modules/liftoff.config index 3f470e90..cc86bdc0 100644 --- a/conf/modules/liftoff.config +++ b/conf/modules/liftoff.config @@ -2,48 +2,42 @@ process { withName: '.*ASSEMBLE:.*LIFTOFF' { publishDir = [ path: { "${params.outdir}/${meta.id}/assembly/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] ext.prefix = { "${meta.id}_assembly" } } withName: '.*PILON:.*LIFTOFF' { publishDir = [ path: { "${params.outdir}/${meta.id}/polish/pilon/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] ext.prefix = { "${meta.id}_pilon" } } withName: '.*MEDAKA:.*LIFTOFF' { publishDir = [ path: { "${params.outdir}/${meta.id}/polish/medaka" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] ext.prefix = { "${meta.id}_medaka" } } withName: '.*RAGTAG:.*LIFTOFF' { publishDir = [ path: { "${params.outdir}/${meta.id}/scaffold/ragtag/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] ext.prefix = { "${meta.id}_ragtag" } } withName: '.*LONGSTITCH:.*LIFTOFF' { publishDir = [ path: { "${params.outdir}/${meta.id}/scaffold/longstitch" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] ext.prefix = { "${meta.id}_longstitch" } } withName: '.*LINKS:.*LIFTOFF' { publishDir = [ path: { "${params.outdir}/${meta.id}/scaffold/links" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + mode: params.publish_dir_mode ] ext.prefix = { "${meta.id}_links" } } diff --git a/conf/modules/ont-prep.config b/conf/modules/ont-prep.config index 6b824273..dddbe9d0 100644 --- a/conf/modules/ont-prep.config +++ b/conf/modules/ont-prep.config @@ -1,23 +1,20 @@ process { - withName: NANOQ { - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/nanoq" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } withName: COLLECT { publishDir = [ path: { "${params.outdir}/${meta.id}/reads/collect" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } - withName: PORECHOP { + withName: FASTPLONG_ONT { publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/porechop" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + path: { "${params.outdir}/${meta.id}/reads/fastplong/ont/" }, + mode: params.publish_dir_mode ] + ext.args = { + [ + meta.ont_fastplong_args + ].join(" ").trim() + } + ext.prefix = { "${meta.id}_ont" } } } diff --git a/conf/modules/polishing.config b/conf/modules/polishing.config index 8c39f50e..40234c77 100644 --- a/conf/modules/polishing.config +++ b/conf/modules/polishing.config @@ -6,26 +6,43 @@ process { ext.prefix = { "${meta.id}_medaka" } publishDir = [ path: { "${params.outdir}/${meta.id}/polish/medaka" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + mode: params.publish_dir_mode + ] + } + withName: '.*:DORADO:ALIGN.*' { + //ext.args = { ["--add-fastq-rg"] } + publishDir = [ + path: { "${params.outdir}/${meta.id}/polish/dorado/alignment" }, + mode: params.publish_dir_mode + ] + } + withName: '.*:DORADO:POLISH.*' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/polish/dorado/" }, + mode: params.publish_dir_mode ] } // Pilon mapping - withName: '.*PILON:MAP_SR.*' { + withName: '.*PILON:MAP_SR:ALIGN.*' { publishDir = [ path: { "${params.outdir}/${meta.id}/QC/alignments/shortreads/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] ext.prefix = { "${meta.id}_shortreads" } ext.args = { "-ax sr " } } + withName: '.*PILON:MAP_SR.*' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/shortreads/" }, + mode: params.publish_dir_mode + ] + ext.prefix = { "${meta.id}_shortreads" } + } withName: PILON { ext.prefix = { "${meta.id}_pilon" } publishDir = [ path: { "${params.outdir}/${meta.id}/polish/pilon" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } } diff --git a/conf/modules/report.config b/conf/modules/report.config index b405d0f6..54ac3730 100644 --- a/conf/modules/report.config +++ b/conf/modules/report.config @@ -2,8 +2,7 @@ process { withName: REPORT { publishDir = [ path: { "${params.outdir}/report/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + mode: params.publish_dir_mode ] } } diff --git a/conf/modules/scaffolding.config b/conf/modules/scaffolding.config index 878b8bfa..7f75b6ce 100644 --- a/conf/modules/scaffolding.config +++ b/conf/modules/scaffolding.config @@ -2,8 +2,7 @@ process { withName: '.*SCAFFOLD:.*RAGTAG_SCAFFOLD' { publishDir = [ path: { "${params.outdir}/${meta.id}/scaffold/ragtag/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] ext.prefix = { "${meta.id}_ragtag" } ext.args = [ @@ -16,8 +15,7 @@ process { withName: LINKS { publishDir = [ path: { "${params.outdir}/${meta.id}/scaffold/links/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] ext.prefix = { "${meta.id}_links" } ext.args = ["-t 40,200", "-d 500,2000,5000"].join(" ").trim() @@ -25,9 +23,61 @@ process { withName: LONGSTITCH { publishDir = [ path: { "${params.outdir}/${meta.id}/scaffold/longstitch/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + mode: params.publish_dir_mode ] ext.prefix = { "${meta.id}_longstitch" } } + withName: BWAMEM2_INDEX { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/hic/bwamem2/index" }, + mode: params.publish_dir_mode + ] + ext.prefix = { "${meta.id}_bwamem_index" } + } + withName: BWAMEM2_MEM { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/hic/bwamem2/mem" }, + mode: params.publish_dir_mode + ] + ext.prefix = { "${meta.id}_bwamem" } + ext.args = { "-5SP"} + } + withName: MINIMAP2_HIC { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/hic/minimap/" }, + mode: params.publish_dir_mode + ] + ext.prefix = { "${meta.id}_minimap_hic" } + ext.args = { "--no-pairing" } + } + withName: MARKDUP { + ext.args = "-Djava.io.tmpdir=./tmp-picard-markdup --CREATE_INDEX" + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/hic/markdup/" }, + mode: params.publish_dir_mode + ] + ext.prefix = { "${meta.id}_hic_dedup" } + } + withName: ADD_RG { + ext.args = {[ "--RGID 1", "--RGLB ${meta.id}","--RGPM UNKNOWN", "--RGPL ILLUMINA","--RGPU 0", "--RGSM ${meta.id}"].join(" ").trim()} + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/hic/add_replace_rg/" }, + mode: params.publish_dir_mode + ] + ext.prefix = { "${meta.id}_add_rg" } + } + withName: YAHS { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/hic/yahs/" }, + mode: params.publish_dir_mode + ] + ext.prefix = { "${meta.id}_hic_dedup" } + } + withName: '.*HIC:SAMTOOLS_FAIDX.*' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/hic/yahs/index/" }, + mode: params.publish_dir_mode + ] + ext.prefix = { "${meta.id}_hic_dedup" } + } } diff --git a/conf/modules/trimgalore.config b/conf/modules/trimgalore.config deleted file mode 100644 index dc899e99..00000000 --- a/conf/modules/trimgalore.config +++ /dev/null @@ -1,9 +0,0 @@ -process { - withName: TRIMGALORE { - publishDir = [ - path: {
"${params.outdir}/${meta.id}/reads/trimgalore" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } -} diff --git a/conf/test.config b/conf/test.config index be3dc0e5..35891c1e 100644 --- a/conf/test.config +++ b/conf/test.config @@ -21,13 +21,11 @@ process { params { config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/samplesheet/test_samplesheet.csv' - quast = false - busco = false - jellyfish = false - genome_size = 2000000 - hifi = true - ont = true - assembler = "flye_on_hifiasm" + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/samplesheet/test_samplesheet_v2.csv' + ontreads = 'https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/ONT-Col-0_test_data.fastq.gz' + hifireads = 'https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/HiFi-Col-0_test_data.fastq.gz' + ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/Col-CEN_v1.2.Chr1_5MB-7MB.fasta.gz' + ref_gff = 'https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/Col-CEN_v1.2_genes_araport11.Chr1_5MB-7MB.gff3.gz' + genome_size = "2000000" hifiasm_args = "-f 0" } diff --git a/conf/test_full.config b/conf/test_full.config index 8dd4fee9..6394b2df 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -10,23 +10,30 @@ ---------------------------------------------------------------------------------------- */ +/* +Notes on this test: + There are 10 samples in the samplesheet. 
+ Of these, 4 are "single" assembler strategy, and these are grouped + During read-prep this group becomes 1 sample + All prep steps should be 7 processes total (6 ungrouped + group) + These should be visible as a group in the report. +*/ + params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/samplesheet/full_test_samplesheet.csv' - ont = true - hifi = true - quast = true - busco = true // needs DB - jellyfish = true - genome_size = 2000000 - assembler = "flye_on_hifiasm" - polish_medaka = true - polish_pilon = true - scaffold_links = true - scaffold_longstitch = true - scaffold_ragtag = true - short_reads = true - merqury = true + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/samplesheet/full_test_samplesheet_v2_no_medaka.csv' + ontreads = 's3://nf-core-awsmegatests/genomeassembler/input/Col_0.ONT.porechopped.fastq.gz' + hifireads = 's3://nf-core-awsmegatests/genomeassembler/input/Col_0.hifi_reads.fastq.gz' + shortread_F = 's3://nf-core-awsmegatests/genomeassembler/input/SRR1604937_1.fastq.gz' + shortread_R = 's3://nf-core-awsmegatests/genomeassembler/input/SRR1604937_2.fastq.gz' + ref_gff = 'http://raw.githubusercontent.com/schatzlab/Col-CEN/main/v1.2/Col-CEN_v1.2_genes.araport11.gff3.gz' + ref_fasta = 'http://raw.githubusercontent.com/schatzlab/Col-CEN/main/v1.2/Col-CEN_v1.2.fasta.gz' + quast = true + busco = true + jellyfish = true + use_ref = true + shortread_trim = true + busco_lineage = "brassicales_odb12" } diff --git a/conf/test_full_medaka.config b/conf/test_full_medaka.config new file mode 100644 index 00000000..4ebd0104 --- /dev/null +++ b/conf/test_full_medaka.config @@ -0,0 +1,39 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run nf-core/genomeassembler -profile test_full, --outdir + +---------------------------------------------------------------------------------------- +*/ + +/* +Notes on this test: + There are 10 samples in the samplesheet. + Of these, 4 are "single" assembler strategy, and these are grouped + During read-prep this group becomes 1 sample + All prep steps should be 7 processes total (6 ungrouped + group) + These should be visible as a group in the report. +*/ + +params { + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' + + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/samplesheet/full_test_samplesheet_v2.csv' + ontreads = 's3://nf-core-awsmegatests/genomeassembler/input/Col_0.ONT.porechopped.fastq.gz' + hifireads = 's3://nf-core-awsmegatests/genomeassembler/input/Col_0.hifi_reads.fastq.gz' + shortread_F = 's3://nf-core-awsmegatests/genomeassembler/input/SRR1604937_1.fastq.gz' + shortread_R = 's3://nf-core-awsmegatests/genomeassembler/input/SRR1604937_2.fastq.gz' + ref_gff = 'http://raw.githubusercontent.com/schatzlab/Col-CEN/main/v1.2/Col-CEN_v1.2_genes.araport11.gff3.gz' + ref_fasta = 'http://raw.githubusercontent.com/schatzlab/Col-CEN/main/v1.2/Col-CEN_v1.2.fasta.gz' + quast = true + busco = true + jellyfish = true + use_ref = true + shortread_trim = true + busco_lineage = "brassicales_odb12" +} diff --git a/configs/hifi_flye.config b/configs/hifi_flye.config deleted file mode 100644 index 7b5a3b10..00000000 --- a/configs/hifi_flye.config +++ /dev/null @@ -1,8 +0,0 @@ -// Use this config to assemble HIFI reads with FLYE in --pacbio-hifi mode - -params { - assembler = 'flye' - flye_mode = '--pacbio-hifi' - hifi = true - ont = false -} diff --git 
a/configs/hifi_hifiasm.config b/configs/hifi_hifiasm.config deleted file mode 100644 index 1b7558a2..00000000 --- a/configs/hifi_hifiasm.config +++ /dev/null @@ -1,7 +0,0 @@ -// Use this config to assemble HIFI reads with hifiasm - -params { - assembler = 'hifiasm' - hifi = true - ont = false -} diff --git a/configs/hifi_ont_flye_on_hifiasm.config b/configs/hifi_ont_flye_on_hifiasm.config deleted file mode 100644 index 092e3a23..00000000 --- a/configs/hifi_ont_flye_on_hifiasm.config +++ /dev/null @@ -1,12 +0,0 @@ -/* - Use this config to: - assemble HIFI reads with hifiasm - assemble ONT reads with flye - scaffold the flye assembly onto the hifiasm assembly -*/ - -params { - hifi = true - ont = true - assembler = "flye_on_hifiasm" -} diff --git a/configs/hifi_ont_hifiasm_on_hifiasm.config b/configs/hifi_ont_hifiasm_on_hifiasm.config deleted file mode 100644 index 9e548e42..00000000 --- a/configs/hifi_ont_hifiasm_on_hifiasm.config +++ /dev/null @@ -1,12 +0,0 @@ -/* - Use this config to: - assemble HIFI reads with hifiasm - assemble ONT reads with hifiasm --ont - scaffold the ONT assembly onto the HiFi assembly -*/ - -params { - hifi = true - ont = true - assembler = "hifiasm_on_hifiasm" -} diff --git a/configs/hifi_ont_hifiasm_ul.config b/configs/hifi_ont_hifiasm_ul.config deleted file mode 100644 index 4c48a6ba..00000000 --- a/configs/hifi_ont_hifiasm_ul.config +++ /dev/null @@ -1,8 +0,0 @@ -// Use this config to assemble HIFI and ONT reads with hifiasm in --ul mode - -params { - assembler = 'hifiasm' - hifi = true - ont = true - hifiasm_ont = true -} diff --git a/configs/ont_flye.config b/configs/ont_flye.config deleted file mode 100644 index 18b1ff6a..00000000 --- a/configs/ont_flye.config +++ /dev/null @@ -1,7 +0,0 @@ -// Use this config to assemble ONT reads with flye - -params { - assembler = 'flye' - hifi = false - ont = true -} diff --git a/configs/ont_hifiasm.config b/configs/ont_hifiasm.config deleted file mode 100644 index 9835cd88..00000000 --- 
a/configs/ont_hifiasm.config +++ /dev/null @@ -1,7 +0,0 @@ -// Use this config to assemble ONT reads with hifiasm using --ont - -params { - assembler = 'hifiasm' - hifi = false - ont = true -} diff --git a/docs/images/genomeassembler.dark.png b/docs/images/genomeassembler.dark.png deleted file mode 100644 index 4b572110..00000000 Binary files a/docs/images/genomeassembler.dark.png and /dev/null differ diff --git a/docs/images/genomeassembler.light.png b/docs/images/genomeassembler.light.png deleted file mode 100644 index 36a351fc..00000000 Binary files a/docs/images/genomeassembler.light.png and /dev/null differ diff --git a/docs/images/genomeassembler_v2.light.png b/docs/images/genomeassembler_v2.light.png new file mode 100644 index 00000000..fa3de794 Binary files /dev/null and b/docs/images/genomeassembler_v2.light.png differ diff --git a/docs/images/nf-core-genomeassembler_logo_dark.png b/docs/images/nf-core-genomeassembler_logo_dark.png index 0039f2ff..aa72431d 100644 Binary files a/docs/images/nf-core-genomeassembler_logo_dark.png and b/docs/images/nf-core-genomeassembler_logo_dark.png differ diff --git a/docs/images/nf-core-genomeassembler_logo_light.png b/docs/images/nf-core-genomeassembler_logo_light.png index edbdc154..a3cb8082 100644 Binary files a/docs/images/nf-core-genomeassembler_logo_light.png and b/docs/images/nf-core-genomeassembler_logo_light.png differ diff --git a/docs/output.md b/docs/output.md index 6b13ad6e..1253b71c 100644 --- a/docs/output.md +++ b/docs/output.md @@ -11,9 +11,9 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: - [**Read preparation**](#read-preparation) - - [**ONT Reads**](#ont-reads): - - [**HiFi reads**](#hifi-reads): + - [**Long reads**](#long-reads): - [**Short reads**](#short-reads): + - [**HiC reads**](#hic-reads): - [**Assembly**](#assembly), choice between assemblers - 
[**Polishing**](#polishing) - [**Scaffolding**](#scaffolding) @@ -38,10 +38,10 @@ Within each sample, the files are structured as follows: The outputs from all read preparation steps are emitted into `/reads/`. -#### ONT reads +#### Long reads -If the basecalls are scattered across multiple files, `collect` can be used to collect those into a single file. -[porechop](https://github.com/rrwick/Porechop) is a tool that identifies and trims adapter sequences from ONT reads. +If the ONT basecalls are scattered across multiple files, `collect` can be used to collect those into a single file. +[fastplong](https://github.com/OpenGene/fastplong) is a tool for QC and preprocessing of long-reads. [genomescope](https://github.com/tbenavi1/genomescope2.0) estimates genome size and ploidy from the k-mer spectrum computed by [jellyfish](https://github.com/gmarcais/Jellyfish).
@@ -50,7 +50,9 @@ If the basecalls are scattered across multiple files, `collect` can be used to c - `/` - `reads/` - `collect/`: single fastq.gz files per sample - - `porechop/`: output from porechop, fastq.gz + - `fastplong/`: output from fastplong, fastq.gz and report in json and html format. + - `ont/`: fastplong output for ONT reads + - `hifi/`: fastplong output for HiFi reads - `genomescope/`: output from jellyfish and genomescope - `jellyfish/` - `count/`: output from jellyfish count @@ -61,38 +63,46 @@ If the basecalls are scattered across multiple files, `collect` can be used to c
-#### HiFi reads +#### Short reads -[lima](https://lima.how/) performs trimming of adapters from pacbio HiFi reads. +[fastp](https://github.com/OpenGene/fastp) performs shortread QC and trimming. +[meryl](https://github.com/marbl/meryl) calculates the k-mer spectrum of short reads. +If a group was provided, the group name will be used instead of SampleName below.
Output files - `/` - `reads/` - - `lima/`: hifi reads after adapter removal with lima. - - `fastq/`: hifi reads after adapter remval with lima converted to fastq format. + - `fastp/`: + - `_1.fastp.fastq.gz`: Trimmed forward reads + - `_2.fastp.fastq.gz`: Trimmed reverse reads (if included) + - `.fastp.html`: html report + - `.fastp.json`: json report + - `.fastp.log`: logfile + - `meryl/`: output from meryl + - `count/`: k-mer counts per file + - `unionsum/`: union of k-mer counts per sample
-#### Short reads +#### HiC reads -[TrimGalore!](https://github.com/FelixKrueger/TrimGalore) can remove adapters from illumina short-reads. -[meryl](https://github.com/marbl/meryl) calculates the k-mer spectrum of short reads. +[fastp](https://github.com/OpenGene/fastp) performs shortread QC and trimming. + +If a group was provided, the group name will be used instead of SampleName below.
Output files - `/` - - `reads/` - - `trimgalore/`: - - `_val_1.fq.gz`: Trimmed forward reads - - `_val_2.fq.gz`: Trimmed reverse reads (if included) - - `_1.fastq.gz.trimming_report.txt`: Trimming report forward - - `_2.fastq.gz.trimming_report.txt`: Trimming report reverse (if included) - - `meryl/`: output from meryl - - `count/`: k-mer counts per file - - `unionsum/`: union of k-mer counts per sample + - `hic_reads/` + - `fastp/`: + - `_1.fastp.fastq.gz`: Trimmed forward reads + - `_2.fastp.fastq.gz`: Trimmed reverse reads (if included) + - `.fastp.html`: html report + - `.fastp.json`: json report + - `.fastp.log`: logfile
@@ -101,7 +111,7 @@ If the basecalls are scattered across multiple files, `collect` can be used to c This folder contains the initial assemblies of the provided reads. Depending on the assembly strategy chosen, different assemblers are used. [flye](https://github.com/mikolmogorov/Flye) performs assembly of ONT reads -[hifiasm](https://github.com/chhylp123/hifiasm) performs assembly of HiFi reads, or combinations of HiFi reads and ONT reads in `--ul` mode. +[hifiasm](https://github.com/chhylp123/hifiasm) performs assembly of HiFi or ONT reads, or combinations of HiFi reads and ONT reads in `--ul` mode. [ragtag](https://github.com/malonge/RagTag) performs scaffolding and can be used to scaffold assemblies of ONT onto assemblies of HiFi reads. Annotation `gff3` and `unmapped.txt` files are only created if a reference for annotation liftover is provided and `lift_annotations` is enabled. @@ -124,7 +134,7 @@ Annotation `gff3` and `unmapped.txt` files are only created if a reference for a - `.asm.bp.r_utg.gfa`: raw unitigs in gfa format - `.stderr.log`: Any output form hifiasm to stderr - `gfa2_fasta/`: hifiasm assembly in fasta format. - - `ragtag/`: output from RagTag, only if `'flye_on_hifiasm'` was used as the assembler. Contains one folder per sample. + - `ragtag/`: output from RagTag, only if `'scaffold'` was used as the strategy. 
- `_assembly_scaffold/` - `_assembly_scaffold.agp`: Scaffolds in agp format - `_assembly_scaffold.fasta`: Scaffolds in fasta format @@ -154,6 +164,13 @@ Annotation `gff3` and `unmapped.txt` files are only created if a reference for a - `_medaka.fa.gz` Polished assembly - `_medaka.gff3` annotation liftover - `_medaka.unnapped.txt` annotations that could not be lifted over during annotation liftover + - `dorado/`: output from dorado + - `_dorado.fa.gz` Polished assembly + - `_dorado.gff3` annotation liftover + - `_dorado.unnapped.txt` annotations that could not be lifted over during annotation liftover + - `alignments/` output from dorado aligner + - `_dorado_aligned.bam` Alignment + - `_dorado_aligned.bai` Alignment index @@ -188,6 +205,14 @@ Annotation `gff3` and `unmapped.txt` files are only created if a reference for a - `_ragtag_.stats`: Scaffolding statistics - `_ragtag.gff3` annotation liftover - `_ragtag.unnapped.txt` annotations that could not be lifted over during annotation liftover + - `hic/`: output from HiC scaffolding workflow + - `bwamem2/`: bwamem2 outputs + - `index/`: outputs from bwamem2 index + - `mem/`: outputs from bwamem2 mem + - `minimap/`: minimap2 outputs + - `markdup/`: output from picard markduplicates + - `yahs/`: output from yahs + - `index/`: alignment index used with yahs @@ -229,9 +254,6 @@ The files and folders in the different QC folders are named based on - `_.assembly_only.bed` : bp errors in assembly (bed) - `_.assembly_only.wig` : bp errors in assembly (wig) - `_.unionsum.hist.ploidy` : ploidy estimates from short-reads - - `nanoq/`: nanoq results - - `_report.json`: nanoq report in json format - - `_stats.json`: nanoq stats in json format - `QUAST/`: QUAST analysis - `_/`: QUAST results, cp. 
[QUAST Docs](https://github.com/ablab/quast?tab=readme-ov-file#output) - `report.txt`: summary table @@ -271,19 +293,19 @@ The files in the alignment folder have the following base name structure: - `QC/` - `alignments/`: alignments to assemblies - `_.bam` Alignment - - `_.bai` bam index file + - `_.csi` bam index file - `_.stats` comprehensive statistics from alignment file - `_.idxstats` alignment summary statistics - `_.flagstat` number of alignments for each FLAG type - `shortreads/`: folder containing short read mapping for pilon - `_shortreads.bam` Alignment - - `_shortreads.bai` bam index file + - `_shortreads.csi` bam index file - `_shortreads.stats` comprehensive statistics from alignment file - `_shortreads.idxstats` alignment summary statistics - `_shortreads.flagstat` number of alignments for each FLAG type - `reference/`: folder containing alignment of long reads to reference - `_to_reference.bam` Alignment - - `_to_reference.bai` bam index file + - `_to_reference.csi` bam index file - `_to_reference.stats` comprehensive statistics from alignment file - `_to_reference.idxstats` alignment summary statistics - `_to_reference.flagstat` number of alignments for each FLAG type diff --git a/docs/usage.md b/docs/usage.md index 178148b4..1d9bdca7 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -7,23 +7,62 @@ ## Introduction This pipeline is designed to assemble haploid (or diploid inbred) genomes from long-reads. `nf-core/genomeassembler` can take ONT and HiFi reads, and supports different assembly strategies. The pipeline can also integrate information on a reference genome (e.g. closely related individual) and short-reads for quality control. -This pipeline can perform assembly, polishing, scaffolding and annotation lift-over from a reference genome. Phasing or HiC scaffolding are currently unsupported. +This pipeline can perform assembly, polishing, scaffolding using long-reads, HiC data, or a reference, and annotation lift-over from a reference genome. 
-![Pipeline metromap](images/genomeassembler.light.png) +> [!NOTE] +> Phasing is currently not supported. + +![Pipeline metromap](images/genomeassembler_v2.light.png) + +Since it is often difficult to know which tool, or assembly strategy will perform best on a dataset, `nf-core/genomeassembler` can also be used to compare outcomes of different approaches in one run. +To compare different samples, a column named `group` is required, which should contain the same value for all samples that should be compared to each other. + +## Parameterization + +Parameters for this pipeline can either be supplied **globally**, e.g: + +- via `--paramname value`, +- or in a config with `params { paramname = value }`, +- or a yaml with: + +```yaml +--- +params: + - paramname: "value" +``` + +or as **sample parameters**, by adding a _correctly named_ column to the samplesheet. In the above example this would be a column named `paramname`. + +Sample parameters take priority over global parameters, if both are provided the sample-specific parameter will be used for that sample. + +> [!NOTE] +> The parameter names will be used in subsequent sections. Since all parameters can be provided per-sample or pipeline wide, no examples will be given. + +The list of all parameters that can be provided globally is available [here](https://nf-co.re/genomeassembler/parameters/), parameters that can be set per sample are provided at the [end of this page](#sample-parameters). + +## Samples and grouping + +This pipeline is intended to support two main use-cases: + +- either a larger set of samples is assembled using a shared set of parameters and settings set mostly via params, +- or a single, or few samples, are assembled using different strategies, typically with the goal of comparing strategies to identify the best approach for a given dataset. + +In the second case, it is likely that several samples will use the same inputs (i.e. reads). 
Such samples can be put into one group, by assigning them the same value in the group column of the samplesheet, and for these samples pre-processing of reads will only be done once per group, instead of once per sample. This can be used to avoid unnecessary redundant work on the same set of inputs and will only affect preprocessing and reporting, where these samples will be displayed together. + +> [!WARNING] +> Grouping should **never** be used for samples that use different input files. + +## Choice of assembly-strategy and assembler -### Pre-set profiles +Assembly strategy is controlled via `strategy` (either pipeline parameter or sample-setting), and assembler(s) used are chosen via `assembler` (either pipeline parameter or sample-setting) +`nf-core/genomeassembler` currently supports the following assembly strategies: -To ease configuration, there are a couple of pre-defined profiles for various combinations of read sources and assemblers (named readtype_assembler) +- single (default): Use a single assembler for a single type of read. The assembler should be provided via `assembler` and can be `hifiasm` (default) or `flye`. +- hybrid: Use a single assembler for a combined assembly of ONT and HiFi reads. The assembler should be provided via `assembler`. Currently, only `hifiasm` supports hybrid assembly. +- scaffold: Assemble ONT reads and HiFi reads independently and scaffold one assembly onto the other. `assembler` has to be provided as `"ontAssembler_hifiAssembler"` and could for example be: `"flye_hifiasm"` to assemble ont reads with `flye` and HiFi reads with `hifiasm` or `"hifiasm_hifiasm"` to assemble both ont and hifi reads independently with `hifiasm`. When running in "scaffold" mode, `assembly_scaffolding_order` can be used to control which assembly gets scaffolded onto which, the default being "ont_on_hifi" where ONT assembly is scaffolded onto HiFi assembly.
-| ONT | HiFI  | Assembly-strategy  | Profile name | -| --- | ----- | ---------------------------------------------------------------------- | ---------------------------- | -| Yes | No  | flye | `ont_flye` | -| No | Yes  | flye | `hifi_flye` | -| Yes | No | hifiasm | `ont_hifiasm` | -| No | Yes  | hifiasm | `hifi_hifiasm` | -| Yes | Yes  | hifiasm --ul | `hifiont_hifiasm` | -| Yes | Yes  | Scaffolding of ONT assemblies (flye) onto HiFi assemblies (hifiasm) | `hifiont_flye_on_hifiasm` | -| Yes | Yes  | Scaffolding of ONT assemblies (hifiasm) onto HiFi assemblies (hifiasm) | `hifiont_hifiasm_on_hifiasm` | +Assembler specific arguments can be provided for the assembler via `hifiasm_args` or `flye_args`, or with more fine-grained control via `assembler_ont_args` and `assembler_hifi_args` for scaffolding. +`assembler_ont_args` controls the parameters for the assembler used with ONT-reads in `single` and `hybrid` strategies, or for the assembler used for ONT reads when using `scaffold`. `assembler_hifi_args` can be used to pass arguments to the assembler used for HiFi reads in `single`, or `scaffold` mode. ## Samplesheet input @@ -35,16 +74,17 @@ You will need to create a samplesheet with information about the samples you wou ### Samplesheet layout -The largest samplesheet format is: +The samplesheet _must_ contain a column name `sample` [string]. The most barebone samplesheet format is: ```csv title="samplesheet.csv" -sample,ontreads,hifireads,ref_fasta,ref_gff,shortread_F,shortread_R,paired -Sample1,/path/reads/sample1ont.fq.gz,/path/reads/sample1hifi.fq.gz,/path/references/ref.fa,/path/references/ref.gff,/path/reads/sample1_r1.fq.gz,/path/reads/sample1_r2.fq.gz,true +sample +Sample1 +Sample2 ``` -The samplesheet _must_ contain a column name `sample` [string]. -Further columns _can_ be: +Further commonly used columns _can_ be: +- `group` [string] to group different samples in the report to facilitate comparisons. 
- `ontreads` [path] for long reads produced with oxford nanopore sequencers - `hifireads` [path] for long reads produced with pacbio sequencers in "HiFi" mode - Reference information: @@ -55,13 +95,11 @@ Further columns _can_ be: - `shortread_R`: shortread reverse file (paired end) - `paired`: [true/false] true if the reads are paired end, false if they are single-end. The `shortreads_R` column should exist if `paired` is `false` but can be empty. +But samplesheets can grow more complex if a range of strategies should be compared in a single pipeline run. A list of all possible columns can be found at the [end of this page](#sample-parameters) + > [!INFO] > It is strongly recommended to provide all paths as absolute paths -### Multiple runs of the same sample - -For ONT reads, a glob pattern can be provided, matching files will be concatenated into a single file if `--collect` is used. Generally we recommend to provide all reads in a single file. - ## Running the pipeline The typical command for running the pipeline is as follows: @@ -221,3 +259,259 @@ We recommend adding the following line to your environment to limit this (typica ```bash NXF_OPTS='-Xms1g -Xmx4g' ``` + +# Sample Parameters + +This section lists all possible parameters that can be set per sample. +If parameters are not provided, they are inherited from the pipeline parameters; see params for default settings. 
+ +## Sample information + +| Parameter | Description | Type | +| --------- | ------------ | -------- | +| `sample` | Sample name | `string` | +| `group` | Sample group | `string` | + +## Reference Parameters + +Options controlling pipeline behavior + +| Parameter | Description | Type | +| ----------- | ------------------------------------------ | --------- | +| `ref_fasta` | Path to reference genome sequence (fasta) | `string` | +| `ref_gff` | Path to reference genome annotations (gff) | `string` | +| `use_ref` | Use reference genome | `boolean` | + +## Assembly options + +Options controlling assembly. + +> NOTE: hifiasm_args and flye_args will be passed to the respective assembler in all cases. If the same assembler is used in different strategies, these may need to be parameterised per sample. + +The difference between `{hifiasm,flye}_args` and `assembler_{ont,hifi}_args` is subtle: the former will be applied for all cases where this particular assembler is used, whereas the latter will apply the args to the assembler used for assembling a specific type of data. `{hifiasm,flye}_args` are generally expected to be used for e.g. system specific configuration via `params`, although they can also be set per-sample, whereas `assembler_{ont,hifi}_args` provide a bit more of an abstract interface, possibly more appropriate to adjust certain parameters per-sample. + +| Parameter | Description | Type | +| ------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | +| `strategy` | Assembly strategy to use. Valid choices are `'single'`, `'hybrid'` and `'scaffold'` | `string` | +| `assembler` | Assembler to use.
Valid choices depend on strategy; for single either `flye` or `hifiasm`, hybrid can be done with `hifiasm` and for scaffolded assembly provide the names of the assemblers separated with an underscore. The first assembler will | +| be used for ONT reads, the second for HiFi reads. | `string` | +| `assembler_ont` | Assembler to use for ONT reads. Often determined automatically, but required for complex runs, where both ONT and HiFi reads are provided, but some assemblies should be done using `strategy: "single"` using only ONT reads. Such cases can not unambiguously be resolved otherwise. | `string` | +| `assembler_hifi` | Assembler to use for HiFi reads. Often determined automatically, but required for complex runs, where both ONT and HiFi reads are provided, but some assemblies should be done using `strategy: "single"` using only HiFi reads. Such cases can not unambiguously be resolved otherwise. | `string` | +| `assembly_scaffolding_order` | When strategy is "scaffold", which assembly should be scaffolded onto which? | `string` | +| `genome_size` | expected genome size, optional | `string` | +| `flye_mode` | flye mode | `string` | +| `flye_args` | additional args for flye | `string` | +| `hifiasm_args` | Extra arguments passed to `hifiasm` | `string` | +| `assembler_ont_args` | Extra arguments passed to assembler_ont, assembling ONT reads and hybrid assemblies | `string` | +| `assembler_hifi_args` | Extra arguments passed to assembler_hifi; assembling HiFi reads | `string` | + +## Long-read preprocessing + +All long-reads will be passed to `fastplong` for trimming and quality control. +If reads should not be modified by `fastplong`, adaptor trimming can be disabled using `-A`, quality filtering can be disabled with `-Q`. +These arguments can be passed via `_fastplong_args` for the different read types.
+ +| Parameter | Description | Type | +| --------------------- | ---------------------------------------------------------- | --------- | +| `ontreads` | Path to ONT reads | `string` | +| `ont_collect` | Collect ONT reads from several files? | `boolean` | +| `ont_adapters` | Adaptors for ONT read-trimming | `string` | +| `ont_fastplong_args` | Additional args to be passed to `fastplong` for ONT reads | `string` | +| `hifireads` | Path to HiFi reads | `string` | +| `hifi_adapters` | Adaptors for HiFi read-trimming | `string` | +| `hifi_fastplong_args` | Additional args to be passed to `fastplong` for HiFi reads | `string` | +| `jellyfish` | Run jellyfish and genomescope (recommended) | `boolean` | +| `jellyfish_k` | Value of k used during k-mer analysis with jellyfish | `integer` | +| `dump` | dump jellyfish output | `boolean` | + +## Short read options + +Options for short reads. + +| Parameter | Description | Type | +| ----------------- | ------------------------------- | --------- | +| `use_short_reads` | Use short reads? | `boolean` | +| `shortread_trim` | Trim short reads? | `boolean` | +| `meryl_k` | kmer length for meryl / merqury | `integer` | +| `shortread_F` | Path to forward short reads | `string` | +| `shortread_R` | Path to reverse short reads | `string` | +| `paired` | Are shortreads paired? | `string` | + +## Polishing options + +Polishing options. When using `polish` with either `dorado+pilon` or `medaka+pilon`, the assembly will be polished using ONT reads first, and then the ONT-polished assembly will be polished with short reads using `pilon`. `dorado` and `medaka` are mutually exclusive. `dorado` is not available via conda. 
**`dorado` is an experimental feature which may not work for all inputs.** + +| Parameter | Description | Type | +| --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------- | +| `polish_pilon` | Polish assembly with pilon? Requires short reads | `boolean` | +| `medaka_model` | model to use with medaka | `string` | +| `polish_medaka` | Polish assembly with medaka (ONT only) | `boolean` | +| `polish_dorado` | EXPERIMENTAL: Polish assembly with dorado (ONT only) | `boolean` | +| `polish` | Alternative polish interface: can be 'pilon','medaka', 'dorado', 'dorado+pilon' or 'medaka+pilon', do not include quotation marks. Only available through samplesheet, takes priority over `polish_*`. | `string` | + +## Scaffolding options + +Options `_longstitch`, `_links` and `_hic` are mutually exclusive. +`RagTag` scaffolding can be used to either scaffold the (polished) assembly, or can be combined with `longstitch`, `links` or `hic`, to scaffold the scaffolding results onto a reference. `RagTag` will always scaffold the most "advanced" stage of assembly; meaning that if the assembly was scaffolded, the scaffolded assembly will be used, if the assembly was polished, the polished assembly will be used, if the pipeline only carried out assembly for that sample, the assembly will be scaffolded. + +| Parameter | Description | Type | +| --------------------- | ------------------------------------------ | --------- | +| `scaffold_longstitch` | Scaffold with longstitch? | `boolean` | +| `scaffold_links` | Scaffolding with links? | `boolean` | +| `scaffold_hic` | Scaffold with yahs (requires hic reads)? | `boolean` | +| `scaffold_ragtag` | Scaffold with ragtag (requires reference)? | `boolean` | + +### HiC + +HiC scaffolding specific parameters. 
Supplying HiC reads activates HiC scaffolding, unless explicitly deactivated using `scaffold_hic`. +`bwa-mem2` generally is more suitable for HiC alignments than `minimap2`, and is the recommended option. +However, `bwamem2` requires substantial memory for large genomes, which may prohibit use of `bwamem2` in some cases. + +| Parameter | Description | Type | +| ------------- | ----------------------------------------------------------- | --------- | +| `hic_aligner` | Aligner to use, default "bwa-mem2", alternative: "minimap2" | `string` | +| `hic_F` | Forward / \_1 HiC reads | `path` | +| `hic_R` | Reverse / \_2 HiC reads | `path` | +| `hic_trim` | Trim HiC reads? default: false | `boolean` | + +## QC options + +Options for QC tools + +| Parameter | Description | Type | +| ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | +| `merqury` | Run merqury (if short reads are provided) | `boolean` | +| `qc_reads` | Long reads that should be used for QC when both ONT and HiFi reads are provided. Options are `'ont'` or `'hifi'` | +| `busco` | Run BUSCO? | `boolean` | +| `busco_db` | Path to busco db (optional) | `string` | +| `busco_lineage` | Busco lineage to use | `string` | +| `quast` | Run quast | `boolean` | +| `ref_map_bam` | A mapping (bam) of reads mapped to the reference can be provided for QC. If provided alignment to reference fasta will not run | `string` | +| `assembly` | Can be used to proved existing assembly will skip assembly and perform downstream steps including qc | `string` | +| `assembly_map_bam` | A mapping (bam) of reads mapped to the provided assembly can be specified for QC. 
If provided alignment to the provided assembly fasta will not run | `string` | + +## Annotations options + +Options controlling annotation liftover + +| Parameter | Description | Type | +| ------------------ | ----------------------------------------- | --------- | +| `lift_annotations` | Lift-over annotations (requires ref_gff)? | `boolean` | + +### Updating the pipeline + +When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: + +```bash +nextflow pull nf-core/genomeassembler +``` + +### Reproducibility + +It is a good idea to specify the pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. + +First, go to the [nf-core/genomeassembler releases page](https://github.com/nf-core/genomeassembler/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. + +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. + +To further assist in reproducibility, you can use share and reuse [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. 
+ +> [!TIP] +> If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. + +## Core Nextflow arguments + +> [!NOTE] +> These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). + +### `-profile` + +Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. + +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. + +> [!INFO] +> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. + +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to check if your system is supported, please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). + +Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! +They are loaded in sequence, so later profiles can overwrite earlier profiles. + +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment. 
+ +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters +- `docker` + - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` + - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` + - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` + - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). +- `conda` + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. + +### `-resume` + +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). + +You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. + +### `-c` + +Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. 
+ +## Custom configuration + +### Resource requests + +Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the pipeline steps, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher resources request (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. + +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. + +### Custom Containers + +In some cases, you may wish to change the container or conda environment used by a pipeline steps for a particular tool. By default, nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However, in some cases the pipeline specified version maybe out of date. + +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. + +### Custom Tool Arguments + +A pipeline might not always support every possible argument or option of a particular tool used in pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. 
+ +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. + +### nf-core/configs + +In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. + +See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files. + +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). + +## Running in the background + +Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. + +The Nextflow `-bg` flag launches Nextflow in the background, detached from your terminal so that the workflow does not stop if you log out of your session. The logs are saved to a file. + +Alternatively, you can use `screen` / `tmux` or similar tool to create a detached session which you can log back into at a later time. 
+Some HPC setups also allow you to run nextflow within a cluster job submitted your job scheduler (from where it submits more jobs). + +## Nextflow memory requirements + +In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. +We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`): + +```bash +NXF_OPTS='-Xms1g -Xmx4g' +``` diff --git a/main.nf b/main.nf index 9bd1bab0..396744a2 100644 --- a/main.nf +++ b/main.nf @@ -31,7 +31,6 @@ workflow NFCORE_GENOMEASSEMBLER { take: samplesheet // channel: samplesheet read in from --input - refs main: @@ -39,8 +38,7 @@ workflow NFCORE_GENOMEASSEMBLER { // WORKFLOW: Run pipeline // GENOMEASSEMBLER ( - samplesheet, - refs + samplesheet ) } /* @@ -70,9 +68,11 @@ workflow { // // WORKFLOW: Run main workflow // + NFCORE_GENOMEASSEMBLER ( - PIPELINE_INITIALISATION.out.samplesheet, PIPELINE_INITIALISATION.out.refs + PIPELINE_INITIALISATION.out.samplesheet ) + // // SUBWORKFLOW: Run completion tasks // diff --git a/modules.json b/modules.json index c1eff73d..4d65ad24 100644 --- a/modules.json +++ b/modules.json @@ -7,115 +7,141 @@ "nf-core": { "busco/busco": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "56925dff3062ca9387de65dd763568916b562124", + "installed_by": ["modules"] + }, + "bwamem2/index": { + "branch": "master", + "git_sha": "5dd46a36fca68d6ad1a6b22ec47adc8c6863717d", + "installed_by": ["modules"] + }, + "bwamem2/mem": { + "branch": "master", + "git_sha": "5dd46a36fca68d6ad1a6b22ec47adc8c6863717d", + "installed_by": ["modules"] + }, + "fastp": { + "branch": "master", + "git_sha": "a331ecfd1aa48b2b2298aab23bb4516c800e410b", + "installed_by": ["modules"] + }, + "fastplong": { + "branch": "master", + "git_sha": "a331ecfd1aa48b2b2298aab23bb4516c800e410b", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + 
"git_sha": "3009f27c4e4b6e99da4eeebe82799e13924a4a1f", "installed_by": ["modules"] }, "flye": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "1ab453187e9eada07bdd98609ce770971bfe86e4", "installed_by": ["modules"] }, "hifiasm": { "branch": "master", - "git_sha": "c457b50bf9187031f65b0fb090dc022e8814c729", + "git_sha": "c87519b3c0bfcb0b022b77738cf8164b3af042eb", "installed_by": ["modules"] }, "liftoff": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": "c7659d7353f37c1fb4407aaf013bc7d4b6cec015", "installed_by": ["modules"], "patch": "modules/nf-core/liftoff/liftoff.diff" }, - "lima": { - "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", - "installed_by": ["modules"] - }, "links": { "branch": "master", - "git_sha": "bd049fd0244ed914f2d10bed580b49fb44eba914", - "installed_by": ["modules"] + "git_sha": "61b37255166a4484051e63ebbb1376960a08e73e", + "installed_by": ["modules"], + "patch": "modules/nf-core/links/links.diff" }, "merqury/merqury": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "43f0c26fd70298d296a27ce3a68f1c0f7530f600", "installed_by": ["modules"], "patch": "modules/nf-core/merqury/merqury/merqury-merqury.diff" }, "meryl/count": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "43f0c26fd70298d296a27ce3a68f1c0f7530f600", "installed_by": ["modules"] }, "meryl/unionsum": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "43f0c26fd70298d296a27ce3a68f1c0f7530f600", "installed_by": ["modules"] }, "minimap2/align": { "branch": "master", - "git_sha": "a532706a19b3d83f14b1d48a6a815ed33eb48b0c", + "git_sha": "5c9f8d5b7671237c906abadc9ff732b301ca15ca", "installed_by": ["modules"], "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, - "pilon": { + "picard/addorreplacereadgroups": { "branch": "master", - "git_sha": 
"05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "74ec93d00bef147da3fb1f2262e8d31c14108f88", + "installed_by": ["modules"] + }, + "picard/markduplicates": { + "branch": "master", + "git_sha": "66d5808eaaabd9de8997c4c31a9e8cdd3b56c080", "installed_by": ["modules"] }, - "porechop/porechop": { + "pilon": { "branch": "master", - "git_sha": "dbf496251becaa54933305bb494b880253a84ee6", + "git_sha": "79f27eb566084b865363b634136cfe2256b84051", "installed_by": ["modules"] }, "ragtag/patch": { "branch": "master", - "git_sha": "62775d90df7565c82bd4ceedca70149529820cff", + "git_sha": "96f35d1a14c9bd453c5cefff865622e43057dbc7", "installed_by": ["modules"] }, "ragtag/scaffold": { "branch": "master", - "git_sha": "7d163aded9221aef68d8c11cb7a04354a232d89c", + "git_sha": "96f35d1a14c9bd453c5cefff865622e43057dbc7", + "installed_by": ["modules"] + }, + "samtools/faidx": { + "branch": "master", + "git_sha": "b2e78932ef01165fd85829513eaca29eff8e640a", "installed_by": ["modules"] }, "samtools/fastq": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "1d2fbdcbca677bbe8da0f9d0d2bb7c02f2cab1c9", "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "1d2fbdcbca677bbe8da0f9d0d2bb7c02f2cab1c9", "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/idxstats": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "1d2fbdcbca677bbe8da0f9d0d2bb7c02f2cab1c9", "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/index": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "1d2fbdcbca677bbe8da0f9d0d2bb7c02f2cab1c9", "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/sort": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "5cb9a8694da0a0e550921636bb60bc8c56445fd7", "installed_by": ["bam_sort_stats_samtools", 
"modules"] }, "samtools/stats": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "fe93fde0845f907fc91ad7cc7d797930408824df", "installed_by": ["bam_stats_samtools", "modules"] }, - "trimgalore": { + "yahs": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "b9837690be3f0296341829f656347d4662891daa", "installed_by": ["modules"] } } @@ -124,12 +150,12 @@ "nf-core": { "bam_sort_stats_samtools": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "7ac6cbe7c17c2dad685da7f70496c8f48ea48687", "installed_by": ["subworkflows"] }, "bam_stats_samtools": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "7ac6cbe7c17c2dad685da7f70496c8f48ea48687", "installed_by": ["bam_sort_stats_samtools", "subworkflows"] }, "utils_nextflow_pipeline": { diff --git a/modules/local/collect_reads/main.nf b/modules/local/collect_reads/main.nf index 4dbdc87a..a753c647 100644 --- a/modules/local/collect_reads/main.nf +++ b/modules/local/collect_reads/main.nf @@ -12,26 +12,18 @@ process COLLECT_READS { output: tuple val(meta), path("*_all_reads.fq.gz"), emit: combined_reads - path "versions.yml", emit: versions + tuple val("${task.process}"), val('gzip'), eval('gzip --version | head -n1 | sed "s/gzip //"'), emit: versions_collect_reads, topic: versions script: def prefix = task.ext.prefix ?: "${meta.id}" """ cat ${reads} > ${prefix}_all_reads.fq.gz - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gzip: \$(echo \$(gzip --version | head -n1 | sed 's/gzip //')) - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_all_reads.fq; gzip ${prefix}_all_reads.fq - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gzip: \$(echo \$(gzip --version | head -n1 | sed 's/gzip //')) - END_VERSIONS """ } diff --git a/modules/local/dorado/aligner/main.nf b/modules/local/dorado/aligner/main.nf new file 
mode 100644 index 00000000..64e10137 --- /dev/null +++ b/modules/local/dorado/aligner/main.nf @@ -0,0 +1,43 @@ +process DORADO_ALIGNER { + tag "${meta.id}" + label 'process_high' + + container "docker.io/nanoporetech/dorado:sha00aa724a69ddc5f47d82bd413039f912fdaf4e77" + + input: + tuple val(meta), path(ref), path(reads) + + output: + tuple val(meta), path("${meta.id}_dorado_aligned.bam"), emit: bam + tuple val(meta), path("${meta.id}_dorado_aligned.bam.bai"), emit: bai + tuple val("${task.process}"), val('dorado'), eval('dorado --version 2>&1 | head -n1'), emit: versions_dorado, topic: versions + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools + + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + dorado aligner \\ + -t ${task.cpus} \\ + ${ref} \\ + ${reads} \\ + ${args} \\ + | samtools sort --threads ${task.cpus}\\ + > ${meta.id}_dorado_aligned.bam + + samtools index ${meta.id}_dorado_aligned.bam > ${meta.id}_dorado_aligned.bam.bai + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}/${prefix}.bam + touch ${prefix}/${prefix}.bai + """ +} diff --git a/modules/local/dorado/polish/main.nf b/modules/local/dorado/polish/main.nf new file mode 100644 index 00000000..3d8443cb --- /dev/null +++ b/modules/local/dorado/polish/main.nf @@ -0,0 +1,42 @@ +process DORADO_POLISH { + tag "${meta.id}" + label 'process_high' + + container "docker.io/nanoporetech/dorado:sha00aa724a69ddc5f47d82bd413039f912fdaf4e77" + + input: + tuple val(meta), path(assembly), path(alignment), path(index) + val(variant_call_format) + + output: + tuple val(meta), path("${meta.id}_dorado_polished.fa.gz"), emit: polished_alignment, optional: true + tuple val(meta), path("${meta.id}_dorado_polished*vcf"), emit: variant_calls, optional: true + tuple val("${task.process}"), val('dorado'), eval('dorado 
--version 2>&1 | head -n1'), emit: versions_dorado, topic: versions + + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def variants = ["vcf","gvcf"].contains(variant_call_format) ? "--${variant_call_format}" : "" + def outfile = variants ? "> ${meta.id}_dorado_polished.${variants}" : "| bgzip > ${meta.id}_dorado_polished.fa.gz" + """ + dorado polish \\ + -t ${task.cpus} \\ + ${alignment} \\ + ${assembly} \\ + ${args} \\ + ${variants} \\ + ${outfile} + """ + + stub: + def args = task.ext.args ?: '' + def variants = ["vcf","gvcf"].contains(variant_call_format) ? "--${variant_call_format}" : "" + def outfile = variants ? "touch ${meta.id}_dorado_polished.${variants}" : "echo '' | bgzip > ${meta.id}_dorado_polished.fa.gz" + + """ + ${outfile} + """ +} diff --git a/modules/local/genomescope/main.nf b/modules/local/genomescope/main.nf index 9f6d4696..e39c48e4 100644 --- a/modules/local/genomescope/main.nf +++ b/modules/local/genomescope/main.nf @@ -14,7 +14,8 @@ process GENOMESCOPE { tuple val(meta), path("*_plot.log.png") , emit: plot_log tuple val(meta), path("*_plot.png") , emit: plot tuple val(meta), env(est_hap_len) , emit: estimated_hap_len - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('genomescope2'), eval("genomescope2 -v | sed 's/GenomeScope //'"), emit: versions_genomescope, topic: versions + script: def prefix = task.ext.prefix ?: "${meta.id}" @@ -28,12 +29,8 @@ process GENOMESCOPE { | sed 's@ bp@@g' \\ | sed 's@,@@g' \\ | awk '{printf "%i", (\$4+\$5)/2 }') - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - genomescope2: \$(echo \$(genomescope2 -v | sed 's/GenomeScope //')) - END_VERSIONS """ + stub: def prefix = task.ext.prefix ?: "${meta.id}" """ @@ -41,9 +38,5 @@ process GENOMESCOPE { touch ${prefix}_plot.log.png touch ${prefix}_plot.png est_hap_len=1 - cat <<-END_VERSIONS > versions.yml - "${task.process}": - genomescope2: \$(echo \$(genomescope2 -v) | sed 
's/GenomeScope //') - END_VERSIONS """ } diff --git a/modules/local/gfa2fa/environment.yml b/modules/local/gfa2fa/environment.yml index 2e1fcd06..dd58a0bd 100644 --- a/modules/local/gfa2fa/environment.yml +++ b/modules/local/gfa2fa/environment.yml @@ -2,10 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - conda-forge::coreutils=9.5 - - conda-forge::grep=3.11 - - conda-forge::gzip=1.13 - - conda-forge::lbzip2=2.5 - - conda-forge::sed=4.8 - - conda-forge::tar=1.34 - - bioconda::mawk=1.3.4 + - conda-forge::samtools=1.22.1 diff --git a/modules/local/gfa2fa/main.nf b/modules/local/gfa2fa/main.nf index b258f8d6..dc11c08e 100644 --- a/modules/local/gfa2fa/main.nf +++ b/modules/local/gfa2fa/main.nf @@ -2,37 +2,27 @@ process GFA_2_FA { tag "${meta.id}" label 'process_low' conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/52ccce28d2ab928ab862e25aae26314d69c8e38bd41ca9431c67ef05221348aa/data' - : 'community.wave.seqera.io/library/coreutils_grep_gzip_lbzip2_pruned:838ba80435a629f8'}" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(gfa_file) output: tuple val(meta), path("*fa.gz"), emit: contigs_fasta - path "versions.yml", emit: versions + tuple val("${task.process}"), val('bgzip'), eval("bgzip --version | head -n1 | sed 's/bgzip (htslib) //'"), emit: versions_gzip, topic: versions script: """ outfile=\$(basename $gfa_file .gfa).fa.gz awk '/^S/{print ">"\$2;print \$3}' ${gfa_file} \\ - | gzip > \$outfile - cat <<-END_VERSIONS > versions.yml - "${task.process}": - awk: \$(mawk -Wversion | sed '1!d; s/.*Awk //; s/,.*//; s/ [0-9]*\$//') - gzip: \$(echo \$(gzip --version | head -n1 | sed 's/gzip //')) - END_VERSIONS + | bgzip > \$outfile """ stub: """ outfile=\$(basename $gfa_file .gfa).fa.gz touch \$outfile - cat <<-END_VERSIONS > versions.yml - "${task.process}": - awk: \$(mawk -Wversion | sed '1!d; s/.*Awk //; s/,.*//; s/ [0-9]*\$//') - gzip: \$(echo \$(gzip --version | head -n1 | sed 's/gzip //')) - END_VERSIONS """ } diff --git a/modules/local/jellyfish/count/main.nf b/modules/local/jellyfish/count/main.nf index 841f4a38..c371d2ac 100644 --- a/modules/local/jellyfish/count/main.nf +++ b/modules/local/jellyfish/count/main.nf @@ -10,7 +10,8 @@ process COUNT { output: tuple val(meta), path("*.jf"), emit: kmers - path "versions.yml", emit: versions + tuple val("${task.process}"), val('jellyfish'), eval("jellyfish --version sed 's/jellyfish //'"), emit: versions_jellyfish, topic: versions + script: def prefix = task.ext.prefix ?: "${meta.id}" @@ -25,25 +26,17 @@ process COUNT { cp ${fasta} ${fasta.baseName}.fasta fi jellyfish count \\ - -m ${params.kmer_length} \\ + -m ${meta.jellyfish_k} \\ -s 140M \\ -C \\ -t ${task.cpus} ${fasta.baseName}.fasta mv mer_counts.jf ${prefix}_mer_counts.jf - cat <<-END_VERSIONS > versions.yml - "${task.process}": - jellyfish: \$(echo \$(jellyfish --version sed 's/jellyfish //')) - END_VERSIONS """ stub: def 
prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_mer_counts.jf - cat <<-END_VERSIONS > versions.yml - "${task.process}": - jellyfish: \$(echo \$(jellyfish --version sed 's/jellyfish //')) - END_VERSIONS """ } diff --git a/modules/local/jellyfish/dump/main.nf b/modules/local/jellyfish/dump/main.nf index e4ed6d22..82c0ce9a 100644 --- a/modules/local/jellyfish/dump/main.nf +++ b/modules/local/jellyfish/dump/main.nf @@ -10,24 +10,16 @@ process DUMP { output: tuple val(meta), path("*.fa"), emit: dumped_kmers - path "versions.yml", emit: versions + tuple val("${task.process}"), val('jellyfish'), eval("jellyfish --version sed 's/jellyfish //'"), emit: versions_jellyfish, topic: versions script: def prefix = task.ext.prefix ?: "${meta.id}" """ jellyfish dump ${kmers} > ${prefix}_kmers.fa - cat <<-END_VERSIONS > versions.yml - "${task.process}": - jellyfish: \$(echo \$(jellyfish --version sed 's/jellyfish //')) - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_kmers.fa - cat <<-END_VERSIONS > versions.yml - "${task.process}": - jellyfish: \$(echo \$(jellyfish --version sed 's/jellyfish //')) - END_VERSIONS """ } diff --git a/modules/local/jellyfish/histo/main.nf b/modules/local/jellyfish/histo/main.nf index 9f78f55d..42958b6d 100644 --- a/modules/local/jellyfish/histo/main.nf +++ b/modules/local/jellyfish/histo/main.nf @@ -10,25 +10,17 @@ process HISTO { output: tuple val(meta), path("*.tsv"), emit: histo - path "versions.yml", emit: versions + tuple val("${task.process}"), val('jellyfish'), eval("jellyfish --version sed 's/jellyfish //'"), emit: versions_jellyfish, topic: versions script: def prefix = task.ext.prefix ?: "${meta.id}" """ jellyfish histo ${kmers} > ${prefix}_hist.tsv - cat <<-END_VERSIONS > versions.yml - "${task.process}": - jellyfish: \$(echo \$(jellyfish --version sed 's/jellyfish //')) - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_hist.tsv - cat 
<<-END_VERSIONS > versions.yml - "${task.process}": - jellyfish: \$(echo \$(jellyfish --version sed 's/jellyfish //')) - END_VERSIONS """ } diff --git a/modules/local/jellyfish/stats/main.nf b/modules/local/jellyfish/stats/main.nf index 64eebfbf..6e8d3318 100644 --- a/modules/local/jellyfish/stats/main.nf +++ b/modules/local/jellyfish/stats/main.nf @@ -10,26 +10,19 @@ process STATS { output: tuple val(meta), path("*.txt"), emit: stats - path "versions.yml", emit: versions + tuple val("${task.process}"), val('jellyfish'), eval("jellyfish --version sed 's/jellyfish //'"), emit: versions_jellyfish, topic: versions + script: def prefix = task.ext.prefix ?: "${meta.id}" """ jellyfish stats ${kmers} > ${prefix}_stats.txt - cat <<-END_VERSIONS > versions.yml - "${task.process}": - jellyfish: \$(echo \$(jellyfish --version sed 's/jellyfish //')) - END_VERSIONS - """ + """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_stats.txt - cat <<-END_VERSIONS > versions.yml - "${task.process}": - jellyfish: \$(echo \$(jellyfish --version sed 's/jellyfish //')) - END_VERSIONS """ } diff --git a/modules/local/longstitch/main.nf b/modules/local/longstitch/main.nf index 01362fc3..ebb28487 100644 --- a/modules/local/longstitch/main.nf +++ b/modules/local/longstitch/main.nf @@ -12,7 +12,8 @@ process LONGSTITCH { output: tuple val(meta), path("*.tigmint-ntLink-arks.fa"), emit: ntlLinks_arks_scaffolds tuple val(meta), path("*.tigmint-ntLink.fa"), emit: ntlLinks_scaffolds - path "versions.yml", emit: versions + tuple val("${task.process}"), val('LongStitch'), eval("longstitch | head -n1 | sed 's/LongStitch v//'"), emit: versions_longstitch, topic: versions + script: def prefix = task.ext.prefix ?: "${meta.id}" @@ -43,20 +44,11 @@ process LONGSTITCH { mv *.tigmint-ntLink.longstitch-scaffolds.fa ${prefix}.tigmint-ntLink.fa sed -i 's/\\(scaffold[0-9]*\\),.*/\\1/' ${prefix}.tigmint-ntLink.fa - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - LongStitch: \$(echo 
\$(longstitch | head -n1 | sed 's/LongStitch v//')) - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.tigmint-ntLink-arks.fa touch ${prefix}.tigmint-ntLink.fa - cat <<-END_VERSIONS > versions.yml - "${task.process}": - LongStitch: \$(echo \$(longstitch | head -n1 | sed 's/LongStitch v//')) - END_VERSIONS """ } diff --git a/modules/local/medaka/main.nf b/modules/local/medaka/main.nf index 7117c7da..2a7440c3 100644 --- a/modules/local/medaka/main.nf +++ b/modules/local/medaka/main.nf @@ -14,7 +14,7 @@ process MEDAKA { output: tuple val(meta), path("*_medaka.fa.gz"), emit: assembly - path "versions.yml", emit: versions + tuple val("${task.process}"), val('medaka'), eval("medaka --version 2>&1 | sed 's/medaka //g'"), emit: versions_medaka, topic: versions when: task.ext.when == null || task.ext.when @@ -38,19 +38,10 @@ process MEDAKA { mv consensus.fasta ${prefix}.fa gzip -n ${prefix}.fa - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' ) - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_medaka.fa.gz - cat <<-END_VERSIONS > versions.yml - "${task.process}": - medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' ) - END_VERSIONS """ } diff --git a/modules/local/medaka/medaka_consensus/main.nf b/modules/local/medaka/medaka_consensus/main.nf index 80ef6e54..ee2a43c9 100644 --- a/modules/local/medaka/medaka_consensus/main.nf +++ b/modules/local/medaka/medaka_consensus/main.nf @@ -13,7 +13,7 @@ process MEDAKA_PARALLEL { output: tuple val(meta), path("*_medaka.fa.gz"), emit: assembly - path "versions.yml", emit: versions + tuple val("${task.process}"), val('medaka'), eval("medaka --version 2>&1 | sed 's/medaka //g'"), emit: versions_medaka, topic: versions when: task.ext.when == null || task.ext.when @@ -60,19 +60,10 @@ process MEDAKA_PARALLEL { inference/*.hdf \$assembly ${prefix}.fa gzip -n ${prefix}.fa - - cat <<-END_VERSIONS 
> versions.yml - "${task.process}": - medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' ) - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_medaka.fa.gz - cat <<-END_VERSIONS > versions.yml - "${task.process}": - medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' ) - END_VERSIONS """ } diff --git a/modules/local/nanoq/main.nf b/modules/local/nanoq/main.nf deleted file mode 100644 index 50d75135..00000000 --- a/modules/local/nanoq/main.nf +++ /dev/null @@ -1,43 +0,0 @@ -process NANOQ { - tag "${meta.id}" - label 'process_low' - conda "${moduleDir}/environment.yml" - - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://depot.galaxyproject.org/singularity/nanoq:0.10.0--h031d066_2' - : 'biocontainers/nanoq:0.10.0--h031d066_2'}" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*_report.json"), emit: report - tuple val(meta), path("*_stats.json"), emit: stats - tuple val(meta), env(median), emit: median_length - path "versions.yml", emit: versions - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - nanoq -i ${reads} -j -r ${prefix}_report.json -s -H -vvv > ${prefix}_stats.json - median=\$(cat ${prefix}_report.json | grep -o '"median_length":[0-9]*' | grep -o '[0-9]*') - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - nanoq: \$(nanoq -V | sed 's/nanoq //') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}_report.json - touch ${prefix}_stats.json - median=1 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - nanoq: \$(nanoq -V | sed 's/nanoq //') - END_VERSIONS - """ -} diff --git a/modules/local/quast/main.nf b/modules/local/quast/main.nf index 118c1b3a..8e063fa4 100644 --- a/modules/local/quast/main.nf +++ b/modules/local/quast/main.nf @@ -15,7 +15,7 @@ process QUAST { output: path "${meta.id}*/*", emit: results path "*report.tsv", emit: tsv - 
path "versions.yml", emit: versions + tuple val("${task.process}"), val('quast'), eval("quast.py --version 2>&1 | sed 's/^.*QUAST v//; s/ .*\$//' | tail -n1"), emit: versions_medaka, topic: versions when: task.ext.when == null || task.ext.when @@ -25,7 +25,7 @@ process QUAST { def prefix = task.ext.prefix ?: "${meta.id}" def features = use_gff ? "--features ${gff}" : '' def reference = use_fasta ? "-r ${fasta}" : '' - def reference_bam = params.use_ref ? "--ref-bam ${ref_bam}" : '' + def reference_bam = ref_bam ? "--ref-bam ${ref_bam}" : '' """ quast.py \\ @@ -41,21 +41,11 @@ process QUAST { ${args} ln -s ${prefix}/report.tsv ${prefix}_report.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - quast: \$(quast.py --version 2>&1 | sed 's/^.*QUAST v//; s/ .*\$//' | tail -n1) - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ mkdir ${prefix} && touch ${prefix}/report.tsv ln -s ${prefix}/report.tsv ${prefix}_report.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - quast: \$(quast.py --version 2>&1 | sed 's/^.*QUAST v//; s/ .*\$//' | tail -n1) - END_VERSIONS """ } diff --git a/modules/local/report/main.nf b/modules/local/report/main.nf index 841e61b4..6bd04f9b 100644 --- a/modules/local/report/main.nf +++ b/modules/local/report/main.nf @@ -10,73 +10,73 @@ process REPORT { https://wave.seqera.io/view/builds/bd-be4a8863b7b76cf7_1 docker */ input: - path qmdir_files, stageAs: "*" - path funct_files, stageAs: "functions/*" - path nanoq_files, stageAs: "data/nanoq/*" - path jelly_files, stageAs: "data/genomescope/*" - path quast_files, stageAs: "data/quast/*" - path busco_files, stageAs: "data/busco/*" - path meryl_files, stageAs: "data/merqury/*" - path versions, stageAs: "software_versions.yml" + path qmdir_files, stageAs: "*" + path funct_files, stageAs: "functions/*" + path script_files, stageAs: "scripts/*" + path fastplong_files, stageAs: "data/fastplong/*" + path jelly_files, stageAs: "data/genomescope/*" + path 
quast_files, stageAs: "data/quast/*" + path busco_files, stageAs: "data/busco/*" + path meryl_files, stageAs: "data/merqury/*" + val versions + val groups output: tuple path("report.html"), path("report_files/*"), emit: report_html path ("busco_files/reports.csv"), emit: busco_table, optional: true path ("quast_files/reports.csv"), emit: quast_table, optional: true path ("genomescope_files/*"), emit: genomescope_plots, optional: true - path "versions.yml", emit: versions - + // Versions are not pushed to versions topic as it is an input. + tuple val("${task.process}"), val('R'), eval("R --version | head -n1 | sed 's/R version //; s/ .*//'"), emit: versions_R + tuple val("${task.process}"), val('r-tidyverse'), eval("ls /opt/conda/pkgs/ | grep tidyverse | sed 's/r-tidyverse-//; s/-.*//'"), emit: versions_tidyverse + tuple val("${task.process}"), val('r-plotly'), eval("ls /opt/conda/pkgs/ | grep plotly | sed 's/r-plotly-//; s/-.*//'"), emit: versions_plotly + tuple val("${task.process}"), val('r-quarto'), eval("ls /opt/conda/pkgs/ | grep r-quarto | sed 's/r-quarto-//; s/-.*//'"), emit: versions_rquarto + tuple val("${task.process}"), val('quarto-cli'), eval("quarto --version"), emit: versions_quartocli when: task.ext.when == null || task.ext.when script: def report_profile = "--profile base" - if (params.ont) { - report_profile = report_profile << ",nanoq" + def report_params = '' + if (fastplong_files) { + report_profile = report_profile << ",fastplong" + report_params = report_params << ' -P fastplong:true' } - if (params.quast) { + if (quast_files) { report_profile = report_profile << ",quast" + report_params = report_params << ' -P quast:true ' } - if (params.busco) { + if (busco_files) { report_profile = report_profile << ",busco" + report_params = report_params << ' -P busco:true' } - if (params.jellyfish) { + if (jelly_files) { report_profile = report_profile << ",jellyfish" + report_params = report_params << ' -P jellyfish:true' } - if (params.merqury) { + if 
(meryl_files) { report_profile = report_profile << ",merqury" + report_params = report_params << ' -P merqury:true' } - def report_params = '' - if (params.ont) { - report_params = report_params << ' -P nanoq:true' - } - if (params.quast) { - report_params = report_params << ' -P quast:true ' - } - if (params.busco) { - report_params = report_params << ' -P busco:true' - } - if (params.jellyfish) { - report_params = report_params << ' -P jellyfish:true' - } - if (params.merqury) { - report_params = report_params << ' -P merqury:true' - } + + def groupBuilder = new groovy.yaml.YamlBuilder() + groupBuilder(groups) + def group_content = groupBuilder.toString().tokenize('\n').join("\n ") + def versionBuilder = new groovy.yaml.YamlBuilder() + versionBuilder(versions) + def versions_content = versionBuilder.toString().tokenize('\n').join("\n ") """ + cat <<- END_YAML_GROUPS > groups.yml + ${group_content} + END_YAML_GROUPS + cat <<- END_YAML_VERSIONS > versions.yml + ${versions_content} + END_YAML_VERSIONS + export HOME="\$PWD" quarto render report.qmd \\ ${report_profile} \\ - ${report_params} \\ - --to dashboard - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - R: \$(R --version | head -n1 | sed 's/R version //; s/ .*//') - r-tidyverse: \$(ls /opt/conda/pkgs/ | grep tidyverse | sed 's/r-tidyverse-//; s/-.*//') - r-plotly: \$(ls /opt/conda/pkgs/ | grep plotly | sed 's/r-plotly-//; s/-.*//') - r-quarto: \$(ls /opt/conda/pkgs/ | grep r-quarto | sed 's/r-quarto-//; s/-.*//') - quarto-cli: \$(quarto --version) - END_VERSIONS + ${report_params} """ stub: """ @@ -85,14 +85,5 @@ process REPORT { mkdir busco_files && touch busco_files/reports.csv mkdir quast_files && touch quast_files/reports.csv mkdir genomescope_files && touch genomescope_files/file.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - R: \$(R --version | head -n1 | sed 's/R version //; s/ .*//') - r-tidyverse: \$(ls /opt/conda/pkgs/ | grep tidyverse | sed 's/r-tidyverse-//; s/-.*//') 
- r-plotly: \$(ls /opt/conda/pkgs/ | grep plotly | sed 's/r-plotly-//; s/-.*//') - r-quarto: \$(ls /opt/conda/pkgs/ | grep r-quarto | sed 's/r-quarto-//; s/-.*//') - quarto-cli: \$(quarto --version) - END_VERSIONS """ } diff --git a/modules/nf-core/busco/busco/environment.yml b/modules/nf-core/busco/busco/environment.yml index ba8a40c0..861982d0 100644 --- a/modules/nf-core/busco/busco/environment.yml +++ b/modules/nf-core/busco/busco/environment.yml @@ -3,7 +3,5 @@ channels: - conda-forge - bioconda - dependencies: - - bioconda::busco=5.8.3 - - bioconda::sepp=4.5.5 + - bioconda::busco=6.0.0 diff --git a/modules/nf-core/busco/busco/main.nf b/modules/nf-core/busco/busco/main.nf index 05ac4295..aab5920e 100644 --- a/modules/nf-core/busco/busco/main.nf +++ b/modules/nf-core/busco/busco/main.nf @@ -4,33 +4,38 @@ process BUSCO_BUSCO { conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c6/c607f319867d96a38c8502f751458aa78bbd18fe4c7c4fa6b9d8350e6ba11ebe/data' - : 'community.wave.seqera.io/library/busco_sepp:f2dbc18a2f7a5b64'}" + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/41/4137d65ab5b90d2ae4fa9d3e0e8294ddccc287e53ca653bb3c63b8fdb03e882f/data' + : 'community.wave.seqera.io/library/busco:6.0.0--a9a1426105f81165'}" + // Note: one test had to be disabled when switching to Busco 6.0.0, cf https://github.com/nf-core/modules/pull/8781/files + // Try to restore it when upgrading Busco to a later version input: - tuple val(meta), path(fasta, stageAs:'tmp_input/*') - val mode // Required: One of genome, proteins, or transcriptome - val lineage // Required: lineage for checking against, or "auto/auto_prok/auto_euk" for enabling auto-lineage - path busco_lineages_path // Recommended: BUSCO lineages file - downloads if not set - path config_file // Optional: BUSCO configuration file - val clean_intermediates // Optional: Remove intermediate files + tuple val(meta), path(fasta, stageAs: 'tmp_input/*') + // Required: One of genome, proteins, or transcriptome + val mode + // Required: lineage for checking against, or "auto/auto_prok/auto_euk" for enabling auto-lineage + val lineage + // Recommended: BUSCO lineages file - downloads if not set + path busco_lineages_path + // Optional: BUSCO configuration file + path config_file + val clean_intermediates output: - tuple val(meta), path("*-busco.batch_summary.txt") , emit: batch_summary - tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt , optional: true - tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json, optional: true - tuple val(meta), path("*-busco.log") , emit: log , optional: true - tuple val(meta), path("*-busco/*/run_*/full_table.tsv") , emit: full_table , optional: true - tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv") , emit: missing_busco_list , optional: true - tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa") , emit: single_copy_proteins, optional: true - tuple val(meta), path("*-busco/*/run_*/busco_sequences") , emit: seq_dir , optional: 
true - tuple val(meta), path("*-busco/*/translated_proteins") , emit: translated_dir , optional: true - tuple val(meta), path("*-busco") , emit: busco_dir - tuple val(meta), path("busco_downloads/lineages/*") , emit: downloaded_lineages , optional: true - tuple val(meta), path("*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.faa"), emit: single_copy_faa , optional: true - tuple val(meta), path("*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.fna"), emit: single_copy_fna , optional: true - - path "versions.yml" , emit: versions + tuple val(meta), path("*-busco.batch_summary.txt"), emit: batch_summary + tuple val(meta), path("short_summary.*.txt"), emit: short_summaries_txt, optional: true + tuple val(meta), path("short_summary.*.json"), emit: short_summaries_json, optional: true + tuple val(meta), path("*-busco.log"), emit: log, optional: true + tuple val(meta), path("*-busco/*/run_*/full_table.tsv"), emit: full_table, optional: true + tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv"), emit: missing_busco_list, optional: true + tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa"), emit: single_copy_proteins, optional: true + tuple val(meta), path("*-busco/*/run_*/busco_sequences"), emit: seq_dir, optional: true + tuple val(meta), path("*-busco/*/translated_proteins"), emit: translated_dir, optional: true + tuple val(meta), path("*-busco"), emit: busco_dir + tuple val(meta), path("busco_downloads/lineages/*"), emit: downloaded_lineages, optional: true + tuple val(meta), path("*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.faa"), emit: single_copy_faa, optional: true + tuple val(meta), path("*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.fna"), emit: single_copy_fna, optional: true + tuple val("${task.process}"), val('busco'), eval("busco --version 2> /dev/null | sed 's/BUSCO //g'"), emit: versions_busco, topic: versions when: task.ext.when == null || task.ext.when @@ -104,10 +109,11 
@@ process BUSCO_BUSCO { mv ${prefix}-busco/*/short_summary.*.{json,txt} . || echo "Short summaries were not available: No genes were found." mv ${prefix}-busco/logs/busco.log ${prefix}-busco.log - cat <<-END_VERSIONS > versions.yml - "${task.process}": - busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) - END_VERSIONS + if grep 'Run failed; check logs' ${prefix}-busco.batch_summary.txt > /dev/null + then + echo "Busco run failed" + exit 1 + fi """ stub: @@ -116,10 +122,5 @@ process BUSCO_BUSCO { """ touch ${prefix}-busco.batch_summary.txt mkdir -p ${prefix}-busco/${fasta_name}/run_${lineage}/busco_sequences - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) - END_VERSIONS """ } diff --git a/modules/nf-core/busco/busco/meta.yml b/modules/nf-core/busco/busco/meta.yml index 0222e490..0ec0e158 100644 --- a/modules/nf-core/busco/busco/meta.yml +++ b/modules/nf-core/busco/busco/meta.yml @@ -7,14 +7,16 @@ keywords: - proteome tools: - busco: - description: BUSCO provides measures for quantitative assessment of genome assembly, - gene set, and transcriptome completeness based on evolutionarily informed expectations - of gene content from near-universal single-copy orthologs selected from OrthoDB. + description: BUSCO provides measures for quantitative assessment of genome + assembly, gene set, and transcriptome completeness based on evolutionarily + informed expectations of gene content from near-universal single-copy + orthologs selected from OrthoDB. homepage: https://busco.ezlab.org/ documentation: https://busco.ezlab.org/busco_userguide.html tool_dev_url: https://gitlab.com/ezlab/busco doi: "10.1007/978-1-4939-9173-0_14" - licence: ["MIT"] + licence: + - "MIT" identifier: biotools:busco input: - - meta: @@ -26,26 +28,29 @@ input: type: file description: Nucleic or amino acid sequence file in FASTA format. 
pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}" - - - mode: - type: string - description: The mode to run Busco in. One of genome, proteins, or transcriptome - pattern: "{genome,proteins,transcriptome}" - - - lineage: - type: string - description: The BUSCO lineage to use, or "auto", "auto_prok" or "auto_euk" - to automatically select lineage - - - busco_lineages_path: - type: directory - description: Path to local BUSCO lineages directory. - - - config_file: - type: file - description: Path to BUSCO config file. - - - clean_intermediates: - type: boolean - description: Flag to remove intermediate files. + ontologies: [] + - mode: + type: string + description: The mode to run Busco in. One of genome, proteins, or + transcriptome + pattern: "{genome,proteins,transcriptome}" + - lineage: + type: string + description: The BUSCO lineage to use, or "auto", "auto_prok" or "auto_euk" + to automatically select lineage + - busco_lineages_path: + type: directory + description: Path to local BUSCO lineages directory. + - config_file: + type: file + description: Path to BUSCO config file. + ontologies: [] + - clean_intermediates: + type: boolean + description: Flag to remove intermediate files. 
output: - - batch_summary: - - meta: + batch_summary: + - - meta: type: map description: | Groovy Map containing sample information @@ -54,8 +59,9 @@ output: type: file description: Summary of all sequence files analyzed pattern: "*-busco.batch_summary.txt" - - short_summaries_txt: - - meta: + ontologies: [] + short_summaries_txt: + - - meta: type: map description: | Groovy Map containing sample information @@ -64,8 +70,9 @@ output: type: file description: Short Busco summary in plain text format pattern: "short_summary.*.txt" - - short_summaries_json: - - meta: + ontologies: [] + short_summaries_json: + - - meta: type: map description: | Groovy Map containing sample information @@ -74,8 +81,10 @@ output: type: file description: Short Busco summary in JSON format pattern: "short_summary.*.json" - - log: - - meta: + ontologies: + - edam: http://edamontology.org/format_3464 + log: + - - meta: type: map description: | Groovy Map containing sample information @@ -84,8 +93,9 @@ output: type: file description: BUSCO main log pattern: "*-busco.log" - - full_table: - - meta: + ontologies: [] + full_table: + - - meta: type: map description: | Groovy Map containing sample information @@ -94,8 +104,10 @@ output: type: file description: Full BUSCO results table pattern: "full_table.tsv" - - missing_busco_list: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 + missing_busco_list: + - - meta: type: map description: | Groovy Map containing sample information @@ -104,8 +116,10 @@ output: type: file description: List of missing BUSCOs pattern: "missing_busco_list.tsv" - - single_copy_proteins: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 + single_copy_proteins: + - - meta: type: map description: | Groovy Map containing sample information @@ -114,8 +128,9 @@ output: type: file description: Fasta file of single copy proteins (transcriptome mode) pattern: "single_copy_proteins.faa" - - seq_dir: - - meta: + ontologies: [] + seq_dir: + - - 
meta: type: map description: | Groovy Map containing sample information @@ -124,19 +139,19 @@ output: type: directory description: BUSCO sequence directory pattern: "busco_sequences" - - translated_dir: - - meta: + translated_dir: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test' ] - "*-busco/*/translated_proteins": type: directory - description: Six frame translations of each transcript made by the transcriptome - mode + description: Six frame translations of each transcript made by the + transcriptome mode pattern: "translated_dir" - - busco_dir: - - meta: + busco_dir: + - - meta: type: map description: | Groovy Map containing sample information @@ -145,18 +160,19 @@ output: type: directory description: BUSCO lineage specific output pattern: "*-busco" - - downloaded_lineages: - - meta: + downloaded_lineages: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test' ] - - "busco_downloads/lineages/*": + - busco_downloads/lineages/*: type: directory - description: Lineages downloaded by BUSCO when running the analysis, for example bacteria_odb12 + description: Lineages downloaded by BUSCO when running the analysis, for + example bacteria_odb12 pattern: "busco_downloads/lineages/*" - - single_copy_faa: - - meta: + single_copy_faa: + - - meta: type: map description: | Groovy Map containing sample information @@ -165,8 +181,9 @@ output: type: file description: Single copy .faa sequence files pattern: "*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.faa" - - single_copy_fna: - - meta: + ontologies: [] + single_copy_fna: + - - meta: type: map description: | Groovy Map containing sample information @@ -175,11 +192,28 @@ output: type: file description: Single copy .fna sequence files pattern: "*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.fna" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" 
+ ontologies: [] + versions_busco: + - - ${task.process}: + type: string + description: The name of the process + - busco: + type: string + description: The name of the tool + - busco --version 2> /dev/null | sed 's/BUSCO //g': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - busco: + type: string + description: The name of the tool + - busco --version 2> /dev/null | sed 's/BUSCO //g': + type: eval + description: The expression to obtain the version of the tool authors: - "@priyanka-surana" - "@charles-plessy" diff --git a/modules/nf-core/busco/busco/tests/main.nf.test b/modules/nf-core/busco/busco/tests/main.nf.test index 411ceb86..3f1ad0d0 100644 --- a/modules/nf-core/busco/busco/tests/main.nf.test +++ b/modules/nf-core/busco/busco/tests/main.nf.test @@ -24,7 +24,7 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) ] input[1] = 'genome' - input[2] = 'bacteria_odb12' // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues + input[2] = 'bacteria_odb10' // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues input[3] = [] // Download busco lineage input[4] = [] // No config input[5] = false // Clean intermediates @@ -54,7 +54,7 @@ nextflow_process { process.out.batch_summary[0][1], process.out.full_table[0][1], process.out.missing_busco_list[0][1], - process.out.versions[0] + process.out.findAll { key, val -> key.startsWith("versions")} ).match() with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { @@ -92,7 +92,7 @@ nextflow_process { ] ] input[1] = 'genome' - input[2] = 'bacteria_odb12' + input[2] = 'bacteria_odb10' input[3] = [] input[4] = [] input[5] = false @@ -136,7 +136,7 @@ nextflow_process { 
assert snapshot( process.out.full_table[0][1], process.out.missing_busco_list[0][1], - process.out.versions[0] + process.out.findAll { key, val -> key.startsWith("versions")} ).match() with(file(process.out.seq_dir[0][1][0]).listFiles().collect { it.name }) { @@ -163,72 +163,6 @@ nextflow_process { } - test("test_busco_eukaryote_metaeuk") { - - config './nextflow.config' - - when { - params { - busco_args = '--tar --metaeuk' - } - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) - ] - input[1] = 'genome' - input[2] = 'eukaryota_odb10' - input[3] = [] - input[4] = [] - input[5] = false - """ - } - } - - then { - assert process.success - - with(path(process.out.short_summaries_txt[0][1]).text) { - assert contains('BUSCO version') - assert contains('The lineage dataset is') - assert contains('BUSCO was run in mode') - assert contains('Complete BUSCOs') - assert contains('Missing BUSCOs') - assert contains('Dependencies and versions') - } - - with(path(process.out.short_summaries_json[0][1]).text) { - assert contains('one_line_summary') - assert contains('mode') - assert contains('dataset') - } - - assert snapshot( - process.out.batch_summary[0][1], - process.out.full_table[0][1], - process.out.missing_busco_list[0][1], - process.out.versions[0] - ).match() - - with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { - assert contains('single_copy_busco_sequences.tar.gz') - assert contains('multi_copy_busco_sequences.tar.gz') - assert contains('fragmented_busco_sequences.tar.gz') - } - - with(path(process.out.log[0][1]).text) { - assert contains('DEBUG:busco.run_BUSCO') - assert contains('Results from dataset') - assert contains('how to cite BUSCO') - - } - - assert process.out.single_copy_proteins == [] - assert process.out.translated_dir == [] - } - - } test("test_busco_eukaryote_augustus") { @@ -258,7 +192,7 @@ nextflow_process { 
assert snapshot( process.out.batch_summary[0][1], - process.out.versions[0] + process.out.findAll { key, val -> key.startsWith("versions")} ).match() with(path(process.out.log[0][1]).text) { @@ -292,7 +226,7 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/proteome.fasta', checkIfExists: true) ] input[1] = 'proteins' - input[2] = 'bacteria_odb12' + input[2] = 'bacteria_odb10' input[3] = [] input[4] = [] input[5] = false @@ -322,7 +256,7 @@ nextflow_process { process.out.batch_summary[0][1], process.out.full_table[0][1], process.out.missing_busco_list[0][1], - process.out.versions[0] + process.out.findAll { key, val -> key.startsWith("versions")} ).match() with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { @@ -358,7 +292,7 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true) ] input[1] = 'transcriptome' - input[2] = 'bacteria_odb12' + input[2] = 'bacteria_odb10' input[3] = [] input[4] = [] input[5] = false @@ -390,7 +324,7 @@ nextflow_process { process.out.missing_busco_list[0][1], process.out.translated_dir[0][1], process.out.single_copy_proteins[0][1], - process.out.versions[0] + process.out.findAll { key, val -> key.startsWith("versions")} ).match() with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { @@ -423,7 +357,7 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) ] input[1] = 'genome' - input[2] = 'bacteria_odb12' + input[2] = 'bacteria_odb10' input[3] = [] input[4] = [] input[5] = true @@ -438,7 +372,7 @@ nextflow_process { process.out.batch_summary[0][1], process.out.full_table[0][1], process.out.missing_busco_list[0][1], - process.out.versions[0] + process.out.findAll { key, val -> key.startsWith("versions")} ).match() 
with(path(process.out.log[0][1]).text) { @@ -467,7 +401,7 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) ] input[1] = 'genome' - input[2] = 'bacteria_odb12' + input[2] = 'bacteria_odb10' input[3] = [] input[4] = [] input[5] = false @@ -480,9 +414,9 @@ nextflow_process { { assert process.success }, { assert snapshot( process.out.batch_summary, - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions")} ).match() } ) } } -} \ No newline at end of file +} diff --git a/modules/nf-core/busco/busco/tests/main.nf.test.snap b/modules/nf-core/busco/busco/tests/main.nf.test.snap index 1026524b..88d87bf9 100644 --- a/modules/nf-core/busco/busco/tests/main.nf.test.snap +++ b/modules/nf-core/busco/busco/tests/main.nf.test.snap @@ -6,157 +6,177 @@ { "id": "test" }, - "test-bacteria_odb12-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - [ - "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" - ] + { + "versions_busco": [ + [ + "BUSCO_BUSCO", + "busco", + "6.0.0" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2025-03-12T10:50:57.218573431" + "timestamp": "2026-02-10T14:56:39.925901" }, "test_busco_eukaryote_augustus": { "content": [ "test-eukaryota_odb10-busco.batch_summary.txt:md5,3ea3bdc423a461dae514d816bdc61c89", - "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" + { + "versions_busco": [ + [ + "BUSCO_BUSCO", + "busco", + "6.0.0" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2025-03-12T10:44:25.359421247" + "timestamp": "2026-02-10T14:55:25.677229" }, "test_busco_genome_single_fasta": { "content": [ - 
"test-bacteria_odb12-busco.batch_summary.txt:md5,e3e503e1540b633d95c273c465945740", - "full_table.tsv:md5,086f2ecdc90d47745c828c9b25357039", - "missing_busco_list.tsv:md5,9919aee2da9d30a3985aede354850a46", - "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" + "test-bacteria_odb10-busco.batch_summary.txt:md5,12e911830d66bab6dbf3523ac4392597", + "full_table.tsv:md5,660e2f556ca6efa97f0c2a8cebd94786", + "missing_busco_list.tsv:md5,0e08587f4dc65d9226a31433c1f9ba25", + { + "versions_busco": [ + [ + "BUSCO_BUSCO", + "busco", + "6.0.0" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2025-03-12T10:41:46.251404188" + "timestamp": "2026-02-10T14:54:16.672124" }, "test_busco_genome_multi_fasta": { "content": [ [ - "full_table.tsv:md5,5a6bf59055e2040e74797a1e36c8e374", - "full_table.tsv:md5,086f2ecdc90d47745c828c9b25357039" + "full_table.tsv:md5,26b1d35d975593834acb4d4a91e225a1", + "full_table.tsv:md5,660e2f556ca6efa97f0c2a8cebd94786" ], [ - "missing_busco_list.tsv:md5,a55eee6869fad9176d812e59886232fb", - "missing_busco_list.tsv:md5,9919aee2da9d30a3985aede354850a46" + "missing_busco_list.tsv:md5,5dcdc7707035904a7d467ca1026b399a", + "missing_busco_list.tsv:md5,0e08587f4dc65d9226a31433c1f9ba25" ], - "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-03-12T10:42:28.126899794" - }, - "test_busco_eukaryote_metaeuk": { - "content": [ - "test-eukaryota_odb10-busco.batch_summary.txt:md5,ff6d8277e452a83ce9456bbee666feb6", - "full_table.tsv:md5,cfb55ab2ce590d2def51926324691aa8", - "missing_busco_list.tsv:md5,77e3d4503b2c13db0d611723fc83ab7e", - "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" + { + "versions_busco": [ + [ + "BUSCO_BUSCO", + "busco", + "6.0.0" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": 
"2025-03-12T10:43:59.997031348" + "timestamp": "2026-02-10T14:55:02.956339" }, "test_busco_cleanup": { "content": [ - "test-bacteria_odb12-busco.batch_summary.txt:md5,e3e503e1540b633d95c273c465945740", - "full_table.tsv:md5,086f2ecdc90d47745c828c9b25357039", - "missing_busco_list.tsv:md5,9919aee2da9d30a3985aede354850a46", - "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" + "test-bacteria_odb10-busco.batch_summary.txt:md5,12e911830d66bab6dbf3523ac4392597", + "full_table.tsv:md5,660e2f556ca6efa97f0c2a8cebd94786", + "missing_busco_list.tsv:md5,0e08587f4dc65d9226a31433c1f9ba25", + { + "versions_busco": [ + [ + "BUSCO_BUSCO", + "busco", + "6.0.0" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2025-03-12T10:50:48.928173488" + "timestamp": "2026-02-10T14:56:34.94761" }, "test_busco_transcriptome": { "content": [ - "test-bacteria_odb12-busco.batch_summary.txt:md5,6cd69d8a66b5f8b7fd4a9de758e7a739", - "full_table.tsv:md5,4efc19f8d2cc7ea9e73425f09cb3ed97", - "missing_busco_list.tsv:md5,55f0322d494e5c165508712be63062bf", + "test-bacteria_odb10-busco.batch_summary.txt:md5,8734b3f379c4c0928e5dd4ea1873dc64", + "full_table.tsv:md5,645b65b725fd8b30ff6808e0ac671a73", + "missing_busco_list.tsv:md5,b1cc1c22d484439ac128af2290d7d9dd", [ - "9767721at2.faa:md5,1731738ca153959391f8302fd5a3679f", - "9778364at2.faa:md5,7a19a6b6696ae53efce30457b4dd1ab2", - "9782003at2.faa:md5,65d2a613c903852681981f8e8427dc70", - "9790352at2.faa:md5,5e18cfb68122dff7a61c5517246223fc", - "9791908at2.faa:md5,707ef4501f93a6e0dc217e037f26da54", - "9793681at2.faa:md5,e361d654145e70f06c386e75ad90f943", - "9800696at2.faa:md5,9e2f431e4aada7bdc2c317747105b874", - "9801107at2.faa:md5,83933b1426fc9abfe8891c49838cd02f", - "9801213at2.faa:md5,ec340354a86728189c3d1a294c0ccbad", - "9801753at2.faa:md5,39c09bd8a831c90aab44ded14c56d0e6", - "9802065at2.faa:md5,8361fa013dc1cd29af938c9d5ffebfe4", - 
"9802219at2.faa:md5,9e23aed07790f460da634f7f6132e73d", - "9802304at2.faa:md5,86b259197441716075f3d3d18f8743ba", - "9802309at2.faa:md5,b4b4613e9b69baa9274140c1b26cc27b", - "9802672at2.faa:md5,6c6d592c2fbb0d7a4e5e1f47a15644f0", - "9803420at2.faa:md5,eec6f7189ce9a596ed6ead06f2229c8a", - "9803541at2.faa:md5,132954cc7bfcb1c1fe9da105867c4b78", - "9803667at2.faa:md5,ec31d499f6b523cb081af6a3284a5a5c", - "9803773at2.faa:md5,efbe4c35075dd8c871827d4e5ac72922", - "9804006at2.faa:md5,fca5b560714ba37be0be3e2597f74c5a", - "9804243at2.faa:md5,3280570e4357fb4daedaea8a066dbf0b", - "9804478at2.faa:md5,98c2cfd8f089812a41a1e66fea630b2d", - "9804933at2.faa:md5,de648025c49061c614c77e7c9ce7ab62", - "9805026at2.faa:md5,eea9da88f3cd718514493d6890bf7660", - "9806637at2.faa:md5,c8a9e0c37a8aeb1fd44db64fd93aa3e1", - "9806651at2.faa:md5,f5abacf8930d78c81fdeb0c91c8681a7", - "9807064at2.faa:md5,1167d5c4c044b4eb82fac5d1955e7130", - "9807233at2.faa:md5,7c8adb6556a7f9a0244e7c7e5f75f20d", - "9807240at2.faa:md5,2eff2de1ab83b22f3234a529a44e22bb", - "9807458at2.faa:md5,bee695d260b2b7f8980a636fed6aa0c0", - "9808036at2.faa:md5,797ca476d2c7820151fec98d2815d6cb", - "9808348at2.faa:md5,4e8573a5d287e01aa4f5de8b48feaa42", - "9808936at2.faa:md5,30333f3f62f8e3d0ea6f6544d49572c6", - "9809052at2.faa:md5,0590efbf94fce0ad212513dcb2e8176f", - "9809084at2.faa:md5,37e6214b4204dc31858e2ef2bad5db4a", - "9809356at2.faa:md5,e18c1d5a4931a25baf7dbd1a40c417dc", - "9809796at2.faa:md5,857aac8a22c00472bfc9add7fde94c5c", - "9810191at2.faa:md5,72b63933bb045b680e0635eb03915cc0", - "9811804at2.faa:md5,da341c24e763a949d16432bb052af321", - "9812272at2.faa:md5,7a54f872dd8243c6814852d40cf1bfc0", - "9812943at2.faa:md5,149da17f067cdce328a73f6364a95b26", - "9813375at2.faa:md5,49835b9f3188434c771a840b628b07f6", - "9814755at2.faa:md5,9b4c4648d250c2e6d04acb78f9cf6df0" + "1024388at2.faa:md5,797d603d262a6595a112e25b73e878b0", + "1054741at2.faa:md5,cd4b928cba6b19b4437746ba507e7195", + "1093223at2.faa:md5,df9549708e5ffcfaee6a74dd70a0e5dc", + 
"1151822at2.faa:md5,12726afc1cdc40c13392e1596e93df3a", + "143460at2.faa:md5,d887431fd988a5556a523440f02d9594", + "1491686at2.faa:md5,d03362d19979b27306c192f1c74a84e5", + "1504821at2.faa:md5,4f5f6e5c57bac0092c1d85ded73d7e67", + "1574817at2.faa:md5,1153e55998c2929eacad2aed7d08d248", + "1592033at2.faa:md5,bb7a59e5f3a57ba12d10dabf4c77ab57", + "1623045at2.faa:md5,8fe38155feb1802beb97ef7714837bf5", + "1661836at2.faa:md5,6c6d592c2fbb0d7a4e5e1f47a15644f0", + "1674344at2.faa:md5,bb41b44e53565a54cadf0b780532fe08", + "1698718at2.faa:md5,f233860000028eb00329aa85236c71e5", + "1990650at2.faa:md5,34a2d29c5f8b6253159ddb7a43fa1829", + "223233at2.faa:md5,dec6705c7846c989296e73942f953cbc", + "402899at2.faa:md5,acc0f271f9a586d2ce1ee41669b22999", + "505485at2.faa:md5,aa0391f8fa5d9bd19b30d844d5a99845", + "665824at2.faa:md5,47f8ad43b6a6078206feb48c2e552793", + "776861at2.faa:md5,f8b90c13f7c6be828dea3bb920195e3d", + "874197at2.faa:md5,8d22a35a768debe6f376fc695d233a69", + "932854at2.faa:md5,2eff2de1ab83b22f3234a529a44e22bb", + "95696at2.faa:md5,247bfd1aef432f7b5456307768e9149c" ], - "single_copy_proteins.faa:md5,14124def13668c6d9b0d589207754b31", - "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" + "single_copy_proteins.faa:md5,73e2c5d6a9b0f01f2deea3cc5f21b764", + { + "versions_busco": [ + [ + "BUSCO_BUSCO", + "busco", + "6.0.0" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2025-03-12T10:45:08.029718703" + "timestamp": "2026-02-10T14:55:59.115826" }, "test_busco_protein": { "content": [ - "test-bacteria_odb12-busco.batch_summary.txt:md5,44d4cdebd61a3c8e8981ddf1829f83b3", - "full_table.tsv:md5,350f9b1b6c37cfcf41be84e93ef41931", - "missing_busco_list.tsv:md5,a55eee6869fad9176d812e59886232fb", - "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" + "test-bacteria_odb10-busco.batch_summary.txt:md5,942dbb2d8ff26240860a794213db14a8", + "full_table.tsv:md5,4db33686f2755a09fdc9521ca89411bc", + 
"missing_busco_list.tsv:md5,5dcdc7707035904a7d467ca1026b399a", + { + "versions_busco": [ + [ + "BUSCO_BUSCO", + "busco", + "6.0.0" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2025-03-12T10:44:44.094048564" + "timestamp": "2026-02-10T14:55:41.794334" } } \ No newline at end of file diff --git a/modules/nf-core/bwamem2/index/environment.yml b/modules/nf-core/bwamem2/index/environment.yml new file mode 100644 index 00000000..f3637444 --- /dev/null +++ b/modules/nf-core/bwamem2/index/environment.yml @@ -0,0 +1,13 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + # renovate: datasource=conda depName=bioconda/bwa-mem2 + - bwa-mem2=2.3 + # renovate: datasource=conda depName=bioconda/htslib + - htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - samtools=1.22.1 diff --git a/modules/nf-core/bwamem2/index/main.nf b/modules/nf-core/bwamem2/index/main.nf new file mode 100644 index 00000000..cb2c4bb2 --- /dev/null +++ b/modules/nf-core/bwamem2/index/main.nf @@ -0,0 +1,45 @@ +process BWAMEM2_INDEX { + tag "$fasta" + // NOTE Requires 28N GB memory where N is the size of the reference sequence, floor of 280M + // source: https://github.com/bwa-mem2/bwa-mem2/issues/9 + memory { 280.MB * Math.ceil(fasta.size() / 10000000) * task.attempt } + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/e0/e05ce34b46ad42810eb29f74e4e304c0cb592b2ca15572929ed8bbaee58faf01/data' : + 'community.wave.seqera.io/library/bwa-mem2_htslib_samtools:db98f81f55b64113' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("bwamem2"), emit: index + tuple val("${task.process}"), val('bwamem2'), eval('bwa-mem2 version | grep -o -E "[0-9]+(\\.[0-9]+)+"'), emit: versions_bwamem2, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${fasta}" + def args = task.ext.args ?: '' + """ + mkdir bwamem2 + bwa-mem2 \\ + index \\ + $args \\ + -p bwamem2/${prefix} \\ + $fasta + """ + + stub: + def prefix = task.ext.prefix ?: "${fasta}" + + """ + mkdir bwamem2 + touch bwamem2/${prefix}.0123 + touch bwamem2/${prefix}.ann + touch bwamem2/${prefix}.pac + touch bwamem2/${prefix}.amb + touch bwamem2/${prefix}.bwt.2bit.64 + """ +} diff --git a/modules/nf-core/bwamem2/index/meta.yml b/modules/nf-core/bwamem2/index/meta.yml new file mode 100644 index 00000000..12074860 --- /dev/null +++ b/modules/nf-core/bwamem2/index/meta.yml @@ -0,0 +1,73 @@ +name: bwamem2_index +description: Create BWA-mem2 index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - bwamem2: + description: | + BWA-mem2 is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/bwa-mem2/bwa-mem2 + documentation: https://github.com/bwa-mem2/bwa-mem2#usage + licence: + - "MIT" + identifier: "biotools:bwa-mem2" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file + ontologies: + - edam: "http://edamontology.org/data_2044" + - edam: "http://edamontology.org/format_1929" +output: + index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bwamem2: + type: string + description: BWA genome index files + pattern: "*.{0123,amb,ann,bwt.2bit.64,pac}" + ontologies: + - edam: "http://edamontology.org/data_3210" + versions_bwamem2: + - - ${task.process}: + type: string + description: The name of the process + - bwamem2: + type: string + description: BWA genome index files + pattern: "*.{0123,amb,ann,bwt.2bit.64,pac}" + ontologies: + - edam: "http://edamontology.org/data_3210" + - bwa-mem2 version | grep -o -E "[0-9]+(\.[0-9]+)+": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - bwamem2: + type: string + description: BWA genome index files + pattern: "*.{0123,amb,ann,bwt.2bit.64,pac}" + ontologies: + - edam: "http://edamontology.org/data_3210" + - bwa-mem2 version | grep -o -E "[0-9]+(\.[0-9]+)+": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/bwamem2/index/tests/main.nf.test b/modules/nf-core/bwamem2/index/tests/main.nf.test new file mode 100644 index 00000000..3ee91048 --- /dev/null +++ b/modules/nf-core/bwamem2/index/tests/main.nf.test @@ -0,0 +1,54 @@ +nextflow_process { + + name "Test Process BWAMEM2_INDEX" + tag "modules_nfcore" + tag "modules" + tag "bwamem2" + tag "bwamem2/index" + script "../main.nf" + process "BWAMEM2_INDEX" + + test("fasta") { + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ 
+ } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/bwamem2/index/tests/main.nf.test.snap b/modules/nf-core/bwamem2/index/tests/main.nf.test.snap new file mode 100644 index 00000000..776e87be --- /dev/null +++ b/modules/nf-core/bwamem2/index/tests/main.nf.test.snap @@ -0,0 +1,108 @@ +{ + "fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.fasta.0123:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.amb:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.ann:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.bwt.2bit.64:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.pac:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + "BWAMEM2_INDEX", + "bwamem2", + "2.2.1" + ] + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.fasta.0123:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.amb:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.ann:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.bwt.2bit.64:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.pac:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_bwamem2": [ + [ + "BWAMEM2_INDEX", + "bwamem2", + "2.2.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T16:19:38.013344" + }, + "fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.fasta.0123:md5,b02870de80106104abcb03cd9463e7d8", + "genome.fasta.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.fasta.ann:md5,c32e11f6c859f166c7525a9c1d583567", + 
"genome.fasta.bwt.2bit.64:md5,d097a1b82dee375d41a1ea69895a9216", + "genome.fasta.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66" + ] + ] + ], + "1": [ + [ + "BWAMEM2_INDEX", + "bwamem2", + "2.2.1" + ] + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.fasta.0123:md5,b02870de80106104abcb03cd9463e7d8", + "genome.fasta.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.fasta.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.fasta.bwt.2bit.64:md5,d097a1b82dee375d41a1ea69895a9216", + "genome.fasta.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66" + ] + ] + ], + "versions_bwamem2": [ + [ + "BWAMEM2_INDEX", + "bwamem2", + "2.2.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T16:19:32.542622" + } +} \ No newline at end of file diff --git a/modules/nf-core/bwamem2/mem/environment.yml b/modules/nf-core/bwamem2/mem/environment.yml new file mode 100644 index 00000000..f3637444 --- /dev/null +++ b/modules/nf-core/bwamem2/mem/environment.yml @@ -0,0 +1,13 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + # renovate: datasource=conda depName=bioconda/bwa-mem2 + - bwa-mem2=2.3 + # renovate: datasource=conda depName=bioconda/htslib + - htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - samtools=1.22.1 diff --git a/modules/nf-core/bwamem2/mem/main.nf b/modules/nf-core/bwamem2/mem/main.nf new file mode 100644 index 00000000..d1c0ac8f --- /dev/null +++ b/modules/nf-core/bwamem2/mem/main.nf @@ -0,0 +1,71 @@ +process BWAMEM2_MEM { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/e0/e05ce34b46ad42810eb29f74e4e304c0cb592b2ca15572929ed8bbaee58faf01/data' : + 'community.wave.seqera.io/library/bwa-mem2_htslib_samtools:db98f81f55b64113' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(index) + tuple val(meta3), path(fasta) + val sort_bam + + output: + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram, optional:true + tuple val(meta), path("*.crai") , emit: crai, optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + tuple val("${task.process}"), val('bwamem2'), eval('bwa-mem2 version | grep -o -E "[0-9]+(\\.[0-9]+)+"'), emit: versions_bwamem2, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 'sort' : 'view' + + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + bwa-mem2 \\ + mem \\ + $args \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + | samtools $samtools_command $args2 -@ $task.cpus ${reference} -o ${prefix}.${extension} - + """ + + stub: + + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? 
extension_matcher[0][2].toLowerCase() : "bam" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } + + """ + touch ${prefix}.${extension} + ${create_index} + """ +} diff --git a/modules/nf-core/bwamem2/mem/meta.yml b/modules/nf-core/bwamem2/mem/meta.yml new file mode 100644 index 00000000..bcfd006d --- /dev/null +++ b/modules/nf-core/bwamem2/mem/meta.yml @@ -0,0 +1,148 @@ +name: bwamem2_mem +description: Performs fastq alignment to a fasta reference using BWA +keywords: + - mem + - bwa + - alignment + - map + - fastq + - bam + - sam +tools: + - bwa: + description: | + BWA-mem2 is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/bwa-mem2/bwa-mem2 + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: + - "MIT" + identifier: "biotools:bwa-mem2" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + ontologies: + - edam: "http://edamontology.org/data_2044" + - edam: "http://edamontology.org/format_1930" + - - meta2: + type: map + description: | + Groovy Map containing reference/index information + e.g. [ id:'test' ] + - index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{0132,amb,ann,bwt.2bit.64,pac}" + ontologies: + - edam: "http://edamontology.org/data_3210" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: Reference genome in FASTA format + pattern: "*.{fa,fasta,fna}" + ontologies: + - edam: "http://edamontology.org/data_2044" + - edam: "http://edamontology.org/format_1929" + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + sam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.sam": + type: file + description: Output SAM file containing read alignments + pattern: "*.{sam}" + ontologies: + - edam: "http://edamontology.org/format_2573" + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + ontologies: + - edam: "http://edamontology.org/format_2572" + cram: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Output CRAM file containing read alignments + pattern: "*.{cram}" + ontologies: + - edam: "http://edamontology.org/format_3462" + crai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: Index file for CRAM file + pattern: "*.{crai}" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.csi": + type: file + description: Index file for BAM file + pattern: "*.{csi}" + ontologies: [] + versions_bwamem2: + - - ${task.process}: + type: string + description: The name of the process + - bwamem2: + type: string + description: The name of the tool + - bwa-mem2 version | grep -o -E "[0-9]+(\.[0-9]+)+": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - bwamem2: + type: string + description: The name of the tool + - bwa-mem2 version | grep -o -E "[0-9]+(\.[0-9]+)+": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@maxulysse" + - "@matthdsm" +maintainers: + - "@maxulysse" + - "@matthdsm" diff --git a/modules/nf-core/bwamem2/mem/tests/main.nf.test b/modules/nf-core/bwamem2/mem/tests/main.nf.test new file mode 100644 index 00000000..20e37254 --- /dev/null +++ b/modules/nf-core/bwamem2/mem/tests/main.nf.test @@ -0,0 +1,179 @@ +nextflow_process { + + name "Test Process BWAMEM2_MEM" + script "../main.nf" + process "BWAMEM2_MEM" + + tag "modules" + tag "modules_nfcore" + tag "bwamem2" + tag "bwamem2/mem" + tag "bwamem2/index" + + setup { + run("BWAMEM2_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = Channel.of([ + [:], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + ]) + """ + } + } + } + + test("sarscov2 - fastq, index, fasta, false") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = false + """ + } + } + 
+ then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeaderMD5(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeaderMD5(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeaderMD5(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', 
single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeaderMD5(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap b/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap new file mode 100644 index 00000000..74763935 --- /dev/null +++ b/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap @@ -0,0 +1,161 @@ +{ + "sarscov2 - [fastq1, fastq2], index, fasta, false": { + "content": [ + "e414c2d48e2e44c2c52c20ecd88e8bd8", + "57aeef88ed701a8ebc8e2f0a381b2a6", + { + "versions_bwamem2": [ + [ + "BWAMEM2_MEM", + "bwamem2", + "2.2.1" + ] + ] + } + ], + 
"meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T16:25:00.500092" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + "BWAMEM2_MEM", + "bwamem2", + "2.2.1" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + + ], + "versions_bwamem2": [ + [ + "BWAMEM2_MEM", + "bwamem2", + "2.2.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T16:25:22.004027" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true": { + "content": [ + "716ed1ef39deaad346ca7cf86e08f959", + "af8628d9df18b2d3d4f6fd47ef2bb872", + { + "versions_bwamem2": [ + [ + "BWAMEM2_MEM", + "bwamem2", + "2.2.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T16:25:14.131056" + }, + "sarscov2 - fastq, index, fasta, false": { + "content": [ + "283a83f604f3f5338acedfee349dccf4", + "798439cbd7fd81cbcc5078022dc5479d", + { + "versions_bwamem2": [ + [ + "BWAMEM2_MEM", + "bwamem2", + "2.2.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T16:24:34.624533" + }, + "sarscov2 - fastq, index, fasta, true": { + "content": [ + "ed99048bb552cac58e39923b550b6d5b", + "94fcf617f5b994584c4e8d4044e16b4f", + { + "versions_bwamem2": [ + [ + "BWAMEM2_MEM", + "bwamem2", + "2.2.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" 
+ }, + "timestamp": "2026-02-09T16:24:47.191245" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastp/environment.yml similarity index 68% rename from modules/nf-core/fastqc/environment.yml rename to modules/nf-core/fastp/environment.yml index f9f54ee9..0c36eed2 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/fastp/environment.yml @@ -4,4 +4,5 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::fastqc=0.12.1 + # renovate: datasource=conda depName=bioconda/fastp + - bioconda::fastp=1.0.1 diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf new file mode 100644 index 00000000..e13509ca --- /dev/null +++ b/modules/nf-core/fastp/main.nf @@ -0,0 +1,104 @@ +process FASTP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/527b18847a97451091dba07a886b24f17f742a861f9f6c9a6bfb79d4f1f3bf9d/data' : + 'community.wave.seqera.io/library/fastp:1.0.1--c8b87fe62dcc103c' }" + + input: + tuple val(meta), path(reads), path(adapter_fasta) + val discard_trimmed_pass + val save_trimmed_fail + val save_merged + + output: + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.html') , emit: html + tuple val(meta), path('*.log') , emit: log + tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail + tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged + tuple val("${task.process}"), val('fastp'), eval('fastp --version 2>&1 | sed -e "s/fastp //g"'), emit: versions_fastp, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + 
def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_R1.fail.fastq.gz --unpaired2 ${prefix}_R2.fail.fastq.gz" : '' + def out_fq1 = discard_trimmed_pass ?: ( meta.single_end ? "--out1 ${prefix}.fastp.fastq.gz" : "--out1 ${prefix}_R1.fastp.fastq.gz" ) + def out_fq2 = discard_trimmed_pass ?: "--out2 ${prefix}_R2.fastp.fastq.gz" + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2>| >(tee ${prefix}.fastp.log >&2) \\ + | gzip -c > ${prefix}.fastp.fastq.gz + """ + } else if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --in1 ${prefix}.fastq.gz \\ + $out_fq1 \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2>| >(tee ${prefix}.fastp.log >&2) + """ + } else { + def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' + """ + [ ! -f ${prefix}_R1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_R1.fastq.gz + [ ! 
-f ${prefix}_R2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_R2.fastq.gz + fastp \\ + --in1 ${prefix}_R1.fastq.gz \\ + --in2 ${prefix}_R2.fastq.gz \\ + $out_fq1 \\ + $out_fq2 \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $merge_fastq \\ + --thread $task.cpus \\ + --detect_adapter_for_pe \\ + $args \\ + 2>| >(tee ${prefix}.fastp.log >&2) + """ + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end + def touch_reads = (discard_trimmed_pass) ? "" : (is_single_output) ? "echo '' | gzip > ${prefix}.fastp.fastq.gz" : "echo '' | gzip > ${prefix}_R1.fastp.fastq.gz ; echo '' | gzip > ${prefix}_R2.fastp.fastq.gz" + def touch_merged = (!is_single_output && save_merged) ? "echo '' | gzip > ${prefix}.merged.fastq.gz" : "" + def touch_fail_fastq = (!save_trimmed_fail) ? "" : meta.single_end ? "echo '' | gzip > ${prefix}.fail.fastq.gz" : "echo '' | gzip > ${prefix}.paired.fail.fastq.gz ; echo '' | gzip > ${prefix}_R1.fail.fastq.gz ; echo '' | gzip > ${prefix}_R2.fail.fastq.gz" + """ + $touch_reads + $touch_fail_fastq + $touch_merged + touch "${prefix}.fastp.json" + touch "${prefix}.fastp.html" + touch "${prefix}.fastp.log" + """ +} diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml new file mode 100644 index 00000000..a67be395 --- /dev/null +++ b/modules/nf-core/fastp/meta.yml @@ -0,0 +1,144 @@ +name: fastp +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - quality control + - fastq +tools: + - fastp: + description: | + A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. 
+ documentation: https://github.com/OpenGene/fastp + doi: 10.1093/bioinformatics/bty560 + licence: ["MIT"] + identifier: biotools:fastp +input: + - - meta: + type: map + description: | + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + ontologies: [] + - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. + pattern: "*.{fasta,fna,fas,fa}" + ontologies: [] + - discard_trimmed_pass: + type: boolean + description: | + Specify true to not write any reads that pass trimming thresholds. + This can be used to use fastp for the output report only. + - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds + ending in `*.fail.fastq.gz` + - save_merged: + type: boolean + description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` +output: + reads: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastp.fastq.gz": + type: file + description: The trimmed/modified/unmerged fastq reads + pattern: "*fastp.fastq.gz" + ontologies: + - edam: http://edamontology.org/format_1930 # FASTQ + - edam: http://edamontology.org/format_3989 # GZIP format + json: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.json": + type: file + description: Results in JSON format + pattern: "*.json" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON + html: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: Results in HTML format + pattern: "*.html" + ontologies: [] + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: fastq log file + pattern: "*.log" + ontologies: [] + reads_fail: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fail.fastq.gz": + type: file + description: Reads the failed the preprocessing + pattern: "*fail.fastq.gz" + ontologies: + - edam: http://edamontology.org/format_1930 # FASTQ + - edam: http://edamontology.org/format_3989 # GZIP format + reads_merged: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.merged.fastq.gz": + type: file + description: Reads that were successfully merged + pattern: "*.{merged.fastq.gz}" + ontologies: [] + versions_fastp: + - - "${task.process}": + type: string + description: The name of the process + - fastp: + type: string + description: The name of the tool + - 'fastp --version 2>&1 | sed -e "s/fastp //g"': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - "${task.process}": + type: string + description: The name of the process + - fastp: + type: string + description: The name of the tool + - 'fastp --version 2>&1 | sed -e "s/fastp //g"': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@drpatelh" + - "@kevinmenden" + - "@eit-maxlcummins" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test new file mode 100644 index 00000000..b7901578 --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -0,0 +1,661 @@ +nextflow_process { + + name "Test Process FASTP" + script "../main.nf" + process "FASTP" + tag "modules" + tag "modules_nfcore" + tag "fastp" + + test("test_fastp_single_end") { + + when { + + process { + """ + adapter_fasta = [] // empty list for no adapter file! 
+ discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.findAll { key, val -> key.startsWith('versions') }).match() + } + ) + } + } + + test("test_fastp_paired_end") { + + when { + + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Q30 bases: 12281(88.3716%)") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } + + test("fastp test_fastp_interleaved") { + + config 
'./nextflow.interleaved.config' + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("paired end (151 cycles + 151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 162") }, + { assert process.out.reads_fail == [] }, + { assert process.out.reads_merged == [] }, + { assert snapshot( + process.out.reads, + process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } + + test("test_fastp_single_end_trim_fail") { + + when { + + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } + + test("test_fastp_paired_end_trim_fail") { + + config './nextflow.save_failed.config' + when { + 
process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 162") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } + + test("test_fastp_paired_end_merged") { + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("total reads: 75") }, + { assert snapshot( + 
process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.findAll { key, val -> key.startsWith('versions') }).match() }, + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist") { + + when { + process { + """ + adapter_fasta = file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = false + input[2] = false + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("
") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("total bases: 13683") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } + + test("test_fastp_single_end_qc_only") { + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.reads, + process.out.reads, + process.out.reads_fail, + process.out.reads_fail, + process.out.reads_merged, + process.out.reads_merged, + process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } + + test("test_fastp_paired_end_qc_only") { + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Q30 bases: 12281(88.3716%)") }, + { assert snapshot( + process.out.reads, + process.out.reads, + process.out.reads_fail, + process.out.reads_fail, + process.out.reads_merged, + process.out.reads_merged, + process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } + + test("test_fastp_single_end - stub") { + + options "-stub" + + when { + + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end - stub") { + + options "-stub" + + when { + + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("fastp - stub test_fastp_interleaved") { + + options "-stub" + + config './nextflow.interleaved.config' + when { + process { + """ 
+ adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_single_end_trim_fail - stub") { + + options "-stub" + + when { + + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_trim_fail - stub") { + + options "-stub" + + config './nextflow.save_failed.config' + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + 
test("test_fastp_paired_end_merged - stub") { + + options "-stub" + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist - stub") { + + options "-stub" + + when { + process { + """ + adapter_fasta = file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_single_end_qc_only - stub") { + + options "-stub" + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta 
+ ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_qc_only - stub") { + + options "-stub" + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap new file mode 100644 index 00000000..56772358 --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -0,0 +1,1376 @@ +{ + "test_fastp_single_end_qc_only - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] 
+ ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:52.14535813" + }, + "test_fastp_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", + "test_R2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" + ] + ] + ], + [ + + ], + [ + + ], + { + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:46:26.421773402" + }, + "test_fastp_paired_end_merged_adapterlist": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", + "test_R2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" + ] + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" + ] + ], + { + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:46:59.832295907" + }, + "test_fastp_single_end_qc_only": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + { + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:47:06.486959565" + }, + "test_fastp_paired_end_trim_fail": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,6ff32a64c5188b9a9192be1398c262c7", + 
"test_R2.fastp.fastq.gz:md5,db0cb7c9977e94ac2b4b446ebd017a8a" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,409b687c734cedd7a1fec14d316e1366", + "test_R1.fail.fastq.gz:md5,4f273cf3159c13f79e8ffae12f5661f6", + "test_R2.fail.fastq.gz:md5,f97b9edefb5649aab661fbc9e71fc995" + ] + ] + ], + [ + + ], + { + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:46:46.736511024" + }, + "fastp - stub test_fastp_interleaved": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:16.097071654" + }, + "test_fastp_single_end - stub": { + 
"content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:03.317192706" + }, + "test_fastp_paired_end_merged_adapterlist - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:44.851708205" + }, + "test_fastp_paired_end_merged - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" 
+ ] + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:37.581047713" + }, + "test_fastp_paired_end_merged": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", + "test_R2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" + ] + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" + ] + ], + { + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:46:53.190202914" + }, + "test_fastp_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + 
{ + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:09.585957282" + }, + "test_fastp_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7" + ] + ], + [ + + ], + [ + + ], + { + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:46:19.624824985" + }, + "test_fastp_single_end_trim_fail - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ 
+ { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_merged": [ + + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:22.800659826" + }, + "test_fastp_paired_end_trim_fail - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + 
"test_R1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "5": [ + + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_merged": [ + + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:30.271734068" + }, + "fastp test_fastp_interleaved": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,217d62dc13a23e92513a1bd8e1bcea39" + ] + ], + { + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:46:33.4628687" + }, + "test_fastp_single_end_trim_fail": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,3e4aaadb66a5b8fc9b881bf39c227abd" + ] + ], + [ + + ], + { + 
"versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:46:39.895973372" + }, + "test_fastp_paired_end_qc_only": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + { + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:47:13.015833707" + }, + "test_fastp_paired_end_qc_only - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:59.670106791" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/nextflow.interleaved.config b/modules/nf-core/fastp/tests/nextflow.interleaved.config new file mode 100644 index 00000000..4be8dbd2 --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.interleaved.config @@ -0,0 +1,5 @@ +process { + 
withName: FASTP { + ext.args = "--interleaved_in -e 30" + } +} diff --git a/modules/nf-core/fastp/tests/nextflow.save_failed.config b/modules/nf-core/fastp/tests/nextflow.save_failed.config new file mode 100644 index 00000000..53b61b0c --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.save_failed.config @@ -0,0 +1,5 @@ +process { + withName: FASTP { + ext.args = "-e 30" + } +} diff --git a/modules/local/nanoq/environment.yml b/modules/nf-core/fastplong/environment.yml similarity index 84% rename from modules/local/nanoq/environment.yml rename to modules/nf-core/fastplong/environment.yml index 1a95d24e..9fb22e54 100644 --- a/modules/local/nanoq/environment.yml +++ b/modules/nf-core/fastplong/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - "bioconda::nanoq=0.10.0" + - "bioconda::fastplong=0.3.0" diff --git a/modules/nf-core/fastplong/main.nf b/modules/nf-core/fastplong/main.nf new file mode 100644 index 00000000..ce1be5ef --- /dev/null +++ b/modules/nf-core/fastplong/main.nf @@ -0,0 +1,62 @@ +process FASTPLONG { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/fastplong:0.3.0--h224cc79_0': + 'biocontainers/fastplong:0.3.0--h224cc79_0' }" + + input: + tuple val(meta), path(reads) + path adapter_fasta + val discard_trimmed_pass + val save_trimmed_fail + + output: + tuple val(meta), path('*.fastplong.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.html') , emit: html + tuple val(meta), path('*.log') , emit: log + tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail + tuple val("${task.process}"), val('fastplong'), eval('fastplong --version 2>&1 | sed -e "s/fastplong //g"'), emit: versions_fastplong, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" + def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : '' + def output_file = discard_trimmed_pass ? '' : "--out ${prefix}.fastplong.fastq.gz" + def report_title = task.ext.report_title ?: "${prefix}_fastplong_report" + """ + fastplong \\ + --in ${reads} \\ + $output_file \\ + --json ${prefix}.fastplong.json \\ + --html ${prefix}.fastplong.html \\ + $adapter_list \\ + $fail_fastq \\ + --thread $task.cpus \\ + --report_title $report_title\\ + $args \\ + 2> >(tee ${prefix}.fastplong.log >&2) + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def touch_reads = discard_trimmed_pass ? "" : "echo '' | gzip > ${prefix}.fastplong.fastq.gz" + def touch_fail = save_trimmed_fail ? 
"echo '' | gzip > ${prefix}.fail.fastq.gz" : "" + """ + echo $args + + $touch_reads + $touch_fail + touch ${prefix}.fastplong.json + touch ${prefix}.fastplong.html + touch ${prefix}.fastplong.log + """ +} diff --git a/modules/nf-core/fastplong/meta.yml b/modules/nf-core/fastplong/meta.yml new file mode 100644 index 00000000..53ebdd58 --- /dev/null +++ b/modules/nf-core/fastplong/meta.yml @@ -0,0 +1,126 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "fastplong" +description: Perform adapter/quality trimming and QC on long sequencing reads (ONT, + PacBio, etc.) +keywords: + - trimming + - quality control + - fastq + - long reads +tools: + - "fastplong": + description: "Ultra-fast preprocessing and quality control for long-read sequencing + data." + homepage: "https://github.com/OpenGene/fastplong/blob/v0.3.0/README.md" + documentation: "https://github.com/OpenGene/fastplong/blob/v0.3.0/README.md" + tool_dev_url: "https://github.com/OpenGene/fastplong" + doi: 10.1002/imt2.107 + licence: ["MIT"] + identifier: biotools:fastplong + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information. Use 'single_end: true' for single-end reads. + e.g. [ id:'test', single_end:true ] + - reads: + type: file + description: | + Input FASTQ file. Gzip-compressed files are supported. + pattern: "*.{fastq.gz,fastq}" + ontologies: + - edam: http://edamontology.org/format_1930 # FASTQ + - adapter_fasta: + type: file + description: | + Optional FASTA file containing adapter sequences to trim. + pattern: "*.{fasta,fa,fna}" + ontologies: [] + - discard_trimmed_pass: + type: boolean + description: | + If true, no reads that pass trimming thresholds will be written. Only reports will be generated. + - save_trimmed_fail: + type: boolean + description: | + If true, reads that fail filtering will be saved to a file ending in `*.fail.fastq.gz`. 
+ +output: + reads: + - - meta: + type: map + description: Sample information map + - "*.fastplong.fastq.gz": + type: file + description: Trimmed and filtered reads + pattern: "*fastplong.fastq.gz" + ontologies: + - edam: http://edamontology.org/format_1930 # FASTQ + - edam: http://edamontology.org/format_3989 # GZIP format + json: + - - meta: + type: map + description: Sample information map + - "*.json": + type: file + description: QC report in JSON format + pattern: "*.json" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON + html: + - - meta: + type: map + description: Sample information map + - "*.html": + type: file + description: QC report in HTML format + pattern: "*.html" + + ontologies: [] + log: + - - meta: + type: map + description: Sample information map + - "*.log": + type: file + description: Log file generated during trimming + pattern: "*.log" + ontologies: [] + reads_fail: + - - meta: + type: map + description: Sample information map + - "*.fail.fastq.gz": + type: file + description: Reads that failed quality/trimming filters + pattern: "*fail.fastq.gz" + ontologies: + - edam: http://edamontology.org/format_1930 # FASTQ + - edam: http://edamontology.org/format_3989 # GZIP format + versions_fastplong: + - - "${task.process}": + type: string + description: The name of the process + - fastplong: + type: string + description: The name of the tool + - 'fastplong --version 2>&1 | sed -e "s/fastplong //g"': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - "${task.process}": + type: string + description: The name of the process + - fastplong: + type: string + description: The name of the tool + - 'fastplong --version 2>&1 | sed -e "s/fastplong //g"': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@Lupphes" + - "@eit-maxlcummins" +maintainers: + - "@Lupphes" diff --git a/modules/nf-core/fastplong/tests/main.nf.test 
b/modules/nf-core/fastplong/tests/main.nf.test new file mode 100644 index 00000000..4f545c90 --- /dev/null +++ b/modules/nf-core/fastplong/tests/main.nf.test @@ -0,0 +1,70 @@ +nextflow_process { + + name "Test Process FASTPLONG" + script "../main.nf" + process "FASTPLONG" + + tag "modules" + tag "modules_nfcore" + tag "fastplong" + + + test("test_fastplong - pacbio") { + + when { + + process { + """ + input[0] = Channel.of([ + [ id:'alz.ccs', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq.gz', checkIfExists: true), ] + ]) + input[1] = [] + input[2] = false + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("total bases:272.128000 K") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 100") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } + + test("test_fastplong - pacbio - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq.gz', checkIfExists: true), ] + ]) + input[1] = [] + input[2] = false + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/fastplong/tests/main.nf.test.snap b/modules/nf-core/fastplong/tests/main.nf.test.snap new file mode 100644 index 00000000..041c9b49 --- /dev/null +++ b/modules/nf-core/fastplong/tests/main.nf.test.snap @@ -0,0 +1,144 @@ +{ + "test_fastplong - pacbio": { + "content": [ + [ + [ + { + "id": "alz.ccs", + "single_end": true + }, + "alz.ccs.fastplong.json:md5,2bead4f1ec7984bf16de054f610befd7" + ] + ], + [ + [ + { + 
"id": "alz.ccs", + "single_end": true + }, + "alz.ccs.fastplong.fastq.gz:md5,23a6d8f301d0fd4d2da21f86ff0afac6" + ] + ], + [ + + ], + { + "versions_fastplong": [ + [ + "FASTPLONG", + "fastplong", + "0.3.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:48:16.59023584" + }, + "test_fastplong - pacbio - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastplong.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastplong.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastplong.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastplong.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + [ + "FASTPLONG", + "fastplong", + "0.3.0" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastplong.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastplong.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastplong.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastplong.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + + ], + "versions_fastplong": [ + [ + "FASTPLONG", + "fastplong", + "0.3.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:04:24.553785802" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 23e16634..f5629527 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -1,6 +1,6 @@ process FASTQC { tag 
"${meta.id}" - label 'process_medium' + label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -11,9 +11,9 @@ process FASTQC { tuple val(meta), path(reads) output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions + tuple val(meta) , path("*.html") , emit: html + tuple val(meta) , path("*.zip") , emit: zip + tuple val("${task.process}"), val('fastqc'), eval('fastqc --version | sed "/FastQC v/!d; s/.*v//"'), emit: versions_fastqc, topic: versions when: task.ext.when == null || task.ext.when @@ -43,11 +43,6 @@ process FASTQC { --threads ${task.cpus} \\ --memory ${fastqc_memory} \\ ${renamed_files} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) - END_VERSIONS """ stub: @@ -55,10 +50,5 @@ process FASTQC { """ touch ${prefix}.html touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) - END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml deleted file mode 100644 index c8d9d025..00000000 --- a/modules/nf-core/fastqc/meta.yml +++ /dev/null @@ -1,72 +0,0 @@ -name: fastqc -description: Run FastQC on sequenced reads -keywords: - - quality control - - qc - - adapters - - fastq -tools: - - fastqc: - description: | - FastQC gives general quality metrics about your reads. - It provides information about the quality score distribution - across your reads, the per base sequence content (%A/C/G/T). - - You get information about adapter contamination and other - overrepresented sequences. 
- homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ - documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ - licence: ["GPL-2.0-only"] - identifier: biotools:fastqc -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. - ontologies: [] -output: - html: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*.html": - type: file - description: FastQC report - pattern: "*_{fastqc.html}" - ontologies: [] - zip: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*.zip": - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" - ontologies: [] - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML -authors: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" -maintainers: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test deleted file mode 100644 index e9d79a07..00000000 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ /dev/null @@ -1,309 +0,0 @@ -nextflow_process { - - name "Test Process FASTQC" - script "../main.nf" - process "FASTQC" - - tag "modules" - tag "modules_nfcore" - tag "fastqc" - - test("sarscov2 single-end [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [ id: 'test', single_end:true ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert 
process.success }, - // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. - // looks like this:
Mon 2 Oct 2023
test.gz
- // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 paired-end [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, - { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, - { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, - { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, - { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 interleaved [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 paired-end [bam]") { 
- - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 multiple [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, - { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, - { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, - { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, - { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, - { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, - { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, - { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, - { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, 
- { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 custom_prefix") { - - when { - process { - """ - input[0] = Channel.of([ - [ id:'mysample', single_end:true ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 single-end [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [ id: 'test', single_end:true ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 paired-end [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 interleaved [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 paired-end [bam] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 multiple [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 custom_prefix - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [ id:'mysample', single_end:true ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } -} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap deleted file mode 100644 index 
d5db3092..00000000 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ /dev/null @@ -1,392 +0,0 @@ -{ - "sarscov2 custom_prefix": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:16.374038" - }, - "sarscov2 single-end [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": true - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": true - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": true - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:24.993809" - }, - "sarscov2 custom_prefix - stub": { - "content": [ - { - "0": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:03:10.93942" - }, - "sarscov2 interleaved [fastq]": { 
- "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:01:42.355718" - }, - "sarscov2 paired-end [bam]": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:01:53.276274" - }, - "sarscov2 multiple [fastq]": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:05.527626" - }, - "sarscov2 paired-end [fastq]": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:01:31.188871" - }, - "sarscov2 paired-end [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:34.273566" - }, - "sarscov2 multiple [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - 
"versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:03:02.304411" - }, - "sarscov2 single-end [fastq]": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:01:19.095607" - }, - "sarscov2 interleaved [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:44.640184" - }, - "sarscov2 paired-end [bam] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - 
"test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:53.550742" - } -} \ No newline at end of file diff --git a/modules/nf-core/flye/main.nf b/modules/nf-core/flye/main.nf index fecab194..9da156e3 100644 --- a/modules/nf-core/flye/main.nf +++ b/modules/nf-core/flye/main.nf @@ -18,7 +18,7 @@ process FLYE { tuple val(meta), path("*.txt") , emit: txt tuple val(meta), path("*.log") , emit: log tuple val(meta), path("*.json") , emit: json - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('flye'), eval('flye --version'), emit: versions_flye, topic: versions when: task.ext.when == null || task.ext.when @@ -43,11 +43,6 @@ process FLYE { mv assembly_info.txt ${prefix}.assembly_info.txt mv flye.log ${prefix}.flye.log mv params.json ${prefix}.params.json - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - flye: \$( flye --version ) - END_VERSIONS """ stub: @@ -59,10 +54,5 @@ process FLYE { echo contig_1 > ${prefix}.assembly_info.txt echo stub > ${prefix}.flye.log echo stub > ${prefix}.params.json - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - flye: \$( flye --version ) - END_VERSIONS """ } diff --git a/modules/nf-core/flye/meta.yml b/modules/nf-core/flye/meta.yml index 1e33c275..f77f4885 100644 --- a/modules/nf-core/flye/meta.yml +++ b/modules/nf-core/flye/meta.yml @@ -24,16 +24,18 @@ input: e.g. [ id:'test' ] - reads: type: file - description: Input reads from Oxford Nanopore or PacBio data in FASTA/FASTQ - format. + description: Input reads from Oxford Nanopore or PacBio data in + FASTA/FASTQ format. 
pattern: "*.{fasta,fastq,fasta.gz,fastq.gz,fa,fq,fa.gz,fq.gz}" - - - mode: - type: string - description: Flye mode depending on the input data (source and error rate) - pattern: "--pacbio-raw|--pacbio-corr|--pacbio-hifi|--nano-raw|--nano-corr|--nano-hq" + ontologies: + - edam: http://edamontology.org/format_1930 # FASTQ + - mode: + type: string + description: Flye mode depending on the input data (source and error rate) + pattern: "--pacbio-raw|--pacbio-corr|--pacbio-hifi|--nano-raw|--nano-corr|--nano-hq" output: - - fasta: - - meta: + fasta: + - - meta: type: map description: | Groovy Map containing sample information @@ -42,8 +44,10 @@ output: type: file description: Assembled FASTA file pattern: "*.fasta.gz" - - gfa: - - meta: + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + gfa: + - - meta: type: map description: | Groovy Map containing sample information @@ -52,8 +56,10 @@ output: type: file description: Repeat graph in gfa format pattern: "*.gfa.gz" - - gv: - - meta: + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + gv: + - - meta: type: map description: | Groovy Map containing sample information @@ -62,8 +68,10 @@ output: type: file description: Repeat graph in gv format pattern: "*.gv.gz" - - txt: - - meta: + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + txt: + - - meta: type: map description: | Groovy Map containing sample information @@ -72,8 +80,9 @@ output: type: file description: Extra information and statistics about resulting contigs pattern: "*.txt" - - log: - - meta: + ontologies: [] + log: + - - meta: type: map description: | Groovy Map containing sample information @@ -82,8 +91,9 @@ output: type: file description: Flye log file pattern: "*.log" - - json: - - meta: + ontologies: [] + json: + - - meta: type: map description: | Groovy Map containing sample information @@ -92,11 +102,31 @@ output: type: file description: Flye parameters pattern: "*.json" - - versions: - 
- versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON + versions_flye: + - - ${task.process}: + type: string + description: The name of the process + - flye: + type: string + description: The name of the tool + - flye --version: + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - flye: + type: string + description: The name of the tool + - flye --version: + type: eval + description: The expression to obtain the version of the tool + authors: - "@mirpedrol" maintainers: diff --git a/modules/nf-core/flye/tests/main.nf.test b/modules/nf-core/flye/tests/main.nf.test index afbf926e..e173518e 100644 --- a/modules/nf-core/flye/tests/main.nf.test +++ b/modules/nf-core/flye/tests/main.nf.test @@ -38,7 +38,7 @@ nextflow_process { file(process.out.txt.get(0).get(1)).name, path(process.out.txt.get(0).get(1)).readLines()[1].contains("contig_1"), process.out.json, - process.out.versions + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) } @@ -72,7 +72,7 @@ nextflow_process { file(process.out.txt.get(0).get(1)).name, path(process.out.txt.get(0).get(1)).readLines()[1].contains("contig_1"), process.out.json, - process.out.versions + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) } @@ -106,7 +106,7 @@ nextflow_process { file(process.out.txt.get(0).get(1)).name, path(process.out.txt.get(0).get(1)).readLines()[1].contains("contig_1"), process.out.json, - process.out.versions + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) } @@ -140,7 +140,7 @@ nextflow_process { file(process.out.txt.get(0).get(1)).name, path(process.out.txt.get(0).get(1)).readLines()[1].contains("contig_1"), process.out.json, - process.out.versions + process.out.findAll { key, val -> 
key.startsWith('versions') } ).match() } ) } @@ -167,7 +167,7 @@ nextflow_process { { assert process.success }, { assert snapshot( process.out, - path(process.out.versions.get(0)).yaml + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) diff --git a/modules/nf-core/flye/tests/main.nf.test.snap b/modules/nf-core/flye/tests/main.nf.test.snap index 7101f9ed..ed22fa9d 100644 --- a/modules/nf-core/flye/tests/main.nf.test.snap +++ b/modules/nf-core/flye/tests/main.nf.test.snap @@ -15,15 +15,21 @@ "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" ] ], - [ - "versions.yml:md5,80496e451401dbc0269ec404801a90e3" - ] + { + "versions_flye": [ + [ + "FLYE", + "flye", + "2.9.5-b1801" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T13:41:09.075217" + "timestamp": "2026-01-22T13:55:32.315805611" }, "flye_pacbio_corr": { "content": [ @@ -41,15 +47,21 @@ "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" ] ], - [ - "versions.yml:md5,80496e451401dbc0269ec404801a90e3" - ] + { + "versions_flye": [ + [ + "FLYE", + "flye", + "2.9.5-b1801" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T13:33:16.267658" + "timestamp": "2026-01-22T13:54:05.452301952" }, "flye_nano_corr": { "content": [ @@ -67,15 +79,21 @@ "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" ] ], - [ - "versions.yml:md5,80496e451401dbc0269ec404801a90e3" - ] + { + "versions_flye": [ + [ + "FLYE", + "flye", + "2.9.5-b1801" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T13:44:28.522592" + "timestamp": "2026-01-22T13:58:20.270302808" }, "flye_nano_hq": { "content": [ @@ -93,15 +111,21 @@ "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" ] ], - [ - 
"versions.yml:md5,80496e451401dbc0269ec404801a90e3" - ] + { + "versions_flye": [ + [ + "FLYE", + "flye", + "2.9.5-b1801" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T13:46:35.912198" + "timestamp": "2026-01-22T14:00:09.061930982" }, "flye_pacbio_raw - stub": { "content": [ @@ -155,7 +179,11 @@ ] ], "6": [ - "versions.yml:md5,80496e451401dbc0269ec404801a90e3" + [ + "FLYE", + "flye", + "2.9.5-b1801" + ] ], "fasta": [ [ @@ -205,20 +233,28 @@ "test.assembly_info.txt:md5,e3aec731279050302fc8d6f126b3030e" ] ], - "versions": [ - "versions.yml:md5,80496e451401dbc0269ec404801a90e3" + "versions_flye": [ + [ + "FLYE", + "flye", + "2.9.5-b1801" + ] ] }, { - "FLYE": { - "flye": "2.9.5-b1801" - } + "versions_flye": [ + [ + "FLYE", + "flye", + "2.9.5-b1801" + ] + ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-12T09:07:05.234775" + "timestamp": "2026-01-22T14:00:16.619909915" } } \ No newline at end of file diff --git a/modules/nf-core/hifiasm/main.nf b/modules/nf-core/hifiasm/main.nf index 7330920e..65d98df7 100644 --- a/modules/nf-core/hifiasm/main.nf +++ b/modules/nf-core/hifiasm/main.nf @@ -24,7 +24,8 @@ process HIFIASM { tuple val(meta), path("*.ec.fa.gz") , emit: corrected_reads , optional: true tuple val(meta), path("*.ovlp.paf.gz") , emit: read_overlaps , optional: true tuple val(meta), path("${prefix}.stderr.log") , emit: log - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('hifasm'), eval('hifiasm --version 2>&1'), emit: versions_hifiasm, topic: versions + when: task.ext.when == null || task.ext.when @@ -33,8 +34,8 @@ process HIFIASM { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def long_reads_sorted = long_reads instanceof List ? long_reads.sort{ it.name } : long_reads - def ul_reads_sorted = ul_reads instanceof List ? 
ul_reads.sort{ it.name } : ul_reads + def long_reads_sorted = long_reads instanceof List ? long_reads.sort{ read -> read.name } : long_reads + def ul_reads_sorted = ul_reads instanceof List ? ul_reads.sort{ read -> read.name } : ul_reads def ultralong = ul_reads ? "--ul ${ul_reads_sorted}" : "" if([paternal_kmer_dump, maternal_kmer_dump].any() && [hic_read1, hic_read2].any()) { @@ -67,7 +68,7 @@ process HIFIASM { ${ultralong} \\ -o ${prefix} \\ ${long_reads_sorted} \\ - 2> >( tee ${prefix}.stderr.log >&2 ) + 2>| >( tee ${prefix}.stderr.log >&2 ) if [ -f ${prefix}.ec.fa ]; then gzip ${prefix}.ec.fa @@ -76,11 +77,6 @@ process HIFIASM { if [ -f ${prefix}.ovlp.paf ]; then gzip ${prefix}.ovlp.paf fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - hifiasm: \$(hifiasm --version 2>&1) - END_VERSIONS """ stub: @@ -101,10 +97,5 @@ process HIFIASM { echo "" | gzip > ${prefix}.ec.fa.gz echo "" | gzip > ${prefix}.ovlp.paf.gz touch ${prefix}.stderr.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - hifiasm: \$(hifiasm --version 2>&1) - END_VERSIONS """ } diff --git a/modules/nf-core/hifiasm/meta.yml b/modules/nf-core/hifiasm/meta.yml index fcd211db..61a9a997 100644 --- a/modules/nf-core/hifiasm/meta.yml +++ b/modules/nf-core/hifiasm/meta.yml @@ -14,7 +14,8 @@ tools: documentation: https://github.com/chhylp123/hifiasm tool_dev_url: https://github.com/chhylp123/hifiasm doi: "10.1038/s41592-020-01056-5" - licence: ["MIT"] + licence: + - "MIT" identifier: biotools:hifiasm input: - - meta: @@ -24,7 +25,8 @@ input: e.g. [ id:'test', single_end:false ] - long_reads: type: file - description: Long reads PacBio HiFi reads or ONT reads (requires ext.arg '--ont'). + description: Long reads PacBio HiFi reads or ONT reads (requires ext.arg + '--ont'). ontologies: [] - ul_reads: type: file @@ -36,13 +38,13 @@ input: Groovy Map containing information about parental kmers. 
- paternal_kmer_dump: type: file - description: Yak kmer dump file for paternal reads (can be used for haplotype - resolution). It can have an arbitrary extension. + description: Yak kmer dump file for paternal reads (can be used for + haplotype resolution). It can have an arbitrary extension. ontologies: [] - maternal_kmer_dump: type: file - description: Yak kmer dump file for maternal reads (can be used for haplotype - resolution). It can have an arbitrary extension. + description: Yak kmer dump file for maternal reads (can be used for + haplotype resolution). It can have an arbitrary extension. ontologies: [] - - meta2: type: map @@ -65,8 +67,8 @@ input: description: bin files produced during a previous Hifiasm run ontologies: [] output: - - raw_unitigs: - - meta: + raw_unitigs: + - - meta: type: map description: | Groovy Map containing sample information @@ -75,8 +77,10 @@ output: type: file description: Raw unitigs pattern: "*.r_utg.gfa" - - bin_files: - - meta: + ontologies: + - edam: http://edamontology.org/format_3975 + bin_files: + - - meta: type: map description: | Groovy Map containing sample information @@ -91,8 +95,9 @@ output: initial results, which are the most computationally-expensive steps. 
pattern: "*.bin" - - processed_unitigs: - - meta: + ontologies: [] + processed_unitigs: + - - meta: type: map description: | Groovy Map containing sample information @@ -101,8 +106,10 @@ output: type: file description: Processed unitigs pattern: "*.p_utg.gfa" - - primary_contigs: - - meta: + ontologies: + - edam: http://edamontology.org/format_3975 + primary_contigs: + - - meta: type: map description: | Groovy Map containing sample information @@ -111,8 +118,9 @@ output: type: file description: Contigs representing the primary assembly pattern: "${prefix}.{p_ctg,bp.p_ctg,hic.p_ctg}.gfa" - - alternate_contigs: - - meta: + ontologies: [] + alternate_contigs: + - - meta: type: map description: | Groovy Map containing sample information @@ -121,13 +129,13 @@ output: type: file description: Contigs representing the alternative assembly pattern: "${prefix}.{a_ctg,hic.a_ctg}.gfa" - - hap1_contigs: - - meta: + ontologies: [] + hap1_contigs: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - pattern: "${prefix}.*.hap1.p_ctg.gfa" - ${prefix}.*.hap1.p_ctg.gfa: type: file description: | @@ -138,13 +146,13 @@ output: between contigs. In trio mode, they are fully phased paternal contigs all originating from a single parental haplotype. pattern: "${prefix}.*.hap1.p_ctg.gfa" - - hap2_contigs: - - meta: + ontologies: [] + hap2_contigs: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - pattern: "${prefix}.*.hap2.p_ctg.gfa" - ${prefix}.*.hap2.p_ctg.gfa: type: file description: | @@ -155,8 +163,9 @@ output: between contigs. In trio mode, they are fully phased paternal contigs all originating from a single parental haplotype. 
pattern: "${prefix}.*.hap2.p_ctg.gfa" - - corrected_reads: - - meta: + ontologies: [] + corrected_reads: + - - meta: type: map description: | Groovy Map containing sample information @@ -167,8 +176,10 @@ output: If option --write-ec specified, a gzipped fasta file containing the error corrected reads produced by the hifiasm error correction module pattern: "*.ec.fa.gz" - - read_overlaps: - - meta: + ontologies: + - edam: http://edamontology.org/format_3989 + read_overlaps: + - - meta: type: map description: | Groovy Map containing sample information @@ -179,23 +190,40 @@ output: If option --write-paf specified, a gzipped paf file describing the overlaps among all error-corrected reads pattern: "*.ovlp.paf.gz" - - log: - - meta: + ontologies: + - edam: http://edamontology.org/format_3989 + log: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - pattern: "*.stderr.log" - ${prefix}.stderr.log: type: file description: Stderr log pattern: "*.stderr.log" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" ontologies: [] + versions_hifiasm: + - - ${task.process}: + type: string + description: The name of the process + - hifasm: + type: string + description: The name of the tool + - hifiasm --version 2>&1: + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - hifasm: + type: string + description: The name of the tool + - hifiasm --version 2>&1: + type: eval + description: The expression to obtain the version of the tool authors: - "@sidorov-si" - "@scorreard" diff --git a/modules/nf-core/hifiasm/tests/main.nf.test b/modules/nf-core/hifiasm/tests/main.nf.test index 53edf404..d158847b 100644 --- a/modules/nf-core/hifiasm/tests/main.nf.test +++ b/modules/nf-core/hifiasm/tests/main.nf.test @@ -59,7 +59,7 @@ 
nextflow_process { process.out.hap2_contigs, process.out.fasta, process.out.paf, - process.out.versions + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) } @@ -133,7 +133,7 @@ nextflow_process { process.out.primary_contigs, process.out.hap1_contigs, process.out.hap2_contigs, - process.out.versions + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) } @@ -191,7 +191,7 @@ nextflow_process { process.out.primary_contigs, process.out.hap1_contigs, process.out.hap2_contigs, - process.out.versions + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) } @@ -249,7 +249,7 @@ nextflow_process { process.out.raw_unitigs, process.out.processed_unitigs, process.out.hap1_contigs, - process.out.versions + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) } @@ -309,7 +309,7 @@ nextflow_process { process.out.primary_contigs, process.out.alternate_contigs, process.out.hap1_contigs, - process.out.versions + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) } @@ -455,4 +455,4 @@ nextflow_process { ) } } -} \ No newline at end of file +} diff --git a/modules/nf-core/hifiasm/tests/main.nf.test.snap b/modules/nf-core/hifiasm/tests/main.nf.test.snap index cf8a7eba..044a6dbb 100644 --- a/modules/nf-core/hifiasm/tests/main.nf.test.snap +++ b/modules/nf-core/hifiasm/tests/main.nf.test.snap @@ -36,15 +36,21 @@ "test.hic.hap1.p_ctg.gfa:md5,f67a8fdfa756961360732c79d189054d" ] ], - [ - "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" - ] + { + "versions_hifiasm": [ + [ + "HIFIASM", + "hifasm", + "0.25.0-r726" + ] + ] + } ], + "timestamp": "2026-02-16T15:34:30.534362903", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" - }, - "timestamp": "2025-04-15T14:24:48.67437687" + "nf-test": "0.9.4", + "nextflow": "26.01.1" + } }, "homo_sapiens pacbio hifi [fastq, [,], [,], [bin] ]": { "content": [ @@ -88,15 +94,21 @@ 
"test.bp.hap2.p_ctg.gfa:md5,ac2116fd2f22c67d4c304cbf9b9f7793" ] ], - [ - "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" - ] + { + "versions_hifiasm": [ + [ + "HIFIASM", + "hifasm", + "0.25.0-r726" + ] + ] + } ], + "timestamp": "2026-02-16T16:19:44.451598985", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" - }, - "timestamp": "2025-04-15T14:24:20.599937477" + "nf-test": "0.9.4", + "nextflow": "25.04.8" + } }, "homo_sapiens pacbio hifi [fastq x2, [,], [,], [,] ]": { "content": [ @@ -140,15 +152,21 @@ "test.bp.hap2.p_ctg.gfa:md5,ce096a66c9bba039c6a22ba9e9409d01" ] ], - [ - "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" - ] + { + "versions_hifiasm": [ + [ + "HIFIASM", + "hifasm", + "0.25.0-r726" + ] + ] + } ], + "timestamp": "2026-02-16T16:19:55.227290441", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" - }, - "timestamp": "2025-04-15T14:24:28.744387853" + "nf-test": "0.9.4", + "nextflow": "25.04.8" + } }, "homo_sapiens pacbio hifi [fastq, [,], [,], [,] ] - stub": { "content": [ @@ -176,7 +194,11 @@ ] ], "10": [ - "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" + [ + "HIFIASM", + "hifasm", + "0.25.0-r726" + ] ], "2": [ [ @@ -334,16 +356,20 @@ "test.ovlp.paf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "versions": [ - "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" + "versions_hifiasm": [ + [ + "HIFIASM", + "hifasm", + "0.25.0-r726" + ] ] } ], + "timestamp": "2026-02-16T10:53:50.939408883", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" - }, - "timestamp": "2025-04-17T15:56:54.410648332" + "nf-test": "0.9.4", + "nextflow": "26.01.1" + } }, "homo_sapiens pacbio hifi [fastq, [,], [,], [,]]": { "content": [ @@ -389,15 +415,21 @@ ], null, null, - [ - "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" - ] + { + "versions_hifiasm": [ + [ + "HIFIASM", + "hifasm", + "0.25.0-r726" + ] + ] + } ], + "timestamp": "2026-02-16T10:52:36.945051713", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" - }, - "timestamp": 
"2025-04-15T14:24:12.033205578" + "nf-test": "0.9.4", + "nextflow": "26.01.1" + } }, "homo_sapiens pacbio hifi [fastq, [yak, yak], [,], [,] ]": { "content": [ @@ -425,14 +457,20 @@ "test.dip.hap1.p_ctg.gfa:md5,eed5da5f3dd415dbb711edb61a09802f" ] ], - [ - "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" - ] + { + "versions_hifiasm": [ + [ + "HIFIASM", + "hifasm", + "0.25.0-r726" + ] + ] + } ], + "timestamp": "2026-02-16T16:20:05.943561958", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" - }, - "timestamp": "2025-04-15T14:24:37.330378652" + "nf-test": "0.9.4", + "nextflow": "25.04.8" + } } } \ No newline at end of file diff --git a/modules/nf-core/liftoff/main.nf b/modules/nf-core/liftoff/main.nf index 649e4ba4..dec05984 100644 --- a/modules/nf-core/liftoff/main.nf +++ b/modules/nf-core/liftoff/main.nf @@ -15,7 +15,7 @@ process LIFTOFF { tuple val(meta), path("${prefix}.gff3") , emit: gff3 tuple val(meta), path("*.polished.gff3") , emit: polished_gff3, optional: true tuple val(meta), path("*.unmapped.txt") , emit: unmapped_txt - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('liftoff'), eval('liftoff --version | sed "s/v//"'), emit: versions_liftoff, topic: versions when: task.ext.when == null || task.ext.when @@ -56,11 +56,6 @@ process LIFTOFF { "${prefix}.gff3_polished" \\ "${prefix}.polished.gff3" \\ || echo "-polish is absent" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - liftoff: \$(liftoff --version 2> /dev/null) - END_VERSIONS """ stub: @@ -71,10 +66,5 @@ process LIFTOFF { touch "${prefix}.gff3" touch "${prefix}.unmapped.txt" $touch_polished - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - liftoff: \$(liftoff --version 2> /dev/null) - END_VERSIONS """ } diff --git a/modules/nf-core/liftoff/meta.yml b/modules/nf-core/liftoff/meta.yml index 7d809f7f..35efaf78 100644 --- a/modules/nf-core/liftoff/meta.yml +++ b/modules/nf-core/liftoff/meta.yml @@ -1,4 +1,3 @@ -# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "liftoff" description: | Uses Liftoff to accurately map annotations in GFF or GTF between assemblies of the same, @@ -18,7 +17,8 @@ tools: documentation: "https://github.com/agshumate/Liftoff" tool_dev_url: "https://github.com/agshumate/Liftoff" doi: "10.1093/bioinformatics/bty191" - licence: ["GPL v3 License"] + licence: + - "GPL v3 License" identifier: biotools:liftoff input: - - meta: @@ -30,22 +30,26 @@ input: type: file description: Target assembly in fasta format (can be gzipped) pattern: "*.{fa,fa.gz,fasta,fasta.gz,fas,fas.gz,fsa,fsa.gz}" - - - ref_fa: - type: file - description: Reference assembly in fasta format (can be gzipped) - pattern: "*.{fa,fa.gz,fasta,fasta.gz,fas,fas.gz,fsa,fsa.gz}" - - - ref_annotation: - type: file - description: Reference assembly annotations in gtf or gff3 format - pattern: "*.{gtf,gff3}" - - - ref_db: - type: file - description: | - Name of feature database; if not specified, the -g argument must - be provided and a database will be built automatically + ontologies: [] + - ref_fa: + type: file + description: Reference assembly in fasta format (can be gzipped) + pattern: "*.{fa,fa.gz,fasta,fasta.gz,fas,fas.gz,fsa,fsa.gz}" + ontologies: [] + - ref_annotation: + type: file + description: Reference assembly annotations in gtf or gff3 format + pattern: "*.{gtf,gff3}" + ontologies: [] + - ref_db: + type: file + description: | + Name of feature database; if not specified, the -g argument must + be provided and a database will be built automatically + ontologies: [] output: - - gff3: - - meta: + gff3: + - - meta: type: map description: | Groovy Map containing sample information @@ -54,18 +58,21 @@ output: type: file description: Lifted annotations for the target assembly in gff3 format pattern: "*.gff3" - - polished_gff3: - - meta: + ontologies: [] + polished_gff3: + - - meta: type: map description: | Groovy Map containing sample 
information e.g. `[ id:'test' ]` - "*.polished.gff3": type: file - description: Polished lifted annotations for the target assembly in gff3 format + description: Polished lifted annotations for the target assembly in gff3 + format pattern: "*.polished.gff3" - - unmapped_txt: - - meta: + ontologies: [] + unmapped_txt: + - - meta: type: map description: | Groovy Map containing sample information @@ -74,11 +81,28 @@ output: type: file description: List of unmapped reference annotations pattern: "*.unmapped.txt" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_liftoff: + - - ${task.process}: + type: string + description: The name of the process + - liftoff: + type: string + description: The name of the tool + - liftoff --version | sed "s/v//": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - liftoff: + type: string + description: The name of the tool + - liftoff --version | sed "s/v//": + type: eval + description: The expression to obtain the version of the tool authors: - "@GallVp" maintainers: diff --git a/modules/nf-core/liftoff/tests/main.nf.test b/modules/nf-core/liftoff/tests/main.nf.test index 2a17cc81..bdc705a0 100644 --- a/modules/nf-core/liftoff/tests/main.nf.test +++ b/modules/nf-core/liftoff/tests/main.nf.test @@ -34,9 +34,9 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot(process.out.unmapped_txt).match("unmapped_txt") }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match() }, { assert file(process.out.gff3[0][1]).text.contains("chr21\tLiftoff\texon\t34608061\t34608118\t.\t+\t.") }, { assert file(process.out.polished_gff3[0][1]).text.contains("chr21\tLiftoff\texon\t34608061\t34608118\t.\t+\t.") }, - { assert snapshot(process.out.versions).match("versions") } ) } 
@@ -52,8 +52,8 @@ nextflow_process { [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) ] - input[1] = [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr1/genome.fasta.gz', checkIfExists: true) + input[1] = [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr1/genome.fasta.gz', checkIfExists: true) ] input[2] = [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr1/genome.gtf', checkIfExists: true) diff --git a/modules/nf-core/liftoff/tests/main.nf.test.snap b/modules/nf-core/liftoff/tests/main.nf.test.snap index f6064467..545f8ab3 100644 --- a/modules/nf-core/liftoff/tests/main.nf.test.snap +++ b/modules/nf-core/liftoff/tests/main.nf.test.snap @@ -10,23 +10,29 @@ ] ] ], + "timestamp": "2023-12-01T13:57:40.748507", "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" - }, - "timestamp": "2023-12-01T13:57:40.748507" + } }, - "versions": { + "homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf": { "content": [ - [ - "versions.yml:md5,205d9c609e7fe27d8199550d842bdce8" - ] + { + "versions_liftoff": [ + [ + "LIFTOFF", + "liftoff", + "1.6.3" + ] + ] + } ], + "timestamp": "2026-02-16T17:56:04.622385608", "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2023-12-01T13:57:40.752414" + "nf-test": "0.9.4", + "nextflow": "25.04.8" + } }, "homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf-stub": { "content": [ @@ -56,7 +62,11 @@ ] ], "3": [ - "versions.yml:md5,205d9c609e7fe27d8199550d842bdce8" + [ + "LIFTOFF", + "liftoff", + "1.6.3" + ] ], "gff3": [ [ @@ -82,15 +92,19 @@ "test.unmapped.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,205d9c609e7fe27d8199550d842bdce8" + "versions_liftoff": [ + [ + "LIFTOFF", + "liftoff", + "1.6.3" + ] ] } ], + "timestamp": "2026-02-16T17:56:15.752736312", "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, 
- "timestamp": "2024-03-19T09:15:25.661428" + "nf-test": "0.9.4", + "nextflow": "25.04.8" + } } } \ No newline at end of file diff --git a/modules/nf-core/lima/main.nf b/modules/nf-core/lima/main.nf deleted file mode 100644 index e5b334b1..00000000 --- a/modules/nf-core/lima/main.nf +++ /dev/null @@ -1,82 +0,0 @@ -process LIMA { - tag "$meta.id" - label 'process_low' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/lima:2.12.0--h9ee0642_1' : - 'biocontainers/lima:2.12.0--h9ee0642_1' }" - - input: - tuple val(meta), path(ccs) - path primers - - output: - tuple val(meta), path("*.counts") , emit: counts - tuple val(meta), path("*.report") , emit: report - tuple val(meta), path("*.summary"), emit: summary - path "versions.yml" , emit: versions - - tuple val(meta), path("*.bam") , optional: true, emit: bam - tuple val(meta), path("*.bam.pbi") , optional: true, emit: pbi - tuple val(meta), path("*.{fa, fasta}") , optional: true, emit: fasta - tuple val(meta), path("*.{fa.gz, fasta.gz}"), optional: true, emit: fastagz - tuple val(meta), path("*.fastq") , optional: true, emit: fastq - tuple val(meta), path("*.fastq.gz") , optional: true, emit: fastqgz - tuple val(meta), path("*.xml") , optional: true, emit: xml - tuple val(meta), path("*.json") , optional: true, emit: json - tuple val(meta), path("*.clips") , optional: true, emit: clips - tuple val(meta), path("*.guess") , optional: true, emit: guess - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - if( "$ccs" == "${prefix}.bam" ) error "Input and output names are the same, set prefix in module configuration" - if( "$ccs" == "${prefix}.fasta" ) error "Input and output names are the same, set prefix in module configuration" - if( "$ccs" == "${prefix}.fasta.gz" ) error "Input and output 
names are the same, set prefix in module configuration" - if( "$ccs" == "${prefix}.fastq" ) error "Input and output names are the same, set prefix in module configuration" - if( "$ccs" == "${prefix}.fastq.gz" ) error "Input and output names are the same, set prefix in module configuration" - - """ - OUT_EXT="" - - if [[ $ccs =~ bam\$ ]]; then - OUT_EXT="bam" - elif [[ $ccs =~ fasta\$ ]]; then - OUT_EXT="fasta" - elif [[ $ccs =~ fasta.gz\$ ]]; then - OUT_EXT="fasta.gz" - elif [[ $ccs =~ fastq\$ ]]; then - OUT_EXT="fastq" - elif [[ $ccs =~ fastq.gz\$ ]]; then - OUT_EXT="fastq.gz" - fi - - lima \\ - $ccs \\ - $primers \\ - $prefix.\$OUT_EXT \\ - -j $task.cpus \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - lima: \$( lima --version | head -n1 | sed 's/lima //g' | sed 's/ (.\\+//g' ) - END_VERSIONS - """ - - stub: - """ - touch dummy.counts - touch dummy.report - touch dummy.summary - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - lima: \$( lima --version | head -n1 | sed 's/lima //g' | sed 's/ (.\\+//g' ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/lima/meta.yml b/modules/nf-core/lima/meta.yml deleted file mode 100644 index f22973f0..00000000 --- a/modules/nf-core/lima/meta.yml +++ /dev/null @@ -1,174 +0,0 @@ -name: lima -description: lima - The PacBio Barcode Demultiplexer and Primer Remover -keywords: - - isoseq - - ccs - - primer - - pacbio - - barcode -tools: - - lima: - description: lima - The PacBio Barcode Demultiplexer and Primer Remover - homepage: https://github.com/PacificBiosciences/pbbioconda - documentation: https://lima.how/ - tool_dev_url: https://github.com/pacificbiosciences/barcoding/ - licence: ["BSD-3-Clause-Clear"] - identifier: "" -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test' ] - - ccs: - type: file - description: A BAM or fasta or fasta.gz or fastq or fastq.gz file of subreads - or ccs - pattern: "*.{bam,fasta,fasta.gz,fastq,fastq.gz}" - - - primers: - type: file - description: Fasta file, sequences of primers - pattern: "*.fasta" -output: - - counts: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - "*.counts": - type: file - description: A tabulated file of describing pairs of primers - pattern: "*.counts" - - report: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - "*.report": - type: file - description: A tab-separated file about each ZMW, unfiltered - pattern: "*.report" - - summary: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - "*.summary": - type: file - description: This file shows how many ZMWs have been filtered, how ZMWs many - are same/different, and how many reads have been filtered. - pattern: "*.summary" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - - bam: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - "*.bam": - type: file - description: A bam file of ccs purged of primers - pattern: "*.bam" - - pbi: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - "*.bam.pbi": - type: file - description: Pacbio index file of ccs purged of primers - pattern: "*.bam" - - fasta: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - "*.{fa, fasta}": - type: file - description: A fasta file of ccs purged of primers. - pattern: "*.fa" - - fastagz: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test' ] - - "*.{fa.gz, fasta.gz}": - type: file - description: A fasta.gz file of ccs purged of primers. - pattern: "*.fasta.gz" - - fastq: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - "*.fastq": - type: file - description: A fastq file of ccs purged of primers. - pattern: "*.fastq" - - fastqgz: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - "*.fastq.gz": - type: file - description: A fastq.gz file of ccs purged of primers. - pattern: "*.fastq.gz" - - xml: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - "*.xml": - type: file - description: An XML file representing a set of a particular sequence data type - such as subreads, references or aligned subreads. - pattern: "*.xml" - - json: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - "*.json": - type: file - description: A metadata json file - pattern: "*.json" - - clips: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - "*.clips": - type: file - description: A fasta file of clipped primers - pattern: "*.clips" - - guess: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test' ] - - "*.guess": - type: file - description: A second tabulated file of describing pairs of primers (no doc - available) - pattern: "*.guess" -authors: - - "@sguizard" -maintainers: - - "@sguizard" diff --git a/modules/nf-core/lima/tests/main.nf.test b/modules/nf-core/lima/tests/main.nf.test deleted file mode 100644 index f5cb6b86..00000000 --- a/modules/nf-core/lima/tests/main.nf.test +++ /dev/null @@ -1,263 +0,0 @@ -nextflow_process { - - name "Test Process LIMA" - script "../main.nf" - config "./nextflow.config" - process "LIMA" - - tag "modules" - tag "modules_nfcore" - tag "lima" - - test("LIMA - Primer Removal - Input => bam") { - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/alz.ccs.bam', checkIfExists: true), - ] - input[1] = [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fasta/primers.fasta', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.counts).match("counts") }, - { assert snapshot(process.out.report).match("report") }, - { assert snapshot(process.out.summary).match("summary") }, - { assert snapshot(process.out.versions).match("versions") }, - { assert snapshot(process.out.bam).match("bam") }, - { assert snapshot(process.out.pbi).match("pbi") }, - { assert snapshot(process.out.fasta).match("fasta") }, - { assert snapshot(process.out.fastagz).match("fastagz") }, - { assert snapshot(process.out.fastq).match("fastq") }, - { assert snapshot(process.out.fastqgz).match("fastqgz") }, - { assert snapshot(process.out.clips).match("clips") }, - { assert snapshot(process.out.guess).match("guess") } - ) - } - - } - - test("LIMA - Primer Removal - Input => fa") { - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fasta/alz.ccs.fasta', checkIfExists: true), - ] 
- input[1] = [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fasta/primers.fasta', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("LIMA - Primer Removal - Input => fa.gz") { - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fasta/alz.ccs.fasta.gz', checkIfExists: true), - ] - input[1] = [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fasta/primers.fasta', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("LIMA - Primer Removal - Input => fq") { - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq', checkIfExists: true), - ] - input[1] = [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fasta/primers.fasta', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("LIMA - Primer Removal - Input => fq.gz") { - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq.gz', checkIfExists: true), - ] - input[1] = [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fasta/primers.fasta', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("LIMA - Primer Removal - Input => bam - stub") { - - options "-stub" - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/pacbio/bam/alz.ccs.bam', checkIfExists: true), - ] - input[1] = [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fasta/primers.fasta', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("LIMA - Primer Removal - Input => fa - stub") { - - options "-stub" - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fasta/alz.ccs.fasta', checkIfExists: true), - ] - input[1] = [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fasta/primers.fasta', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("LIMA - Primer Removal - Input => fa.gz - stub") { - - options "-stub" - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fasta/alz.ccs.fasta.gz', checkIfExists: true), - ] - input[1] = [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fasta/primers.fasta', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("LIMA - Primer Removal - Input => fq - stub") { - - options "-stub" - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq', checkIfExists: true), - ] - input[1] = [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fasta/primers.fasta', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("LIMA - Primer Removal - Input => fq.gz - stub") { - - options "-stub" - 
- when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq.gz', checkIfExists: true), - ] - input[1] = [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fasta/primers.fasta', checkIfExists: true) ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - -} \ No newline at end of file diff --git a/modules/nf-core/lima/tests/main.nf.test.snap b/modules/nf-core/lima/tests/main.nf.test.snap deleted file mode 100644 index 334b6936..00000000 --- a/modules/nf-core/lima/tests/main.nf.test.snap +++ /dev/null @@ -1,1486 +0,0 @@ -{ - "summary": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.fl.lima.summary:md5,bcbcaaaca418bdeb91141c81715ca420" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T11:35:00.704932" - }, - "fastqgz": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T11:35:01.98256" - }, - "counts": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.fl.lima.counts:md5,842c6a23ca2de504ced4538ad5111da1" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T11:35:00.491188" - }, - "clips": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.fl.lima.clips:md5,fa03bc75bd78b2648a139fd67c69208f" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T11:35:02.19417" - }, - "LIMA - Primer Removal - Input => fq.gz - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "dummy.counts:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "dummy.report:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "10": [ - - ], - "11": [ - - ], - "12": [ - - ], - "13": [ - - ], - "2": [ - [ - { - "id": "test" - }, 
- "dummy.summary:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "4": [ - - ], - "5": [ - - ], - "6": [ - - ], - "7": [ - - ], - "8": [ - - ], - "9": [ - - ], - "bam": [ - - ], - "clips": [ - - ], - "counts": [ - [ - { - "id": "test" - }, - "dummy.counts:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "fasta": [ - - ], - "fastagz": [ - - ], - "fastq": [ - - ], - "fastqgz": [ - - ], - "guess": [ - - ], - "json": [ - - ], - "pbi": [ - - ], - "report": [ - [ - { - "id": "test" - }, - "dummy.report:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "summary": [ - [ - { - "id": "test" - }, - "dummy.summary:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "xml": [ - - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.3" - }, - "timestamp": "2025-01-13T13:17:04.481318" - }, - "LIMA - Primer Removal - Input => fq.gz": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.fl.lima.counts:md5,767b687e6eda7b24cd0e577f527eb2f0" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test.fl.lima.report:md5,ad2a9b1eeb4cda4a1f69ef4b7520b5fd" - ] - ], - "10": [ - - ], - "11": [ - - ], - "12": [ - [ - { - "id": "test" - }, - "test.fl.lima.clips:md5,5c16ef8122f6f1798acc30eb8a30828c" - ] - ], - "13": [ - [ - { - "id": "test" - }, - "test.fl.lima.guess:md5,31b988aab6bda84867e704b9edd8a763" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "test.fl.lima.summary:md5,e91d3c386aaf4effa63f33ee2eb7da2a" - ] - ], - "3": [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "4": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam:md5,c5d3d376ca7ffc32ef5cbabcc9850804" - ] - ], - "5": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam.pbi:md5,d1d6a2f961b9fb3d29837555706c59eb" - ] - ], - "6": [ - - ], - "7": [ - - ], - "8": [ - - ], - "9": [ - [ - { - "id": "test" - }, - 
"test.fl.NEB_5p--NEB_Clontech_3p.fastq.gz:md5,ef395f689c5566f501e300bb83d7a5f2" - ] - ], - "bam": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam:md5,c5d3d376ca7ffc32ef5cbabcc9850804" - ] - ], - "clips": [ - [ - { - "id": "test" - }, - "test.fl.lima.clips:md5,5c16ef8122f6f1798acc30eb8a30828c" - ] - ], - "counts": [ - [ - { - "id": "test" - }, - "test.fl.lima.counts:md5,767b687e6eda7b24cd0e577f527eb2f0" - ] - ], - "fasta": [ - - ], - "fastagz": [ - - ], - "fastq": [ - - ], - "fastqgz": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.fastq.gz:md5,ef395f689c5566f501e300bb83d7a5f2" - ] - ], - "guess": [ - [ - { - "id": "test" - }, - "test.fl.lima.guess:md5,31b988aab6bda84867e704b9edd8a763" - ] - ], - "json": [ - - ], - "pbi": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam.pbi:md5,d1d6a2f961b9fb3d29837555706c59eb" - ] - ], - "report": [ - [ - { - "id": "test" - }, - "test.fl.lima.report:md5,ad2a9b1eeb4cda4a1f69ef4b7520b5fd" - ] - ], - "summary": [ - [ - { - "id": "test" - }, - "test.fl.lima.summary:md5,e91d3c386aaf4effa63f33ee2eb7da2a" - ] - ], - "versions": [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "xml": [ - - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.3" - }, - "timestamp": "2025-01-13T13:16:41.110353" - }, - "LIMA - Primer Removal - Input => fq": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.fl.lima.counts:md5,767b687e6eda7b24cd0e577f527eb2f0" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test.fl.lima.report:md5,ad2a9b1eeb4cda4a1f69ef4b7520b5fd" - ] - ], - "10": [ - - ], - "11": [ - - ], - "12": [ - [ - { - "id": "test" - }, - "test.fl.lima.clips:md5,5c16ef8122f6f1798acc30eb8a30828c" - ] - ], - "13": [ - [ - { - "id": "test" - }, - "test.fl.lima.guess:md5,31b988aab6bda84867e704b9edd8a763" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "test.fl.lima.summary:md5,e91d3c386aaf4effa63f33ee2eb7da2a" - ] - ], - "3": [ - 
"versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "4": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam:md5,c5d3d376ca7ffc32ef5cbabcc9850804" - ] - ], - "5": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam.pbi:md5,d1d6a2f961b9fb3d29837555706c59eb" - ] - ], - "6": [ - - ], - "7": [ - - ], - "8": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.fastq:md5,ef395f689c5566f501e300bb83d7a5f2" - ] - ], - "9": [ - - ], - "bam": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam:md5,c5d3d376ca7ffc32ef5cbabcc9850804" - ] - ], - "clips": [ - [ - { - "id": "test" - }, - "test.fl.lima.clips:md5,5c16ef8122f6f1798acc30eb8a30828c" - ] - ], - "counts": [ - [ - { - "id": "test" - }, - "test.fl.lima.counts:md5,767b687e6eda7b24cd0e577f527eb2f0" - ] - ], - "fasta": [ - - ], - "fastagz": [ - - ], - "fastq": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.fastq:md5,ef395f689c5566f501e300bb83d7a5f2" - ] - ], - "fastqgz": [ - - ], - "guess": [ - [ - { - "id": "test" - }, - "test.fl.lima.guess:md5,31b988aab6bda84867e704b9edd8a763" - ] - ], - "json": [ - - ], - "pbi": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam.pbi:md5,d1d6a2f961b9fb3d29837555706c59eb" - ] - ], - "report": [ - [ - { - "id": "test" - }, - "test.fl.lima.report:md5,ad2a9b1eeb4cda4a1f69ef4b7520b5fd" - ] - ], - "summary": [ - [ - { - "id": "test" - }, - "test.fl.lima.summary:md5,e91d3c386aaf4effa63f33ee2eb7da2a" - ] - ], - "versions": [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "xml": [ - - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.3" - }, - "timestamp": "2025-01-13T13:16:31.41132" - }, - "LIMA - Primer Removal - Input => fa.gz - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "dummy.counts:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "dummy.report:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "10": [ - 
- ], - "11": [ - - ], - "12": [ - - ], - "13": [ - - ], - "2": [ - [ - { - "id": "test" - }, - "dummy.summary:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "4": [ - - ], - "5": [ - - ], - "6": [ - - ], - "7": [ - - ], - "8": [ - - ], - "9": [ - - ], - "bam": [ - - ], - "clips": [ - - ], - "counts": [ - [ - { - "id": "test" - }, - "dummy.counts:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "fasta": [ - - ], - "fastagz": [ - - ], - "fastq": [ - - ], - "fastqgz": [ - - ], - "guess": [ - - ], - "json": [ - - ], - "pbi": [ - - ], - "report": [ - [ - { - "id": "test" - }, - "dummy.report:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "summary": [ - [ - { - "id": "test" - }, - "dummy.summary:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "xml": [ - - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.3" - }, - "timestamp": "2025-01-13T13:16:55.396572" - }, - "fasta": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T11:35:01.262691" - }, - "LIMA - Primer Removal - Input => fq - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "dummy.counts:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "dummy.report:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "10": [ - - ], - "11": [ - - ], - "12": [ - - ], - "13": [ - - ], - "2": [ - [ - { - "id": "test" - }, - "dummy.summary:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "4": [ - - ], - "5": [ - - ], - "6": [ - - ], - "7": [ - - ], - "8": [ - - ], - "9": [ - - ], - "bam": [ - - ], - "clips": [ - - ], - "counts": [ - [ - { - "id": "test" - }, - "dummy.counts:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "fasta": [ - - ], - "fastagz": [ - - ], - "fastq": [ - - ], - 
"fastqgz": [ - - ], - "guess": [ - - ], - "json": [ - - ], - "pbi": [ - - ], - "report": [ - [ - { - "id": "test" - }, - "dummy.report:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "summary": [ - [ - { - "id": "test" - }, - "dummy.summary:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "xml": [ - - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.3" - }, - "timestamp": "2025-01-13T13:16:59.983156" - }, - "bam": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam:md5,59b04f200c309b0a60a3f182d22f6910" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.3" - }, - "timestamp": "2025-01-13T13:16:01.243385" - }, - "LIMA - Primer Removal - Input => bam - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "dummy.counts:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "dummy.report:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "10": [ - - ], - "11": [ - - ], - "12": [ - - ], - "13": [ - - ], - "2": [ - [ - { - "id": "test" - }, - "dummy.summary:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "4": [ - - ], - "5": [ - - ], - "6": [ - - ], - "7": [ - - ], - "8": [ - - ], - "9": [ - - ], - "bam": [ - - ], - "clips": [ - - ], - "counts": [ - [ - { - "id": "test" - }, - "dummy.counts:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "fasta": [ - - ], - "fastagz": [ - - ], - "fastq": [ - - ], - "fastqgz": [ - - ], - "guess": [ - - ], - "json": [ - - ], - "pbi": [ - - ], - "report": [ - [ - { - "id": "test" - }, - "dummy.report:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "summary": [ - [ - { - "id": "test" - }, - "dummy.summary:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "xml": [ - - ] - } - ], - "meta": { - "nf-test": "0.9.0", - 
"nextflow": "24.10.3" - }, - "timestamp": "2025-01-13T13:16:46.316856" - }, - "LIMA - Primer Removal - Input => fa - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "dummy.counts:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "dummy.report:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "10": [ - - ], - "11": [ - - ], - "12": [ - - ], - "13": [ - - ], - "2": [ - [ - { - "id": "test" - }, - "dummy.summary:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "4": [ - - ], - "5": [ - - ], - "6": [ - - ], - "7": [ - - ], - "8": [ - - ], - "9": [ - - ], - "bam": [ - - ], - "clips": [ - - ], - "counts": [ - [ - { - "id": "test" - }, - "dummy.counts:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "fasta": [ - - ], - "fastagz": [ - - ], - "fastq": [ - - ], - "fastqgz": [ - - ], - "guess": [ - - ], - "json": [ - - ], - "pbi": [ - - ], - "report": [ - [ - { - "id": "test" - }, - "dummy.report:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "summary": [ - [ - { - "id": "test" - }, - "dummy.summary:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "xml": [ - - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.3" - }, - "timestamp": "2025-01-13T13:16:50.996309" - }, - "fastagz": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T11:35:01.493258" - }, - "versions": { - "content": [ - [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.3" - }, - "timestamp": "2025-01-13T13:16:01.136138" - }, - "guess": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.fl.lima.guess:md5,d3675af3ca8a908ee9e3c231668392d3" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T11:35:02.396472" - }, 
- "pbi": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam.pbi:md5,851cf26eb54e4399cba5241db969dc0c" - ] - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.3" - }, - "timestamp": "2025-01-13T13:16:01.343034" - }, - "report": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.fl.lima.report:md5,dc073985322ae0a003ccc7e0fa4db5e6" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T11:35:00.626772" - }, - "LIMA - Primer Removal - Input => fa": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.fl.lima.counts:md5,a4ceaa408be334eaa711577e95f8730e" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test.fl.lima.report:md5,bd4a8bde17471563cf91aab4c787911d" - ] - ], - "10": [ - - ], - "11": [ - - ], - "12": [ - [ - { - "id": "test" - }, - "test.fl.lima.clips:md5,1012bc8874a14836f291bac48e8482a4" - ] - ], - "13": [ - [ - { - "id": "test" - }, - "test.fl.lima.guess:md5,651e5f2b438b8ceadb3e06a2177e1818" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "test.fl.lima.summary:md5,03be2311ba4afb878d8e547ab38c11eb" - ] - ], - "3": [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "4": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam:md5,c5d3d376ca7ffc32ef5cbabcc9850804" - ] - ], - "5": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam.pbi:md5,d1d6a2f961b9fb3d29837555706c59eb" - ] - ], - "6": [ - - ], - "7": [ - - ], - "8": [ - - ], - "9": [ - - ], - "bam": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam:md5,c5d3d376ca7ffc32ef5cbabcc9850804" - ] - ], - "clips": [ - [ - { - "id": "test" - }, - "test.fl.lima.clips:md5,1012bc8874a14836f291bac48e8482a4" - ] - ], - "counts": [ - [ - { - "id": "test" - }, - "test.fl.lima.counts:md5,a4ceaa408be334eaa711577e95f8730e" - ] - ], - "fasta": [ - - ], - "fastagz": [ - - ], - "fastq": [ - - ], - "fastqgz": [ - - ], - "guess": [ - [ - { - "id": "test" - }, - 
"test.fl.lima.guess:md5,651e5f2b438b8ceadb3e06a2177e1818" - ] - ], - "json": [ - - ], - "pbi": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam.pbi:md5,d1d6a2f961b9fb3d29837555706c59eb" - ] - ], - "report": [ - [ - { - "id": "test" - }, - "test.fl.lima.report:md5,bd4a8bde17471563cf91aab4c787911d" - ] - ], - "summary": [ - [ - { - "id": "test" - }, - "test.fl.lima.summary:md5,03be2311ba4afb878d8e547ab38c11eb" - ] - ], - "versions": [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "xml": [ - - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.3" - }, - "timestamp": "2025-01-13T13:16:11.510737" - }, - "fastq": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T11:35:01.752773" - }, - "LIMA - Primer Removal - Input => fa.gz": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.fl.lima.counts:md5,a4ceaa408be334eaa711577e95f8730e" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test.fl.lima.report:md5,bd4a8bde17471563cf91aab4c787911d" - ] - ], - "10": [ - - ], - "11": [ - - ], - "12": [ - [ - { - "id": "test" - }, - "test.fl.lima.clips:md5,1012bc8874a14836f291bac48e8482a4" - ] - ], - "13": [ - [ - { - "id": "test" - }, - "test.fl.lima.guess:md5,651e5f2b438b8ceadb3e06a2177e1818" - ] - ], - "2": [ - [ - { - "id": "test" - }, - "test.fl.lima.summary:md5,03be2311ba4afb878d8e547ab38c11eb" - ] - ], - "3": [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "4": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam:md5,c5d3d376ca7ffc32ef5cbabcc9850804" - ] - ], - "5": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam.pbi:md5,d1d6a2f961b9fb3d29837555706c59eb" - ] - ], - "6": [ - - ], - "7": [ - - ], - "8": [ - - ], - "9": [ - - ], - "bam": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam:md5,c5d3d376ca7ffc32ef5cbabcc9850804" - ] - ], - "clips": [ - [ - { - "id": "test" - }, - 
"test.fl.lima.clips:md5,1012bc8874a14836f291bac48e8482a4" - ] - ], - "counts": [ - [ - { - "id": "test" - }, - "test.fl.lima.counts:md5,a4ceaa408be334eaa711577e95f8730e" - ] - ], - "fasta": [ - - ], - "fastagz": [ - - ], - "fastq": [ - - ], - "fastqgz": [ - - ], - "guess": [ - [ - { - "id": "test" - }, - "test.fl.lima.guess:md5,651e5f2b438b8ceadb3e06a2177e1818" - ] - ], - "json": [ - - ], - "pbi": [ - [ - { - "id": "test" - }, - "test.fl.NEB_5p--NEB_Clontech_3p.bam.pbi:md5,d1d6a2f961b9fb3d29837555706c59eb" - ] - ], - "report": [ - [ - { - "id": "test" - }, - "test.fl.lima.report:md5,bd4a8bde17471563cf91aab4c787911d" - ] - ], - "summary": [ - [ - { - "id": "test" - }, - "test.fl.lima.summary:md5,03be2311ba4afb878d8e547ab38c11eb" - ] - ], - "versions": [ - "versions.yml:md5,3253df3f697bdcd8cceee60e0b4ebdaf" - ], - "xml": [ - - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.3" - }, - "timestamp": "2025-01-13T13:16:21.321812" - } -} \ No newline at end of file diff --git a/modules/nf-core/lima/tests/nextflow.config b/modules/nf-core/lima/tests/nextflow.config deleted file mode 100644 index ac259b70..00000000 --- a/modules/nf-core/lima/tests/nextflow.config +++ /dev/null @@ -1,7 +0,0 @@ -process { - - publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - ext.args = '--isoseq --peek-guess' - ext.prefix = { "${meta.id}.fl" } - -} diff --git a/modules/nf-core/links/links.diff b/modules/nf-core/links/links.diff new file mode 100644 index 00000000..1386c88a --- /dev/null +++ b/modules/nf-core/links/links.diff @@ -0,0 +1,21 @@ +Changes in component 'nf-core/links' +'modules/nf-core/links/environment.yml' is unchanged +'modules/nf-core/links/meta.yml' is unchanged +Changes in 'links/main.nf': +--- modules/nf-core/links/main.nf ++++ modules/nf-core/links/main.nf +@@ -30,7 +30,8 @@ + script: + def prefix = task.ext.prefix ?: "${meta.id}" + // Currently LINKS does not support more than 4 threads +- def nthreads = 
"${task.cpus}" < 4 ? "${task.cpus}" : 4 ++ def ncpu = "${task.cpus}".toInteger() ++ def nthreads = ncpu < 4 ? ncpu : 4 + def args = task.ext.args ?: "" + """ + if [[ ${assembly} == *.gz ]]; + +'modules/nf-core/links/tests/nextflow.config' is unchanged +'modules/nf-core/links/tests/main.nf.test' is unchanged +'modules/nf-core/links/tests/main.nf.test.snap' is unchanged +************************************************************ diff --git a/modules/nf-core/links/main.nf b/modules/nf-core/links/main.nf index c55bc661..0034f41c 100644 --- a/modules/nf-core/links/main.nf +++ b/modules/nf-core/links/main.nf @@ -22,7 +22,7 @@ process LINKS { tuple val(meta), path("*.assembly_correspondence.tsv"), emit: assembly_correspondence tuple val(meta), path("*.simplepair_checkpoint.tsv"), emit: simplepair_checkpoint, optional: true tuple val(meta), path("*.tigpair_checkpoint.tsv"), emit: tigpair_checkpoint - path "versions.yml", emit: versions + tuple val("${task.process}"), val('liftoff'), eval("echo \$(LINKS | grep -o 'LINKS v.*' | sed 's/LINKS v//')"), emit: versions_links, topic: versions when: task.ext.when == null || task.ext.when @@ -30,7 +30,8 @@ process LINKS { script: def prefix = task.ext.prefix ?: "${meta.id}" // Currently LINKS does not support more than 4 threads - def nthreads = "${task.cpus}" < 4 ? "${task.cpus}" : 4 + def ncpu = "${task.cpus}".toInteger() + def nthreads = ncpu < 4 ? 
ncpu : 4 def args = task.ext.args ?: "" """ if [[ ${assembly} == *.gz ]]; @@ -43,7 +44,7 @@ process LINKS { for read_file in ${reads}; do if [[ \$read_file == *.gz ]]; - then + then gzip -dc \$read_file > \$(basename \$read_file .gz) echo \$(basename \$read_file .gz) >> readfile.fof else diff --git a/modules/nf-core/links/meta.yml b/modules/nf-core/links/meta.yml index 852cf2bb..9cdcde4f 100644 --- a/modules/nf-core/links/meta.yml +++ b/modules/nf-core/links/meta.yml @@ -1,10 +1,9 @@ ---- name: "links" description: | LINKS is a genomics application for scaffolding genome assemblies with long reads, - such as those produced by Oxford Nanopore Technologies Ltd. - It can be used to scaffold high-quality draft genome assemblies with any long sequences - (eg. ONT reads, PacBio reads, other draft genomes, etc). + such as those produced by Oxford Nanopore Technologies Ltd. + It can be used to scaffold high-quality draft genome assemblies with any long sequences + (eg. ONT reads, PacBio reads, other draft genomes, etc). It is also used to scaffold contig pairs linked by ARCS/ARKS. This module is for LINKS >=2.0.0 and does not support MPET input. keywords: @@ -18,9 +17,9 @@ tools: documentation: "https://github.com/bcgsc/LINKS" tool_dev_url: "https://github.com/bcgsc/LINKS" doi: "10.1186/s13742-015-0076-3" - licence: ["GPL v3"] + licence: + - "GPL v3" identifier: "" - input: - - meta: type: map @@ -31,6 +30,7 @@ input: type: file description: (Multi-)fasta file containing the draft assembly pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + ontologies: [] - - meta2: type: map description: | @@ -38,12 +38,14 @@ input: e.g. 
`[ id:'sample1' ]` - reads: type: file - description: fastq file(s) containing the long reads to be used for scaffolding + description: fastq file(s) containing the long reads to be used for + scaffolding pattern: "*.{fq,fastq,fq.gz,fastq.gz}" - + ontologies: + - edam: http://edamontology.org/format_1930 output: - - log: - - meta: + log: + - - meta: type: map description: | Groovy Map containing sample information @@ -52,8 +54,9 @@ output: type: file description: text file; Logs execution time / errors / pairing stats. pattern: "*.log" - - pairing_distribution: - - meta: + ontologies: [] + pairing_distribution: + - - meta: type: map description: | Groovy Map containing sample information @@ -66,8 +69,10 @@ output: within the same contig. 2nd column is the number of pairs at that distance. pattern: "*.pairing_distribution.csv" - - pairing_issues: - - meta: + ontologies: + - edam: http://edamontology.org/format_3752 + pairing_issues: + - - meta: type: map description: | Groovy Map containing sample information @@ -78,8 +83,9 @@ output: text file; Lists all pairing issues encountered between contig pairs and illogical/out-of-bounds pairing. 
pattern: "*.pairing_issues" - - scaffolds_csv: - - meta: + ontologies: [] + scaffolds_csv: + - - meta: type: map description: | Groovy Map containing sample information @@ -88,8 +94,9 @@ output: type: file description: comma-separated file; containing the new scaffold(s) pattern: "*.scaffolds" - - scaffolds_fasta: - - meta: + ontologies: [] + scaffolds_fasta: + - - meta: type: map description: | Groovy Map containing sample information @@ -98,8 +105,9 @@ output: type: file description: fasta file of the new scaffold sequence pattern: "*.scaffolds.fa" - - bloom: - - meta: + ontologies: [] + bloom: + - - meta: type: map description: | Groovy Map containing sample information @@ -110,8 +118,9 @@ output: Bloom filter created by shredding the -f input into k-mers of size -k pattern: "*.bloom" - - scaffolds_graph: - - meta: + ontologies: [] + scaffolds_graph: + - - meta: type: map description: | Groovy Map containing sample information @@ -119,11 +128,12 @@ output: - "*.gv": type: file description: | - scaffold graph (for visualizing merges), can be rendered + scaffold graph (for visualizing merges), can be rendered in neato, graphviz, etc pattern: "*.gv" - - assembly_correspondence: - - meta: + ontologies: [] + assembly_correspondence: + - - meta: type: map description: | Groovy Map containing sample information @@ -135,18 +145,23 @@ output: contig ID, original_name, #linking kmer pairs, links ratio, gap or overlap pattern: "*.assembly_correspondence.tsv" - - simplepair_checkpoint: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 + simplepair_checkpoint: + - - meta: type: map description: | Groovy Map containing sample information e.g. 
`[ id:'sample1']` - "*.simplepair_checkpoint.tsv": type: file - description: checkpoint file, contains info to rebuild datastructure for .gv graph + description: checkpoint file, contains info to rebuild datastructure for + .gv graph pattern: "*.simplepair_checkpoint.tsv" - - tigpair_checkpoint: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 + tigpair_checkpoint: + - - meta: type: map description: | Groovy Map containing sample information @@ -154,21 +169,38 @@ output: - "*.tigpair_checkpoint.tsv": type: file description: | - if -b BASNAME.tigpair_checkpoint.tsv is present, + if -b BASNAME.tigpair_checkpoint.tsv is present, LINKS will skip the kmer pair extraction and contig pairing stages. Delete this file to force LINKS to start at the beginning. - This file can be used to: - 1) quickly test parameters (-l min. links / -a min. links ratio), + This file can be used to: + 1) quickly test parameters (-l min. links / -a min. links ratio), 2) quickly recover from crash, 3) explore very large kmer spaces, 4) scaffold with output of ARCS pattern: "*.tigpair_checkpoint.tsv" - - versions: - - "versions.yml": - type: file - description: File containing software versions - pattern: "versions.yml" - + ontologies: + - edam: http://edamontology.org/format_3475 + versions_links: + - - ${task.process}: + type: string + description: The name of the process + - liftoff: + type: string + description: The name of the tool + - echo \$(LINKS | grep -o 'LINKS v.*' | sed 's/LINKS v//'): + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - liftoff: + type: string + description: The name of the tool + - echo \$(LINKS | grep -o 'LINKS v.*' | sed 's/LINKS v//'): + type: eval + description: The expression to obtain the version of the tool authors: - "@nschan" maintainers: diff --git a/modules/nf-core/links/tests/main.nf.test 
b/modules/nf-core/links/tests/main.nf.test index bbffb1dd..b10ecd46 100644 --- a/modules/nf-core/links/tests/main.nf.test +++ b/modules/nf-core/links/tests/main.nf.test @@ -21,7 +21,7 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), ] input[1] = [ - [ id:'test'], + [ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), ] """ @@ -33,14 +33,14 @@ nextflow_process { { assert process.success }, { assert snapshot( file(process.out.log[0][1]).name, - process.out.pairing_issues, + path(process.out.pairing_issues[0][1]).text == "", process.out.scaffolds_csv, process.out.scaffolds_fasta, process.out.bloom, file(process.out.scaffolds_graph[0][1]).name, process.out.assembly_correspondence, - process.out.tigpair_checkpoint, - process.out.versions + file(process.out.tigpair_checkpoint[0][1]).name, + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) @@ -62,7 +62,7 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/scaffolds.fasta', checkIfExists: true), ] input[1] = [ - [ id:'test'], + [ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), ] """ @@ -74,14 +74,14 @@ nextflow_process { { assert process.success }, { assert snapshot( file(process.out.log[0][1]).name, - process.out.pairing_issues, + file(process.out.pairing_issues[0][1]).text=="", process.out.scaffolds_csv, process.out.scaffolds_fasta, process.out.bloom, file(process.out.scaffolds_graph[0][1]).name, process.out.assembly_correspondence, - process.out.tigpair_checkpoint, - process.out.versions + file(process.out.tigpair_checkpoint[0][1]).text=="", + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) @@ -104,7 +104,7 @@ nextflow_process { file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), ] input[1] = [ - [ id:'test'], + [ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), ] """ diff --git a/modules/nf-core/links/tests/main.nf.test.snap b/modules/nf-core/links/tests/main.nf.test.snap index 8c19d398..a7870de4 100644 --- a/modules/nf-core/links/tests/main.nf.test.snap +++ b/modules/nf-core/links/tests/main.nf.test.snap @@ -2,14 +2,7 @@ "LINKS - sarscov2 test data - scaffolds": { "content": [ "test.log", - [ - [ - { - "id": "test" - }, - "test.pairing_issues:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], + true, [ [ { @@ -43,23 +36,22 @@ "test.assembly_correspondence.tsv:md5,a65d30663dce705d382df52ab87ca8a4" ] ], - [ - [ - { - "id": "test" - }, - "test.tigpair_checkpoint.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + true, + { + "versions_links": [ + [ + "LINKS", + "liftoff", + "2.0.1" + ] ] - ], - [ - "versions.yml:md5,f58863e433b849b1ef0dfc19cb57656b" - ] + } ], + "timestamp": "2026-02-17T16:35:15.663121765", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-04-25T14:13:53.050775593" + "nf-test": "0.9.4", + "nextflow": "25.04.8" + } }, "LINKS - stub": { "content": [ @@ -81,7 +73,11 @@ ] ], "10": [ - "versions.yml:md5,f58863e433b849b1ef0dfc19cb57656b" + [ + "LINKS", + "liftoff", + "2.0.1" + ] ], "2": [ [ @@ -227,28 +223,25 @@ "test.tigpair_checkpoint.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,f58863e433b849b1ef0dfc19cb57656b" + "versions_links": [ + [ + "LINKS", + "liftoff", + "2.0.1" + ] ] } ], + "timestamp": "2026-02-16T11:20:02.239712752", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-04-11T11:49:53.947870525" + "nf-test": "0.9.4", + "nextflow": "26.01.1" + } }, "LINKS - sarscov2 test data - contigs": { "content": [ "test.log", - [ - [ - { - "id": "test" - }, - 
"test.pairing_issues:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], + true, [ [ { @@ -282,22 +275,21 @@ "test.assembly_correspondence.tsv:md5,b36e951b0a1bb4b1c1ccd50925392e3d" ] ], - [ - [ - { - "id": "test" - }, - "test.tigpair_checkpoint.tsv:md5,168f2075f524a86216118c7230ad65e9" + "test.tigpair_checkpoint.tsv", + { + "versions_links": [ + [ + "LINKS", + "liftoff", + "2.0.1" + ] ] - ], - [ - "versions.yml:md5,f58863e433b849b1ef0dfc19cb57656b" - ] + } ], + "timestamp": "2026-02-17T10:53:57.921992204", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-04-25T14:07:49.212617595" + "nf-test": "0.9.4", + "nextflow": "25.04.8" + } } } \ No newline at end of file diff --git a/modules/nf-core/merqury/merqury/main.nf b/modules/nf-core/merqury/merqury/main.nf index 13e07e88..5d3871a5 100644 --- a/modules/nf-core/merqury/merqury/main.nf +++ b/modules/nf-core/merqury/merqury/main.nf @@ -16,19 +16,20 @@ process MERQURY_MERQURY { tuple val(meta), path("*_only.wig") , emit: assembly_only_kmers_wig tuple val(meta), path("*.completeness.stats"), emit: stats tuple val(meta), path("*.dist_only.hist") , emit: dist_hist - tuple val(meta), path("*.spectra-cn.fl.png") , emit: spectra_cn_fl_png, optional: true // optional to make full_test pass, where this file is not created. - tuple val(meta), path("*.spectra-cn.hist") , emit: spectra_cn_hist, optional: true // optional to make full_test pass, where this file is not created. - tuple val(meta), path("*.spectra-cn.ln.png") , emit: spectra_cn_ln_png, optional: true // optional to make full_test pass, where this file is not created. - tuple val(meta), path("*.spectra-cn.st.png") , emit: spectra_cn_st_png, optional: true // optional to make full_test pass, where this file is not created. - tuple val(meta), path("*.spectra-asm.fl.png"), emit: spectra_asm_fl_png, optional: true // optional to make full_test pass, where this file is not created. 
- tuple val(meta), path("*.spectra-asm.hist") , emit: spectra_asm_hist, optional: true // optional to make full_test pass, where this file is not created. - tuple val(meta), path("*.spectra-asm.ln.png"), emit: spectra_asm_ln_png, optional: true // optional to make full_test pass, where this file is not created. - tuple val(meta), path("*.spectra-asm.st.png"), emit: spectra_asm_st_png, optional: true // optional to make full_test pass, where this file is not created. - tuple val(meta), path("${prefix}.qv") , emit: assembly_qv - tuple val(meta), path("${prefix}.*.qv") , emit: scaffold_qv, optional: true // optional to make full_test pass, where this file is not created. - tuple val(meta), path("*.hist.ploidy") , emit: read_ploidy, optional: true // optional to make full_test pass, where this file is not created. - tuple val(meta), path("*.hapmers.blob.png") , emit: hapmers_blob_png, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("*.spectra-cn.fl.png") , emit: spectra_cn_fl_png, optional: true // optional to make full_test pass, where this file is not created. + tuple val(meta), path("*.spectra-cn.hist") , emit: spectra_cn_hist, optional: true // optional to make full_test pass, where this file is not created. + tuple val(meta), path("*.spectra-cn.ln.png") , emit: spectra_cn_ln_png, optional: true // optional to make full_test pass, where this file is not created. + tuple val(meta), path("*.spectra-cn.st.png") , emit: spectra_cn_st_png, optional: true // optional to make full_test pass, where this file is not created. + tuple val(meta), path("*.spectra-asm.fl.png"), emit: spectra_asm_fl_png, optional: true // optional to make full_test pass, where this file is not created. + tuple val(meta), path("*.spectra-asm.hist") , emit: spectra_asm_hist, optional: true // optional to make full_test pass, where this file is not created. 
+ tuple val(meta), path("*.spectra-asm.ln.png"), emit: spectra_asm_ln_png, optional: true // optional to make full_test pass, where this file is not created. + tuple val(meta), path("*.spectra-asm.st.png"), emit: spectra_asm_st_png, optional: true // optional to make full_test pass, where this file is not created. + tuple val(meta), path("${prefix}.qv") , emit: assembly_qv, optional: true // optional to make full_test pass, where this file is not created. + tuple val(meta), path("${prefix}.*.qv") , emit: scaffold_qv, optional: true // optional to make full_test pass, where this file is not created. + tuple val(meta), path("*.hist.ploidy") , emit: read_ploidy, optional: true // optional to make full_test pass, where this file is not created. + tuple val(meta), path("*.hapmers.blob.png") , emit: hapmers_blob_png , optional: true + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + tuple val("${task.process}"), val('merqury'), val('1.3'), emit: versions_merqury, topic: versions when: task.ext.when == null || task.ext.when @@ -36,7 +37,6 @@ process MERQURY_MERQURY { script: // def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = 1.3 // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute) # Check for container variable initialisation script and source it. 
@@ -52,11 +52,6 @@ process MERQURY_MERQURY { $meryl_db \\ $assembly \\ $prefix - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - merqury: $VERSION - END_VERSIONS """ stub: @@ -78,10 +73,5 @@ process MERQURY_MERQURY { touch ${prefix}.qv touch ${prefix}.${prefix}.qv touch ${prefix}.hist.ploidy - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - merqury: $VERSION - END_VERSIONS """ } diff --git a/modules/nf-core/merqury/merqury/merqury-merqury.diff b/modules/nf-core/merqury/merqury/merqury-merqury.diff index a0a65d32..9da9985a 100644 --- a/modules/nf-core/merqury/merqury/merqury-merqury.diff +++ b/modules/nf-core/merqury/merqury/merqury-merqury.diff @@ -4,7 +4,7 @@ Changes in component 'nf-core/merqury/merqury' Changes in 'merqury/merqury/main.nf': --- modules/nf-core/merqury/merqury/main.nf +++ modules/nf-core/merqury/merqury/main.nf -@@ -16,18 +16,18 @@ +@@ -16,17 +16,17 @@ tuple val(meta), path("*_only.wig") , emit: assembly_only_kmers_wig tuple val(meta), path("*.completeness.stats"), emit: stats tuple val(meta), path("*.dist_only.hist") , emit: dist_hist @@ -16,24 +16,23 @@ Changes in 'merqury/merqury/main.nf': - tuple val(meta), path("*.spectra-asm.hist") , emit: spectra_asm_hist - tuple val(meta), path("*.spectra-asm.ln.png"), emit: spectra_asm_ln_png - tuple val(meta), path("*.spectra-asm.st.png"), emit: spectra_asm_st_png -+ tuple val(meta), path("*.spectra-cn.fl.png") , emit: spectra_cn_fl_png, optional: true // optional to make full_test pass, where this file is not created. -+ tuple val(meta), path("*.spectra-cn.hist") , emit: spectra_cn_hist, optional: true // optional to make full_test pass, where this file is not created. -+ tuple val(meta), path("*.spectra-cn.ln.png") , emit: spectra_cn_ln_png, optional: true // optional to make full_test pass, where this file is not created. -+ tuple val(meta), path("*.spectra-cn.st.png") , emit: spectra_cn_st_png, optional: true // optional to make full_test pass, where this file is not created. 
-+ tuple val(meta), path("*.spectra-asm.fl.png"), emit: spectra_asm_fl_png, optional: true // optional to make full_test pass, where this file is not created. -+ tuple val(meta), path("*.spectra-asm.hist") , emit: spectra_asm_hist, optional: true // optional to make full_test pass, where this file is not created. -+ tuple val(meta), path("*.spectra-asm.ln.png"), emit: spectra_asm_ln_png, optional: true // optional to make full_test pass, where this file is not created. -+ tuple val(meta), path("*.spectra-asm.st.png"), emit: spectra_asm_st_png, optional: true // optional to make full_test pass, where this file is not created. - tuple val(meta), path("${prefix}.qv") , emit: assembly_qv +- tuple val(meta), path("${prefix}.qv") , emit: assembly_qv - tuple val(meta), path("${prefix}.*.qv") , emit: scaffold_qv - tuple val(meta), path("*.hist.ploidy") , emit: read_ploidy -- tuple val(meta), path("*.hapmers.blob.png") , emit: hapmers_blob_png , optional: true -+ tuple val(meta), path("${prefix}.*.qv") , emit: scaffold_qv, optional: true // optional to make full_test pass, where this file is not created. -+ tuple val(meta), path("*.hist.ploidy") , emit: read_ploidy, optional: true // optional to make full_test pass, where this file is not created. -+ tuple val(meta), path("*.hapmers.blob.png") , emit: hapmers_blob_png, optional: true - path "versions.yml" , emit: versions - - when: ++ tuple val(meta), path("*.spectra-cn.fl.png") , emit: spectra_cn_fl_png, optional: true // optional to make full_test pass, where this file is not created. ++ tuple val(meta), path("*.spectra-cn.hist") , emit: spectra_cn_hist, optional: true // optional to make full_test pass, where this file is not created. ++ tuple val(meta), path("*.spectra-cn.ln.png") , emit: spectra_cn_ln_png, optional: true // optional to make full_test pass, where this file is not created. 
++ tuple val(meta), path("*.spectra-cn.st.png") , emit: spectra_cn_st_png, optional: true // optional to make full_test pass, where this file is not created. ++ tuple val(meta), path("*.spectra-asm.fl.png"), emit: spectra_asm_fl_png, optional: true // optional to make full_test pass, where this file is not created. ++ tuple val(meta), path("*.spectra-asm.hist") , emit: spectra_asm_hist, optional: true // optional to make full_test pass, where this file is not created. ++ tuple val(meta), path("*.spectra-asm.ln.png"), emit: spectra_asm_ln_png, optional: true // optional to make full_test pass, where this file is not created. ++ tuple val(meta), path("*.spectra-asm.st.png"), emit: spectra_asm_st_png, optional: true // optional to make full_test pass, where this file is not created. ++ tuple val(meta), path("${prefix}.qv") , emit: assembly_qv, optional: true // optional to make full_test pass, where this file is not created. ++ tuple val(meta), path("${prefix}.*.qv") , emit: scaffold_qv, optional: true // optional to make full_test pass, where this file is not created. ++ tuple val(meta), path("*.hist.ploidy") , emit: read_ploidy, optional: true // optional to make full_test pass, where this file is not created. + tuple val(meta), path("*.hapmers.blob.png") , emit: hapmers_blob_png , optional: true + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + tuple val("${task.process}"), val('merqury'), val('1.3'), emit: versions_merqury, topic: versions 'modules/nf-core/merqury/merqury/tests/main.nf.test' is unchanged 'modules/nf-core/merqury/merqury/tests/main.nf.test.snap' is unchanged diff --git a/modules/nf-core/merqury/merqury/meta.yml b/modules/nf-core/merqury/merqury/meta.yml index 7e8d875a..c4ef77c8 100644 --- a/modules/nf-core/merqury/merqury/meta.yml +++ b/modules/nf-core/merqury/merqury/meta.yml @@ -9,7 +9,8 @@ tools: description: "Evaluate genome assemblies with k-mers and more." 
tool_dev_url: "https://github.com/marbl/merqury" doi: "10.1186/s13059-020-02134-9" - licence: ["PUBLIC DOMAIN"] + licence: + - "PUBLIC DOMAIN" identifier: biotools:merqury input: - - meta: @@ -20,12 +21,14 @@ input: - meryl_db: type: file description: "Meryl read database" + ontologies: [] - assembly: type: file description: FASTA assembly file + ontologies: [] output: - - assembly_only_kmers_bed: - - meta: + assembly_only_kmers_bed: + - - meta: type: map description: | Groovy Map containing sample information @@ -35,8 +38,9 @@ output: description: "The positions of the k-mers found only in an assembly for further investigation in .bed" pattern: "*_only.bed" - - assembly_only_kmers_wig: - - meta: + ontologies: [] + assembly_only_kmers_wig: + - - meta: type: map description: | Groovy Map containing sample information @@ -46,8 +50,9 @@ output: description: "The positions of the k-mers found only in an assembly for further investigation in .wig" pattern: "*_only.wig" - - stats: - - meta: + ontologies: [] + stats: + - - meta: type: map description: | Groovy Map containing sample information @@ -56,8 +61,9 @@ output: type: file description: Assembly statistics file pattern: "*.completeness.stats" - - dist_hist: - - meta: + ontologies: [] + dist_hist: + - - meta: type: map description: | Groovy Map containing sample information @@ -66,8 +72,9 @@ output: type: file description: Histogram pattern: "*.dist_only.hist" - - spectra_cn_fl_png: - - meta: + ontologies: [] + spectra_cn_fl_png: + - - meta: type: map description: | Groovy Map containing sample information @@ -76,8 +83,9 @@ output: type: file description: "Unstacked copy number spectra filled plot in PNG format" pattern: "*.spectra-cn.fl.png" - - spectra_cn_hist: - - meta: + ontologies: [] + spectra_cn_hist: + - - meta: type: map description: | Groovy Map containing sample information @@ -86,8 +94,9 @@ output: type: file description: "Copy number spectra histogram" pattern: "*.spectra-cn.hist" - - spectra_cn_ln_png: 
- - meta: + ontologies: [] + spectra_cn_ln_png: + - - meta: type: map description: | Groovy Map containing sample information @@ -96,8 +105,9 @@ output: type: file description: "Unstacked copy number spectra line plot in PNG format" pattern: "*.spectra-cn.ln.png" - - spectra_cn_st_png: - - meta: + ontologies: [] + spectra_cn_st_png: + - - meta: type: map description: | Groovy Map containing sample information @@ -106,8 +116,9 @@ output: type: file description: "Stacked copy number spectra line plot in PNG format" pattern: "*.spectra-cn.st.png" - - spectra_asm_fl_png: - - meta: + ontologies: [] + spectra_asm_fl_png: + - - meta: type: map description: | Groovy Map containing sample information @@ -116,8 +127,9 @@ output: type: file description: "Unstacked assembly spectra filled plot in PNG format" pattern: "*.spectra-asm.fl.png" - - spectra_asm_hist: - - meta: + ontologies: [] + spectra_asm_hist: + - - meta: type: map description: | Groovy Map containing sample information @@ -126,8 +138,9 @@ output: type: file description: "Assembly spectra histogram" pattern: "*.spectra-asm.hist" - - spectra_asm_ln_png: - - meta: + ontologies: [] + spectra_asm_ln_png: + - - meta: type: map description: | Groovy Map containing sample information @@ -136,8 +149,9 @@ output: type: file description: "Unstacked assembly spectra line plot in PNG format" pattern: "*.spectra-asm.ln.png" - - spectra_asm_st_png: - - meta: + ontologies: [] + spectra_asm_st_png: + - - meta: type: map description: | Groovy Map containing sample information @@ -146,8 +160,9 @@ output: type: file description: "Stacked assembly spectra line plot in PNG format" pattern: "*.spectra-asm.st.png" - - assembly_qv: - - meta: + ontologies: [] + assembly_qv: + - - meta: type: map description: | Groovy Map containing sample information @@ -156,8 +171,9 @@ output: type: file description: "Assembly consensus quality estimation" pattern: "*.qv" - - scaffold_qv: - - meta: + ontologies: [] + scaffold_qv: + - - meta: type: map 
description: | Groovy Map containing sample information @@ -166,8 +182,9 @@ output: type: file description: "Scaffold consensus quality estimation" pattern: "*.qv" - - read_ploidy: - - meta: + ontologies: [] + read_ploidy: + - - meta: type: map description: | Groovy Map containing sample information @@ -176,8 +193,9 @@ output: type: file description: "Ploidy estimate from read k-mer database" pattern: "*.hist.ploidy" - - hapmers_blob_png: - - meta: + ontologies: [] + hapmers_blob_png: + - - meta: type: map description: | Groovy Map containing sample information @@ -186,11 +204,28 @@ output: type: file description: "Hap-mer blob plot" pattern: "*.hapmers.blob.png" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_merqury: + - - ${task.process}: + type: string + description: The name of the process + - merqury: + type: string + description: The name of the tool + - '"1.3"': + type: string + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - merqury: + type: string + description: The name of the tool + - '"1.3"': + type: string + description: The expression to obtain the version of the tool authors: - "@mahesh-panchal" maintainers: diff --git a/modules/nf-core/merqury/merqury/tests/main.nf.test b/modules/nf-core/merqury/merqury/tests/main.nf.test index 46a07c02..7d961bb8 100644 --- a/modules/nf-core/merqury/merqury/tests/main.nf.test +++ b/modules/nf-core/merqury/merqury/tests/main.nf.test @@ -66,7 +66,7 @@ nextflow_process { process.out.assembly_qv, process.out.scaffold_qv, process.out.read_ploidy, - process.out.versions, + process.out.findAll { key, val -> key.startsWith('versions') }, file(process.out.spectra_cn_fl_png[0][1]).name, file(process.out.spectra_cn_ln_png[0][1]).name, file(process.out.spectra_cn_st_png[0][1]).name, @@ -146,7 +146,7 @@ 
nextflow_process { process.out.assembly_qv, process.out.scaffold_qv, process.out.read_ploidy, - process.out.versions, + process.out.findAll { key, val -> key.startsWith('versions') }, process.out.spectra_cn_fl_png[0][1] .collect { file(it).name }.join(','), process.out.spectra_cn_ln_png[0][1] .collect { file(it).name }.join(','), process.out.spectra_cn_st_png[0][1] .collect { file(it).name }.join(','), @@ -186,4 +186,4 @@ nextflow_process { } -} \ No newline at end of file +} diff --git a/modules/nf-core/merqury/merqury/tests/main.nf.test.snap b/modules/nf-core/merqury/merqury/tests/main.nf.test.snap index 4081b60f..2c52cd0f 100644 --- a/modules/nf-core/merqury/merqury/tests/main.nf.test.snap +++ b/modules/nf-core/merqury/merqury/tests/main.nf.test.snap @@ -69,7 +69,11 @@ ], "16": [ - "versions.yml:md5,825a4c61369638389227eee16dfb08b5" + [ + "MERQURY_MERQURY", + "merqury", + "1.3" + ] ], "2": [ [ @@ -281,16 +285,20 @@ "test.completeness.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,825a4c61369638389227eee16dfb08b5" + "versions_merqury": [ + [ + "MERQURY_MERQURY", + "merqury", + "1.3" + ] ] } ], + "timestamp": "2026-02-16T14:11:24.534630119", "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-05-22T21:00:35.907142" + "nf-test": "0.9.4", + "nextflow": "26.01.1" + } }, "homo_sapiens-genome": { "content": [ @@ -375,9 +383,15 @@ "test.unionsum.hist.ploidy:md5,3fe5dc377d8c9562980e1b93d01cac94" ] ], - [ - "versions.yml:md5,825a4c61369638389227eee16dfb08b5" - ], + { + "versions_merqury": [ + [ + "MERQURY_MERQURY", + "merqury", + "1.3" + ] + ] + }, "test.genome.spectra-cn.fl.png", "test.genome.spectra-cn.ln.png", "test.genome.spectra-cn.st.png", @@ -385,11 +399,11 @@ "test.spectra-asm.ln.png", "test.spectra-asm.st.png" ], + "timestamp": "2026-02-16T15:50:35.075502455", "meta": { - "nf-test": "0.9.1", - "nextflow": "24.10.0" - }, - "timestamp": "2024-11-04T12:42:32.360126893" + "nf-test": "0.9.4", + 
"nextflow": "25.04.8" + } }, "homo_sapiens-genome-trio": { "content": [ @@ -474,9 +488,15 @@ "test.unionsum.hist.ploidy:md5,3fe5dc377d8c9562980e1b93d01cac94" ] ], - [ - "versions.yml:md5,825a4c61369638389227eee16dfb08b5" - ], + { + "versions_merqury": [ + [ + "MERQURY_MERQURY", + "merqury", + "1.3" + ] + ] + }, "test.genome.spectra-cn.fl.png,test.genome.test.test2_1.spectra-cn.fl.png,test.genome.test.test2_2.spectra-cn.fl.png", "test.genome.spectra-cn.ln.png,test.genome.test.test2_1.spectra-cn.ln.png,test.genome.test.test2_2.spectra-cn.ln.png", "test.genome.spectra-cn.st.png,test.genome.test.test2_1.spectra-cn.st.png,test.genome.test.test2_2.spectra-cn.st.png", @@ -485,10 +505,10 @@ "test.spectra-asm.st.png", "test.hapmers.blob.png" ], + "timestamp": "2026-02-16T15:52:37.264260087", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" - }, - "timestamp": "2024-12-04T20:06:48.980375307" + "nf-test": "0.9.4", + "nextflow": "25.04.8" + } } } \ No newline at end of file diff --git a/modules/nf-core/meryl/count/environment.yml b/modules/nf-core/meryl/count/environment.yml index deebca1f..84d58043 100644 --- a/modules/nf-core/meryl/count/environment.yml +++ b/modules/nf-core/meryl/count/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::meryl=1.4.1 + - bioconda::meryl=1.4.1=h4ac6f70_0 diff --git a/modules/nf-core/meryl/count/main.nf b/modules/nf-core/meryl/count/main.nf index c90079d6..93c4ec0d 100644 --- a/modules/nf-core/meryl/count/main.nf +++ b/modules/nf-core/meryl/count/main.nf @@ -4,8 +4,8 @@ process MERYL_COUNT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/meryl:1.4.1--h4ac6f70_1': - 'biocontainers/meryl:1.4.1--h4ac6f70_1' }" + 'https://depot.galaxyproject.org/singularity/meryl:1.4.1--h4ac6f70_0': + 'biocontainers/meryl:1.4.1--h4ac6f70_0' }" input: tuple val(meta), path(reads) @@ -13,7 +13,8 @@ process MERYL_COUNT { output: tuple val(meta), path("*.meryl") , emit: meryl_db - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('meryl'), eval("meryl --version |& sed 's/meryl //'"), emit: versions_meryl, topic: versions + when: task.ext.when == null || task.ext.when @@ -32,11 +33,6 @@ process MERYL_COUNT { \$READ \\ output ${prefix}.\${READ%.f*}.meryl done - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - meryl: \$( meryl --version |& sed -n 's/.* \\([a-f0-9]\\{40\\}\\))/\\1/p' ) - END_VERSIONS """ stub: @@ -45,10 +41,5 @@ process MERYL_COUNT { for READ in ${reads}; do touch ${prefix}.\${READ%.f*}.meryl done - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - meryl: \$( meryl --version |& sed -n 's/.* \\([a-f0-9]\\{40\\}\\))/\\1/p' ) - END_VERSIONS """ } diff --git a/modules/nf-core/meryl/count/meta.yml b/modules/nf-core/meryl/count/meta.yml index a110a610..080eef04 100644 --- a/modules/nf-core/meryl/count/meta.yml +++ b/modules/nf-core/meryl/count/meta.yml @@ -10,7 +10,8 @@ tools: homepage: "https://github.com/marbl/meryl" documentation: "https://meryl.readthedocs.io/en/latest/quick-start.html" tool_dev_url: "https://github.com/marbl/meryl" - licence: ["GPL"] + licence: + - "GPL" identifier: biotools:meryl input: - - meta: @@ -23,12 +24,13 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. - - - kvalue: - type: integer - description: An integer value of k to use as the k-mer value. + ontologies: [] + - kvalue: + type: integer + description: An integer value of k to use as the k-mer value. 
output: - - meryl_db: - - meta: + meryl_db: + - - meta: type: map description: | Groovy Map containing sample information @@ -37,11 +39,27 @@ output: type: directory description: A Meryl k-mer database pattern: "*.meryl" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + versions_meryl: + - - ${task.process}: + type: string + description: The name of the process + - meryl: + type: string + description: The name of the tool + - meryl --version |& sed 's/meryl //': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - meryl: + type: string + description: The name of the tool + - meryl --version |& sed 's/meryl //': + type: eval + description: The expression to obtain the version of the tool authors: - "@mahesh-panchal" maintainers: diff --git a/modules/nf-core/meryl/count/tests/main.nf.test.snap b/modules/nf-core/meryl/count/tests/main.nf.test.snap index 2e94c03b..ccbb599f 100644 --- a/modules/nf-core/meryl/count/tests/main.nf.test.snap +++ b/modules/nf-core/meryl/count/tests/main.nf.test.snap @@ -142,7 +142,11 @@ ] ], "1": [ - "versions.yml:md5,f1c1f87947a64d681c3f0678036cafeb" + [ + "MERYL_COUNT", + "meryl", + "1.4.1" + ] ], "meryl_db": [ [ @@ -283,16 +287,20 @@ ] ] ], - "versions": [ - "versions.yml:md5,f1c1f87947a64d681c3f0678036cafeb" + "versions_meryl": [ + [ + "MERYL_COUNT", + "meryl", + "1.4.1" + ] ] } ], + "timestamp": "2026-02-17T14:13:53.351513294", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" - }, - "timestamp": "2024-12-04T12:27:45.159763589" + "nf-test": "0.9.4", + "nextflow": "25.04.8" + } }, "bacteroides_fragilis - fastq - stub": { "content": [ @@ -307,7 +315,11 @@ ] ], "1": [ - "versions.yml:md5,f1c1f87947a64d681c3f0678036cafeb" + [ + "MERYL_COUNT", + "meryl", + "1.4.1" + ] ], "meryl_db": [ [ @@ -318,15 +330,19 @@ 
"test.test1_1.meryl:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,f1c1f87947a64d681c3f0678036cafeb" + "versions_meryl": [ + [ + "MERYL_COUNT", + "meryl", + "1.4.1" + ] ] } ], + "timestamp": "2026-02-17T14:14:00.895730474", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" - }, - "timestamp": "2024-12-04T20:07:50.042776716" + "nf-test": "0.9.4", + "nextflow": "25.04.8" + } } } \ No newline at end of file diff --git a/modules/nf-core/meryl/unionsum/environment.yml b/modules/nf-core/meryl/unionsum/environment.yml index deebca1f..84d58043 100644 --- a/modules/nf-core/meryl/unionsum/environment.yml +++ b/modules/nf-core/meryl/unionsum/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::meryl=1.4.1 + - bioconda::meryl=1.4.1=h4ac6f70_0 diff --git a/modules/nf-core/meryl/unionsum/main.nf b/modules/nf-core/meryl/unionsum/main.nf index bc2853b0..8bf3c21b 100644 --- a/modules/nf-core/meryl/unionsum/main.nf +++ b/modules/nf-core/meryl/unionsum/main.nf @@ -13,7 +13,7 @@ process MERYL_UNIONSUM { output: tuple val(meta), path("*.unionsum.meryl"), emit: meryl_db - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('meryl'), eval("meryl --version |& sed 's/meryl //'"), emit: versions_meryl, topic: versions when: task.ext.when == null || task.ext.when @@ -29,11 +29,6 @@ process MERYL_UNIONSUM { $args \\ output ${prefix}.unionsum.meryl \\ $meryl_dbs - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - meryl: \$( meryl --version |& sed 's/meryl //' ) - END_VERSIONS """ stub: @@ -41,10 +36,5 @@ process MERYL_UNIONSUM { def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.unionsum.meryl - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - meryl: \$( meryl --version |& sed 's/meryl //' ) - END_VERSIONS """ } diff --git a/modules/nf-core/meryl/unionsum/meta.yml b/modules/nf-core/meryl/unionsum/meta.yml index e9e13051..add02adf 100644 --- 
a/modules/nf-core/meryl/unionsum/meta.yml +++ b/modules/nf-core/meryl/unionsum/meta.yml @@ -10,7 +10,8 @@ tools: homepage: "https://github.com/marbl/meryl" documentation: "https://meryl.readthedocs.io/en/latest/quick-start.html" tool_dev_url: "https://github.com/marbl/meryl" - licence: ["GPL"] + licence: + - "GPL" identifier: biotools:meryl input: - - meta: @@ -21,25 +22,42 @@ input: - meryl_dbs: type: directory description: Meryl k-mer databases - - - kvalue: - type: integer - description: An integer value of k to use as the k-mer value. + - kvalue: + type: integer + description: An integer value of k to use as the k-mer value. output: - - meryl_db: - - meta: + meryl_db: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - "*.unionsum.meryl": type: directory - description: A Meryl k-mer database that is the union sum of the input databases + description: A Meryl k-mer database that is the union sum of the input + databases pattern: "*.unionsum.meryl" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + versions_meryl: + - - ${task.process}: + type: string + description: The name of the process + - meryl: + type: string + description: The name of the tool + - meryl --version |& sed 's/meryl //': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - meryl: + type: string + description: The name of the tool + - meryl --version |& sed 's/meryl //': + type: eval + description: The expression to obtain the version of the tool authors: - "@mahesh-panchal" maintainers: diff --git a/modules/nf-core/meryl/unionsum/tests/main.nf.test.snap b/modules/nf-core/meryl/unionsum/tests/main.nf.test.snap index fd29eb18..923bac5a 100644 --- a/modules/nf-core/meryl/unionsum/tests/main.nf.test.snap +++ 
b/modules/nf-core/meryl/unionsum/tests/main.nf.test.snap @@ -12,7 +12,11 @@ ] ], "1": [ - "versions.yml:md5,c97980ac5ebd37a77768c105861ad719" + [ + "MERYL_UNIONSUM", + "meryl", + "1.4.1" + ] ], "meryl_db": [ [ @@ -23,16 +27,20 @@ "test.unionsum.meryl:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,c97980ac5ebd37a77768c105861ad719" + "versions_meryl": [ + [ + "MERYL_UNIONSUM", + "meryl", + "1.4.1" + ] ] } ], + "timestamp": "2026-02-17T14:05:04.447542381", "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-05-22T12:40:21.306142" + "nf-test": "0.9.4", + "nextflow": "25.04.8" + } }, "sarscov2 - fastq - single_end": { "content": [ @@ -177,7 +185,11 @@ ] ], "1": [ - "versions.yml:md5,c97980ac5ebd37a77768c105861ad719" + [ + "MERYL_UNIONSUM", + "meryl", + "1.4.1" + ] ], "meryl_db": [ [ @@ -318,16 +330,20 @@ ] ] ], - "versions": [ - "versions.yml:md5,c97980ac5ebd37a77768c105861ad719" + "versions_meryl": [ + [ + "MERYL_UNIONSUM", + "meryl", + "1.4.1" + ] ] } ], + "timestamp": "2026-02-17T14:04:44.447203629", "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-27T10:19:25.091170112" + "nf-test": "0.9.4", + "nextflow": "25.04.8" + } }, "sarscov2 - fastq - paired_end": { "content": [ @@ -472,7 +488,11 @@ ] ], "1": [ - "versions.yml:md5,c97980ac5ebd37a77768c105861ad719" + [ + "MERYL_UNIONSUM", + "meryl", + "1.4.1" + ] ], "meryl_db": [ [ @@ -613,15 +633,19 @@ ] ] ], - "versions": [ - "versions.yml:md5,c97980ac5ebd37a77768c105861ad719" + "versions_meryl": [ + [ + "MERYL_UNIONSUM", + "meryl", + "1.4.1" + ] ] } ], + "timestamp": "2026-02-17T14:04:55.83723977", "meta": { - "nf-test": "0.9.1", - "nextflow": "24.10.0" - }, - "timestamp": "2024-11-04T12:45:34.142906416" + "nf-test": "0.9.4", + "nextflow": "25.04.8" + } } } \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf index 50e3ecf9..b5748313 100644 --- 
a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -19,7 +19,7 @@ process MINIMAP2_ALIGN { tuple val(meta), path("*.paf") , optional: true, emit: paf tuple val(meta), path("*.bam") , optional: true, emit: bam tuple val(meta), path("*.bam.${bam_index_extension}"), optional: true, emit: index - path "versions.yml" , emit: versions + tuple val("${task.process}"), val("minimap2"), eval("minimap2 --version"), topic: versions, emit: versions_minimap2 when: task.ext.when == null || task.ext.when @@ -37,25 +37,17 @@ process MINIMAP2_ALIGN { def bam_input = "${reads.extension}".matches('sam|bam|cram') def samtools_reset_fastq = bam_input ? "samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : '' def query = bam_input ? "-" : reads - def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) - + def target = reference ?: (bam_input ? error("Error: minimap2/align BAM input mode requires reference") : reads) """ $samtools_reset_fastq \\ minimap2 \\ - $args \\ - -t $task.cpus \\ - $target \\ - $query \\ - $cigar_paf \\ - $set_cigar_bam \\ - $bam_output - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - minimap2: \$(minimap2 --version 2>&1) - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS + ${args} \\ + -t ${task.cpus} \\ + ${target} \\ + ${query} \\ + ${cigar_paf} \\ + ${set_cigar_bam} \\ + ${bam_output} """ stub: @@ -63,15 +55,11 @@ process MINIMAP2_ALIGN { def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" def bam_index = bam_index_extension ? "touch ${prefix}.bam.${bam_index_extension}" : "" def bam_input = "${reads.extension}".matches('sam|bam|cram') - def target = reference ?: (bam_input ? 
error("BAM input requires reference") : reads) - + if(bam_input && !reference) { + error("Error: minimap2/align BAM input mode requires reference!") + } """ touch $output_file ${bam_index} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - minimap2: \$(minimap2 --version 2>&1) - END_VERSIONS """ } diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml index a4cfc891..40bb20ad 100644 --- a/modules/nf-core/minimap2/align/meta.yml +++ b/modules/nf-core/minimap2/align/meta.yml @@ -26,6 +26,7 @@ input: description: | List of input FASTA or FASTQ files of size 1 and 2 for single-end and paired-end data, respectively. + ontologies: [] - - meta2: type: map description: | @@ -35,23 +36,24 @@ input: type: file description: | Reference database in FASTA format. - - - bam_format: - type: boolean - description: Specify that output should be in BAM format - - - bam_index_extension: - type: string - description: BAM alignment index extension (e.g. "bai") - - - cigar_paf_format: - type: boolean - description: Specify that output CIGAR should be in PAF format - - - cigar_bam: - type: boolean - description: | - Write CIGAR with >65535 ops at the CG tag. This is recommended when - doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) + ontologies: [] + - bam_format: + type: boolean + description: Specify that output should be in BAM format + - bam_index_extension: + type: string + description: BAM alignment index extension (e.g. "bai") + - cigar_paf_format: + type: boolean + description: Specify that output CIGAR should be in PAF format + - cigar_bam: + type: boolean + description: | + Write CIGAR with >65535 ops at the CG tag. 
This is recommended when + doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) output: - - paf: - - meta: + paf: + - - meta: type: map description: | Groovy Map containing sample information @@ -60,8 +62,9 @@ output: type: file description: Alignment in PAF format pattern: "*.paf" - - bam: - - meta: + ontologies: [] + bam: + - - meta: type: map description: | Groovy Map containing sample information @@ -70,8 +73,9 @@ output: type: file description: Alignment in BAM format pattern: "*.bam" - - index: - - meta: + ontologies: [] + index: + - - meta: type: map description: | Groovy Map containing sample information @@ -80,11 +84,28 @@ output: type: file description: BAM alignment index pattern: "*.bam.*" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_minimap2: + - - ${task.process}: + type: string + description: The process name + - minimap2: + type: string + description: The tool name + - minimap2 --version: + type: eval + description: The tool version +topics: + versions: + - - ${task.process}: + type: string + description: The process name + - minimap2: + type: string + description: The tool name + - minimap2 --version: + type: eval + description: The tool version authors: - "@heuermh" - "@sofstam" diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test index 4072c171..34597d6f 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test +++ b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -36,7 +36,7 @@ nextflow_process { { assert snapshot( bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions_") } ).match() } ) } @@ -71,7 +71,7 @@ nextflow_process { bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), 
file(process.out.index[0][1]).name, - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions_") } ).match() } ) } @@ -108,7 +108,7 @@ nextflow_process { { assert snapshot( bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions_") } ).match() } ) } @@ -142,7 +142,7 @@ nextflow_process { { assert snapshot( bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions_") } ).match() } ) } @@ -176,7 +176,7 @@ nextflow_process { { assert snapshot( bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions_") } ).match() } ) } @@ -211,7 +211,7 @@ nextflow_process { bam(process.out.bam[0][1]).getHeader(), bam(process.out.bam[0][1]).getReadsMD5(), file(process.out.index[0][1]).name, - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions_") } ).match() } ) } @@ -438,4 +438,4 @@ nextflow_process { } -} \ No newline at end of file +} diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap index 89f20336..93e0eb3b 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test.snap +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -9,15 +9,21 @@ ], "5d426b9a5f5b2c54f1d7f1e4c238ae94", "test.bam.bai", - [ - "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:23.829797899" + "timestamp": "2026-01-22T15:02:10.851485367" }, "sarscov2 - bam, fasta, true, 'bai', false, false - stub": { "content": [ 
@@ -44,7 +50,11 @@ ] ], "3": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ], "bam": [ [ @@ -67,16 +77,20 @@ "paf": [ ], - "versions": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:54.665655242" + "timestamp": "2026-01-22T15:02:56.708796666" }, "sarscov2 - fastq, fasta, true, 'bai', false, false - stub": { "content": [ @@ -103,7 +117,11 @@ ] ], "3": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ], "bam": [ [ @@ -126,16 +144,20 @@ "paf": [ ], - "versions": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:38.492212433" + "timestamp": "2026-01-22T15:02:32.614463827" }, "sarscov2 - fastq, fasta, false, [], false, false - stub": { "content": [ @@ -156,7 +178,11 @@ ], "3": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ], "bam": [ @@ -173,16 +199,20 @@ "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:43.879647142" + "timestamp": "2026-01-22T15:02:40.02163098" }, "sarscov2 - fastq, fasta, true, [], false, false - stub": { "content": [ @@ -203,7 +233,11 @@ ], "3": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" 
+ [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ], "bam": [ [ @@ -220,16 +254,20 @@ "paf": [ ], - "versions": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:33.262333471" + "timestamp": "2026-01-22T15:02:25.102539679" }, "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { "content": [ @@ -240,15 +278,21 @@ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "1bc392244f228bf52cf0b5a8f6a654c9", - [ - "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:07.571731983" + "timestamp": "2026-01-22T15:01:46.456636022" }, "sarscov2 - fastq, fasta, true, [], false, false": { "content": [ @@ -259,15 +303,21 @@ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "f194745c0ccfcb2a9c0aee094a08750", - [ - "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:47:56.497792473" + "timestamp": "2026-01-22T15:01:30.525133177" }, "sarscov2 - fastq, fasta, true, 'bai', false, false": { "content": [ @@ -279,15 +329,21 @@ ], "f194745c0ccfcb2a9c0aee094a08750", "test.bam.bai", - [ - "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": 
"0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:01.888544427" + "timestamp": "2026-01-22T15:01:38.84829029" }, "sarscov2 - bam, fasta, true, [], false, false": { "content": [ @@ -298,15 +354,21 @@ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "5d426b9a5f5b2c54f1d7f1e4c238ae94", - [ - "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:18.376062313" + "timestamp": "2026-01-22T15:02:02.351060285" }, "sarscov2 - bam, fasta, true, [], false, false - stub": { "content": [ @@ -327,7 +389,11 @@ ], "3": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ], "bam": [ [ @@ -344,16 +410,20 @@ "paf": [ ], - "versions": [ - "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:49.268693724" + "timestamp": "2026-01-22T15:02:47.579634041" }, "sarscov2 - fastq, [], true, false, false": { "content": [ @@ -463,14 +533,20 @@ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "16c1c651f8ec67383bcdee3c55aed94f", - [ - "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" - ] + { + "versions_minimap2": [ + [ + "MINIMAP2_ALIGN", + "minimap2", + "2.29-r1283" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-04-22T14:48:12.942360555" + "timestamp": "2026-01-22T15:01:54.090788633" } } \ No newline at end of file diff --git a/modules/nf-core/porechop/porechop/environment.yml 
b/modules/nf-core/picard/addorreplacereadgroups/environment.yml similarity index 68% rename from modules/nf-core/porechop/porechop/environment.yml rename to modules/nf-core/picard/addorreplacereadgroups/environment.yml index 109cf8bd..b4ac4fe0 100644 --- a/modules/nf-core/porechop/porechop/environment.yml +++ b/modules/nf-core/picard/addorreplacereadgroups/environment.yml @@ -4,5 +4,5 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::porechop=0.2.4 - - conda-forge::pigz=2.8 + # renovate: datasource=conda depName=bioconda/picard + - bioconda::picard=3.4.0 diff --git a/modules/nf-core/picard/addorreplacereadgroups/main.nf b/modules/nf-core/picard/addorreplacereadgroups/main.nf new file mode 100644 index 00000000..5ef3b7d8 --- /dev/null +++ b/modules/nf-core/picard/addorreplacereadgroups/main.nf @@ -0,0 +1,57 @@ +process PICARD_ADDORREPLACEREADGROUPS { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/08/0861295baa7c01fc593a9da94e82b44a729dcaf8da92be8e565da109aa549b25/data' : + 'community.wave.seqera.io/library/picard:3.4.0--e9963040df0a9bf6' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fasta_index) + + output: + tuple val(meta), path("*.bam") , emit: bam, optional: true + tuple val(meta), path("*.bai") , emit: bai, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val("${task.process}"), val('picard'), eval("picard AddOrReplaceReadGroups --version 2>&1 | sed -n 's/.*Version://p'"), topic: versions, emit: versions_picard + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "${reads.getExtension()}" + def reference = fasta ? 
"--REFERENCE_SEQUENCE ${fasta}" : "" + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard AddOrReplaceReadGroups] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + if ("$reads" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + + """ + picard \\ + -Xmx${avail_mem}M \\ + AddOrReplaceReadGroups \\ + $args \\ + $reference \\ + --INPUT ${reads} \\ + --OUTPUT ${prefix}.${suffix} + + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "${reads.getExtension()}" + if ("$reads" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch ${prefix}.${suffix} + + """ +} diff --git a/modules/nf-core/picard/addorreplacereadgroups/meta.yml b/modules/nf-core/picard/addorreplacereadgroups/meta.yml new file mode 100644 index 00000000..6c3ed759 --- /dev/null +++ b/modules/nf-core/picard/addorreplacereadgroups/meta.yml @@ -0,0 +1,117 @@ +name: picard_addorreplacereadgroups +description: Assigns all the reads in a file to a single new read-group +keywords: + - add + - replace + - read-group + - picard +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037226472-AddOrReplaceReadGroups-Picard- + tool_dev_url: https://github.com/broadinstitute/picard + licence: ["MIT"] + identifier: biotools:picard_tools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: Sequence reads file, can be SAM/BAM/CRAM format + pattern: "*.{bam,cram,sam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Reference genome file + pattern: "*.{fasta,fa,fasta.gz,fa.gz}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta_index: + type: file + description: Reference genome index file + pattern: "*.{fai,fasta.fai,fa.fai,fasta.gz.fai,fa.gz.fai}" + ontologies: [] +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Output BAM file + pattern: "*.{bam}" + ontologies: [] + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: An optional BAM index file + pattern: "*.{bai}" + ontologies: [] + cram: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.cram": + type: file + description: Output CRAM file + pattern: "*.{cram}" + ontologies: [] + versions_picard: + - - ${task.process}: + type: string + description: The process the versions were collected from + - picard: + type: string + description: The tool name + - "picard AddOrReplaceReadGroups --version 2>&1 | sed -n 's/.*Version://p'": + type: string + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - picard: + type: string + description: The tool name + - "picard AddOrReplaceReadGroups --version 2>&1 | sed -n 's/.*Version://p'": + type: string + description: The command used to generate the version of the tool + +authors: + - "@sateeshperi" + - "@mjcipriano" + - "@hseabolt" + - "@cmatKhan" + - "@muffato" +maintainers: + - "@sateeshperi" + - "@mjcipriano" + - "@hseabolt" + - "@cmatKhan" + - "@muffato" diff --git a/modules/nf-core/picard/addorreplacereadgroups/tests/bam.config b/modules/nf-core/picard/addorreplacereadgroups/tests/bam.config new file mode 100644 index 00000000..3f37c2fd --- /dev/null +++ b/modules/nf-core/picard/addorreplacereadgroups/tests/bam.config @@ -0,0 +1,13 @@ +process { + withName: 'PICARD_ADDORREPLACEREADGROUPS'{ + ext.prefix = { "${meta.id}.replaced"} + ext.args = {[ + "--CREATE_INDEX", + "-LB ${meta.id}", + "-PL ILLUMINA", + "-PU bc1", + "-SM ${meta.id}" + ].join(' ').trim()} + } + +} diff --git a/modules/nf-core/picard/addorreplacereadgroups/tests/cram.config b/modules/nf-core/picard/addorreplacereadgroups/tests/cram.config new file mode 100644 index 00000000..966c14d7 --- /dev/null +++ b/modules/nf-core/picard/addorreplacereadgroups/tests/cram.config @@ -0,0 +1,13 @@ +process { + withName: 'PICARD_ADDORREPLACEREADGROUPS'{ + ext.prefix = { "${meta.id}.replaced"} + ext.args = {[ + "-LB ${meta.id}", + "-PL ILLUMINA", + "-PU bc1", + "-SM ${meta.id}" + 
].join(' ').trim()} + ext.suffix = { "cram" } + } + +} diff --git a/modules/nf-core/picard/addorreplacereadgroups/tests/main.nf.test b/modules/nf-core/picard/addorreplacereadgroups/tests/main.nf.test new file mode 100644 index 00000000..45729f3b --- /dev/null +++ b/modules/nf-core/picard/addorreplacereadgroups/tests/main.nf.test @@ -0,0 +1,93 @@ + +nextflow_process { + + name "Test Process PICARD_ADDORREPLACEREADGROUPS" + script "../main.nf" + process "PICARD_ADDORREPLACEREADGROUPS" + + tag "modules" + tag "modules_nfcore" + tag "picard" + tag "picard/addorreplacereadgroups" + + test("sarscov2 - bam") { + config "./bam.config" + + when { + process { + """ + input[0] = [ [:], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) ] + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.bai[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() + }, + ) + } + + } + + test("homo_sapiens - cram") { + config "./cram.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), ] + ] + input[1] = [ [:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + input[2] = [ [:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() + }, + ) + } + + } + + test("sarscov2 - bam - stub") { + config "./bam.config" + options "-stub" + + when { + process { + """ + input[0] = [ 
[:], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) ] + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/picard/addorreplacereadgroups/tests/main.nf.test.snap b/modules/nf-core/picard/addorreplacereadgroups/tests/main.nf.test.snap new file mode 100644 index 00000000..d2a99ee2 --- /dev/null +++ b/modules/nf-core/picard/addorreplacereadgroups/tests/main.nf.test.snap @@ -0,0 +1,94 @@ +{ + "homo_sapiens - cram": { + "content": [ + "test.replaced.cram", + { + "versions_picard": [ + [ + "PICARD_ADDORREPLACEREADGROUPS", + "picard", + "3.4.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T09:36:16.966842212" + }, + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + + }, + "null.replaced.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "PICARD_ADDORREPLACEREADGROUPS", + "picard", + "3.4.0" + ] + ], + "bai": [ + + ], + "bam": [ + [ + { + + }, + "null.replaced.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + + ], + "versions_picard": [ + [ + "PICARD_ADDORREPLACEREADGROUPS", + "picard", + "3.4.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T09:36:29.861163004" + }, + "sarscov2 - bam": { + "content": [ + "null.replaced.bam", + "null.replaced.bai", + { + "versions_picard": [ + [ + "PICARD_ADDORREPLACEREADGROUPS", + "picard", + "3.4.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T09:36:00.935196996" + } +} \ No newline at end of file diff --git a/modules/nf-core/picard/markduplicates/environment.yml b/modules/nf-core/picard/markduplicates/environment.yml new file mode 100644 index 
00000000..b4ac4fe0 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/picard + - bioconda::picard=3.4.0 diff --git a/modules/nf-core/picard/markduplicates/main.nf b/modules/nf-core/picard/markduplicates/main.nf new file mode 100644 index 00000000..10621e01 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/main.nf @@ -0,0 +1,61 @@ +process PICARD_MARKDUPLICATES { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/08/0861295baa7c01fc593a9da94e82b44a729dcaf8da92be8e565da109aa549b25/data' : + 'community.wave.seqera.io/library/picard:3.4.0--e9963040df0a9bf6' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*.bam") , emit: bam, optional: true + tuple val(meta), path("*.bai") , emit: bai, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.metrics.txt"), emit: metrics + tuple val("${task.process}"), val('picard'), eval("picard MarkDuplicates --version 2>&1 | sed -n 's/^Version:*//p'"), topic: versions, emit: versions_picard + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "${reads.getExtension()}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. 
Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + if ("$reads" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + + """ + picard \\ + -Xmx${avail_mem}M \\ + MarkDuplicates \\ + $args \\ + --INPUT $reads \\ + --OUTPUT ${prefix}.${suffix} \\ + $reference \\ + --METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt + + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "${reads.getExtension()}" + if ("$reads" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch ${prefix}.${suffix} + touch ${prefix}.${suffix}.bai + touch ${prefix}.MarkDuplicates.metrics.txt + + """ +} diff --git a/modules/nf-core/picard/markduplicates/meta.yml b/modules/nf-core/picard/markduplicates/meta.yml new file mode 100644 index 00000000..0ec99c7f --- /dev/null +++ b/modules/nf-core/picard/markduplicates/meta.yml @@ -0,0 +1,126 @@ +name: picard_markduplicates +description: Locate and tag duplicate reads in a BAM file +keywords: + - markduplicates + - pcr + - duplicates + - bam + - sam + - cram +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + licence: ["MIT"] + identifier: biotools:picard_tools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: Sequence reads file, can be SAM/BAM/CRAM format + pattern: "*.{bam,cram,sam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: Reference genome fasta file, required for CRAM input + pattern: "*.{fasta,fa}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Reference genome fasta index + pattern: "*.{fai}" + ontologies: [] +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: BAM file with duplicate reads marked/removed + pattern: "*.{bam}" + ontologies: [] + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: An optional BAM index file. If desired, --CREATE_INDEX must be + passed as a flag + pattern: "*.{bai}" + ontologies: [] + cram: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Output CRAM file + pattern: "*.{cram}" + ontologies: [] + metrics: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.metrics.txt": + type: file + description: Duplicate metrics file generated by picard + pattern: "*.{metrics.txt}" + ontologies: [] + versions_picard: + - - ${task.process}: + type: string + description: The process the versions were collected from + - picard: + type: string + description: The tool name + - "picard MarkDuplicates --version 2>&1 | sed -n 's/^Version:*//p'": + type: string + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - picard: + type: string + description: The tool name + - "picard MarkDuplicates --version 2>&1 | sed -n 's/^Version:*//p'": + type: string + description: The command used to generate the version of the tool + +authors: + - "@drpatelh" + - "@projectoriented" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/picard/markduplicates/tests/main.nf.test b/modules/nf-core/picard/markduplicates/tests/main.nf.test new file mode 100644 index 00000000..e18723be --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/main.nf.test @@ -0,0 +1,173 @@ +nextflow_process { + + name "Test Process PICARD_MARKDUPLICATES" + script "../main.nf" + process "PICARD_MARKDUPLICATES" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "picard" + tag "picard/markduplicates" + + test("sarscov2 [unsorted bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + path(process.out.metrics.get(0).get(1)).readLines()[0..2], + process.out.findAll { 
key, val -> key.startsWith("versions") }) + .match() } + ) + } + } + + test("sarscov2 [sorted bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + path(process.out.metrics.get(0).get(1)).readLines()[0..2], + process.out.findAll { key, val -> key.startsWith("versions") }) + .match() } + ) + } + } + + test("homo_sapiens [cram]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + path(process.out.metrics.get(0).get(1)).readLines()[0..2], + process.out.findAll { key, val -> key.startsWith("versions") }) + .match() } + ) + } + } + + test("sarscov2 [unsorted bam] - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + 
test("sarscov2 [sorted bam] - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens [cram] - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap b/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap new file mode 100644 index 00000000..84801384 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap @@ -0,0 +1,329 @@ +{ + "sarscov2 [sorted bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.MarkDuplicates.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + 
"PICARD_MARKDUPLICATES", + "picard", + "3.4.0" + ] + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + + ], + "metrics": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.MarkDuplicates.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_picard": [ + [ + "PICARD_MARKDUPLICATES", + "picard", + "3.4.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T11:46:02.077382134" + }, + "sarscov2 [unsorted bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.MarkDuplicates.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + "PICARD_MARKDUPLICATES", + "picard", + "3.4.0" + ] + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + + ], + "metrics": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.MarkDuplicates.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_picard": [ + [ + "PICARD_MARKDUPLICATES", + "picard", + "3.4.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T11:45:49.985589186" + }, + "sarscov2 [unsorted bam]": { + "content": [ + "test.marked.bam", + [ + "## 
htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_DUP_STRATEGY FLOW_QUALITY_SUM_STRATEGY --FLOW_USE_END_IN_UNPAIRED_READS false --FLOW_USE_UNPAIRED_CLIPPED_END false --FLOW_UNPAIRED_END_UNCERTAINTY 0 --FLOW_UNPAIRED_START_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ], + { + "versions_picard": [ + [ + "PICARD_MARKDUPLICATES", + "picard", + "3.4.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T11:42:49.704752548" + }, + "sarscov2 [sorted bam]": { + "content": [ + "test.marked.bam", + [ + "## htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.sorted.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS 
false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_DUP_STRATEGY FLOW_QUALITY_SUM_STRATEGY --FLOW_USE_END_IN_UNPAIRED_READS false --FLOW_USE_UNPAIRED_CLIPPED_END false --FLOW_UNPAIRED_END_UNCERTAINTY 0 --FLOW_UNPAIRED_START_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ], + { + "versions_picard": [ + [ + "PICARD_MARKDUPLICATES", + "picard", + "3.4.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T11:44:17.407572621" + }, + "homo_sapiens [cram]": { + "content": [ + "test.marked.cram", + [ + "## htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.sorted.cram --OUTPUT test.marked.cram --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_DUP_STRATEGY FLOW_QUALITY_SUM_STRATEGY --FLOW_USE_END_IN_UNPAIRED_READS false --FLOW_USE_UNPAIRED_CLIPPED_END false --FLOW_UNPAIRED_END_UNCERTAINTY 0 --FLOW_UNPAIRED_START_UNCERTAINTY 0 
--FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ], + { + "versions_picard": [ + [ + "PICARD_MARKDUPLICATES", + "picard", + "3.4.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T11:45:33.412603893" + }, + "homo_sapiens [cram] - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.cram.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.MarkDuplicates.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + "PICARD_MARKDUPLICATES", + "picard", + "3.4.0" + ] + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.cram.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + + ], + "cram": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "metrics": [ + [ + { + "id": "test", + "single_end": false + }, + "test.marked.MarkDuplicates.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_picard": [ + [ + "PICARD_MARKDUPLICATES", + "picard", + "3.4.0" + ] + ] + } + ], + "meta": { + 
"nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T11:46:18.599127485" + } +} \ No newline at end of file diff --git a/modules/nf-core/picard/markduplicates/tests/nextflow.config b/modules/nf-core/picard/markduplicates/tests/nextflow.config new file mode 100644 index 00000000..02818dd6 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: PICARD_MARKDUPLICATES { + ext.prefix = { "${meta.id}.marked" } + ext.args = '--ASSUME_SORT_ORDER queryname' + } +} diff --git a/modules/nf-core/pilon/main.nf b/modules/nf-core/pilon/main.nf index 92cac75a..2a3cbe93 100644 --- a/modules/nf-core/pilon/main.nf +++ b/modules/nf-core/pilon/main.nf @@ -18,7 +18,8 @@ process PILON { tuple val(meta), path("*.change"), emit: change_record , optional : true tuple val(meta), path("*.bed") , emit: tracks_bed , optional : true tuple val(meta), path("*.wig") , emit: tracks_wig , optional : true - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('pilon'), eval("pilon --version | sed 's/^.*version //; s/ .*\$//'"), emit: versions_pilon, topic: versions + when: task.ext.when == null || task.ext.when @@ -43,11 +44,6 @@ process PILON { --output ${prefix} \\ $args \\ --$pilon_mode $bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - pilon: \$(echo \$(pilon --version) | sed 's/^.*version //; s/ .*\$//' ) - END_VERSIONS """ stub: @@ -60,11 +56,6 @@ process PILON { touch ${prefix}.change touch ${prefix}.bed touch ${prefix}.wig - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - pilon: \$(echo \$(pilon --version) | sed 's/^.*version //; s/ .*\$//' ) - END_VERSIONS """ } diff --git a/modules/nf-core/pilon/meta.yml b/modules/nf-core/pilon/meta.yml index 38d9006d..db0e96d9 100644 --- a/modules/nf-core/pilon/meta.yml +++ b/modules/nf-core/pilon/meta.yml @@ -1,6 +1,6 @@ name: "pilon" -description: Automatically improve draft assemblies and find variation among strains, - 
including large event detection +description: Automatically improve draft assemblies and find variation among + strains, including large event detection keywords: - polishing - assembly @@ -13,7 +13,8 @@ tools: documentation: "https://github.com/broadinstitute/pilon/wiki/Requirements-&-Usage" tool_dev_url: "https://github.com/broadinstitute/pilon" doi: "10.1371/journal.pone.0112963" - licence: ["GPL-2.0-or-later"] + licence: + - "GPL-2.0-or-later" identifier: biotools:pilon input: - - meta: @@ -25,6 +26,7 @@ input: type: file description: FASTA of the input genome pattern: "*.{fasta}" + ontologies: [] - - meta2: type: map description: | @@ -34,22 +36,28 @@ input: type: file description: BAM file of reads aligned to the input genome pattern: "*.{bam}" + ontologies: [] - bai: type: file description: BAI file (BAM index) of BAM reads aligned to the input genome pattern: "*.{bai}" - - - pilon_mode: - type: string - description: Indicates the type of bam file used (frags for paired-end sequencing - of DNA fragments, such as Illumina paired-end reads of fragment size <1000bp, - jumps for paired sequencing data of larger insert size, such as Illumina mate - pair libraries, typically of insert size >1000bp, unpaired for unpaired sequencing - reads, bam will automatically classify the BAM as one of the three types above - (version 1.17 and higher). - enum: ["frags", "jumps", "unpaired", "bam"] + ontologies: [] + - pilon_mode: + type: string + description: Indicates the type of bam file used (frags for paired-end + sequencing of DNA fragments, such as Illumina paired-end reads of fragment + size <1000bp, jumps for paired sequencing data of larger insert size, such + as Illumina mate pair libraries, typically of insert size >1000bp, + unpaired for unpaired sequencing reads, bam will automatically classify + the BAM as one of the three types above (version 1.17 and higher). 
+ enum: + - "frags" + - "jumps" + - "unpaired" + - "bam" output: - - improved_assembly: - - meta: + improved_assembly: + - - meta: type: map description: | Groovy Map containing sample information @@ -58,8 +66,9 @@ output: type: file description: fasta file, improved assembly pattern: "*.{fasta}" - - vcf: - - meta: + ontologies: [] + vcf: + - - meta: type: map description: | Groovy Map containing sample information @@ -68,44 +77,64 @@ output: type: file description: Pilon variant output pattern: "*.{vcf}" - - change_record: - - meta: + ontologies: [] + change_record: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - "*.change": type: file - description: file containing a space-delimited record of every change made in - the assembly as instructed by the --fix option + description: file containing a space-delimited record of every change + made in the assembly as instructed by the --fix option pattern: "*.{change}" - - tracks_bed: - - meta: + ontologies: [] + tracks_bed: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - "*.bed": type: file - description: files that may be viewed in genome browsers such as IGV, GenomeView, - and other applications that support these formats + description: files that may be viewed in genome browsers such as IGV, + GenomeView, and other applications that support these formats pattern: "*.{bed}" - - tracks_wig: - - meta: + ontologies: [] + tracks_wig: + - - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - "*.wig": type: file - description: files that may be viewed in genome browsers such as IGV, GenomeView, - and other applications that support these formats + description: files that may be viewed in genome browsers such as IGV, + GenomeView, and other applications that support these formats pattern: "*.{wig}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_pilon: + - - ${task.process}: + type: string + description: The name of the process + - pilon: + type: string + description: The name of the tool + - pilon --version | sed 's/^.*version //; s/ .*\$//': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - pilon: + type: string + description: The name of the tool + - pilon --version | sed 's/^.*version //; s/ .*\$//': + type: eval + description: The expression to obtain the version of the tool authors: - "@scorreard" maintainers: diff --git a/modules/nf-core/pilon/tests/main.nf.test.snap b/modules/nf-core/pilon/tests/main.nf.test.snap index 91208228..d8a284f1 100644 --- a/modules/nf-core/pilon/tests/main.nf.test.snap +++ b/modules/nf-core/pilon/tests/main.nf.test.snap @@ -48,7 +48,11 @@ ] ], "5": [ - "versions.yml:md5,73f60b48e5c3838296b66520b61a551a" + [ + "PILON", + "pilon", + "1.24" + ] ], "change_record": [ [ @@ -95,16 +99,20 @@ "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,73f60b48e5c3838296b66520b61a551a" + "versions_pilon": [ + [ + "PILON", + "pilon", + "1.24" + ] ] } ], + "timestamp": "2026-02-16T15:08:00.664784809", "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-27T13:18:02.740029689" + "nf-test": "0.9.4", + "nextflow": "26.01.1" + } }, "homo sapiens bam": { "content": [ @@ -131,7 +139,11 @@ ], "5": [ - 
"versions.yml:md5,73f60b48e5c3838296b66520b61a551a" + [ + "PILON", + "pilon", + "1.24" + ] ], "change_record": [ @@ -154,16 +166,20 @@ "vcf": [ ], - "versions": [ - "versions.yml:md5,73f60b48e5c3838296b66520b61a551a" + "versions_pilon": [ + [ + "PILON", + "pilon", + "1.24" + ] ] } ], + "timestamp": "2026-02-16T15:07:51.999859591", "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-14T11:20:04.988498289" + "nf-test": "0.9.4", + "nextflow": "26.01.1" + } }, "homo sapiens frags": { "content": [ @@ -190,7 +206,11 @@ ], "5": [ - "versions.yml:md5,73f60b48e5c3838296b66520b61a551a" + [ + "PILON", + "pilon", + "1.24" + ] ], "change_record": [ @@ -213,16 +233,20 @@ "vcf": [ ], - "versions": [ - "versions.yml:md5,73f60b48e5c3838296b66520b61a551a" + "versions_pilon": [ + [ + "PILON", + "pilon", + "1.24" + ] ] } ], + "timestamp": "2026-02-16T15:10:08.79891187", "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-14T11:29:53.995788701" + "nf-test": "0.9.4", + "nextflow": "26.01.1" + } }, "homo sapiens frags - stub": { "content": [ @@ -273,7 +297,11 @@ ] ], "5": [ - "versions.yml:md5,73f60b48e5c3838296b66520b61a551a" + [ + "PILON", + "pilon", + "1.24" + ] ], "change_record": [ [ @@ -320,15 +348,19 @@ "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,73f60b48e5c3838296b66520b61a551a" + "versions_pilon": [ + [ + "PILON", + "pilon", + "1.24" + ] ] } ], + "timestamp": "2026-02-16T15:08:13.764321711", "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-27T13:18:31.732478412" + "nf-test": "0.9.4", + "nextflow": "26.01.1" + } } } \ No newline at end of file diff --git a/modules/nf-core/porechop/porechop/main.nf b/modules/nf-core/porechop/porechop/main.nf deleted file mode 100644 index 34daf3e8..00000000 --- a/modules/nf-core/porechop/porechop/main.nf +++ /dev/null @@ -1,49 +0,0 @@ -process PORECHOP_PORECHOP { - tag "$meta.id" - label 
'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/2b/2bce1f10c51906a66c4c4d3a7485394f67e304177192ad1cce6cf586a3a18bae/data' : - 'community.wave.seqera.io/library/porechop_pigz:d1655e5b5bad786c' }" - - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.fastq.gz"), emit: reads - tuple val(meta), path("*.log") , emit: log - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - porechop \\ - -i $reads \\ - -t $task.cpus \\ - $args \\ - -o ${prefix}.fastq.gz \\ - > ${prefix}.log - cat <<-END_VERSIONS > versions.yml - "${task.process}": - porechop: \$( porechop --version ) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.fastq - gzip ${prefix}.fastq - touch ${prefix}.log - cat <<-END_VERSIONS > versions.yml - "${task.process}": - porechop: \$( porechop --version ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/porechop/porechop/meta.yml b/modules/nf-core/porechop/porechop/meta.yml deleted file mode 100644 index 9e61c054..00000000 --- a/modules/nf-core/porechop/porechop/meta.yml +++ /dev/null @@ -1,71 +0,0 @@ -name: "porechop_porechop" -description: Adapter removal and demultiplexing of Oxford Nanopore reads -keywords: - - adapter - - nanopore - - demultiplexing -tools: - - porechop: - description: Adapter removal and demultiplexing of Oxford Nanopore reads - homepage: "https://github.com/rrwick/Porechop" - documentation: "https://github.com/rrwick/Porechop" - tool_dev_url: "https://github.com/rrwick/Porechop" - doi: "10.1099/mgen.0.000132" - licence: ["GPL v3"] - identifier: "" -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - 
e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: fastq/fastq.gz file - pattern: "*.{fastq,fastq.gz,fq,fq.gz}" -output: - - reads: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*.fastq.gz": - type: file - description: Demultiplexed and/or adapter-trimmed fastq.gz file - pattern: "*.{fastq.gz}" - - log: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*.log": - type: file - description: Log file containing stdout information - pattern: "*.log" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@ggabernet" - - "@jasmezz" - - "@d4straub" - - "@LaurenceKuhl" - - "@SusiJo" - - "@jonasscheid" - - "@jonoave" - - "@GokceOGUZ" - - "@jfy133" -maintainers: - - "@ggabernet" - - "@jasmezz" - - "@d4straub" - - "@LaurenceKuhl" - - "@SusiJo" - - "@jonasscheid" - - "@jonoave" - - "@GokceOGUZ" - - "@jfy133" diff --git a/modules/nf-core/porechop/porechop/tests/main.nf.test b/modules/nf-core/porechop/porechop/tests/main.nf.test deleted file mode 100644 index ed3f6986..00000000 --- a/modules/nf-core/porechop/porechop/tests/main.nf.test +++ /dev/null @@ -1,62 +0,0 @@ -nextflow_process { - - name "Test Process PORECHOP_PORECHOP" - script "../main.nf" - process "PORECHOP_PORECHOP" - config "./nextflow.config" - - tag "modules" - tag "modules_nfcore" - tag "porechop" - tag "porechop/porechop" - - test("sarscov2 - nanopore - fastq") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:true ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.reads).match("reads") }, - { assert snapshot(process.out.versions).match("versions") }, - // complete log 
is not stable. These first lines should be stable - { assert snapshot(path(process.out.log.get(0).get(1)).readLines()[0..7]).match("log")} - ) - } - - } - - - test("stub") { - options "-stub" - - when { - process { - """ - input[0] = [ [ id:'test', single_end:true ], - [] - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - -} diff --git a/modules/nf-core/porechop/porechop/tests/main.nf.test.snap b/modules/nf-core/porechop/porechop/tests/main.nf.test.snap deleted file mode 100644 index cf544d2d..00000000 --- a/modules/nf-core/porechop/porechop/tests/main.nf.test.snap +++ /dev/null @@ -1,88 +0,0 @@ -{ - "versions": { - "content": [ - [ - "versions.yml:md5,712c0753b56d0fb530092dfb5bdf2e5c" - ] - ], - "timestamp": "2023-12-18T07:47:16.83444" - }, - "log": { - "content": [ - [ - "", - "\u001b[1m\u001b[4mLoading reads\u001b[0m", - "test.fastq.gz", - "100 reads loaded", - "", - "", - "\u001b[1m\u001b[4mLooking for known adapter sets\u001b[0m", - "" - ] - ], - "timestamp": "2023-12-18T07:47:16.853899" - }, - "reads": { - "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test_porechop.fastq.gz:md5,886fdb859fb50e0dddd35007bcff043e" - ] - ] - ], - "timestamp": "2023-12-18T07:47:16.811393" - }, - "stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - "test_porechop.fastq.gz:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": true - }, - "test_porechop.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,712c0753b56d0fb530092dfb5bdf2e5c" - ], - "log": [ - [ - { - "id": "test", - "single_end": true - }, - "test_porechop.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "reads": [ - [ - { - "id": "test", - "single_end": true - }, - "test_porechop.fastq.gz:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,712c0753b56d0fb530092dfb5bdf2e5c" - ] 
- } - ], - "timestamp": "2023-12-18T07:47:37.814949" - } -} \ No newline at end of file diff --git a/modules/nf-core/porechop/porechop/tests/nextflow.config b/modules/nf-core/porechop/porechop/tests/nextflow.config deleted file mode 100644 index a9ecf7b6..00000000 --- a/modules/nf-core/porechop/porechop/tests/nextflow.config +++ /dev/null @@ -1,9 +0,0 @@ -process { - - - withName: PORECHOP_PORECHOP { - ext.args = '' - ext.prefix = { "${meta.id}_porechop" } - } - -} diff --git a/modules/nf-core/ragtag/patch/main.nf b/modules/nf-core/ragtag/patch/main.nf index 4e8cf455..cfd8e35a 100644 --- a/modules/nf-core/ragtag/patch/main.nf +++ b/modules/nf-core/ragtag/patch/main.nf @@ -8,11 +8,11 @@ process RAGTAG_PATCH { : 'biocontainers/ragtag:2.1.0--pyhb7b1952_0'}" input: - tuple val(meta), path(target, name: 'target/*') + tuple val(meta), path(target, name: 'target/*') tuple val(meta2), path(query, name: 'query/*') tuple val(meta3), path(exclude) tuple val(meta4), path(skip) - + output: tuple val(meta), path("*.patch.fasta"), emit: patch_fasta tuple val(meta), path("*.patch.agp"), emit: patch_agp @@ -23,7 +23,8 @@ process RAGTAG_PATCH { tuple val(meta), path("*.rename.agp"), emit: qry_rename_agp, optional: true tuple val(meta), path("*.rename.fasta"), emit: qry_rename_fasta, optional: true tuple val(meta), path("*.patch.err"), emit: stderr - path "versions.yml", emit: versions + tuple val("${task.process}"), val('ragtag'), eval("ragtag.py -v | sed 's/v//'"), emit: versions_ragtag, topic: versions + when: task.ext.when == null || task.ext.when @@ -56,7 +57,7 @@ process RAGTAG_PATCH { ${arg_exclude} \\ ${arg_skip} \\ ${args} \\ - 2> >( tee ${prefix}.stderr.log >&2 ) \\ + 2>| >( tee ${prefix}.stderr.log >&2 ) \\ | tee ${prefix}.stdout.log kill -TERM "\$tailpid" @@ -76,14 +77,9 @@ process RAGTAG_PATCH { mv ${prefix}/ragtag.patch.err ${prefix}.patch.err # Move the assembly files from prefix folder, and add prefix for alignment_file in \$(ls ${prefix}/ragtag.patch.asm.*); - do + 
do mv "\$alignment_file" "\${alignment_file/${prefix}\\//${prefix}_}" done - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ragtag: \$(echo \$(ragtag.py -v | sed 's/v//')) - END_VERSIONS """ stub: @@ -101,9 +97,5 @@ process RAGTAG_PATCH { touch ${prefix}.rename.fasta touch ${prefix}.ragtag.patch.asm.1 touch ${prefix}.patch.err - - cat <<-END_VERSIONS > versions.yml - ragtag: \$(echo \$(ragtag.py -v | sed 's/v//')) - END_VERSIONS """ } diff --git a/modules/nf-core/ragtag/patch/meta.yml b/modules/nf-core/ragtag/patch/meta.yml index d74ee3d2..30976dba 100644 --- a/modules/nf-core/ragtag/patch/meta.yml +++ b/modules/nf-core/ragtag/patch/meta.yml @@ -1,7 +1,6 @@ name: "ragtag_patch" description: "Homology-based assembly patching: Make continuous joins and fill gaps in 'target.fa' using sequences from 'query.fa'" - keywords: - assembly - consensus @@ -14,7 +13,8 @@ tools: documentation: "https://github.com/malonge/RagTag/wiki" tool_dev_url: "https://github.com/malonge/RagTag" doi: "10.1186/s13059-022-02823-7" - licence: ["MIT"] + licence: + - "MIT" identifier: biotools:ragtag input: - - meta: @@ -26,6 +26,7 @@ input: type: file description: Target assembly pattern: "*.{fasta,fasta.gz}" + ontologies: [] - - meta2: type: map description: | @@ -35,6 +36,7 @@ input: type: file description: Query assembly pattern: "*.{fasta,fasta.gz}" + ontologies: [] - - meta3: type: map description: | @@ -44,6 +46,7 @@ input: type: file description: list of target sequences to ignore pattern: "*.txt" + ontologies: [] - - meta4: type: map description: | @@ -53,9 +56,10 @@ input: type: file description: list of query sequences to ignore pattern: "*.txt" + ontologies: [] output: - - patch_fasta: - - meta: + patch_fasta: + - - meta: type: map description: | Groovy Map containing sample information @@ -64,8 +68,9 @@ output: type: file description: FASTA file containing the patched assembly pattern: "*.patch.fasta" - - patch_agp: - - meta: + ontologies: [] + patch_agp: + - - meta: 
type: map description: | Groovy Map containing sample information @@ -74,19 +79,22 @@ output: type: file description: AGP file defining how ragtag.patch.fasta is built pattern: "*.patch.agp" - - patch_components_fasta: - - meta: + ontologies: [] + patch_components_fasta: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test' ] - "*.comps.fasta": type: file - description: The split target assembly and the renamed query assembly combined - into one FASTA file. This file contains all components in ragtag.patch.agp + description: The split target assembly and the renamed query assembly + combined into one FASTA file. This file contains all components in + ragtag.patch.agp pattern: "*.comps.fasta" - - assembly_alignments: - - meta: + ontologies: [] + assembly_alignments: + - - meta: type: map description: | Groovy Map containing sample information @@ -95,18 +103,21 @@ output: type: file description: Assembly alignment files pattern: "*.ragtag.patch.asm.*" - - target_splits_agp: - - meta: + ontologies: [] + target_splits_agp: + - - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test' ] - "*.ctg.agp": type: file - description: An AGP file defining how the target assembly was split at gaps + description: An AGP file defining how the target assembly was split at + gaps pattern: "*.ctg.agp" - - target_splits_fasta: - - meta: + ontologies: [] + target_splits_fasta: + - - meta: type: map description: | Groovy Map containing sample information @@ -115,8 +126,9 @@ output: type: file description: FASTA file containing the target assembly split at gaps pattern: "*.ctg.fasta" - - qry_rename_agp: - - meta: + ontologies: [] + qry_rename_agp: + - - meta: type: map description: | Groovy Map containing sample information @@ -125,18 +137,21 @@ output: type: file description: An AGP file defining the new names for query sequences pattern: "*.rename.agp" - - qry_rename_fasta: - - meta: + ontologies: [] + qry_rename_fasta: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test' ] - "*.rename.fasta": type: file - description: A FASTA file with the original query sequence, but with new names + description: A FASTA file with the original query sequence, but with new + names pattern: "*.rename.fasta" - - stderr: - - meta: + ontologies: [] + stderr: + - - meta: type: map description: | Groovy Map containing sample information @@ -145,11 +160,28 @@ output: type: file description: Standard error logging for all external RagTag commands pattern: "*.patch.err" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_ragtag: + - - ${task.process}: + type: string + description: The name of the process + - ragtag: + type: string + description: The name of the tool + - ragtag.py -v | sed 's/v//': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - ragtag: + type: string + description: The name of the tool + - 
ragtag.py -v | sed 's/v//': + type: eval + description: The expression to obtain the version of the tool authors: - "@nschan" maintainers: diff --git a/modules/nf-core/ragtag/patch/tests/main.nf.test b/modules/nf-core/ragtag/patch/tests/main.nf.test index a7c0fee8..e532a95d 100644 --- a/modules/nf-core/ragtag/patch/tests/main.nf.test +++ b/modules/nf-core/ragtag/patch/tests/main.nf.test @@ -44,7 +44,7 @@ test("A. thaliana Col-0 test data - ragtag - patch") { process.out.patch_components_fasta, process.out.target_splits_agp, process.out.target_splits_fasta, - process.out.versions + process.out.findAll { key, val -> key.startsWith('versions') } ).match() }, ) diff --git a/modules/nf-core/ragtag/patch/tests/main.nf.test.snap b/modules/nf-core/ragtag/patch/tests/main.nf.test.snap index b1444692..db994efe 100644 --- a/modules/nf-core/ragtag/patch/tests/main.nf.test.snap +++ b/modules/nf-core/ragtag/patch/tests/main.nf.test.snap @@ -75,7 +75,11 @@ ] ], "9": [ - "versions.yml:md5,cecbb39907d607affa6522e395b78a1f" + [ + "RAGTAG_PATCH", + "ragtag", + "2.1.0" + ] ], "assembly_alignments": [ [ @@ -149,16 +153,20 @@ "test.ctg.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,cecbb39907d607affa6522e395b78a1f" + "versions_ragtag": [ + [ + "RAGTAG_PATCH", + "ragtag", + "2.1.0" + ] ] } ], + "timestamp": "2026-02-16T15:14:07.370603076", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-04-04T14:10:01.648597527" + "nf-test": "0.9.4", + "nextflow": "25.04.8" + } }, "A. 
thaliana Col-0 test data - ragtag - patch": { "content": [ @@ -202,14 +210,20 @@ "test.ctg.fasta:md5,5cf615df690061ab15e4fee62abf3ebc" ] ], - [ - "versions.yml:md5,4c0992a27edf294209711ce4f181eb5a" - ] + { + "versions_ragtag": [ + [ + "RAGTAG_PATCH", + "ragtag", + "2.1.0" + ] + ] + } ], + "timestamp": "2026-02-16T15:55:07.632218179", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-04-04T14:25:42.121285998" + "nf-test": "0.9.4", + "nextflow": "25.04.8" + } } } \ No newline at end of file diff --git a/modules/nf-core/ragtag/scaffold/main.nf b/modules/nf-core/ragtag/scaffold/main.nf index c3930c12..e96bfc05 100644 --- a/modules/nf-core/ragtag/scaffold/main.nf +++ b/modules/nf-core/ragtag/scaffold/main.nf @@ -17,7 +17,7 @@ process RAGTAG_SCAFFOLD { tuple val(meta), path("*.fasta"), emit: corrected_assembly tuple val(meta), path("*.agp"), emit: corrected_agp tuple val(meta), path("*.stats"), emit: corrected_stats - path "versions.yml", emit: versions + tuple val("${task.process}"), val('ragtag'), eval("ragtag.py -v | sed 's/v//'"), emit: versions_ragtag, topic: versions when: task.ext.when == null || task.ext.when @@ -51,17 +51,12 @@ process RAGTAG_SCAFFOLD { ${arg_skip} \\ ${arg_hard_skip} \\ ${args} \\ - 2> >( tee ${prefix}.stderr.log >&2 ) \\ + 2>| >( tee ${prefix}.stderr.log >&2 ) \\ | tee ${prefix}.stdout.log mv ${prefix}/ragtag.scaffold.fasta ${prefix}.fasta mv ${prefix}/ragtag.scaffold.agp ${prefix}.agp mv ${prefix}/ragtag.scaffold.stats ${prefix}.stats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ragtag: \$(echo \$(ragtag.py -v | sed 's/v//')) - END_VERSIONS """ stub: @@ -74,9 +69,5 @@ process RAGTAG_SCAFFOLD { touch ${prefix}.fasta touch ${prefix}.agp touch ${prefix}.stats - - cat <<-END_VERSIONS > versions.yml - ragtag: \$(echo \$(ragtag.py -v | sed 's/v//')) - END_VERSIONS """ } diff --git a/modules/nf-core/ragtag/scaffold/meta.yml b/modules/nf-core/ragtag/scaffold/meta.yml index 62eb0e49..5f3127fc 100644 --- 
a/modules/nf-core/ragtag/scaffold/meta.yml +++ b/modules/nf-core/ragtag/scaffold/meta.yml @@ -1,4 +1,3 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: ragtag_scaffold description: | Scaffolding is the process of ordering and orienting draft assembly (query) @@ -18,9 +17,9 @@ tools: documentation: "https://github.com/malonge/RagTag/wiki" tool_dev_url: "https://github.com/malonge/RagTag" doi: "10.1186/s13059-022-02823-7" - licence: ["MIT"] + licence: + - "MIT" identifier: biotools:ragtag - input: - - meta: type: map @@ -31,6 +30,7 @@ input: type: file description: Assembly to be scaffolded pattern: "*.{fasta,fasta.gz,fa,fa.gz}" + ontologies: [] - - meta2: type: map description: | @@ -40,6 +40,7 @@ input: type: file description: Reference assembly pattern: "*.{fasta,fasta.gz,fa,fa.gz}" + ontologies: [] - - meta3: type: map description: | @@ -49,6 +50,7 @@ input: type: file description: list of target sequences to ignore pattern: "*.txt" + ontologies: [] - - meta4: type: map description: | @@ -58,14 +60,16 @@ input: type: file description: list of query sequences to leave unplaced pattern: "*.txt" + ontologies: [] - hard_skip: type: file - description: list of query headers to leave unplaced and exclude from 'chr0' - ('-C') + description: list of query headers to leave unplaced and exclude from + 'chr0' ('-C') pattern: "*.txt" + ontologies: [] output: - - corrected_assembly: - - meta: + corrected_assembly: + - - meta: type: map description: | Groovy Map containing sample information @@ -74,8 +78,9 @@ output: type: file description: FASTA file containing the patched assembly pattern: "*.fasta" - - corrected_agp: - - meta: + ontologies: [] + corrected_agp: + - - meta: type: map description: | Groovy Map containing sample information @@ -84,8 +89,9 @@ output: type: file description: agp file defining how corrected_assembly is built pattern: "*.agp" - - corrected_stats: - - meta: + ontologies: [] + 
corrected_stats: + - - meta: type: map description: | Groovy Map containing sample information @@ -94,12 +100,28 @@ output: type: file description: Statistics on the scaffold pattern: "*.stats" - - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_ragtag: + - - ${task.process}: + type: string + description: The name of the process + - ragtag: + type: string + description: The name of the tool + - ragtag.py -v | sed 's/v//': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - ragtag: + type: string + description: The name of the tool + - ragtag.py -v | sed 's/v//': + type: eval + description: The expression to obtain the version of the tool authors: - "@nschan" maintainers: diff --git a/modules/nf-core/ragtag/scaffold/tests/main.nf.test b/modules/nf-core/ragtag/scaffold/tests/main.nf.test index 51b42642..84e6b09e 100644 --- a/modules/nf-core/ragtag/scaffold/tests/main.nf.test +++ b/modules/nf-core/ragtag/scaffold/tests/main.nf.test @@ -30,7 +30,7 @@ nextflow_process { [], [], [] - ] + ] """ } } @@ -66,7 +66,7 @@ nextflow_process { [], [], [] - ] + ] """ } } diff --git a/modules/nf-core/ragtag/scaffold/tests/main.nf.test.snap b/modules/nf-core/ragtag/scaffold/tests/main.nf.test.snap index e4faf0b0..cb27f70a 100644 --- a/modules/nf-core/ragtag/scaffold/tests/main.nf.test.snap +++ b/modules/nf-core/ragtag/scaffold/tests/main.nf.test.snap @@ -27,7 +27,11 @@ ] ], "3": [ - "versions.yml:md5,48710c1720f668d8ba3397f99892959e" + [ + "RAGTAG_SCAFFOLD", + "ragtag", + "2.1.0" + ] ], "corrected_agp": [ [ @@ -53,16 +57,20 @@ "test.stats:md5,209e973e4bac1653b8d5fddb7fa13b63" ] ], - "versions": [ - "versions.yml:md5,48710c1720f668d8ba3397f99892959e" + "versions_ragtag": [ + [ + "RAGTAG_SCAFFOLD", + "ragtag", + "2.1.0" + ] ] } ], + "timestamp": 
"2026-02-16T15:16:15.735574744", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-04-04T13:37:54.181644032" + "nf-test": "0.9.4", + "nextflow": "26.01.1" + } }, "A. thaliana Col-0 test data - ragtag - scaffold - stub": { "content": [ @@ -92,7 +100,11 @@ ] ], "3": [ - "versions.yml:md5,cecbb39907d607affa6522e395b78a1f" + [ + "RAGTAG_SCAFFOLD", + "ragtag", + "2.1.0" + ] ], "corrected_agp": [ [ @@ -118,15 +130,19 @@ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,cecbb39907d607affa6522e395b78a1f" + "versions_ragtag": [ + [ + "RAGTAG_SCAFFOLD", + "ragtag", + "2.1.0" + ] ] } ], + "timestamp": "2026-02-16T15:16:24.915889235", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-04-04T13:38:21.635495713" + "nf-test": "0.9.4", + "nextflow": "26.01.1" + } } } \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml new file mode 100644 index 00000000..89e12a64 --- /dev/null +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf new file mode 100644 index 00000000..97bfb578 --- /dev/null +++ b/modules/nf-core/samtools/faidx/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_FAIDX { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" + + input: + tuple val(meta), path(fasta), path(fai) + val get_sizes + + output: + tuple val(meta), path ("*.{fa,fasta}") , emit: fa, optional: true + tuple val(meta), path ("*.sizes") , emit: sizes, optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def get_sizes_command = get_sizes ? "cut -f 1,2 ${fasta}.fai > ${fasta}.sizes" : '' + """ + samtools \\ + faidx \\ + $fasta \\ + $args + + ${get_sizes_command} + """ + + stub: + def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? "touch ${match[0][1]}" : '' + def get_sizes_command = get_sizes ? "touch ${fasta}.sizes" : '' + """ + ${fastacmd} + touch ${fasta}.fai + if [[ "${fasta.extension}" == "gz" ]]; then + touch ${fasta}.gzi + fi + + ${get_sizes_command} + """ +} diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml new file mode 100644 index 00000000..80aae1da --- /dev/null +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -0,0 +1,112 @@ +name: samtools_faidx +description: Index FASTA file, and optionally generate a file of chromosome + sizes +keywords: + - index + - fasta + - faidx + - chromosome +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. 
+ homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: + - "MIT" + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" + ontologies: [] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + ontologies: [] + - get_sizes: + type: boolean + description: use cut to get the sizes of the index (true) or not (false) +output: + fa: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{fa,fasta}": + type: file + description: FASTA file + pattern: "*.{fa}" + ontologies: [] + sizes: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.sizes": + type: file + description: File containing chromosome lengths + pattern: "*.{sizes}" + ontologies: [] + fai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fai": + type: file + description: FASTA index file + pattern: "*.{fai}" + ontologies: [] + gzi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.gzi": + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: eval + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: eval + description: The command used to generate the version of the tool +authors: + - "@drpatelh" + - "@ewels" + - "@phue" +maintainers: + - "@maxulysse" + - "@phue" diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test b/modules/nf-core/samtools/faidx/tests/main.nf.test new file mode 100644 index 00000000..9a86db86 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test @@ -0,0 +1,253 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FAIDX" + script "../main.nf" + process "SAMTOOLS_FAIDX" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/faidx" + config "./nextflow.config" + + test("test_samtools_faidx") { + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = false + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + + test("test_samtools_faidx_bgzip") { + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = false + """ + } 
+ } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + + test("test_samtools_faidx_fasta") { + + when { + params { + module_args = 'MT192765.1 -o extract.fa' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = false + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + + test("test_samtools_faidx_stub_fasta") { + + options "-stub" + when { + params { + module_args = '-o extract.fa' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = false + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + + test("test_samtools_faidx_stub_fai") { + + options "-stub" + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = false + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + + test("test_samtools_faidx_get_sizes") { + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = true + """ + } + } + + then { + assert process.success + assertAll( + { assert 
snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + + test("test_samtools_faidx_get_sizes_bgzip") { + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = true + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + + test("test_samtools_faidx_get_sizes - stub") { + + options "-stub" + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = true + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + + test("test_samtools_faidx_get_sizes_bgzip - stub") { + + options "-stub" + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = true + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + +} diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap new file mode 100644 index 00000000..41697444 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -0,0 +1,352 @@ +{ + "test_samtools_faidx": { + "content": [ + { + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "sizes": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": 
"2026-02-10T15:39:12.541649151" + }, + "test_samtools_faidx_get_sizes_bgzip - stub": { + "content": [ + { + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gzi": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T15:41:44.040426987" + }, + "test_samtools_faidx_get_sizes": { + "content": [ + { + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T15:47:03.653912015" + }, + "test_samtools_faidx_bgzip": { + "content": [ + { + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "sizes": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T15:50:04.023566795" + }, + "test_samtools_faidx_fasta": { + "content": [ + { + "fa": [ + [ + { + "id": "test" + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "sizes": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + 
"nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T15:39:23.529404162" + }, + "test_samtools_faidx_get_sizes - stub": { + "content": [ + { + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gzi": [ + + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T15:41:39.039834304" + }, + "test_samtools_faidx_stub_fasta": { + "content": [ + { + "fa": [ + [ + { + "id": "test" + }, + "extract.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "sizes": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T15:39:28.961701609" + }, + "test_samtools_faidx_stub_fai": { + "content": [ + { + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gzi": [ + + ], + "sizes": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T15:39:34.471028474" + }, + "test_samtools_faidx_get_sizes_bgzip": { + "content": [ + { + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + 
"nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T15:39:45.439016495" + } +} \ No newline at end of file diff --git a/modules/nf-core/trimgalore/tests/nextflow.config b/modules/nf-core/samtools/faidx/tests/nextflow.config similarity index 62% rename from modules/nf-core/trimgalore/tests/nextflow.config rename to modules/nf-core/samtools/faidx/tests/nextflow.config index d8e3ac13..202c036e 100644 --- a/modules/nf-core/trimgalore/tests/nextflow.config +++ b/modules/nf-core/samtools/faidx/tests/nextflow.config @@ -1,5 +1,7 @@ process { - withName: TRIMGALORE { + + withName: SAMTOOLS_FAIDX { ext.args = params.module_args } + } diff --git a/modules/nf-core/samtools/fastq/environment.yml b/modules/nf-core/samtools/fastq/environment.yml index 62054fc9..89e12a64 100644 --- a/modules/nf-core/samtools/fastq/environment.yml +++ b/modules/nf-core/samtools/fastq/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/fastq/main.nf b/modules/nf-core/samtools/fastq/main.nf index 696d668f..922dbec3 100644 --- a/modules/nf-core/samtools/fastq/main.nf +++ b/modules/nf-core/samtools/fastq/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_FASTQ { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(input) @@ -16,7 +16,7 @@ process SAMTOOLS_FASTQ { tuple val(meta), path("*_interleaved.fastq") , optional:true, emit: interleaved tuple val(meta), path("*_singleton.fastq.gz") , optional:true, emit: singleton tuple val(meta), path("*_other.fastq.gz") , optional:true, emit: other - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), emit: versions_samtools, topic: versions when: task.ext.when == null || task.ext.when @@ -28,6 +28,7 @@ process SAMTOOLS_FASTQ { meta.single_end ? "-1 ${prefix}_1.fastq.gz -s ${prefix}_singleton.fastq.gz" : "-1 ${prefix}_1.fastq.gz -2 ${prefix}_2.fastq.gz -s ${prefix}_singleton.fastq.gz" """ + # Note: --threads value represents *additional* CPUs to allocate (total CPUs = 1 + --threads). 
samtools \\ fastq \\ $args \\ @@ -35,11 +36,6 @@ process SAMTOOLS_FASTQ { -0 ${prefix}_other.fastq.gz \\ $input \\ $output - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ stub: @@ -50,10 +46,5 @@ process SAMTOOLS_FASTQ { """ ${output} echo | gzip > ${prefix}_other.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/samtools/fastq/meta.yml b/modules/nf-core/samtools/fastq/meta.yml index c15a0b6f..cab17ffa 100644 --- a/modules/nf-core/samtools/fastq/meta.yml +++ b/modules/nf-core/samtools/fastq/meta.yml @@ -14,7 +14,8 @@ tools: homepage: http://www.htslib.org/ documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] + licence: + - "MIT" identifier: biotools:samtools input: - - meta: @@ -26,34 +27,38 @@ input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - - - interleave: - type: boolean - description: Set true for interleaved fastq file + ontologies: [] + - interleave: + type: boolean + description: Set true for interleaved fastq file output: - - fastq: - - meta: + fastq: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - "*_{1,2}.fastq.gz": type: file - description: Compressed FASTQ file(s) with reads with either the READ1 or READ2 - flag set in separate files. + description: Compressed FASTQ file(s) with reads with either the READ1 + or READ2 flag set in separate files. pattern: "*_{1,2}.fastq.gz" - - interleaved: - - meta: + ontologies: [] + interleaved: + - - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - "*_interleaved.fastq": type: file - description: Compressed FASTQ file with reads with either the READ1 or READ2 - flag set in a combined file. Needs collated input file. + description: Compressed FASTQ file with reads with either the READ1 or + READ2 flag set in a combined file. Needs collated input file. pattern: "*_interleaved.fastq.gz" - - singleton: - - meta: + ontologies: + - edam: http://edamontology.org/format_3989 + singleton: + - - meta: type: map description: | Groovy Map containing sample information @@ -62,22 +67,42 @@ output: type: file description: Compressed FASTQ file with singleton reads pattern: "*_singleton.fastq.gz" - - other: - - meta: + ontologies: + - edam: http://edamontology.org/format_3989 + other: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - "*_other.fastq.gz": type: file - description: Compressed FASTQ file with reads with either both READ1 and READ2 - flags set or unset + description: Compressed FASTQ file with reads with either both READ1 and + READ2 flags set or unset pattern: "*_other.fastq.gz" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3989 + versions_samtools: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool authors: - "@priyanka-surana" - "@suzannejin" diff --git 
a/modules/nf-core/samtools/fastq/tests/main.nf.test b/modules/nf-core/samtools/fastq/tests/main.nf.test index 971ea1d4..46ec7e7c 100644 --- a/modules/nf-core/samtools/fastq/tests/main.nf.test +++ b/modules/nf-core/samtools/fastq/tests/main.nf.test @@ -32,7 +32,7 @@ nextflow_process { { assert snapshot(process.out.interleaved).match("bam_interleaved") }, { assert snapshot(file(process.out.singleton[0][1]).name).match("bam_singleton") }, { assert snapshot(file(process.out.other[0][1]).name).match("bam_other") }, - { assert snapshot(process.out.versions).match("bam_versions") } + { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("bam_versions") } ) } } @@ -60,7 +60,7 @@ nextflow_process { { assert snapshot(path(process.out.interleaved[0][1]).readLines()[0..6]).match("bam_interlinterleave_eaved") }, { assert snapshot(process.out.singleton).match("bam_singinterleave_leton") }, { assert snapshot(file(process.out.other[0][1]).name).match("bam_interleave_other") }, - { assert snapshot(process.out.versions).match("bam_verinterleave_sions") } + { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("bam_verinterleave_sions") } ) } } @@ -86,7 +86,13 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.fastq, + process.out.interleaved, + process.out.singleton, + process.out.other, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } ) } } @@ -112,7 +118,13 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.fastq, + process.out.interleaved, + process.out.singleton, + process.out.other, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } ) } } diff --git a/modules/nf-core/samtools/fastq/tests/main.nf.test.snap b/modules/nf-core/samtools/fastq/tests/main.nf.test.snap 
index ff63f9ae..17b5ade9 100644 --- a/modules/nf-core/samtools/fastq/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/fastq/tests/main.nf.test.snap @@ -29,86 +29,54 @@ }, "bam - stub": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - [ - "test_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ] - ], - "1": [ - - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "test_singleton.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "test_other.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "4": [ - "versions.yml:md5,11e074d69900de5a7dfdbe1fb4e789fd" - ], - "fastq": [ - [ - { - "id": "test", - "single_end": false - }, - [ - "test_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ] - ], - "interleaved": [ - - ], - "other": [ + [ + [ + { + "id": "test", + "single_end": false + }, [ - { - "id": "test", - "single_end": false - }, - "test_other.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + "test_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] - ], - "singleton": [ + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_singleton.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_other.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + { + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test_singleton.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + "SAMTOOLS_FASTQ", + "samtools", + "1.22.1" ] - ], - "versions": [ - "versions.yml:md5,11e074d69900de5a7dfdbe1fb4e789fd" ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-03-05T12:50:58.986886415" + 
"timestamp": "2026-02-02T17:05:55.514934" }, "bam_fastq": { "content": [ @@ -175,27 +143,39 @@ }, "bam_versions": { "content": [ - [ - "versions.yml:md5,11e074d69900de5a7dfdbe1fb4e789fd" - ] + { + "versions_samtools": [ + [ + "SAMTOOLS_FASTQ", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T08:00:41.44921616" + "timestamp": "2026-02-02T17:05:40.578464" }, "bam_verinterleave_sions": { "content": [ - [ - "versions.yml:md5,11e074d69900de5a7dfdbe1fb4e789fd" - ] + { + "versions_samtools": [ + [ + "SAMTOOLS_FASTQ", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T08:00:56.47781168" + "timestamp": "2026-02-02T17:05:47.62481" }, "bam_singleton": { "content": [ @@ -221,67 +201,44 @@ }, "bam_interleave - stub": { "content": [ + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_interleaved.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_other.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], { - "0": [ - - ], - "1": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test_interleaved.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + "SAMTOOLS_FASTQ", + "samtools", + "1.22.1" ] - ], - "2": [ - - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "test_other.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "4": [ - "versions.yml:md5,11e074d69900de5a7dfdbe1fb4e789fd" - ], - "fastq": [ - - ], - "interleaved": [ - [ - { - "id": "test", - "single_end": false - }, - "test_interleaved.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "other": [ - [ - { - "id": "test", - "single_end": false - }, - "test_other.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "singleton": [ - - ], 
- "versions": [ - "versions.yml:md5,11e074d69900de5a7dfdbe1fb4e789fd" ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-03-05T12:51:10.155471004" + "timestamp": "2026-02-02T17:06:03.676263" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml index 62054fc9..89e12a64 100644 --- a/modules/nf-core/samtools/flagstat/environment.yml +++ b/modules/nf-core/samtools/flagstat/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf index c23f3a5c..0cfb7e87 100644 --- a/modules/nf-core/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -4,15 +4,15 @@ process SAMTOOLS_FLAGSTAT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(bam), path(bai) output: tuple val(meta), path("*.flagstat"), emit: flagstat - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), emit: versions_samtools, topic: versions when: task.ext.when == null || task.ext.when @@ -25,21 +25,23 @@ process SAMTOOLS_FLAGSTAT { --threads ${task.cpus} \\ $bam \\ > ${prefix}.flagstat - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.flagstat - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS + cat <<-END_FLAGSTAT > ${prefix}.flagstat + 1000000 + 0 in total (QC-passed reads + QC-failed reads) + 0 + 0 secondary + 0 + 0 supplementary + 0 + 0 duplicates + 900000 + 0 mapped (90.00% : N/A) + 1000000 + 0 paired in sequencing + 500000 + 0 read1 + 500000 + 0 read2 + 800000 + 0 properly paired (80.00% : N/A) + 850000 + 0 with mate mapped to a different chr + 50000 + 0 with mate mapped to a different chr (mapQ>=5) + END_FLAGSTAT """ } diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml index cdc4c254..8caa1bcc 100644 --- a/modules/nf-core/samtools/flagstat/meta.yml +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -1,6 +1,6 @@ name: samtools_flagstat -description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG - type +description: Counts the number of alignments in a BAM/CRAM/SAM file for each + FLAG type keywords: - 
stats - mapping @@ -17,7 +17,8 @@ tools: homepage: http://www.htslib.org/ documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] + licence: + - "MIT" identifier: biotools:samtools input: - - meta: @@ -29,13 +30,15 @@ input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" + ontologies: [] - bai: type: file description: Index for BAM/CRAM/SAM file pattern: "*.{bai,crai,sai}" + ontologies: [] output: - - flagstat: - - meta: + flagstat: + - - meta: type: map description: | Groovy Map containing sample information @@ -44,11 +47,28 @@ output: type: file description: File containing samtools flagstat output pattern: "*.{flagstat}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool authors: - "@drpatelh" maintainers: diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap index 04c3852b..f5c882da 100644 --- a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -8,11 +8,15 @@ "id": "test", "single_end": false }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "1": [ - "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + [ + "SAMTOOLS_FLAGSTAT", + 
"samtools", + "1.22.1" + ] ], "flagstat": [ [ @@ -20,19 +24,23 @@ "id": "test", "single_end": false }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], - "versions": [ - "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + "versions_samtools": [ + [ + "SAMTOOLS_FLAGSTAT", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:02:58.866491759" + "timestamp": "2026-02-03T11:14:30.820969684" }, "BAM": { "content": [ @@ -47,7 +55,11 @@ ] ], "1": [ - "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + [ + "SAMTOOLS_FLAGSTAT", + "samtools", + "1.22.1" + ] ], "flagstat": [ [ @@ -58,15 +70,19 @@ "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" ] ], - "versions": [ - "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + "versions_samtools": [ + [ + "SAMTOOLS_FLAGSTAT", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:02:47.383332837" + "timestamp": "2026-02-03T11:14:25.581619424" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/idxstats/environment.yml b/modules/nf-core/samtools/idxstats/environment.yml index 62054fc9..89e12a64 100644 --- a/modules/nf-core/samtools/idxstats/environment.yml +++ b/modules/nf-core/samtools/idxstats/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf index e2bb6b20..d5b70a7f 100644 --- a/modules/nf-core/samtools/idxstats/main.nf +++ 
b/modules/nf-core/samtools/idxstats/main.nf @@ -4,15 +4,15 @@ process SAMTOOLS_IDXSTATS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(bam), path(bai) output: tuple val(meta), path("*.idxstats"), emit: idxstats - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), emit: versions_samtools, topic: versions when: task.ext.when == null || task.ext.when @@ -21,16 +21,12 @@ process SAMTOOLS_IDXSTATS { def prefix = task.ext.prefix ?: "${meta.id}" """ + # Note: --threads value represents *additional* CPUs to allocate (total CPUs = 1 + --threads). samtools \\ idxstats \\ --threads ${task.cpus-1} \\ $bam \\ > ${prefix}.idxstats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ stub: @@ -38,10 +34,5 @@ process SAMTOOLS_IDXSTATS { """ touch ${prefix}.idxstats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml index f0a6bcb2..fd153841 100644 --- a/modules/nf-core/samtools/idxstats/meta.yml +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -17,7 +17,8 @@ tools: homepage: http://www.htslib.org/ documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] + licence: + - "MIT" identifier: biotools:samtools input: - - meta: @@ -29,13 +30,15 @@ input: 
type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" + ontologies: [] - bai: type: file description: Index for BAM/CRAM/SAM file pattern: "*.{bai,crai,sai}" + ontologies: [] output: - - idxstats: - - meta: + idxstats: + - - meta: type: map description: | Groovy Map containing sample information @@ -44,11 +47,28 @@ output: type: file description: File containing samtools idxstats output pattern: "*.{idxstats}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool authors: - "@drpatelh" maintainers: diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test b/modules/nf-core/samtools/idxstats/tests/main.nf.test index 5fd1fc78..c990cd55 100644 --- a/modules/nf-core/samtools/idxstats/tests/main.nf.test +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test @@ -25,7 +25,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.idxstats, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } ) } } @@ -47,7 +50,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.idxstats, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } ) } }} diff --git 
a/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap index 2cc89a3b..19a54c7c 100644 --- a/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap @@ -1,72 +1,56 @@ { "bam - stub": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - "versions.yml:md5,c8d7394830c3c1e5be150589571534fb" - ], - "idxstats": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + "SAMTOOLS_IDXSTATS", + "samtools", + "1.22.1" ] - ], - "versions": [ - "versions.yml:md5,c8d7394830c3c1e5be150589571534fb" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:11:56.466856235" + "timestamp": "2026-02-02T16:21:46.333090477" }, "bam": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" - ] - ], - "1": [ - "versions.yml:md5,c8d7394830c3c1e5be150589571534fb" - ], - "idxstats": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + "SAMTOOLS_IDXSTATS", + "samtools", + "1.22.1" ] - ], - "versions": [ - "versions.yml:md5,c8d7394830c3c1e5be150589571534fb" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:11:46.311550359" + "timestamp": "2026-02-02T16:21:41.063422521" } } \ No newline at end of file diff --git 
a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml index 62054fc9..89e12a64 100644 --- a/modules/nf-core/samtools/index/environment.yml +++ b/modules/nf-core/samtools/index/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 31175610..e2a0e56d 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_INDEX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(input) @@ -14,7 +14,7 @@ process SAMTOOLS_INDEX { tuple val(meta), path("*.bai") , optional:true, emit: bai tuple val(meta), path("*.csi") , optional:true, emit: csi tuple val(meta), path("*.crai"), optional:true, emit: crai - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), emit: versions_samtools, topic: versions when: task.ext.when == null || task.ext.when @@ -24,14 +24,9 @@ process SAMTOOLS_INDEX { """ samtools \\ index \\ - -@ ${task.cpus-1} \\ + -@ ${task.cpus} \\ $args \\ $input - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ stub: @@ -40,10 +35,5 @@ process SAMTOOLS_INDEX { 
"crai" : args.contains("-c") ? "csi" : "bai" """ touch ${input}.${extension} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml index db8df0d5..c6d4ce25 100644 --- a/modules/nf-core/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -14,7 +14,8 @@ tools: homepage: http://www.htslib.org/ documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] + licence: + - "MIT" identifier: biotools:samtools input: - - meta: @@ -25,9 +26,10 @@ input: - input: type: file description: input file + ontologies: [] output: - - bai: - - meta: + bai: + - - meta: type: map description: | Groovy Map containing sample information @@ -36,8 +38,9 @@ output: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" - - csi: - - meta: + ontologies: [] + csi: + - - meta: type: map description: | Groovy Map containing sample information @@ -46,8 +49,9 @@ output: type: file description: CSI index file pattern: "*.{csi}" - - crai: - - meta: + ontologies: [] + crai: + - - meta: type: map description: | Groovy Map containing sample information @@ -56,11 +60,28 @@ output: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The 
name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test index ca34fb5c..c96cec86 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -23,7 +23,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.bai, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } ) } } @@ -43,7 +46,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.crai, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } ) } } @@ -67,7 +73,7 @@ nextflow_process { { assert process.success }, { assert snapshot( file(process.out.csi[0][1]).name, - process.out.versions + process.out.findAll { key, val -> key.startsWith('versions') } ).match() } ) } @@ -89,7 +95,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.bai, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } ) } } @@ -110,7 +119,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.crai, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } ) } } @@ -133,7 +145,10 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.csi, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } ) } } diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap 
b/modules/nf-core/samtools/index/tests/main.nf.test.snap index 72d65e81..afc8a1ff 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -1,250 +1,156 @@ { "csi - stub": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], { - "0": [ - - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" - ], - "bai": [ - - ], - "crai": [ - - ], - "csi": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" ] - ], - "versions": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T08:21:25.261127166" + "timestamp": "2026-01-28T17:52:10.030187" }, "crai - stub": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], { - "0": [ - - ], - "1": [ - - ], - "2": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" ] - ], - "3": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" - ], - "bai": [ - - ], - "crai": [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "csi": [ - - ], - "versions": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + 
"nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T08:21:12.653194876" + "timestamp": "2026-01-28T17:51:59.125484" }, "bai - stub": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], { - "0": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" - ], - "bai": [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "crai": [ - - ], - "csi": [ - - ], - "versions": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T08:21:01.854932651" + "timestamp": "2026-01-28T17:51:47.277042" }, "csi": { "content": [ "test.paired_end.sorted.bam.csi", - [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" - ] + { + "versions_samtools": [ + [ + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T08:20:51.485364222" + "timestamp": "2026-01-28T17:51:35.758735" }, "crai": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], { - "0": [ - - ], - "1": [ - - ], - "2": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" ] - ], - "3": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" - ], - "bai": [ - - ], - 
"crai": [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" - ] - ], - "csi": [ - - ], - "versions": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T08:20:40.518873972" + "timestamp": "2026-01-28T17:51:26.561965" }, "bai": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" - ], - "bai": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" ] - ], - "crai": [ - - ], - "csi": [ - - ], - "versions": [ - "versions.yml:md5,5e09a6fdf76de396728f877193d72315" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-09-16T08:20:21.184050361" + "timestamp": "2026-01-28T17:51:15.299035" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml index 62054fc9..89e12a64 100644 --- a/modules/nf-core/samtools/sort/environment.yml +++ b/modules/nf-core/samtools/sort/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/sort/main.nf 
b/modules/nf-core/samtools/sort/main.nf index caf3c61a..6b5aa31d 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -4,30 +4,41 @@ process SAMTOOLS_SORT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta) , path(bam) tuple val(meta2), path(fasta) + val index_format output: - tuple val(meta), path("*.bam"), emit: bam, optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - tuple val(meta), path("*.crai"), emit: crai, optional: true - tuple val(meta), path("*.csi"), emit: csi, optional: true - path "versions.yml", emit: versions + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true + tuple val(meta), path("${prefix}.${extension}.crai"), emit: crai, optional: true + tuple val(meta), path("${prefix}.${extension}.csi"), emit: csi, optional: true + tuple val(meta), path("${prefix}.${extension}.bai"), emit: bai, optional: true + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def extension = args.contains("--output-fmt sam") ? "sam" : - args.contains("--output-fmt cram") ? "cram" : - "bam" + prefix = task.ext.prefix ?: "${meta.id}" + extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" def reference = fasta ? 
"--reference ${fasta}" : "" + output_file = index_format ? "${prefix}.${extension}##idx##${prefix}.${extension}.${index_format} --write-index" : "${prefix}.${extension}" + if (index_format) { + if (!index_format.matches('bai|csi|crai')) { + error "Index format not one of bai, csi, crai." + } else if (extension == "sam") { + error "Indexing not compatible with SAM output" + } + } if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ @@ -39,34 +50,29 @@ process SAMTOOLS_SORT { -T ${prefix} \\ --threads $task.cpus \\ ${reference} \\ - -o ${prefix}.${extension} \\ + -o ${output_file} \\ - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ stub: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def extension = args.contains("--output-fmt sam") ? "sam" : - args.contains("--output-fmt cram") ? "cram" : - "bam" + prefix = task.ext.prefix ?: "${meta.id}" + extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + if (index_format) { + if (!index_format.matches('bai|csi|crai')) { + error "Index format not one of bai, csi, crai." + } else if (extension == "sam") { + error "Indexing not compatible with SAM output" + } + } + index = index_format ? 
"touch ${prefix}.${extension}.${index_format}" : "" + """ touch ${prefix}.${extension} - if [ "${extension}" == "bam" ]; - then - touch ${prefix}.${extension}.csi - elif [ "${extension}" == "cram" ]; - then - touch ${prefix}.${extension}.crai - fi + ${index} - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml index a9dbec5a..69968304 100644 --- a/modules/nf-core/samtools/sort/meta.yml +++ b/modules/nf-core/samtools/sort/meta.yml @@ -26,6 +26,7 @@ input: type: file description: BAM/CRAM/SAM file(s) pattern: "*.{bam,cram,sam}" + ontologies: [] - - meta2: type: map description: | @@ -36,52 +37,101 @@ input: description: Reference genome FASTA file pattern: "*.{fa,fasta,fna}" optional: true + ontologies: [] + - index_format: + type: string + description: Index format to use (optional) + pattern: "bai|csi|crai" output: - - bam: - - meta: + bam: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.bam": + - "${prefix}.bam": type: file description: Sorted BAM file pattern: "*.{bam}" - - cram: - - meta: + ontologies: [] + cram: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.cram": + - "${prefix}.cram": type: file description: Sorted CRAM file pattern: "*.{cram}" - - crai: - - meta: + ontologies: [] + sam: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.crai": + - "${prefix}.sam": + type: file + description: Sorted SAM file + pattern: "*.{sam}" + ontologies: [] + crai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "${prefix}.${extension}.crai": type: file description: CRAM index file (optional) pattern: "*.crai" - - csi: - - meta: + ontologies: [] + csi: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.csi": + - "${prefix}.${extension}.csi": type: file description: BAM index file (optional) pattern: "*.csi" - - versions: - - versions.yml: + ontologies: [] + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.${extension}.bai": type: file - description: File containing software versions - pattern: "versions.yml" + description: BAM index file (optional) + pattern: "*.bai" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool + authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test index b05e6691..df47bb25 100644 --- a/modules/nf-core/samtools/sort/tests/main.nf.test +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -8,7 +8,7 @@ nextflow_process { tag "samtools" tag "samtools/sort" - test("bam") { + test("bam_no_index") { config "./nextflow.config" @@ -23,6 +23,7 @@ nextflow_process { [ id:'fasta' ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]) + 
input[2] = '' """ } } @@ -32,8 +33,72 @@ nextflow_process { { assert process.success }, { assert snapshot( process.out.bam, - process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, - process.out.versions + process.out.bai, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("bam_bai_index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = 'bai' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.bai, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("bam_csi_index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = 'csi' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi, + process.out.findAll { key, val -> key.startsWith("versions") } ).match()} ) } @@ -57,6 +122,77 @@ nextflow_process { [ id:'fasta' ], // meta map file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]) + input[2] = '' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + 
process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("multiple bam bai index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = 'bai' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.bai.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("multiple bam csi index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = 'csi' """ } } @@ -67,7 +203,7 @@ nextflow_process { { assert snapshot( process.out.bam, process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions") } ).match()} ) } @@ -88,6 +224,7 @@ nextflow_process { [ id:'fasta' ], // meta map file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]) + input[2] = '' """ } } @@ -98,7 +235,7 @@ nextflow_process { { assert snapshot( process.out.cram.collect { it.collect { it instanceof Map ? it : file(it).name } }, process.out.crai.collect { it.collect { it instanceof Map ? it : file(it).name } }, - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions") } ).match()} ) } @@ -120,6 +257,7 @@ nextflow_process { [ id:'fasta' ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]) + input[2] = '' """ } } @@ -127,7 +265,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match() } ) } } @@ -150,6 +288,7 @@ nextflow_process { [ id:'fasta' ], // meta map file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]) + input[2] = '' """ } } @@ -157,7 +296,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match() } ) } } @@ -178,6 +317,7 @@ nextflow_process { [ id:'fasta' ], // meta map file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]) + input[2] = '' """ } } @@ -185,7 +325,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match() } ) } } diff --git 
a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap index 469891fe..4e618fa3 100644 --- a/modules/nf-core/samtools/sort/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -19,147 +19,77 @@ "test.sorted.cram.crai" ] ], - [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" - ] + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-09-16T08:49:58.207549273" + "timestamp": "2025-10-29T12:47:01.171084" }, - "bam - stub": { + "bam_csi_index": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,72ca1dff5344a5e5e6b892fe5f6b134d" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,01394e702c729cb478df914ffaf9f7f8" + ] + ], { - "0": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + "SAMTOOLS_SORT", + "samtools", + "1.22.1" ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "4": [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" - ], - "bam": [ - [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "crai": [ - - ], - "cram": [ - - ], - "csi": [ - [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-09-16T08:50:08.630951018" + "timestamp": "2025-10-29T12:46:00.961675" }, - "cram - stub": { + "bam - stub": { 
"content": [ { - "0": [ - - ], - "1": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + "SAMTOOLS_SORT", + "samtools", + "1.22.1" ] - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - - ], - "4": [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" - ], - "bam": [ - - ], - "crai": [ - [ - { - "id": "test", - "single_end": false - }, - "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "cram": [ - [ - { - "id": "test", - "single_end": false - }, - "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "csi": [ - - ], - "versions": [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-09-16T08:50:19.061912443" + "timestamp": "2025-10-29T12:47:12.154354" }, - "multiple bam": { + "multiple bam bai index": { "content": [ [ [ @@ -167,7 +97,7 @@ "id": "test", "single_end": false }, - "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + "test.sorted.bam:md5,3ffa2affc29f0aa6e7b36dded84625fe" ] ], [ @@ -176,85 +106,122 @@ "id": "test", "single_end": false }, - "test.sorted.bam.csi" + "test.sorted.bam.bai" ] ], - [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" - ] + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-10-08T11:59:55.479443" + "timestamp": "2025-10-29T12:46:25.488622" }, - "multiple bam - stub": { + "cram - stub": { "content": [ { - "0": [ + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + "SAMTOOLS_SORT", + "samtools", + "1.22.1" ] - ], - "1": [ - - ], - "2": [ 
- - ], - "3": [ + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:47:28.485045" + }, + "multiple bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,cd4eb0077f25e9cff395366b8883dd1f" + ] + ], + [ + + ], + { + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + "SAMTOOLS_SORT", + "samtools", + "1.22.1" ] - ], - "4": [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" - ], - "bam": [ + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:46:13.168476" + }, + "multiple bam - stub": { + "content": [ + { + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + "SAMTOOLS_SORT", + "samtools", + "1.22.1" ] - ], - "crai": [ - - ], - "cram": [ - - ], - "csi": [ + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:47:21.628088" + }, + "bam_no_index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,26b27d1f9bcb61c25da21b562349784e" + ] + ], + [ + + ], + { + "versions_samtools": [ [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + "SAMTOOLS_SORT", + "samtools", + "1.22.1" ] - ], - "versions": [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-10-08T11:36:13.781404" + "timestamp": "2025-10-29T12:45:47.139418" }, - "bam": { + "multiple bam csi index": { "content": [ [ [ @@ -262,7 +229,7 @@ "id": "test", "single_end": false }, - "test.sorted.bam:md5,34aa85e86abefe637f7a4a9887f016fc" + "test.sorted.bam:md5,295503ba5342531a3310c33ad0efbc22" ] ], [ @@ -274,14 +241,56 @@ 
"test.sorted.bam.csi" ] ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:46:51.5531" + }, + "bam_bai_index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,cae7564cb83bb4a5911205bf94124b54" + ] + ], [ - "versions.yml:md5,2659b187d681241451539d4c53500b9f" - ] + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.bai:md5,50dd467c169545a4d5d1f709f7e986e0" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-10-08T11:59:46.372244" + "timestamp": "2025-10-29T12:45:52.796936" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config index f642771f..723f62b2 100644 --- a/modules/nf-core/samtools/sort/tests/nextflow.config +++ b/modules/nf-core/samtools/sort/tests/nextflow.config @@ -2,7 +2,6 @@ process { withName: SAMTOOLS_SORT { ext.prefix = { "${meta.id}.sorted" } - ext.args = "--write-index" } } diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml index 62054fc9..89e12a64 100644 --- a/modules/nf-core/samtools/stats/environment.yml +++ b/modules/nf-core/samtools/stats/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf index 4443948b..57d24680 100644 --- a/modules/nf-core/samtools/stats/main.nf +++ 
b/modules/nf-core/samtools/stats/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_STATS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(input), path(input_index) @@ -13,36 +13,28 @@ process SAMTOOLS_STATS { output: tuple val(meta), path("*.stats"), emit: stats - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('samtools'), eval('samtools version | sed "1!d;s/.* //"'), emit: versions_samtools, topic: versions when: task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? 
"--reference ${fasta}" : "" """ samtools \\ stats \\ + ${args} \\ --threads ${task.cpus} \\ ${reference} \\ ${input} \\ > ${prefix}.stats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.stats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS """ } diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml index 77b020f7..5c59cce4 100644 --- a/modules/nf-core/samtools/stats/meta.yml +++ b/modules/nf-core/samtools/stats/meta.yml @@ -27,10 +27,12 @@ input: type: file description: BAM/CRAM file from alignment pattern: "*.{bam,cram}" + ontologies: [] - input_index: type: file description: BAI/CRAI file from alignment pattern: "*.{bai,crai}" + ontologies: [] - - meta2: type: map description: | @@ -40,9 +42,10 @@ input: type: file description: Reference file the CRAM was created with (optional) pattern: "*.{fasta,fa}" + ontologies: [] output: - - stats: - - meta: + stats: + - - meta: type: map description: | Groovy Map containing sample information @@ -51,11 +54,30 @@ output: type: file description: File containing samtools stats output pattern: "*.{stats}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: Name of the process + - samtools: + type: string + description: Name of the tool + - samtools version | sed "1!d;s/.* //": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: Name of the process + - samtools: + type: string + description: Name of the tool + - samtools version | 
sed "1!d;s/.* //": + type: eval + description: The expression to obtain the version of the tool + authors: - "@drpatelh" - "@FriederikeHanssen" diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap index df507be7..94d981b2 100644 --- a/modules/nf-core/samtools/stats/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -8,11 +8,15 @@ "id": "test", "single_end": false }, - "test.stats:md5,a27fe55e49a341f92379bb20a65c6a06" + "test.stats:md5,f4aec6c41b73d34ac2fc6b3253aa39ba" ] ], "1": [ - "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] ], "stats": [ [ @@ -20,19 +24,23 @@ "id": "test", "single_end": false }, - "test.stats:md5,a27fe55e49a341f92379bb20a65c6a06" + "test.stats:md5,f4aec6c41b73d34ac2fc6b3253aa39ba" ] ], - "versions": [ - "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + "versions_samtools": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-09-16T09:29:16.767396182" + "timestamp": "2025-11-01T02:27:18.460724" }, "bam - stub": { "content": [ @@ -47,7 +55,11 @@ ] ], "1": [ - "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] ], "stats": [ [ @@ -58,16 +70,20 @@ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + "versions_samtools": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-09-16T09:29:29.721580274" + "timestamp": "2025-11-01T02:27:30.245839" }, "cram - stub": { "content": [ @@ -82,7 +98,11 @@ ] ], "1": [ - "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + [ + "SAMTOOLS_STATS", + 
"samtools", + "1.22.1" + ] ], "stats": [ [ @@ -93,16 +113,20 @@ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + "versions_samtools": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-09-16T09:29:53.567964304" + "timestamp": "2025-11-01T02:27:39.041649" }, "bam": { "content": [ @@ -113,11 +137,15 @@ "id": "test", "single_end": false }, - "test.stats:md5,d53a2584376d78942839e9933a34d11b" + "test.stats:md5,41ba8ad30ddb598dadb177a54c222ab9" ] ], "1": [ - "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] ], "stats": [ [ @@ -125,18 +153,22 @@ "id": "test", "single_end": false }, - "test.stats:md5,d53a2584376d78942839e9933a34d11b" + "test.stats:md5,41ba8ad30ddb598dadb177a54c222ab9" ] ], - "versions": [ - "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + "versions_samtools": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2024-09-16T09:28:50.73610604" + "timestamp": "2025-11-01T02:26:55.988241" } } \ No newline at end of file diff --git a/modules/nf-core/trimgalore/environment.yml b/modules/nf-core/trimgalore/environment.yml deleted file mode 100644 index 568b9e72..00000000 --- a/modules/nf-core/trimgalore/environment.yml +++ /dev/null @@ -1,9 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -channels: - - conda-forge - - bioconda -dependencies: - - bioconda::cutadapt=4.9 - - bioconda::trim-galore=0.6.10 - - conda-forge::pigz=2.8 diff --git a/modules/nf-core/trimgalore/main.nf b/modules/nf-core/trimgalore/main.nf deleted file mode 100644 index 5fe53669..00000000 --- 
a/modules/nf-core/trimgalore/main.nf +++ /dev/null @@ -1,107 +0,0 @@ -process TRIMGALORE { - tag "${meta.id}" - label 'process_high' - - conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9b/9becad054093ad4083a961d12733f2a742e11728fe9aa815d678b882b3ede520/data' - : 'community.wave.seqera.io/library/cutadapt_trim-galore_pigz:a98edd405b34582d'}" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*{3prime,5prime,trimmed,val}{,_1,_2}.fq.gz"), emit: reads - tuple val(meta), path("*report.txt") , emit: log, optional: true - tuple val(meta), path("*unpaired{,_1,_2}.fq.gz") , emit: unpaired, optional: true - tuple val(meta), path("*.html") , emit: html, optional: true - tuple val(meta), path("*.zip") , emit: zip, optional: true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Calculate number of --cores for TrimGalore based on value of task.cpus - // See: https://github.com/FelixKrueger/TrimGalore/blob/master/CHANGELOG.md#version-060-release-on-1-mar-2019 - // See: https://github.com/nf-core/atacseq/pull/65 - def cores = 1 - if (task.cpus) { - cores = (task.cpus as int) - 4 - if (meta.single_end) { - cores = (task.cpus as int) - 3 - } - if (cores < 1) { - cores = 1 - } - if (cores > 8) { - cores = 8 - } - } - - // Added soft-links to original fastqs for consistent naming in MultiQC - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - def args_list = args.split("\\s(?=--)").toList() - args_list.removeAll { it.toLowerCase().contains('_r2 ') } - """ - [ ! 
-f ${prefix}.fastq.gz ] && ln -s ${reads} ${prefix}.fastq.gz - trim_galore \\ - ${args_list.join(' ')} \\ - --cores ${cores} \\ - --gzip \\ - ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') - cutadapt: \$(cutadapt --version) - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS - """ - } - else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - trim_galore \\ - ${args} \\ - --cores ${cores} \\ - --paired \\ - --gzip \\ - ${prefix}_1.fastq.gz \\ - ${prefix}_2.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') - cutadapt: \$(cutadapt --version) - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS - """ - } - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - output_command = "echo '' | gzip > ${prefix}_trimmed.fq.gz ;" - output_command += "touch ${prefix}.fastq.gz_trimming_report.txt" - } - else { - output_command = "echo '' | gzip > ${prefix}_1_trimmed.fq.gz ;" - output_command += "touch ${prefix}_1.fastq.gz_trimming_report.txt ;" - output_command += "echo '' | gzip > ${prefix}_2_trimmed.fq.gz ;" - output_command += "touch ${prefix}_2.fastq.gz_trimming_report.txt" - } - """ - ${output_command} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') - cutadapt: \$(cutadapt --version) - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/trimgalore/meta.yml b/modules/nf-core/trimgalore/meta.yml deleted file mode 100644 index bd793635..00000000 --- a/modules/nf-core/trimgalore/meta.yml +++ /dev/null @@ -1,105 +0,0 @@ -name: 
trimgalore -description: Trim FastQ files using Trim Galore! -keywords: - - trimming - - adapters - - sequencing adapters - - fastq -tools: - - trimgalore: - description: | - A wrapper tool around Cutadapt and FastQC to consistently apply quality - and adapter trimming to FastQ files, with some extra functionality for - MspI-digested RRBS-type (Reduced Representation Bisufite-Seq) libraries. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/ - documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md - licence: ["GPL-3.0-or-later"] - identifier: "" -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. -output: - - reads: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*{3prime,5prime,trimmed,val}{,_1,_2}.fq.gz": - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - pattern: "*{3prime,5prime,trimmed,val}{,_1,_2}.fq.gz" - - log: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - pattern: "*_{report.txt}" - - "*report.txt": - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - pattern: "*_{report.txt}" - - unpaired: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*unpaired{,_1,_2}.fq.gz": - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - pattern: "*unpaired*.fq.gz" - - html: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - pattern: "*_{fastqc.html}" - - "*.html": - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - pattern: "*_{fastqc.html}" - - zip: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - pattern: "*_{fastqc.zip}" - - "*.zip": - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - pattern: "*_{fastqc.zip}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@ewels" - - "@FelixKrueger" -maintainers: - - "@drpatelh" - - "@ewels" - - "@FelixKrueger" diff --git a/modules/nf-core/trimgalore/tests/main.nf.test b/modules/nf-core/trimgalore/tests/main.nf.test deleted file mode 100644 index c01672c4..00000000 --- a/modules/nf-core/trimgalore/tests/main.nf.test +++ /dev/null @@ -1,188 +0,0 @@ -nextflow_process { - - name "Test Process TRIMGALORE" - script "../main.nf" - process "TRIMGALORE" - tag "modules" - tag "modules_nfcore" - tag "trimgalore" - - test("test_trimgalore_single_end") { - - when { - process { - """ - input[0] = [ [ id:'test', single_end:true ], // meta map - [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] - ] - """ - } - } - - then { - def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } - } - }, - { report1_lines.each { report1_line -> - { assert 
path(process.out.log.get(0).get(1)).getText().contains(report1_line) } - } - }, - { assert snapshot(path(process.out.versions.get(0)).yaml).match() }, - ) - } - } - - test("test_trimgalore_single_end - stub") { - - options "-stub" - - when { - process { - """ - input[0] = [ [ id:'test', single_end:true ], // meta map - [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out, - path(process.out.versions.get(0)).yaml - ).match() }, - ) - } - } - - test("test_trimgalore_paired_end") { - - when { - process { - """ - input[0] = [ [ id:'test', single_end:false ], // meta map - [ - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) - ] - ] - """ - } - } - - then { - def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } - } - }, - { read2_lines.each { read2_line -> - { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } - } - }, - { report1_lines.each { report1_line -> - { assert path(process.out.log.get(0).get(1).get(0)).getText().contains(report1_line) } - } - }, - { report2_lines.each { report2_line -> - { assert path(process.out.log.get(0).get(1).get(1)).getText().contains(report2_line) } - } - }, - { assert snapshot(path(process.out.versions.get(0)).yaml).match() }, - ) - } - } - - 
test("test_trimgalore_paired_end_keep_unpaired") { - - config "./nextflow.config" - - when { - - params { - module_args = '--retain_unpaired --length 150' - } - - process { - """ - input[0] = [ [ id:'test', single_end:false ], // meta map - [ - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) - ] - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - path(process.out.versions.get(0)).yaml, - process.out.reads, - process.out.unpaired - ).match() }, - ) - } - } - - test("test_trimgalore_paired_end - stub") { - - options "-stub" - - when { - process { - """ - input[0] = [ [ id:'test', single_end:false ], // meta map - [ - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) - ] - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot(path(process.out.versions.get(0)).yaml).match("versions") }, - ) - } - } -} \ No newline at end of file diff --git a/modules/nf-core/trimgalore/tests/main.nf.test.snap b/modules/nf-core/trimgalore/tests/main.nf.test.snap deleted file mode 100644 index c454ad52..00000000 --- a/modules/nf-core/trimgalore/tests/main.nf.test.snap +++ /dev/null @@ -1,251 +0,0 @@ -{ - "test_trimgalore_single_end": { - "content": [ - { - "TRIMGALORE": { - "trimgalore": "0.6.10", - "cutadapt": 4.9, - "pigz": 2.8 - } - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-01-06T12:25:01.330769598" - }, - "test_trimgalore_single_end - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - 
"test_trimmed.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": true - }, - "test.fastq.gz_trimming_report.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - - ], - "3": [ - - ], - "4": [ - - ], - "5": [ - "versions.yml:md5,5928323d579768de37e83c56c821757f" - ], - "html": [ - - ], - "log": [ - [ - { - "id": "test", - "single_end": true - }, - "test.fastq.gz_trimming_report.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "reads": [ - [ - { - "id": "test", - "single_end": true - }, - "test_trimmed.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "unpaired": [ - - ], - "versions": [ - "versions.yml:md5,5928323d579768de37e83c56c821757f" - ], - "zip": [ - - ] - }, - { - "TRIMGALORE": { - "trimgalore": "0.6.10", - "cutadapt": 4.9, - "pigz": 2.8 - } - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-01-06T12:25:15.582246999" - }, - "test_trimgalore_paired_end - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - [ - "test_1_trimmed.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test_2_trimmed.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - [ - "test_1.fastq.gz_trimming_report.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "test_2.fastq.gz_trimming_report.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - ], - "2": [ - - ], - "3": [ - - ], - "4": [ - - ], - "5": [ - "versions.yml:md5,5928323d579768de37e83c56c821757f" - ], - "html": [ - - ], - "log": [ - [ - { - "id": "test", - "single_end": false - }, - [ - "test_1.fastq.gz_trimming_report.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "test_2.fastq.gz_trimming_report.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - ], - "reads": [ - [ - { - "id": "test", - "single_end": false - }, - [ - "test_1_trimmed.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - 
"test_2_trimmed.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ] - ], - "unpaired": [ - - ], - "versions": [ - "versions.yml:md5,5928323d579768de37e83c56c821757f" - ], - "zip": [ - - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-01-06T12:26:05.201562315" - }, - "versions": { - "content": [ - { - "TRIMGALORE": { - "trimgalore": "0.6.10", - "cutadapt": 4.9, - "pigz": 2.8 - } - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-01-06T12:26:05.229598492" - }, - "test_trimgalore_paired_end": { - "content": [ - { - "TRIMGALORE": { - "trimgalore": "0.6.10", - "cutadapt": 4.9, - "pigz": 2.8 - } - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-01-06T12:25:33.510924538" - }, - "test_trimgalore_paired_end_keep_unpaired": { - "content": [ - { - "TRIMGALORE": { - "trimgalore": "0.6.10", - "cutadapt": 4.9, - "pigz": 2.8 - } - }, - [ - [ - { - "id": "test", - "single_end": false - }, - [ - "test_1_val_1.fq.gz:md5,75413e85910bbc2e1556e12f6479f935", - "test_2_val_2.fq.gz:md5,d3c588c12646ebd36a0812fe02d0bda6" - ] - ] - ], - [ - [ - { - "id": "test", - "single_end": false - }, - [ - "test_1_unpaired_1.fq.gz:md5,17e0e878f6d0e93b9008a05f128660b6", - "test_2_unpaired_2.fq.gz:md5,b09a064368a867e099e66df5ef69b044" - ] - ] - ] - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.3" - }, - "timestamp": "2025-01-06T12:25:46.461002981" - } -} \ No newline at end of file diff --git a/modules/nf-core/lima/environment.yml b/modules/nf-core/yahs/environment.yml similarity index 86% rename from modules/nf-core/lima/environment.yml rename to modules/nf-core/yahs/environment.yml index 2e56e30d..051b20b6 100644 --- a/modules/nf-core/lima/environment.yml +++ b/modules/nf-core/yahs/environment.yml @@ -3,6 +3,5 @@ channels: - conda-forge - bioconda - dependencies: - - bioconda::lima=2.12.0 + - bioconda::yahs=1.2.2 diff --git a/modules/nf-core/yahs/main.nf 
b/modules/nf-core/yahs/main.nf new file mode 100644 index 00000000..cb338ae8 --- /dev/null +++ b/modules/nf-core/yahs/main.nf @@ -0,0 +1,52 @@ +process YAHS { + tag "${meta.id}" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/yahs:1.2.2--h577a1d6_1': + 'biocontainers/yahs:1.2.2--h577a1d6_1' }" + + input: + tuple val(meta), path(fasta), path(fai), path(hic_map), path(agp) + + output: + // note: typo in yahs file outputs - it writes "inital", not "initial" + tuple val(meta), path("${prefix}_scaffolds_final.fa") , emit: scaffolds_fasta , optional: true + tuple val(meta), path("${prefix}_scaffolds_final.agp") , emit: scaffolds_agp , optional: true + tuple val(meta), path("${prefix}_{inital,no}_break*.agp"), emit: initial_break_agp , optional: true + tuple val(meta), path("${prefix}_r*_*.agp") , emit: round_agp , optional: true + tuple val(meta), path("${prefix}.bin") , emit: binary + tuple val(meta), path("${prefix}.log") , emit: log + tuple val("${task.process}"), val('yahs'), eval("yahs --version 2>&1"), emit: versions_yahs, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def agp_input = agp ? 
"-a ${agp}" : "" + """ + yahs \\ + -o ${prefix} \\ + ${agp_input} \\ + ${args} \\ + ${fasta} \\ + ${hic_map} \\ + 2>| >( tee ${prefix}.log >&2 ) + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_scaffolds_final.fa + touch ${prefix}_scaffolds_final.agp + touch ${prefix}_inital_break_01.agp + touch ${prefix}_no_break.agp + touch ${prefix}_r01.agp + touch ${prefix}_r01_break.agp + touch ${prefix}.bin + touch ${prefix}.log + """ +} diff --git a/modules/nf-core/yahs/meta.yml b/modules/nf-core/yahs/meta.yml new file mode 100644 index 00000000..3d06083c --- /dev/null +++ b/modules/nf-core/yahs/meta.yml @@ -0,0 +1,150 @@ +name: "yahs" +description: Performs assembly scaffolding using YaHS +keywords: + - scaffolding + - assembly + - yahs + - hic +tools: + - "yahs": + description: "YaHS, yet another Hi-C scaffolding tool." + homepage: "https://github.com/c-zhou/yahs" + documentation: "https://github.com/c-zhou/yahs" + tool_dev_url: "https://github.com/c-zhou/yahs" + doi: "10.1093/bioinformatics/btac808" + licence: + - "MIT" + identifier: biotools:yahs +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" + ontologies: + - edam: http://edamontology.org/format_1929 + - fai: + type: file + description: index of the reference file + pattern: "*.{fai}" + ontologies: + - edam: http://edamontology.org/format_3475 + - hic_map: + type: file + description: BED file containing coordinates of read alignments + pattern: "*.{bed,bam,bin}" + ontologies: + - edam: http://edamontology.org/format_3003 + - edam: http://edamontology.org/format_2572 + - agp: + type: file + description: | + Optional AGP file describing a set of scaffolds from the input contigs + to use as a start point + pattern: "*.agp" + ontologies: + - edam: http://edamontology.org/format_3693 +output: + scaffolds_fasta: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}_scaffolds_final.fa: + type: file + description: FASTA file with resulting contigs + pattern: "${prefix}_scaffolds_final.fa" + ontologies: + - edam: http://edamontology.org/format_1929 + scaffolds_agp: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}_scaffolds_final.agp: + type: file + description: AGP file containing contigs placing coordinates + pattern: "${prefix}_scaffolds_final.agp" + ontologies: + - edam: http://edamontology.org/format_3693 + initial_break_agp: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}_{inital,no}_break*.agp: + type: file + description: AGP file describing initial contig breaks + pattern: "${prefix}_{inital,no}_break*.agp" + ontologies: + - edam: http://edamontology.org/format_3693 + round_agp: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - ${prefix}_r*_*.agp: + type: file + description: AGP file describing intermediate rounds of scaffolding + pattern: "${prefix}_r*_*.agp" + ontologies: + - edam: http://edamontology.org/format_3693 + binary: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}.bin: + type: file + description: | + Binary data file with alignment results of Hi-C reads + to the contigs in internal YaHS binary format + pattern: "${prefix}.bin" + ontologies: [] + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}.log: + type: file + description: Log file describing YaHS run + pattern: "${prefix}.log" + ontologies: + - edam: "http://edamontology.org/format_2330" + versions_yahs: + - - ${task.process}: + type: string + description: The name of the process + - yahs: + type: string + description: The name of the tool + - yahs --version 2>&1: + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - yahs: + type: string + description: The name of the tool + - yahs --version 2>&1: + type: eval + description: The expression to obtain the version of the tool +authors: + - "@ksenia-krasheninnikova" +maintainers: + - "@ksenia-krasheninnikova" + - "@yy5" diff --git a/modules/nf-core/yahs/tests/main.nf.test b/modules/nf-core/yahs/tests/main.nf.test new file mode 100644 index 00000000..e676604d --- /dev/null +++ b/modules/nf-core/yahs/tests/main.nf.test @@ -0,0 +1,138 @@ +nextflow_process { + + name "Test Process YAHS" + script "../main.nf" + process "YAHS" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "yahs" + + test("homo_sapiens - bam - fasta - fai") { + + when { + + params { + yahs_args = "" + } + + process { + """ + input[0] = [ + [ id: "test" ], + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.scaffolds_fasta, + process.out.scaffolds_agp, + process.out.initial_break_agp, + process.out.round_agp, + process.out.binary, + process.out.findAll { key, val -> key.startsWith("versions")}, + ).match() }, + { file(process.out.log.get(0).get(1)).readLines().last().contains("Real time") } + ) + } + } + + test("homo_sapiens - bam - fasta - fai - agp") { + + setup { + + run("YAHS", alias: "YAHS_INIT") { + script "../main.nf" + process { + """ + input[0] = [ + [ id: "test" ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ] + """ + } + } + + } + + when { + + params { + yahs_args = "--no-contig-ec" + } + + process { + """ + input[0] = channel.of( + [ + [ id: "test" ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + ).combine(YAHS_INIT.out.scaffolds_agp, by: 0) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.scaffolds_fasta, + 
process.out.scaffolds_agp, + process.out.initial_break_agp, + process.out.round_agp, + process.out.binary, + process.out.findAll { key, val -> key.startsWith("versions")}, + ).match() }, + { file(process.out.log.get(0).get(1)).readLines().last().contains("Real time") } + ) + } + } + + test("homo_sapiens - bam - fasta - fai - stub") { + + options "-stub" + + when { + + params { + yahs_args = "" + } + + process { + """ + input[0] = [ + [ id: "test" ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + +} diff --git a/modules/nf-core/yahs/tests/main.nf.test.snap b/modules/nf-core/yahs/tests/main.nf.test.snap new file mode 100644 index 00000000..e41229da --- /dev/null +++ b/modules/nf-core/yahs/tests/main.nf.test.snap @@ -0,0 +1,230 @@ +{ + "homo_sapiens - bam - fasta - fai - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_scaffolds_final.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_scaffolds_final.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + [ + "test_inital_break_01.agp:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_no_break.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test_r01_break.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + "YAHS", + "yahs", + "1.2.2" + ] + 
], + "binary": [ + [ + { + "id": "test" + }, + "test.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "initial_break_agp": [ + [ + { + "id": "test" + }, + [ + "test_inital_break_01.agp:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_no_break.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "round_agp": [ + [ + { + "id": "test" + }, + "test_r01_break.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "scaffolds_agp": [ + [ + { + "id": "test" + }, + "test_scaffolds_final.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "scaffolds_fasta": [ + [ + { + "id": "test" + }, + "test_scaffolds_final.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_yahs": [ + [ + "YAHS", + "yahs", + "1.2.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T13:13:16.431723" + }, + "homo_sapiens - bam - fasta - fai": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_scaffolds_final.fa:md5,a767604036a4cd6980ebd24f11e4dd95" + ] + ], + [ + [ + { + "id": "test" + }, + "test_scaffolds_final.agp:md5,374235b079d9e1c738c6f12697029b78" + ] + ], + [ + [ + { + "id": "test" + }, + "test_inital_break_01.agp:md5,374235b079d9e1c738c6f12697029b78" + ] + ], + [ + + ], + [ + [ + { + "id": "test" + }, + "test.bin:md5,da45a2ec8a97fc24783e9a63373db379" + ] + ], + { + "versions_yahs": [ + [ + "YAHS", + "yahs", + "1.2.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T13:13:08.310976" + }, + "homo_sapiens - bam - fasta - fai - agp": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_scaffolds_final.fa:md5,a767604036a4cd6980ebd24f11e4dd95" + ] + ], + [ + [ + { + "id": "test" + }, + "test_scaffolds_final.agp:md5,374235b079d9e1c738c6f12697029b78" + ] + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test" + }, + "test.bin:md5,da45a2ec8a97fc24783e9a63373db379" + ] + ], + 
{ + "versions_yahs": [ + [ + "YAHS", + "yahs", + "1.2.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T13:13:12.93575" + } +} \ No newline at end of file diff --git a/modules/nf-core/yahs/tests/nextflow.config b/modules/nf-core/yahs/tests/nextflow.config new file mode 100644 index 00000000..15279238 --- /dev/null +++ b/modules/nf-core/yahs/tests/nextflow.config @@ -0,0 +1,10 @@ +process { + + withName: 'YAHS' { + ext.args = params.yahs_args + } + + withName: 'YAHS_INIT' { + ext.args = params.yahs_args + } +} diff --git a/nextflow.config b/nextflow.config index d20e92b5..c60089a1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,38 +42,62 @@ params { // Pipeline params input = '' // input file outdir = null // outdir - use_ref = true // use a reference genome (requires fasta + gff) - skip_alignments = false // Intended for QC-oriented (re)-runs, alignments (to ref) are provided - skip_assembly = false // Intended for QC-oriented (re)-runs, assemblies are provided + csi_index_size = 14 + // Assembly strategy + strategy = "single" // assembly_strategy + assembler = null + assembler_ont = null + assembler_hifi = null + assembler_ont_args = '' + assembler_hifi_args = '' + flye_mode = '--nano-hq' + // == Read QC and trimming == // -- ONT - ont = false // ont reads available? - collect = false // collect ONT reads into a single file - porechop = false // run porechop on ONT - read_length = null // avg read length, can be estimated from reads - // -- Jellyfish (ONT reads only) -- - jellyfish = true // run jellyfish - dump = false // dump output - kmer_length = 21 // kmer length + ontreads = null + ont_collect = false // collect ONT reads into a single file + ont_adapters = [] // list of adapters for fastplong + ont_fastplong_args = '' // args for fastplong + // -- Jellyfish (qc reads) -- + jellyfish = false + jellyfish_k = 21 // -- HiFi -- - hifi = false // HiFi reads available? 
- lima = false // run lima on HiFi reads? - pacbio_primers = null // if lima, then this needs to be a path to a list of primers - // -- ASSEMBLY - assembler = "flye" // assembler to use + hifireads = null + hifi_adapters = [] + hifi_fastplong_args = '' // args for fastplong + // -- Short read -- + use_short_reads = false // short reads available? + shortread_trim = false // trim short reads? + shortread_F = [] // fwd shortreads + shortread_R = [] // rev shortreads + paired = null + // HiC + hic_aligner = "bwa-mem2" + hic_F = [] + hic_R = [] + hic_trim = true + // == ASSEMBLY == + assembler = "hifiasm" // assembler to use + assembly_scaffolding_order = "ont_on_hifi" // -- Assembly: Flye -- genome_size = null // genomesize, optional, can be estimated from ONT reads - flye_mode = '--nano-hq' // flye mode - flye_args = "" // extra flye args + // DEPRECATED: flye_mode = '--nano-hq' + flye_args = '' // extra flye args // -- Assembly: hifiasm -- - hifiasm_ont = false // combine hifi and ONT with hifiasm --ul? - hifiasm_args = "" // extra hifiasm args - // -- Short read -- - short_reads = false // short reads available? - trim_short_reads = true // trim short reads? 
+ hifiasm_args = '' // extra hifiasm args + // QC Oriented extra options + assembly = null + ref_map_bam = null + assembly_map_bam = null + // == Reference == + use_ref = false // use a reference genome (requires fasta + gff) + ref_fasta = null + ref_gff = null // -- POLISHING // -- Polish: medaka polish_medaka = false // run medaka - medaka_model = "" // model for medaka, if empty medaka will guess + medaka_model = '' // model for medaka, if empty medaka will guess + // -- Polish: dorado + polish_dorado = false // -- Polish: pilon polish_pilon = false // run pilon // -- QC -- @@ -81,16 +105,17 @@ params { meryl_k = 21 // k for meryl merqury = true // -- QC : Busco - busco = true // run busco + busco = false // run busco busco_db = '' // path to busco db - busco_lineage = "brassicales_odb10" // busco lineage + busco_lineage = "auto_euk" // busco lineage // -- QC: QUAST - quast = true // run quast - qc_reads = "ONT" // if both ONT and HiFi reads are available, which should be used for QC alignments + quast = false // run quast + qc_reads = "ont" // if both ONT and HiFi reads are available, which should be used for QC alignments // -- SCAFFOLDING scaffold_links = false // Scaffold with LINKS scaffold_longstitch = false // Scaffold with Longstitch scaffold_ragtag = false // Scaffold with ragtag + scaffold_hic = true // Scaffold with HiC // -- ANNOTATIONS lift_annotations = true // lift annotations from reference (if reference is provided) } @@ -104,8 +129,6 @@ profiles { process.beforeScript = 'echo $HOSTNAME' cleanup = false nextflow.enable.configProcessNamesValidation = true - dumpHashes = true - } conda { conda.enabled = true @@ -227,13 +250,6 @@ profiles { test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } - hifi_flye { includeConfig 'configs/hifi_flye.config' } // Hifi-reads with flye - hifi_hifiasm { includeConfig 'configs/hifi_hifiasm.config' } // hifi-reads with hifiasm - ont_flye { includeConfig 
'configs/ont_flye.config' } // ont-reads with flye - ont_hifiasm { includeConfig 'configs/ont_hifiasm.config' } // ont-reads with hifiasm - hifiont_hifiasm { includeConfig 'configs/hifi_ont_hifiasm_ul.config' } // ont and hifi reads with hifiasm --ul - hifiont_flye_on_hifiasm { includeConfig 'configs/hifi_ont_flye_on_hifiasm.config' } // ont and hifi reads. ONT via flye, Hifi via hifiasm, scaffold flye on hifiasm - hifiont_hifiasm_on_hifiasm { includeConfig 'configs/hifi_ont_hifiasm_on_hifiasm.config' } // ont and hifi reads. ONT via hifiasm, Hifi via hifiasm, scaffold ONT on HiFi } // Load nf-core custom profiles from different institutions @@ -242,12 +258,6 @@ profiles { // Load nf-core/genomeassembler custom profiles from different institutions. includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" -// Load nf-core custom profiles from different institutions - -// If params.custom_config_base is set AND either the NXF_OFFLINE environment variable is not set or params.custom_config_base is a local path, the nfcore_custom.config file from the specified base path is included. -// Load nf-core/genomeassembler custom profiles from different institutions. -includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" - // Load nf-core/genomeassembler custom profiles from different institutions. includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/pipeline/genomeassembler.config" : "/dev/null" @@ -286,22 +296,22 @@ process.shell = [ // Disable process selector warnings by default. Use debug profile to enable warnings. 
nextflow.enable.configProcessNamesValidation = false -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + timeline { enabled = true - file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${params.trace_report_suffix}.html" } report { enabled = true - file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${params.trace_report_suffix}.html" } trace { enabled = true - file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${params.trace_report_suffix}.txt" } dag { enabled = true - file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${params.trace_report_suffix}.html" } manifest { @@ -321,8 +331,8 @@ manifest { description = """Assemble genomes from long ONT or pacbio HiFi reads""" mainScript = 'main.nf' defaultBranch = 'master' - nextflowVersion = '!>=25.04.0' - version = '1.1.0' + nextflowVersion = '!>=25.10.0' + version = '2.0.0dev' doi = '10.5281/zenodo.14986998' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 636070bc..da2d9da1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -151,108 +151,54 @@ } } }, - "general_parameters": { - "title": "General parameters", + "reference_parameters": { + "title": "Reference Parameters", "type": "object", "description": "Options controlling pipeline behavior", "default": "", "properties": { - "use_ref": { - "type": "boolean", - "description": "use reference genome", - "default": true + "ref_fasta": { + "type": "string", + "description": "Path to reference genome seqeunce (fasta)" }, - "skip_assembly": { - "type": "boolean", - "description": "skip assembly steps", - "help_text": "Skip assembly and perform only qc. 
Requires 'assembly' column in the samplesheet" + "ref_gff": { + "type": "string", + "description": "Path to reference genome annotations (gff)" }, - "skip_alignments": { + "use_ref": { "type": "boolean", - "description": "skip alignments during qc" + "description": "use reference genome", + "hidden": true } }, "fa_icon": "fas fa-bacon" }, - "ont_options": { - "title": "ONT options", - "type": "object", - "description": "Options for ONT reads", - "default": "", - "properties": { - "ont": { - "type": "boolean", - "description": "ONT reads available?" - }, - "collect": { - "type": "boolean", - "description": "collect ONT reads into a single file" - }, - "porechop": { - "type": "boolean", - "description": "run porechop on ONT reads" - }, - "read_length": { - "type": "integer", - "description": "read length for genomescope (ONT only)", - "minimum": 1 - }, - "jellyfish": { - "type": "boolean", - "description": "run jellyfish on ONT reads to compute k-mer distribution and estimate genome size", - "default": true - }, - "dump": { - "type": "boolean", - "description": "dump jellyfish output" - }, - "kmer_length": { - "type": "integer", - "description": "kmer length to be used for jellyfish", - "default": 21, - "minimum": 1 - } - } - }, - "hifi_options": { - "title": "HiFi options", - "type": "object", - "description": "Options for HiFi reads", - "default": "", - "properties": { - "hifi": { - "type": "boolean", - "description": "HiFi reads available?" - }, - "lima": { - "type": "boolean", - "description": "run lima on HiFi reads?" 
- }, - "pacbio_primers": { - "type": "string", - "format": "file-path", - "exists": true, - "pattern": "^\\S+\\.fn?a(sta)?$", - "description": "file containing pacbio primers for trimming with lima" - } - } - }, "assembly_options": { "title": "Assembly options", "type": "object", "description": "Options controlling assembly", "default": "", "properties": { + "strategy": { + "type": "string", + "default": "single", + "description": "Assembly strategy to use. Valid choices are `'single'`, `'hybrid'` and `'scaffold'`" + }, "assembler": { "type": "string", - "description": "Assembler to use. Valid choices are: `'hifiasm'`, `'flye'`, `'flye_on_hifiasm'` or `hifiasm_on_hifiasm`. `flye_on_hifiasm` will scaffold flye assembly (ont) on hifiasm (hifi) assembly using ragtag. `hifiasm_on_hifiasm` will scaffold hifiasm (ont) onto hifiasm (HiFi) using ragtag", - "enum": ["flye", "hifiasm", "flye_on_hifiasm", "hifiasm_on_hifiasm"], - "default": "flye" + "description": "Assembler to use. Valid choices depend on strategy; for single either `flye` or `hifiasm`, hybrid can be done with `hifiasm` and for scaffolded assembly provide the names of the assemblers separated with an underscore. The first assembler will be used for ONT reads, the second for HiFi reads. 
(see below, asembler1 and assembler_hifi)", + "enum": ["flye", "hifiasm", "flye_hifiasm", "hifiasm_hifiasm", "flye_flye", "hifiasm_flye"], + "default": "hifiasm" + }, + "assembly_scaffolding_order": { + "type": "string", + "default": "ont_on_hifi", + "description": "When strategy is \"scaffold\", which assembly should be scaffolded onto which?", + "enum": ["ont_on_hifi", "hifi_on_ont"] + }, "genome_size": { - "type": "integer", - "description": "expected genome size, optional", - "minimum": 1 + "type": "string", + "description": "expected genome size, optional" }, "flye_mode": { "type": "string", @@ -264,36 +210,72 @@ "type": "string", "description": "additional args for flye" }, - "hifiasm_ont": { - "type": "boolean", - "description": "Use hifi and ONT reads with `hifiasm --ul`" - }, "hifiasm_args": { "type": "string", "description": "Extra arguments passed to `hifiasm`" + }, + "assembler_ont": { + "type": "string", + "description": "assembler_ont assembles ONT reads. This option is mainly useful when building more complex samplesheets" + }, + "assembler_ont_args": { + "type": "string", + "description": "Arguments to be passed to assembler_ont (ONT)" + }, + "assembler_hifi": { + "type": "string", + "description": "assembler_hifi assembles HiFi reads. This option is mainly useful when building more complex samplesheets" + }, + "assembler_hifi_args": { + "type": "string", + "description": "Arguments to be passed to assembler_hifi (HiFi)" + } + } + }, - "short_read_options": { - "title": "Short read options", + "long_read_preprocessing": { + "title": "Long-read preprocessing", "type": "object", - "description": "Options for short reads", + "description": "", "default": "", "properties": { - "short_reads": { + "ontreads": { + "type": "string", + "description": "Path to ONT reads" + }, + "ont_collect": { "type": "boolean", - "description": "Short reads available?" + "description": "Collect ONT reads from several files?" 
}, - "trim_short_reads": { + "ont_adapters": { + "type": "string", + "default": "[]", + "description": "Adaptors for ONT read-trimming" + }, + "ont_fastplong_args": { + "type": "string", + "description": "Additional args to be passed to fastplong for ONT reads" + }, + "hifireads": { + "type": "string", + "description": "Path to HiFi reads" + }, + "hifi_adapters": { + "type": "string", + "default": "[]", + "description": "Adaptors for HiFi read-trimming" + }, + "hifi_fastplong_args": { + "type": "string", + "description": "Additional args to be passed to fastplong for HiFi reads" + }, + "jellyfish": { "type": "boolean", - "description": "trim short reads with trimgalore", - "default": true + "description": "Run jellyfish and genomescope (recommended)" }, - "meryl_k": { + "jellyfish_k": { "type": "integer", - "description": "kmer length for meryl / merqury", "default": 21, - "minimum": 1 + "description": "Value of k used during k-mer analysis with jellyfish" } } }, @@ -307,6 +289,10 @@ "type": "boolean", "description": "Polish assembly with pilon? Requires short reads" }, + "polish_dorado": { + "type": "boolean", + "description": "Polish assembly with dorado (ONT only)" + }, "polish_medaka": { "type": "boolean", "description": "Polish assembly with medaka (ONT only)" @@ -334,6 +320,17 @@ "scaffold_ragtag": { "type": "boolean", "description": "Scaffold with ragtag (requires reference)?" 
+ }, + "scaffold_hic": { + "type": "boolean", + "description": "Scaffold using HiC reads using yahs (requires reads)?", + "default": true + }, + "hic_aligner": { + "type": "string", + "enum": ["bwa-mem2", "minimap2"], + "description": "Aligner to use for HiC reads; default: bwa-mem2", + "default": "bwa-mem2" } } }, @@ -346,18 +343,17 @@ "merqury": { "type": "boolean", "default": true, - "description": "Run merqury" + "description": "Run merqury (if short reads are provided)" }, "qc_reads": { "type": "string", - "description": "Long reads that should be used for QC when both ONT and HiFi reads are provided. Options are `'ONT'` or `'HIFI'`", - "enum": ["ONT", "HIFI"], - "default": "ONT" + "description": "Long reads that should be used for QC when both ONT and HiFi reads are provided. Options are `'ont'` or `'hifi'`", + "enum": ["ont", "hifi"], + "default": "ont" }, "busco": { "type": "boolean", - "description": "Run BUSCO?", - "default": true + "description": "Run BUSCO?" }, "busco_db": { "type": "string", @@ -367,12 +363,28 @@ "busco_lineage": { "type": "string", "description": "Busco lineage to use", - "default": "brassicales_odb10" + "default": "auto_euk" }, "quast": { "type": "boolean", - "description": "Run quast", - "default": true + "description": "Run quast" + }, + "ref_map_bam": { + "type": "string", + "description": "A mapping (bam) of reads mapped to the reference can be provided for QC. If provided, alignment to reference fasta will not run." + }, + "assembly": { + "type": "string", + "description": "Can be used to proved existing assembly will skip assembly and perform downstream steps including qc" + }, + "assembly_map_bam": { + "type": "string", + "description": "A mapping (bam) of reads mapped to the provided assembly can be specified for QC. 
If provided, alignment to the provided assembly fasta will not run" + }, + "csi_index_size": { + "type": "integer", + "description": "Index size to use for csi index (default: 14), creating an index of size 2^csi_index_size. See samtools index documentation for details. ", + "default": 14 + } } }, @@ -384,10 +396,69 @@ "properties": { "lift_annotations": { "type": "boolean", - "description": "Lift-over annotations (requires reference)?", + "description": "Lift-over annotations (requires ref_gff)?", "default": true } } + }, + "short_read_options": { + "title": "Short read options", + "type": "object", + "description": "Options for short reads", + "default": "", + "properties": { + "use_short_reads": { + "type": "boolean", + "description": "Use short reads?" + }, + "shortread_trim": { + "type": "boolean", + "description": "Trim short reads?" + }, + "meryl_k": { + "type": "integer", + "description": "kmer length for meryl / merqury", + "default": 21, + "minimum": 1 + }, + "shortread_F": { + "type": "string", + "description": "Path to forward short reads", + "default": "[]" + }, + "shortread_R": { + "type": "string", + "description": "Path to reverse short reads", + "default": "[]" + }, + "paired": { + "type": "string", + "description": "Are shortreads paired?" 
+ } + } + }, + "hic_options": { + "title": "HiC short read options", + "type": "object", + "description": "Options for HiC short reads", + "default": "", + "properties": { + "hic_trim": { + "type": "boolean", + "description": "Trim HiC short reads?", + "default": true + }, + "hic_F": { + "type": "string", + "description": "Path to forward HiC short reads", + "default": "[]" + }, + "hic_R": { + "type": "string", + "description": "Path to reverse HiC short reads", + "default": "[]" + } + } } }, "allOf": [ @@ -401,19 +472,13 @@ "$ref": "#/$defs/generic_options" }, { - "$ref": "#/$defs/general_parameters" - }, - { - "$ref": "#/$defs/ont_options" - }, - { - "$ref": "#/$defs/hifi_options" + "$ref": "#/$defs/reference_parameters" }, { "$ref": "#/$defs/assembly_options" }, { - "$ref": "#/$defs/short_read_options" + "$ref": "#/$defs/long_read_preprocessing" }, { "$ref": "#/$defs/polishing_options" @@ -426,6 +491,12 @@ }, { "$ref": "#/$defs/annotations_options" + }, + { + "$ref": "#/$defs/short_read_options" + }, + { + "$ref": "#/$defs/hic_options" } ] } diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index c87dc35b..4a4fad02 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "Stable", "datePublished": "2025-11-20T09:31:46+00:00", - "description": "

\n \n \n \"nf-core/genomeassembler\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/genomeassembler)\n[![GitHub Actions CI Status](https://github.com/nf-core/genomeassembler/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/genomeassembler** is a bioinformatics pipeline that ...\n\n\n\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\n\nNow, you can run the pipeline using:\n\n\n\n```bash\nnextflow run nf-core/genomeassembler \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/genomeassembler/usage) and the [parameter documentation](https://nf-co.re/genomeassembler/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/genomeassembler/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/genomeassembler/output).\n\n## Credits\n\nnf-core/genomeassembler was originally written by Niklas Schandry.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#genomeassembler` channel](https://nfcore.slack.com/channels/genomeassembler) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "

\n \n \n \"nf-core/genomeassembler\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/genomeassembler)\n[![GitHub Actions CI Status](https://github.com/nf-core/genomeassembler/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14986998)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/genomeassembler** is a bioinformatics pipeline that carries out genome assembly, polishing and scaffolding from long reads (ONT or pacbio). Assembly can be done via `flye` or `hifiasm`, polishing can be carried out with `medaka` (ONT), or `pilon` (requires short-reads), and scaffolding can be done using `LINKS`, `Longstitch`, or `RagTag` (if a reference is available). Quality control includes `BUSCO`, `QUAST` and `merqury` (requires short-reads).\nCurrently, this pipeline does not implement phasing of polyploid genomes or HiC scaffolding.\n\n\n \n \"nf-core/genomeassembler\"\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,ontreads,hifireads,ref_fasta,ref_gff,shortread_F,shortread_R,paired\nsampleName,ontreads.fa.gz,hifireads.fa.gz,assembly.fasta.gz,reference.fasta,reference.gff,short_F1.fastq,short_F2.fastq,true\n```\n\nEach row represents one genome to be assembled. 
`sample` should contain the name of the sample, `ontreads` should contain a path to ONT reads (fastq.gz), `hifireads` a path to HiFi reads (fastq.gz), `ref_fasta` and `ref_gff` contain reference genome fasta and annotations. `shortread_F` and `shortread_R` contain paths to short-read data, `paired` indicates if short-reads are paired. Columns can be omitted if they contain no data, with the exception of `shortread_R`, which needs to be present if `shortread_F` is there, even if it is empty.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/genomeassembler \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/genomeassembler/usage) and the [parameter documentation](https://nf-co.re/genomeassembler/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/genomeassembler/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/genomeassembler/output).\n\n## Credits\n\nnf-core/genomeassembler was written, and is currently maintained by [Niklas Schandry](https://github.com/nschan), of the Faculty of Biology of the Ludwig-Maximilians University (LMU) in Munich, Germany, with funding support from the German Research Foundation (Deutsche Forschungsgemeinschaft\u00a0[DFG], via Transregional Research Center TRR356 grant 491090170-A05 to Niklas Schandry).\n\nI thank the following people for their extensive 
assistance and constructive reviews during the development of this pipeline:\n\n- [Mahesh Binzer-Panchal](https://github.com/mahesh-panchal)\n- [Matthias H\u00f6rtenhuber](https://github.com/mashehu)\n- [Louis Le N\u00e9zet](https://github.com/LouisLeNezet)\n- [J\u00falia Mir Pedrol](https://github.com/mirpedrol)\n- [Daniel Straub](https://github.com/d4straub)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#genomeassembler` channel](https://nfcore.slack.com/channels/genomeassembler) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/genomeassembler for your analysis, please cite it using the following doi: [10.5281/zenodo.14986998](https://doi.org/10.5281/zenodo.14986998)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -137,26 +137,39 @@ "@id": "https://orcid.org/0000-0003-3099-7860" }, { - "@id": "https://orcid.org/0000-0002-7860-3560" + "@id": "https://orcid.org/0000-0003-1675-0677" }, { - "@id": "https://orcid.org/0000-0003-1675-0677" + "@id": "https://orcid.org/0000-0002-7860-3560" } ], "dateCreated": "", "dateModified": "2025-11-20T09:31:46Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", - "keywords": ["nf-core", "nextflow", "genome-assembly"], - "license": ["MIT"], - "name": ["nf-core/genomeassembler"], + "keywords": [ + "nf-core", + "nextflow", + "genome-assembly" + ], + "license": [ + "MIT" + ], + "name": [ + "nf-core/genomeassembler" + ], "programmingLanguage": { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" }, "sdPublisher": { "@id": "https://nf-co.re/" }, - "url": ["https://github.com/nf-core/genomeassembler", "https://nf-co.re/genomeassembler/1.1.0/"], - "version": ["1.1.0"] + "url": [ + "https://github.com/nf-core/genomeassembler", + "https://nf-co.re/genomeassembler/1.1.0/" + ], + "version": [ + "1.1.0" + ] }, { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", @@ -328,17 +341,17 @@ "email": "niklas@bio.lmu.de", "name": "Niklas Schandry" }, - { - "@id": "https://orcid.org/0000-0002-7860-3560", - "@type": "Person", - "email": "mm49@sanger.ac.uk", - "name": "Matthieu Muffato" - }, { "@id": "https://orcid.org/0000-0003-1675-0677", "@type": "Person", "email": "mahesh.binzer-panchal@nbis.se", "name": "Mahesh Binzer-Panchal" + }, + { + "@id": "https://orcid.org/0000-0002-7860-3560", + "@type": "Person", + "email": "mm49@sanger.ac.uk", + "name": "Matthieu Muffato" } ] } \ No newline at end of file diff --git a/subworkflows/local/assemble/main.nf b/subworkflows/local/assemble/main.nf index 1030beb9..243036f0 100644 --- a/subworkflows/local/assemble/main.nf +++ 
b/subworkflows/local/assemble/main.nf @@ -1,223 +1,543 @@ -include { FLYE } from '../../../modules/nf-core/flye/main' +include { FLYE as FLYE_ONT} from '../../../modules/nf-core/flye/main' +include { FLYE as FLYE_HIFI} from '../../../modules/nf-core/flye/main' include { HIFIASM } from '../../../modules/nf-core/hifiasm/main' include { HIFIASM as HIFIASM_ONT } from '../../../modules/nf-core/hifiasm/main' include { GFA_2_FA as GFA_2_FA_HIFI } from '../../../modules/local/gfa2fa/main' include { GFA_2_FA as GFA_2_FA_ONT} from '../../../modules/local/gfa2fa/main' include { MAP_TO_REF } from '../mapping/map_to_ref/main' -include { RUN_LIFTOFF } from '../liftoff/main' +include { LIFTOFF } from '../../../modules/nf-core/liftoff/main' include { RAGTAG_PATCH } from '../../../modules/nf-core/ragtag/patch/main' include { QC } from '../qc/main' workflow ASSEMBLE { take: - ont_reads // meta, reads - hifi_reads // meta, reads - ch_input - genome_size + ch_main meryl_kmers main: - // Empty channels - Channel.empty().set { ch_refs } - Channel.empty().set { ch_ref_bam } - Channel.empty().set { ch_assembly_bam } - Channel.empty().set { ch_assembly } - Channel.empty().set { flye_inputs } - Channel.empty().set { hifiasm_inputs } - Channel.empty().set { longreads } - Channel.empty().set { ch_versions } - - if (params.use_ref) { - ch_input - .map { row -> [row.meta, row.ref_fasta] } - .set { ch_refs } - } - - if (params.skip_assembly) { - // Sample sheet layout when skipping assembly - // sample,ontreads,assembly,ref_fasta,ref_gff - ch_input - .map { row -> [row.meta, row.assembly] } - .set { ch_assembly } - } - if (!params.skip_assembly) { - def hifi_only = params.hifi && !params.ont ? 
true : false - // Define inputs for flye - if (params.assembler == "flye") { - if (params.hifi) { - hifi_reads - .map { it -> [it[0], it[1]] } - .set { flye_inputs } - } - if (params.ont) { - ont_reads.set { flye_inputs } - } - // Run flye - flye_inputs - .join(genome_size) - .map { meta, reads, genomesize -> [meta +[ genome_size: genomesize ], reads] } - .set { flye_inputs } - FLYE(flye_inputs, params.flye_mode) - FLYE.out.fasta.map { meta, assembly -> [meta - meta.subMap('genome_size'), assembly] }.set { ch_assembly } - ch_versions = ch_versions.mix(FLYE.out.versions) - } - if (params.assembler == "hifiasm") { - // HiFi and ONT reads in ultralong mode - if (params.hifi && params.ont) { - hifi_reads - .join(ont_reads) - .set { hifiasm_inputs } - HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []], [[], []]) - GFA_2_FA_HIFI(HIFIASM.out.processed_unitigs) - GFA_2_FA_HIFI.out.contigs_fasta.set { ch_assembly } - - ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA_HIFI.out.versions) - } - // ONT reads only - if (!params.hifi && params.ont) { - ont_reads - .map { meta, ontreads -> [meta, ontreads, []] } - .set { hifiasm_inputs } - HIFIASM_ONT(hifiasm_inputs, [[], [], []], [[], [], []], [[], []]) - GFA_2_FA_ONT(HIFIASM_ONT.out.processed_unitigs) - GFA_2_FA_ONT.out.contigs_fasta.set { ch_assembly } - ch_versions = ch_versions.mix(HIFIASM_ONT.out.versions).mix(GFA_2_FA_ONT.out.versions) - } - // HiFI reads only - if (params.hifi && !params.ont) { - hifi_reads - .map { meta, ontreads -> [meta, ontreads, []] } - .set { hifiasm_inputs } - HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []], [[], []]) + /* + Samples are split into those that need assembly, and those that will not be assembled (i.e. 
assemblies are provided) + */ + ch_main.dump(tag: "Assemble - Inputs") - GFA_2_FA_HIFI(HIFIASM.out.processed_unitigs) - GFA_2_FA_HIFI.out.contigs_fasta.set { ch_assembly } + ch_main + .branch { + it -> + to_assemble: !it.meta.assembly + no_assemble: it.meta.assembly + } + .set { + ch_main_branched + } + /* + There are three assembly strategies: + - Single: Using a single assembler with one type of reads + - Hybrid: Using a single assembler with both types of read in one run (only hifiasm --ul) + - Scaffold: Separately assembling ONT and HiFi reads, and then scaffolding one onto the other - ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA_HIFI.out.versions) + Each sample can only have one strategy, and branching happens here. + */ + ch_main_branched + .to_assemble + .branch { it -> + single: it.meta.strategy == "single" + hybrid: it.meta.strategy == "hybrid" + scaffold: it.meta.strategy == "scaffold" } + .set { ch_main_assemble_branched } + + ch_main_assemble_branched + .single + .dump(tag: "Assemble: Branched: Single") + ch_main_assemble_branched + .hybrid + //.view {"Assemble: Hybrid: $it"} + .dump(tag: "Assemble: Branched: Hybrid") + ch_main_assemble_branched + .scaffold + .dump(tag: "Assemble: Branched: scaffold") + + /* + ========================= + FLYE ASSEMBLER + ========================= + */ + /* + Inputs for flye assembler: + - Samples with single strategy, where the assembler is flye + - Samples from the scaffold strategy where either (or both) assembler is flye + */ + + ch_main_assemble_branched + .single + .filter { it -> it.meta.assembler_ont == "flye" } + .mix( + // Add in the scaffolding samples where flye is used + ch_main_assemble_branched + .scaffold + .filter { it -> it.meta.assembler_ont == "flye" || it.meta.assembler_hifi == "flye" } + ) + .set { ch_main_assemble_flye } + + // Assembly flye branch + // Extra args per sample are stored in the meta map, so is the estimated / expected genome size + // The inputs are created once 
for ONT and once for HiFi + ch_main_assemble_flye + .filter { it -> it.meta.assembler_ont == "flye" && it.meta.ontreads } + .multiMap { + it -> + reads: [ + it.meta, + it.meta.ontreads ?: [], + ] + mode: it.meta.assembler_ont == "flye" ? "--nano-hq" : null } - if (params.assembler == "flye_on_hifiasm" | params.assembler == "hifiasm_on_hifiasm") { - // Run hifiasm - hifi_reads - .map { meta, hifireads -> [meta, hifireads, []] } - .set { hifiasm_inputs } - HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []], [[], []]) - - GFA_2_FA_HIFI(HIFIASM.out.processed_unitigs) - - ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA_HIFI.out.versions) - - if(params.assembler == "flye_on_hifiasm") { - // Run flye - ont_reads - .join(genome_size) - .map { meta, reads, genomesize -> [[id: meta.id, genome_size: genomesize], reads]} - .set { flye_inputs } - - FLYE(flye_inputs, params.flye_mode) - FLYE.out.fasta - .map { meta, assembly -> [[id: meta.id], assembly] } - .join( - GFA_2_FA_HIFI.out.contigs_fasta - ) - .multiMap { meta, flye_fasta, hifiasm_fasta -> - target: [meta, flye_fasta] - query: [meta, hifiasm_fasta] - } - .set { ragtag_in } - ch_versions = ch_versions.mix(FLYE.out.versions) + .set { flye_ont_inputs } + + // These are the hifi samples + ch_main_assemble_flye + // Those where the hifi assembler is flye, or where there is only one assembler and only hifireads + .filter { it -> + it.meta.assembler_hifi == "flye" && it.meta.hifireads || + ( + it.meta.strategy == "single" && + it.meta.hifireads && + !it.meta.ontreads && + it.meta.assembler == "flye" + ) + } + .multiMap { + it -> + reads: [ + it.meta, + it.meta.hifireads ?: [], + ] + mode: it.meta.assembler_hifi == "flye" ? 
"--pacbio-hifi" : null + } + .set { flye_hifi_inputs } + + flye_ont_inputs.reads.dump(tag: "Assemble: Flye-ONT inputs") + flye_hifi_inputs.reads.dump(tag: "Assemble: Flye-HIFI inputs") + + // Run through flye + FLYE_ONT(flye_ont_inputs.reads, flye_ont_inputs.mode) + FLYE_HIFI(flye_hifi_inputs.reads, flye_hifi_inputs.mode) + + /* + ========================= + HIFIASM ASSEMBLER + ========================= + */ + /* Hifiasm: everything that is not hifiasm-ONT + - Single branch with hifiasm as assembler and no ont reads (only hifireads) + - Hybrid assembly + - Scaffold samples where assembler_hifi (hifi assembler) is hifiasm + */ + //ch_main_assemble_branched.hybrid.view {"ASSEMBLE: Branched: Hybrid"} + ch_main_assemble_branched + .single + .filter { + it -> it.meta.assembler_hifi == "hifiasm" } - if(params.assembler == "hifiasm_on_hifiasm") { - // Run hifiasm --ont - ont_reads - .map { meta, ontreads -> [meta, ontreads, []] } - .set { hifiasm_inputs } - HIFIASM_ONT(hifiasm_inputs,[[], [], []], [[], [], []], [[], []]) - GFA_2_FA_ONT(HIFIASM_ONT.out.processed_unitigs) - GFA_2_FA_ONT.out.contigs_fasta - .join( - GFA_2_FA_HIFI.out.contigs_fasta - ) - .multiMap { meta, ont_assembly, hifi_assembly -> - target: [meta, ont_assembly] - query: [meta, hifi_assembly] + .mix( + ch_main_assemble_branched + .hybrid + .filter { + it -> it.meta.assembler_ont == "hifiasm" } - .set { ragtag_in } - ch_versions = ch_versions.mix(HIFIASM_ONT.out.versions).mix(GFA_2_FA_ONT.out.versions) - } + ) + .mix(ch_main_assemble_branched + .scaffold + .filter { + it -> it.meta.assembler_hifi == "hifiasm" + } + ) + .map { + it -> [ + it.meta, + it.meta.hifireads, + // for hybrid samples include ONT reads in 3rd slot of first input (see hifiasm module) + (it.meta.strategy == "hybrid" && it.meta.ontreads) ? 
it.meta.ontreads : [] + ] + } + .set { ch_main_assemble_hifi_hifiasm } + + ch_main_assemble_hifi_hifiasm.dump(tag: "Assemble: hifiasm HIFI inputs") + + //ch_main_assemble_hifi_hifiasm.view { "Assemble: hifiasm HIFI inputs: $it" } + + HIFIASM(ch_main_assemble_hifi_hifiasm, + [[], [], []], + [[], [], []], + [[], []]) + + // hifiasm produces GFA files + GFA_2_FA_HIFI( HIFIASM.out.primary_contigs ) - RAGTAG_PATCH(ragtag_in.target, ragtag_in.query, [[], []], [[], []] ) - // takes: meta, assembly (ont), reference (hifi) - RAGTAG_PATCH.out.patch_fasta.set { ch_assembly } - ch_versions = ch_versions.mix(RAGTAG_PATCH.out.versions) - } - } /* - Prepare alignments + hifiasm with ONLY ont reads. + Assemble hifiasm_ont branch: + Single branch with hifiasm and only ont reads + Scaffold branch where assembler_ont (ont assembler) is hifiasm */ - if (params.skip_alignments) { - // Sample sheet layout when skipping assembly and mapping - // sample,ontreads,assembly,ref_fasta,ref_gff,assembly_bam,assembly_bai,ref_bam - ch_input - .map { row -> [row.meta, row.ref_bam] } - .set { ch_ref_bam } - - ch_input - .map { row -> [row.meta, row.assembly_bam] } - .set { ch_assembly_bam } - } - else { - Channel.empty().set { ch_ref_bam } - if (params.assembler == "flye") { - flye_inputs - .map { meta, reads -> [[id: meta.id], reads] } - .set { longreads } - } - if (params.assembler == "hifiasm" || params.assembler == "flye_on_hifiasm" || params.assembler == "hifiasm_on_hifiasm") { - hifiasm_inputs - .map { meta, long_reads, _ultralong -> [meta, long_reads] } - .set { longreads } - // When using either hifiasm_ont or flye_on_hifiasm, both reads are available, which should be used for qc? 
- if (params.hifi && params.ont) { - if (params.qc_reads == 'ONT') { - ont_reads - .map { it -> [it[0], it[1]] } - .set { longreads } - } - if (params.qc_reads == 'HIFI') { - hifi_reads - .map { it -> [it[0], it[1]] } - .set { longreads } + + ch_main_assemble_branched + .single + .filter { it -> it.meta.assembler_ont == "hifiasm" && it.meta.ontreads } + .mix(ch_main_assemble_branched + .scaffold + .filter { it -> it.meta.assembler_ont == "hifiasm" } + ) + .set { ch_main_assemble_ont_hifiasm } + + ch_main_assemble_ont_hifiasm.dump(tag: "Assemble: hifiasm ONT inputs") + + HIFIASM_ONT(ch_main_assemble_ont_hifiasm.map { it -> [ it.meta, it.meta.ontreads, [] ] }, [[], [], []], [[], [], []], [[], []]) + + GFA_2_FA_ONT( HIFIASM_ONT.out.primary_contigs) + + // Flye: + FLYE_ONT.out.fasta + .filter { + meta, _fasta -> meta.strategy != "scaffold" + } + .map { meta_old, assembly -> [meta: meta_old + [ assembly: assembly ] ] } + .mix( + FLYE_HIFI.out.fasta + .filter { + meta, _fasta -> meta.strategy != "scaffold" } - } + .map { meta_old, assembly -> [meta: meta_old + [ assembly: assembly ] ] } + ) + .set { flye_assemblies } + + flye_assemblies.dump(tag: "Assemble: Flye assemblies") + + // regernerate meta maps + GFA_2_FA_HIFI.out.contigs_fasta + .filter { it -> it[0].strategy != "scaffold" } + .map { meta_old, assembly -> + [ + meta: meta_old + + // stick assembly into the correct key + [ + assembly: (meta_old.strategy == "single" && meta_old.assembler_hifi == "hifiasm") || (meta_old.strategy == "hybrid" && meta_old.assembler_ont == "hifiasm") ? 
assembly : null, + ] + ] } + .set { hifiasm_hifi_assemblies } - if (params.quast) { - if (params.use_ref) { - MAP_TO_REF(longreads, ch_refs) + hifiasm_hifi_assemblies.dump(tag: "Assemble: hifiasm HIFI assemblies") - MAP_TO_REF.out.ch_aln_to_ref_bam.set { ch_ref_bam } - } + GFA_2_FA_ONT.out.contigs_fasta + .filter { meta, _fasta -> meta.strategy != "scaffold" } + .map { meta, assembly -> + [ + meta: meta + + [ + assembly: assembly + ] + ] } - } + .set { hifiasm_ont_assemblies } + + hifiasm_ont_assemblies.dump(tag: "Assemble: hifiasm ONT assemblies") + /* - QC on initial assembly + ========================= + SCAFFOLDING + ========================= */ - QC(ch_input, longreads, ch_assembly, ch_ref_bam, meryl_kmers) - ch_versions = ch_versions.mix(QC.out.versions) - if (params.lift_annotations) { - RUN_LIFTOFF(ch_assembly, ch_input) - ch_versions = ch_versions.mix(RUN_LIFTOFF.out.versions) - } + // The single and hybrid channels can be mixed and forwarded. + + flye_assemblies + .mix(hifiasm_hifi_assemblies) + .mix(hifiasm_ont_assemblies) + .set { ch_assemblies_no_scaffold } + + ch_assemblies_no_scaffold.dump(tag: "Assemble: Assemblies without scaffolding") + + + /* + ------------------- + Prepare Scaffolding + ------------------- + */ + // This leaves the scaffold strategy. 
+ // scaffolds can be: FLYE-HIFIASM, FLYE-FLYE, HIFIASM-HIFIASM or HIFIASM-FLYE + // The above is (ONT-HIFI) + + FLYE_ONT.out.fasta + // Flye-hifiasm + .filter { meta, _fasta -> + meta.strategy == "scaffold" && + meta.assembler_ont == "flye" && + meta.assembler_hifi == "hifiasm" + } + .map { meta, fasta -> [meta.id, meta, fasta] } + .join( + GFA_2_FA_HIFI + .out + .contigs_fasta + .filter { meta, _fasta -> + meta.strategy == "scaffold" && + meta.assembler_ont == "flye" && + meta.assembler_hifi == "hifiasm" + } + .map { meta, fasta -> [ meta.id, fasta ] } + ) + .map { _id, meta_old, assembly_flye, assembly_hifiasm -> + [ + meta: meta_old - + meta_old.subMap("hifiasm_assembly", "assembly_hifi", "assembly_ont", "flye_assembly") + + [ + assembly_ont: assembly_flye, + assembly_hifi: assembly_hifiasm + ] + ] + } + .set{ scaffold_flye_hifiasm } + + // flye-flye + FLYE_ONT.out.fasta + .filter { + meta, _fasta -> meta.strategy == "scaffold" && meta.assembler_ont == "flye" & meta.assembler_hifi == "flye" + } + .map { + meta, fasta -> [meta.id, meta, fasta] // id, meta, ont assembly + } + .join( + FLYE_HIFI.out.fasta + .filter { + meta, _fasta -> meta.strategy == "scaffold" && meta.assembler_ont == "flye" & meta.assembler_hifi == "flye" + } + .map { + meta, fasta -> [ meta.id, fasta ] // id, hifi assembly + }, + ) + .map { _id, meta, ont_assembly, hifi_assembly -> + [ + meta: meta + + [ + assembly_ont: ont_assembly, + assembly_hifi: hifi_assembly + ] + ] + } + .set { scaffold_flye_flye } + + // hifiasm_flye + GFA_2_FA_ONT.out.contigs_fasta + .filter { + meta, _assembly -> meta.strategy == "scaffold" && + meta.assembler_ont == "hifiasm" && + meta.assembler_hifi == "flye" + } + .map { + meta, assembly -> + [ + meta.id, + meta, + assembly + ] + } + .join( + FLYE_HIFI.out.fasta + .filter{ meta, _fasta -> meta.strategy == "scaffold" && meta.assembler_ont == "hifiasm" && meta.assembler_hifi == "flye" } + .map { meta, fasta -> [ meta.id, fasta ] } + ) + .map { + _id, meta, 
hifiasm_ont_assembly, flye_hifi_assembly -> + [ + meta: meta + + [ + assembly_ont: hifiasm_ont_assembly, + assembly_hifi: flye_hifi_assembly + ] + ] + } + .set{ scaffold_hifiasm_flye } + + // hifiasm_hifiasm + GFA_2_FA_ONT.out.contigs_fasta + .filter { + meta, _assembly -> meta.strategy == "scaffold" && + meta.assembler_ont == "hifiasm" && + meta.assembler_hifi == "hifiasm" + } + .map { + meta, assembly -> + [ + meta.id, + meta, + assembly + ] + } + .join( + GFA_2_FA_HIFI.out.contigs_fasta + .filter { + meta, _assembly -> meta.strategy == "scaffold" && + meta.assembler_ont == "hifiasm" && + meta.assembler_hifi == "hifiasm" + } + .map { + meta, assembly -> + [ + meta.id, + assembly + ] + } + ) + .map { + _id, meta, assembly_ont, assembly_hifi -> + [ + meta: meta + + [ + assembly_ont: assembly_ont, + assembly_hifi: assembly_hifi + ] + ] + } + .set{ scaffold_hifiasm_hifiasm } + + // branch to scaffold those assemblies that need it + + scaffold_flye_hifiasm + .mix(scaffold_flye_flye) + .mix(scaffold_hifiasm_flye) + .mix(scaffold_hifiasm_hifiasm) + .set { ch_to_scaffold } + + ch_to_scaffold.dump(tag: "Assemble: Assemblies with scaffolding - inputs") + + // For scaffolding, depeding on which strategy used, the correct assembly needs to go into either target or query: + // assembly_ont is always ONT, assembly_hifi is always HiFi + + ch_to_scaffold + .multiMap { + it -> + target: [ + it.meta, + it.meta.assembly_scaffolding_order == "ont_on_hifi" ? (it.meta.assembly_ont) : (it.meta.assembly_hifi) + ] + query: [ + it.meta, + it.meta.assembly_scaffolding_order == "ont_on_hifi" ? 
(it.meta.assembly_hifi) : (it.meta.assembly_ont) + ] + } + .set { ragtag_in } + + ragtag_in.target.dump(tag: "ASSEMBLE: SCAFFOLD: RAGTAG_PATCH INPUT: TARGET") + ragtag_in.query.dump( tag: "ASSEMBLE: SCAFFOLD: RAGTAG_PATCH INPUT: QUERY") + // Scaffold with PATCH + RAGTAG_PATCH(ragtag_in.target, ragtag_in.query, [[], []], [[], []] ) + + // Update meta + RAGTAG_PATCH.out.patch_fasta + .map { meta, patched -> [meta: meta + [assembly: patched] ] } + .set { ch_assemblies_scaffold } + + ch_assemblies_scaffold.dump(tag: "Assemble: Assemblies with scaffolding - outputs") + + // Mix everything assembled back togehter + ch_assemblies_no_scaffold + .mix(ch_assemblies_scaffold) + .set { ch_main_assembled } + + ch_main_assembled.dump(tag: "Assemble: Assembled") + + // Mix with whatever was not destined for assembly + ch_main_branched + .no_assemble + .mix( ch_main_assembled ) + .set { ch_main_to_mapping } + + ch_main_to_mapping.dump(tag: "Assemble: TO MAPPING") + + // QUAST is the only QC tool that requires mapping + + ch_main_to_mapping + .branch { + it -> + quast: it.meta.quast + no_quast: !it.meta.quast + } + // Note that this channel is set here but only the quast branch is further used + .set { ch_main_quast_branch } + + // If QUAST should run, and we need an alignment to reference, this is created here + ch_main_quast_branch + .quast + .branch { + it -> + use_ref: it.meta.use_ref + no_use_ref: !it.meta.use_ref + } + .set { + ch_quast_branched + } + + // Alignment is actually only created if no bam file is provided + ch_quast_branched + .use_ref + .branch { it -> + to_map: !it.meta.ref_map_bam + dont_map: it.meta.ref_map_bam + } + .set { ch_ref_mapping_branched } + + // Use the QC reads and map them to ref + ch_ref_mapping_branched + .to_map + .map { + it -> + [ it.meta, it.meta.qc_reads_path, it.meta.ref_fasta ] + } + .set { map_to_ref_in } + + MAP_TO_REF(map_to_ref_in) + + // Add the ref mapping to the large main channel + MAP_TO_REF.out.ch_aln_to_ref_bam + .map { meta, 
bam -> [ meta: meta + [ref_map_bam: bam] ] } + .mix(ch_ref_mapping_branched.dont_map) + .mix(ch_quast_branched.no_use_ref) + // above recreates ch_main_quast_branch.quast + .mix(ch_main_quast_branch.no_quast) + .set { ch_main_to_qc } + + ch_main_to_qc.dump(tag: "ASSEMBLE: QC INPUT") + //QC on initial assembly + + // scaffolds to QC need to be defined here, this is what is in the assembly slot + ch_main_to_qc + .map { it -> [it.meta.id, it.meta.assembly] } + .set { scaffolds } + + QC(ch_main_to_qc, scaffolds, meryl_kmers) + + // If annotation liftover on the initial assembly is desired, it happens here. + ch_main_to_qc + .filter { + it -> it.meta.lift_annotations + } + .map { it -> + [ + it.meta, + it.meta.assembly, + it.meta.ref_fasta, + it.meta.ref_gff + ] + } + .set { liftoff_in } + liftoff_in.dump(tag: "ASSEMBLE: LIFTOFF: INPUT") + LIFTOFF(liftoff_in, []) emit: - assembly = ch_assembly - ref_bam = ch_ref_bam - longreads + ch_main = ch_main_to_qc assembly_quast_reports = QC.out.quast_out assembly_busco_reports = QC.out.busco_out assembly_merqury_reports = QC.out.merqury_report_files - versions = ch_versions } diff --git a/subworkflows/local/bam_sort_stat/main.nf b/subworkflows/local/bam_sort_stat/main.nf index 89f8cdf4..826d36a7 100644 --- a/subworkflows/local/bam_sort_stat/main.nf +++ b/subworkflows/local/bam_sort_stat/main.nf @@ -17,18 +17,13 @@ workflow BAM_INDEX_STATS_SAMTOOLS { fasta main: - Channel.empty().set { ch_versions } - SAMTOOLS_INDEX(bam) - BAM_STATS_SAMTOOLS(bam.join(SAMTOOLS_INDEX.out.bai, by: [0]), fasta) - - versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions).mix(BAM_STATS_SAMTOOLS.out.versions) + BAM_STATS_SAMTOOLS(bam.join(SAMTOOLS_INDEX.out.csi, by: [0]), fasta) emit: - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + bai = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), 
[ flagstat ] ] idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - versions } diff --git a/subworkflows/local/hifi/main.nf b/subworkflows/local/hifi/main.nf deleted file mode 100644 index ea2fd032..00000000 --- a/subworkflows/local/hifi/main.nf +++ /dev/null @@ -1,22 +0,0 @@ -include { PREPARE_HIFI } from '../prepare_hifi/main' - - -workflow HIFI { - take: - inputs - - main: - Channel.empty().set { ch_versions } - - PREPARE_HIFI(inputs) - - PREPARE_HIFI.out.hifireads.set { hifi_reads } - - ch_versions.mix(PREPARE_HIFI.out.versions) - - versions = ch_versions - - emit: - hifi_reads - versions -} diff --git a/subworkflows/local/jellyfish/main.nf b/subworkflows/local/jellyfish/main.nf deleted file mode 100644 index e1257b58..00000000 --- a/subworkflows/local/jellyfish/main.nf +++ /dev/null @@ -1,57 +0,0 @@ -include { COUNT } from '../../../modules/local/jellyfish/count/main' -include { DUMP } from '../../../modules/local/jellyfish/dump/main' -include { HISTO } from '../../../modules/local/jellyfish/histo/main' -include { STATS } from '../../../modules/local/jellyfish/stats/main' -include { GENOMESCOPE } from '../../../modules/local/genomescope/main' - -workflow JELLYFISH { - take: - samples // id, fasta - nanoq_out - - main: - Channel.empty().set { genomescope_in } - Channel.empty().set { ch_versions } - COUNT(samples) - COUNT.out.kmers.set { kmers } - - ch_versions = ch_versions.mix(COUNT.out.versions) - - if (params.dump) { - DUMP(kmers) - ch_versions = ch_versions.mix(DUMP.out.versions) - } - - HISTO(kmers) - ch_versions = ch_versions.mix(HISTO.out.versions) - - if (!params.read_length == null) { - HISTO.out.histo.map { it -> [it[0], it[1], params.kmer_length, params.read_length] }.set { genomescope_in } - } - - if (params.read_length == null) { - HISTO.out.histo.map { it -> [it[0], it[1], params.kmer_length] }.join(nanoq_out).set { genomescope_in } - } - - GENOMESCOPE(genomescope_in) - - ch_versions = 
ch_versions.mix(GENOMESCOPE.out.versions) - - STATS(kmers) - - ch_versions = ch_versions.mix(STATS.out.versions) - - GENOMESCOPE.out.estimated_hap_len.set { hap_len } - - GENOMESCOPE.out.summary.set { genomescope_summary } - - GENOMESCOPE.out.plot.set { genomescope_plot } - - versions = ch_versions - - emit: - hap_len - genomescope_summary - genomescope_plot - versions -} diff --git a/subworkflows/local/liftoff/main.nf b/subworkflows/local/liftoff/main.nf index 7d2fd024..baee9e18 100644 --- a/subworkflows/local/liftoff/main.nf +++ b/subworkflows/local/liftoff/main.nf @@ -2,23 +2,13 @@ include { LIFTOFF } from '../../../modules/nf-core/liftoff/main' workflow RUN_LIFTOFF { take: - assembly - inputs + liftoff_in main: - Channel.empty().set { ch_versions } - assembly - .join( - inputs.map { row -> [row.meta, row.ref_fasta, row.ref_gff] } - ) - .set { liftoff_in } - LIFTOFF(liftoff_in, []) LIFTOFF.out.gff3.set { lifted_annotations } - versions = ch_versions.mix(LIFTOFF.out.versions) emit: lifted_annotations - versions } diff --git a/subworkflows/local/mapping/map_sr/main.nf b/subworkflows/local/mapping/map_sr/main.nf index 838d7454..e29204c2 100644 --- a/subworkflows/local/mapping/map_sr/main.nf +++ b/subworkflows/local/mapping/map_sr/main.nf @@ -7,17 +7,13 @@ workflow MAP_SR { genome_assembly main: - Channel.empty().set { ch_versions } // map reads to assembly in_reads - .map { meta, reads -> [[id: meta.id], reads] } .join(genome_assembly) .set { map_assembly } ALIGN_SHORT(map_assembly, true, 'bai', false, false) - versions = ch_versions.mix(ALIGN_SHORT.out.versions) - ALIGN_SHORT.out.bam.set { aln_to_assembly_bam } ALIGN_SHORT.out.index.set { aln_to_assembly_bai } @@ -32,8 +28,6 @@ workflow MAP_SR { BAM_STATS(aln_to_assembly_bam_bai, ch_fasta) - versions = ch_versions.mix(BAM_STATS.out.versions) - aln_to_assembly_bam .join(aln_to_assembly_bai) .set { aln_to_assembly_bam_bai } @@ -42,5 +36,4 @@ workflow MAP_SR { aln_to_assembly_bam aln_to_assembly_bai 
aln_to_assembly_bam_bai - versions } diff --git a/subworkflows/local/mapping/map_to_assembly/main.nf b/subworkflows/local/mapping/map_to_assembly/main.nf index 2489a8ed..47c31897 100644 --- a/subworkflows/local/mapping/map_to_assembly/main.nf +++ b/subworkflows/local/mapping/map_to_assembly/main.nf @@ -3,20 +3,16 @@ include { BAM_STATS_SAMTOOLS as BAM_STATS } from '../../../nf-core/bam_stats_sam workflow MAP_TO_ASSEMBLY { take: - in_reads - genome_assembly + map_assembly // meta: [id, qc_reads], reads, refs main: - Channel.empty().set { ch_versions } - // map reads to assembly - in_reads - .join(genome_assembly) - .set { map_assembly } - ALIGN(map_assembly, true, 'bai', false, false) - ALIGN.out.bam.set { aln_to_assembly_bam } - ALIGN.out.index.set { aln_to_assembly_bai } + ALIGN.out.bam + .set { aln_to_assembly_bam } + + ALIGN.out.index + .set { aln_to_assembly_bai } map_assembly .map { meta, _reads, fasta -> [meta, fasta] } @@ -28,9 +24,6 @@ workflow MAP_TO_ASSEMBLY { BAM_STATS(aln_to_assembly_bam_bai, ch_fasta ) - versions = ch_versions.mix(ALIGN.out.versions).mix(BAM_STATS.out.versions) - emit: - aln_to_assembly_bam - versions + aln_to_assembly_bam // [id], bam } diff --git a/subworkflows/local/mapping/map_to_ref/main.nf b/subworkflows/local/mapping/map_to_ref/main.nf index 2293e4b8..d97df25d 100644 --- a/subworkflows/local/mapping/map_to_ref/main.nf +++ b/subworkflows/local/mapping/map_to_ref/main.nf @@ -3,33 +3,28 @@ include { BAM_STATS_SAMTOOLS as BAM_STATS } from '../../../nf-core/bam_stats_sam workflow MAP_TO_REF { take: - in_reads - ch_refs + ch_map_ref // meta, reads, refs main: - Channel.empty().set { ch_versions } // Map reads to reference - in_reads - .join(ch_refs) - .set { ch_map_ref_in } + ALIGN(ch_map_ref, true, 'bai', false, false) - ALIGN(ch_map_ref_in, true, 'bai', false, false) + ALIGN.out.bam + .set { ch_aln_to_ref_bam } - ALIGN.out.bam.set { ch_aln_to_ref_bam } + ALIGN.out.index + .set { aln_to_ref_bai } ch_aln_to_ref_bam - 
.join(ALIGN.out.index) + .join(aln_to_ref_bai) .set { ch_aln_to_ref_bam_bai } - ch_map_ref_in - .map { meta, _reads, fasta -> [meta, fasta] } + ch_map_ref + .map { meta, _reads, fasta -> [[meta], fasta] } .set { ch_fasta } BAM_STATS(ch_aln_to_ref_bam_bai, ch_fasta) - versions = ch_versions.mix(ALIGN.out.versions).mix(BAM_STATS.out.versions) - emit: - ch_aln_to_ref_bam - versions + ch_aln_to_ref_bam // meta, bam } diff --git a/subworkflows/local/ont/main.nf b/subworkflows/local/ont/main.nf deleted file mode 100644 index 5f079840..00000000 --- a/subworkflows/local/ont/main.nf +++ /dev/null @@ -1,45 +0,0 @@ -include { PREPARE_ONT } from '../prepare_ont/main' -include { JELLYFISH } from '../jellyfish/main' - -workflow ONT { - take: - input_channel - genome_size - - main: - Channel.empty().set { ch_versions } - Channel.of([[],[]]) - .tap { genomescope_summary } - .tap { genomescope_plot } - - PREPARE_ONT(input_channel) - - PREPARE_ONT.out.trimmed.set { ont_reads } - - PREPARE_ONT.out.nanoq_report.set { nanoq_report } - - PREPARE_ONT.out.nanoq_stats.set { nanoq_stats } - - ch_versions = ch_versions.mix(PREPARE_ONT.out.versions) - - if (params.jellyfish) { - JELLYFISH(PREPARE_ONT.out.trimmed, PREPARE_ONT.out.med_len) - if (params.genome_size == null) { - JELLYFISH.out.hap_len.set { genome_size } - } - JELLYFISH.out.genomescope_summary.set { genomescope_summary } - JELLYFISH.out.genomescope_plot.set { genomescope_plot } - ch_versions = ch_versions.mix(JELLYFISH.out.versions) - } - - versions = ch_versions - - emit: - ont_reads - genome_size - nanoq_report - nanoq_stats - genomescope_plot - genomescope_summary - versions -} diff --git a/subworkflows/local/polishing/dorado/main.nf b/subworkflows/local/polishing/dorado/main.nf new file mode 100644 index 00000000..be3663d2 --- /dev/null +++ b/subworkflows/local/polishing/dorado/main.nf @@ -0,0 +1,59 @@ +include { DORADO_ALIGNER as ALIGN } from '../../../../modules/local/dorado/aligner/main.nf' +include { DORADO_POLISH as 
POLISH } from '../../../../modules/local/dorado/polish/main.nf' +include { QC } from '../../qc/main.nf' +include { LIFTOFF } from '../../../../modules/nf-core/liftoff/main' + +workflow POLISH_DORADO { + take: + ch_main + meryl_kmers + + main: + + ch_main + .map { it -> [it.meta, it.meta.assembly, it.meta.ontreads] } + .set { ch_aln_in } + + ALIGN(ch_aln_in) + + ALIGN.out.bam + .join(ALIGN.out.bai) + .map {meta, bam, bai-> [ meta, meta.assembly, bam, bai ] } + .set { ch_polish_in } + + POLISH(ch_polish_in, []) + + POLISH.out.polished_alignment.set { polished_assembly } + + polished_assembly + .map { meta, polished_dorado -> [meta: meta + [ polished: [polished_dorado: polished_dorado ] ] ]} + .set { ch_main_out } + + QC( + ch_main_out.map { it -> [meta: it.meta - it.meta.subMap("assembly_map_bam") + [ assembly_map_bam: null] ] }, + polished_assembly.map { meta, polished -> [meta.id, polished] }, + meryl_kmers + ) + + ch_main_out + .filter { + it -> it.meta.lift_annotations + } + .map { it -> + [ + it.meta, + it.meta.polished.dorado, + it.meta.ref_fasta, + it.meta.ref_gff + ] + } + .set { liftoff_in } + + LIFTOFF(liftoff_in, []) + + emit: + ch_main = ch_main_out + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + merqury_report_files = QC.out.merqury_report_files +} diff --git a/subworkflows/local/polishing/main.nf b/subworkflows/local/polishing/main.nf index 1c5a4dd9..a42c5d2f 100644 --- a/subworkflows/local/polishing/main.nf +++ b/subworkflows/local/polishing/main.nf @@ -1,81 +1,89 @@ -include { POLISH_MEDAKA } from './medaka/polish_medaka/main' -include { POLISH_PILON } from './pilon/polish_pilon/main' +include { POLISH_MEDAKA } from './medaka/polish_medaka/main.nf' +include { POLISH_PILON } from './pilon/polish_pilon/main.nf' +include { POLISH_DORADO } from './dorado/main.nf' workflow POLISH { take: - inputs - ch_ont_reads - ch_longreads - ch_shortreads - ch_polished_genome - reference_bam + ch_main meryl_kmers main: + channel.empty().set { 
polish_busco_reports } + channel.empty().set { polish_quast_reports } + channel.empty().set { polish_merqury_reports } + + ch_main + .branch { it -> + def medaka_polishers = ["medaka","medaka+pilon"] + def dorado_polishers = ["dorado","dorado+pilon"] + medaka: medaka_polishers.contains(it.meta.polish) + dorado: dorado_polishers.contains(it.meta.polish) + no_ont_polish: !medaka_polishers.contains(it.meta.polish) && !dorado_polishers.contains(it.meta.polish) + } + .set { ch_main_polish } - Channel.empty().set { ch_versions } - Channel.empty().set { polish_busco_reports } - Channel.empty().set { polish_quast_reports } - Channel.empty().set { polish_merqury_reports } + POLISH_MEDAKA(ch_main_polish.medaka, meryl_kmers) - if (params.polish_medaka) { + POLISH_DORADO(ch_main_polish.dorado, meryl_kmers) - if (params.hifiasm_ont) { - error('Medaka should not be used on ONT-HiFi hybrid assemblies') - } - if (params.hifi && !params.ont) { - error('Medaka should not be used on HiFi assemblies') - } + POLISH_MEDAKA.out.busco_out + .mix(POLISH_DORADO.out.busco_out) + .set { polish_busco_reports } - POLISH_MEDAKA(inputs, ch_ont_reads, ch_polished_genome, reference_bam, meryl_kmers) + POLISH_MEDAKA.out.quast_out + .mix(POLISH_DORADO.out.quast_out) + .set { polish_quast_reports } - POLISH_MEDAKA.out.polished_assembly.set { ch_polished_genome } + POLISH_MEDAKA.out.merqury_report_files + .mix(POLISH_DORADO.out.merqury_report_files) + .set { polish_merqury_reports } - POLISH_MEDAKA.out.busco_out.set { polish_busco_reports } + POLISH_MEDAKA.out.ch_main + .mix(POLISH_DORADO.out.ch_main) + .mix(ch_main_polish.no_ont_polish) + .set { ch_main_polish_pilon } - POLISH_MEDAKA.out.quast_out.set { polish_quast_reports } - POLISH_MEDAKA.out.merqury_report_files.set { polish_merqury_reports } - - ch_versions = ch_versions.mix(POLISH_MEDAKA.out.versions) - } /* Polishing with short reads using pilon */ - if (params.polish_pilon) { - POLISH_PILON(inputs, ch_shortreads, ch_longreads, 
ch_polished_genome, reference_bam, meryl_kmers) - - POLISH_PILON.out.pilon_polished.set { ch_polished_genome } + ch_main_polish_pilon + .branch { + it -> + def pilon_polishers = ["pilon","medaka+pilon", "dorado+pilon"] + pilon: pilon_polishers.contains(it.meta.polish) + no_pilon: true + } + .set { ch_main_polish_pilon_in } - polish_busco_reports - .concat( - POLISH_PILON.out.busco_out - ) - .set { polish_busco_reports } + POLISH_PILON(ch_main_polish_pilon_in.pilon, meryl_kmers) - polish_quast_reports - .concat( - POLISH_PILON.out.quast_out - ) - .set { polish_quast_reports } + ch_main_polish_pilon_in.no_pilon.mix(POLISH_PILON.out.ch_main) + .set { ch_out } - polish_merqury_reports - .concat( - POLISH_PILON.out.merqury_report_files - ) - .set { polish_merqury_reports } + polish_busco_reports + .concat( + POLISH_PILON.out.busco_out + ) + .set { polish_busco_reports } - ch_versions = ch_versions.mix(POLISH_PILON.out.versions) - } + polish_quast_reports + .concat( + POLISH_PILON.out.quast_out + ) + .set { polish_quast_reports } - versions = ch_versions + polish_merqury_reports + .concat( + POLISH_PILON.out.merqury_report_files + ) + .set { polish_merqury_reports } emit: - ch_polished_genome + ch_main = ch_out polish_busco_reports polish_quast_reports polish_merqury_reports - versions } diff --git a/subworkflows/local/polishing/medaka/polish_medaka/main.nf b/subworkflows/local/polishing/medaka/polish_medaka/main.nf index e4d459fa..d11228fa 100644 --- a/subworkflows/local/polishing/medaka/polish_medaka/main.nf +++ b/subworkflows/local/polishing/medaka/polish_medaka/main.nf @@ -1,41 +1,61 @@ -include { RUN_MEDAKA } from '../run_medaka/main' +include { MEDAKA_PARALLEL as MEDAKA } from '../../../../../modules/local/medaka/medaka_consensus/main' include { QC } from '../../../qc/main.nf' -include { RUN_LIFTOFF } from '../../../liftoff/main' +include { LIFTOFF } from '../../../../../modules/nf-core/liftoff/main' workflow POLISH_MEDAKA { take: - ch_input - in_reads - assembly - 
ch_aln_to_ref + ch_main meryl_kmers main: - Channel.empty().set { ch_versions } - Channel.empty().set { quast_out } - Channel.empty().set { busco_out } - Channel.empty().set { merqury_report_files } + channel.empty().set { ch_versions } - RUN_MEDAKA(in_reads, assembly) - RUN_MEDAKA.out.medaka_out.set { polished_assembly } + ch_main + .map { + it -> + [ it.meta, it.meta.ontreads, it.meta.assembly ] - ch_versions = ch_versions.mix(RUN_MEDAKA.out.versions) + } + .set { ch_medaka_in } - QC(ch_input, in_reads, polished_assembly, ch_aln_to_ref, meryl_kmers) + MEDAKA(ch_medaka_in) - ch_versions = ch_versions.mix(QC.out.versions) + MEDAKA.out.assembly.set { polished_assembly } - if (params.lift_annotations) { - RUN_LIFTOFF(polished_assembly, ch_input) - ch_versions = ch_versions.mix(RUN_LIFTOFF.out.versions) - } - - versions = ch_versions + polished_assembly + .map { meta, polished_medaka -> [meta: meta + [ polished: [medaka: polished_medaka ] ] ]} + // After joining re-create the maps from the stored map + .set { ch_medaka_out } + + ch_medaka_out + .set { ch_main_out } + + QC( + ch_medaka_out.map { it -> [meta: it.meta - it.meta.subMap("assembly_map_bam") + [ assembly_map_bam: null] ] }, + polished_assembly.map { meta, polished -> [meta.id, polished] }, + meryl_kmers + ) + + + ch_medaka_out + .filter { + it -> it.meta.lift_annotations + } + .map { it -> + [ + it.meta, + it.meta.polished.medaka, + it.meta.ref_fasta, + it.meta.ref_gff + ] + } + .set { liftoff_in } + + LIFTOFF(liftoff_in, []) emit: - polished_assembly + ch_main = ch_main_out quast_out = QC.out.quast_out busco_out = QC.out.busco_out merqury_report_files = QC.out.merqury_report_files - versions } diff --git a/subworkflows/local/polishing/medaka/run_medaka/main.nf b/subworkflows/local/polishing/medaka/run_medaka/main.nf deleted file mode 100644 index 2ba3ceb6..00000000 --- a/subworkflows/local/polishing/medaka/run_medaka/main.nf +++ /dev/null @@ -1,22 +0,0 @@ -include { MEDAKA_PARALLEL as MEDAKA } from 
'../../../../../modules/local/medaka/medaka_consensus/main' - -workflow RUN_MEDAKA { - take: - in_reads - assembly - - main: - - in_reads - .join(assembly) - .set { medaka_in } - - MEDAKA(medaka_in) - - MEDAKA.out.assembly.set { medaka_out } - MEDAKA.out.versions.set { versions } - - emit: - medaka_out - versions -} diff --git a/subworkflows/local/polishing/pilon/polish_pilon/main.nf b/subworkflows/local/polishing/pilon/polish_pilon/main.nf index 3b7df47a..cf33f62e 100644 --- a/subworkflows/local/polishing/pilon/polish_pilon/main.nf +++ b/subworkflows/local/polishing/pilon/polish_pilon/main.nf @@ -1,45 +1,77 @@ -include { RUN_PILON } from '../run_pilon/main' +include { PILON } from '../../../../../modules/nf-core/pilon/main' include { MAP_SR } from '../../../mapping/map_sr/main' -include { RUN_LIFTOFF } from '../../../liftoff/main' +include { LIFTOFF } from '../../../../../modules/nf-core/liftoff/main' include { QC } from '../../../qc/main.nf' workflow POLISH_PILON { take: - ch_input - shortreads - in_reads - assembly - ch_aln_to_ref + ch_main meryl_kmers main: - Channel.empty().set { ch_versions } + channel.empty().set { ch_versions } - MAP_SR(shortreads, assembly) + ch_main + .multiMap { + it -> + shortreads: [it.meta, it.meta.shortreads] + assembly: [ + it.meta, + it.meta.polish == "medaka+pilon" ? it.meta.polished.medaka : it.meta.polish == "dorado+pilon" ? 
it.meta.polished.dorado : it.meta.assembly + ] + } + .set { map_sr_in } - ch_versions = ch_versions.mix(MAP_SR.out.versions) + //map_sr_in.shortreads.view {"POLISH_PILON: map_sr_in.shortreads: $it"} + //map_sr_in.assembly.view {"POLISH_PILON: map_sr_in.assembly: $it"} - RUN_PILON(assembly, MAP_SR.out.aln_to_assembly_bam_bai) + MAP_SR(map_sr_in.shortreads, map_sr_in.assembly) - RUN_PILON.out.improved_assembly.set { pilon_polished } - ch_versions = ch_versions.mix(RUN_PILON.out.versions) + map_sr_in.assembly + .join(MAP_SR.out.aln_to_assembly_bam_bai) + .multiMap { + meta, assembly, bam, bai -> + assembly: [meta, assembly] + bam_bai: [meta, bam, bai] + } + .set { pilon_in } - QC(ch_input, in_reads, pilon_polished, ch_aln_to_ref, meryl_kmers) + PILON( + pilon_in.assembly, + pilon_in.bam_bai, + "bam", + ) - ch_versions = ch_versions.mix(QC.out.versions) + pilon_polished = PILON.out.improved_assembly - if (params.lift_annotations) { - RUN_LIFTOFF(pilon_polished, ch_input) - ch_versions = ch_versions.mix(RUN_LIFTOFF.out.versions) - } + pilon_polished + .map { meta, polished_pilon -> [ meta: meta + [ polished: [pilon: polished_pilon] ] ] } + .set { ch_main } + + QC(ch_main.map { it -> [meta: it.meta - it.meta.subMap("assembly_map_bam") + [assembly_map_bam: null] ]}, + pilon_polished.map {meta, polished -> [meta.id, polished ]}, + meryl_kmers) - versions = ch_versions + ch_main + .filter { + it -> it.meta.lift_annotations + } + .map { it -> + [ + it.meta, + it.meta.polished.pilon, + it.meta.ref_fasta, + it.meta.ref_gff + ] + } + .set { liftoff_in } + + LIFTOFF(liftoff_in, []) emit: - pilon_polished + ch_main quast_out = QC.out.quast_out busco_out = QC.out.busco_out merqury_report_files = QC.out.merqury_report_files - versions } diff --git a/subworkflows/local/polishing/pilon/run_pilon/main.nf b/subworkflows/local/polishing/pilon/run_pilon/main.nf deleted file mode 100644 index e2c29efb..00000000 --- a/subworkflows/local/polishing/pilon/run_pilon/main.nf +++ /dev/null @@ 
-1,23 +0,0 @@ -include { PILON } from '../../../../../modules/nf-core/pilon/main' - -workflow RUN_PILON { - take: - assembly_in - aln_to_assembly_bam_bai - - main: - assembly_in - .join(aln_to_assembly_bam_bai) - .set { pilon_in } - PILON( - pilon_in.map { meta, assembly, _bam, _bai -> [meta, assembly] }, - pilon_in.map { meta, _assembly, bam, bai -> [meta, bam, bai] }, - "bam", - ) - versions = PILON.out.versions - improved_assembly = PILON.out.improved_assembly - - emit: - improved_assembly - versions -} diff --git a/subworkflows/local/prepare/jellyfish/main.nf b/subworkflows/local/prepare/jellyfish/main.nf new file mode 100644 index 00000000..644ff638 --- /dev/null +++ b/subworkflows/local/prepare/jellyfish/main.nf @@ -0,0 +1,96 @@ +include { COUNT } from '../../../../modules/local/jellyfish/count/main' +include { HISTO } from '../../../../modules/local/jellyfish/histo/main' +include { STATS } from '../../../../modules/local/jellyfish/stats/main' +include { GENOMESCOPE } from '../../../../modules/local/genomescope/main' + +workflow JELLYFISH { + take: + ch_main + + main: + channel.empty().set { genomescope_in } + channel.empty().set { ch_versions } + + ch_main + .filter { it -> it.meta.group } + .map { it -> + [ + it.meta, + it.meta.group, + it.meta.jellyfish_k, + it.meta.qc_reads_path, + it.meta.qc_read_mean + ] + } + .groupTuple(by: 1) + .map { + it -> + [ + meta: [ + id: it[1], + metas: it[0], + jellyfish_k: it[2][0], + qc_read_mean: it[4][0] + ], + qc_reads_path: it[3][0] + ] + } + .mix( + ch_main + .filter { it -> !it.meta.group } + .map { + it -> + [ + meta: it.meta, + qc_reads_path: it.meta.qc_reads_path + ] + } + ) + .set { samples } + + COUNT(samples) + COUNT.out.kmers.set { kmers } + + HISTO(kmers) + + HISTO.out.histo + .map { meta, hist -> + [ + meta, + hist, + meta.jellyfish_k, + meta.qc_read_mean + + ] + } + .set { genomescope_in } + + STATS(kmers) + + GENOMESCOPE(genomescope_in) + + GENOMESCOPE.out.estimated_hap_len + .filter { it -> it[0].metas } 
+ .flatMap { it -> + it[0].metas + .collect { meta -> [ meta: meta + [ genome_size: it[1] ] ] } + } + .mix(GENOMESCOPE.out.estimated_hap_len + .filter { it -> !it[0].metas } + .map { + it -> [ meta: it[0] + [ genome_size: it[1] ] ] + } + ) + .set { outputs } + + outputs.dump(tag: "Jellyfish outputs") + + GENOMESCOPE.out.summary.set { genomescope_summary } + + GENOMESCOPE.out.plot.set { genomescope_plot } + + emit: + main_out = outputs + genomescope_summary + genomescope_plot +} diff --git a/subworkflows/local/prepare/main.nf b/subworkflows/local/prepare/main.nf new file mode 100644 index 00000000..8e199465 --- /dev/null +++ b/subworkflows/local/prepare/main.nf @@ -0,0 +1,256 @@ +include { PREPARE_ONT as ONT } from './prepare_ont/main' +include { PREPARE_HIFI as HIFI } from './prepare_hifi/main' +include { PREPARE_SHORTREADS as SHORTREADS } from './prepare_shortreads/main' +include { JELLYFISH } from './jellyfish/main' + +workflow PREPARE { + /* + Subworkflows in prepare implement sample grouping. + SHORTREADS, JELLYFISH, ONT and HIFI each implement + the same logic for sample grouping. + Grouping needs to be specified by the user, and can + be used to create sample groups that share inputs, to + minimize redundant input preparations. + Reads of samples from the same group will be prepared + only once, and then the original channel is restored. 
+ + Brief description how this works: + // Move group information into channel, if it exists + .filter { it -> it.meta.group } + .map { it -> [it.meta, it.meta.group, it.meta.ontreads] } + // Group by group + .groupTuple(by: 1) + // Collect all sample-meta into a group meta slot named metas + // Use unique reads; user responsible to group correctly + .map { + it -> + [ + [ + id: it[1], // the group + metas: it[0] + ], + it[2].unique()[0] // Ontreads + ] + } + + After this input channel has been processed, the samples are + recreated from meta[metas]: + + process.OUT + // Take samples with metas in slot [0] + .filter { it -> it[0].metas } + .flatMap { it -> + // $it looks like [meta, output_path] + // recreate meta from metas and update path. + it[0].metas + .collect { meta -> [ + meta: meta - meta.subMap("ontreads") + [ontreads: it[1]] + ] + } + } + .mix( + process.OUT + .filter { it -> !it[0].metas } + .map {meta, ontreads -> [meta: meta -meta.subMap("ontreads") + [ontreads: ontreads]]} + ) + + + */ + take: ch_main + + main: + channel.empty().set{ ch_main_shortreaded } + ch_main + .filter { + it -> ((it.meta.shortread_F && it.meta.use_short_reads) || it.hic_trim) ? true : false + } + .set { shortreads } + + ch_main + .filter { + it -> (it.meta.ontreads) ? true : false + } + .set { ontreads } + + ch_main + .filter { + it -> (it.meta.hifireads) ? true : false + } + .set { hifireads } + + + // adapted to sample-logic + + SHORTREADS(shortreads) + + SHORTREADS.out.meryl_kmers.set { meryl_kmers } + + // This changes ch_main shortreads_F and _R become one tuple, paired is gone. 
+ + // put shortreads back together with samples without shortreads + + // Added mix with empty to make sure that the channel exists + ch_main_shortreaded + .mix( + ch_main + .filter { + it -> !it.meta.shortread_F && !it.meta.hic_F + } + .map { it -> [meta: it.meta - it.meta.subMap("shortread_F","shortread_R", "paired") + [shorteads: null] ]} + .mix(SHORTREADS.out.main_out) + ) + .set { ch_main_shortreaded } + + ONT(ontreads) + + ONT.out.main_out.set { ch_main_ont_prepped } + + // Continue here with switching to meta + + HIFI(hifireads) + + HIFI.out.main_out.set { ch_main_hifi_prepped } + + ch_main_shortreaded + // ADD ONT READS + .filter { + it -> it.meta.ontreads ? true : false + } + .map { it -> [it.meta.id, it.meta - it.meta.subMap("ontreads")]} + .join( + ch_main_ont_prepped + .map { it -> [it.meta.id, it.meta.ontreads] } + ) + // After joining re-create the maps from the stored map + .map { _id, meta_old, ont_reads -> + [ + meta: meta_old -meta_old.subMap("ontreads") + [ontreads: ont_reads] + ] + } + // mix back in those samples where nothing was done to the ont reads + .mix(ch_main_shortreaded + .filter { + it -> it.meta.ontreads ? false : true + } + ) + .set { + ch_main_sr_ont + } + + // Add prepared hifi-reads: + + ch_main_sr_ont + .filter { + it -> it.meta.hifireads ? true : false + } + .map { it -> [it.meta.id, it.meta - it.meta.subMap("hifireads")]} + .join( + ch_main_hifi_prepped + .map { it -> [it.meta.id, it.meta.hifireads] } + ) + // After joining re-create the maps from the stored map + .map { _id, meta_old, hifi_reads -> + [ + meta: meta_old + [hifireads: hifi_reads] + ] + } + // mix back in those samples where nothing was done to the hifireads reads + .mix(ch_main_sr_ont + .filter { + it -> it.meta.hifireads ? 
false : true + } + ) + .set { + ch_main_prepared + } + + // Get average read length of the QC reads from fastplong json report + def slurp = new groovy.json.JsonSlurper() + + ch_main_prepared + .filter { it -> it.meta.qc_reads.toLowerCase() == "ont" } + .map { it -> + [ + it.meta.id, + it.meta - it.meta.subMap("fastplong_json") + ] + } + .join( + ONT.out.fastplong_ont_reports + .map { it -> [ it[0].id, it[1] ]} + ) + .map { + _id, meta_old, json -> [meta: meta_old + [fastplong_json: json]] + } + .mix( + ch_main_prepared + .filter { it -> it.meta.qc_reads.toLowerCase() == "hifi" } + .map { + it -> [ + it.meta.id, it.meta - it.meta.subMap("fastplong_json")]} + .join( + HIFI.out.fastplong_hifi_reports + .map { it -> [ it[0].id, it[1] ]} + ) + .map { + _id, meta_old, json -> [meta: meta_old + [fastplong_json: json]] + } + ) + .map { it -> + [ + meta: it.meta + + [ + qc_read_mean: slurp.parse(it.meta.fastplong_json) + .summary + .after_filtering + .read_mean_length ?: + slurp.parse(it.meta.fastplong_json) + .summary + .before_filtering + .read_mean_length + ] + ] + } + // branch this channel for jellyfish + .branch { + it -> + jelly: it.meta.jellyfish + no_jelly: !it.meta.jellyfish + } + .set { ch_main_jellyfish_branched } + + JELLYFISH(ch_main_jellyfish_branched.jelly) + + + ch_main_jellyfish_branched.no_jelly + .mix( JELLYFISH.out.main_out ) + // At this stage, make sure that qc_read_path for downstream qc is using the prepared reads. + .map { it -> + [ + meta: it.meta - + it.meta.subMap("qc_read_path") + + [ + qc_read_path: it.meta.qc_reads.toLowerCase() == "ont" ? 
+ it.meta.ontreads : + it.meta.hifireads + ] + ] + } + .set { main_out } + + main_out.dump(tag: "Prepare: Combined outputs") + + JELLYFISH.out.genomescope_summary.set { genomescope_summary } + + JELLYFISH.out.genomescope_plot.set { genomescope_plot } + + fastplong_json_reports = HIFI.out.fastplong_hifi_reports.mix(ONT.out.fastplong_ont_reports) + + emit: + ch_main = main_out + fastplong_json_reports + fastp_json_reports = SHORTREADS.out.fastp_json + meryl_kmers + genomescope_summary + genomescope_plot +} diff --git a/subworkflows/local/prepare/prepare_hifi/main.nf b/subworkflows/local/prepare/prepare_hifi/main.nf new file mode 100644 index 00000000..798249ba --- /dev/null +++ b/subworkflows/local/prepare/prepare_hifi/main.nf @@ -0,0 +1,86 @@ +include { FASTPLONG as FASTPLONG_HIFI } from '../../../../modules/nf-core/fastplong/main' + +workflow PREPARE_HIFI { + take: + main_in // should contain only samples with hifireads + + main: + channel.empty().set { ch_versions } + + main_in.dump(tag: "Prepare-HIFI input") + + main_in + .filter { it -> it.meta.group } + .map { it -> [it.meta, it.meta.group, it.meta.hifi_trim, it.meta.hifireads, it.meta.hifi_adapters, it.meta.hifi_fastplong_args] } + .groupTuple(by: 1) + .map { + it -> + [ + meta: [ + id: it[1], + metas: it[0], + trim: it[2][0], // These go in via config + hifi_fastplong_args: it[5][0] + ], + hifireads: it[3][0], + hifi_adapters: it[4][0] + ] + } + .mix( + main_in + .filter { it -> !it.meta.group } + .map { + it -> + [ + meta: it.meta, + hifireads: it.meta.hifireads, + hifi_adapters: it.meta.hifi_adapters, + ] + } + ) + .multiMap { + it -> + reads: [it.meta, it.hifireads] + adapters: it.hifi_adapters ?: [] + } + .set { ch_fastplong_in } + + ch_fastplong_in.reads.dump(tag: "HiFI fastplong reads in") + + FASTPLONG_HIFI(ch_fastplong_in.reads, ch_fastplong_in.adapters, false, false ) + + FASTPLONG_HIFI + .out + .reads + .filter { it -> it[0].metas } + .flatMap { it -> // it looks like [meta, output_path] + 
it[0].metas + .collect { metas -> [ meta: metas - metas.subMap("hifireads") + [hifireads: it[1]] ] } + } + .mix(FASTPLONG_HIFI.out.reads + .filter { it -> !it[0].metas } + .map { + meta, hifireads -> [ meta: meta - meta.subMap("hifireads") + [ hifireads: hifireads ] ] + } + ) + .set { fastplong_reads_out } + + FASTPLONG_HIFI + .out + .json + .filter { it -> it[0].metas } + .flatMap { it -> + it[0].metas + .collect { meta -> [ meta, it[1] ] } + } + .mix(FASTPLONG_HIFI.out.json + .filter { it -> !it[0].metas } + ) + .set { fastplong_json_out } + + fastplong_reads_out.dump(tag: "Prepare-HIFI output") + + emit: + main_out = fastplong_reads_out + fastplong_hifi_reports = fastplong_json_out +} diff --git a/subworkflows/local/prepare/prepare_ont/collect/main.nf b/subworkflows/local/prepare/prepare_ont/collect/main.nf new file mode 100644 index 00000000..c4fcff03 --- /dev/null +++ b/subworkflows/local/prepare/prepare_ont/collect/main.nf @@ -0,0 +1,20 @@ +include { COLLECT_READS } from '../../../../../modules/local/collect_reads/main' + +workflow COLLECT { + take: + ch_input + + main: + ch_input + .filter { + it -> it.ont_collect + } + .map { row -> [row.meta, row.meta.ontreads] } + .set { reads } + + COLLECT_READS(reads) + COLLECT_READS.out.combined_reads.set { reads } + + emit: + reads +} diff --git a/subworkflows/local/prepare/prepare_ont/main.nf b/subworkflows/local/prepare/prepare_ont/main.nf new file mode 100644 index 00000000..ac9c6c7b --- /dev/null +++ b/subworkflows/local/prepare/prepare_ont/main.nf @@ -0,0 +1,145 @@ +include { FASTPLONG as FASTPLONG_ONT } from '../../../../modules/nf-core/fastplong/main' +include { COLLECT } from './collect/main' + + +workflow PREPARE_ONT { + take: + ch_main // should contain only samples with ontreads + + main: + channel.empty().set { ch_versions } + + ch_main.dump(tag: "Prepare-ONT input") + ch_main + .branch { + it -> + to_collect: it.meta.ont_collect + no_collect: !it.meta.ont_collect + } + .set { ch_main_collect_branched } + 
+    ch_main_collect_branched
+        .to_collect
+        .filter { it -> it.meta.group }
+        .map { it -> [it.meta, it.meta.group, it.meta.ontreads] }
+        .groupTuple(by: 1)
+        .map {
+            it ->
+                [
+                    [
+                        id: it[1], // the group
+                        metas: it[0]
+                    ],
+                    it[2].unique()[0] // Ontreads
+                ]
+        }
+        .mix(
+            ch_main_collect_branched
+                .to_collect
+                .filter { it -> !it.meta.group }
+                .map {
+                    it -> [ it.meta, it.meta.ontreads ]
+                }
+        )
+        .set { collect_in }
+
+    COLLECT(collect_in)
+
+    COLLECT.out.reads
+        .filter { it -> it[0].metas }
+        .flatMap { it -> // it looks like [meta, output_path]
+            it[0].metas
+                .collect { meta -> [ meta: meta - meta.subMap("ontreads") + [ontreads: it[1]] ] }
+        }
+        .mix(
+            COLLECT.out.reads
+                .filter { it -> !it[0].metas }
+                .map {
+                    meta, ontreads -> [ meta: meta - meta.subMap("ontreads") + [ontreads: ontreads] ]
+                }
+        )
+        .set { ch_collected_reads }
+
+    ch_collected_reads.dump(tag: "Collected ONT reads")
+
+    ch_collected_reads
+        .mix(ch_main_collect_branched.no_collect)
+        .set { ch_collected }
+
+    ch_collected.dump(tag: "Collected reads mixed with uncollected.")
+
+    // ch_collected is the same samples as the input channel
+    ch_collected
+        .filter { it -> it.meta.group }
+        .map { it -> [it.meta, it.meta.group, it.meta.ont_trim, it.meta.ontreads, it.meta.ont_adaptors, it.meta.ont_fastplong_args] }
+        .groupTuple(by: 1)
+        .map {
+            it ->
+                [
+                    meta: [
+                        id: it[1],
+                        metas: it[0],
+                        trim: it[2][0],
+                        ont_fastplong_args: it[5][0]
+                    ],
+                    ontreads: it[3][0],
+                    ont_adaptors: it[4][0]
+                ]
+        }
+        .mix(
+            ch_collected
+                .filter { it -> !it.meta.group }
+                .map {
+                    it ->
+                        [
+                            meta: it.meta,
+                            ontreads: it.meta.ontreads,
+                            ont_adaptors: it.meta.ont_adaptors,
+                        ]
+                }
+        )
+        .multiMap {
+            it ->
+                reads: [it.meta, it.ontreads]
+                adapters: it.ont_adaptors ?: [] // FIX(review): was `it.ont_adapters` (typo) — items carry the key `ont_adaptors`, so the adapter file was always dropped
+        }
+        .set { ch_fastplong_in }
+
+    FASTPLONG_ONT(ch_fastplong_in.reads, ch_fastplong_in.adapters, false, false)
+
+    FASTPLONG_ONT
+        .out
+        .reads
+        .filter { it -> it[0].metas }
+        .flatMap { it -> // it looks like [meta, output_path]
+ it[0].metas + .collect { metas -> [ meta: metas - metas.subMap("ontreads") + [ ontreads: it[1] ] ] } + } + .mix(FASTPLONG_ONT.out.reads + .filter { it -> !it[0].metas } + .map { + it -> [ meta: it[0] - it[0].subMap("ontreads") + [ ontreads: it[1] ] ] + } + ) + .set { fastplong_reads_out } + + FASTPLONG_ONT + .out + .json + .filter { it -> it[0].metas } + .flatMap { it -> // it looks like [meta, output_path] + it[0].metas + .collect { metas -> [metas, it[1] ] } + } + .mix( + FASTPLONG_ONT.out.json + .filter { it -> !it[0].metas } + ) + .set { fastplong_json_out } + + fastplong_reads_out.dump(tag: "Prepare-ONT output") + + emit: + main_out = fastplong_reads_out + fastplong_ont_reports = fastplong_json_out +} diff --git a/subworkflows/local/prepare/prepare_shortreads/main.nf b/subworkflows/local/prepare/prepare_shortreads/main.nf new file mode 100644 index 00000000..87301de2 --- /dev/null +++ b/subworkflows/local/prepare/prepare_shortreads/main.nf @@ -0,0 +1,249 @@ +include { FASTP } from '../../../../modules/nf-core/fastp/main' +include { FASTP as FASTP_HIC } from '../../../../modules/nf-core/fastp/main' +include { MERYL_COUNT } from '../../../../modules/nf-core/meryl/count/main' +include { MERYL_UNIONSUM } from '../../../../modules/nf-core/meryl/unionsum/main' + +workflow PREPARE_SHORTREADS { + take: + shortreads_in + + main: + channel.empty().set { ch_versions } + + shortreads_in + .map { row -> row.meta.shortread_F ? create_shortread_channel(row.meta) : row } // function below + .branch { + it -> + trim: it.meta.shortread_trim + no_trim: !it.meta.shortread_trim + } + .set { shortreads } + + shortreads_in + .map { row -> row.meta.hic_F ? 
create_hic_shortread_channel(row.meta) : row } + .branch { + row -> + trim: row.meta.hic_trim + no_trim: !row.meta.hic_trim + } + .set { hic_trim } + + shortreads.trim.dump(tag: "shortread trim channel") + hic_trim.trim.dump(tag: "hic trim channel") + + shortreads + .trim + .filter { it -> it.meta.group } + .map { it -> [it.meta, it.meta.group] } + .groupTuple(by: 1) + .map { + it -> + [ + [ + id: it[1], // the group + metas: it[0] + ], + it[0].shortreads[0], // Pull path from meta + [] + ] + } + .mix( + shortreads + .trim + .filter { it -> !it.meta.group } + .map { + it -> [ it.meta, it.meta.shortreads, [] ] + } + ) + .set { trim_in } + + hic_trim + .trim + .filter { it -> it.meta.group } + .map {it -> [it.meta, it.meta.group]} + .groupTuple(by: 1) + .map { + it -> + [ + [ + id: it[1], // the group + metas: it[0] + ], + it[0].hic_reads[0], // Pull path from meta + [] + ] + } + .mix( + hic_trim + .trim + .filter { it -> !it.meta.group } + .map { + it -> [ it.meta, it.meta.hic_reads, [] ] + } + ) + .set { hic_trim_in } + + trim_in.dump(tag: "Trim in") + + FASTP(trim_in, false, false, false) + + FASTP.out.reads + .filter { it -> it[0].metas } + .flatMap { it -> // looks like [meta <[id, metas]>, output_path] + it[0].metas + .collect { meta -> [ meta: meta - meta.subMap("shortreads") + [ shortreads: it[1] ] ] } + } + .mix( + FASTP.out.reads + .filter { it -> !it[0].metas } + .map { it -> [ meta: it[0] - it[0].subMap("shortreads") + [ shortreads: it[1] ] ] } + ) + .set { trimmed_reads } + + trimmed_reads.dump(tag: "Trim out") + // unite branched: + // add trimmed reads to trim channel, then mix with shortreads.no_trim + + FASTP_HIC(hic_trim_in, false, false, false) + + FASTP_HIC.out.reads + .filter { it -> it[0].metas } + .flatMap { it -> // looks like [meta <[id, metas]>, output_path] + it[0].metas + .collect { meta -> [ meta: meta - meta.subMap("hic_reads") + [ hic_reads: it[1] ] ] } + } + .mix( + FASTP_HIC.out.reads + .filter { it -> !it[0].metas } + .map { it -> [ 
meta: it[0] - it[0].subMap("hic_reads") + [ hic_reads: it[1] ] ] }
+        )
+        .set { hic_trimmed_reads }
+
+    trimmed_reads
+        .mix( shortreads.no_trim )
+        .set { shortreads }
+
+    // add HiC trimmed to those that need it
+
+    shortreads
+        .filter { row -> row.meta.hic_trim }
+        .map { row -> [ row.meta.id, row.meta ] }
+        .combine(
+            hic_trimmed_reads
+                .map { it ->
+                    [
+                        it.meta.id,
+                        it.meta.hic_reads
+                    ]
+                },
+            by: 0
+        )
+        .map {
+            _id, meta, trimmed_hic_reads ->
+                [
+                    meta: meta - meta.subMap("hic_reads") + [ hic_reads: trimmed_hic_reads ]
+                ]
+        }
+        .mix(
+            shortreads // FIX(review): was `trimmed_reads`, which silently dropped samples with neither short-read trimming nor HiC trimming (they exist only in `shortreads` via the no_trim branch)
+                .filter { row -> !row.meta.hic_trim }
+                .map { it-> [meta: it.meta - it.meta.subMap("hic_reads") + [hic_reads: null]]}
+        )
+        .set { shortreads }
+
+    shortreads
+        .filter { it -> it.meta.merqury }
+        .filter { it -> it.meta.group }
+        .map { it -> [ it.meta, it.meta.group, it.meta.shortreads, it.meta.meryl_k ] }
+        // Create a group
+        .groupTuple(by: 1)
+        .map {
+            it -> [
+                meta: [ id: it[1], metas: it[0] ],
+                shortreads: it[2][0],
+                meryl_k: it[3][0]
+            ]
+        }
+        .mix(shortreads
+            .filter { it -> it.meta.merqury }
+            .filter { it -> !it.meta.group }
+            .map { it -> [meta: it.meta, shortreads: it.meta.shortreads, meryl_k: it.meta.meryl_k]}
+        )
+        .multiMap { it ->
+            reads: [ it.meta, it.shortreads ]
+            kmer_size: it.meryl_k
+        }
+        .set { meryl_in }
+
+    MERYL_COUNT(meryl_in.reads, meryl_in.kmer_size)
+
+    MERYL_UNIONSUM(MERYL_COUNT.out.meryl_db, params.meryl_k)
+
+    MERYL_UNIONSUM.out.meryl_db
+        .filter { it -> it[0].metas }
+        .flatMap { it -> // looks like [meta <[id, metas]>, output_path]
+            it[0].metas
+                .collect { meta -> [ meta, it[1] ] }
+        }
+        .mix(MERYL_UNIONSUM.out.meryl_db
+            .filter { it -> !it[0].metas }
+            .map {
+                it -> [ it[0], it[1] ]
+            }
+        )
+        .map {meta , kmers -> [meta.id, kmers]}
+        .set { meryl_kmers }
+
+
+
+    emit:
+    main_out = shortreads
+    fastp_json = FASTP.out.json
+    meryl_kmers
+}
+
+def create_shortread_channel(row) { // This function expects a meta map as input
+    // create meta map
+    def meta =
row
+    meta.paired = row.paired.toBoolean()
+    meta.single_end = !meta.paired
+
+    // add path(s) of the fastq file(s) to the meta map
+    def shortreads = []
+    if (!file(row.shortread_F).exists()) {
+        exit(1, "ERROR: shortread_F fastq file does not exist!\n${row.shortread_F}")
+    }
+    if (!meta.paired) {
+        shortreads = [meta: meta + [shortreads: [file(row.shortread_F)]]]
+    }
+    else {
+        if (!file(row.shortread_R).exists()) {
+            exit(1, "ERROR: shortread_R fastq file does not exist!\n${row.shortread_R}")
+        }
+        shortreads = [ meta: meta + [shortreads: [file(row.shortread_F), file(row.shortread_R)]] ]
+    }
+    return shortreads
+}
+
+def create_hic_shortread_channel(row) { // This function expects a meta map as input
+    // create meta map
+    def meta = row
+    meta.paired = true
+    meta.single_end = !meta.paired
+
+    // add path(s) of the fastq file(s) to the meta map
+    def hic_reads = []
+    if (!file(row.hic_F).exists()) {
+        exit(1, "ERROR: hic_F fastq file does not exist!\n${row.hic_F}")
+    }
+    if (!meta.paired) {
+        hic_reads = [meta: meta + [hic_reads: [file(row.hic_F)]]]
+    }
+    else {
+        if (!file(row.hic_R).exists()) {
+            exit(1, "ERROR: hic_R fastq file does not exist!\n${row.hic_R}") // FIX(review): message said "shortread_R" while checking and reporting row.hic_R
+        }
+        hic_reads = [ meta: meta + [hic_reads: [file(row.hic_F), file(row.hic_R)]] ]
+    }
+    return hic_reads
+}
diff --git a/subworkflows/local/prepare_hifi/main.nf b/subworkflows/local/prepare_hifi/main.nf
deleted file mode 100644
index 3eda0a32..00000000
--- a/subworkflows/local/prepare_hifi/main.nf
+++ /dev/null
@@ -1,27 +0,0 @@
-include { LIMA } from '../../../modules/nf-core/lima/main'
-include { SAMTOOLS_FASTQ as TO_FASTQ } from '../../../modules/nf-core/samtools/fastq/main'
-
-workflow PREPARE_HIFI {
-    take:
-    inputs
-
-    main:
-    Channel.empty().set { ch_versions }
-    inputs
-        .map { it -> [it.meta, it.hifireads] }
-        .set { hifireads }
-    if (params.lima) {
-        if (!params.pacbio_primers) {
-            error('Trimming with lima requires a file containing primers (--pacbio_primers)')
-        }
-        LIMA(hifireads,
params.pacbio_primers) - TO_FASTQ(LIMA.out.bam, false) - TO_FASTQ.out.set { hifireads } - ch_versions.mix(LIMA.out.versions).mix(TO_FASTQ.out.versions) - } - versions = ch_versions - - emit: - hifireads - versions -} diff --git a/subworkflows/local/prepare_ont/chop/main.nf b/subworkflows/local/prepare_ont/chop/main.nf deleted file mode 100644 index 0fa31419..00000000 --- a/subworkflows/local/prepare_ont/chop/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -include { PORECHOP_PORECHOP as PORECHOP } from '../../../../modules/nf-core/porechop/porechop/main' - -workflow CHOP { - take: - in_reads - - main: - Channel.empty().set { chopped_reads } - Channel.empty().set { ch_versions } - - if (params.porechop) { - PORECHOP(in_reads) - PORECHOP.out.reads.set { chopped_reads } - ch_versions.mix(PORECHOP.out.versions) - } - else { - in_reads.set { chopped_reads } - } - versions = ch_versions - - emit: - chopped_reads - versions -} diff --git a/subworkflows/local/prepare_ont/collect/main.nf b/subworkflows/local/prepare_ont/collect/main.nf deleted file mode 100644 index aea3fdec..00000000 --- a/subworkflows/local/prepare_ont/collect/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -include { COLLECT_READS } from '../../../../modules/local/collect_reads/main' - -workflow COLLECT { - take: - ch_input - - main: - Channel.empty().set { ch_versions } - - ch_input - .map { row -> [row.meta, row.ontreads] } - .set { reads } - - if (params.collect) { - COLLECT_READS(reads) - COLLECT_READS.out.combined_reads.set { reads } - ch_versions.mix(COLLECT_READS.out.versions) - } - versions = ch_versions - - emit: - reads - versions -} diff --git a/subworkflows/local/prepare_ont/main.nf b/subworkflows/local/prepare_ont/main.nf deleted file mode 100644 index 52c8522e..00000000 --- a/subworkflows/local/prepare_ont/main.nf +++ /dev/null @@ -1,34 +0,0 @@ -include { CHOP } from './chop/main' -include { COLLECT } from './collect/main' -include { RUN_NANOQ } from './run_nanoq/main' - -workflow PREPARE_ONT { - take: - 
inputs - - main: - Channel.empty().set { ch_versions } - - COLLECT(inputs) - - CHOP(COLLECT.out.reads) - - CHOP.out.chopped_reads.set { trimmed } - - RUN_NANOQ(trimmed) - - RUN_NANOQ.out.median_length.set { med_len } - - RUN_NANOQ.out.report.set { nanoq_report } - - RUN_NANOQ.out.stats.set { nanoq_stats } - - versions = ch_versions.mix(COLLECT.out.versions).mix(CHOP.out.versions).mix(RUN_NANOQ.out.versions) - - emit: - trimmed - med_len - nanoq_report - nanoq_stats - versions -} diff --git a/subworkflows/local/prepare_ont/run_nanoq/main.nf b/subworkflows/local/prepare_ont/run_nanoq/main.nf deleted file mode 100644 index 4f78c859..00000000 --- a/subworkflows/local/prepare_ont/run_nanoq/main.nf +++ /dev/null @@ -1,25 +0,0 @@ -include { NANOQ } from '../../../../modules/local/nanoq/main' - -workflow RUN_NANOQ { - take: - in_reads - - main: - Channel.empty().set { versions } - - NANOQ(in_reads) - - NANOQ.out.report.set { report } - - NANOQ.out.stats.set { stats } - - NANOQ.out.median_length.set { median_length } - - NANOQ.out.versions.set { versions } - - emit: - report - stats - median_length - versions -} diff --git a/subworkflows/local/prepare_shortreads/main.nf b/subworkflows/local/prepare_shortreads/main.nf deleted file mode 100644 index 78641add..00000000 --- a/subworkflows/local/prepare_shortreads/main.nf +++ /dev/null @@ -1,55 +0,0 @@ -include { TRIMGALORE } from '../../../modules/nf-core/trimgalore/main' -include { MERYL_COUNT } from '../../../modules/nf-core/meryl/count/main' -include { MERYL_UNIONSUM } from '../../../modules/nf-core/meryl/unionsum/main' - -workflow PREPARE_SHORTREADS { - take: - input_channel - - main: - Channel.empty().set { ch_versions } - - input_channel - .map { create_shortread_channel(it) } - .set { shortreads } - - if (params.trim_short_reads) { - TRIMGALORE(shortreads) - TRIMGALORE.out.reads.set { shortreads } - ch_versions = ch_versions.mix(TRIMGALORE.out.versions) - } - MERYL_COUNT(shortreads.map { it -> [it[0], it[1]] }, 
params.meryl_k) - MERYL_UNIONSUM(MERYL_COUNT.out.meryl_db, params.meryl_k) - MERYL_UNIONSUM.out.meryl_db.set { meryl_kmers } - - versions = ch_versions.mix(MERYL_COUNT.out.versions).mix(MERYL_UNIONSUM.out.versions) - - emit: - shortreads - meryl_kmers - versions -} - -def create_shortread_channel(row) { - // create meta map - def meta = [:] - meta.id = row.meta.id - meta.paired = row.paired.toBoolean() - meta.single_end = !meta.paired - - // add path(s) of the fastq file(s) to the meta map - def shortreads = [] - if (!file(row.shortread_F).exists()) { - exit(1, "ERROR: shortread_F fastq file does not exist!\n${row.shortread_F}") - } - if (!meta.paired) { - shortreads = [meta, [file(row.shortread_F)]] - } - else { - if (!file(row.shortread_R).exists()) { - exit(1, "ERROR: shortread_R fastq file does not exist!\n${row.shortread_R}") - } - shortreads = [meta, [file(row.shortread_F), file(row.shortread_R)]] - } - return shortreads -} diff --git a/subworkflows/local/qc/busco/main.nf b/subworkflows/local/qc/busco/main.nf deleted file mode 100644 index 93d93c5e..00000000 --- a/subworkflows/local/qc/busco/main.nf +++ /dev/null @@ -1,26 +0,0 @@ -include { BUSCO_BUSCO as BUSCO } from '../../../../modules/nf-core/busco/busco/main' - -workflow RUN_BUSCO { - take: - assembly - - main: - Channel.empty().set { versions } - Channel.empty().set { batch_summary } - Channel.empty().set { short_summary_txt } - Channel.empty().set { short_summary_json } - - if (params.busco) { - BUSCO(assembly, 'genome', params.busco_lineage, params.busco_db ? 
file(params.busco_db, checkIfExists: true) : [], [], true) - BUSCO.out.batch_summary.set { batch_summary } - BUSCO.out.short_summaries_txt.set { short_summary_txt } - BUSCO.out.short_summaries_json.set { short_summary_json } - BUSCO.out.versions.set { versions } - } - - emit: - batch_summary - short_summary_json - short_summary_txt - versions -} diff --git a/subworkflows/local/qc/main.nf b/subworkflows/local/qc/main.nf index bba6b31d..6f4cdf57 100644 --- a/subworkflows/local/qc/main.nf +++ b/subworkflows/local/qc/main.nf @@ -1,61 +1,132 @@ include { MAP_TO_ASSEMBLY } from '../mapping/map_to_assembly/main' -include { RUN_BUSCO } from './busco/main.nf' -include { RUN_QUAST } from './quast/main.nf' -include { MERQURY_QC } from './merqury/main.nf' +include { QUAST } from '../../../modules/local/quast/main' +include { BUSCO_BUSCO as BUSCO } from '../../../modules/nf-core/busco/busco/main' +include { MERQURY_MERQURY as MERQURY } from '../../../modules/nf-core/merqury/merqury/main' workflow QC { take: - inputs - in_reads + ch_main scaffolds - aln_to_ref meryl_kmers main: - Channel.empty().set { ch_versions } - Channel.empty().set { quast_out } - Channel.empty().set { busco_out } - Channel.empty().set { merqury_report_files } - Channel.empty().set { map_to_assembly } - - if (params.quast) { - MAP_TO_ASSEMBLY(in_reads, scaffolds) - MAP_TO_ASSEMBLY.out.aln_to_assembly_bam.set { map_to_assembly } - ch_versions = ch_versions.mix(MAP_TO_ASSEMBLY.out.versions) - } - - RUN_QUAST(scaffolds, inputs, aln_to_ref, map_to_assembly) - RUN_QUAST.out.quast_tsv.set { quast_out } - - ch_versions = ch_versions.mix(RUN_QUAST.out.versions) - - RUN_BUSCO(scaffolds) - RUN_BUSCO.out.batch_summary.set { busco_out } - - ch_versions = ch_versions.mix(RUN_BUSCO.out.versions) - - if (params.short_reads) { - MERQURY_QC(scaffolds, meryl_kmers) - MERQURY_QC.out.stats - .join( - MERQURY_QC.out.spectra_asm_hist - ) - .join( - MERQURY_QC.out.spectra_cn_hist - ) - .join( - MERQURY_QC.out.assembly_qv - ) - 
.set { merqury_report_files } - - ch_versions = ch_versions.mix(MERQURY_QC.out.versions) - } - - versions = ch_versions + channel.empty().set { quast_out } + channel.empty().set { busco_out } + channel.empty().set { merqury_report_files } + + ch_main + .branch { + it -> + shortread: it.meta.use_short_reads + no_shortread: !it.meta.use_short_reads + } + .set { ch_shortread_branched } + + ch_shortread_branched + .shortread + .filter { it -> it.meta.merqury } + .map { it -> [it.meta.id, it.meta] } + .join(scaffolds) + .join(meryl_kmers) + .map { _id, meta, scaffs, kmers -> + [ meta, kmers, scaffs ] + } + .set { merqury_in } + + MERQURY(merqury_in) + + // Make sure that Polish and Scaffold main channels do not contain assembly_map_bam + + ch_main + .branch { + it -> + map_to_assembly: it.meta.quast && !it.meta.assembly_map_bam + no_map_to_assembly: !it.meta.quast || (it.meta.quast && it.meta.assembly_map_bam) + } + .set { ch_map_branched } + + ch_map_branched + .map_to_assembly + .map { + it -> [ it.meta.id, it.meta ] + } + .join(scaffolds) + .map { + _id, meta, target_scaffolds -> + [ + meta + [qc_target: target_scaffolds], // QC Target only exists in QC channel, and takes the scaffold that should be qc'ed + meta.qc_reads_path, + target_scaffolds + ] + } + .set { map_assembly_in } + + MAP_TO_ASSEMBLY(map_assembly_in) + + // create main channel with mappings + MAP_TO_ASSEMBLY.out.aln_to_assembly_bam + .map { meta, assembly_map_bam -> + [ + meta: meta + [ assembly_map_bam: assembly_map_bam ] + ] + } + .mix(ch_map_branched.no_map_to_assembly) + .set { ch_qc } + + ch_qc + .filter { + it -> it.meta.quast + } + .multiMap { it -> + quast_in: [ + it.meta, + it.meta.qc_target, + it.meta.ref_fasta ?: [], + it.meta.ref_gff ?: [], + it.meta.ref_map_bam ?: [], + it.meta.assembly_map_bam + ] + use_ref: it.meta.use_ref + use_gff: it.meta.use_ref && it.meta.ref_gff ? 
true : false + } + .set { quast_in } + + QUAST(quast_in) // works magically + QUAST.out.tsv.set { quast_out } + + ch_qc + .filter { + it -> it.meta.busco + } + .multiMap { it -> + fasta: [ + it.meta, + it.meta.qc_target + ] + busco_lineage: it.meta.busco_lineage + busco_db: it.meta.busco_db ? file(it.meta.busco_db, checkIfExists: true) : [] + } + .set { busco_in } + + BUSCO(busco_in.fasta, 'genome', busco_in.busco_lineage, busco_in.busco_db , [], true) + BUSCO.out.batch_summary.set { busco_out } + + + MERQURY.out.stats + .join( + MERQURY.out.spectra_asm_hist + ) + .join( + MERQURY.out.spectra_cn_hist + ) + .join( + MERQURY.out.assembly_qv + ) + .set { merqury_report_files } emit: + ch_main // QC does not (and should not) modify ch_main but returns the input. quast_out busco_out merqury_report_files - versions } diff --git a/subworkflows/local/qc/merqury/main.nf b/subworkflows/local/qc/merqury/main.nf deleted file mode 100644 index 8ee300d3..00000000 --- a/subworkflows/local/qc/merqury/main.nf +++ /dev/null @@ -1,33 +0,0 @@ -include { MERQURY_MERQURY as MERQURY } from '../../../../modules/nf-core/merqury/merqury/main' - -workflow MERQURY_QC { - take: - assembly - meryl_out - - main: - Channel.empty().set { versions } - assembly.map { meta, _assembly -> [meta.id, []] }.set { stats } - assembly.map { meta, _assembly -> [meta.id, []] }.set { spectra_asm_hist } - assembly.map { meta, _assembly -> [meta.id, []] }.set { spectra_cn_hist } - assembly.map { meta, _assembly -> [meta.id, []] }.set { assembly_qv } - if (params.merqury) { - meryl_out - .map { it -> [[id: it[0].id], it[1]] } - .join(assembly) - .set { merqury_in } - MERQURY(merqury_in) - MERQURY.out.stats.set { stats } - MERQURY.out.spectra_asm_hist.set { spectra_asm_hist } - MERQURY.out.spectra_cn_hist.set { spectra_cn_hist } - MERQURY.out.assembly_qv.set { assembly_qv } - MERQURY.out.versions.set { versions } - } - - emit: - stats - spectra_asm_hist - spectra_cn_hist - assembly_qv - versions -} diff --git 
a/subworkflows/local/qc/quast/main.nf b/subworkflows/local/qc/quast/main.nf deleted file mode 100644 index bf7a4566..00000000 --- a/subworkflows/local/qc/quast/main.nf +++ /dev/null @@ -1,42 +0,0 @@ -include { QUAST } from '../../../../modules/local/quast/main' - -workflow RUN_QUAST { - take: - assembly - inputs - aln_to_ref - aln_to_assembly - - main: - Channel.empty().set { versions } - /* prepare for quast: - * This makes use of the input channel to obtain the reference and reference annotations - * See quast module for details - */ - Channel.empty().set { quast_results } - Channel.empty().set { quast_tsv } - - if (params.quast) { - inputs - .map { row -> [row.meta, row.ref_fasta, row.ref_gff] } - .set { inputs_references } - - assembly - .join(inputs_references) - .join(aln_to_ref) - .join(aln_to_assembly) - .set { quast_in } - /* - * Run QUAST - */ - QUAST(quast_in, params.use_ref, false) - QUAST.out.results.set { quast_results } - QUAST.out.tsv.set { quast_tsv } - QUAST.out.versions.set { versions } - } - - emit: - quast_results - quast_tsv - versions -} diff --git a/subworkflows/local/scaffold/hic/main.nf b/subworkflows/local/scaffold/hic/main.nf new file mode 100644 index 00000000..1d3da509 --- /dev/null +++ b/subworkflows/local/scaffold/hic/main.nf @@ -0,0 +1,142 @@ +include { QC } from '../../qc/main' +include { YAHS } from '../../../../modules/nf-core/yahs/main' +include { RUN_LIFTOFF } from '../../liftoff/main' +include { BWAMEM2_MEM } from '../../../../modules/nf-core/bwamem2/mem/main' +include { BWAMEM2_INDEX } from '../../../../modules/nf-core/bwamem2/index/main' +include { SAMTOOLS_FAIDX } from '../../../../modules/nf-core/samtools/faidx/main' +include { MINIMAP2_ALIGN as MINIMAP2_HIC } from '../../../../modules/nf-core/minimap2/align/main' +include { PICARD_MARKDUPLICATES as MARKDUP } from '../../../../modules/nf-core/picard/markduplicates/main' +include { PICARD_ADDORREPLACEREADGROUPS as ADD_RG } from 
'../../../../modules/nf-core/picard/addorreplacereadgroups/main' +workflow HIC { + take: + ch_main + meryl_kmers + + main: + channel.empty().set { ch_versions } + + ch_main + .branch { it -> + bwamem: it.meta.hic_aligner == "bwa-mem2" + minimap: it.meta.hic_aligner == "minimap2" + } + .set { + hic_align_branched + } + + hic_align_branched + .bwamem + .map { + it -> + [ + it.meta, + it.meta.polished ? (it.meta.polished.pilon ?: it.meta.polished.medaka ?: it.meta.polished.dorado) : it.meta.assembly + ] + } + .set { bwamem_index_in } + + BWAMEM2_INDEX(bwamem_index_in) + BWAMEM2_INDEX.out.index + .map {meta, idx -> + [meta: meta + [bwamem_idx: idx]] + } + .multiMap { + it -> + reads: [it.meta, it.meta.hic_reads] + assembly: [it.meta, it.meta.polished ? (it.meta.polished.pilon ?: it.meta.polished.medaka ?: it.meta.polished.dorado) : it.meta.assembly] + index: [it.meta, it.meta.bwamem_idx] + } + .set{bwamem_mem_in} + + BWAMEM2_MEM(bwamem_mem_in.reads, bwamem_mem_in.index, bwamem_mem_in.assembly, true) + + hic_align_branched + .minimap + .map { it -> + [ + it.meta, + it.meta.hic_reads, + it.meta.polished ? (it.meta.polished.pilon ?: it.meta.polished.medaka ?: it.meta.polished.dorado) : it.meta.assembly + ] + } + .set {minimap2_in} + + MINIMAP2_HIC(minimap2_in, true, "csi", [], []) + + BWAMEM2_MEM.out.bam.mix(MINIMAP2_HIC.out.bam).set{ add_rg_in } + + ADD_RG(add_rg_in, [[],[]], [[],[]]) + + MARKDUP(ADD_RG.out.bam, [[],[]], [[],[]]) + + MARKDUP.out.bam + .map { meta, bam -> [meta.id, meta, bam] } + .join( + MARKDUP.out.bai + .map { meta, bai -> [meta.id, bai] } + ) + .map {_id, meta, bam, bai -> [meta:meta + [hic_dedup_bam: bam, hic_dedup_bai: bai] ]} + .map { + it -> [ + it.meta, + it.meta.polished ? 
(it.meta.polished.pilon ?: it.meta.polished.medaka ?: it.meta.polished.dorado) : it.meta.assembly,
+                []
+            ]
+        }
+        .set { faidx_in }
+
+    SAMTOOLS_FAIDX(faidx_in, false)
+
+    SAMTOOLS_FAIDX.out.fai
+        .map {
+            meta, index ->
+                [
+                    meta: meta + [hic_genome_idx: index]
+                ]
+        }
+        .set { indexed }
+
+    indexed
+        .map { it ->
+            [
+                it.meta,
+                it.meta.polished ? (it.meta.polished.pilon ?: it.meta.polished.medaka ?: it.meta.polished.dorado) : it.meta.assembly,
+                it.meta.hic_genome_idx,
+                it.meta.hic_dedup_bam,
+                []
+            ]
+        }
+        .set { yahs_in }
+
+    YAHS(yahs_in)
+
+    YAHS.out.scaffolds_fasta
+        .map { meta, corrected -> [meta: meta + [ scaffolds_hic: corrected] ] }
+        .set { ch_main_scaffolded }
+
+    QC(ch_main_scaffolded.map { it -> [meta: it.meta - it.meta.subMap("assembly_map_bam") + [assembly_map_bam: null] ] },
+        YAHS.out.scaffolds_fasta.map { meta, corrected -> [ meta.id, corrected ] },
+        meryl_kmers)
+
+    ch_main_scaffolded
+        .filter {
+            it -> it.meta.lift_annotations // FIX(review): was `it.lift_annotations`; channel items are [meta: ...] maps, so the flag lives under `meta` (as the .map below assumes)
+        }
+        .map { it ->
+            [
+                it.meta,
+                it.meta.scaffolds_hic,
+                it.meta.ref_fasta,
+                it.meta.ref_gff
+            ]
+        }
+        .set { liftoff_in }
+
+    RUN_LIFTOFF(liftoff_in)
+
+    emit:
+    ch_main = ch_main_scaffolded
+    quast_out = QC.out.quast_out
+    busco_out = QC.out.busco_out
+    merqury_report_files = QC.out.merqury_report_files
+}
diff --git a/subworkflows/local/scaffold/links/main.nf b/subworkflows/local/scaffold/links/main.nf
new file mode 100644
index 00000000..4e1ba95e
--- /dev/null
+++ b/subworkflows/local/scaffold/links/main.nf
@@ -0,0 +1,52 @@
+include { LINKS } from '../../../../modules/nf-core/links/main'
+include { QC } from '../../qc/main'
+include { LIFTOFF } from '../../../../modules/nf-core/liftoff/main'
+
+workflow RUN_LINKS {
+    take:
+    ch_main
+    meryl_kmers
+
+    main:
+    ch_main.dump(tag: "SCAFFOLD: LINKS: WORKFLOW inputs")
+    ch_main
+        .multiMap { it ->
+            assembly: [it.meta, it.meta.polished ?
(it.meta.polished.pilon ?: it.meta.polished.medaka ?: it.meta.polished.dorado) : it.meta.assembly]
+            reads: [it.meta, it.meta.qc_reads_path]
+        }
+        .set { links_in }
+
+    links_in.assembly.dump(tag: "SCAFFOLD: LINKS: Assembly inputs")
+    links_in.reads.dump(tag: "SCAFFOLD: LINKS: Read inputs")
+
+    LINKS(links_in.assembly, links_in.reads)
+    LINKS.out.scaffolds_fasta
+        .map { meta, scaff_links -> [meta: meta + [scaffolds_links: scaff_links] ] }
+        .set { ch_main_scaffolded }
+
+    QC(ch_main_scaffolded.map { it -> [meta: it.meta - it.meta.subMap("assembly_map_bam") + [assembly_map_bam: null] ]},
+        LINKS.out.scaffolds_fasta.map { meta, scaffold -> [meta.id, scaffold]},
+        meryl_kmers)
+
+    ch_main_scaffolded
+        .filter {
+            it -> it.meta.lift_annotations // FIX(review): was `it.lift_annotations`; items are [meta: ...] maps (cf. the parallel RUN_LONGSTITCH workflow)
+        }
+        .map { it ->
+            [
+                it.meta,
+                it.meta.scaffolds_links, // FIX(review): was `it.scaffolds_links` — field lives under `meta`
+                it.meta.ref_fasta, // FIX(review): was `it.ref_fasta`
+                it.meta.ref_gff // FIX(review): was `it.ref_gff`
+            ]
+        }
+        .set { liftoff_in }
+
+    LIFTOFF(liftoff_in, [])
+
+    emit:
+    ch_main = ch_main_scaffolded
+    quast_out = QC.out.quast_out
+    busco_out = QC.out.busco_out
+    merqury_report_files = QC.out.merqury_report_files
+}
diff --git a/subworkflows/local/scaffold/longstitch/main.nf b/subworkflows/local/scaffold/longstitch/main.nf
new file mode 100644
index 00000000..76abf8d5
--- /dev/null
+++ b/subworkflows/local/scaffold/longstitch/main.nf
@@ -0,0 +1,59 @@
+include { LONGSTITCH } from '../../../../modules/local/longstitch/main'
+include { QC } from '../../qc/main'
+include { LIFTOFF } from '../../../../modules/nf-core/liftoff/main'
+
+workflow RUN_LONGSTITCH {
+    take:
+    ch_main
+    meryl_kmers
+
+    main:
+    channel.empty().set { ch_versions }
+
+    ch_main
+        .map {
+            it ->
+                [
+                    it.meta,
+                    it.meta.polished ?
(it.meta.polished.pilon ?: it.meta.polished.medaka ?: it.meta.polished.dorado) : it.meta.assembly, + it.meta.qc_reads_path, + it.meta.genome_size + ] + } + .set { longstitch_in } + + longstitch_in.dump(tag: "SCAFFOLD: LONGSTITCH: inputs") + + LONGSTITCH(longstitch_in) + + LONGSTITCH.out.ntlLinks_arks_scaffolds + .map { meta, scaff_longst -> [meta: meta + [scaffolds_longstitch: scaff_longst] ] } + .set { ch_main_scaffolded } + + QC(ch_main_scaffolded.map { it -> [ meta: it.meta - it.meta.subMap("assembly_map_bam") + [assembly_map_bam: null] ] }, + LONGSTITCH.out.ntlLinks_arks_scaffolds.map { meta, scaffold -> [meta.id, scaffold]}, + meryl_kmers) + + ch_main_scaffolded + .filter { + it -> it.meta.lift_annotations + } + .map { it -> + [ + it.meta, + it.meta.scaffolds_longstitch, + it.meta.ref_fasta, + it.meta.ref_gff + ] + } + .set { liftoff_in } + + LIFTOFF(liftoff_in,[]) + + emit: + ch_main = ch_main_scaffolded + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + merqury_report_files = QC.out.merqury_report_files + versions = ch_versions +} diff --git a/subworkflows/local/scaffold/main.nf b/subworkflows/local/scaffold/main.nf new file mode 100644 index 00000000..11647143 --- /dev/null +++ b/subworkflows/local/scaffold/main.nf @@ -0,0 +1,210 @@ +include { RUN_LINKS } from './links/main' +include { RUN_LONGSTITCH } from './longstitch/main' +include { RUN_RAGTAG } from './ragtag/main' +include { HIC } from './hic/main' + +workflow SCAFFOLD { + take: + ch_main + meryl_kmers + + main: + channel.empty().set { links_busco } + channel.empty().set { links_quast } + channel.empty().set { links_merqury } + channel.empty().set { longstitch_busco } + channel.empty().set { longstitch_quast } + channel.empty().set { longstitch_merqury } + channel.empty().set { ragtag_busco } + channel.empty().set { ragtag_quast } + channel.empty().set { ragtag_merqury } + + // There is no support for scaffolding of scaffolded scaffolds. 
+ // But it is possible that one sample is scaffolded with different tools. + // Therefore main is filtered, instead of branched. + + ch_main + .filter { + it -> it.meta.scaffold_links + } + .set { links_in } + + RUN_LINKS(links_in, meryl_kmers) + RUN_LINKS.out.ch_main + .set { links_out } + + ch_main + .filter { + it -> it.meta.scaffold_longstitch + } + .set { longstitch_in } + + RUN_LONGSTITCH(longstitch_in, meryl_kmers) + RUN_LONGSTITCH.out.ch_main + .set { longstitch_out } + + ch_main + .filter { + it -> it.meta.scaffold_hic + } + .set { hic_in } + + HIC(hic_in, meryl_kmers) + HIC.out.ch_main + .set { hic_out } + + ch_main + .filter { + it -> it.meta.scaffold_ragtag && !it.meta.hic_reads && !it.meta.scaffold_longstitch && !it.meta.scaffold_links + } + .mix(hic_out.filter { it -> it.meta.scaffold_ragtag } ) + .mix(longstitch_out.filter { it -> it.meta.scaffold_ragtag } ) + .mix(links_out.filter { it -> it.meta.scaffold_ragtag } ) + .set { ragtag_in } + + RUN_RAGTAG(ragtag_in, meryl_kmers) + RUN_RAGTAG.out.ch_main + .set { ragtag_out } + + // Deal with cases that are single scaffold + links_out + .filter {it -> !it.meta.scaffold_longstitch && !it.meta.scaffold_ragtag } + .map { meta -> [ meta: meta - meta.subMap("links_scaffold") + [ scaffolds: [ links: meta.scaffolds_links ] ] ]} + .mix( + longstitch_out + .filter {it -> !it.meta.scaffold_links && !it.meta.scaffold_ragtag } + .map { meta -> [ meta: meta - meta.subMap("scaffolds_longstitch") + [ scaffolds: [ longstitch: meta.scaffolds_longstitch ] ] ]} + ) + .mix( + ragtag_out + .filter {it -> !it.meta.scaffold_links && !it.meta.scaffold_longstitch } + .map { meta -> [ meta: meta - meta.subMap("scaffolds_ragtag") + [ scaffolds: [ ragtag: meta.scaffolds_ragtag ] ] ]} + ) + .mix( + hic_out + .map { meta -> [ meta: meta - meta.subMap("scaffolds_hic") + [ scaffolds: [ hic: meta.scaffolds_hic ] ] ]} + + ) + // mix in those that are double scaffolded: , links-ragtag, longstitch-ragtag + // links-longstitch + .mix( + 
links_out + .filter {it -> it.meta.scaffold_longstitch && !it.meta.scaffold_ragtag } + .map {meta -> [meta.id, meta]} + // Join without filtering, inner-join + .join( + longstitch_out + .map {meta -> [meta.id, meta]} + ) + .map { + _id, meta_links, meta_longstitch -> [ + meta: meta_links - + meta_links.subMap("scaffolds_links") + + [scaffolds: [links: meta_links.scaffolds_links, longstitch: meta_longstitch.scaffolds_longstitch]] ] + } + ) + //links-ragtag + .mix( + links_out + .filter {it -> !it.meta.scaffold_longstitch && it.meta.scaffold_ragtag } + .map {meta -> [meta.id, meta]} + // Join without filtering, inner-join + .join( + ragtag_out + .map {meta -> [meta.id, meta]} + ) + .map { + _id, meta_links, meta_ragtag -> [ + meta: meta_links - + meta_links.subMap("scaffolds_links") + + [scaffolds: [links: meta_links.scaffolds_links, ragtag: meta_ragtag.scaffolds_ragtag]] ] + } + ) + //longstitch-ragtag + .mix( + longstitch_out + .filter {it -> !it.meta.scaffold_links && it.meta.scaffold_ragtag } + .map {meta -> [meta.id, meta]} + // Join without filtering, inner-join + .join( + ragtag_out + .map {meta -> [meta.id, meta]} + ) + .map { + _id, meta_longstitch, meta_ragtag -> [ + meta: meta_longstitch - + meta_longstitch.subMap("scaffolds_longstitch") + + [scaffolds: [longstitch: meta_longstitch.scaffolds_longstitch, ragtag: meta_ragtag.scaffolds_ragtag]] ] + } + ) + // mix in triple-scaffolded + .mix( + links_out + .filter {it -> it.meta.scaffold_longstitch && it.meta.scaffold_ragtag } + .map {meta -> [meta.id, meta]} + // Join without filtering, inner-join + .join( + longstitch_out + .map {meta -> [meta.id, meta]} + ) + .join( + ragtag_out + .map {meta -> [meta.id, meta]} + ) + .map { + _id, meta_links, meta_longstitch, meta_ragtag -> [ + meta: meta_links - + meta_links.subMap("scaffolds_links") + + [ + scaffolds: [ + links: meta_links.scaffolds_links, + longstitch: meta_longstitch.scaffolds_longstitch, + ragtag: meta_ragtag.scaffolds_ragtag + ] + ] + ] + } + ) + .set 
{ ch_main } + + + RUN_LINKS.out.busco_out.set { links_busco } + RUN_LINKS.out.quast_out.set { links_quast } + RUN_LINKS.out.merqury_report_files.set { links_merqury } + + RUN_LONGSTITCH.out.busco_out.set { longstitch_busco } + RUN_LONGSTITCH.out.quast_out.set { longstitch_quast } + RUN_LONGSTITCH.out.merqury_report_files.set { longstitch_merqury } + + HIC.out.busco_out.set { hic_busco } + HIC.out.quast_out.set { hic_quast } + HIC.out.merqury_report_files.set { hic_merqury } + + RUN_RAGTAG.out.busco_out.set { ragtag_busco } + RUN_RAGTAG.out.quast_out.set { ragtag_quast } + RUN_RAGTAG.out.merqury_report_files.set { ragtag_merqury } + + links_busco + .concat(longstitch_busco) + .concat(ragtag_busco) + .concat(hic_busco) + .set { scaffold_busco_reports } + + links_quast + .concat(longstitch_quast) + .concat(ragtag_quast) + .concat(hic_quast) + .set { scaffold_quast_reports } + + links_merqury + .concat(longstitch_merqury) + .concat(ragtag_merqury) + .concat(hic_merqury) + .set { scaffold_merqury_reports } + + emit: + ch_main + scaffold_busco_reports + scaffold_quast_reports + scaffold_merqury_reports +} diff --git a/subworkflows/local/scaffold/ragtag/main.nf b/subworkflows/local/scaffold/ragtag/main.nf new file mode 100644 index 00000000..27e7f951 --- /dev/null +++ b/subworkflows/local/scaffold/ragtag/main.nf @@ -0,0 +1,76 @@ +include { RAGTAG_SCAFFOLD } from '../../../../modules/nf-core/ragtag/scaffold/main' +include { QC } from '../../qc/main' +include { LIFTOFF } from '../../../../modules/nf-core/liftoff/main' + + +workflow RUN_RAGTAG { + take: + ch_main + meryl_kmers + + main: + ch_main + .multiMap { it -> + def assembly_to_scaffold = + it.meta.scaffold ? + ( + it.meta.scaffolds_hic ?: + it.meta.scaffolds_longstitch ?: + it.meta.scaffolds_links + ) : + it.meta.polished ? 
+ ( + it.meta.polished.pilon ?: + it.meta.polished.medaka ?: + it.meta.polished.dorado + ) : + it.meta.assembly + assembly: + [ + it.meta, + assembly_to_scaffold + ] + reference: + [ + it.meta, + it.meta.ref_fasta + ] + } + .set { ragtag_in } + + ragtag_in.assembly.dump(tag: "SCAFFOLD: RAGTAG: Assembly inputs") + ragtag_in.reference.dump(tag: "SCAFFOLD: RAGTAG: Reference inputs") + + RAGTAG_SCAFFOLD(ragtag_in.assembly, ragtag_in.reference, [[], []], [[], [], []]) + + RAGTAG_SCAFFOLD.out.corrected_assembly + .map { meta, corrected -> [meta: meta + [ scaffolds_ragtag: corrected] ] } + .set { ch_main_scaffolded } + + QC(ch_main_scaffolded.map { it -> [meta: it.meta - it.meta.subMap("assembly_map_bam") + [assembly_map_bam: null] ] }, + RAGTAG_SCAFFOLD.out.corrected_assembly.map { meta, corrected -> [ meta.id, corrected ] }, + meryl_kmers) + + + ch_main_scaffolded + .filter { + it -> it.lift_annotations + } + .map { it -> + [ + it.meta, + it.meta.scaffolds_ragtag, + it.meta.ref_fasta, + it.meta.ref_gff + ] + } + .set { liftoff_in } + + LIFTOFF(liftoff_in, []) + + emit: + ch_main + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + merqury_report_files = QC.out.merqury_report_files +} diff --git a/subworkflows/local/scaffolding/links/main.nf b/subworkflows/local/scaffolding/links/main.nf deleted file mode 100644 index 4493e4c4..00000000 --- a/subworkflows/local/scaffolding/links/main.nf +++ /dev/null @@ -1,47 +0,0 @@ -include { LINKS } from '../../../../modules/nf-core/links/main' -include { QC } from '../../qc/main' -include { RUN_LIFTOFF } from '../../liftoff/main' - -workflow RUN_LINKS { - take: - inputs - in_reads - assembly - _references - ch_aln_to_ref - meryl_kmers - - main: - Channel.empty().set { ch_versions } - - assembly - .join(in_reads) - .multiMap { meta, assembly_fa, reads -> - assembly: [meta, assembly_fa] - reads: [meta, reads] - } - .set { links_in } - - LINKS(links_in.assembly, links_in.reads) - LINKS.out.scaffolds_fasta.set { scaffolds } - 
- ch_versions = ch_versions.mix(LINKS.out.versions) - - QC(inputs, in_reads, scaffolds, ch_aln_to_ref, meryl_kmers) - - ch_versions = ch_versions.mix(QC.out.versions) - - if (params.lift_annotations) { - RUN_LIFTOFF(scaffolds, inputs) - ch_versions = ch_versions.mix(RUN_LIFTOFF.out.versions) - } - - versions = ch_versions - - emit: - scaffolds - quast_out = QC.out.quast_out - busco_out = QC.out.busco_out - merqury_report_files = QC.out.merqury_report_files - versions -} diff --git a/subworkflows/local/scaffolding/longstitch/main.nf b/subworkflows/local/scaffolding/longstitch/main.nf deleted file mode 100644 index 8756225d..00000000 --- a/subworkflows/local/scaffolding/longstitch/main.nf +++ /dev/null @@ -1,45 +0,0 @@ -include { LONGSTITCH } from '../../../../modules/local/longstitch/main' -include { QC } from '../../qc/main' -include { RUN_LIFTOFF } from '../../liftoff/main' - -workflow RUN_LONGSTITCH { - take: - inputs - in_reads - assembly - _references - ch_aln_to_ref - meryl_kmers - genome_size - - main: - Channel.empty().set { ch_versions } - - assembly - .join(in_reads) - .join(genome_size) - .set { longstitch_in } - LONGSTITCH(longstitch_in) - - LONGSTITCH.out.ntlLinks_arks_scaffolds.set { scaffolds } - - ch_versions = ch_versions.mix(LONGSTITCH.out.versions) - - QC(inputs, in_reads, scaffolds, ch_aln_to_ref, meryl_kmers) - - ch_versions = ch_versions.mix(QC.out.versions) - - if (params.lift_annotations) { - RUN_LIFTOFF(LONGSTITCH.out.ntlLinks_arks_scaffolds, inputs) - ch_versions = ch_versions.mix(RUN_LIFTOFF.out.versions) - } - - versions = ch_versions - - emit: - scaffolds - quast_out = QC.out.quast_out - busco_out = QC.out.busco_out - merqury_report_files = QC.out.merqury_report_files - versions -} diff --git a/subworkflows/local/scaffolding/main.nf b/subworkflows/local/scaffolding/main.nf deleted file mode 100644 index d816ff87..00000000 --- a/subworkflows/local/scaffolding/main.nf +++ /dev/null @@ -1,76 +0,0 @@ -include { RUN_LINKS } from 
'./links/main' -include { RUN_LONGSTITCH } from './longstitch/main' -include { RUN_RAGTAG } from './ragtag/main' - -workflow SCAFFOLD { - take: - inputs - in_reads - assembly - references - ch_aln_to_ref - meryl_kmers - genome_size - - main: - Channel.empty().set { ch_versions } - Channel.empty().set { links_busco } - Channel.empty().set { links_quast } - Channel.empty().set { links_merqury } - Channel.empty().set { longstitch_busco } - Channel.empty().set { longstitch_quast } - Channel.empty().set { longstitch_merqury } - Channel.empty().set { ragtag_busco } - Channel.empty().set { ragtag_quast } - Channel.empty().set { ragtag_merqury } - - if (params.scaffold_links) { - RUN_LINKS(inputs, in_reads, assembly, references, ch_aln_to_ref, meryl_kmers) - RUN_LINKS.out.busco_out.set { links_busco } - RUN_LINKS.out.quast_out.set { links_quast } - RUN_LINKS.out.merqury_report_files.set { links_merqury } - - ch_versions = ch_versions.mix(RUN_LINKS.out.versions) - } - - if (params.scaffold_longstitch) { - RUN_LONGSTITCH(inputs, in_reads, assembly, references, ch_aln_to_ref, meryl_kmers, genome_size) - RUN_LONGSTITCH.out.busco_out.set { longstitch_busco } - RUN_LONGSTITCH.out.quast_out.set { longstitch_quast } - RUN_LONGSTITCH.out.merqury_report_files.set { longstitch_merqury } - - ch_versions = ch_versions.mix(RUN_LONGSTITCH.out.versions) - } - - if (params.scaffold_ragtag) { - RUN_RAGTAG(inputs, in_reads, assembly, references, ch_aln_to_ref, meryl_kmers) - RUN_RAGTAG.out.busco_out.set { ragtag_busco } - RUN_RAGTAG.out.quast_out.set { ragtag_quast } - RUN_RAGTAG.out.merqury_report_files.set { ragtag_merqury } - - ch_versions = ch_versions.mix(RUN_RAGTAG.out.versions) - } - - links_busco - .concat(longstitch_busco) - .concat(ragtag_busco) - .set { scaffold_busco_reports } - - links_quast - .concat(longstitch_quast) - .concat(ragtag_quast) - .set { scaffold_quast_reports } - - links_merqury - .concat(longstitch_merqury) - .concat(ragtag_merqury) - .set { 
scaffold_merqury_reports } - - versions = ch_versions - - emit: - scaffold_busco_reports - scaffold_quast_reports - scaffold_merqury_reports - versions -} diff --git a/subworkflows/local/scaffolding/ragtag/main.nf b/subworkflows/local/scaffolding/ragtag/main.nf deleted file mode 100644 index 518afb87..00000000 --- a/subworkflows/local/scaffolding/ragtag/main.nf +++ /dev/null @@ -1,52 +0,0 @@ -include { RAGTAG_SCAFFOLD } from '../../../../modules/nf-core/ragtag/scaffold/main' -include { QC } from '../../qc/main' -include { RUN_LIFTOFF } from '../../liftoff/main' - - -workflow RUN_RAGTAG { - take: - inputs - in_reads - assembly - references - ch_aln_to_ref - meryl_kmers - - main: - Channel.empty().set { ch_versions } - - assembly - .join(references) - .multiMap { meta, assembly_fasta, reference_fasta -> - assembly: [meta, assembly_fasta] - reference: [meta, reference_fasta] - } - .set { ragtag_in } - - RAGTAG_SCAFFOLD(ragtag_in.assembly, ragtag_in.reference, [[], []], [[], [], []]) - - RAGTAG_SCAFFOLD.out.corrected_assembly.set { ragtag_scaffold_fasta } - - RAGTAG_SCAFFOLD.out.corrected_agp.set { ragtag_scaffold_agp } - - ch_versions = ch_versions.mix(RAGTAG_SCAFFOLD.out.versions) - - QC(inputs, in_reads, ragtag_scaffold_fasta, ch_aln_to_ref, meryl_kmers) - - ch_versions = ch_versions.mix(QC.out.versions) - - if (params.lift_annotations) { - RUN_LIFTOFF(RAGTAG_SCAFFOLD.out.corrected_assembly, inputs) - ch_versions = ch_versions.mix(RUN_LIFTOFF.out.versions) - } - - versions = ch_versions - - emit: - ragtag_scaffold_fasta - ragtag_scaffold_agp - quast_out = QC.out.quast_out - busco_out = QC.out.busco_out - merqury_report_files = QC.out.merqury_report_files - versions -} diff --git a/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf b/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf index 29aee94e..c452d4a4 100644 --- a/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf +++ 
b/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf @@ -95,50 +95,220 @@ workflow PIPELINE_INITIALISATION { // Create channel from input file provided through params.input // - Channel.empty().set { ch_refs } - Channel.fromPath(params.input) + channel.fromPath(params.input) .splitCsv(header: true) - .map { it -> [meta: [id: it.sample], ontreads: it.ontreads, hifireads: it.hifireads, ref_fasta: it.ref_fasta, ref_gff: it.ref_gff, shortread_F: it.shortread_F, shortread_R: it.shortread_R, paired: it.paired] } - .set { ch_samplesheet } - if (params.use_ref) { - ch_samplesheet - .map { it -> [it.meta, file(it.ref_fasta, checkIfExists: true)] } - .set { ch_refs } - } - if (params.lift_annotations) { - ch_samplesheet - .map { it -> [it.meta, file(it.ref_gff, checkIfExists: true)] } - } - // check for assembler / read combination - def hifi_only = params.hifi && !params.ont ? true : false - if (!params.skip_assembly) { - if (params.assembler == "flye") { - if (params.hifi) { - if (!hifi_only) { - error('Cannot combine hifi and ont reads with flye') - } - } + /* + This is a somewhat crucial step, where the samplesheet and params are used to determine per-sample parameters. + */ + .map { it -> + def strategy = it.strategy ?: params.strategy + + def ontreads = it.ontreads ?: params.ontreads + + def hifireads = it.hifireads ?: params.hifireads + + def assembler = it.assembler ?: params.assembler + + def assembler_ont = it.assembler_ont ?: + (strategy == "single" && assembler && ontreads && !hifireads) ? assembler : + params.assembler_ont ?: + (strategy == "hybrid" && assembler == "hifiasm") ? assembler : + assembler.contains("_") ? assembler.tokenize("_")[0] : + null + + def assembler_hifi = it.assembler_hifi ?: + (strategy == "single" && assembler && hifireads && !ontreads) ? assembler : + params.assembler_hifi ?: + assembler.contains("_") ? assembler.tokenize("_")[1] : + null + + def polish = it.polish ?: + (params.polish_medaka && params.polish_dorado) ? 
error("Both polish_medaka and polish_dorado are set.") : + (params.polish_medaka && params.polish_pilon && ontreads) ? "medaka+pilon" : + (params.polish_dorado && params.polish_pilon && ontreads) ? "dorado+pilon" : + (params.polish_medaka && ontreads) ? "medaka" : + (params.polish_dorado && ontreads) ? "dorado" : + (params.polish_pilon && (it.shortread_F || params.shortread_F)) ? "pilon" : + null + + def hic_F = it.hic_F ?: params.hic_F + + def scaffold_hic = hic_F ? (it.scaffold_hic != null ? it.scaffold_hic : params.scaffold_hic) : false + + def hic_trim = !scaffold_hic ? false : + (it.hic_trim ?: params.hic_trim) + + def assembler_ont_args = it.assembler_ont_args ?: params.assembler_ont_args ?: '' + def assembler_hifi_args = it.assembler_hifi_args ?: params.assembler_hifi_args ?: '' + + // Check if strategy can be inferred + strategy == "single" && ontreads && hifireads && !((!assembler_ont && assembler_hifi) || (assembler_ont && !assembler_hifi)) ? + error( + """ + [$it.sample]: Strategy is 'single', but ONT and HiFi reads are provided. + Please unambiguously define either 'assembler_ont' for ONT or 'assembler_hifi' for HiFi + """ + ) : + null + + // Build the map. Everything goes into meta. + [ + meta: [ + id: it.sample, + // new in refactor-assemblies + group: it.group ?: null, + ontreads: ontreads, + hifireads: hifireads, + // new in refactor-assemblers + strategy: strategy, + // The "assembler" value is mainly to ease input, all actual workflow logic should use assembler_ont/_hifi. 
+ assembler: assembler, + assembler_ont: assembler_ont, + assembler_hifi: assembler_hifi, + assembly_scaffolding_order: it.assembly_scaffolding_order ?: params.assembly_scaffolding_order ?: "ont_on_hifi", + assembler_ont_args: assembler_ont_args, + assembler_hifi_args: assembler_hifi_args, + hifiasm_args: it.hifiasm_args ?: params.hifiasm_args, + flye_args: it.flye_args ?: params.flye_args, + polish: polish, + ont_collect: it.ont_collect ?: params.ont_collect, + ont_adapters: it.ont_adapters ?: params.ont_adapters, + ont_fastplong_args: it.ont_fastplong_args ?: params.ont_fastplong_args, + jellyfish: it.jellyfish ?: params.jellyfish, + jellyfish_k: it.ont_jellyfish_k ?: params.jellyfish_k, + hifi_adapters: it.hifi_adapters ?: params.hifi_adapters, + hifi_fastplong_args: it.hifi_fastplong_args ?: params.hifi_fastplong_args, + medaka_model: it.medaka_model ?: params.medaka_model, + scaffold_longstitch: it.scaffold_longstitch ?: params.scaffold_longstitch, + scaffold_links: it.scaffold_links ?: params.scaffold_links, + scaffold_ragtag: it.scaffold_ragtag ?: params.scaffold_ragtag, + scaffold_hic: scaffold_hic, + use_ref: it.use_ref ?: params.use_ref, + // hic + hic_aligner: it.hic_aligner ?: params.hic_aligner, + hic_F: scaffold_hic ? (hic_F) : [], + hic_R: scaffold_hic ? (it.hic_R ?: params.hic_R) : [], + hic_trim: hic_trim, + // not new + genome_size: it.genome_size ?: params.genome_size, + ref_fasta: it.ref_fasta ?: params.ref_fasta, + ref_gff: it.ref_gff ?: params.ref_gff, + flye_mode: it.flye_mode ?: params.flye_mode, + // assembly already provided? + assembly: it.assembly ?: params.assembly ?: null, + // ref mapping provided? + ref_map_bam: it.ref_map_bam ?: params.ref_map_bam ?: null, + // assembly mapping provided + assembly_map_bam: it.assembly_map_bam ?: params.ref_map_bam ?: null, + // reads for qc + qc_reads: ((it.qc_reads == "ont" || params.qc_reads == "ont") && ontreads) ? 
"ont" : "hifi", + qc_reads_path: ((it.qc_reads == "ont" || params.qc_reads == "ont") && ontreads) ? ontreads : hifireads, + quast: it.quast ?: params.quast, + busco: it.busco ?: params.busco, + busco_lineage: it.busco_lineage ?: params.busco_lineage, + busco_db: it.busco_db ?: params.busco_db, + meryl_k: it.meryl_k ?: params.meryl_k, + merqury: it.merqury ?: params.merqury, + lift_annotations: (it.ref_gff || params.ref_gff) ? (it.lift_annotations ?: params.lift_annotations) : false, + shortread_F: it.shortread_F ?: params.shortread_F, + shortread_R: it.shortread_R ?: params.shortread_R, + paired: it.paired ?: params.paired ?: ((it.shortread_F || params.shortread_F) && (it.shortread_R || params.shortread_R)) ? true : false, + // new: + use_short_reads: it.use_short_reads ?: params.use_short_reads ?: params.shortread_F ? true : (it.shortread_F ? true : false), + shortread_trim: it.shortread_trim ?: params.shortread_trim + ] + ] } - } - // check for QC reads - if (params.hifi && params.ont) { - if (!params.qc_reads) { - error("Please specify which reads should be used for qc: 'ONT' or 'HIFI'") + .set { ch_samplesheet } + + // Define valid hybrid assemblers + + def hybrid_assemblers = ["hifiasm"] + ch_samplesheet.dump(tag: "PARSED INPUTS:") + // sample-level checks + // if a check fails, map returns a list that prints what fails, and contains "invalid" + // error is raised by subscribe if there is more than one "invalid" + /* + ch_samplesheet + .map { + it -> + [ + // Check if assembler_ont was set + (it.meta.ontreads && !it.meta.assembler_ont && !it.meta.assembly) + ? + ( + // Check if assembler_hifi was set + (it.meta.hifireads && it.meta.assembler_hifi && it.meta.strategy == "single") + ? + null + : + [ + println("Please confirm samplesheet: [sample: $it.meta.id]: assembler_ont could not be set and no assembly was provided."), + "invalid" + ] + ) + : null, + // Check if assembler_hifi was set + (it.meta.hifireads && !it.meta.assembler_hifi && !it.meta.assembly) + ? 
+ ( + // Check if assembler_ont was set + (it.meta.ontreads && it.meta.assembler_ont && it.meta.strategy == "single") + ? + null + : + [ + println("Please confirm samplesheet: [sample: $it.meta.id]: assembler_hifi could not be set and no assembly was provided."), + "invalid" + ] + ) + : null, + // Check if reads and strategy match + (it.meta.strategy == "single" && it.meta.ontreads && it.meta.hifireads) + ? + [ + println("Please confirm samplesheet: [sample: $it.meta.id]: Strategy is $it.meta.strategy, but both types of reads are provided."), + "invalid" + ] + : null, + // Check if assembler can do hybrid + (it.meta.strategy == "hybrid" && !hybrid_assemblers.contains(it.meta.assembler_ont)) + ? + [ + println("Please confirm samplesheet: [sample: $it.meta.id]: Hybrid assembly can only be performed with $hybrid_assemblers"), + "invalid" + ] + : null, + // Check if qc reads are specified for hybrid assemblies + (it.meta.strategy == "hybrid" && !it.meta.qc_reads) + ? + [ + println("Please confirm samplesheet: [sample: $it.meta.id]: Please specify which reads should be used for qc: '--qc_reads': 'ont' or 'hifi'"), + "invalid" + ] + : null, + // Check if genome_size is given with --scaffold_longstitch + (it.meta.scaffold_longstitch && !it.meta.genome_size && !it.meta.jellyfish) + ? + [ + println("Please confirm samplesheet: [sample: $it.meta.id]: scaffolding with longstitch requires genome-size. 
Either provide genome-size estimate, or estimate from reads with --jellyfish"), + "invalid" + ] + : null, + ] } - } - // Make sure that genome_size is provided or estimated when using scaffold_longstitch - if (params.scaffold_longstitch) { - // If genomesize is not provided, and if ONT is not used in combination with jellyfish - // Throw an error - if (!params.genome_size && (!params.ont && !params.jellyfish)) { - error("Scaffolding with longstitch requires genome size.\n Either provide a genome size with --genome_size or estimate from ONT reads using jellyfish and genomescope") + .map { it -> it.collect() } + .collect() + // warn if >0 samples failed a check above + .subscribe { + it -> it.contains("invalid") + ? log.warn("Invalid combination in samplesheet") + : null } - } - + */ emit: samplesheet = ch_samplesheet - refs = ch_refs versions = ch_versions } diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf index b716375b..312c2d24 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf +++ b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -12,14 +12,9 @@ workflow BAM_SORT_STATS_SAMTOOLS { ch_fasta // channel: [ val(meta), path(fasta) ] main: - - ch_versions = Channel.empty() - - SAMTOOLS_SORT ( ch_bam, ch_fasta ) - ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + SAMTOOLS_SORT ( ch_bam, ch_fasta, '' ) SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) SAMTOOLS_SORT.out.bam .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) @@ -35,7 +30,6 @@ workflow BAM_SORT_STATS_SAMTOOLS { .set { ch_bam_bai } BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) - ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) emit: bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] @@ -45,6 +39,4 @@ workflow BAM_SORT_STATS_SAMTOOLS { stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] 
flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test index 821a3cf5..c5841289 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test @@ -41,8 +41,7 @@ nextflow_workflow { { assert snapshot( workflow.out.flagstat, workflow.out.idxstats, - workflow.out.stats, - workflow.out.versions).match() } + workflow.out.stats).match() } ) } } @@ -72,8 +71,7 @@ nextflow_workflow { { assert snapshot( workflow.out.flagstat, workflow.out.idxstats, - workflow.out.stats, - workflow.out.versions).match() } + workflow.out.stats).match() } ) } } diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap index c3c9a049..f62d68c9 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap @@ -25,22 +25,15 @@ "id": "test", "single_end": false }, - "test.stats:md5,2fe0f3a7a1f07906061c1dadb62e0d05" + "test.stats:md5,1101fe711c4a389fdb5c4a1532107d1f" ] - ], - [ - "versions.yml:md5,032c89015461d597fcc5a5331b619d0a", - "versions.yml:md5,416c5e4a374c61167db999b0e400e3cf", - "versions.yml:md5,721391fd94c417808516480c9451c6fd", - "versions.yml:md5,9e12386b91a2977d23292754e3bcb522", - "versions.yml:md5,c294c162aeb09862cc5e55b602647452" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:26:24.36986488" + "timestamp": "2026-02-03T11:33:01.647190952" }, "test_bam_sort_stats_samtools_paired_end": { "content": [ @@ -68,22 +61,15 @@ 
"id": "test", "single_end": false }, - "test.stats:md5,ba007b13981dad548358c7c957d41e12" + "test.stats:md5,f26c554c244ee86c89d62ebed509fd95" ] - ], - [ - "versions.yml:md5,032c89015461d597fcc5a5331b619d0a", - "versions.yml:md5,416c5e4a374c61167db999b0e400e3cf", - "versions.yml:md5,721391fd94c417808516480c9451c6fd", - "versions.yml:md5,9e12386b91a2977d23292754e3bcb522", - "versions.yml:md5,c294c162aeb09862cc5e55b602647452" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:26:38.683996037" + "timestamp": "2026-02-03T11:33:08.706742267" }, "test_bam_sort_stats_samtools_single_end - stub": { "content": [ @@ -124,7 +110,7 @@ "id": "test", "single_end": false }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "5": [ @@ -136,13 +122,6 @@ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "6": [ - "versions.yml:md5,032c89015461d597fcc5a5331b619d0a", - "versions.yml:md5,416c5e4a374c61167db999b0e400e3cf", - "versions.yml:md5,721391fd94c417808516480c9451c6fd", - "versions.yml:md5,9e12386b91a2977d23292754e3bcb522", - "versions.yml:md5,c294c162aeb09862cc5e55b602647452" - ], "bai": [ [ { @@ -170,7 +149,7 @@ "id": "test", "single_end": false }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "idxstats": [ @@ -190,21 +169,14 @@ }, "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] - ], - "versions": [ - "versions.yml:md5,032c89015461d597fcc5a5331b619d0a", - "versions.yml:md5,416c5e4a374c61167db999b0e400e3cf", - "versions.yml:md5,721391fd94c417808516480c9451c6fd", - "versions.yml:md5,9e12386b91a2977d23292754e3bcb522", - "versions.yml:md5,c294c162aeb09862cc5e55b602647452" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:07:18.896460047" + "timestamp": 
"2026-02-03T11:11:02.1412136" }, "test_bam_sort_stats_samtools_paired_end - stub": { "content": [ @@ -245,7 +217,7 @@ "id": "test", "single_end": false }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "5": [ @@ -257,13 +229,6 @@ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "6": [ - "versions.yml:md5,032c89015461d597fcc5a5331b619d0a", - "versions.yml:md5,416c5e4a374c61167db999b0e400e3cf", - "versions.yml:md5,721391fd94c417808516480c9451c6fd", - "versions.yml:md5,9e12386b91a2977d23292754e3bcb522", - "versions.yml:md5,c294c162aeb09862cc5e55b602647452" - ], "bai": [ [ { @@ -291,7 +256,7 @@ "id": "test", "single_end": false }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "idxstats": [ @@ -311,20 +276,13 @@ }, "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] - ], - "versions": [ - "versions.yml:md5,032c89015461d597fcc5a5331b619d0a", - "versions.yml:md5,416c5e4a374c61167db999b0e400e3cf", - "versions.yml:md5,721391fd94c417808516480c9451c6fd", - "versions.yml:md5,9e12386b91a2977d23292754e3bcb522", - "versions.yml:md5,c294c162aeb09862cc5e55b602647452" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:07:39.028688324" + "timestamp": "2026-02-03T11:11:09.165267895" } } \ No newline at end of file diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf index 44d4c010..34e8fe10 100644 --- a/subworkflows/nf-core/bam_stats_samtools/main.nf +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -12,21 +12,14 @@ workflow BAM_STATS_SAMTOOLS { ch_fasta // channel: [ val(meta), path(fasta) ] main: - ch_versions = Channel.empty() - SAMTOOLS_STATS ( ch_bam_bai, ch_fasta ) - ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) SAMTOOLS_FLAGSTAT ( ch_bam_bai ) - ch_versions = 
ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) SAMTOOLS_IDXSTATS ( ch_bam_bai ) - ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) emit: stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ] flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), path(flagstat) ] idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), path(idxstats) ] - - versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test index 76e7a40a..2f329695 100644 --- a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test @@ -36,8 +36,7 @@ nextflow_workflow { { assert snapshot( workflow.out.flagstat, workflow.out.idxstats, - workflow.out.stats, - workflow.out.versions).match() } + workflow.out.stats).match() } ) } } @@ -66,8 +65,7 @@ nextflow_workflow { { assert snapshot( workflow.out.flagstat, workflow.out.idxstats, - workflow.out.stats, - workflow.out.versions).match() } + workflow.out.stats).match() } ) } } @@ -96,8 +94,7 @@ nextflow_workflow { { assert snapshot( workflow.out.flagstat, workflow.out.idxstats, - workflow.out.stats, - workflow.out.versions).match() } + workflow.out.stats).match() } ) } } diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap index 8ca22526..9c8ff1b5 100644 --- a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap @@ -17,7 +17,7 @@ "id": "test", "single_end": true }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "2": [ @@ -29,18 +29,13 @@ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - 
"versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" - ], "flagstat": [ [ { "id": "test", "single_end": true }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "idxstats": [ @@ -60,19 +55,14 @@ }, "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] - ], - "versions": [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:08:35.660286921" + "timestamp": "2026-02-03T11:10:30.076183827" }, "test_bam_stats_samtools_single_end - stub": { "content": [ @@ -92,7 +82,7 @@ "id": "test", "single_end": true }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "2": [ @@ -104,18 +94,13 @@ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" - ], "flagstat": [ [ { "id": "test", "single_end": true }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "idxstats": [ @@ -135,19 +120,14 @@ }, "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] - ], - "versions": [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:08:24.220305512" + "timestamp": "2026-02-03T11:10:24.379362883" }, "test_bam_stats_samtools_paired_end_cram - stub": { "content": [ @@ -167,7 +147,7 @@ 
"id": "test", "single_end": false }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "2": [ @@ -179,18 +159,13 @@ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" - ], "flagstat": [ [ { "id": "test", "single_end": false }, - "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" ] ], "idxstats": [ @@ -210,19 +185,14 @@ }, "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" ] - ], - "versions": [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" ] } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:08:54.206770141" + "timestamp": "2026-02-03T11:10:35.91658956" }, "test_bam_stats_samtools_single_end": { "content": [ @@ -250,20 +220,15 @@ "id": "test", "single_end": true }, - "test.stats:md5,291bb2393ec947140d12d42c2795b222" + "test.stats:md5,7a05a22bdb17e8df6e8c2d100ff09a31" ] - ], - [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:07:49.731645858" + "timestamp": "2026-02-03T11:32:20.243663217" }, "test_bam_stats_samtools_paired_end": { "content": [ @@ -291,20 +256,15 @@ "id": "test", "single_end": true }, - "test.stats:md5,8140d69cdedd77570ca1d7618a744e16" + "test.stats:md5,a391612b5ef5b181e854ccaad8c8a068" ] - ], - [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - 
"versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:08:01.421996172" + "timestamp": "2026-02-03T11:32:26.434187887" }, "test_bam_stats_samtools_paired_end_cram": { "content": [ @@ -332,19 +292,14 @@ "id": "test", "single_end": false }, - "test.stats:md5,1622856127bafd6cdbadee9cd64ec9b7" + "test.stats:md5,2b0e31ab01b867a6ff312023ae03838d" ] - ], - [ - "versions.yml:md5,73c55059ed478cd2f9cd93dd3185da3a", - "versions.yml:md5,80d8653e01575b3c381d87073f672fb5", - "versions.yml:md5,cb889532237a2f3d813978ac14a12d51" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.3" }, - "timestamp": "2024-09-16T08:08:12.640915756" + "timestamp": "2026-02-03T11:32:32.441454186" } } \ No newline at end of file diff --git a/tests/.nftignore b/tests/.nftignore index e8128b21..c31dce4d 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -1,6 +1,16 @@ .DS_Store fastqc/*_fastqc.{html,zip} pipeline_info/*.{html,json,txt,yml} -*/*/*/*.{log,bin,gz,gff3,fasta,agp} -*/*/*.{log,bin,gz,gff3,txt} +*/*/*/*/*.{log,bin,gff3,agp,html,gz} +*/*/*/*.{log,bin,gff3,agp,html,gz} +*/*/*.{log,bin,gz,gff3,agp,html,gz} +*/*.{log,bin,gz,gff3,agp,html,gz} +*.{log,bin,gz,gff3,agp,html,gz} */*/*/*.assembly_info.txt +*/scaffold/longstitch/*tigmint-ntLink-arks.fa +test_flye_hifiasm/assembly/ragtag/test_flye_hifiasm_assembly_patch.comps.fasta +test_flye_hifiasm/assembly/ragtag/test_flye_hifiasm_assembly_patch.ctg.fasta +test_flye_hifiasm/assembly/ragtag/test_flye_hifiasm_assembly_patch.patch.fasta +test_flye_hifiasm/assembly/ragtag/test_flye_hifiasm_assembly_patch.patch.fasta +test_flye_hifiasm/scaffold/ragtag/test_flye_hifiasm_ragtag.fasta +test_flye_hifiasm/scaffold/ragtag/test_flye_hifiasm_ragtag.stats diff --git a/tests/default.nf.test b/tests/default.nf.test index 
67fb220f..3fa9782d 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -16,14 +16,17 @@ nextflow_pipeline { // stable_name: All files + folders in ${params.outdir}/ with a stable name def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) // stable_path: All files in ${params.outdir}/ with stable content - def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + def stable_path = getAllFilesFromDir( + params.outdir, + ignoreFile: 'tests/.nftignore', + ) assertAll( { assert workflow.success}, { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/nf_core_genomeassembler_software_versions.yml"), + removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_versions.yml"), // All stable path name, with a relative path stable_name, // All files with stable contents diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 73ab05f2..67008918 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,89 +1,345 @@ { "-profile test": { "content": [ - 6, + 39, { - "FLYE": { + "FASTPLONG_HIFI": { + "fastplong": "0.3.0" + }, + "FASTPLONG_ONT": { + "fastplong": "0.3.0" + }, + "FLYE_HIFI": { + "flye": "2.9.5-b1801" + }, + "FLYE_ONT": { "flye": "2.9.5-b1801" }, "GFA_2_FA_HIFI": { - "awk": "mawk 1.3.4", - "gzip": 1.13 + "bgzip": "1.22.1" + }, + "GFA_2_FA_ONT": { + "bgzip": "1.22.1" }, "HIFIASM": { - "hifiasm": "0.25.0-r726" + "hifasm": "0.25.0-r726" + }, + "HIFIASM_ONT": { + "hifasm": "0.25.0-r726" }, "LIFTOFF": { - "liftoff": "v1.6.3" + "liftoff": "1.6.3" }, - "NANOQ": { - "nanoq": "0.10.0" + "LONGSTITCH": { + "LongStitch": "1.0.5" }, "RAGTAG_PATCH": { "ragtag": "2.1.0" }, - "Workflow": { - 
"nf-core/genomeassembler": "v1.1.0" + "RAGTAG_SCAFFOLD": { + "ragtag": "2.1.0" } }, [ - "Col-0_2MB", - "Col-0_2MB/QC", - "Col-0_2MB/QC/nanoq", - "Col-0_2MB/QC/nanoq/Col-0_2MB_report.json", - "Col-0_2MB/QC/nanoq/Col-0_2MB_stats.json", - "Col-0_2MB/assembly", - "Col-0_2MB/assembly/Col-0_2MB_assembly.gff3", - "Col-0_2MB/assembly/Col-0_2MB_assembly.unmapped.txt", - "Col-0_2MB/assembly/flye", - "Col-0_2MB/assembly/flye/Col-0_2MB.assembly.fasta.gz", - "Col-0_2MB/assembly/flye/Col-0_2MB.assembly_graph.gfa.gz", - "Col-0_2MB/assembly/flye/Col-0_2MB.assembly_graph.gv.gz", - "Col-0_2MB/assembly/flye/Col-0_2MB.assembly_info.txt", - "Col-0_2MB/assembly/flye/Col-0_2MB.flye.log", - "Col-0_2MB/assembly/flye/Col-0_2MB.params.json", - "Col-0_2MB/assembly/hifiasm", - "Col-0_2MB/assembly/hifiasm/Col-0_2MB.bp.hap1.p_ctg.gfa", - "Col-0_2MB/assembly/hifiasm/Col-0_2MB.bp.hap2.p_ctg.gfa", - "Col-0_2MB/assembly/hifiasm/Col-0_2MB.bp.p_ctg.gfa", - "Col-0_2MB/assembly/hifiasm/Col-0_2MB.bp.p_utg.gfa", - "Col-0_2MB/assembly/hifiasm/Col-0_2MB.bp.r_utg.gfa", - "Col-0_2MB/assembly/hifiasm/Col-0_2MB.ec.bin", - "Col-0_2MB/assembly/hifiasm/Col-0_2MB.ovlp.reverse.bin", - "Col-0_2MB/assembly/hifiasm/Col-0_2MB.ovlp.source.bin", - "Col-0_2MB/assembly/hifiasm/Col-0_2MB.stderr.log", - "Col-0_2MB/assembly/hifiasm/fasta", - "Col-0_2MB/assembly/hifiasm/fasta/Col-0_2MB.bp.p_utg.fa.gz", - "Col-0_2MB/assembly/ragtag", - "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.comps.fasta", - "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.ctg.agp", - "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.ctg.fasta", - "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.patch.agp", - "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.patch.err", - "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.patch.fasta", - "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.rename.agp", - "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.rename.fasta", "pipeline_info", - 
"pipeline_info/nf_core_genomeassembler_software_versions.yml", - "pipeline_info/nf_core_pipeline_software_versions.yml" + "pipeline_info/nf_core_pipeline_software_versions.yml", + "test_flye_hifiasm", + "test_flye_hifiasm/assembly", + "test_flye_hifiasm/assembly/flye", + "test_flye_hifiasm/assembly/flye/test_flye_hifiasm.assembly.fasta.gz", + "test_flye_hifiasm/assembly/flye/test_flye_hifiasm.assembly_graph.gfa.gz", + "test_flye_hifiasm/assembly/flye/test_flye_hifiasm.assembly_graph.gv.gz", + "test_flye_hifiasm/assembly/flye/test_flye_hifiasm.assembly_info.txt", + "test_flye_hifiasm/assembly/flye/test_flye_hifiasm.flye.log", + "test_flye_hifiasm/assembly/flye/test_flye_hifiasm.params.json", + "test_flye_hifiasm/assembly/hifiasm", + "test_flye_hifiasm/assembly/hifiasm/fasta", + "test_flye_hifiasm/assembly/hifiasm/fasta/test_flye_hifiasm.bp.p_ctg.fa.gz", + "test_flye_hifiasm/assembly/hifiasm/test_flye_hifiasm.bp.hap1.p_ctg.gfa", + "test_flye_hifiasm/assembly/hifiasm/test_flye_hifiasm.bp.hap2.p_ctg.gfa", + "test_flye_hifiasm/assembly/hifiasm/test_flye_hifiasm.bp.p_ctg.gfa", + "test_flye_hifiasm/assembly/hifiasm/test_flye_hifiasm.bp.p_utg.gfa", + "test_flye_hifiasm/assembly/hifiasm/test_flye_hifiasm.bp.r_utg.gfa", + "test_flye_hifiasm/assembly/hifiasm/test_flye_hifiasm.ec.bin", + "test_flye_hifiasm/assembly/hifiasm/test_flye_hifiasm.ovlp.reverse.bin", + "test_flye_hifiasm/assembly/hifiasm/test_flye_hifiasm.ovlp.source.bin", + "test_flye_hifiasm/assembly/hifiasm/test_flye_hifiasm.stderr.log", + "test_flye_hifiasm/assembly/ragtag", + "test_flye_hifiasm/assembly/ragtag/test_flye_hifiasm_assembly_patch.comps.fasta", + "test_flye_hifiasm/assembly/ragtag/test_flye_hifiasm_assembly_patch.ctg.agp", + "test_flye_hifiasm/assembly/ragtag/test_flye_hifiasm_assembly_patch.ctg.fasta", + "test_flye_hifiasm/assembly/ragtag/test_flye_hifiasm_assembly_patch.patch.agp", + "test_flye_hifiasm/assembly/ragtag/test_flye_hifiasm_assembly_patch.patch.err", + 
"test_flye_hifiasm/assembly/ragtag/test_flye_hifiasm_assembly_patch.patch.fasta", + "test_flye_hifiasm/assembly/ragtag/test_flye_hifiasm_assembly_patch.rename.agp", + "test_flye_hifiasm/assembly/ragtag/test_flye_hifiasm_assembly_patch.rename.fasta", + "test_flye_hifiasm/assembly/test_flye_hifiasm_assembly.gff3", + "test_flye_hifiasm/assembly/test_flye_hifiasm_assembly.unmapped.txt", + "test_flye_hifiasm/reads", + "test_flye_hifiasm/reads/fastplong", + "test_flye_hifiasm/reads/fastplong/hifi", + "test_flye_hifiasm/reads/fastplong/hifi/test_flye_hifiasm_hifi.fastplong.fastq.gz", + "test_flye_hifiasm/reads/fastplong/hifi/test_flye_hifiasm_hifi.fastplong.html", + "test_flye_hifiasm/reads/fastplong/hifi/test_flye_hifiasm_hifi.fastplong.json", + "test_flye_hifiasm/reads/fastplong/hifi/test_flye_hifiasm_hifi.fastplong.log", + "test_flye_hifiasm/reads/fastplong/ont", + "test_flye_hifiasm/reads/fastplong/ont/test_flye_hifiasm_ont.fastplong.fastq.gz", + "test_flye_hifiasm/reads/fastplong/ont/test_flye_hifiasm_ont.fastplong.html", + "test_flye_hifiasm/reads/fastplong/ont/test_flye_hifiasm_ont.fastplong.json", + "test_flye_hifiasm/reads/fastplong/ont/test_flye_hifiasm_ont.fastplong.log", + "test_flye_hifiasm/scaffold", + "test_flye_hifiasm/scaffold/ragtag", + "test_flye_hifiasm/scaffold/ragtag/test_flye_hifiasm_ragtag.agp", + "test_flye_hifiasm/scaffold/ragtag/test_flye_hifiasm_ragtag.fasta", + "test_flye_hifiasm/scaffold/ragtag/test_flye_hifiasm_ragtag.stats", + "test_hifi_flye", + "test_hifi_flye/reads", + "test_hifi_flye/reads/fastplong", + "test_hifi_flye/reads/fastplong/hifi", + "test_hifi_flye/reads/fastplong/hifi/test_hifi_flye_hifi.fastplong.fastq.gz", + "test_hifi_flye/reads/fastplong/hifi/test_hifi_flye_hifi.fastplong.html", + "test_hifi_flye/reads/fastplong/hifi/test_hifi_flye_hifi.fastplong.json", + "test_hifi_flye/reads/fastplong/hifi/test_hifi_flye_hifi.fastplong.log", + "test_hifi_flye/reads/fastplong/ont", + 
"test_hifi_flye/reads/fastplong/ont/test_hifi_flye_ont.fastplong.fastq.gz", + "test_hifi_flye/reads/fastplong/ont/test_hifi_flye_ont.fastplong.html", + "test_hifi_flye/reads/fastplong/ont/test_hifi_flye_ont.fastplong.json", + "test_hifi_flye/reads/fastplong/ont/test_hifi_flye_ont.fastplong.log", + "test_hifi_hifiasm", + "test_hifi_hifiasm/assembly", + "test_hifi_hifiasm/assembly/hifiasm", + "test_hifi_hifiasm/assembly/hifiasm/fasta", + "test_hifi_hifiasm/assembly/hifiasm/fasta/test_hifi_hifiasm.bp.p_ctg.fa.gz", + "test_hifi_hifiasm/assembly/hifiasm/test_hifi_hifiasm.bp.hap1.p_ctg.gfa", + "test_hifi_hifiasm/assembly/hifiasm/test_hifi_hifiasm.bp.hap2.p_ctg.gfa", + "test_hifi_hifiasm/assembly/hifiasm/test_hifi_hifiasm.bp.p_ctg.gfa", + "test_hifi_hifiasm/assembly/hifiasm/test_hifi_hifiasm.bp.p_utg.gfa", + "test_hifi_hifiasm/assembly/hifiasm/test_hifi_hifiasm.bp.r_utg.gfa", + "test_hifi_hifiasm/assembly/hifiasm/test_hifi_hifiasm.ec.bin", + "test_hifi_hifiasm/assembly/hifiasm/test_hifi_hifiasm.ovlp.reverse.bin", + "test_hifi_hifiasm/assembly/hifiasm/test_hifi_hifiasm.ovlp.source.bin", + "test_hifi_hifiasm/assembly/hifiasm/test_hifi_hifiasm.stderr.log", + "test_hifi_hifiasm/assembly/test_hifi_hifiasm_assembly.gff3", + "test_hifi_hifiasm/assembly/test_hifi_hifiasm_assembly.unmapped.txt", + "test_hifi_hifiasm/reads", + "test_hifi_hifiasm/reads/fastplong", + "test_hifi_hifiasm/reads/fastplong/hifi", + "test_hifi_hifiasm/reads/fastplong/hifi/test_hifi_hifiasm_hifi.fastplong.fastq.gz", + "test_hifi_hifiasm/reads/fastplong/hifi/test_hifi_hifiasm_hifi.fastplong.html", + "test_hifi_hifiasm/reads/fastplong/hifi/test_hifi_hifiasm_hifi.fastplong.json", + "test_hifi_hifiasm/reads/fastplong/hifi/test_hifi_hifiasm_hifi.fastplong.log", + "test_hifi_hifiasm/reads/fastplong/ont", + "test_hifi_hifiasm/reads/fastplong/ont/test_hifi_hifiasm_ont.fastplong.fastq.gz", + "test_hifi_hifiasm/reads/fastplong/ont/test_hifi_hifiasm_ont.fastplong.html", + 
"test_hifi_hifiasm/reads/fastplong/ont/test_hifi_hifiasm_ont.fastplong.json", + "test_hifi_hifiasm/reads/fastplong/ont/test_hifi_hifiasm_ont.fastplong.log", + "test_hifiasm_flye", + "test_hifiasm_flye/assembly", + "test_hifiasm_flye/assembly/flye", + "test_hifiasm_flye/assembly/flye/test_hifiasm_flye.assembly.fasta.gz", + "test_hifiasm_flye/assembly/flye/test_hifiasm_flye.assembly_graph.gfa.gz", + "test_hifiasm_flye/assembly/flye/test_hifiasm_flye.assembly_graph.gv.gz", + "test_hifiasm_flye/assembly/flye/test_hifiasm_flye.assembly_info.txt", + "test_hifiasm_flye/assembly/flye/test_hifiasm_flye.flye.log", + "test_hifiasm_flye/assembly/flye/test_hifiasm_flye.params.json", + "test_hifiasm_flye/assembly/hifiasm_ont", + "test_hifiasm_flye/assembly/hifiasm_ont/fasta", + "test_hifiasm_flye/assembly/hifiasm_ont/fasta/test_hifiasm_flye.bp.p_ctg.fa.gz", + "test_hifiasm_flye/assembly/hifiasm_ont/test_hifiasm_flye.bp.hap1.p_ctg.gfa", + "test_hifiasm_flye/assembly/hifiasm_ont/test_hifiasm_flye.bp.hap2.p_ctg.gfa", + "test_hifiasm_flye/assembly/hifiasm_ont/test_hifiasm_flye.bp.p_ctg.gfa", + "test_hifiasm_flye/assembly/hifiasm_ont/test_hifiasm_flye.bp.p_utg.gfa", + "test_hifiasm_flye/assembly/hifiasm_ont/test_hifiasm_flye.bp.r_utg.gfa", + "test_hifiasm_flye/assembly/hifiasm_ont/test_hifiasm_flye.ec.bin", + "test_hifiasm_flye/assembly/hifiasm_ont/test_hifiasm_flye.ovlp.reverse.bin", + "test_hifiasm_flye/assembly/hifiasm_ont/test_hifiasm_flye.ovlp.source.bin", + "test_hifiasm_flye/assembly/hifiasm_ont/test_hifiasm_flye.stderr.log", + "test_hifiasm_flye/assembly/ragtag", + "test_hifiasm_flye/assembly/ragtag/test_hifiasm_flye_assembly_patch.comps.fasta", + "test_hifiasm_flye/assembly/ragtag/test_hifiasm_flye_assembly_patch.ctg.agp", + "test_hifiasm_flye/assembly/ragtag/test_hifiasm_flye_assembly_patch.ctg.fasta", + "test_hifiasm_flye/assembly/ragtag/test_hifiasm_flye_assembly_patch.patch.agp", + "test_hifiasm_flye/assembly/ragtag/test_hifiasm_flye_assembly_patch.patch.err", + 
"test_hifiasm_flye/assembly/ragtag/test_hifiasm_flye_assembly_patch.patch.fasta", + "test_hifiasm_flye/assembly/ragtag/test_hifiasm_flye_assembly_patch.rename.agp", + "test_hifiasm_flye/assembly/ragtag/test_hifiasm_flye_assembly_patch.rename.fasta", + "test_hifiasm_flye/assembly/test_hifiasm_flye_assembly.gff3", + "test_hifiasm_flye/assembly/test_hifiasm_flye_assembly.unmapped.txt", + "test_hifiasm_flye/reads", + "test_hifiasm_flye/reads/fastplong", + "test_hifiasm_flye/reads/fastplong/hifi", + "test_hifiasm_flye/reads/fastplong/hifi/test_hifiasm_flye_hifi.fastplong.fastq.gz", + "test_hifiasm_flye/reads/fastplong/hifi/test_hifiasm_flye_hifi.fastplong.html", + "test_hifiasm_flye/reads/fastplong/hifi/test_hifiasm_flye_hifi.fastplong.json", + "test_hifiasm_flye/reads/fastplong/hifi/test_hifiasm_flye_hifi.fastplong.log", + "test_hifiasm_flye/reads/fastplong/ont", + "test_hifiasm_flye/reads/fastplong/ont/test_hifiasm_flye_ont.fastplong.fastq.gz", + "test_hifiasm_flye/reads/fastplong/ont/test_hifiasm_flye_ont.fastplong.html", + "test_hifiasm_flye/reads/fastplong/ont/test_hifiasm_flye_ont.fastplong.json", + "test_hifiasm_flye/reads/fastplong/ont/test_hifiasm_flye_ont.fastplong.log", + "test_hifiasm_ul", + "test_hifiasm_ul/assembly", + "test_hifiasm_ul/assembly/hifiasm", + "test_hifiasm_ul/assembly/hifiasm/fasta", + "test_hifiasm_ul/assembly/hifiasm/fasta/test_hifiasm_ul.bp.p_ctg.fa.gz", + "test_hifiasm_ul/assembly/hifiasm/test_hifiasm_ul.bp.hap1.p_ctg.gfa", + "test_hifiasm_ul/assembly/hifiasm/test_hifiasm_ul.bp.hap2.p_ctg.gfa", + "test_hifiasm_ul/assembly/hifiasm/test_hifiasm_ul.bp.p_ctg.gfa", + "test_hifiasm_ul/assembly/hifiasm/test_hifiasm_ul.bp.p_utg.gfa", + "test_hifiasm_ul/assembly/hifiasm/test_hifiasm_ul.bp.r_utg.gfa", + "test_hifiasm_ul/assembly/hifiasm/test_hifiasm_ul.ec.bin", + "test_hifiasm_ul/assembly/hifiasm/test_hifiasm_ul.ovlp.reverse.bin", + "test_hifiasm_ul/assembly/hifiasm/test_hifiasm_ul.ovlp.source.bin", + 
"test_hifiasm_ul/assembly/hifiasm/test_hifiasm_ul.re.uidx.bin", + "test_hifiasm_ul/assembly/hifiasm/test_hifiasm_ul.re.uidx.ucr.bin", + "test_hifiasm_ul/assembly/hifiasm/test_hifiasm_ul.re.ul.msk.bin", + "test_hifiasm_ul/assembly/hifiasm/test_hifiasm_ul.re.ul.ovlp.bin", + "test_hifiasm_ul/assembly/hifiasm/test_hifiasm_ul.stderr.log", + "test_hifiasm_ul/assembly/hifiasm/test_hifiasm_ul.uidx.bin", + "test_hifiasm_ul/assembly/hifiasm/test_hifiasm_ul.ul.ovlp.bin", + "test_hifiasm_ul/assembly/test_hifiasm_ul_assembly.gff3", + "test_hifiasm_ul/assembly/test_hifiasm_ul_assembly.unmapped.txt", + "test_hifiasm_ul/reads", + "test_hifiasm_ul/reads/fastplong", + "test_hifiasm_ul/reads/fastplong/hifi", + "test_hifiasm_ul/reads/fastplong/hifi/test_hifiasm_ul_hifi.fastplong.fastq.gz", + "test_hifiasm_ul/reads/fastplong/hifi/test_hifiasm_ul_hifi.fastplong.html", + "test_hifiasm_ul/reads/fastplong/hifi/test_hifiasm_ul_hifi.fastplong.json", + "test_hifiasm_ul/reads/fastplong/hifi/test_hifiasm_ul_hifi.fastplong.log", + "test_hifiasm_ul/reads/fastplong/ont", + "test_hifiasm_ul/reads/fastplong/ont/test_hifiasm_ul_ont.fastplong.fastq.gz", + "test_hifiasm_ul/reads/fastplong/ont/test_hifiasm_ul_ont.fastplong.html", + "test_hifiasm_ul/reads/fastplong/ont/test_hifiasm_ul_ont.fastplong.json", + "test_hifiasm_ul/reads/fastplong/ont/test_hifiasm_ul_ont.fastplong.log", + "test_hifiasm_ul/scaffold", + "test_hifiasm_ul/scaffold/longstitch", + "test_hifiasm_ul/scaffold/longstitch/test_hifiasm_ul_longstitch.gff3", + "test_hifiasm_ul/scaffold/longstitch/test_hifiasm_ul_longstitch.tigmint-ntLink-arks.fa", + "test_hifiasm_ul/scaffold/longstitch/test_hifiasm_ul_longstitch.tigmint-ntLink.fa", + "test_hifiasm_ul/scaffold/longstitch/test_hifiasm_ul_longstitch.unmapped.txt", + "test_ont_flye", + "test_ont_flye/assembly", + "test_ont_flye/assembly/flye", + "test_ont_flye/assembly/flye/test_ont_flye.assembly.fasta.gz", + "test_ont_flye/assembly/flye/test_ont_flye.assembly_graph.gfa.gz", + 
"test_ont_flye/assembly/flye/test_ont_flye.assembly_graph.gv.gz", + "test_ont_flye/assembly/flye/test_ont_flye.assembly_info.txt", + "test_ont_flye/assembly/flye/test_ont_flye.flye.log", + "test_ont_flye/assembly/flye/test_ont_flye.params.json", + "test_ont_flye/assembly/test_ont_flye_assembly.gff3", + "test_ont_flye/assembly/test_ont_flye_assembly.unmapped.txt", + "test_ont_flye/reads", + "test_ont_flye/reads/fastplong", + "test_ont_flye/reads/fastplong/hifi", + "test_ont_flye/reads/fastplong/hifi/test_ont_flye_hifi.fastplong.fastq.gz", + "test_ont_flye/reads/fastplong/hifi/test_ont_flye_hifi.fastplong.html", + "test_ont_flye/reads/fastplong/hifi/test_ont_flye_hifi.fastplong.json", + "test_ont_flye/reads/fastplong/hifi/test_ont_flye_hifi.fastplong.log", + "test_ont_flye/reads/fastplong/ont", + "test_ont_flye/reads/fastplong/ont/test_ont_flye_ont.fastplong.fastq.gz", + "test_ont_flye/reads/fastplong/ont/test_ont_flye_ont.fastplong.html", + "test_ont_flye/reads/fastplong/ont/test_ont_flye_ont.fastplong.json", + "test_ont_flye/reads/fastplong/ont/test_ont_flye_ont.fastplong.log", + "test_ont_hifiasm", + "test_ont_hifiasm/assembly", + "test_ont_hifiasm/assembly/hifiasm_ont", + "test_ont_hifiasm/assembly/hifiasm_ont/fasta", + "test_ont_hifiasm/assembly/hifiasm_ont/fasta/test_ont_hifiasm.bp.p_ctg.fa.gz", + "test_ont_hifiasm/assembly/hifiasm_ont/test_ont_hifiasm.bp.hap1.p_ctg.gfa", + "test_ont_hifiasm/assembly/hifiasm_ont/test_ont_hifiasm.bp.hap2.p_ctg.gfa", + "test_ont_hifiasm/assembly/hifiasm_ont/test_ont_hifiasm.bp.p_ctg.gfa", + "test_ont_hifiasm/assembly/hifiasm_ont/test_ont_hifiasm.bp.p_utg.gfa", + "test_ont_hifiasm/assembly/hifiasm_ont/test_ont_hifiasm.bp.r_utg.gfa", + "test_ont_hifiasm/assembly/hifiasm_ont/test_ont_hifiasm.ec.bin", + "test_ont_hifiasm/assembly/hifiasm_ont/test_ont_hifiasm.ovlp.reverse.bin", + "test_ont_hifiasm/assembly/hifiasm_ont/test_ont_hifiasm.ovlp.source.bin", + "test_ont_hifiasm/assembly/hifiasm_ont/test_ont_hifiasm.stderr.log", + 
"test_ont_hifiasm/assembly/test_ont_hifiasm_assembly.gff3", + "test_ont_hifiasm/assembly/test_ont_hifiasm_assembly.unmapped.txt", + "test_ont_hifiasm/reads", + "test_ont_hifiasm/reads/fastplong", + "test_ont_hifiasm/reads/fastplong/hifi", + "test_ont_hifiasm/reads/fastplong/hifi/test_ont_hifiasm_hifi.fastplong.fastq.gz", + "test_ont_hifiasm/reads/fastplong/hifi/test_ont_hifiasm_hifi.fastplong.html", + "test_ont_hifiasm/reads/fastplong/hifi/test_ont_hifiasm_hifi.fastplong.json", + "test_ont_hifiasm/reads/fastplong/hifi/test_ont_hifiasm_hifi.fastplong.log", + "test_ont_hifiasm/reads/fastplong/ont", + "test_ont_hifiasm/reads/fastplong/ont/test_ont_hifiasm_ont.fastplong.fastq.gz", + "test_ont_hifiasm/reads/fastplong/ont/test_ont_hifiasm_ont.fastplong.html", + "test_ont_hifiasm/reads/fastplong/ont/test_ont_hifiasm_ont.fastplong.json", + "test_ont_hifiasm/reads/fastplong/ont/test_ont_hifiasm_ont.fastplong.log", + "test_ont_hifiasm/scaffold", + "test_ont_hifiasm/scaffold/ragtag", + "test_ont_hifiasm/scaffold/ragtag/test_ont_hifiasm_ragtag.agp", + "test_ont_hifiasm/scaffold/ragtag/test_ont_hifiasm_ragtag.fasta", + "test_ont_hifiasm/scaffold/ragtag/test_ont_hifiasm_ragtag.stats" ], [ - "Col-0_2MB_report.json:md5,25d7ae5780b2f565cb46df7c9e09388a", - "Col-0_2MB_stats.json:md5,d41d8cd98f00b204e9800998ecf8427e", - "Col-0_2MB.params.json:md5,afa91c041bce5e190f4a699d11b69db6", - "Col-0_2MB.bp.hap1.p_ctg.gfa:md5,46ee70869884ad585165bd48081414e9", - "Col-0_2MB.bp.hap2.p_ctg.gfa:md5,7792865547989d6d284f640425c4e36c", - "Col-0_2MB.bp.p_ctg.gfa:md5,8fe65466d76815ffe1663ff6d8f2e8d1", - "Col-0_2MB.bp.p_utg.gfa:md5,ba2c77ebdb2ad3e6060f5574e890c6eb", - "Col-0_2MB.bp.r_utg.gfa:md5,ba2c77ebdb2ad3e6060f5574e890c6eb", - "Col-0_2MB.bp.p_utg.fa.gz:md5,812a3a16dc68bb409deb69f0aef7e6a8", - "Col-0_2MB_assembly_patch.patch.err:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_flye_hifiasm.params.json:md5,afa91c041bce5e190f4a699d11b69db6", + 
"test_flye_hifiasm.bp.hap1.p_ctg.gfa:md5,46ee70869884ad585165bd48081414e9", + "test_flye_hifiasm.bp.hap2.p_ctg.gfa:md5,7792865547989d6d284f640425c4e36c", + "test_flye_hifiasm.bp.p_ctg.gfa:md5,8fe65466d76815ffe1663ff6d8f2e8d1", + "test_flye_hifiasm.bp.p_utg.gfa:md5,ba2c77ebdb2ad3e6060f5574e890c6eb", + "test_flye_hifiasm.bp.r_utg.gfa:md5,ba2c77ebdb2ad3e6060f5574e890c6eb", + "test_flye_hifiasm_assembly_patch.patch.err:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_flye_hifiasm_assembly_patch.rename.fasta:md5,0515edc4c23258ef17d6ba085a6e4d31", + "test_flye_hifiasm_assembly.unmapped.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_flye_hifiasm_hifi.fastplong.json:md5,0e20fedb8dfe0646232060da502883ee", + "test_flye_hifiasm_ont.fastplong.json:md5,826cc5a321dd130a2f2c53cdb1a345b2", + "test_hifi_flye_hifi.fastplong.json:md5,daad59b32b84b79c29c274ed20f03cce", + "test_hifi_flye_ont.fastplong.json:md5,198eb762846548340557f5f861f10c11", + "test_hifi_hifiasm.bp.hap1.p_ctg.gfa:md5,46ee70869884ad585165bd48081414e9", + "test_hifi_hifiasm.bp.hap2.p_ctg.gfa:md5,7792865547989d6d284f640425c4e36c", + "test_hifi_hifiasm.bp.p_ctg.gfa:md5,8fe65466d76815ffe1663ff6d8f2e8d1", + "test_hifi_hifiasm.bp.p_utg.gfa:md5,ba2c77ebdb2ad3e6060f5574e890c6eb", + "test_hifi_hifiasm.bp.r_utg.gfa:md5,ba2c77ebdb2ad3e6060f5574e890c6eb", + "test_hifi_hifiasm_assembly.unmapped.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_hifi_hifiasm_hifi.fastplong.json:md5,e58f896e9b70344ae4e607324c2c6742", + "test_hifi_hifiasm_ont.fastplong.json:md5,eedbb98c4d3870f38320d75b21c86604", + "test_hifiasm_flye.params.json:md5,54b576cb6d4d27656878a7fd3657bde9", + "test_hifiasm_flye.bp.hap1.p_ctg.gfa:md5,c9a084903ea872a7ac28f3bbd5eee8f3", + "test_hifiasm_flye.bp.hap2.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_hifiasm_flye.bp.p_ctg.gfa:md5,d78f7a647429b254d9d46aab4d1306a0", + "test_hifiasm_flye.bp.p_utg.gfa:md5,e2b6621386edda1636dfb84ddd676ea8", + 
"test_hifiasm_flye.bp.r_utg.gfa:md5,e2b6621386edda1636dfb84ddd676ea8", + "test_hifiasm_flye_assembly_patch.comps.fasta:md5,6cfb8070823979baece63889e324e246", + "test_hifiasm_flye_assembly_patch.ctg.fasta:md5,6ff15d809eaa0ecf6381400cdd7f0770", + "test_hifiasm_flye_assembly_patch.patch.err:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_hifiasm_flye_assembly_patch.patch.fasta:md5,6ff15d809eaa0ecf6381400cdd7f0770", + "test_hifiasm_flye_assembly_patch.rename.fasta:md5,48208c6df370a225c6f54f378d7a8e39", + "test_hifiasm_flye_assembly.unmapped.txt:md5,e99c39c4afff7b433fb3973359dae6a4", + "test_hifiasm_flye_hifi.fastplong.json:md5,2671b946d2bf533341b86a9f2424e5c3", + "test_hifiasm_flye_ont.fastplong.json:md5,924d11a05764994135ff5de294052a02", + "test_hifiasm_ul.bp.hap1.p_ctg.gfa:md5,46ee70869884ad585165bd48081414e9", + "test_hifiasm_ul.bp.hap2.p_ctg.gfa:md5,7792865547989d6d284f640425c4e36c", + "test_hifiasm_ul.bp.p_ctg.gfa:md5,8fe65466d76815ffe1663ff6d8f2e8d1", + "test_hifiasm_ul.bp.p_utg.gfa:md5,ba2c77ebdb2ad3e6060f5574e890c6eb", + "test_hifiasm_ul.bp.r_utg.gfa:md5,ba2c77ebdb2ad3e6060f5574e890c6eb", + "test_hifiasm_ul_assembly.unmapped.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_hifiasm_ul_hifi.fastplong.json:md5,f52a2b6d9aa29fbe5749684ee479de90", + "test_hifiasm_ul_ont.fastplong.json:md5,86da6d58ac4630bfeae3ee08014ecb0b", + "test_hifiasm_ul_longstitch.tigmint-ntLink.fa:md5,301136e56bd854239f31e96e32bc49d4", + "test_hifiasm_ul_longstitch.unmapped.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_ont_flye.params.json:md5,afa91c041bce5e190f4a699d11b69db6", + "test_ont_flye_assembly.unmapped.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_ont_flye_hifi.fastplong.json:md5,9205a4cf65cce249429badc208348349", + "test_ont_flye_ont.fastplong.json:md5,e418b2481ade21bc45e77d1437bad913", + "test_ont_hifiasm.bp.hap1.p_ctg.gfa:md5,c9a084903ea872a7ac28f3bbd5eee8f3", + "test_ont_hifiasm.bp.hap2.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"test_ont_hifiasm.bp.p_ctg.gfa:md5,d78f7a647429b254d9d46aab4d1306a0", + "test_ont_hifiasm.bp.p_utg.gfa:md5,e2b6621386edda1636dfb84ddd676ea8", + "test_ont_hifiasm.bp.r_utg.gfa:md5,e2b6621386edda1636dfb84ddd676ea8", + "test_ont_hifiasm_assembly.unmapped.txt:md5,e99c39c4afff7b433fb3973359dae6a4", + "test_ont_hifiasm_hifi.fastplong.json:md5,91878df7edc2b07a885c103f260fc70e", + "test_ont_hifiasm_ont.fastplong.json:md5,e97714274268b5a30be4d9851f2b61b3", + "test_ont_hifiasm_ragtag.fasta:md5,51f2975257b49657716d2ca7cc6d5fe2", + "test_ont_hifiasm_ragtag.stats:md5,466273b499384b2c781c83dc583b8c99" ] ], + "timestamp": "2026-02-19T10:11:41.761287489", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" - }, - "timestamp": "2025-07-02T11:25:42.487154678" + "nf-test": "0.9.4", + "nextflow": "25.10.0" + } } } \ No newline at end of file diff --git a/workflows/genomeassembler.nf b/workflows/genomeassembler.nf index 48b2d6db..9301c07f 100644 --- a/workflows/genomeassembler.nf +++ b/workflows/genomeassembler.nf @@ -3,153 +3,158 @@ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_genomeassembler_pipeline' -// Read preparation -include { PREPARE_ONT } from '../subworkflows/local/prepare_ont/main' -include { PREPARE_HIFI } from '../subworkflows/local/prepare_hifi/main' -include { PREPARE_SHORTREADS } from '../subworkflows/local/prepare_shortreads/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { 
methodsDescriptionText } from '../subworkflows/local/utils_nfcore_genomeassembler_pipeline' -// Read checks -include { ONT } from '../subworkflows/local/ont/main' -include { HIFI } from '../subworkflows/local/hifi/main' +// Read preparation +include { PREPARE } from '../subworkflows/local/prepare/main' // Assembly -include { ASSEMBLE } from '../subworkflows/local/assemble/main' +include { ASSEMBLE } from '../subworkflows/local/assemble/main' // Polishing -include { POLISH } from '../subworkflows/local/polishing/main' - +include { POLISH } from '../subworkflows/local/polishing/main' // Scaffolding -include { SCAFFOLD } from '../subworkflows/local/scaffolding/main' +include { SCAFFOLD } from '../subworkflows/local/scaffold/main' + // reporting -include { REPORT } from '../modules/local/report/main' +include { REPORT } from '../modules/local/report/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW + MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ workflow GENOMEASSEMBLER { take: ch_input - ch_refs main: // Initialize empty channels - Channel.empty().set { ch_ref_bam } - Channel.empty().set { ch_polished_genome } - Channel.empty().set { ch_ont_reads } - Channel.empty().set { ch_hifi_reads } - Channel.empty().set { ch_shortreads } - Channel.empty().set { meryl_kmers } - Channel.empty().set { genome_size } - Channel.empty().set { ch_versions } + ch_input.set { ch_main } + + /* + This pipeline uses a "meta-stuffing" appraoch. All information + about a sample is always stored in a map stored in [0]/"meta". + Values are extracted from the map to create input channels. + The correspoding key is created or updated from outputs. + This largely eliminates the need for joins. 
+ + The initial keys are defined in + ./subworkflows/local/utils_nfcore_genomeassembler/main.nf + */ + channel.empty().set { meryl_kmers } + // Initialize channels for QC report collection - Channel + channel .of([]) .tap { quast_files } - .tap { nanoq_files } + .tap { fastplong_jsons } .tap { genomescope_files } .map { it -> ["dummy", it] } .tap { busco_files } .map { it -> [it[0], it[1], it[1], it[1], it[1]] } .tap { merqury_files } - /* - ============= - Some checks - ============= - */ - if (!params.ont && !params.hifi) { - error('At least one of params.ont, params.hifi needs to be true.') - } + /* ============= Prepare reads ============= */ - /* - Short reads - */ - if (params.short_reads) { - PREPARE_SHORTREADS(ch_input) - PREPARE_SHORTREADS.out.shortreads.set { ch_shortreads } - PREPARE_SHORTREADS.out.meryl_kmers.set { meryl_kmers } - ch_versions = ch_versions.mix(PREPARE_SHORTREADS.out.versions) - } + PREPARE(ch_main) - ch_input.map { it -> [it.meta, params.genome_size] } - .set { genome_size } - - /* - ONT reads - */ - if (params.ont) { - ONT(ch_input, genome_size) - ONT.out.genome_size.set { genome_size } - ONT.out.ont_reads.set { ch_ont_reads } - - ONT.out.nanoq_report - .concat( - ONT.out.nanoq_stats - ) - .collect { it -> it[1] } - .set { nanoq_files } - ONT.out.genomescope_summary - .concat( - ONT.out.genomescope_plot - ) - .unique() - .collect { it -> it[1] } - .set { genomescope_files } - - ch_versions = ch_versions.mix(ONT.out.versions) - } + PREPARE.out.ch_main.set { ch_main_prepared } + PREPARE.out.meryl_kmers.set { meryl_kmers } /* - HIFI reads + Assembly */ - if (params.hifi) { - HIFI(ch_input) - HIFI.out.hifi_reads.set { ch_hifi_reads } + // This pipeline is named genomeassembler, so everything goes into assemble + // even it might not actually be assembled. 
- ch_versions = ch_versions.mix(HIFI.out.versions) - } + ASSEMBLE(ch_main_prepared, meryl_kmers) - /* - Assembly - */ + ASSEMBLE.out.ch_main.set { ch_main_assembled } - ASSEMBLE(ch_ont_reads, ch_hifi_reads, ch_input, genome_size, meryl_kmers) - ASSEMBLE.out.assembly.set { ch_polished_genome } - ASSEMBLE.out.ref_bam.set { ch_ref_bam } - ASSEMBLE.out.longreads.set { ch_longreads } - ch_versions = ch_versions.mix(ASSEMBLE.out.versions) /* Polishing */ + ch_main_assembled + .branch { + it -> + def polishers = ["pilon", "medaka", "medaka+pilon", "dorado", "dorado+pilon"] + polish: polishers.contains(it.meta.polish) + no_polish: true + } + .set { ch_main_assembled_branched } - POLISH(ch_input, ch_ont_reads, ch_longreads, ch_shortreads, ch_polished_genome, ch_ref_bam, meryl_kmers) - POLISH.out.ch_polished_genome.set { ch_polished_genome } + POLISH(ch_main_assembled_branched.polish, meryl_kmers) - ch_versions = ch_versions.mix(POLISH.out.versions) + ch_main_assembled_branched.no_polish + .mix(POLISH.out.ch_main) + .set { ch_main_polished } + // Update scaffold for meta map + ch_main_polished + .branch { it -> + scaffold: it.meta.scaffold_links || it.meta.scaffold_longstitch || it.meta.scaffold_ragtag + no_scaffold: !it.meta.scaffold_links && !it.meta.scaffold_longstitch && !it.meta.scaffold_ragtag + } + .set { + ch_main_polished_branched + } /* Scaffolding */ - SCAFFOLD(ch_input, ch_longreads, ch_polished_genome, ch_refs, ch_ref_bam, meryl_kmers, genome_size) + SCAFFOLD(ch_main_polished_branched.scaffold, meryl_kmers) + + // Recreate ch_main, even though it is not used since there are no later steps. 
- ch_versions = ch_versions.mix(SCAFFOLD.out.versions) + ch_main_polished_branched + .no_scaffold + .mix(SCAFFOLD.out.ch_main) + .set { ch_main_scaffolded } + PREPARE.out.fastplong_json_reports + .map { it -> it[1] } + .unique() + .collect() + .set { fastplong_jsons } + + PREPARE.out.genomescope_summary + .concat( + PREPARE.out.genomescope_plot + ) + .unique() + .collect { it -> it[1] } + .set { genomescope_files } + + def topic_versions = channel.topic("versions") + .distinct() + .branch { entry -> + versions_file: entry instanceof Path + versions_tuple: true + } + + def topic_versions_string = topic_versions.versions_tuple + .map { process, tool, version -> + [ process[process.lastIndexOf(':')+1..-1], " ${tool}: ${version}" ] + } + .groupTuple(by:0) + .map { process, tool_versions -> + tool_versions.unique().sort() + "${process}:\n${tool_versions.join('\n')}" + } + ch_collated_versions = topic_versions_string /* Report */ - softwareVersionsToYAML(ch_versions) + ch_collated_versions .collectFile( storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_' + 'pipeline_software_' + 'versions.yml', @@ -158,10 +163,12 @@ workflow GENOMEASSEMBLER { ) quast_files - .concat( - ASSEMBLE.out.assembly_quast_reports.concat( + .mix( + ASSEMBLE.out.assembly_quast_reports + .mix( POLISH.out.polish_quast_reports - ).concat( + ) + .mix( SCAFFOLD.out.scaffold_quast_reports ) ) @@ -170,10 +177,12 @@ workflow GENOMEASSEMBLER { .set { quast_files } busco_files - .concat( - ASSEMBLE.out.assembly_busco_reports.concat( + .mix( + ASSEMBLE.out.assembly_busco_reports + .mix( POLISH.out.polish_busco_reports - ).concat( + ) + .mix( SCAFFOLD.out.scaffold_busco_reports ) ) @@ -182,10 +191,12 @@ workflow GENOMEASSEMBLER { .set { busco_files } merqury_files - .concat( - ASSEMBLE.out.assembly_merqury_reports.concat( + .mix( + ASSEMBLE.out.assembly_merqury_reports + .mix( POLISH.out.polish_merqury_reports - ).concat( + ) + .mix( SCAFFOLD.out.scaffold_merqury_reports ) ) @@ -195,54 +206,34 @@ 
workflow GENOMEASSEMBLER { .collect() .set { merqury_files } - Channel + channel .fromPath("${projectDir}/assets/report/*") .collect() .set { report_files } // Report files - Channel + channel .fromPath("${projectDir}/assets/report/functions/*") .collect() .set { report_functions } - - if(!params.merqury) { - merqury_files = Channel.of([]) - } - - REPORT(report_files, report_functions, nanoq_files, genomescope_files, quast_files, busco_files, merqury_files, Channel.fromPath("${params.outdir}/pipeline_info/nf_core_pipeline_software_versions.yml")) - - // - // Collate and save software versions - // - def topic_versions = Channel.topic("versions") - .distinct() - .branch { entry -> - versions_file: entry instanceof Path - versions_tuple: true - } - - def topic_versions_string = topic_versions.versions_tuple - .map { process, tool, version -> - [ process[process.lastIndexOf(':')+1..-1], " ${tool}: ${version}" ] - } - .groupTuple(by:0) - .map { process, tool_versions -> - tool_versions.unique().sort() - "${process}:\n${tool_versions.join('\n')}" - } - - softwareVersionsToYAML(ch_versions.mix(topic_versions.versions_file)) - .mix(topic_versions_string) - .collectFile( - storeDir: "${params.outdir}/pipeline_info", - name: 'nf_core_' + 'genomeassembler_software_' + 'versions.yml', - sort: true, - newLine: true - ) + channel + .fromPath("${projectDir}/assets/report/scripts/*") + .collect() + .set { report_scripts } + + REPORT( report_files, + report_functions, + report_scripts, + fastplong_jsons, + genomescope_files, + quast_files, + busco_files, + merqury_files, + channel.fromPath("${params.outdir}/pipeline_info/nf_core_pipeline_software_versions.yml"), + ch_main.map { it -> [sample: [id: it.meta.id, group: it.meta.group]] }.collect() + ) _report = REPORT.out.report_html.toList() emit: _report - versions = ch_versions // channel: [ path(versions.yml) ] }