diff --git a/.editorconfig b/.editorconfig deleted file mode 100644 index 6d9b74cc..00000000 --- a/.editorconfig +++ /dev/null @@ -1,37 +0,0 @@ -root = true - -[*] -charset = utf-8 -end_of_line = lf -insert_final_newline = true -trim_trailing_whitespace = true -indent_size = 4 -indent_style = space - -[*.{md,yml,yaml,html,css,scss,js}] -indent_size = 2 - -# These files are edited and tested upstream in nf-core/modules -[/modules/nf-core/**] -charset = unset -end_of_line = unset -insert_final_newline = unset -trim_trailing_whitespace = unset -indent_style = unset -[/subworkflows/nf-core/**] -charset = unset -end_of_line = unset -insert_final_newline = unset -trim_trailing_whitespace = unset -indent_style = unset - -[/assets/email*] -indent_size = unset - -# ignore python and markdown -[*.{py,md}] -indent_style = unset - -# ignore ro-crate metadata files -[**/ro-crate-metadata.json] -insert_final_newline = unset diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index c2103669..e01c8db1 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -78,7 +78,7 @@ If you wish to contribute a new step, please use the following coding standards: 5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core pipelines schema build` tool). 6. Add sanity checks and validation for all relevant parameters. 7. Perform local tests to validate that the new code works as expected. -8. If applicable, add a new test command in `.github/workflow/ci.yml`. +8. If applicable, add a new test in the `tests` directory. 
### Default values diff --git a/.github/actions/get-shards/action.yml b/.github/actions/get-shards/action.yml new file mode 100644 index 00000000..34085279 --- /dev/null +++ b/.github/actions/get-shards/action.yml @@ -0,0 +1,69 @@ +name: "Get number of shards" +description: "Get the number of nf-test shards for the current CI job" +inputs: + max_shards: + description: "Maximum number of shards allowed" + required: true + paths: + description: "Component paths to test" + required: false + tags: + description: "Tags to pass as argument for nf-test --tag parameter" + required: false +outputs: + shard: + description: "Array of shard numbers" + value: ${{ steps.shards.outputs.shard }} + total_shards: + description: "Total number of shards" + value: ${{ steps.shards.outputs.total_shards }} +runs: + using: "composite" + steps: + - name: Install nf-test + uses: nf-core/setup-nf-test@v1 + with: + version: ${{ env.NFT_VER }} + - name: Get number of shards + id: shards + shell: bash + run: | + # Run nf-test with dynamic parameter + nftest_output=$(nf-test test \ + --profile +docker \ + $(if [ -n "${{ inputs.tags }}" ]; then echo "--tag ${{ inputs.tags }}"; fi) \ + --dry-run \ + --ci \ + --changed-since HEAD^) || { + echo "nf-test command failed with exit code $?" + echo "Full output: $nftest_output" + exit 1 + } + echo "nf-test dry-run output: $nftest_output" + + # Default values for shard and total_shards + shard="[]" + total_shards=0 + + # Check if there are related tests + if echo "$nftest_output" | grep -q 'No tests to execute'; then + echo "No related tests found." + else + # Extract the number of related tests + number_of_shards=$(echo "$nftest_output" | sed -n 's|.*Executed \([0-9]*\) tests.*|\1|p') + if [[ -n "$number_of_shards" && "$number_of_shards" -gt 0 ]]; then + shards_to_run=$(( $number_of_shards < ${{ inputs.max_shards }} ? $number_of_shards : ${{ inputs.max_shards }} )) + shard=$(seq 1 "$shards_to_run" | jq -R . | jq -c -s .) 
+ total_shards="$shards_to_run" + else + echo "Unexpected output format. Falling back to default values." + fi + fi + + # Write to GitHub Actions outputs + echo "shard=$shard" >> $GITHUB_OUTPUT + echo "total_shards=$total_shards" >> $GITHUB_OUTPUT + + # Debugging output + echo "Final shard array: $shard" + echo "Total number of shards: $total_shards" diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml new file mode 100644 index 00000000..bf44d961 --- /dev/null +++ b/.github/actions/nf-test/action.yml @@ -0,0 +1,109 @@ +name: "nf-test Action" +description: "Runs nf-test with common setup steps" +inputs: + profile: + description: "Profile to use" + required: true + shard: + description: "Shard number for this CI job" + required: true + total_shards: + description: "Total number of test shards(NOT the total number of matrix jobs)" + required: true + paths: + description: "Test paths" + required: true + tags: + description: "Tags to pass as argument for nf-test --tag parameter" + required: false +runs: + using: "composite" + steps: + - name: Setup Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ env.NXF_VERSION }}" + + - name: Set up Python + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + with: + python-version: "3.13" + + - name: Install nf-test + uses: nf-core/setup-nf-test@v1 + with: + version: "${{ env.NFT_VER }}" + install-pdiff: true + + - name: Setup apptainer + if: contains(inputs.profile, 'singularity') + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + if: contains(inputs.profile, 'singularity') + shell: bash + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Conda setup + if: contains(inputs.profile, 'conda') + uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3 + with: + auto-update-conda: true + conda-solver: libmamba + conda-remove-defaults: true + + - name: Run nf-test + shell: 
bash + env: + NFT_WORKDIR: ${{ env.NFT_WORKDIR }} + run: | + nf-test test \ + --profile=+${{ inputs.profile }} \ + $(if [ -n "${{ inputs.tags }}" ]; then echo "--tag ${{ inputs.tags }}"; fi) \ + --ci \ + --changed-since HEAD^ \ + --verbose \ + --tap=test.tap \ + --shard ${{ inputs.shard }}/${{ inputs.total_shards }} + + # Save the absolute path of the test.tap file to the output + echo "tap_file_path=$(realpath test.tap)" >> $GITHUB_OUTPUT + + - name: Generate test summary + if: always() + shell: bash + run: | + # Add header if it doesn't exist (using a token file to track this) + if [ ! -f ".summary_header" ]; then + echo "# 🚀 nf-test results" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Status | Test Name | Profile | Shard |" >> $GITHUB_STEP_SUMMARY + echo "|:------:|-----------|---------|-------|" >> $GITHUB_STEP_SUMMARY + touch .summary_header + fi + + if [ -f test.tap ]; then + while IFS= read -r line; do + if [[ $line =~ ^ok ]]; then + test_name="${line#ok }" + # Remove the test number from the beginning + test_name="${test_name#* }" + echo "| ✅ | ${test_name} | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + elif [[ $line =~ ^not\ ok ]]; then + test_name="${line#not ok }" + # Remove the test number from the beginning + test_name="${test_name#* }" + echo "| ❌ | ${test_name} | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + fi + done < test.tap + else + echo "| ⚠️ | No test results found | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + fi + + - name: Clean up + if: always() + shell: bash + run: | + sudo rm -rf /home/ubuntu/tests/ diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 8dc44375..998de5b9 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -4,43 +4,23 @@ name: nf-core AWS full size tests # It runs 
the -profile 'test_full' on AWS batch on: - pull_request: - branches: - - main - - master workflow_dispatch: pull_request_review: types: [submitted] + release: + types: [published] + jobs: run-platform: name: Run AWS full tests - # run only if the PR is approved by at least 2 reviewers and against the master branch or manually triggered - if: github.repository == 'nf-core/genomeassembler' && github.event.review.state == 'approved' && github.event.pull_request.base.ref == 'master' || github.event_name == 'workflow_dispatch' + # run only if the PR is approved by at least 2 reviewers and against the master/main branch or manually triggered + if: github.repository == 'nf-core/genomeassembler' && github.event.review.state == 'approved' && (github.event.pull_request.base.ref == 'master' || github.event.pull_request.base.ref == 'main') || github.event_name == 'workflow_dispatch' || github.event_name == 'release' runs-on: ubuntu-latest steps: - - name: Get PR reviews - uses: octokit/request-action@v2.x - if: github.event_name != 'workflow_dispatch' - id: check_approvals - continue-on-error: true - with: - route: GET /repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/reviews?per_page=100 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Check for approvals - if: ${{ failure() && github.event_name != 'workflow_dispatch' }} - run: | - echo "No review approvals found. At least 2 approvals are required to run this action automatically." 
- exit 1 - - - name: Check for enough approvals (>=2) - id: test_variables - if: github.event_name != 'workflow_dispatch' + - name: Set revision variable + id: revision run: | - JSON_RESPONSE='${{ steps.check_approvals.outputs.data }}' - CURRENT_APPROVALS_COUNT=$(echo $JSON_RESPONSE | jq -c '[.[] | select(.state | contains("APPROVED")) ] | length') - test $CURRENT_APPROVALS_COUNT -ge 2 || exit 1 # At least 2 approvals are required + echo "revision=${{ (github.event_name == 'workflow_dispatch' || github.event_name == 'release') && github.sha || 'dev' }}" >> "$GITHUB_OUTPUT" - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 @@ -51,16 +31,16 @@ jobs: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - revision: ${{ github.sha }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/genomeassembler/work-${{ github.sha }} + revision: ${{ steps.revision.outputs.revision }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/genomeassembler/work-${{ steps.revision.outputs.revision }} parameters: | { "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/genomeassembler/results-${{ github.sha }}" + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/genomeassembler/results-${{ steps.revision.outputs.revision }}" } profiles: test_full - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: Seqera Platform debug log file path: | diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 1e25cad0..9e625149 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -25,7 +25,7 @@ jobs: } profiles: test - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: Seqera Platform debug log file path: | diff --git 
a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a268e192..9a5a5254 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,7 +27,7 @@ jobs: strategy: matrix: NXF_VER: - - "24.04.2" + - "24.10.5" - "latest-everything" ASSEMBLER: - "hifi_flye" @@ -35,7 +35,8 @@ jobs: - "ont_flye" - "ont_hifiasm" - "hifiont_hifiasm" - - "hifiont_flyehifiasm" + - "hifiont_flye_on_hifiasm" + - "hifiont_hifiasm_on_hifiasm" profile: - "conda" - "docker" @@ -85,9 +86,10 @@ jobs: echo $(realpath $CONDA)/condabin >> $GITHUB_PATH echo $(realpath python) >> $GITHUB_PATH - - name: "Run pipeline with test data ${{ matrix.ASSEMBLER }}" + - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.ASSEMBLER }} | ${{ matrix.profile }}" + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} run: | - nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.ASSEMBLER }},test,${{matrix.profile}} --outdir ./results_${{matrix.profile}}_${{ matrix.ASSEMBLER }} + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.ASSEMBLER }},test,${{matrix.profile}} --outdir ./results_${{matrix.profile}}_${{ matrix.ASSEMBLER }} - name: Clean up Disk space uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index 0b6b1f27..ac030fd5 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 + - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. 
Remove stale label or add a comment if it is still useful." diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index ab06316e..999bcc38 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -12,14 +12,6 @@ on: required: true default: "dev" pull_request: - types: - - opened - - edited - - synchronize - branches: - - main - - master - pull_request_target: branches: - main - master @@ -52,9 +44,9 @@ jobs: - name: Disk space cleanup uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: - python-version: "3.12" + python-version: "3.13" architecture: "x64" - name: Setup Apptainer @@ -120,6 +112,7 @@ jobs: echo "IMAGE_COUNT_AFTER=$image_count" >> "$GITHUB_OUTPUT" - name: Compare container image counts + id: count_comparison run: | if [ "${{ steps.count_initial.outputs.IMAGE_COUNT_INITIAL }}" -ne "${{ steps.count_afterwards.outputs.IMAGE_COUNT_AFTER }}" ]; then initial_count=${{ steps.count_initial.outputs.IMAGE_COUNT_INITIAL }} @@ -132,3 +125,10 @@ jobs: else echo "The pipeline can be downloaded successfully!" 
fi + + - name: Upload Nextflow logfile for debugging purposes + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + with: + name: nextflow_logfile.txt + path: .nextflow.log* + include-hidden-files: true diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix_linting.yml similarity index 96% rename from .github/workflows/fix-linting.yml rename to .github/workflows/fix_linting.yml index aff1a510..52d91ad3 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix_linting.yml @@ -32,9 +32,9 @@ jobs: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} # Install and run pre-commit - - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: - python-version: "3.12" + python-version: "3.13" - name: Install pre-commit run: pip install pre-commit diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index dbd52d5a..8b0f88c3 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -3,9 +3,6 @@ name: nf-core linting # It runs the `nf-core pipelines lint` and markdown lint tests to ensure # that the code meets the nf-core guidelines. 
on: - push: - branches: - - dev pull_request: release: types: [published] @@ -16,10 +13,10 @@ jobs: steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - - name: Set up Python 3.12 - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 + - name: Set up Python 3.13 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: - python-version: "3.12" + python-version: "3.13" - name: Install pre-commit run: pip install pre-commit @@ -36,13 +33,13 @@ jobs: - name: Install Nextflow uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: - python-version: "3.12" + python-version: "3.13" architecture: "x64" - name: read .nf-core.yml - uses: pietrobolcato/action-read-yaml@1.1.0 + uses: pietrobolcato/action-read-yaml@9f13718d61111b69f30ab4ac683e67a56d254e1d # 1.1.0 id: read_yml with: config: ${{ github.workspace }}/.nf-core.yml @@ -74,7 +71,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 95b6b6af..d43797d9 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@20319c5641d495c8a52e688b7dc5fada6c3a9fbc # v8 + uses: dawidd6/action-download-artifact@ac66b43f0e6a346234dd65d4d0c8fbb31cb316e5 # v11 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: 
marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 + uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml new file mode 100644 index 00000000..e7b58449 --- /dev/null +++ b/.github/workflows/nf-test.yml @@ -0,0 +1,143 @@ +name: Run nf-test +on: + pull_request: + paths-ignore: + - "docs/**" + - "**/meta.yml" + - "**/*.md" + - "**/*.png" + - "**/*.svg" + release: + types: [published] + workflow_dispatch: + +# Cancel if a newer run is started +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NFT_VER: "0.9.2" + NFT_WORKDIR: "~" + NXF_ANSI_LOG: false + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity + +jobs: + nf-test-changes: + name: nf-test-changes + runs-on: # use self-hosted runners + - runs-on=${{ github.run_id }}-nf-test-changes + - runner=4cpu-linux-x64 + outputs: + shard: ${{ steps.set-shards.outputs.shard }} + total_shards: ${{ steps.set-shards.outputs.total_shards }} + steps: + - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner + run: | + ls -la ./ + rm -rf ./* || true + rm -rf ./.??* || true + ls -la ./ + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + with: + fetch-depth: 0 + + - name: get number of shards + id: set-shards + uses: ./.github/actions/get-shards + env: + NFT_VER: ${{ env.NFT_VER }} + with: + max_shards: 7 + + - name: debug + run: | + echo ${{ steps.set-shards.outputs.shard }} + echo ${{ steps.set-shards.outputs.total_shards }} + + nf-test: + name: "${{ matrix.profile }} | ${{ matrix.NXF_VER }} | ${{ matrix.shard }}/${{ 
needs.nf-test-changes.outputs.total_shards }}" + needs: [nf-test-changes] + if: ${{ needs.nf-test-changes.outputs.total_shards != '0' }} + runs-on: # use self-hosted runners + - runs-on=${{ github.run_id }}-nf-test + - runner=4cpu-linux-x64 + strategy: + fail-fast: false + matrix: + shard: ${{ fromJson(needs.nf-test-changes.outputs.shard) }} + profile: [conda, docker, singularity] + isMain: + - ${{ github.base_ref == 'master' || github.base_ref == 'main' }} + # Exclude conda and singularity on dev + exclude: + - isMain: false + profile: "conda" + - isMain: false + profile: "singularity" + NXF_VER: + - "24.10.5" + - "latest-everything" + env: + NXF_ANSI_LOG: false + TOTAL_SHARDS: ${{ needs.nf-test-changes.outputs.total_shards }} + + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + with: + fetch-depth: 0 + + - name: Run nf-test + id: run_nf_test + uses: ./.github/actions/nf-test + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} + env: + NFT_WORKDIR: ${{ env.NFT_WORKDIR }} + with: + profile: ${{ matrix.profile }} + shard: ${{ matrix.shard }} + total_shards: ${{ env.TOTAL_SHARDS }} + + - name: Report test status + if: ${{ always() }} + run: | + if [[ "${{ steps.run_nf_test.outcome }}" == "failure" ]]; then + echo "::error::Test with ${{ matrix.NXF_VER }} failed" + # Add to workflow summary + echo "## ❌ Test failed: ${{ matrix.profile }} | ${{ matrix.NXF_VER }} | Shard ${{ matrix.shard }}/${{ env.TOTAL_SHARDS }}" >> $GITHUB_STEP_SUMMARY + if [[ "${{ matrix.NXF_VER }}" == "latest-everything" ]]; then + echo "::warning::Test with latest-everything failed but will not cause workflow failure. Please check if the error is expected or if it needs fixing." 
+ fi + if [[ "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then + exit 1 + fi + fi + + confirm-pass: + needs: [nf-test] + if: always() + runs-on: # use self-hosted runners + - runs-on=${{ github.run_id }}-confirm-pass + - runner=2cpu-linux-x64 + steps: + - name: One or more tests failed (excluding latest-everything) + if: ${{ contains(needs.*.result, 'failure') }} + run: exit 1 + + - name: One or more tests cancelled + if: ${{ contains(needs.*.result, 'cancelled') }} + run: exit 1 + + - name: All tests ok + if: ${{ contains(needs.*.result, 'success') }} + run: exit 0 + + - name: debug-print + if: always() + run: | + echo "::group::DEBUG: `needs` Contents" + echo "DEBUG: toJSON(needs) = ${{ toJSON(needs) }}" + echo "DEBUG: toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" + echo "::endgroup::" diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index 76a9e67e..0f732495 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -30,7 +30,7 @@ jobs: bsky-post: runs-on: ubuntu-latest steps: - - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 + - uses: zentered/bluesky-post-action@6461056ea355ea43b977e149f7bf76aaa572e5e8 # v0.3.0 with: post: | Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
diff --git a/.github/workflows/template_version_comment.yml b/.github/workflows/template-version-comment.yml similarity index 95% rename from .github/workflows/template_version_comment.yml rename to .github/workflows/template-version-comment.yml index 537529bc..beb5c77f 100644 --- a/.github/workflows/template_version_comment.yml +++ b/.github/workflows/template-version-comment.yml @@ -14,7 +14,7 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} - name: Read template version from .nf-core.yml - uses: nichmor/minimal-read-yaml@v0.0.2 + uses: nichmor/minimal-read-yaml@1f7205277e25e156e1f63815781db80a6d490b8f # v0.0.2 id: read_yml with: config: ${{ github.workspace }}/.nf-core.yml diff --git a/.gitignore b/.gitignore index a42ce016..f232546a 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ testing/ testing* *.pyc null/ +.nf-test/ +.nf-test.log diff --git a/.nf-core.yml b/.nf-core.yml index f7103eb4..377951a7 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,13 +1,13 @@ lint: + multiqc_config: false files_exist: - conf/igenomes.config - conf/igenomes_ignored.config - - assets/multiqc_config.yml - assets/sendmail_template.txt + - assets/multiqc_config.yml files_unchanged: - .github/CONTRIBUTING.md - multiqc_config: false -nf_core_version: 3.2.0 +nf_core_version: 3.3.2 repository_type: pipeline template: author: Niklas Schandry @@ -20,4 +20,4 @@ template: skip_features: - multiqc - igenomes - version: 1.0.1 + version: 1.1.0 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1dec8650..402f0a1c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,10 +4,26 @@ repos: hooks: - id: prettier additional_dependencies: - - prettier@3.2.5 - - - repo: https://github.com/editorconfig-checker/editorconfig-checker.python - rev: "3.1.2" + - prettier@3.6.2 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 hooks: - - id: editorconfig-checker - alias: ec + - id: trailing-whitespace + args: 
[--markdown-linebreak-ext=md] + exclude: | + (?x)^( + .*ro-crate-metadata.json$| + modules/nf-core/.*| + subworkflows/nf-core/.*| + .*\.snap$| + .*svg$ + )$ + - id: end-of-file-fixer + exclude: | + (?x)^( + .*ro-crate-metadata.json$| + modules/nf-core/.*| + subworkflows/nf-core/.*| + .*\.snap$| + .*svg$ + )$ diff --git a/.prettierignore b/.prettierignore index edd29f01..105b03c5 100644 --- a/.prettierignore +++ b/.prettierignore @@ -11,3 +11,5 @@ testing* *.pyc bin/ ro-crate-metadata.json +*.svg +tests/ diff --git a/.prettierrc.yml b/.prettierrc.yml index c81f9a76..07dbd8bb 100644 --- a/.prettierrc.yml +++ b/.prettierrc.yml @@ -1 +1,6 @@ printWidth: 120 +tabWidth: 4 +overrides: + - files: "*.{md,yml,yaml,html,css,scss,js,cff}" + options: + tabWidth: 2 diff --git a/CHANGELOG.md b/CHANGELOG.md index b6879797..5de49257 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,49 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0.1 'Aluminium Pigeon' - [2025-03.18] +## v1.1.0 'Brass Pigeon' - [2025-07-21] + +### `Added` + +[#170](https://github.com/nf-core/genomeassembler/issues/170) - Switched to nf-core template 3.3.2 + +[#164](https://github.com/nf-core/genomeassembler/issues/164) - Switched to nf-core template 3.3.1 + +[#153](https://github.com/nf-core/genomeassembler/issues/153) - Switched to nf-core template 3.2.1 + +[#144](https://github.com/nf-core/genomeassembler/issues/144) - Added `hifiasm_on_hifiasm` assembly strategy + +[#158](https://github.com/nf-core/genomeassembler/pull/158) - Added tables for QUAST and BUSCO to report, (using `gt`, added `gt` to container and env) + +### `Fixed` + +[#169](https://github.com/nf-core/genomeassembler/pull/169) - Module maintenance: gfa2fa container and conda env now report the same version of `mawk`. 
+ +[#154](https://github.com/nf-core/genomeassembler/pull/154) - Module maintenance: + +- updated `hifiasm`, `minimap2`, `links` nf-core modules +- updated container in local `quast` module +- separated `modules.config` into several files for easier navigation and maintenance + +[#138](https://github.com/nf-core/genomeassembler/pull/138) - Switched to RagTag nf-core module + +[#142](https://github.com/nf-core/genomeassembler/pull/142) - Switch `--collect` to accept a glob pattern instead of a folder, consistent with input validation. + +[#131](https://github.com/nf-core/genomeassembler/pull/131) - Refactored QC steps into subworkflow. + +[#133](https://github.com/nf-core/genomeassembler/pull/133) - Updated the input validation to be more strict. This should prevent some down the line errors in the pipeline + +[#136](https://github.com/nf-core/genomeassembler/pull/136) - Switched to using ragtag `patch` instead of `scaffold` for `flye_on_hifiasm` + +[#145](https://github.com/nf-core/genomeassembler/pull/145) - Fixed `--skip_assembly` input validation bug. 
+ +[#148](https://github.com/nf-core/genomeassembler/pull/148) - Switched to LINKS nf-core module + +### `Dependencies` + +### `Deprecated` + +## v1.0.1 'Aluminium Pigeon' - [2025-03-19] Bugfix release diff --git a/README.md b/README.md index 932cdde3..5719717b 100644 --- a/README.md +++ b/README.md @@ -9,13 +9,15 @@ [![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14986998-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14986998) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) +[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Seqera 
Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction diff 
--git a/assets/report/report.qmd b/assets/report/report.qmd index 1373b025..16cb578c 100644 --- a/assets/report/report.qmd +++ b/assets/report/report.qmd @@ -1,6 +1,6 @@ --- title: "nf-core/genomeassembler report" -author: "Niklas Schandry" +author: "" format: dashboard editor: source nav-buttons: @@ -316,7 +316,31 @@ for (i in 1:length(unique(quast_stats$sample))) { paste0('\n'), paste0(':::'), paste0('\n\n'), - paste0('### Plots \n\n'), + paste0('### {.tabset}'), + paste0('\n\n'), + paste0('#### Tables \n\n'), + quast_stats %>% + filter(sample == unique(quast_stats$sample)[i]) %>% + dplyr::select(sample, stage, stat, value) %>% + pivot_wider(names_from = "stat", values_from = "value") %>% + #knitr::kable(format = 'html', caption = glue::glue('QUAST statistics')) + gt::gt() %>% + gt::cols_nanoplot(columns = starts_with("# contigs ("), + new_col_name = "Contigs_by_size", + new_col_label = gt::md("*# Contigs by size*")) %>% + gt::cols_nanoplot(columns = starts_with("Total length ("), + new_col_name = "Total_length", + new_col_label = gt::md("*Total length*")) %>% + gt::tab_footnote( + footnote = "Breaks are: contigs >= 0, 1kb, 5kb, 10kb, 25kb, 50kb", + locations = gt::cells_column_labels(columns = c(Contigs_by_size, Total_length))) %>% + gt::cols_align(align = "center", + columns = c(Contigs_by_size, Total_length)) %>% + gt::cols_move(Contigs_by_size, "Largest contig") %>% + gt::cols_move(Total_length, "Total length") %>% + gt::as_raw_html(), + paste0('\n\n'), + paste0('#### Plots \n\n'), knitr::knit_child(glue::glue('quast_files/length/_{ unique(quast_stats$sample)[i] }_quast.Rmd'), envir = globalenv(), quiet = TRUE), @@ -464,7 +488,15 @@ for (i in 1:length(unique(busco_reports$sample))) { paste0('\n'), paste0(':::'), paste0('\n\n'), - paste('### { .flow }'), + paste('###'), + paste0('\n\n'), + busco_reports %>% + filter(sample == cur_sample) %>% + dplyr::select(sample, stage, Var, value) %>% + mutate(Var = str_replace_all(Var, "_", " ")) %>% + 
pivot_wider(names_from = "Var", values_from = "value") %>% + gt::gt() %>% + gt::as_raw_html(), paste0('\n\n'), knitr::knit_child(glue::glue('busco_files/orthologs/_{ unique(busco_reports$sample)[i] }_orthologs.Rmd'), envir = globalenv(), diff --git a/assets/schema_input.json b/assets/schema_input.json index 7e60400d..0271e948 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,51 +10,50 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces", + "errorMessage": "Sample name must be provided, has to be a string value and cannot contain spaces", "meta": ["id"] }, "ontreads": { "type": "string", "format": "file-path", + "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "ONT reads cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "errorMessage": "ONT reads cannot contain spaces, has to exist and must have extension '.fq.gz' or '.fastq.gz'" }, "hifireads": { "type": "string", "format": "file-path", + "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "HiFi reads cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "errorMessage": "HiFi reads cannot contain spaces, has to exist and must have extension '.fq.gz' or '.fastq.gz'" }, "ref_fasta": { "type": "string", "format": "file-path", - "pattern": "^\\S+\\.f(ast)?a", - "errorMessage": "Reference fasta cannot contain spaces and must have extension '.fa' or '.fasta'" + "exists": true, + "pattern": "^\\S+\\.f(n|ast)?a", + "errorMessage": "Reference fasta cannot contain spaces, has to exist and must have extension '.fa', '.fna' or '.fasta'" }, "ref_gff": { "type": "string", "format": "file-path", + "exists": true, "pattern": "^\\S+\\.gff(3)?", - "errorMessage": "Reference gff cannot contain spaces and must have extension '.gff' or '.gff3'" + "errorMessage": "Reference gff cannot contain spaces, has to exist and must have extension '.gff' or '.gff3'" 
}, "shortread_F": { "type": "string", "format": "file-path", + "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "shortread_F cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "errorMessage": "shortread_F cannot contain spaces, has to exist and must have extension '.fq.gz' or '.fastq.gz'" }, "shortread_R": { - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ], - "errorMessage": "shortread_R cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "shortread_R cannot contain spaces, has to exist and must have extension '.fq.gz' or '.fastq.gz'" }, "paired": { "type": "boolean", diff --git a/conf/base.config b/conf/base.config index 508c751f..da8ec780 100644 --- a/conf/base.config +++ b/conf/base.config @@ -14,8 +14,8 @@ process { memory = { 6.GB * task.attempt } time = { 4.h * task.attempt } - errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } - maxRetries = 3 // Increased number of retries + errorStrategy = { task.exitStatus in ((130..145) + 104 + 175) ? 'retry' : 'finish' } + maxRetries = 1 maxErrors = '-1' withLabel:process_single { @@ -54,4 +54,8 @@ process { errorStrategy = 'retry' maxRetries = 2 } + withLabel: process_gpu { + ext.use_gpu = { workflow.profile.contains('gpu') } + accelerator = { workflow.profile.contains('gpu') ? 1 : null } + } } diff --git a/conf/modules.config b/conf/modules.config index 02772c24..efacbbdd 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -17,558 +17,38 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] - /* - ---------- - Reads in - ONT - ---------- - */ - // nanoq: local module; QC - withName: NANOQ { - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/nanoq" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // only local module - withName: COLLECT { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/collect" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // porechop: nf-core module - withName: PORECHOP { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/porechop" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // Genomescope / jellyfish: local modules - withName: COUNT { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/count/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: DUMP { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/dump/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: STATS { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/stats/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: HISTO { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/histo/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: GENOMESCOPE { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/genomescope/genomescope/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - /* - ---------- - Reads in - HiFi - all nf-core - ---------- - */ - withName: LIMA { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/lima/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: TO_FASTQ { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/lima/fastq/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - /* - ---------- - Reads in - Short reads - all nf-core - ---------- - */ - withName: TRIMGALORE { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/trimgalore" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: MERYL_COUNT { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/meryl/count/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: MERYL_UNIONSUM { - publishDir = [ - path: { "${params.outdir}/${meta.id}/reads/meryl/unionsum/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - /* - ---------- - ASSEMBLY - ---------- - */ - // FLYE: nf-core - withName: FLYE { - ext.args = { - [ - meta.genome_size ? "--genome-size ${meta.genome_size}" : '', - params.flye_args - ].join(" ").trim() - } - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/flye/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // HIFIASM: - /* updated nf-core module */ - - withName: HIFIASM { - ext.args = { [ params.hifiasm_args ].join(" ").trim() } - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/hifiasm/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - withName: HIFIASM_ONT { - ext.args = { [ params.hifiasm_args, "--ont" ].join(" ").trim() } - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/hifiasm/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: GFA_2_FA { - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/hifiasm/fasta" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*ASSEMBLE:.*RAGTAG_SCAFFOLD' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/ragtag/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_assembly_scaffold" } - } - - /* - ---------- - Polishing - ---------- - */ - // Medaka: local; nf-core module cant deal with gzipped input - withName: MEDAKA { - ext.args1 = { } // args mini_align - ext.args2 = { [params.medaka_model ? "--model ${params.medaka_model}" : ''].join(" ").trim() } // args medaka_inference - ext.args3 = { } // args medaka sequence - ext.prefix = { "${meta.id}_medaka" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/polish/medaka" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // Pilon: nf-core - withName: PILON { - ext.prefix = { "${meta.id}_pilon" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/polish/pilon" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - /* - ---------- - Scaffolding - None of the tools used has a core module. - ---------- - */ - // RagTag - withName: '.*SCAFFOLD:.*RAGTAG_SCAFFOLD' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/ragtag/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - ext.prefix = { "${meta.id}_ragtag" } - } - withName: LINKS { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/links/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_links" } - } - withName: LONGSTITCH { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/longstitch/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_longstitch" } - } - /* - -------- - Annotations - liftoff nf-core module - -------- - gff file goes with fasta file - */ - - withName: '.*ASSEMBLE:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/assembly/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_assembly" } - } - - withName: '.*PILON:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/polish/pilon/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_pilon" } - } - - withName: '.*MEDAKA:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/polish/medaka" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_medaka" } - } - - withName: '.*RAGTAG:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/ragtag/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_ragtag" } - } - - withName: '.*LONGSTITCH:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/longstitch" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - ext.prefix = { "${meta.id}_longstitch" } - } - - withName: '.*LINKS:.*LIFTOFF' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/scaffold/links" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.prefix = { "${meta.id}_links" } - } - - /* - ---------- - QC - ---------- - */ - - // BUSCO: nf-core - withName: '.*:ASSEMBLE:.*:BUSCO' { - ext.prefix = { "${meta.id}_assembly-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*PILON:.*:BUSCO' { - ext.prefix = { "${meta.id}_pilon-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*MEDAKA:.*:BUSCO' { - ext.prefix = { "${meta.id}_medaka-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LINKS:.*:BUSCO' { - ext.prefix = { "${meta.id}_links-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LONGSTITCH:.*:BUSCO' { - ext.prefix = { "${meta.id}_longstitch-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - // avoid catching ragtag from ont_on_hifi assembly - withName: '.*:SCAFFOLD:.*RAGTAG:.*:BUSCO' { - ext.prefix = { "${meta.id}_ragtag-${lineage}" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, - mode: params.publish_dir_mode, - pattern: "*{-busco,_summary}*", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - // QUAST: Prefer to keep the local module since it can deal with the inputs I have - withName: '.*:ASSEMBLE:.*:QUAST' { - ext.prefix = { "${meta.id}_assembly" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*PILON:.*:QUAST' { - ext.prefix = { "${meta.id}_pilon" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*MEDAKA:.*:QUAST' { - ext.prefix = { "${meta.id}_medaka" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LINKS:.*:QUAST' { - ext.prefix = { "${meta.id}_links" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LONGSTITCH:.*:QUAST' { - ext.prefix = { "${meta.id}_longstitch" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - // avoid catching ragtag from ont_on_hifi assembly - withName: '.*:SCAFFOLD:.*RAGTAG:.*:QUAST' { - ext.prefix = { "${meta.id}_ragtag" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - // MERQURY: nf-core - withName: '.*:ASSEMBLE:.*:MERQURY' { - ext.prefix = { "${meta.id}_assembly" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*PILON:.*:MERQURY' { - ext.prefix = { "${meta.id}_pilon" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*MEDAKA:.*:MERQURY' { - ext.prefix = { "${meta.id}_medaka" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LINKS:.*:MERQURY' { - ext.prefix = { "${meta.id}_links" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*LONGSTITCH:.*:MERQURY' { - ext.prefix = { "${meta.id}_longstitch" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - // avoid catching ragtag from ont_on_hifi assembly - withName: '.*:SCAFFOLD:.*RAGTAG:.*:MERQURY' { - ext.prefix = { "${meta.id}_ragtag" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/merqury/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // Refence - withName: '.*MAP_TO_REF.*' { - ext.prefix = { "${meta.id}_to_reference" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/reference/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - // Assembly mappings - withName: '.*ASSEMBLE:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_assembly" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - withName: '.*MEDAKA:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_medaka" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - withName: '.*PILON:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_pilon" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - withName: '.*LONGSTITCH:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_longstitch" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - withName: '.*LINKS:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_links" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - withName: '.*RAGTAG:.*MAP_TO_ASSEMBLY.*' { - ext.prefix = { "${meta.id}_ragtag" } - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = { - (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax map-ont" : "-ax map-hifi") : (params.ont) ? "-ax map-ont" : "-ax map-hifi" - } - } - // Pilon mapping - withName: '.*PILON:MAP_SR.*' { - publishDir = [ - path: { "${params.outdir}/${meta.id}/QC/alignments/shortreads/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - ext.prefix = { "${meta.id}_shortreads" } - ext.args = { "-ax sr " } - } - /* - -------- - Report - */ - withName: REPORT { - publishDir = [ - path: { "${params.outdir}/report/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } } + +// Read preparation +includeConfig 'modules/ont-prep.config' +includeConfig 'modules/hifi-prep.config' +includeConfig 'modules/trimgalore.config' + +// Assembly +includeConfig 'modules/assembly.config' + +// Polishing +includeConfig 'modules/polishing.config' + +// Scaffolding +includeConfig 'modules/scaffolding.config' + +// Annotation liftoff +includeConfig 'modules/liftoff.config' + +// QC +// BUSCO +includeConfig 'modules/QC/busco.config' +// QUAST +includeConfig 'modules/QC/quast.config' +// merqury +includeConfig 'modules/QC/merqury.config' +// alignments +includeConfig 'modules/QC/alignments.config' +// Meryl +includeConfig 'modules/QC/meryl.config' +// Jellyfish, Genomescope +includeConfig 'modules/QC/jellyfish-genomescope.config' + +// Report +includeConfig 'modules/report.config' diff --git a/conf/modules/QC/alignments.config b/conf/modules/QC/alignments.config new file mode 100644 index 00000000..f9536164 --- /dev/null +++ b/conf/modules/QC/alignments.config @@ -0,0 +1,81 @@ +process { + // Refence + withName: '.*MAP_TO_REF.*' { + ext.prefix = { "${meta.id}_to_reference" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/reference/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? 
"-ax lr:hq" : "-ax map-hifi" + } + } + // Assembly mappings + withName: '.*ASSEMBLE:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_assembly" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? "-ax lr:hq" : "-ax map-hifi" + } + } + withName: '.*MEDAKA:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_medaka" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? "-ax lr:hq" : "-ax map-hifi" + } + } + withName: '.*PILON:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_pilon" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? "-ax lr:hq" : "-ax map-hifi" + } + } + withName: '.*LONGSTITCH:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_longstitch" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? 
"-ax lr:hq" : "-ax map-hifi" + } + } + withName: '.*LINKS:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_links" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? "-ax lr:hq" : "-ax map-hifi" + } + } + withName: '.*RAGTAG:.*MAP_TO_ASSEMBLY.*' { + ext.prefix = { "${meta.id}_ragtag" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = { + (params.hifi && params.ont) ? (params.qc_reads == 'ONT' ? "-ax lr:hq" : "-ax map-hifi") : (params.ont) ? "-ax lr:hq" : "-ax map-hifi" + } + } +} diff --git a/conf/modules/QC/busco.config b/conf/modules/QC/busco.config new file mode 100644 index 00000000..4972756f --- /dev/null +++ b/conf/modules/QC/busco.config @@ -0,0 +1,57 @@ +process { + withName: '.*:ASSEMBLE:.*:BUSCO' { + ext.prefix = { "${meta.id}_assembly-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*PILON:.*:BUSCO' { + ext.prefix = { "${meta.id}_pilon-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*MEDAKA:.*:BUSCO' { + ext.prefix = { "${meta.id}_medaka-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: '.*LINKS:.*:BUSCO' { + ext.prefix = { "${meta.id}_links-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*LONGSTITCH:.*:BUSCO' { + ext.prefix = { "${meta.id}_longstitch-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // avoid catching ragtag from ont_on_hifi assembly + withName: '.*:SCAFFOLD:.*RAGTAG:.*:BUSCO' { + ext.prefix = { "${meta.id}_ragtag-${lineage}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/BUSCO/" }, + mode: params.publish_dir_mode, + pattern: "*{-busco,_summary}*", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/QC/jellyfish-genomescope.config b/conf/modules/QC/jellyfish-genomescope.config new file mode 100644 index 00000000..18f70cd9 --- /dev/null +++ b/conf/modules/QC/jellyfish-genomescope.config @@ -0,0 +1,37 @@ +process { + withName: COUNT { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/count/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: DUMP { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/dump/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: STATS { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/stats/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: HISTO { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/genomescope/jellyfish/histo/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: GENOMESCOPE { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/genomescope/genomescope/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/QC/merqury.config b/conf/modules/QC/merqury.config new file mode 100644 index 00000000..06a31325 --- /dev/null +++ b/conf/modules/QC/merqury.config @@ -0,0 +1,51 @@ +process { + withName: '.*:ASSEMBLE:.*:MERQURY' { + ext.prefix = { "${meta.id}_assembly" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*PILON:.*:MERQURY' { + ext.prefix = { "${meta.id}_pilon" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*MEDAKA:.*:MERQURY' { + ext.prefix = { "${meta.id}_medaka" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*LINKS:.*:MERQURY' { + ext.prefix = { "${meta.id}_links" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*LONGSTITCH:.*:MERQURY' { + ext.prefix = { "${meta.id}_longstitch" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + // avoid catching ragtag from ont_on_hifi assembly + withName: '.*:SCAFFOLD:.*RAGTAG:.*:MERQURY' { + ext.prefix = { "${meta.id}_ragtag" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/merqury/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/QC/meryl.config b/conf/modules/QC/meryl.config new file mode 100644 index 00000000..41452a69 --- /dev/null +++ b/conf/modules/QC/meryl.config @@ -0,0 +1,16 @@ +process { + withName: MERYL_COUNT { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/meryl/count/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: MERYL_UNIONSUM { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/meryl/unionsum/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/QC/quast.config b/conf/modules/QC/quast.config new file mode 100644 index 00000000..b97a7494 --- /dev/null +++ b/conf/modules/QC/quast.config @@ -0,0 +1,51 @@ +process { + withName: '.*:ASSEMBLE:.*:QUAST' { + ext.prefix = { "${meta.id}_assembly" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*PILON:.*:QUAST' { + ext.prefix = { "${meta.id}_pilon" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*MEDAKA:.*:QUAST' { + ext.prefix = { "${meta.id}_medaka" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: '.*LINKS:.*:QUAST' { + ext.prefix = { "${meta.id}_links" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*LONGSTITCH:.*:QUAST' { + ext.prefix = { "${meta.id}_longstitch" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // avoid catching ragtag from ont_on_hifi assembly + withName: '.*:SCAFFOLD:.*RAGTAG:.*:QUAST' { + ext.prefix = { "${meta.id}_ragtag" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/QUAST/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/assembly.config b/conf/modules/assembly.config new file mode 100644 index 00000000..eb18c7ef --- /dev/null +++ b/conf/modules/assembly.config @@ -0,0 +1,60 @@ +process { + withName: FLYE { + ext.args = { + [ + meta.genome_size ? "--genome-size ${meta.genome_size}" : '', + params.flye_args + ].join(" ").trim() + } + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/flye/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: HIFIASM { + ext.args = { [ params.hifiasm_args ].join(" ").trim() } + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/hifiasm/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: HIFIASM_ONT { + ext.args = { [ params.hifiasm_args, "--ont" ].join(" ").trim() } + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/hifiasm_ont/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: GFA_2_FA { + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/hifiasm/fasta" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: GFA_2_FA_HIFI { + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/hifiasm/fasta" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: GFA_2_FA_ONT { + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/hifiasm_ont/fasta" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*ASSEMBLE:.*RAGTAG_PATCH' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/ragtag/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_assembly_patch" } + } +} diff --git a/conf/modules/hifi-prep.config b/conf/modules/hifi-prep.config new file mode 100644 index 00000000..ec84a420 --- /dev/null +++ b/conf/modules/hifi-prep.config @@ -0,0 +1,16 @@ +process { + withName: LIMA { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/lima/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: TO_FASTQ { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/lima/fastq/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } +} diff --git a/conf/modules/liftoff.config b/conf/modules/liftoff.config new file mode 100644 index 00000000..3f470e90 --- /dev/null +++ b/conf/modules/liftoff.config @@ -0,0 +1,50 @@ +process { + withName: '.*ASSEMBLE:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/assembly/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_assembly" } + } + withName: '.*PILON:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/polish/pilon/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_pilon" } + } + withName: '.*MEDAKA:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/polish/medaka" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_medaka" } + } + withName: '.*RAGTAG:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/ragtag/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_ragtag" } + } + withName: '.*LONGSTITCH:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/longstitch" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_longstitch" } + } + withName: '.*LINKS:.*LIFTOFF' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/links" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + ext.prefix = { "${meta.id}_links" } + } +} diff --git a/conf/modules/ont-prep.config b/conf/modules/ont-prep.config new file mode 100644 index 00000000..6b824273 --- /dev/null +++ b/conf/modules/ont-prep.config @@ -0,0 +1,23 @@ +process { + withName: NANOQ { + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/nanoq" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: COLLECT { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/collect" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: PORECHOP { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/porechop" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/polishing.config b/conf/modules/polishing.config new file mode 100644 index 00000000..8c39f50e --- /dev/null +++ b/conf/modules/polishing.config @@ -0,0 +1,31 @@ +process { + withName: MEDAKA { + ext.args1 = { } // args mini_align + ext.args2 = { [params.medaka_model ? "--model ${params.medaka_model}" : ''].join(" ").trim() } // args medaka_inference + ext.args3 = { } // args medaka sequence + ext.prefix = { "${meta.id}_medaka" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/polish/medaka" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // Pilon mapping + withName: '.*PILON:MAP_SR.*' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/QC/alignments/shortreads/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + ext.prefix = { "${meta.id}_shortreads" } + ext.args = { "-ax sr " } + } + withName: PILON { + ext.prefix = { "${meta.id}_pilon" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/polish/pilon" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/report.config b/conf/modules/report.config new file mode 100644 index 00000000..b405d0f6 --- /dev/null +++ b/conf/modules/report.config @@ -0,0 +1,9 @@ +process { + withName: REPORT { + publishDir = [ + path: { "${params.outdir}/report/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/scaffolding.config b/conf/modules/scaffolding.config new file mode 100644 index 00000000..878b8bfa --- /dev/null +++ b/conf/modules/scaffolding.config @@ -0,0 +1,33 @@ +process { + withName: '.*SCAFFOLD:.*RAGTAG_SCAFFOLD' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/ragtag/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_ragtag" } + ext.args = [ + "-C", + "-u", + "-r", + "-w" + ].join(" ").trim() + } + withName: LINKS { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/links/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}_links" } + ext.args = ["-t 40,200", "-d 500,2000,5000"].join(" ").trim() + } + withName: LONGSTITCH { + publishDir = [ + path: { "${params.outdir}/${meta.id}/scaffold/longstitch/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + ext.prefix = { "${meta.id}_longstitch" } + } +} diff --git a/conf/modules/trimgalore.config b/conf/modules/trimgalore.config new file mode 100644 index 00000000..dc899e99 --- /dev/null +++ b/conf/modules/trimgalore.config @@ -0,0 +1,9 @@ +process { + withName: TRIMGALORE { + publishDir = [ + path: { "${params.outdir}/${meta.id}/reads/trimgalore" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/test.config b/conf/test.config index 1f8df199..be3dc0e5 100644 --- a/conf/test.config +++ b/conf/test.config @@ -29,4 +29,5 @@ params { hifi = true ont = true assembler = "flye_on_hifiasm" + hifiasm_args = "-f 0" } diff --git a/configs/hifi_ont_hifiasm_on_hifiasm.config b/configs/hifi_ont_hifiasm_on_hifiasm.config new file mode 100644 index 00000000..9e548e42 --- /dev/null +++ b/configs/hifi_ont_hifiasm_on_hifiasm.config @@ -0,0 +1,12 @@ +/* + Use this config to: + assemble HIFI reads with hifiasm + assemble ONT reads with hifiasm --ont + scaffold the ONT assembly onto the HiFi assembly +*/ + +params { + hifi = true + ont = true + assembler = "hifiasm_on_hifiasm" +} diff --git a/docs/usage.md b/docs/usage.md index 69ae6a96..6a2a8eb9 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -15,14 +15,15 @@ This pipeline can perform assembly, polishing, scaffolding and annotation lift-o To ease configuration, there are a couple of pre-defined profiles for various combinations of read sources and assemblers (named readtype_assembler) -| ONT | HiFI  | Assembly-strategy  | Profile name | -| --- | ----- | -------------------------------------------------- | --------------------- | -| Yes | No  | flye | `ont_flye` | -| No | Yes  | flye | `hifi_flye` | -| Yes | No | hifiasm | `ont_hifiasm` | -| No | Yes  | hifiasm | `hifi_hifiasm` | -| Yes | Yes  | hifiasm --ul | `hifiont_hifiasm` | -| Yes | Yes  | Scaffolding of ONT assemblies onto HiFi assemblies | `hifiont_flyehifiasm` | +| 
ONT | HiFI  | Assembly-strategy  | Profile name | +| --- | ----- | ---------------------------------------------------------------------- | ---------------------------- | +| Yes | No  | flye | `ont_flye` | +| No | Yes  | flye | `hifi_flye` | +| Yes | No | hifiasm | `ont_hifiasm` | +| No | Yes  | hifiasm | `hifi_hifiasm` | +| Yes | Yes  | hifiasm --ul | `hifiont_hifiasm` | +| Yes | Yes  | Scaffolding of ONT assemblies (flye) onto HiFi assemblies (hifiasm) | `hifiont_flye_on_hifiasm` | +| Yes | Yes  | Scaffolding of ONT assemblies (hifiasm) onto HiFi assemblies (hifiasm) | `hifiont_hifiasm_on_hifiasm` | ## Samplesheet input @@ -38,7 +39,7 @@ The largest samplesheet format is: ```csv title="samplesheet.csv" sample,ontreads,hifireads,ref_fasta,ref_gff,shortread_F,shortread_R,paired -Sample1,sample1ont.fq.gz,sample1hifi.fq.gz,ref.fa,ref.gff,sample1_r1.fq.gz,sample1_r2,fq.gz +Sample1,/path/reads/sample1ont.fq.gz,/path/reads/sample1hifi.fq.gz,/path/references/ref.fa,/path/references/ref.gff,/path/reads/sample1_r1.fq.gz,/path/reads/sample1_r2.fq.gz,true ``` The samplesheet _must_ contain a column name `sample` [string]. @@ -54,9 +55,12 @@ Further columns _can_ be: - `shortread_R`: shortread reverse file (paired end) - `paired`: [true/false] true if the reads are paired end, false if they are single-end. The `shortreads_R` column should exist if `paired` is `false` but can be empty. +> [!INFO] +> It is strongly recommended to provide all paths as absolute paths + ### Multiple runs of the same sample -For ONT reads, a folder containing several fastq files can be provided, which will be concatenated into a single file if `--collect` is used. Generally we recommend to provide all reads in a single file. +For ONT reads, a glob pattern can be provided, matching files will be concatenated into a single file if `--collect` is used. Generally we recommend to provide all reads in a single file. 
## Running the pipeline diff --git a/modules.json b/modules.json index f15bcf2d..daa2af82 100644 --- a/modules.json +++ b/modules.json @@ -7,100 +7,115 @@ "nf-core": { "busco/busco": { "branch": "master", - "git_sha": "d34caf3c0d3cf5b9bae0fae6107bab0933c96f37", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "08108058ea36a63f141c25c4e75f9f872a5b2296", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", "installed_by": ["modules"] }, "flye": { "branch": "master", - "git_sha": "ce1a66562156776bb0dd1c1bb5640d368dadd4e6", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "hifiasm": { "branch": "master", - "git_sha": "ea8610177512dff284c7d3dc0c8dcbcc12fb4cf0", + "git_sha": "c457b50bf9187031f65b0fb090dc022e8814c729", "installed_by": ["modules"] }, "liftoff": { "branch": "master", - "git_sha": "32b1f7f4e3d5c6e68af8f1d732f61c3d2aa6e67e", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"], "patch": "modules/nf-core/liftoff/liftoff.diff" }, "lima": { "branch": "master", - "git_sha": "1c4249137bdcd4392317e34123c00b5049c58d45", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "links": { + "branch": "master", + "git_sha": "bd049fd0244ed914f2d10bed580b49fb44eba914", "installed_by": ["modules"] }, "merqury/merqury": { "branch": "master", - "git_sha": "42140b76b12c18dbde34bfa7f2ef09afae8b054f", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"], "patch": "modules/nf-core/merqury/merqury/merqury-merqury.diff" }, "meryl/count": { "branch": "master", - "git_sha": "42140b76b12c18dbde34bfa7f2ef09afae8b054f", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "meryl/unionsum": { "branch": "master", - "git_sha": "42140b76b12c18dbde34bfa7f2ef09afae8b054f", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", 
"installed_by": ["modules"] }, "minimap2/align": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": "a532706a19b3d83f14b1d48a6a815ed33eb48b0c", "installed_by": ["modules"], "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, "pilon": { "branch": "master", - "git_sha": "2816479e1de5c04f0907f74a53a2096ceaff558e", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "porechop/porechop": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "dbf496251becaa54933305bb494b880253a84ee6", + "installed_by": ["modules"] + }, + "ragtag/patch": { + "branch": "master", + "git_sha": "62775d90df7565c82bd4ceedca70149529820cff", + "installed_by": ["modules"] + }, + "ragtag/scaffold": { + "branch": "master", + "git_sha": "7d163aded9221aef68d8c11cb7a04354a232d89c", "installed_by": ["modules"] }, "samtools/fastq": { "branch": "master", - "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", - "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/idxstats": { "branch": "master", - "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/index": { "branch": "master", - "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/sort": { "branch": "master", - "git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/stats": { "branch": "master", - "git_sha": 
"2d20463181b1c38981a02e90d3084b5f9fa8d540", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["bam_stats_samtools", "modules"] }, "trimgalore": { "branch": "master", - "git_sha": "8d3e71002c5008e3f68a691ad8cd32c346356258", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] } } @@ -109,12 +124,12 @@ "nf-core": { "bam_sort_stats_samtools": { "branch": "master", - "git_sha": "763d4b5c05ffda3ac1ac969dc67f7458cfb2eb1d", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["subworkflows"] }, "bam_stats_samtools": { "branch": "master", - "git_sha": "763d4b5c05ffda3ac1ac969dc67f7458cfb2eb1d", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["bam_sort_stats_samtools", "subworkflows"] }, "utils_nextflow_pipeline": { diff --git a/modules/local/collect_reads/main.nf b/modules/local/collect_reads/main.nf index c7d2ef82..4dbdc87a 100644 --- a/modules/local/collect_reads/main.nf +++ b/modules/local/collect_reads/main.nf @@ -8,17 +8,17 @@ process COLLECT_READS { : 'community.wave.seqera.io/library/coreutils_grep_gzip_lbzip2_pruned:838ba80435a629f8'}" input: - tuple val(meta), path(read_directory) + tuple val(meta), path(reads) output: - tuple val(meta), path("*.fastq"), emit: combined_reads + tuple val(meta), path("*_all_reads.fq.gz"), emit: combined_reads path "versions.yml", emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}" """ - gunzip -c ${read_directory}/*.gz > ${prefix}_all_reads.fastq + cat ${reads} > ${prefix}_all_reads.fq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": gzip: \$(echo \$(gzip --version | head -n1 | sed 's/gzip //')) @@ -28,7 +28,7 @@ process COLLECT_READS { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}_all_reads.fastq + touch ${prefix}_all_reads.fq; gzip ${prefix}_all_reads.fq cat <<-END_VERSIONS > versions.yml "${task.process}": gzip: \$(echo \$(gzip --version | head -n1 | sed 's/gzip //')) diff --git 
a/modules/local/gfa2fa/environment.yml b/modules/local/gfa2fa/environment.yml index ae4fa457..2e1fcd06 100644 --- a/modules/local/gfa2fa/environment.yml +++ b/modules/local/gfa2fa/environment.yml @@ -8,3 +8,4 @@ dependencies: - conda-forge::lbzip2=2.5 - conda-forge::sed=4.8 - conda-forge::tar=1.34 + - bioconda::mawk=1.3.4 diff --git a/modules/local/gfa2fa/main.nf b/modules/local/gfa2fa/main.nf index a53144a3..b258f8d6 100644 --- a/modules/local/gfa2fa/main.nf +++ b/modules/local/gfa2fa/main.nf @@ -20,7 +20,7 @@ process GFA_2_FA { | gzip > \$outfile cat <<-END_VERSIONS > versions.yml "${task.process}": - awk: \$(echo \$(awk --version | head -n1 | sed 's/mawk //; s/ .*//')) + awk: \$(mawk -Wversion | sed '1!d; s/.*Awk //; s/,.*//; s/ [0-9]*\$//') gzip: \$(echo \$(gzip --version | head -n1 | sed 's/gzip //')) END_VERSIONS """ @@ -31,7 +31,7 @@ process GFA_2_FA { touch \$outfile cat <<-END_VERSIONS > versions.yml "${task.process}": - awk: \$(echo \$(awk --version | head -n1 | sed 's/mawk //; s/ .*//')) + awk: \$(mawk -Wversion | sed '1!d; s/.*Awk //; s/,.*//; s/ [0-9]*\$//') gzip: \$(echo \$(gzip --version | head -n1 | sed 's/gzip //')) END_VERSIONS """ diff --git a/modules/local/links/main.nf b/modules/local/links/main.nf deleted file mode 100644 index 9b3a2374..00000000 --- a/modules/local/links/main.nf +++ /dev/null @@ -1,45 +0,0 @@ -process LINKS { - tag "${meta.id}" - label 'process_high' - - conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 
'https://depot.galaxyproject.org/singularity/links:2.0.1--h4ac6f70_5' - : 'biocontainers/links:2.0.1--h4ac6f70_5'}" - - input: - tuple val(meta), path(assembly), path(reads) - - output: - tuple val(meta), path("*.scaffolds.fa"), emit: scaffolds - tuple val(meta), path("*.scaffolds"), emit: scaffold_csv - tuple val(meta), path("*.gv"), emit: graph - tuple val(meta), path("*.log"), emit: log - path "versions.yml", emit: versions - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - echo "${reads}" > readfile.fof - LINKS -f ${assembly} -s readfile.fof -j 3 -b ${prefix} -t 40,200 -d 500,2000,5000 - sed -i 's/\\(scaffold[0-9]*\\).*/\\1/' ${prefix}.scaffolds.fa - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - LINKS: \$(echo \$(LINKS | grep -o 'LINKS v.*' | sed 's/LINKS v//')) - END_VERSIONS - """ - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.scaffolds.fa - touch ${prefix}.scaffolds - touch ${prefix}.gv - touch ${prefix}.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - LINKS: \$(echo \$(LINKS | grep -o 'LINKS v.*' | sed 's/LINKS v//')) - END_VERSIONS - """ -} diff --git a/modules/local/medaka/main.nf b/modules/local/medaka/main.nf index 3d001421..7117c7da 100644 --- a/modules/local/medaka/main.nf +++ b/modules/local/medaka/main.nf @@ -6,7 +6,7 @@ process MEDAKA { conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://depot.galaxyproject.org/singularity/biocontainers/medaka:1.11.3--py310h87e71ce_0' + ? 
'https://depot.galaxyproject.org/singularity/medaka:1.11.3--py310h87e71ce_0' : 'biocontainers/medaka:1.11.3--py310h87e71ce_0'}" input: diff --git a/modules/local/quast/environment.yml b/modules/local/quast/environment.yml index 2c14403d..50e43eac 100644 --- a/modules/local/quast/environment.yml +++ b/modules/local/quast/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::quast=5.2.0 + - bioconda::quast=5.3.0 diff --git a/modules/local/quast/main.nf b/modules/local/quast/main.nf index 5da08e19..118c1b3a 100644 --- a/modules/local/quast/main.nf +++ b/modules/local/quast/main.nf @@ -3,9 +3,9 @@ process QUAST { label 'process_medium' conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://depot.galaxyproject.org/singularity/quast:5.2.0--py39pl5321h2add14b_1' - : 'biocontainers/quast:5.2.0--py39pl5321heaaa4ec_4'}" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a5/a515d04307ea3e0178af75132105cd36c87d0116c6f9daecf81650b973e870fd/data' : + 'community.wave.seqera.io/library/quast:5.3.0--755a216045b6dbdd' }" input: tuple val(meta), path(consensus), path(fasta), path(gff), path(ref_bam), path(bam) @@ -44,7 +44,7 @@ process QUAST { cat <<-END_VERSIONS > versions.yml "${task.process}": - quast: \$(quast.py --version 2>&1 | sed 's/^.*QUAST v//; s/ .*\$//') + quast: \$(quast.py --version 2>&1 | sed 's/^.*QUAST v//; s/ .*\$//' | tail -n1) END_VERSIONS """ stub: @@ -55,7 +55,7 @@ process QUAST { cat <<-END_VERSIONS > versions.yml "${task.process}": - quast: \$(quast.py --version 2>&1 | sed 's/^.*QUAST v//; s/ .*\$//') + quast: \$(quast.py --version 2>&1 | sed 's/^.*QUAST v//; s/ .*\$//' | tail -n1) END_VERSIONS """ } diff --git a/modules/local/ragtag/main.nf b/modules/local/ragtag/main.nf deleted file mode 100644 index 9756114c..00000000 --- a/modules/local/ragtag/main.nf +++ /dev/null @@ -1,58 +0,0 @@ -process RAGTAG_SCAFFOLD { - tag "${meta.id}" - label 'process_high' - conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 
'https://depot.galaxyproject.org/singularity/ragtag:2.1.0--pyhb7b1952_0' - : 'biocontainers/ragtag:2.1.0--pyhb7b1952_0'}" - - input: - tuple val(meta), path(assembly), path(reference) - - output: - tuple val(meta), path("*.fasta"), emit: corrected_assembly - tuple val(meta), path("*.agp"), emit: corrected_agp - tuple val(meta), path("*.stats"), emit: corrected_stats - path "versions.yml", emit: versions - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - if [[ ${assembly} == *.gz ]] - then - zcat ${assembly} > assembly.fa - else - mv ${assembly} assembly.fa - fi - - ragtag.py scaffold ${reference} assembly.fa \\ - -o "${prefix}" \\ - -t ${task.cpus} \\ - -f 5000 \\ - -w \\ - -C \\ - -u \\ - -r - - mv ${prefix}/ragtag.scaffold.fasta ${prefix}.fasta - mv ${prefix}/ragtag.scaffold.agp ${prefix}.agp - mv ${prefix}/ragtag.scaffold.stats ${prefix}.stats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - RagTag: \$(echo \$(ragtag.py -v | sed 's/v//')) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.fasta - touch ${prefix}.agp - touch ${prefix}.stats - cat <<-END_VERSIONS > versions.yml - "${task.process}": - RagTag: \$(echo \$(ragtag.py -v | sed 's/v//')) - END_VERSIONS - """ -} diff --git a/modules/local/report/environment.yml b/modules/local/report/environment.yml index 0cc7b3a4..ff0aceb2 100644 --- a/modules/local/report/environment.yml +++ b/modules/local/report/environment.yml @@ -2,11 +2,13 @@ channels: - conda-forge - bioconda dependencies: - - conda-forge::quarto=1.5.57 + - conda-forge::quarto=1.7.31 + - conda-forge::r-gt=1.0.0 - conda-forge::r-plotly=4.10.4 - conda-forge::r-quarto=1.4.4 + - conda-forge::r-tidyjson=0.3.2 - conda-forge::r-tidyverse=2.0.0 - pip - pip: - - quarto-cli==1.5.56 + - quarto-cli==1.7.31 - quarto==0.1.0 diff --git a/modules/local/report/main.nf b/modules/local/report/main.nf index 6995f7de..841e61b4 100644 --- a/modules/local/report/main.nf +++ 
b/modules/local/report/main.nf @@ -3,9 +3,12 @@ process REPORT { label 'process_low' conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/90/90fef3481ee574cada2330e149cebab7290724e6b9869d84af66d0fc3d04168e/data' - : 'community.wave.seqera.io/library/quarto_r-plotly_r-quarto_r-tidyjson_pruned:2712f84a83ca0d9a'}" - + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/89/8967e1cb830fdc77ec5b84541a50c74a0a05eaaae557314490809de2fc91e4af/data' + : 'community.wave.seqera.io/library/quarto_r-gt_r-plotly_r-quarto_pruned:be4a8863b7b76cf7'}" + /* wave builds: + https://wave.seqera.io/view/builds/bd-6e20dd9b9b77f359_1 singularity + https://wave.seqera.io/view/builds/bd-be4a8863b7b76cf7_1 docker + */ input: path qmdir_files, stageAs: "*" path funct_files, stageAs: "functions/*" diff --git a/modules/nf-core/busco/busco/environment.yml b/modules/nf-core/busco/busco/environment.yml index 53e5e90e..ba8a40c0 100644 --- a/modules/nf-core/busco/busco/environment.yml +++ b/modules/nf-core/busco/busco/environment.yml @@ -1,6 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::busco=5.8.2 + - bioconda::busco=5.8.3 + - bioconda::sepp=4.5.5 diff --git a/modules/nf-core/busco/busco/main.nf b/modules/nf-core/busco/busco/main.nf index 609cae95..05ac4295 100644 --- a/modules/nf-core/busco/busco/main.nf +++ b/modules/nf-core/busco/busco/main.nf @@ -1,55 +1,65 @@ process BUSCO_BUSCO { - tag "$meta.id" + tag "${meta.id}" label 'process_medium' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/busco:5.8.2--pyhdfd78af_0': - 'biocontainers/busco:5.8.2--pyhdfd78af_0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c6/c607f319867d96a38c8502f751458aa78bbd18fe4c7c4fa6b9d8350e6ba11ebe/data' + : 'community.wave.seqera.io/library/busco_sepp:f2dbc18a2f7a5b64'}" input: tuple val(meta), path(fasta, stageAs:'tmp_input/*') val mode // Required: One of genome, proteins, or transcriptome val lineage // Required: lineage for checking against, or "auto/auto_prok/auto_euk" for enabling auto-lineage - path busco_lineages_path // Recommended: busco lineages file - downloads if not set - path config_file // Optional: busco configuration file + path busco_lineages_path // Recommended: BUSCO lineages file - downloads if not set + path config_file // Optional: BUSCO configuration file + val clean_intermediates // Optional: Remove intermediate files output: - tuple val(meta), path("*-busco.batch_summary.txt") , emit: batch_summary - tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt , optional: true - tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json , optional: true - tuple val(meta), path("*-busco/*/run_*/full_table.tsv") , emit: full_table , optional: true - tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv") , emit: missing_busco_list , optional: true - tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa") , emit: single_copy_proteins , optional: true - tuple val(meta), path("*-busco/*/run_*/busco_sequences") , emit: seq_dir , optional: true - tuple val(meta), path("*-busco/*/translated_proteins") , emit: translated_dir , optional: true - tuple val(meta), path("*-busco") , emit: busco_dir - tuple val(meta), path("busco_downloads/lineages/*") , emit: downloaded_lineages , optional: true + tuple val(meta), 
path("*-busco.batch_summary.txt") , emit: batch_summary + tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt , optional: true + tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json, optional: true + tuple val(meta), path("*-busco.log") , emit: log , optional: true + tuple val(meta), path("*-busco/*/run_*/full_table.tsv") , emit: full_table , optional: true + tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv") , emit: missing_busco_list , optional: true + tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa") , emit: single_copy_proteins, optional: true + tuple val(meta), path("*-busco/*/run_*/busco_sequences") , emit: seq_dir , optional: true + tuple val(meta), path("*-busco/*/translated_proteins") , emit: translated_dir , optional: true + tuple val(meta), path("*-busco") , emit: busco_dir + tuple val(meta), path("busco_downloads/lineages/*") , emit: downloaded_lineages , optional: true + tuple val(meta), path("*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.faa"), emit: single_copy_faa , optional: true + tuple val(meta), path("*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.fna"), emit: single_copy_fna , optional: true - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - if ( mode !in [ 'genome', 'proteins', 'transcriptome' ] ) { - error "Mode must be one of 'genome', 'proteins', or 'transcriptome'." + if (mode !in ['genome', 'proteins', 'transcriptome']) { + error("Mode must be one of 'genome', 'proteins', or 'transcriptome'.") } def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" def busco_config = config_file ? "--config ${config_file}" : '' - def busco_lineage = lineage in [ 'auto', 'auto_prok', 'auto_euk'] + def busco_lineage = lineage in ['auto', 'auto_prok', 'auto_euk'] ? 
lineage.replaceFirst('auto', '--auto-lineage').replaceAll('_', '-') : "--lineage_dataset ${lineage}" def busco_lineage_dir = busco_lineages_path ? "--download_path ${busco_lineages_path}" : '' + def intermediate_files = [ + './*-busco/*/auto_lineage', + './*-busco/*/**/{miniprot,hmmer,.bbtools}_output', + './*-busco/*/prodigal_output/predicted_genes/tmp/', + ] + def clean_cmd = clean_intermediates ? "rm -fr ${intermediate_files.join(' ')}" : '' """ - # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute) - # Check for container variable initialisation script and source it. - if [ -f "/usr/local/env-activate.sh" ]; then - set +u # Otherwise, errors out because of various unbound variables - . "/usr/local/env-activate.sh" - set -u + # Fix Augustus for Apptainer + ENV_AUGUSTUS=/opt/conda/etc/conda/activate.d/augustus.sh + set +u + if [ -z "\${AUGUSTUS_CONFIG_PATH}" ] && [ -f "\${ENV_AUGUSTUS}" ]; then + source "\${ENV_AUGUSTUS}" fi + set -u # If the augustus config directory is not writable, then copy to writeable area if [ ! -w "\${AUGUSTUS_CONFIG_PATH}" ]; then @@ -74,21 +84,25 @@ process BUSCO_BUSCO { cd .. busco \\ - --cpu $task.cpus \\ + --cpu ${task.cpus} \\ --in "\$INPUT_SEQS" \\ --out ${prefix}-busco \\ - --mode $mode \\ - $busco_lineage \\ - $busco_lineage_dir \\ - $busco_config \\ - $args + --mode ${mode} \\ + ${busco_lineage} \\ + ${busco_lineage_dir} \\ + ${busco_config} \\ + ${args} # clean up rm -rf "\$INPUT_SEQS" + ${clean_cmd} + # find and remove broken symlinks from the cleanup + find . -xtype l -delete # Move files to avoid staging/publishing issues mv ${prefix}-busco/batch_summary.txt ${prefix}-busco.batch_summary.txt mv ${prefix}-busco/*/short_summary.*.{json,txt} . || echo "Short summaries were not available: No genes were found." 
+ mv ${prefix}-busco/logs/busco.log ${prefix}-busco.log cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -97,11 +111,11 @@ process BUSCO_BUSCO { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" - def fasta_name = files(fasta).first().name - '.gz' + def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" + def fasta_name = files(fasta).first().name - '.gz' """ touch ${prefix}-busco.batch_summary.txt - mkdir -p ${prefix}-busco/$fasta_name/run_${lineage}/busco_sequences + mkdir -p ${prefix}-busco/${fasta_name}/run_${lineage}/busco_sequences cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/busco/busco/meta.yml b/modules/nf-core/busco/busco/meta.yml index 8f719e08..0222e490 100644 --- a/modules/nf-core/busco/busco/meta.yml +++ b/modules/nf-core/busco/busco/meta.yml @@ -40,6 +40,9 @@ input: - - config_file: type: file description: Path to BUSCO config file. + - - clean_intermediates: + type: boolean + description: Flag to remove intermediate files. output: - batch_summary: - meta: @@ -71,6 +74,16 @@ output: type: file description: Short Busco summary in JSON format pattern: "short_summary.*.json" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*-busco.log": + type: file + description: BUSCO main log + pattern: "*-busco.log" - full_table: - meta: type: map @@ -142,6 +155,26 @@ output: type: directory description: Lineages downloaded by BUSCO when running the analysis, for example bacteria_odb12 pattern: "busco_downloads/lineages/*" + - single_copy_faa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - "*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.faa": + type: file + description: Single copy .faa sequence files + pattern: "*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.faa" + - single_copy_fna: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.fna": + type: file + description: Single copy .fna sequence files + pattern: "*-busco/*/run_*/busco_sequences/single_copy_busco_sequences/*.fna" - versions: - versions.yml: type: file diff --git a/modules/nf-core/busco/busco/tests/main.nf.test b/modules/nf-core/busco/busco/tests/main.nf.test index 55954a73..411ceb86 100644 --- a/modules/nf-core/busco/busco/tests/main.nf.test +++ b/modules/nf-core/busco/busco/tests/main.nf.test @@ -14,6 +14,9 @@ nextflow_process { config './nextflow.config' when { + params { + busco_args = '--tar' + } process { """ input[0] = [ @@ -24,6 +27,7 @@ nextflow_process { input[2] = 'bacteria_odb12' // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues input[3] = [] // Download busco lineage input[4] = [] // No config + input[5] = false // Clean intermediates """ } } @@ -59,7 +63,7 @@ nextflow_process { assert contains('fragmented_busco_sequences.tar.gz') } - with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + with(path(process.out.log[0][1]).text) { assert contains('DEBUG:busco.run_BUSCO') assert contains('Results from dataset') assert contains('how to cite BUSCO') @@ -75,6 +79,9 @@ nextflow_process { config './nextflow.config' when { + params { + busco_args = '--tar' + } process { """ input[0] = [ @@ -88,6 +95,7 @@ nextflow_process { input[2] = 'bacteria_odb12' input[3] = [] input[4] = [] + input[5] = false """ } } @@ -143,7 +151,7 @@ nextflow_process { assert contains('fragmented_busco_sequences.tar.gz') } - 
with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + with(path(process.out.log[0][1]).text) { assert contains('DEBUG:busco.run_BUSCO') assert contains('Results from dataset') assert contains('how to cite BUSCO') @@ -157,9 +165,12 @@ nextflow_process { test("test_busco_eukaryote_metaeuk") { - config './nextflow.metaeuk.config' + config './nextflow.config' when { + params { + busco_args = '--tar --metaeuk' + } process { """ input[0] = [ @@ -170,6 +181,7 @@ nextflow_process { input[2] = 'eukaryota_odb10' input[3] = [] input[4] = [] + input[5] = false """ } } @@ -205,7 +217,7 @@ nextflow_process { assert contains('fragmented_busco_sequences.tar.gz') } - with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + with(path(process.out.log[0][1]).text) { assert contains('DEBUG:busco.run_BUSCO') assert contains('Results from dataset') assert contains('how to cite BUSCO') @@ -220,9 +232,12 @@ nextflow_process { test("test_busco_eukaryote_augustus") { - config './nextflow.augustus.config' + config './nextflow.config' when { + params { + busco_args = '--tar --augustus' + } process { """ input[0] = [ @@ -233,6 +248,7 @@ nextflow_process { input[2] = 'eukaryota_odb10' input[3] = [] input[4] = [] + input[5] = false """ } } @@ -245,7 +261,7 @@ nextflow_process { process.out.versions[0] ).match() - with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + with(path(process.out.log[0][1]).text) { assert contains('DEBUG:busco.run_BUSCO') assert contains('Augustus did not recognize any genes') @@ -266,6 +282,9 @@ nextflow_process { config './nextflow.config' when { + params { + busco_args = '--tar' + } process { """ input[0] = [ @@ -276,6 +295,7 @@ nextflow_process { input[2] = 'bacteria_odb12' input[3] = [] input[4] = [] + input[5] = false """ } } @@ -311,7 +331,7 @@ nextflow_process { assert contains('fragmented_busco_sequences.tar.gz') } - with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + 
with(path(process.out.log[0][1]).text) { assert contains('DEBUG:busco.run_BUSCO') assert contains('Results from dataset') assert contains('how to cite BUSCO') @@ -328,6 +348,9 @@ nextflow_process { config './nextflow.config' when { + params { + busco_args = '--tar' + } process { """ input[0] = [ @@ -338,6 +361,7 @@ nextflow_process { input[2] = 'bacteria_odb12' input[3] = [] input[4] = [] + input[5] = false """ } } @@ -375,7 +399,7 @@ nextflow_process { assert contains('fragmented_busco_sequences.tar.gz') } - with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + with(path(process.out.log[0][1]).text) { assert contains('DEBUG:busco.run_BUSCO') assert contains('Results from dataset') assert contains('how to cite BUSCO') @@ -384,6 +408,53 @@ nextflow_process { } + test("test_busco_cleanup") { + + config './nextflow.config' + + when { + params { + busco_args = '--tar' + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'bacteria_odb12' + input[3] = [] + input[4] = [] + input[5] = true + """ + } + } + + then { + assert process.success + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(path(process.out.log[0][1]).text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + + assert path("${process.out.busco_dir[0][1]}/*/auto_lineage").exists() == false + assert path("${process.out.busco_dir[0][1]}/*/**/{miniprot,hmmer,.bbtools}_output").exists() == false + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + } + test("minimal-stub") { options '-stub' @@ -399,6 +470,7 @@ nextflow_process { input[2] = 'bacteria_odb12' input[3] = [] 
input[4] = [] + input[5] = false """ } } @@ -413,5 +485,4 @@ nextflow_process { ) } } - } \ No newline at end of file diff --git a/modules/nf-core/busco/busco/tests/main.nf.test.snap b/modules/nf-core/busco/busco/tests/main.nf.test.snap index 0f44373f..1026524b 100644 --- a/modules/nf-core/busco/busco/tests/main.nf.test.snap +++ b/modules/nf-core/busco/busco/tests/main.nf.test.snap @@ -10,75 +10,88 @@ ] ], [ - "versions.yml:md5,c6e638f981761c13cd9ff7663cf707e6" + "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.2" + "nextflow": "24.10.3" }, - "timestamp": "2024-12-13T15:30:45.505241761" + "timestamp": "2025-03-12T10:50:57.218573431" }, "test_busco_eukaryote_augustus": { "content": [ "test-eukaryota_odb10-busco.batch_summary.txt:md5,3ea3bdc423a461dae514d816bdc61c89", - "versions.yml:md5,c6e638f981761c13cd9ff7663cf707e6" + "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.3" }, - "timestamp": "2024-12-11T13:07:45.550722277" + "timestamp": "2025-03-12T10:44:25.359421247" }, "test_busco_genome_single_fasta": { "content": [ "test-bacteria_odb12-busco.batch_summary.txt:md5,e3e503e1540b633d95c273c465945740", - "full_table.tsv:md5,e2a08fdd9b2596322e70c5549d1affc7", - "missing_busco_list.tsv:md5,25417462f2c484f1942c86b21bcf77d0", - "versions.yml:md5,c6e638f981761c13cd9ff7663cf707e6" + "full_table.tsv:md5,086f2ecdc90d47745c828c9b25357039", + "missing_busco_list.tsv:md5,9919aee2da9d30a3985aede354850a46", + "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.3" }, - "timestamp": "2024-12-11T12:43:40.359736221" + "timestamp": "2025-03-12T10:41:46.251404188" }, "test_busco_genome_multi_fasta": { "content": [ [ - "full_table.tsv:md5,5e7df014f2804789f0d98ae2e09734ad", - "full_table.tsv:md5,e2a08fdd9b2596322e70c5549d1affc7" + 
"full_table.tsv:md5,5a6bf59055e2040e74797a1e36c8e374", + "full_table.tsv:md5,086f2ecdc90d47745c828c9b25357039" ], [ - "missing_busco_list.tsv:md5,d902f10173b463f81e4892ef64f63c50", - "missing_busco_list.tsv:md5,25417462f2c484f1942c86b21bcf77d0" + "missing_busco_list.tsv:md5,a55eee6869fad9176d812e59886232fb", + "missing_busco_list.tsv:md5,9919aee2da9d30a3985aede354850a46" ], - "versions.yml:md5,c6e638f981761c13cd9ff7663cf707e6" + "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.3" }, - "timestamp": "2024-12-11T17:17:42.175675858" + "timestamp": "2025-03-12T10:42:28.126899794" }, "test_busco_eukaryote_metaeuk": { "content": [ "test-eukaryota_odb10-busco.batch_summary.txt:md5,ff6d8277e452a83ce9456bbee666feb6", - "full_table.tsv:md5,9bfa9ef7d54ca6ad8bcf8e87729720b1", - "missing_busco_list.tsv:md5,325b529e5a8af2a392d747b4eddc150c", - "versions.yml:md5,c6e638f981761c13cd9ff7663cf707e6" + "full_table.tsv:md5,cfb55ab2ce590d2def51926324691aa8", + "missing_busco_list.tsv:md5,77e3d4503b2c13db0d611723fc83ab7e", + "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.3" }, - "timestamp": "2024-12-11T13:07:17.335085326" + "timestamp": "2025-03-12T10:43:59.997031348" + }, + "test_busco_cleanup": { + "content": [ + "test-bacteria_odb12-busco.batch_summary.txt:md5,e3e503e1540b633d95c273c465945740", + "full_table.tsv:md5,086f2ecdc90d47745c828c9b25357039", + "missing_busco_list.tsv:md5,9919aee2da9d30a3985aede354850a46", + "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-03-12T10:50:48.928173488" }, "test_busco_transcriptome": { "content": [ "test-bacteria_odb12-busco.batch_summary.txt:md5,6cd69d8a66b5f8b7fd4a9de758e7a739", - "full_table.tsv:md5,73a3a90c2fa8fef41cafed5a607fab66", - 
"missing_busco_list.tsv:md5,4778855c345f4e409750c9bbd38c5a0c", + "full_table.tsv:md5,4efc19f8d2cc7ea9e73425f09cb3ed97", + "missing_busco_list.tsv:md5,55f0322d494e5c165508712be63062bf", [ "9767721at2.faa:md5,1731738ca153959391f8302fd5a3679f", "9778364at2.faa:md5,7a19a6b6696ae53efce30457b4dd1ab2", @@ -125,25 +138,25 @@ "9814755at2.faa:md5,9b4c4648d250c2e6d04acb78f9cf6df0" ], "single_copy_proteins.faa:md5,14124def13668c6d9b0d589207754b31", - "versions.yml:md5,c6e638f981761c13cd9ff7663cf707e6" + "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.3" }, - "timestamp": "2024-12-11T12:45:43.336777509" + "timestamp": "2025-03-12T10:45:08.029718703" }, "test_busco_protein": { "content": [ "test-bacteria_odb12-busco.batch_summary.txt:md5,44d4cdebd61a3c8e8981ddf1829f83b3", - "full_table.tsv:md5,696bae3f377fd5dbaf19f1c522088d93", - "missing_busco_list.tsv:md5,d902f10173b463f81e4892ef64f63c50", - "versions.yml:md5,c6e638f981761c13cd9ff7663cf707e6" + "full_table.tsv:md5,350f9b1b6c37cfcf41be84e93ef41931", + "missing_busco_list.tsv:md5,a55eee6869fad9176d812e59886232fb", + "versions.yml:md5,0046a4b8575cbc3635f2a9ee616fd840" ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.3" }, - "timestamp": "2024-12-11T12:45:16.960592213" + "timestamp": "2025-03-12T10:44:44.094048564" } } \ No newline at end of file diff --git a/modules/nf-core/busco/busco/tests/nextflow.augustus.config b/modules/nf-core/busco/busco/tests/nextflow.augustus.config deleted file mode 100644 index 84daa69d..00000000 --- a/modules/nf-core/busco/busco/tests/nextflow.augustus.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: 'BUSCO_BUSCO' { - ext.args = '--tar --augustus' - } -} diff --git a/modules/nf-core/busco/busco/tests/nextflow.config b/modules/nf-core/busco/busco/tests/nextflow.config index 1ec3fec0..db73a7ee 100644 --- a/modules/nf-core/busco/busco/tests/nextflow.config +++ 
b/modules/nf-core/busco/busco/tests/nextflow.config @@ -1,5 +1,5 @@ process { withName: 'BUSCO_BUSCO' { - ext.args = '--tar' + ext.args = params.busco_args } } diff --git a/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config b/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config deleted file mode 100644 index c1418445..00000000 --- a/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: 'BUSCO_BUSCO' { - ext.args = '--tar --metaeuk' - } -} diff --git a/modules/nf-core/busco/busco/tests/old_test.yml b/modules/nf-core/busco/busco/tests/old_test.yml deleted file mode 100644 index 75177f5d..00000000 --- a/modules/nf-core/busco/busco/tests/old_test.yml +++ /dev/null @@ -1,624 +0,0 @@ -- name: busco test_busco_genome_single_fasta - command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_single_fasta -c ./tests/config/nextflow.config - tags: - - busco - files: - - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt - contains: - - "BUSCO version" - - "The lineage dataset is" - - "BUSCO was run in mode" - - "Complete BUSCOs" - - "Missing BUSCOs" - - "Dependencies and versions" - - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt - md5sum: bc2440f8a68d7fbf931ff911c1c3fdfa - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log - md5sum: 9caf1a1434414c78562eb0bbb9c0e53f - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log - contains: - - "# hmmsearch :: search profile(s) against a sequence database" - - "# target sequence database:" - - "Internal pipeline statistics summary:" - - "[ok]" - - path: 
output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log - md5sum: 538510cfc7483498210f01e53fe035ad - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log - md5sum: 61050b0706addc9498b2088a2d6efa9a - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint - contains: - - "Tool: prodigal" - - "Completed" - - "jobs" - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa - md5sum: 836e9a80d33d8b89168f07ddc13ee991 - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna - md5sum: 20eeb75f86842e6e136f02bca8b73a9f - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa - md5sum: 836e9a80d33d8b89168f07ddc13ee991 - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna - md5sum: 20eeb75f86842e6e136f02bca8b73a9f - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log - md5sum: 538510cfc7483498210f01e53fe035ad - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log - md5sum: 61050b0706addc9498b2088a2d6efa9a - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint - contains: - - "Tool: bbtools" - - "Completed" - - "jobs" - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz - - path: 
output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv - md5sum: c56edab1dc1522e993c25ae2b730799f - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv - md5sum: b533ef30270f27160acce85a22d01bf5 - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json - contains: - - "one_line_summary" - - "mode" - - "lineage_dataset" - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt - contains: - - "# BUSCO version is:" - - "Results:" - - "busco:" - - path: output/busco/test-bacteria_odb10-busco/logs/busco.log - contains: - - "DEBUG:busco.run_BUSCO" - - "Results from dataset" - - "how to cite BUSCO" - - path: output/busco/versions.yml - -- name: busco test_busco_genome_multi_fasta - command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_multi_fasta -c ./tests/config/nextflow.config - tags: - - busco - files: - - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.txt - contains: - - "BUSCO version" - - "The lineage dataset is" - - "BUSCO was run in mode" - - "Complete BUSCOs" - - "Missing BUSCOs" - - "Dependencies and versions" - - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt - contains: - - "BUSCO version" - - "The lineage dataset is" - - "BUSCO was run in mode" - - "Complete BUSCOs" - - "Missing BUSCOs" - - "Dependencies and versions" - - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt - md5sum: 8c64c1a28b086ef2ee444f99cbed5f7d - - path: 
output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_err.log - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_out.log - md5sum: 8f047bdb33264d22a83920bc2c63f29a - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_err.log - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_out.log - contains: - - "# hmmsearch :: search profile(s) against a sequence database" - - "# target sequence database:" - - "Internal pipeline statistics summary:" - - "[ok]" - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_err.log - md5sum: c1fdc6977332f53dfe7f632733bb4585 - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_out.log - md5sum: 50752acb1c5a20be886bfdfc06635bcb - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/.checkpoint - contains: - - "Tool: prodigal" - - "Completed" - - "jobs" - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.faa - md5sum: 8166471fc5f08c82fd5643ab42327f9d - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.fna - md5sum: ddc508a18f60e7f3314534df50cdf8ca - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa - md5sum: 8166471fc5f08c82fd5643ab42327f9d - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna - md5sum: ddc508a18f60e7f3314534df50cdf8ca - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log - md5sum: c1fdc6977332f53dfe7f632733bb4585 - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log - md5sum: 50752acb1c5a20be886bfdfc06635bcb - - path: 
output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.faa - md5sum: e56fd59c38248dc21ac94355dca98121 - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.fna - md5sum: b365f84bf99c68357952e0b98ed7ce42 - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_err.log - md5sum: e5f14d7925ba14a0f9850542f3739894 - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_out.log - md5sum: d41971bfc1b621d4ffd2633bc47017ea - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/.bbtools_output/.checkpoint - contains: - - "Tool: bbtools" - - "Completed" - - "jobs" - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/full_table.tsv - md5sum: c9651b88b10871abc260ee655898e828 - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/hmmer_output.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/missing_busco_list.tsv - md5sum: 9939309df2da5419de88c32d1435c779 - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.txt - contains: - - "# BUSCO version is:" - - "Results:" - - "busco:" - - path: 
output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log - md5sum: 9caf1a1434414c78562eb0bbb9c0e53f - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log - contains: - - "# hmmsearch :: search profile(s) against a sequence database" - - "# target sequence database:" - - "Internal pipeline statistics summary:" - - "[ok]" - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log - md5sum: 538510cfc7483498210f01e53fe035ad - - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log - md5sum: 61050b0706addc9498b2088a2d6efa9a - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint - contains: - - "Tool: prodigal" - - "Completed" - - "jobs" - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa - md5sum: 836e9a80d33d8b89168f07ddc13ee991 - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna - md5sum: 20eeb75f86842e6e136f02bca8b73a9f - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa - md5sum: 836e9a80d33d8b89168f07ddc13ee991 - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna - md5sum: 20eeb75f86842e6e136f02bca8b73a9f - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log - md5sum: 538510cfc7483498210f01e53fe035ad - - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log - md5sum: 61050b0706addc9498b2088a2d6efa9a - - path: 
output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint - contains: - - "Tool: bbtools" - - "Completed" - - "jobs" - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv - md5sum: c56edab1dc1522e993c25ae2b730799f - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv - md5sum: b533ef30270f27160acce85a22d01bf5 - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt - contains: - - "# BUSCO version is:" - - "Results:" - - "busco:" - - path: output/busco/test-bacteria_odb10-busco/logs/busco.log - contains: - - "DEBUG:busco.run_BUSCO" - - "Results from dataset" - - "how to cite BUSCO" - - path: output/busco/versions.yml - -- name: busco test_busco_eukaryote_metaeuk - command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_eukaryote_metaeuk -c ./tests/config/nextflow.config - tags: - - busco - files: - - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt - contains: - - "BUSCO version" - - "The lineage dataset is" - - "BUSCO was run in mode" - - "Complete BUSCOs" - - "Missing BUSCOs" - - 
"Dependencies and versions" - - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt - md5sum: ff6d8277e452a83ce9456bbee666feb6 - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log - md5sum: e63debaa653f18f7405d936050abc093 - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint - contains: - - "Tool: bbtools" - - "Completed" - - "jobs" - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv - md5sum: bd880e90b9e5620a58943a3e0f9ff16b - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint - contains: - - "Tool: metaeuk" - - "Completed" - - "jobs" - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas - - path: 
output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa - md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2 - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv - md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988 - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt - contains: - - "# BUSCO version is:" - - "Results:" - - "busco:" - - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log - contains: - - "DEBUG:busco.run_BUSCO" - - "Results from dataset" - - "how to cite BUSCO" - - path: output/busco/versions.yml - -- name: busco test_busco_eukaryote_augustus - command: nextflow run ./tests/modules/nf-core/busco 
-entry test_busco_eukaryote_augustus -c ./tests/config/nextflow.config - tags: - - busco - files: - - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt - contains: - - "BUSCO version" - - "The lineage dataset is" - - "BUSCO was run in mode" - - "Complete BUSCOs" - - "Missing BUSCOs" - - "Dependencies and versions" - - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt - md5sum: ff6d8277e452a83ce9456bbee666feb6 - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log - md5sum: e63debaa653f18f7405d936050abc093 - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log - contains: - - "metaeuk" - - "easy-predict" - - "Compute score and coverage" - - "Time for processing:" - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log - contains: - - "metaeuk" - - "easy-predict" - - "Compute score and coverage" - - "Time for processing:" - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint - contains: - - "Tool: bbtools" - - "Completed" - - "jobs" - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz - - path: 
output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv - md5sum: bd880e90b9e5620a58943a3e0f9ff16b - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint - contains: - - "Tool: metaeuk" - - "Completed" - - "jobs" - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa - md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2 - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv - - path: 
output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv - md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988 - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt - contains: - - "# BUSCO version is:" - - "Results:" - - "busco:" - - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log - contains: - - "DEBUG:busco.run_BUSCO" - - "Results from dataset" - - "how to cite BUSCO" - - path: output/busco/versions.yml - -- name: busco test_busco_protein - command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_protein -c ./tests/config/nextflow.config - tags: - - busco - files: - - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.txt - contains: - - "BUSCO version" - - "The lineage dataset is" - - "BUSCO was run in mode" - - "Complete BUSCOs" - - "Missing BUSCOs" - - "Dependencies and versions" - - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt - md5sum: 7a65e6cbb6c56a2ea4e739ae0aa3297d - - path: output/busco/test-bacteria_odb10-busco/logs/busco.log - contains: - - "DEBUG:busco.run_BUSCO" - - "Results from dataset" - - "how to cite BUSCO" - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_err.log - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_out.log - contains: - - "# hmmsearch :: search profile(s) against a sequence database" - - "# target sequence database:" - - "Internal pipeline statistics summary:" - - "[ok]" - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz - - path: 
output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/full_table.tsv - md5sum: 0e34f1011cd83ea1d5d5103ec62b8922 - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/hmmer_output.tar.gz - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/missing_busco_list.tsv - md5sum: 9939309df2da5419de88c32d1435c779 - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.txt - contains: - - "# BUSCO version is:" - - "Results:" - - "busco:" - - path: output/busco/versions.yml - -- name: busco test_busco_transcriptome - command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_transcriptome -c ./tests/config/nextflow.config - tags: - - busco - files: - - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt - contains: - - "BUSCO version" - - "The lineage dataset is" - - "BUSCO was run in mode" - - "Complete BUSCOs" - - "Missing BUSCOs" - - "Dependencies and versions" - - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt - md5sum: 46118ecf60d1b87d22b96d80f4f03632 - - path: output/busco/test-bacteria_odb10-busco/logs/busco.log - contains: - - "DEBUG:busco.run_BUSCO" - - "Results from dataset" - - "how to cite BUSCO" - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/.checkpoint - contains: - - "Tool: makeblastdb" - - "Completed" - 
- "jobs" - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ndb - md5sum: 3788c017fe5e6f0f58224e9cdd21822b - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nhr - md5sum: 8ecd2ce392bb5e25ddbe1d85f879582e - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nin - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.njs - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.not - md5sum: 0c340e376c7e85d19f82ec1a833e6a6e - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nsq - md5sum: 532d5c0a7ea00fe95ca3c97cb3be6198 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ntf - md5sum: de1250813f0c7affc6d12dac9d0fb6bb - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nto - md5sum: ff74bd41f9cc9b011c63a32c4f7693bf - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_err.log - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_out.log - contains: - - "# hmmsearch :: search profile(s) against a sequence database" - - "# target sequence database:" - - "Internal pipeline statistics summary:" - - "[ok]" - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_err.log - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_out.log - contains: - - "Building a new DB" - - "Adding sequences from FASTA" - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_err.log - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_out.log - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/.checkpoint - contains: - - "Tool: tblastn" - - "Completed" - - "jobs" - - path: 
output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/coordinates.tsv - md5sum: cc30eed321944af293452bdbcfc24292 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_101.temp - md5sum: 73e9c65fc83fedc58f57f09b08f08238 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_119.temp - md5sum: 7fa4cc7955ec0cc36330a221c579b975 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_129.temp - md5sum: 6f1601c875d019e3f6f1f98ed8e988d4 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_138.temp - md5sum: 3f8e034686cd240c2330650d791bcae2 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_143.temp - md5sum: df3dfa8e9ba30ed70cf75b5e7abf2179 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_172.temp - md5sum: 7d463e0e6cf7169bc9077d8dc776dda1 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_178.temp - md5sum: 2288edf7fa4f88f51b4cf4d94086f77e - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_188.temp - md5sum: 029906abbad6d87fc57830dd548cac24 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_195.temp - md5sum: 4937f3b348774a31b1160a00297c29cc - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_210.temp - md5sum: afcb20ba4c466479d6b91c8c62251e1f - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_232.temp - md5sum: 2e1e823ce017345bd998191a39fa9924 - - path: 
output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_268.temp - md5sum: 08c2d82c34ecffbe1c638b410349412e - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_29.temp - md5sum: cd9b63cf93524284781535c888313764 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_44.temp - md5sum: d1929b742b24ebe379bf4801ca882dca - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_58.temp - md5sum: 69215765b010c05336538cb322c900b3 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_72.temp - md5sum: 6feaa1cc3b0899a147ea9d466878f3e3 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_80.temp - md5sum: 13625eae14e860a96ce17cd4e37e9d01 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_81.temp - md5sum: e14b2484649b0dbc8926815c207b806d - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_93.temp - md5sum: 6902c93691df00e690faea914c71839e - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_97.temp - md5sum: 0a0d9d38a83acbd5ad43c29cdf429988 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/tblastn.tsv - contains: - - "TBLASTN" - - "BLAST processed" - - "queries" - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz - - path: 
output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/full_table.tsv - md5sum: 24df25199e13c88bd892fc3e7b541ca0 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/hmmer_output.tar.gz - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/missing_busco_list.tsv - md5sum: e7232e2b8cca4fdfdd9e363b39ebbc81 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.json - contains: - - "one_line_summary" - - "mode" - - "dataset" - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.txt - contains: - - "# BUSCO version is:" - - "Results:" - - "busco:" - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/single_copy_proteins.faa - md5sum: e04b9465733577ae6e4bccb7aa01e720 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1024388at2.faa - md5sum: 7333c39a20258f20c7019ea0cd83157c - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1054741at2.faa - md5sum: ebb481e77a824685fbe04d8a2f3a0d7d - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1093223at2.faa - md5sum: 34621c7d499034e8f8e6b92fd4020a93 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1151822at2.faa - md5sum: aa89ca381c1c70c9c4e1380351ca7c2a - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/143460at2.faa - md5sum: f2e91d78b8dd3722840378789f29e8c8 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1491686at2.faa - md5sum: 73c25aef5c9cba7f4151804941b146ea - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1504821at2.faa - md5sum: 
cda556018d1f84ebe517e89f6fc107d0 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1574817at2.faa - md5sum: a9096c9fb8b25c78a72871ab0463acdc - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1592033at2.faa - md5sum: e463d25ce186c0cebfd749474f3a4c64 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1623045at2.faa - md5sum: f2cfd241590c6d8377286d6135480937 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1661836at2.faa - md5sum: 586569546fb9861502468e3d9ba2775c - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1674344at2.faa - md5sum: 24c658bee14ad84b062d81ad96642eb8 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1698718at2.faa - md5sum: 0b8e26ddf5149bbd8805be7af125208d - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1990650at2.faa - md5sum: 159320712ee01fb2ccb31a25df44eead - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/223233at2.faa - md5sum: 812629c0b06ac3d18661c2ca78de0c08 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/402899at2.faa - md5sum: f7ff4e1591342d30b77392a2e84b57d9 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/505485at2.faa - md5sum: 7b34a24fc49c540d46fcf96ff5129564 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/665824at2.faa - md5sum: 4cff2df64f6bcaff8bc19c234c8bcccd - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/776861at2.faa - md5sum: 613af7a3fea30ea2bece66f603b9284a - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/874197at2.faa - md5sum: a7cd1b13c9ef91c7ef4e31614166f197 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/932854at2.faa 
- md5sum: fe313ffd5efdb0fed887a04fba352552 - - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/95696at2.faa - md5sum: 4e1f30a2fea4dfbf9bb7fae2700622a0 - - path: output/busco/versions.yml diff --git a/modules/nf-core/busco/busco/tests/tags.yml b/modules/nf-core/busco/busco/tests/tags.yml deleted file mode 100644 index 7c4d2835..00000000 --- a/modules/nf-core/busco/busco/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -busco/busco: - - "modules/nf-core/busco/busco/**" diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml index 691d4c76..f9f54ee9 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/fastqc/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 26d47863..23e16634 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,13 +2,10 @@ process FASTQC { tag "${meta.id}" label 'process_medium' - conda "${moduleDir}/environment.yml" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" - 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : - 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) @@ -32,7 +29,7 @@ process FASTQC { // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label - def memory_in_mb = task.memory ? task.memory.toUnit('MB').toFloat() / task.cpus : null + def memory_in_mb = task.memory ? task.memory.toUnit('MB') / task.cpus : null // FastQC memory value allowed range (100 - 10000) def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb) @@ -50,7 +47,6 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 2b2e62b8..c8d9d025 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -29,9 +29,10 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. 
+ ontologies: [] output: - - html: - - meta: + html: + - - meta: type: map description: | Groovy Map containing sample information @@ -40,8 +41,9 @@ output: type: file description: FastQC report pattern: "*_{fastqc.html}" - - zip: - - meta: + ontologies: [] + zip: + - - meta: type: map description: | Groovy Map containing sample information @@ -50,11 +52,14 @@ output: type: file description: FastQC report archive pattern: "*_{fastqc.zip}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@drpatelh" - "@grst" diff --git a/modules/nf-core/flye/environment.yml b/modules/nf-core/flye/environment.yml index 87b97eb8..cbad0b1c 100644 --- a/modules/nf-core/flye/environment.yml +++ b/modules/nf-core/flye/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/flye/tests/tags.yml b/modules/nf-core/flye/tests/tags.yml deleted file mode 100644 index 31103d13..00000000 --- a/modules/nf-core/flye/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -flye: - - modules/nf-core/flye/** diff --git a/modules/nf-core/hifiasm/environment.yml b/modules/nf-core/hifiasm/environment.yml index 6aea679a..04d7b0d4 100644 --- a/modules/nf-core/hifiasm/environment.yml +++ b/modules/nf-core/hifiasm/environment.yml @@ -1,6 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::hifiasm=0.24.0 + - bioconda::hifiasm=0.25.0 diff --git a/modules/nf-core/hifiasm/main.nf b/modules/nf-core/hifiasm/main.nf index 9b3e4ad6..7330920e 
100644 --- a/modules/nf-core/hifiasm/main.nf +++ b/modules/nf-core/hifiasm/main.nf @@ -4,118 +4,107 @@ process HIFIASM { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hifiasm:0.24.0--h5ca1c30_0' : - 'biocontainers/hifiasm:0.24.0--h5ca1c30_0' }" + 'https://depot.galaxyproject.org/singularity/hifiasm:0.25.0--h5ca1c30_0' : + 'biocontainers/hifiasm:0.25.0--h5ca1c30_0' }" input: tuple val(meta) , path(long_reads) , path(ul_reads) tuple val(meta1), path(paternal_kmer_dump), path(maternal_kmer_dump) tuple val(meta2), path(hic_read1) , path(hic_read2) + tuple val(meta3), path(bin_files) output: - tuple val(meta), path("*.r_utg.gfa") , emit: raw_unitigs - tuple val(meta), path("*.ec.bin") , emit: corrected_reads - tuple val(meta), path("*.ovlp.source.bin") , emit: source_overlaps - tuple val(meta), path("*.ovlp.reverse.bin"), emit: reverse_overlaps - tuple val(meta), path("*.bp.p_ctg.gfa") , emit: processed_contigs, optional: true - tuple val(meta), path("*.p_utg.gfa") , emit: processed_unitigs, optional: true - tuple val(meta), path("*.asm.p_ctg.gfa") , emit: primary_contigs , optional: true - tuple val(meta), path("*.asm.a_ctg.gfa") , emit: alternate_contigs, optional: true - tuple val(meta), path("*.hap1.p_ctg.gfa") , emit: paternal_contigs , optional: true - tuple val(meta), path("*.hap2.p_ctg.gfa") , emit: maternal_contigs , optional: true - tuple val(meta), path("*.log") , emit: log - path "versions.yml" , emit: versions + tuple val(meta), path("*.r_utg.gfa") , emit: raw_unitigs + tuple val(meta), path("*.bin") , emit: bin_files , optional: true + tuple val(meta), path("*.p_utg.gfa") , emit: processed_unitigs, optional: true + tuple val(meta), path("${prefix}.{p_ctg,bp.p_ctg,hic.p_ctg}.gfa"), emit: primary_contigs , optional: true + tuple val(meta), path("${prefix}.{a_ctg,hic.a_ctg}.gfa") , emit: alternate_contigs, optional: 
true + tuple val(meta), path("${prefix}.*.hap1.p_ctg.gfa") , emit: hap1_contigs , optional: true + tuple val(meta), path("${prefix}.*.hap2.p_ctg.gfa") , emit: hap2_contigs , optional: true + tuple val(meta), path("*.ec.fa.gz") , emit: corrected_reads , optional: true + tuple val(meta), path("*.ovlp.paf.gz") , emit: read_overlaps , optional: true + tuple val(meta), path("${prefix}.stderr.log") , emit: log + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def ultralong = ul_reads ? "--ul ${ul_reads}" : "" + prefix = task.ext.prefix ?: "${meta.id}" - if ((paternal_kmer_dump) && (maternal_kmer_dump) && (hic_read1) && (hic_read2)) { - error "Hifiasm Trio-binning and Hi-C integrated should not be used at the same time" - } else if ((paternal_kmer_dump) && !(maternal_kmer_dump)) { - error "Hifiasm Trio-binning requires maternal data" - } else if (!(paternal_kmer_dump) && (maternal_kmer_dump)) { - error "Hifiasm Trio-binning requires paternal data" - } else if ((paternal_kmer_dump) && (maternal_kmer_dump)) { - """ - hifiasm \\ - $args \\ - -o ${prefix}.asm \\ - -t $task.cpus \\ - -1 $paternal_kmer_dump \\ - -2 $maternal_kmer_dump \\ - $ultralong \\ - $long_reads \\ - 2> >( tee ${prefix}.stderr.log >&2 ) + def long_reads_sorted = long_reads instanceof List ? long_reads.sort{ it.name } : long_reads + def ul_reads_sorted = ul_reads instanceof List ? ul_reads.sort{ it.name } : ul_reads + def ultralong = ul_reads ? 
"--ul ${ul_reads_sorted}" : "" + if([paternal_kmer_dump, maternal_kmer_dump].any() && [hic_read1, hic_read2].any()) { + log.error("ERROR: hifiasm trio binning mode and Hi-C phasing can not be used at the same time.") + } - cat <<-END_VERSIONS > versions.yml - "${task.process}": - hifiasm: \$(hifiasm --version 2>&1) - END_VERSIONS - """ - } else if ((hic_read1) && !(hic_read2)) { - error "Hifiasm Hi-C integrated requires paired-end data (only R1 specified here)" - } else if (!(hic_read1) && (hic_read2)) { - error "Hifiasm Hi-C integrated requires paired-end data (only R2 specified here)" - } else if ((hic_read1) && (hic_read2)) { - """ - hifiasm \\ - $args \\ - -o ${prefix}.asm \\ - -t $task.cpus \\ - --h1 $hic_read1 \\ - --h2 $hic_read2 \\ - $ultralong \\ - $long_reads \\ - 2> >( tee ${prefix}.stderr.log >&2 ) + def input_trio = "" + if([paternal_kmer_dump, maternal_kmer_dump].any()) { + if(![paternal_kmer_dump, maternal_kmer_dump].every()) { + log.error("ERROR: Either the maternal or paternal kmer dump is missing!") + } else { + input_trio = "-1 ${paternal_kmer_dump} -2 ${maternal_kmer_dump}" + } + } + def input_hic = "" + if([hic_read1, hic_read2].any()) { + if(![hic_read1, hic_read2].every()) { + log.error("ERROR: Either the forward or reverse Hi-C reads are missing!") + } else { + input_hic = "--h1 ${hic_read1} --h2 ${hic_read2}" + } + } + """ + hifiasm \\ + $args \\ + -t ${task.cpus} \\ + ${input_trio} \\ + ${input_hic} \\ + ${ultralong} \\ + -o ${prefix} \\ + ${long_reads_sorted} \\ + 2> >( tee ${prefix}.stderr.log >&2 ) - cat <<-END_VERSIONS > versions.yml - "${task.process}": - hifiasm: \$(hifiasm --version 2>&1) - END_VERSIONS - """ - } else { // Phasing with Hi-C data is not supported yet - """ - hifiasm \\ - $args \\ - -o ${prefix}.asm \\ - -t $task.cpus \\ - $ultralong \\ - $long_reads \\ - 2> >( tee ${prefix}.stderr.log >&2 ) + if [ -f ${prefix}.ec.fa ]; then + gzip ${prefix}.ec.fa + fi - cat <<-END_VERSIONS > versions.yml - "${task.process}": - 
hifiasm: \$(hifiasm --version 2>&1) - END_VERSIONS - """ - } - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.asm.r_utg.gfa - touch ${prefix}.asm.ec.bin - touch ${prefix}.asm.ovlp.source.bin - touch ${prefix}.asm.ovlp.reverse.bin - touch ${prefix}.asm.bp.p_ctg.gfa - touch ${prefix}.asm.p_utg.gfa - touch ${prefix}.asm.p_ctg.gfa - touch ${prefix}.asm.a_ctg.gfa - touch ${prefix}.asm.hap1.p_ctg.gfa - touch ${prefix}.asm.hap2.p_ctg.gfa - touch ${prefix}.stderr.log + if [ -f ${prefix}.ovlp.paf ]; then + gzip ${prefix}.ovlp.paf + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hifiasm: \$(hifiasm --version 2>&1) + END_VERSIONS + """ - cat <<-END_VERSIONS > versions.yml - "${task.process}": - hifiasm: \$(hifiasm --version 2>&1) - END_VERSIONS - """ + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.r_utg.gfa + touch ${prefix}.ec.bin + touch ${prefix}.ovlp.source.bin + touch ${prefix}.ovlp.reverse.bin + touch ${prefix}.hic.tlb.bin + touch ${prefix}.hic.lk.bin + touch ${prefix}.bp.p_ctg.gfa + touch ${prefix}.p_utg.gfa + touch ${prefix}.p_ctg.gfa + touch ${prefix}.a_ctg.gfa + touch ${prefix}.bp.hap1.p_ctg.gfa + touch ${prefix}.bp.hap2.p_ctg.gfa + echo "" | gzip > ${prefix}.ec.fa.gz + echo "" | gzip > ${prefix}.ovlp.paf.gz + touch ${prefix}.stderr.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hifiasm: \$(hifiasm --version 2>&1) + END_VERSIONS + """ } diff --git a/modules/nf-core/hifiasm/meta.yml b/modules/nf-core/hifiasm/meta.yml index b255571f..fcd211db 100644 --- a/modules/nf-core/hifiasm/meta.yml +++ b/modules/nf-core/hifiasm/meta.yml @@ -25,9 +25,11 @@ input: - long_reads: type: file description: Long reads PacBio HiFi reads or ONT reads (requires ext.arg '--ont'). + ontologies: [] - ul_reads: type: file description: ONT long reads to use with --ul. 
+ ontologies: [] - - meta1: type: map description: | @@ -36,10 +38,12 @@ input: type: file description: Yak kmer dump file for paternal reads (can be used for haplotype resolution). It can have an arbitrary extension. + ontologies: [] - maternal_kmer_dump: type: file description: Yak kmer dump file for maternal reads (can be used for haplotype resolution). It can have an arbitrary extension. + ontologies: [] - - meta2: type: map description: | @@ -47,9 +51,19 @@ input: - hic_read1: type: file description: Hi-C data Forward reads. + ontologies: [] - hic_read2: type: file description: Hi-C data Reverse reads. + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing information about the input bin files + - bin_files: + type: file + description: bin files produced during a previous Hifiasm run + ontologies: [] output: - raw_unitigs: - meta: @@ -61,116 +75,133 @@ output: type: file description: Raw unitigs pattern: "*.r_utg.gfa" - - corrected_reads: + - bin_files: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.ec.bin": + - "*.bin": type: file - description: Corrected reads - pattern: "*.ec.bin" - - source_overlaps: - - meta: - type: map description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*.ovlp.source.bin": - type: file - description: Source overlaps - pattern: "*.ovlp.source.bin" - - reverse_overlaps: + Binary files containing processed data for hifiasm, including + error-corrected reads, read overlaps, and Hi-C alignments. Can + be re-used as an input for subsequent re-runs of hifiasm with new + inputs or modified parameters in order to save recomputation of + initial results, which are the most computationally-expensive + steps. + pattern: "*.bin" + - processed_unitigs: - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - "*.ovlp.reverse.bin": + - "*.p_utg.gfa": type: file - description: Reverse overlaps - pattern: "*.ovlp.reverse.bin" - - processed_contigs: + description: Processed unitigs + pattern: "*.p_utg.gfa" + - primary_contigs: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.bp.p_ctg.gfa": + - ${prefix}.{p_ctg,bp.p_ctg,hic.p_ctg}.gfa: type: file - description: Processed contigs - pattern: "*.bp.p_ctg.gfa" - - processed_unitigs: + description: Contigs representing the primary assembly + pattern: "${prefix}.{p_ctg,bp.p_ctg,hic.p_ctg}.gfa" + - alternate_contigs: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.p_utg.gfa": + - ${prefix}.{a_ctg,hic.a_ctg}.gfa: type: file - description: Processed unitigs - pattern: "*.p_utg.gfa" - - primary_contigs: + description: Contigs representing the alternative assembly + pattern: "${prefix}.{a_ctg,hic.a_ctg}.gfa" + - hap1_contigs: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.asm.p_ctg.gfa": + pattern: "${prefix}.*.hap1.p_ctg.gfa" + - ${prefix}.*.hap1.p_ctg.gfa: type: file - description: Primary contigs - pattern: "*.asm.p_ctg.gfa" - - alternate_contigs: + description: | + Contigs for the first haplotype. How the haplotypes are represented + depends on the input mode; in standard HiFi-only mode, these + are partially-phased parental contigs. In Hi-C mode, they + are fully phased parental contigs, but the phasing is not maintained + between contigs. In trio mode, they are fully phased paternal contigs + all originating from a single parental haplotype. + pattern: "${prefix}.*.hap1.p_ctg.gfa" + - hap2_contigs: - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - "*.asm.a_ctg.gfa": + pattern: "${prefix}.*.hap2.p_ctg.gfa" + - ${prefix}.*.hap2.p_ctg.gfa: type: file - description: Alternative contigs - pattern: "*.asm.a_ctg.gfa" - - paternal_contigs: + description: | + Contigs for the second haplotype. How the haplotypes are represented + depends on the input mode; in standard HiFi-only mode, these + are partially-phased parental contigs. In Hi-C mode, they + are fully phased parental contigs, but the phasing is not maintained + between contigs. In trio mode, they are fully phased paternal contigs + all originating from a single parental haplotype. + pattern: "${prefix}.*.hap2.p_ctg.gfa" + - corrected_reads: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.hap1.p_ctg.gfa": + - "*.ec.fa.gz": type: file - description: Paternal contigs - pattern: "*.hap1.p_ctg.gfa" - - maternal_contigs: + description: | + If option --write-ec specified, a gzipped fasta file containing the error corrected + reads produced by the hifiasm error correction module + pattern: "*.ec.fa.gz" + - read_overlaps: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.hap2.p_ctg.gfa": + - "*.ovlp.paf.gz": type: file - description: Maternal contigs - pattern: "*.hap2.p_ctg.gfa" + description: | + If option --write-paf specified, a gzipped paf file describing the overlaps + among all error-corrected reads + pattern: "*.ovlp.paf.gz" - log: - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - "*.log": + pattern: "*.stderr.log" + - ${prefix}.stderr.log: type: file description: Stderr log - pattern: "*.log" + pattern: "*.stderr.log" - versions: - versions.yml: type: file description: File containing software versions pattern: "versions.yml" + ontologies: [] authors: - "@sidorov-si" - "@scorreard" - "@mbeavitt" - "@schmytzi" + - "@prototaxites" maintainers: - "@sidorov-si" - "@scorreard" diff --git a/modules/nf-core/hifiasm/tests/main.nf.test b/modules/nf-core/hifiasm/tests/main.nf.test index 12c69bd9..53edf404 100644 --- a/modules/nf-core/hifiasm/tests/main.nf.test +++ b/modules/nf-core/hifiasm/tests/main.nf.test @@ -9,8 +9,12 @@ nextflow_process { tag "modules_nfcore" tag "hifiasm" - test("homo_sapiens pacbio hifi [fastq, [,], [,] ]") { + test("homo_sapiens pacbio hifi [fastq, [,], [,], [,]]") { when { + params { + extra_output = "--write-ec --write-paf" + } + process { """ input[0] = [ @@ -28,30 +32,177 @@ nextflow_process { [], [] ] + input[3] = [ + [], + [] + ] """ } } then { + def bin_files = process.out.bin_files.get(0).get(1).collect { bin -> file(bin).name } + def expected_bin_files = [ + "test.ec.bin", + "test.ovlp.reverse.bin", + "test.ovlp.source.bin" + ] + assertAll( { assert process.success }, - { assert file(process.out.corrected_reads.get(0).get(1)).exists() }, - { assert file(process.out.source_overlaps.get(0).get(1)).exists() }, - { assert file(process.out.reverse_overlaps.get(0).get(1)).exists() }, + { assertContainsInAnyOrder(bin_files, expected_bin_files) }, { assert file(process.out.log.get(0).get(1)).exists() }, { assert snapshot( process.out.raw_unitigs, - process.out.processed_contigs, process.out.processed_unitigs, - process.out.paternal_contigs, - process.out.maternal_contigs, + process.out.primary_contigs, + process.out.hap1_contigs, + process.out.hap2_contigs, + process.out.fasta, + process.out.paf, process.out.versions ).match() } ) } } - test("homo_sapiens pacbio hifi [fastq, [yak, yak], 
[,] ]") { + test("homo_sapiens pacbio hifi [fastq, [,], [,], [bin] ]") { + + setup { + run("HIFIASM", alias: "HIFIASM_INITIAL") { + script "../main.nf" + process { + """ + input[0] = [ + [ id : 'test'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + [] + ] + input[1] = [ + [], + [], + [] + ] + input[2] = [ + [], + [], + [] + ] + input[3] = [ + [], + [] + ] + """ + } + } + } + when { + params { + extra_output = "" + } + + process { + """ + input[0] = [ + [ id : 'test'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + [] + ] + input[1] = [ + [], + [], + [] + ] + input[2] = [ + [], + [], + [] + ] + input[3] = HIFIASM_INITIAL.out.bin_files + """ + } + } + then { + assertAll( + { assert process.success }, + { assert file(process.out.log.get(0).get(1)).exists() }, + { assert file(process.out.log.get(0).get(1)).readLines().first().contains("Reads has been loaded.") }, + { assert snapshot( + process.out.raw_unitigs, + process.out.processed_unitigs, + process.out.primary_contigs, + process.out.hap1_contigs, + process.out.hap2_contigs, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens pacbio hifi [fastq x2, [,], [,], [,] ]") { + + when { + params { + extra_output = "" + } + + process { + """ + input[0] = Channel.of([ + [ id : 'test'], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq', checkIfExists: true), + ], + [] + ]) + input[1] = [ + [], + [], + [] + ] + input[2] = [ + [], + [], + [] + ] + input[3] = [ + [], + [] + ] + """ + } + } + then { + def bin_files = process.out.bin_files.get(0).get(1).collect { bin -> file(bin).name } + def expected_bin_files = [ + "test.ec.bin", + "test.ovlp.reverse.bin", + "test.ovlp.source.bin" + ] + + 
assertAll( + { assert process.success }, + { assertContainsInAnyOrder(bin_files, expected_bin_files) }, + { assert file(process.out.log.get(0).get(1)).exists() }, + { assert snapshot( + process.out.raw_unitigs, + process.out.processed_unitigs, + process.out.primary_contigs, + process.out.hap1_contigs, + process.out.hap2_contigs, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens pacbio hifi [fastq, [yak, yak], [,], [,] ]") { + when { + params { + extra_output = "" + } + process { """ input[0] = [ @@ -69,29 +220,47 @@ nextflow_process { [], [] ] + input[3] = [ + [], + [] + ] """ } } then { + def bin_files = process.out.bin_files.get(0).get(1).collect { bin -> file(bin).name } + def expected_bin_files = [ + "test.ec.bin", + "test.ovlp.reverse.bin", + "test.ovlp.source.bin", + "test.hap1.phase.bin", + "test.hap2.phase.bin" + ] + assertAll( { assert process.success }, - { assert file(process.out.corrected_reads.get(0).get(1)).exists() }, - { assert file(process.out.source_overlaps.get(0).get(1)).exists() }, - { assert file(process.out.reverse_overlaps.get(0).get(1)).exists() }, + { assertContainsInAnyOrder(bin_files, expected_bin_files) }, + { assert file(process.out.hap1_contigs.get(0).get(1)).exists() }, + { assert file(process.out.hap1_contigs.get(0).get(1)).name == "test.dip.hap1.p_ctg.gfa" }, + { assert file(process.out.hap2_contigs.get(0).get(1)).exists() }, + { assert file(process.out.hap2_contigs.get(0).get(1)).name == "test.dip.hap2.p_ctg.gfa" }, { assert file(process.out.log.get(0).get(1)).exists() }, - { assert file(process.out.maternal_contigs.get(0).get(1)).length() == 0}, { assert snapshot( process.out.raw_unitigs, process.out.processed_unitigs, - process.out.paternal_contigs, + process.out.hap1_contigs, process.out.versions ).match() } ) } } - test("homo_sapiens pacbio hifi [fastq, [,], [fastq, fastq] ]") { + test("homo_sapiens pacbio hifi [fastq, [,], [fastq, fastq], [,] ]") { when { + params { + extra_output = "" + } + process { 
""" input[0] = [ @@ -109,29 +278,49 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + input[3] = [ + [], + [] + ] """ } } then { + def bin_files = process.out.bin_files.get(0).get(1).collect { bin -> file(bin).name } + def expected_bin_files = [ + "test.ec.bin", + "test.ovlp.reverse.bin", + "test.ovlp.source.bin", + "test.hic.lk.bin", + "test.hic.tlb.bin" + ] + assertAll( { assert process.success }, - { assert file(process.out.corrected_reads.get(0).get(1)).exists() }, - { assert file(process.out.source_overlaps.get(0).get(1)).exists() }, - { assert file(process.out.reverse_overlaps.get(0).get(1)).exists() }, + { assertContainsInAnyOrder(bin_files, expected_bin_files) }, + { assert file(process.out.hap1_contigs.get(0).get(1)).exists() }, + { assert file(process.out.hap1_contigs.get(0).get(1)).name == "test.hic.hap1.p_ctg.gfa" }, + { assert file(process.out.hap2_contigs.get(0).get(1)).exists() }, + { assert file(process.out.hap2_contigs.get(0).get(1)).name == "test.hic.hap2.p_ctg.gfa" }, { assert file(process.out.log.get(0).get(1)).exists() }, - { assert file(process.out.maternal_contigs.get(0).get(1)).length() == 0}, { assert snapshot( process.out.raw_unitigs, process.out.processed_unitigs, - process.out.paternal_contigs, + process.out.primary_contigs, + process.out.alternate_contigs, + process.out.hap1_contigs, process.out.versions ).match() } ) } } - test("homo_sapiens pacbio hifi [fastq, [yak, yak], [fastq, fastq] ]") { + test("homo_sapiens pacbio hifi [fastq, [yak, yak], [fastq, fastq], [,] ]") { when { + params { + extra_output = "" + } + process { """ input[0] = [ [ id : 'test'], @@ -148,7 +337,12 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz, checkIfExists: true), 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz, checkIfExists: true) ] + input[3] = [ + [], + [] + ] """ + } } then { assertAll( @@ -157,8 +351,13 @@ nextflow_process { } } - test("homo_sapiens pacbio hifi [fastq, [yak, ], [,] ]") { + test("homo_sapiens pacbio hifi [fastq, [yak, ], [,], [,] ]") { when { + params { + extra_output = "" + } + + process { """ input[0] = [ [ id : 'test'], @@ -171,7 +370,12 @@ nextflow_process { [] ] input[2] = [] + input[3] = [ + [], + [] + ] """ + } } then { assertAll( @@ -180,8 +384,13 @@ nextflow_process { } } - test("homo_sapiens pacbio hifi [fastq, [,], [, fastq] ]") { + test("homo_sapiens pacbio hifi [fastq, [,], [, fastq], [,] ]") { when { + params { + extra_output = "" + } + + process { """ input[0] = [ [ id : 'test'], @@ -194,7 +403,12 @@ nextflow_process { [], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + input[3] = [ + [], + [] + ] """ + } } then { assertAll( @@ -203,9 +417,13 @@ nextflow_process { } } - test("homo_sapiens pacbio hifi [fastq, [,], [,] ] - stub") { + test("homo_sapiens pacbio hifi [fastq, [,], [,], [,] ] - stub") { options "-stub" when { + params { + extra_output = "" + } + process { """ input[0] = [ @@ -223,6 +441,10 @@ nextflow_process { [], [] ] + input[3] = [ + [], + [] + ] """ } } diff --git a/modules/nf-core/hifiasm/tests/main.nf.test.snap b/modules/nf-core/hifiasm/tests/main.nf.test.snap index e9a0de59..cf8a7eba 100644 --- a/modules/nf-core/hifiasm/tests/main.nf.test.snap +++ b/modules/nf-core/hifiasm/tests/main.nf.test.snap @@ -1,12 +1,12 @@ { - "homo_sapiens pacbio hifi [fastq, [,], [,] ]": { + "homo_sapiens pacbio hifi [fastq, [,], [fastq, fastq], [,] ]": { "content": [ [ [ { "id": "test" }, - "test.asm.bp.r_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" + "test.hic.r_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" ] ], [ @@ -14,7 +14,7 @@ { "id": "test" }, - 
"test.asm.bp.p_ctg.gfa:md5,c3225425a73a8d6d46536dab72380650" + "test.hic.p_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" ] ], [ @@ -22,15 +22,38 @@ { "id": "test" }, - "test.asm.bp.p_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" + "test.hic.p_ctg.gfa:md5,6d0ec1ec65b1d2b295ac94b6fef25a27" ] ], + [ + + ], + [ + [ + { + "id": "test" + }, + "test.hic.hap1.p_ctg.gfa:md5,f67a8fdfa756961360732c79d189054d" + ] + ], + [ + "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-15T14:24:48.67437687" + }, + "homo_sapiens pacbio hifi [fastq, [,], [,], [bin] ]": { + "content": [ [ [ { "id": "test" }, - "test.asm.bp.hap1.p_ctg.gfa:md5,78a86b6d5741a680e22a427ca9faa74a" + "test.bp.r_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" ] ], [ @@ -38,27 +61,59 @@ { "id": "test" }, - "test.asm.bp.hap2.p_ctg.gfa:md5,ac2116fd2f22c67d4c304cbf9b9f7793" + "test.bp.p_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" ] ], [ - "versions.yml:md5,a2ea36e18c39850b1680302d9f6c950f" + [ + { + "id": "test" + }, + "test.bp.p_ctg.gfa:md5,c3225425a73a8d6d46536dab72380650" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bp.hap1.p_ctg.gfa:md5,78a86b6d5741a680e22a427ca9faa74a" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bp.hap2.p_ctg.gfa:md5,ac2116fd2f22c67d4c304cbf9b9f7793" + ] + ], + [ + "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.5" }, - "timestamp": "2024-11-28T10:51:30.175326435" + "timestamp": "2025-04-15T14:24:20.599937477" }, - "homo_sapiens pacbio hifi [fastq, [,], [fastq, fastq] ]": { + "homo_sapiens pacbio hifi [fastq x2, [,], [,], [,] ]": { "content": [ [ [ { "id": "test" }, - "test.asm.hic.r_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" + "test.bp.r_utg.gfa:md5,e6e38ac76f6b73142c3fe403f71d27b0" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bp.p_utg.gfa:md5,e6e38ac76f6b73142c3fe403f71d27b0" ] ], [ @@ -66,7 +121,7 @@ 
{ "id": "test" }, - "test.asm.hic.p_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" + "test.bp.p_ctg.gfa:md5,43890a1832d8f26de263e57dc5e3b8de" ] ], [ @@ -74,20 +129,28 @@ { "id": "test" }, - "test.asm.hic.hap1.p_ctg.gfa:md5,f67a8fdfa756961360732c79d189054d" + "test.bp.hap1.p_ctg.gfa:md5,7d7ea2bed472de263f6ec3521959b0d9" ] ], [ - "versions.yml:md5,a2ea36e18c39850b1680302d9f6c950f" + [ + { + "id": "test" + }, + "test.bp.hap2.p_ctg.gfa:md5,ce096a66c9bba039c6a22ba9e9409d01" + ] + ], + [ + "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.5" }, - "timestamp": "2024-11-28T10:52:15.111066189" + "timestamp": "2025-04-15T14:24:28.744387853" }, - "homo_sapiens pacbio hifi [fastq, [,], [,] ] - stub": { + "homo_sapiens pacbio hifi [fastq, [,], [,], [,] ] - stub": { "content": [ { "0": [ @@ -95,7 +158,7 @@ { "id": "test" }, - "test.asm.r_utg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.r_utg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -103,26 +166,24 @@ { "id": "test" }, - "test.asm.ec.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "test.ec.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.hic.lk.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.hic.tlb.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.ovlp.reverse.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.ovlp.source.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] ], "10": [ - [ - { - "id": "test" - }, - "test.stderr.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "11": [ - "versions.yml:md5,a2ea36e18c39850b1680302d9f6c950f" + "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" ], "2": [ [ { "id": "test" }, - "test.asm.ovlp.source.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.p_utg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "3": [ @@ -130,7 +191,10 @@ { "id": "test" }, - "test.asm.ovlp.reverse.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "test.bp.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"test.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] ], "4": [ @@ -138,7 +202,7 @@ { "id": "test" }, - "test.asm.bp.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.a_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "5": [ @@ -146,7 +210,7 @@ { "id": "test" }, - "test.asm.p_utg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bp.hap1.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "6": [ @@ -154,7 +218,7 @@ { "id": "test" }, - "test.asm.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bp.hap2.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "7": [ @@ -162,7 +226,7 @@ { "id": "test" }, - "test.asm.a_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.ec.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "8": [ @@ -170,7 +234,7 @@ { "id": "test" }, - "test.asm.hap1.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.ovlp.paf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "9": [ @@ -178,7 +242,7 @@ { "id": "test" }, - "test.asm.hap2.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.stderr.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "alternate_contigs": [ @@ -186,55 +250,64 @@ { "id": "test" }, - "test.asm.a_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.a_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "corrected_reads": [ + "bin_files": [ [ { "id": "test" }, - "test.asm.ec.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "test.ec.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.hic.lk.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.hic.tlb.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.ovlp.reverse.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.ovlp.source.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] ], - "log": [ + "corrected_reads": [ [ { "id": "test" }, - "test.stderr.log:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.ec.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "maternal_contigs": [ + "hap1_contigs": [ [ { "id": "test" }, - 
"test.asm.hap2.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bp.hap1.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "paternal_contigs": [ + "hap2_contigs": [ [ { "id": "test" }, - "test.asm.hap1.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bp.hap2.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "primary_contigs": [ + "log": [ [ { "id": "test" }, - "test.asm.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.stderr.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "processed_contigs": [ + "primary_contigs": [ [ { "id": "test" }, - "test.asm.bp.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "test.bp.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.p_ctg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] ], "processed_unitigs": [ @@ -242,7 +315,7 @@ { "id": "test" }, - "test.asm.p_utg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.p_utg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "raw_unitigs": [ @@ -250,44 +323,90 @@ { "id": "test" }, - "test.asm.r_utg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "reverse_overlaps": [ - [ - { - "id": "test" - }, - "test.asm.ovlp.reverse.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.r_utg.gfa:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "source_overlaps": [ + "read_overlaps": [ [ { "id": "test" }, - "test.asm.ovlp.source.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.ovlp.paf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "versions": [ - "versions.yml:md5,a2ea36e18c39850b1680302d9f6c950f" + "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-17T15:56:54.410648332" + }, + "homo_sapiens pacbio hifi [fastq, [,], [,], [,]]": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.bp.r_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bp.p_utg.gfa:md5,6a200e4bcb5d8d8fa6ed75874d26edc6" + ] + ], + [ + [ + { + 
"id": "test" + }, + "test.bp.p_ctg.gfa:md5,c3225425a73a8d6d46536dab72380650" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bp.hap1.p_ctg.gfa:md5,78a86b6d5741a680e22a427ca9faa74a" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bp.hap2.p_ctg.gfa:md5,ac2116fd2f22c67d4c304cbf9b9f7793" + ] + ], + null, + null, + [ + "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-11-28T10:53:06.003871811" + "timestamp": "2025-04-15T14:24:12.033205578" }, - "homo_sapiens pacbio hifi [fastq, [yak, yak], [,] ]": { + "homo_sapiens pacbio hifi [fastq, [yak, yak], [,], [,] ]": { "content": [ [ [ { "id": "test" }, - "test.asm.dip.r_utg.gfa:md5,68361ac3e8babd51f3891d1637ca0fdc" + "test.dip.r_utg.gfa:md5,68361ac3e8babd51f3891d1637ca0fdc" ] ], [ @@ -295,7 +414,7 @@ { "id": "test" }, - "test.asm.dip.p_utg.gfa:md5,68361ac3e8babd51f3891d1637ca0fdc" + "test.dip.p_utg.gfa:md5,68361ac3e8babd51f3891d1637ca0fdc" ] ], [ @@ -303,17 +422,17 @@ { "id": "test" }, - "test.asm.dip.hap1.p_ctg.gfa:md5,eed5da5f3dd415dbb711edb61a09802f" + "test.dip.hap1.p_ctg.gfa:md5,eed5da5f3dd415dbb711edb61a09802f" ] ], [ - "versions.yml:md5,a2ea36e18c39850b1680302d9f6c950f" + "versions.yml:md5,b7828129854c24ce80a025f90bf3f557" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.5" }, - "timestamp": "2024-11-28T10:51:45.301359171" + "timestamp": "2025-04-15T14:24:37.330378652" } -} +} \ No newline at end of file diff --git a/modules/nf-core/hifiasm/tests/nextflow.config b/modules/nf-core/hifiasm/tests/nextflow.config index 8dc2ae48..ead4b539 100644 --- a/modules/nf-core/hifiasm/tests/nextflow.config +++ b/modules/nf-core/hifiasm/tests/nextflow.config @@ -1,3 +1,3 @@ process { - ext.args = "-f0" + ext.args = "-f0 ${params.extra_output}" } diff --git a/modules/nf-core/liftoff/environment.yml b/modules/nf-core/liftoff/environment.yml index 94c10a3d..bdac6d51 100644 --- 
a/modules/nf-core/liftoff/environment.yml +++ b/modules/nf-core/liftoff/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/lima/environment.yml b/modules/nf-core/lima/environment.yml index 7c137245..2e56e30d 100644 --- a/modules/nf-core/lima/environment.yml +++ b/modules/nf-core/lima/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/lima/tests/tags.yml b/modules/nf-core/lima/tests/tags.yml deleted file mode 100644 index bf24addd..00000000 --- a/modules/nf-core/lima/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -lima: - - modules/nf-core/lima/** diff --git a/modules/local/ragtag/environment.yml b/modules/nf-core/links/environment.yml similarity index 84% rename from modules/local/ragtag/environment.yml rename to modules/nf-core/links/environment.yml index 756805f7..9b3fd0b6 100644 --- a/modules/local/ragtag/environment.yml +++ b/modules/nf-core/links/environment.yml @@ -1,7 +1,6 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - "bioconda::ragtag=2.1.0" + - "bioconda::links=2.0.1" diff --git a/modules/nf-core/links/main.nf b/modules/nf-core/links/main.nf new file mode 100644 index 00000000..c55bc661 --- /dev/null +++ b/modules/nf-core/links/main.nf @@ -0,0 +1,85 @@ +process LINKS { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/links:2.0.1--h4ac6f70_5': + 'biocontainers/links:2.0.1--h4ac6f70_5' }" + + input: + tuple val(meta), path(assembly) + tuple val(meta2), path(reads) + + output: + tuple val(meta), path("*.log"), emit: log + tuple val(meta), path("*.pairing_distribution.csv"), emit: pairing_distribution, optional: true + tuple val(meta), path("*.pairing_issues"), emit: pairing_issues + tuple val(meta), path("*.scaffolds"), emit: scaffolds_csv + tuple val(meta), path("*.scaffolds.fa"), emit: scaffolds_fasta + tuple val(meta), path("*.bloom"), emit: bloom + tuple val(meta), path("*.gv"), emit: scaffolds_graph + tuple val(meta), path("*.assembly_correspondence.tsv"), emit: assembly_correspondence + tuple val(meta), path("*.simplepair_checkpoint.tsv"), emit: simplepair_checkpoint, optional: true + tuple val(meta), path("*.tigpair_checkpoint.tsv"), emit: tigpair_checkpoint + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + // Currently LINKS does not support more than 4 threads + def nthreads = "${task.cpus}" < 4 ? 
"${task.cpus}" : 4 + def args = task.ext.args ?: "" + """ + if [[ ${assembly} == *.gz ]]; + then + gzip -dc ${assembly} > assembly.fa + else + ln -s ${assembly} assembly.fa + fi + + for read_file in ${reads}; + do + if [[ \$read_file == *.gz ]]; + then + gzip -dc \$read_file > \$(basename \$read_file .gz) + echo \$(basename \$read_file .gz) >> readfile.fof + else + echo \$read_file >> readfile.fof + fi + done + + LINKS -f assembly.fa \\ + -s readfile.fof \\ + -j $nthreads \\ + -b ${prefix} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + LINKS: \$(echo \$(LINKS | grep -o 'LINKS v.*' | sed 's/LINKS v//')) + END_VERSIONS + """ + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.log + touch ${prefix}.pairing_distribution.csv + touch ${prefix}.pairing_issues + touch ${prefix}.scaffolds + touch ${prefix}.scaffolds.fa + touch ${prefix}.bloom + touch ${prefix}.gv + touch ${prefix}.assembly_correspondence.tsv + touch ${prefix}.simplepair_checkpoint.tsv + touch ${prefix}.tigpair_checkpoint.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + LINKS: \$(echo \$(LINKS | grep -o 'LINKS v.*' | sed 's/LINKS v//')) + END_VERSIONS + """ + } diff --git a/modules/nf-core/links/meta.yml b/modules/nf-core/links/meta.yml new file mode 100644 index 00000000..852cf2bb --- /dev/null +++ b/modules/nf-core/links/meta.yml @@ -0,0 +1,175 @@ +--- +name: "links" +description: | + LINKS is a genomics application for scaffolding genome assemblies with long reads, + such as those produced by Oxford Nanopore Technologies Ltd. + It can be used to scaffold high-quality draft genome assemblies with any long sequences + (eg. ONT reads, PacBio reads, other draft genomes, etc). + It is also used to scaffold contig pairs linked by ARCS/ARKS. + This module is for LINKS >=2.0.0 and does not support MPET input. 
+keywords: + - scaffold + - long-reads + - genomics +tools: + - "links": + description: "Long Interval Nucleotide K-mer Scaffolder" + homepage: "https://www.bcgsc.ca/resources/software/links" + documentation: "https://github.com/bcgsc/LINKS" + tool_dev_url: "https://github.com/bcgsc/LINKS" + doi: "10.1186/s13742-015-0076-3" + licence: ["GPL v3"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - assembly: + type: file + description: (Multi-)fasta file containing the draft assembly + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - reads: + type: file + description: fastq file(s) containing the long reads to be used for scaffolding + pattern: "*.{fq,fastq,fq.gz,fastq.gz}" + +output: + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.log": + type: file + description: text file; Logs execution time / errors / pairing stats. + pattern: "*.log" + - pairing_distribution: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.pairing_distribution.csv": + type: file + description: | + comma-separated file; 1st column is the calculated distance + for each pair (template) with reads that assembled logically + within the same contig. 2nd column is the number of pairs at + that distance. + pattern: "*.pairing_distribution.csv" + - pairing_issues: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.pairing_issues": + type: file + description: | + text file; Lists all pairing issues encountered between contig + pairs and illogical/out-of-bounds pairing. + pattern: "*.pairing_issues" + - scaffolds_csv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1']` + - "*.scaffolds": + type: file + description: comma-separated file; containing the new scaffold(s) + pattern: "*.scaffolds" + - scaffolds_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.scaffolds.fa": + type: file + description: fasta file of the new scaffold sequence + pattern: "*.scaffolds.fa" + - bloom: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.bloom": + type: file + description: | + Bloom filter created by shredding the -f input + into k-mers of size -k + pattern: "*.bloom" + - scaffolds_graph: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.gv": + type: file + description: | + scaffold graph (for visualizing merges), can be rendered + in neato, graphviz, etc + pattern: "*.gv" + - assembly_correspondence: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.assembly_correspondence.tsv": + type: file + description: | + correspondence file lists the scaffold ID, + contig ID, original_name, #linking kmer pairs, + links ratio, gap or overlap + pattern: "*.assembly_correspondence.tsv" + - simplepair_checkpoint: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.simplepair_checkpoint.tsv": + type: file + description: checkpoint file, contains info to rebuild datastructure for .gv graph + pattern: "*.simplepair_checkpoint.tsv" + - tigpair_checkpoint: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1']` + - "*.tigpair_checkpoint.tsv": + type: file + description: | + if -b BASNAME.tigpair_checkpoint.tsv is present, + LINKS will skip the kmer pair extraction and contig pairing stages. + Delete this file to force LINKS to start at the beginning. 
+ This file can be used to: + 1) quickly test parameters (-l min. links / -a min. links ratio), + 2) quickly recover from crash, + 3) explore very large kmer spaces, + 4) scaffold with output of ARCS + pattern: "*.tigpair_checkpoint.tsv" + - versions: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@nschan" +maintainers: + - "@nschan" diff --git a/modules/nf-core/links/tests/main.nf.test b/modules/nf-core/links/tests/main.nf.test new file mode 100644 index 00000000..bbffb1dd --- /dev/null +++ b/modules/nf-core/links/tests/main.nf.test @@ -0,0 +1,123 @@ +nextflow_process { + name "Test Process LINKS" + script "../main.nf" + process "LINKS" + + tag "modules" + tag "modules_nfcore" + tag "links" + + test("LINKS - sarscov2 test data - contigs") { + config './nextflow.config' + + when { + params { + module_args = "-d 1000,2000,3000,4000,5000,6000,7000,8000,9000,10000,12000,14000,16000,18000,20000" + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), + ] + input[1] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log[0][1]).name, + process.out.pairing_issues, + process.out.scaffolds_csv, + process.out.scaffolds_fasta, + process.out.bloom, + file(process.out.scaffolds_graph[0][1]).name, + process.out.assembly_correspondence, + process.out.tigpair_checkpoint, + process.out.versions + ).match() + } + ) + } + + } + + test("LINKS - sarscov2 test data - scaffolds") { + config './nextflow.config' + + when { + params { + module_args = "-d 1000,2000,3000,4000,5000,6000,7000,8000,9000,10000,12000,14000,16000,18000,20000" + } + process { + """ + input[0] = [ + [ id:'test' ], + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/scaffolds.fasta', checkIfExists: true), + ] + input[1] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log[0][1]).name, + process.out.pairing_issues, + process.out.scaffolds_csv, + process.out.scaffolds_fasta, + process.out.bloom, + file(process.out.scaffolds_graph[0][1]).name, + process.out.assembly_correspondence, + process.out.tigpair_checkpoint, + process.out.versions + ).match() + } + ) + } + + } + test("LINKS - stub") { + + options "-stub" + config './nextflow.config' + + when { + params { + module_args = "-d 1000,2000,3000,4000,5000,6000,7000,8000,9000,10000,12000,14000,16000,18000,20000" + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + ] + input[1] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/links/tests/main.nf.test.snap b/modules/nf-core/links/tests/main.nf.test.snap new file mode 100644 index 00000000..8c19d398 --- /dev/null +++ b/modules/nf-core/links/tests/main.nf.test.snap @@ -0,0 +1,303 @@ +{ + "LINKS - sarscov2 test data - scaffolds": { + "content": [ + "test.log", + [ + [ + { + "id": "test" + }, + "test.pairing_issues:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + [ + { + "id": "test" + }, + "test.scaffolds:md5,095cc323b3af3a7873c8b80cf3736a1f" + ] + ], + [ + [ + { + "id": "test" + }, + "test.scaffolds.fa:md5,b8c7938abbc3d2f9b5c3d709d43b4a60" + ] + ], + [ + [ + { + "id": "test" + }, + 
"test.bloom:md5,23737e49d9a2f070b312da844201b494" + ] + ], + "test.gv", + [ + [ + { + "id": "test" + }, + "test.assembly_correspondence.tsv:md5,a65d30663dce705d382df52ab87ca8a4" + ] + ], + [ + [ + { + "id": "test" + }, + "test.tigpair_checkpoint.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + "versions.yml:md5,f58863e433b849b1ef0dfc19cb57656b" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-04-25T14:13:53.050775593" + }, + "LINKS - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.pairing_distribution.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + "versions.yml:md5,f58863e433b849b1ef0dfc19cb57656b" + ], + "2": [ + [ + { + "id": "test" + }, + "test.pairing_issues:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.scaffolds:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.scaffolds.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.bloom:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test" + }, + "test.gv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test" + }, + "test.assembly_correspondence.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test" + }, + "test.simplepair_checkpoint.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test" + }, + "test.tigpair_checkpoint.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "assembly_correspondence": [ + [ + { + "id": "test" + }, + "test.assembly_correspondence.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bloom": [ + [ + { + "id": "test" + }, + "test.bloom:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + 
], + "pairing_distribution": [ + [ + { + "id": "test" + }, + "test.pairing_distribution.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pairing_issues": [ + [ + { + "id": "test" + }, + "test.pairing_issues:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "scaffolds_csv": [ + [ + { + "id": "test" + }, + "test.scaffolds:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "scaffolds_fasta": [ + [ + { + "id": "test" + }, + "test.scaffolds.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "scaffolds_graph": [ + [ + { + "id": "test" + }, + "test.gv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "simplepair_checkpoint": [ + [ + { + "id": "test" + }, + "test.simplepair_checkpoint.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tigpair_checkpoint": [ + [ + { + "id": "test" + }, + "test.tigpair_checkpoint.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f58863e433b849b1ef0dfc19cb57656b" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-04-11T11:49:53.947870525" + }, + "LINKS - sarscov2 test data - contigs": { + "content": [ + "test.log", + [ + [ + { + "id": "test" + }, + "test.pairing_issues:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + [ + { + "id": "test" + }, + "test.scaffolds:md5,41c129edd1e66140fcfb7efce81197ad" + ] + ], + [ + [ + { + "id": "test" + }, + "test.scaffolds.fa:md5,8abc4f609d0ad415f900b0046b38a72b" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bloom:md5,23737e49d9a2f070b312da844201b494" + ] + ], + "test.gv", + [ + [ + { + "id": "test" + }, + "test.assembly_correspondence.tsv:md5,b36e951b0a1bb4b1c1ccd50925392e3d" + ] + ], + [ + [ + { + "id": "test" + }, + "test.tigpair_checkpoint.tsv:md5,168f2075f524a86216118c7230ad65e9" + ] + ], + [ + "versions.yml:md5,f58863e433b849b1ef0dfc19cb57656b" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-04-25T14:07:49.212617595" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/links/tests/nextflow.config b/modules/nf-core/links/tests/nextflow.config new file mode 100644 index 00000000..1279535c --- /dev/null +++ b/modules/nf-core/links/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'LINKS' { + ext.args = params.module_args + } +} diff --git a/modules/nf-core/merqury/merqury/environment.yml b/modules/nf-core/merqury/merqury/environment.yml index a62b4b92..84dc78d9 100644 --- a/modules/nf-core/merqury/merqury/environment.yml +++ b/modules/nf-core/merqury/merqury/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/merqury/merqury/merqury-merqury.diff b/modules/nf-core/merqury/merqury/merqury-merqury.diff index 60b70cc5..a0a65d32 100644 --- a/modules/nf-core/merqury/merqury/merqury-merqury.diff +++ b/modules/nf-core/merqury/merqury/merqury-merqury.diff @@ -35,7 +35,6 @@ Changes in 'merqury/merqury/main.nf': when: -'modules/nf-core/merqury/merqury/tests/tags.yml' is unchanged 'modules/nf-core/merqury/merqury/tests/main.nf.test' is unchanged 'modules/nf-core/merqury/merqury/tests/main.nf.test.snap' is unchanged ************************************************************ diff --git a/modules/nf-core/merqury/merqury/tests/tags.yml b/modules/nf-core/merqury/merqury/tests/tags.yml deleted file mode 100644 index af157f18..00000000 --- a/modules/nf-core/merqury/merqury/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -merqury/merqury: - - "modules/nf-core/merqury/merqury/**" diff --git a/modules/nf-core/meryl/count/environment.yml b/modules/nf-core/meryl/count/environment.yml index e37d7901..deebca1f 100644 --- a/modules/nf-core/meryl/count/environment.yml +++ b/modules/nf-core/meryl/count/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json 
channels: - conda-forge - bioconda diff --git a/modules/nf-core/meryl/count/tests/tags.yml b/modules/nf-core/meryl/count/tests/tags.yml deleted file mode 100644 index b25bfa60..00000000 --- a/modules/nf-core/meryl/count/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -meryl/count: - - "modules/nf-core/meryl/count/**" diff --git a/modules/nf-core/meryl/unionsum/environment.yml b/modules/nf-core/meryl/unionsum/environment.yml index e37d7901..deebca1f 100644 --- a/modules/nf-core/meryl/unionsum/environment.yml +++ b/modules/nf-core/meryl/unionsum/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/meryl/unionsum/tests/tags.yml b/modules/nf-core/meryl/unionsum/tests/tags.yml deleted file mode 100644 index 707dd781..00000000 --- a/modules/nf-core/meryl/unionsum/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -meryl/unionsum: - - "modules/nf-core/meryl/unionsum/**" diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml index 60677e65..17886061 100644 --- a/modules/nf-core/minimap2/align/environment.yml +++ b/modules/nf-core/minimap2/align/environment.yml @@ -5,6 +5,5 @@ channels: - bioconda dependencies: - - bioconda::htslib=1.20 - - bioconda::minimap2=2.28 - - bioconda::samtools=1.20 + - bioconda::minimap2=2.29 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf index 5a6d3319..50e3ecf9 100644 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -5,8 +5,8 @@ process MINIMAP2_ALIGN { // Note: the versions here need to match the versions used in the mulled container below and minimap2/index conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' : - 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/66/66dc96eff11ab80dfd5c044e9b3425f52d818847b9c074794cf0c02bfa781661/data' : + 'community.wave.seqera.io/library/minimap2_samtools:33bb43c18d22e29c' }" input: tuple val(meta), path(reads), path(reference) diff --git a/modules/nf-core/minimap2/align/minimap2-align.diff b/modules/nf-core/minimap2/align/minimap2-align.diff index 2275aff7..78677563 100644 --- a/modules/nf-core/minimap2/align/minimap2-align.diff +++ b/modules/nf-core/minimap2/align/minimap2-align.diff @@ -5,7 +5,7 @@ Changes in 'minimap2/align/main.nf': --- modules/nf-core/minimap2/align/main.nf +++ modules/nf-core/minimap2/align/main.nf @@ -9,8 +9,7 @@ - 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' }" + 'community.wave.seqera.io/library/minimap2_samtools:33bb43c18d22e29c' }" input: - tuple val(meta), path(reads) @@ -15,7 +15,6 @@ Changes in 'minimap2/align/main.nf': val bam_index_extension val cigar_paf_format -'modules/nf-core/minimap2/align/tests/tags.yml' is unchanged 'modules/nf-core/minimap2/align/tests/main.nf.test' is unchanged 'modules/nf-core/minimap2/align/tests/main.nf.test.snap' is unchanged ************************************************************ diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap index 12264a85..89f20336 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test.snap +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -4,20 +4,20 @@ [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:MT192765.1\tLN:29829", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", - 
"@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" ], "5d426b9a5f5b2c54f1d7f1e4c238ae94", "test.bam.bai", [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-07-25T09:03:00.827260362" + "timestamp": "2025-04-22T14:48:23.829797899" }, "sarscov2 - bam, fasta, true, 'bai', false, false - stub": { "content": [ @@ -44,7 +44,7 @@ ] ], "3": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ], "bam": [ [ @@ -68,15 +68,15 @@ ], "versions": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-07-23T11:21:37.92353539" + "timestamp": "2025-04-22T14:48:54.665655242" }, "sarscov2 - fastq, fasta, true, 'bai', false, false - stub": { "content": [ @@ -103,7 +103,7 @@ ] ], "3": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ], "bam": [ [ @@ -127,15 +127,15 @@ ], "versions": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-06-03T11:29:44.669021368" + "timestamp": "2025-04-22T14:48:38.492212433" }, "sarscov2 - fastq, fasta, false, [], false, false - stub": { "content": [ @@ -156,7 +156,7 @@ ], "3": [ - 
"versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ], "bam": [ @@ -174,15 +174,15 @@ ] ], "versions": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-06-03T11:15:52.738781039" + "timestamp": "2025-04-22T14:48:43.879647142" }, "sarscov2 - fastq, fasta, true, [], false, false - stub": { "content": [ @@ -203,7 +203,7 @@ ], "3": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ], "bam": [ [ @@ -221,92 +221,92 @@ ], "versions": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-06-03T11:15:23.033808223" + "timestamp": "2025-04-22T14:48:33.262333471" }, "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { "content": [ [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:MT192765.1\tLN:29829", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz test_2.fastq.gz", - "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz test_2.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "1bc392244f228bf52cf0b5a8f6a654c9", [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-07-23T11:18:18.964586894" + "timestamp": "2025-04-22T14:48:07.571731983" }, "sarscov2 - fastq, fasta, true, [], 
false, false": { "content": [ [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:MT192765.1\tLN:29829", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", - "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "f194745c0ccfcb2a9c0aee094a08750", [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-07-23T11:17:48.667488325" + "timestamp": "2025-04-22T14:47:56.497792473" }, "sarscov2 - fastq, fasta, true, 'bai', false, false": { "content": [ [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:MT192765.1\tLN:29829", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", - "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" ], "f194745c0ccfcb2a9c0aee094a08750", "test.bam.bai", [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-07-23T11:18:02.517416733" + "timestamp": "2025-04-22T14:48:01.888544427" }, "sarscov2 - bam, fasta, true, [], false, false": { "content": [ [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:MT192765.1\tLN:29829", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", - 
"@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "5d426b9a5f5b2c54f1d7f1e4c238ae94", [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-07-25T09:02:49.64829488" + "timestamp": "2025-04-22T14:48:18.376062313" }, "sarscov2 - bam, fasta, true, [], false, false - stub": { "content": [ @@ -327,7 +327,7 @@ ], "3": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ], "bam": [ [ @@ -345,15 +345,15 @@ ], "versions": [ - "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-07-23T11:21:22.162291795" + "timestamp": "2025-04-22T14:48:49.268693724" }, "sarscov2 - fastq, [], true, false, false": { "content": [ @@ -459,18 +459,18 @@ "@SQ\tSN:ERR5069949.3258358\tLN:151", "@SQ\tSN:ERR5069949.1476386\tLN:151", "@SQ\tSN:ERR5069949.2415814\tLN:150", - "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz", - "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" ], "16c1c651f8ec67383bcdee3c55aed94f", [ - "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + 
"nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-07-23T11:18:34.246998277" + "timestamp": "2025-04-22T14:48:12.942360555" } } \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/tags.yml b/modules/nf-core/minimap2/align/tests/tags.yml deleted file mode 100644 index 39dba374..00000000 --- a/modules/nf-core/minimap2/align/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -minimap2/align: - - "modules/nf-core/minimap2/align/**" diff --git a/modules/nf-core/pilon/environment.yml b/modules/nf-core/pilon/environment.yml index a67d2869..eca24d42 100644 --- a/modules/nf-core/pilon/environment.yml +++ b/modules/nf-core/pilon/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/pilon/tests/tags.yml b/modules/nf-core/pilon/tests/tags.yml deleted file mode 100644 index 0d94efba..00000000 --- a/modules/nf-core/pilon/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -pilon: - - "modules/nf-core/pilon/**" diff --git a/modules/nf-core/porechop/porechop/environment.yml b/modules/nf-core/porechop/porechop/environment.yml index 4defeb33..109cf8bd 100644 --- a/modules/nf-core/porechop/porechop/environment.yml +++ b/modules/nf-core/porechop/porechop/environment.yml @@ -1,5 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - bioconda::porechop=0.2.4 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/porechop/porechop/main.nf b/modules/nf-core/porechop/porechop/main.nf index 1ff02a12..34daf3e8 100644 --- a/modules/nf-core/porechop/porechop/main.nf +++ b/modules/nf-core/porechop/porechop/main.nf @@ -3,9 +3,10 @@ process PORECHOP_PORECHOP { label 'process_medium' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 
'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/porechop:0.2.4--py39h7cff6ad_2' : - 'biocontainers/porechop:0.2.4--py39h7cff6ad_2' }" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/2b/2bce1f10c51906a66c4c4d3a7485394f67e304177192ad1cce6cf586a3a18bae/data' : + 'community.wave.seqera.io/library/porechop_pigz:d1655e5b5bad786c' }" + input: tuple val(meta), path(reads) diff --git a/modules/nf-core/porechop/porechop/tests/tags.yml b/modules/nf-core/porechop/porechop/tests/tags.yml deleted file mode 100644 index 743645c2..00000000 --- a/modules/nf-core/porechop/porechop/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -porechop/porechop: - - "modules/nf-core/porechop/porechop/**" diff --git a/modules/local/medaka/environment.yml b/modules/nf-core/ragtag/patch/environment.yml similarity index 64% rename from modules/local/medaka/environment.yml rename to modules/nf-core/ragtag/patch/environment.yml index 37fc99d1..83cefc79 100644 --- a/modules/local/medaka/environment.yml +++ b/modules/nf-core/ragtag/patch/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::medaka=2.0.1 + - "bioconda::ragtag=2.1.0" diff --git a/modules/nf-core/ragtag/patch/main.nf b/modules/nf-core/ragtag/patch/main.nf new file mode 100644 index 00000000..4e8cf455 --- /dev/null +++ b/modules/nf-core/ragtag/patch/main.nf @@ -0,0 +1,109 @@ +process RAGTAG_PATCH { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/ragtag:2.1.0--pyhb7b1952_0' + : 'biocontainers/ragtag:2.1.0--pyhb7b1952_0'}" + + input: + tuple val(meta), path(target, name: 'target/*') + tuple val(meta2), path(query, name: 'query/*') + tuple val(meta3), path(exclude) + tuple val(meta4), path(skip) + + output: + tuple val(meta), path("*.patch.fasta"), emit: patch_fasta + tuple val(meta), path("*.patch.agp"), emit: patch_agp + tuple val(meta), path("*.comps.fasta"), emit: patch_components_fasta + tuple val(meta), path("*.ragtag.patch.asm.*"), emit: assembly_alignments, optional: true + tuple val(meta), path("*.ctg.agp"), emit: target_splits_agp + tuple val(meta), path("*.ctg.fasta"), emit: target_splits_fasta + tuple val(meta), path("*.rename.agp"), emit: qry_rename_agp, optional: true + tuple val(meta), path("*.rename.fasta"), emit: qry_rename_fasta, optional: true + tuple val(meta), path("*.patch.err"), emit: stderr + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: "" + def arg_exclude = exclude ? "-e ${exclude}" : "" + def arg_skip = skip ? "-j ${skip}" : "" + """ + if [[ ${target} == *.gz ]] + then + zcat ${target} > target.fa + else + ln -s ${target} target.fa + fi + + if [[ ${query} == *.gz ]] + then + zcat ${query} > query.fa + else + ln -s ${query} query.fa + fi + + tail -F ${prefix}/ragtag.patch.err >&2 & + tailpid=\$! 
+ ragtag.py patch target.fa query.fa \\ + -o "${prefix}" \\ + -t ${task.cpus} \\ + ${arg_exclude} \\ + ${arg_skip} \\ + ${args} \\ + 2> >( tee ${prefix}.stderr.log >&2 ) \\ + | tee ${prefix}.stdout.log + + kill -TERM "\$tailpid" + + mv ${prefix}/ragtag.patch.agp ${prefix}.patch.agp + mv ${prefix}/ragtag.patch.fasta ${prefix}.patch.fasta + mv ${prefix}/ragtag.patch.comps.fasta ${prefix}.comps.fasta + mv ${prefix}/ragtag.patch.ctg.agp ${prefix}.ctg.agp + mv ${prefix}/ragtag.patch.ctg.fasta ${prefix}.ctg.fasta + if [ -f ${prefix}/ragtag.patch.rename.agp ]; then + mv ${prefix}/ragtag.patch.rename.agp ${prefix}.rename.agp + fi + + if [ -f ${prefix}/ragtag.patch.rename.fasta ]; then + mv ${prefix}/ragtag.patch.rename.fasta ${prefix}.rename.fasta + fi + mv ${prefix}/ragtag.patch.err ${prefix}.patch.err + # Move the assembly files from prefix folder, and add prefix + for alignment_file in \$(ls ${prefix}/ragtag.patch.asm.*); + do + mv "\$alignment_file" "\${alignment_file/${prefix}\\//${prefix}_}" + done + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ragtag: \$(echo \$(ragtag.py -v | sed 's/v//')) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def _args = task.ext.args ?: "" + def _arg_exclude = exclude ? "-e ${exclude}" : "" + def _arg_skip = skip ? 
"-j ${skip}" : "" + """ + touch ${prefix}.patch.agp + touch ${prefix}.patch.fasta + touch ${prefix}.comps.fasta + touch ${prefix}.ctg.agp + touch ${prefix}.ctg.fasta + touch ${prefix}.rename.agp + touch ${prefix}.rename.fasta + touch ${prefix}.ragtag.patch.asm.1 + touch ${prefix}.patch.err + + cat <<-END_VERSIONS > versions.yml + ragtag: \$(echo \$(ragtag.py -v | sed 's/v//')) + END_VERSIONS + """ +} diff --git a/modules/nf-core/ragtag/patch/meta.yml b/modules/nf-core/ragtag/patch/meta.yml new file mode 100644 index 00000000..d74ee3d2 --- /dev/null +++ b/modules/nf-core/ragtag/patch/meta.yml @@ -0,0 +1,156 @@ +name: "ragtag_patch" +description: "Homology-based assembly patching: Make continuous joins and fill gaps + in 'target.fa' using sequences from 'query.fa'" + +keywords: + - assembly + - consensus + - ragtag + - patch +tools: + - "ragtag": + description: "Fast reference-guided genome assembly scaffolding" + homepage: "https://github.com/malonge/RagTag/wiki" + documentation: "https://github.com/malonge/RagTag/wiki" + tool_dev_url: "https://github.com/malonge/RagTag" + doi: "10.1186/s13059-022-02823-7" + licence: ["MIT"] + identifier: biotools:ragtag +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - target: + type: file + description: Target assembly + pattern: "*.{fasta,fasta.gz}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - query: + type: file + description: Query assembly + pattern: "*.{fasta,fasta.gz}" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - exclude: + type: file + description: list of target sequences to ignore + pattern: "*.txt" + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - skip: + type: file + description: list of query sequences to ignore + pattern: "*.txt" +output: + - patch_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.patch.fasta": + type: file + description: FASTA file containing the patched assembly + pattern: "*.patch.fasta" + - patch_agp: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.patch.agp": + type: file + description: AGP file defining how ragtag.patch.fasta is built + pattern: "*.patch.agp" + - patch_components_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.comps.fasta": + type: file + description: The split target assembly and the renamed query assembly combined + into one FASTA file. This file contains all components in ragtag.patch.agp + pattern: "*.comps.fasta" + - assembly_alignments: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.ragtag.patch.asm.*": + type: file + description: Assembly alignment files + pattern: "*.ragtag.patch.asm.*" + - target_splits_agp: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.ctg.agp": + type: file + description: An AGP file defining how the target assembly was split at gaps + pattern: "*.ctg.agp" + - target_splits_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.ctg.fasta": + type: file + description: FASTA file containing the target assembly split at gaps + pattern: "*.ctg.fasta" + - qry_rename_agp: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - "*.rename.agp": + type: file + description: An AGP file defining the new names for query sequences + pattern: "*.rename.agp" + - qry_rename_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.rename.fasta": + type: file + description: A FASTA file with the original query sequence, but with new names + pattern: "*.rename.fasta" + - stderr: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.patch.err": + type: file + description: Standard error logging for all external RagTag commands + pattern: "*.patch.err" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@nschan" +maintainers: + - "@nschan" diff --git a/modules/nf-core/ragtag/patch/tests/main.nf.test b/modules/nf-core/ragtag/patch/tests/main.nf.test new file mode 100644 index 00000000..a7c0fee8 --- /dev/null +++ b/modules/nf-core/ragtag/patch/tests/main.nf.test @@ -0,0 +1,89 @@ +nextflow_process { + + name "Test Process RAGTAG_PATCH" + script "../main.nf" + process "RAGTAG_PATCH" + + tag "modules" + tag "modules_nfcore" + tag "ragtag" + tag "ragtag/patch" + + +test("A. 
thaliana Col-0 test data - ragtag - patch") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[1] = [ + [], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[2] = [ + [], + [] + ] + input[3] = [ + [], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.patch_fasta, + process.out.patch_agp, + process.out.patch_components_fasta, + process.out.target_splits_agp, + process.out.target_splits_fasta, + process.out.versions + ).match() + }, + ) + } + + } + test("A. thaliana Col-0 test data - ragtag - patch - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[1] = [ + [], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[2] = [ + [], + [] + ] + input[3] = [ + [], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/ragtag/patch/tests/main.nf.test.snap b/modules/nf-core/ragtag/patch/tests/main.nf.test.snap new file mode 100644 index 00000000..b1444692 --- /dev/null +++ b/modules/nf-core/ragtag/patch/tests/main.nf.test.snap @@ -0,0 +1,215 @@ +{ + "A. 
thaliana Col-0 test data - ragtag - patch - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.patch.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.patch.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.comps.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.ragtag.patch.asm.1:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.ctg.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.ctg.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test" + }, + "test.rename.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test" + }, + "test.rename.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test" + }, + "test.patch.err:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + "versions.yml:md5,cecbb39907d607affa6522e395b78a1f" + ], + "assembly_alignments": [ + [ + { + "id": "test" + }, + "test.ragtag.patch.asm.1:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "patch_agp": [ + [ + { + "id": "test" + }, + "test.patch.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "patch_components_fasta": [ + [ + { + "id": "test" + }, + "test.comps.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "patch_fasta": [ + [ + { + "id": "test" + }, + "test.patch.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "qry_rename_agp": [ + [ + { + "id": "test" + }, + "test.rename.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "qry_rename_fasta": [ + [ + { + "id": "test" + }, + "test.rename.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stderr": [ + [ + { + "id": "test" + }, + "test.patch.err:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "target_splits_agp": [ + [ + { + "id": "test" + }, + 
"test.ctg.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "target_splits_fasta": [ + [ + { + "id": "test" + }, + "test.ctg.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,cecbb39907d607affa6522e395b78a1f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-04-04T14:10:01.648597527" + }, + "A. thaliana Col-0 test data - ragtag - patch": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.patch.fasta:md5,5cf615df690061ab15e4fee62abf3ebc" + ] + ], + [ + [ + { + "id": "test" + }, + "test.patch.agp:md5,7878fd4e42ecb2bfccd7565d5ed6b625" + ] + ], + [ + [ + { + "id": "test" + }, + "test.comps.fasta:md5,65bd2563dfc2564d5edf0e8d24257032" + ] + ], + [ + [ + { + "id": "test" + }, + "test.ctg.agp:md5,ac3460a377daaf3e3ce37f499e561968" + ] + ], + [ + [ + { + "id": "test" + }, + "test.ctg.fasta:md5,5cf615df690061ab15e4fee62abf3ebc" + ] + ], + [ + "versions.yml:md5,4c0992a27edf294209711ce4f181eb5a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-04-04T14:25:42.121285998" + } +} \ No newline at end of file diff --git a/modules/local/links/environment.yml b/modules/nf-core/ragtag/scaffold/environment.yml similarity index 64% rename from modules/local/links/environment.yml rename to modules/nf-core/ragtag/scaffold/environment.yml index 862ed92f..83cefc79 100644 --- a/modules/local/links/environment.yml +++ b/modules/nf-core/ragtag/scaffold/environment.yml @@ -1,6 +1,5 @@ channels: - conda-forge - bioconda - dependencies: - - bioconda::links=2.0.1 + - "bioconda::ragtag=2.1.0" diff --git a/modules/nf-core/ragtag/scaffold/main.nf b/modules/nf-core/ragtag/scaffold/main.nf new file mode 100644 index 00000000..c3930c12 --- /dev/null +++ b/modules/nf-core/ragtag/scaffold/main.nf @@ -0,0 +1,82 @@ +process RAGTAG_SCAFFOLD { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 
'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/ragtag:2.1.0--pyhb7b1952_0' + : 'biocontainers/ragtag:2.1.0--pyhb7b1952_0'}" + + input: + tuple val(meta), path(assembly, name: 'assembly/*') + tuple val(meta2), path(reference, name: 'reference/*') + tuple val(meta3), path(exclude) + tuple val(meta4), path(skip), path(hard_skip) + + output: + tuple val(meta), path("*.fasta"), emit: corrected_assembly + tuple val(meta), path("*.agp"), emit: corrected_agp + tuple val(meta), path("*.stats"), emit: corrected_stats + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def arg_exclude = exclude ? "-e ${exclude}" : "" + def arg_skip = skip ? "-j ${skip}" : "" + def arg_hard_skip = hard_skip ? "-J ${hard_skip}" : "" + """ + if [[ ${assembly} == *.gz ]] + then + zcat ${assembly} > assembly.fa + else + ln -s ${assembly} assembly.fa + fi + + if [[ ${reference} == *.gz ]] + then + zcat ${reference} > reference.fa + else + ln -s ${reference} reference.fa + fi + + ragtag.py scaffold reference.fa assembly.fa \\ + -o "${prefix}" \\ + -t ${task.cpus} \\ + -C \\ + ${arg_exclude} \\ + ${arg_skip} \\ + ${arg_hard_skip} \\ + ${args} \\ + 2> >( tee ${prefix}.stderr.log >&2 ) \\ + | tee ${prefix}.stdout.log + + mv ${prefix}/ragtag.scaffold.fasta ${prefix}.fasta + mv ${prefix}/ragtag.scaffold.agp ${prefix}.agp + mv ${prefix}/ragtag.scaffold.stats ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ragtag: \$(echo \$(ragtag.py -v | sed 's/v//')) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def _args = task.ext.args ?: '' + def _arg_exclude = exclude ? "-e ${exclude}" : "" + def _arg_skip = skip ? "-j ${skip}" : "" + def _arg_hard_skip = hard_skip ? 
"-J ${hard_skip}" : "" + """ + touch ${prefix}.fasta + touch ${prefix}.agp + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + ragtag: \$(echo \$(ragtag.py -v | sed 's/v//')) + END_VERSIONS + """ +} diff --git a/modules/nf-core/ragtag/scaffold/meta.yml b/modules/nf-core/ragtag/scaffold/meta.yml new file mode 100644 index 00000000..62eb0e49 --- /dev/null +++ b/modules/nf-core/ragtag/scaffold/meta.yml @@ -0,0 +1,106 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: ragtag_scaffold +description: | + Scaffolding is the process of ordering and orienting draft assembly (query) + sequences into longer sequences. Gaps (stretches of "N" characters) are placed + between adjacent query sequences to indicate the presence of unknown sequence. + RagTag uses whole-genome alignments to a reference assembly to scaffold query sequences. + RagTag does not alter input query sequence in any way and only orders and orients sequences, joining them with gaps. +keywords: + - scaffolding + - ragtag + - assembly + - genome +tools: + - "ragtag": + description: "Fast reference-guided genome assembly scaffolding" + homepage: "https://github.com/malonge/RagTag/wiki" + documentation: "https://github.com/malonge/RagTag/wiki" + tool_dev_url: "https://github.com/malonge/RagTag" + doi: "10.1186/s13059-022-02823-7" + licence: ["MIT"] + identifier: biotools:ragtag + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - assembly: + type: file + description: Assembly to be scaffolded + pattern: "*.{fasta,fasta.gz,fa,fa.gz}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - reference: + type: file + description: Reference assembly + pattern: "*.{fasta,fasta.gz,fa,fa.gz}" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - exclude: + type: file + description: list of target sequences to ignore + pattern: "*.txt" + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - skip: + type: file + description: list of query sequences to leave unplaced + pattern: "*.txt" + - hard_skip: + type: file + description: list of query headers to leave unplaced and exclude from 'chr0' + ('-C') + pattern: "*.txt" +output: + - corrected_assembly: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.fasta": + type: file + description: FASTA file containing the patched assembly + pattern: "*.fasta" + - corrected_agp: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.agp": + type: file + description: agp file defining how corrected_assembly is built + pattern: "*.agp" + - corrected_stats: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.stats": + type: file + description: Statistics on the scaffold + pattern: "*.stats" + + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@nschan" +maintainers: + - "@nschan" diff --git a/modules/nf-core/ragtag/scaffold/tests/main.nf.test b/modules/nf-core/ragtag/scaffold/tests/main.nf.test new file mode 100644 index 00000000..51b42642 --- /dev/null +++ b/modules/nf-core/ragtag/scaffold/tests/main.nf.test @@ -0,0 +1,81 @@ +nextflow_process { + + name "Test Process RAGTAG_SCAFFOLD" + script "../main.nf" + process "RAGTAG_SCAFFOLD" + + tag "modules" + tag "modules_nfcore" + tag "ragtag" + tag "ragtag/scaffold" + + test("A. 
thaliana Col-0 test data - ragtag - scaffold") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[1] = [ + [], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[2] = [ + [], + [] + ] + input[3] = [ + [], + [], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + test("A. thaliana Col-0 test data - ragtag - scaffold - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[2] = [ + [], + [] + ] + input[3] = [ + [], + [], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/ragtag/scaffold/tests/main.nf.test.snap b/modules/nf-core/ragtag/scaffold/tests/main.nf.test.snap new file mode 100644 index 00000000..e4faf0b0 --- /dev/null +++ b/modules/nf-core/ragtag/scaffold/tests/main.nf.test.snap @@ -0,0 +1,132 @@ +{ + "A. 
thaliana Col-0 test data - ragtag - scaffold": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,70b661fab5364a1c389972a771f97905" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.agp:md5,40fbf2d081c32880d8ce8187c529a80b" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.stats:md5,209e973e4bac1653b8d5fddb7fa13b63" + ] + ], + "3": [ + "versions.yml:md5,48710c1720f668d8ba3397f99892959e" + ], + "corrected_agp": [ + [ + { + "id": "test" + }, + "test.agp:md5,40fbf2d081c32880d8ce8187c529a80b" + ] + ], + "corrected_assembly": [ + [ + { + "id": "test" + }, + "test.fasta:md5,70b661fab5364a1c389972a771f97905" + ] + ], + "corrected_stats": [ + [ + { + "id": "test" + }, + "test.stats:md5,209e973e4bac1653b8d5fddb7fa13b63" + ] + ], + "versions": [ + "versions.yml:md5,48710c1720f668d8ba3397f99892959e" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-04-04T13:37:54.181644032" + }, + "A. thaliana Col-0 test data - ragtag - scaffold - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,cecbb39907d607affa6522e395b78a1f" + ], + "corrected_agp": [ + [ + { + "id": "test" + }, + "test.agp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "corrected_assembly": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "corrected_stats": [ + [ + { + "id": "test" + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,cecbb39907d607affa6522e395b78a1f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-04-04T13:38:21.635495713" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/samtools/fastq/main.nf b/modules/nf-core/samtools/fastq/main.nf index 136744d5..696d668f 100644 --- a/modules/nf-core/samtools/fastq/main.nf +++ b/modules/nf-core/samtools/fastq/main.nf @@ -41,4 +41,19 @@ process SAMTOOLS_FASTQ { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def output = ( interleave && ! meta.single_end ) ? "touch ${prefix}_interleaved.fastq" : + meta.single_end ? "echo | gzip > ${prefix}_1.fastq.gz && echo | gzip > ${prefix}_singleton.fastq.gz" : + "echo | gzip > ${prefix}_1.fastq.gz && echo | gzip > ${prefix}_2.fastq.gz && echo | gzip > ${prefix}_singleton.fastq.gz" + """ + ${output} + echo | gzip > ${prefix}_other.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/samtools/fastq/tests/main.nf.test b/modules/nf-core/samtools/fastq/tests/main.nf.test index f6ac1123..971ea1d4 100644 --- a/modules/nf-core/samtools/fastq/tests/main.nf.test +++ b/modules/nf-core/samtools/fastq/tests/main.nf.test @@ -64,4 +64,56 @@ nextflow_process { ) } } + + test("bam - stub") { + + options "-stub" + + when { + process { + """ + interleave = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = interleave + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("bam_interleave - stub") { + + options "-stub" + + when { + process { + """ + interleave = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: 
true) + ]) + input[1] = interleave + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } } diff --git a/modules/nf-core/samtools/fastq/tests/main.nf.test.snap b/modules/nf-core/samtools/fastq/tests/main.nf.test.snap index 10e5cd3d..ff63f9ae 100644 --- a/modules/nf-core/samtools/fastq/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/fastq/tests/main.nf.test.snap @@ -27,6 +27,89 @@ }, "timestamp": "2024-02-12T18:18:23.988269" }, + "bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test_singleton.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test_other.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "4": [ + "versions.yml:md5,11e074d69900de5a7dfdbe1fb4e789fd" + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "interleaved": [ + + ], + "other": [ + [ + { + "id": "test", + "single_end": false + }, + "test_other.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "singleton": [ + [ + { + "id": "test", + "single_end": false + }, + "test_singleton.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,11e074d69900de5a7dfdbe1fb4e789fd" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-03-05T12:50:58.986886415" + }, "bam_fastq": { "content": [ [ @@ -135,5 +218,70 @@ "nextflow": "23.04.3" }, "timestamp": "2024-02-12T18:18:30.859468" + }, + "bam_interleave - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", 
+ "single_end": false + }, + "test_interleaved.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test_other.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "4": [ + "versions.yml:md5,11e074d69900de5a7dfdbe1fb4e789fd" + ], + "fastq": [ + + ], + "interleaved": [ + [ + { + "id": "test", + "single_end": false + }, + "test_interleaved.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "other": [ + [ + { + "id": "test", + "single_end": false + }, + "test_other.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "singleton": [ + + ], + "versions": [ + "versions.yml:md5,11e074d69900de5a7dfdbe1fb4e789fd" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-03-05T12:51:10.155471004" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/fastq/tests/tags.yml b/modules/nf-core/samtools/fastq/tests/tags.yml deleted file mode 100644 index d14a8534..00000000 --- a/modules/nf-core/samtools/fastq/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -samtools/fastq: - - "modules/nf-core/samtools/fastq/**" diff --git a/modules/nf-core/samtools/flagstat/tests/tags.yml b/modules/nf-core/samtools/flagstat/tests/tags.yml deleted file mode 100644 index 2d2b7255..00000000 --- a/modules/nf-core/samtools/flagstat/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -samtools/flagstat: - - modules/nf-core/samtools/flagstat/** diff --git a/modules/nf-core/samtools/idxstats/tests/tags.yml b/modules/nf-core/samtools/idxstats/tests/tags.yml deleted file mode 100644 index d3057c61..00000000 --- a/modules/nf-core/samtools/idxstats/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -samtools/idxstats: - - modules/nf-core/samtools/idxstats/** diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml deleted file mode 100644 index e0f58a7a..00000000 --- a/modules/nf-core/samtools/index/tests/tags.yml +++ /dev/null @@ 
-1,2 +0,0 @@ -samtools/index: - - modules/nf-core/samtools/index/** diff --git a/modules/nf-core/samtools/sort/tests/tags.yml b/modules/nf-core/samtools/sort/tests/tags.yml deleted file mode 100644 index cd63ea20..00000000 --- a/modules/nf-core/samtools/sort/tests/tags.yml +++ /dev/null @@ -1,3 +0,0 @@ -samtools/sort: - - modules/nf-core/samtools/sort/** - - tests/modules/nf-core/samtools/sort/** diff --git a/modules/nf-core/samtools/stats/tests/tags.yml b/modules/nf-core/samtools/stats/tests/tags.yml deleted file mode 100644 index 7c28e30f..00000000 --- a/modules/nf-core/samtools/stats/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -samtools/stats: - - modules/nf-core/samtools/stats/** diff --git a/modules/nf-core/trimgalore/environment.yml b/modules/nf-core/trimgalore/environment.yml index b1efd94c..568b9e72 100644 --- a/modules/nf-core/trimgalore/environment.yml +++ b/modules/nf-core/trimgalore/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/trimgalore/tests/tags.yml b/modules/nf-core/trimgalore/tests/tags.yml deleted file mode 100644 index e9937691..00000000 --- a/modules/nf-core/trimgalore/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -trimgalore: - - modules/nf-core/trimgalore/** diff --git a/nextflow.config b/nextflow.config index 3293c406..45e4f254 100644 --- a/nextflow.config +++ b/nextflow.config @@ -205,23 +205,41 @@ profiles { executor.name = 'local' executor.cpus = 4 executor.memory = 8.GB + process { + resourceLimits = [ + memory: 8.GB, + cpus : 4, + time : 1.h + ] + } } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } - hifi_flye { includeConfig 'configs/hifi_flye.config' } // Hifi-reads with flye - hifi_hifiasm { includeConfig 'configs/hifi_hifiasm.config' } // hifi-reads with hifiasm - ont_flye { includeConfig 
'configs/ont_flye.config' } // ont-reads with flye - ont_hifiasm { includeConfig 'configs/ont_hifiasm.config' } // ont-reads with hifiasm - hifiont_hifiasm { includeConfig 'configs/hifi_ont_hifiasm_ul.config' } // ont and hifi reads with hifiasm --ul - hifiont_flyehifiasm { includeConfig 'configs/hifi_ont_flye_on_hifiasm.config' } // ont and hifi reads. ONT via flye, Hifi via hifiasm, scaffold flye on hifiasm + + gpu { + docker.runOptions = '-u $(id -u):$(id -g) --gpus all' + apptainer.runOptions = '--nv' + singularity.runOptions = '--nv' + } + + test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } + hifi_flye { includeConfig 'configs/hifi_flye.config' } // Hifi-reads with flye + hifi_hifiasm { includeConfig 'configs/hifi_hifiasm.config' } // hifi-reads with hifiasm + ont_flye { includeConfig 'configs/ont_flye.config' } // ont-reads with flye + ont_hifiasm { includeConfig 'configs/ont_hifiasm.config' } // ont-reads with hifiasm + hifiont_hifiasm { includeConfig 'configs/hifi_ont_hifiasm_ul.config' } // ont and hifi reads with hifiasm --ul + hifiont_flye_on_hifiasm { includeConfig 'configs/hifi_ont_flye_on_hifiasm.config' } // ont and hifi reads. ONT via flye, Hifi via hifiasm, scaffold flye on hifiasm + hifiont_hifiasm_on_hifiasm { includeConfig 'configs/hifi_ont_hifiasm_on_hifiasm.config' } // ont and hifi reads. ONT via hifiasm, Hifi via hifiasm, scaffold ONT on HiFi } -// Load nf-core custom profiles from different Institutions -includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" +// Load nf-core custom profiles from different institutions +// If params.custom_config_base is set AND either the NXF_OFFLINE environment variable is not set or params.custom_config_base is a local path, the nfcore_custom.config file from the specified base path is included. // Load nf-core/genomeassembler custom profiles from different institutions. 
-includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/genomeassembler.config" : "/dev/null" +includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" + +// Load nf-core/genomeassembler custom profiles from different institutions. +includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/pipeline/genomeassembler.config" : "/dev/null" // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled // Set to your registry if you have a mirror of containers @@ -231,6 +249,8 @@ podman.registry = 'quay.io' singularity.registry = 'quay.io' charliecloud.registry = 'quay.io' + + // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
@@ -289,14 +309,15 @@ manifest { homePage = 'https://github.com/nf-core/genomeassembler' description = """Assemble genomes from long ONT or pacbio HiFi reads""" mainScript = 'main.nf' - nextflowVersion = '!>=24.04.2' - version = '1.0.1' + defaultBranch = 'master' + nextflowVersion = '!>=24.10.5' + version = '1.1.0' doi = '10.5281/zenodo.14986998' } // Nextflow plugins plugins { - id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.4.2' // Validation of pipeline parameters and creation of an input channel from a sample sheet } validation { diff --git a/nextflow_schema.json b/nextflow_schema.json index e546e810..636070bc 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -27,8 +27,7 @@ "type": "string", "format": "directory-path", "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open", - "default": "null" + "fa_icon": "fas fa-folder-open" }, "email": { "type": "string", @@ -231,6 +230,9 @@ }, "pacbio_primers": { "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.fn?a(sta)?$", "description": "file containing pacbio primers for trimming with lima" } } @@ -243,8 +245,8 @@ "properties": { "assembler": { "type": "string", - "description": "Assembler to use. Valid choices are: `'hifiasm'`, `'flye'`, or `'flye_on_hifiasm'`. `flye_on_hifiasm` will scaffold flye assembly (ont) on hifiasm (hifi) assembly using ragtag", - "enum": ["flye", "hifiasm", "flye_on_hifiasm"], + "description": "Assembler to use. Valid choices are: `'hifiasm'`, `'flye'`, `'flye_on_hifiasm'` or `'hifiasm_on_hifiasm'`. `flye_on_hifiasm` will scaffold flye assembly (ont) on hifiasm (hifi) assembly using ragtag. 
`hifiasm_on_hifiasm` will scaffold hifiasm (ont) onto hifiasm (HiFi) using ragtag", + "enum": ["flye", "hifiasm", "flye_on_hifiasm", "hifiasm_on_hifiasm"], "default": "flye" }, "genome_size": { @@ -359,6 +361,7 @@ }, "busco_db": { "type": "string", + "format": "directory-path", "description": "Path to busco db (optional)" }, "busco_lineage": { diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 00000000..889df760 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,24 @@ +config { + // location for all nf-test tests + testsDir "." + + // nf-test directory including temporary files for each test + workDir System.getenv("NFT_WORKDIR") ?: ".nf-test" + + // location of an optional nextflow.config file specific for executing tests + configFile "tests/nextflow.config" + + // ignore tests coming from the nf-core/modules repo + ignore 'modules/nf-core/**/*', 'subworkflows/nf-core/**/*' + + // run all test with defined profile(s) from the main nextflow.config + profile "test" + + // list of filenames or patterns that should trigger a full test run + triggers 'nextflow.config', 'nf-test.config', 'conf/test.config', 'tests/nextflow.config', 'tests/.nftignore' + + // load the necessary plugins + plugins { + load "nft-utils@0.0.3" + } +} diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index bb89ad4b..8545a1f2 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -22,8 +22,8 @@ "@id": "./", "@type": "Dataset", "creativeWorkStatus": "Stable", - "datePublished": "2025-03-18T13:18:08+00:00", - "description": "

\n \n \n \"nf-core/genomeassembler\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14986998-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14986998)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/genomeassembler** is a bioinformatics pipeline that carries out genome assembly, polishing and scaffolding from long reads (ONT or pacbio). Assembly can be done via `flye` or `hifiasm`, polishing can be carried out with `medaka` (ONT), or `pilon` (requires short-reads), and scaffolding can be done using `LINKS`, `Longstitch`, or `RagTag` (if a reference is available). Quality control includes `BUSCO`, `QUAST` and `merqury` (requires short-reads).\nCurrently, this pipeline does not implement phasing of polyploid genomes or HiC scaffolding.\n\n\n \n \"nf-core/genomeassembler\"\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,ontreads,hifireads,ref_fasta,ref_gff,shortread_F,shortread_R,paired\nsampleName,ontreads.fa.gz,hifireads.fa.gz,assembly.fasta.gz,reference.fasta,reference.gff,short_F1.fastq,short_F2.fastq,true\n```\n\nEach row represents one genome to be assembled. `sample` should contain the name of the sample, `ontreads` should contain a path to ONT reads (fastq.gz), `hifireads` a path to HiFi reads (fastq.gz), `ref_fasta` and `ref_gff` contain reference genome fasta and annotations. `shortread_F` and `shortread_R` contain paths to short-read data, `paired` indicates if short-reads are paired. 
Columns can be omitted if they contain no data, with the exception of `shortread_R`, which needs to be present if `shortread_F` is there, even if it is empty.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/genomeassembler \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/genomeassembler/usage) and the [parameter documentation](https://nf-co.re/genomeassembler/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/genomeassembler/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/genomeassembler/output).\n\n## Credits\n\nnf-core/genomeassembler was originally written by [Niklas Schandry](https://github.com/nschan), of the Faculty of Biology of the Ludwig-Maximilians University (LMU) in Munich, Germany.\n\nI thank the following people for their extensive assistance and constructive reviews during the development of this pipeline:\n\n- [Mahesh Binzer-Panchal](https://github.com/mahesh-panchal)\n- [Matthias H\u00f6rtenhuber](https://github.com/mashehu)\n- [Louis Le N\u00e9zet](https://github.com/LouisLeNezet)\n- [J\u00falia Mir Pedrol](https://github.com/mirpedrol)\n- [Daniel Straub](https://github.com/d4straub)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or 
help, don't hesitate to get in touch on the [Slack `#genomeassembler` channel](https://nfcore.slack.com/channels/genomeassembler) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/genomeassembler for your analysis, please cite it using the following doi: [10.5281/zenodo.14986998](https://doi.org/10.5281/zenodo.14986998)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "datePublished": "2025-07-08T11:38:27+00:00", + "description": "

\n \n \n \"nf-core/genomeassembler\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/genomeassembler/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/genomeassembler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.14986998-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.14986998)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/genomeassembler)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23genomeassembler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/genomeassembler)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/genomeassembler** is a bioinformatics pipeline that carries out genome assembly, polishing and scaffolding from long reads (ONT or pacbio). Assembly can be done via `flye` or `hifiasm`, polishing can be carried out with `medaka` (ONT), or `pilon` (requires short-reads), and scaffolding can be done using `LINKS`, `Longstitch`, or `RagTag` (if a reference is available). Quality control includes `BUSCO`, `QUAST` and `merqury` (requires short-reads).\nCurrently, this pipeline does not implement phasing of polyploid genomes or HiC scaffolding.\n\n\n \n \"nf-core/genomeassembler\"\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,ontreads,hifireads,ref_fasta,ref_gff,shortread_F,shortread_R,paired\nsampleName,ontreads.fa.gz,hifireads.fa.gz,assembly.fasta.gz,reference.fasta,reference.gff,short_F1.fastq,short_F2.fastq,true\n```\n\nEach row represents one genome to be assembled. `sample` should contain the name of the sample, `ontreads` should contain a path to ONT reads (fastq.gz), `hifireads` a path to HiFi reads (fastq.gz), `ref_fasta` and `ref_gff` contain reference genome fasta and annotations. `shortread_F` and `shortread_R` contain paths to short-read data, `paired` indicates if short-reads are paired. Columns can be omitted if they contain no data, with the exception of `shortread_R`, which needs to be present if `shortread_F` is there, even if it is empty.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/genomeassembler \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/genomeassembler/usage) and the [parameter documentation](https://nf-co.re/genomeassembler/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/genomeassembler/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/genomeassembler/output).\n\n## Credits\n\nnf-core/genomeassembler was originally written by [Niklas Schandry](https://github.com/nschan), of the Faculty of Biology of the Ludwig-Maximilians University (LMU) in Munich, Germany.\n\nI thank the following people for their extensive assistance and constructive reviews during the development of this pipeline:\n\n- [Mahesh Binzer-Panchal](https://github.com/mahesh-panchal)\n- [Matthias H\u00f6rtenhuber](https://github.com/mashehu)\n- [Louis Le N\u00e9zet](https://github.com/LouisLeNezet)\n- [J\u00falia Mir Pedrol](https://github.com/mirpedrol)\n- [Daniel Straub](https://github.com/d4straub)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#genomeassembler` channel](https://nfcore.slack.com/channels/genomeassembler) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/genomeassembler for your analysis, please cite it using the following doi: [10.5281/zenodo.14986998](https://doi.org/10.5281/zenodo.14986998)\n\nAn extensive list of references 
for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -105,7 +105,7 @@ }, "mentions": [ { - "@id": "#b6b9b72e-48fc-4472-a0e9-8030921b5a17" + "@id": "#8601bd11-94fa-4298-805d-fb2c3f6a7eab" } ], "name": "nf-core/genomeassembler" @@ -134,17 +134,17 @@ ], "creator": [ { - "@id": "https://orcid.org/0000-0002-7860-3560" + "@id": "https://orcid.org/0000-0003-3099-7860" }, { - "@id": "https://orcid.org/0000-0003-1675-0677" + "@id": "https://orcid.org/0000-0002-7860-3560" }, { - "@id": "https://orcid.org/0000-0003-3099-7860" + "@id": "https://orcid.org/0000-0003-1675-0677" } ], "dateCreated": "", - "dateModified": "2025-03-18T14:18:08Z", + "dateModified": "2025-07-08T11:38:27Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", @@ -154,11 +154,6 @@ "license": [ "MIT" ], - "maintainer": [ - { - "@id": "https://orcid.org/0000-0003-3099-7860" - } - ], "name": [ "nf-core/genomeassembler" ], @@ -170,10 +165,10 @@ }, "url": [ "https://github.com/nf-core/genomeassembler", - "https://nf-co.re/genomeassembler/1.0.1/" + "https://nf-co.re/genomeassembler/1.1.0/" ], "version": [ - "1.0.1" + "1.1.0" ] }, { @@ -186,14 +181,14 @@ "url": { "@id": "https://www.nextflow.io/" }, - "version": "!>=24.04.2" + "version": "!>=24.10.5" }, { - "@id": "#b6b9b72e-48fc-4472-a0e9-8030921b5a17", + "@id": "#8601bd11-94fa-4298-805d-fb2c3f6a7eab", "@type": "TestSuite", "instance": [ { - "@id": "#10b3c45d-045e-40b5-898c-73a2f6edcb1b" + "@id": 
"#a9fa96cd-308f-4e32-b912-de8b760cb8e2" } ], "mainEntity": { @@ -202,10 +197,10 @@ "name": "Test suite for nf-core/genomeassembler" }, { - "@id": "#10b3c45d-045e-40b5-898c-73a2f6edcb1b", + "@id": "#a9fa96cd-308f-4e32-b912-de8b760cb8e2", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/genomeassembler", - "resource": "repos/nf-core/genomeassembler/actions/workflows/ci.yml", + "resource": "repos/nf-core/genomeassembler/actions/workflows/nf-test.yml", "runsOn": { "@id": "https://w3id.org/ro/terms/test#GithubService" }, @@ -340,6 +335,12 @@ "name": "nf-core", "url": "https://nf-co.re/" }, + { + "@id": "https://orcid.org/0000-0003-3099-7860", + "@type": "Person", + "email": "niklas@bio.lmu.de", + "name": "Niklas Schandry" + }, { "@id": "https://orcid.org/0000-0002-7860-3560", "@type": "Person", @@ -351,12 +352,6 @@ "@type": "Person", "email": "mahesh.binzer-panchal@nbis.se", "name": "Mahesh Binzer-Panchal" - }, - { - "@id": "https://orcid.org/0000-0003-3099-7860", - "@type": "Person", - "email": "niklas@bio.lmu.de", - "name": "Niklas Schandry" } ] } \ No newline at end of file diff --git a/subworkflows/local/assemble/main.nf b/subworkflows/local/assemble/main.nf index bd7c0eef..1030beb9 100644 --- a/subworkflows/local/assemble/main.nf +++ b/subworkflows/local/assemble/main.nf @@ -1,14 +1,12 @@ include { FLYE } from '../../../modules/nf-core/flye/main' include { HIFIASM } from '../../../modules/nf-core/hifiasm/main' include { HIFIASM as HIFIASM_ONT } from '../../../modules/nf-core/hifiasm/main' -include { GFA_2_FA } from '../../../modules/local/gfa2fa/main' -include { MAP_TO_ASSEMBLY } from '../mapping/map_to_assembly/main' +include { GFA_2_FA as GFA_2_FA_HIFI } from '../../../modules/local/gfa2fa/main' +include { GFA_2_FA as GFA_2_FA_ONT} from '../../../modules/local/gfa2fa/main' include { MAP_TO_REF } from '../mapping/map_to_ref/main' -include { RUN_QUAST } from '../qc/quast/main' -include { RUN_BUSCO } from '../qc/busco/main' -include { 
MERQURY_QC } from '../qc/merqury/main' include { RUN_LIFTOFF } from '../liftoff/main' -include { RAGTAG_SCAFFOLD } from '../../../modules/local/ragtag/main' +include { RAGTAG_PATCH } from '../../../modules/nf-core/ragtag/patch/main' +include { QC } from '../qc/main' workflow ASSEMBLE { @@ -25,9 +23,6 @@ workflow ASSEMBLE { Channel.empty().set { ch_ref_bam } Channel.empty().set { ch_assembly_bam } Channel.empty().set { ch_assembly } - Channel.empty().set { assembly_quast_reports } - Channel.empty().set { assembly_busco_reports } - Channel.empty().set { assembly_merqury_reports } Channel.empty().set { flye_inputs } Channel.empty().set { hifiasm_inputs } Channel.empty().set { longreads } @@ -73,64 +68,89 @@ workflow ASSEMBLE { hifi_reads .join(ont_reads) .set { hifiasm_inputs } - HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []]) - GFA_2_FA(HIFIASM.out.processed_contigs) - GFA_2_FA.out.contigs_fasta.set { ch_assembly } + HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []], [[], []]) + GFA_2_FA_HIFI(HIFIASM.out.processed_unitigs) + GFA_2_FA_HIFI.out.contigs_fasta.set { ch_assembly } - ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA.out.versions) + ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA_HIFI.out.versions) } // ONT reads only if (!params.hifi && params.ont) { ont_reads .map { meta, ontreads -> [meta, ontreads, []] } .set { hifiasm_inputs } - HIFIASM_ONT(hifiasm_inputs, [[], [], []], [[], [], []]) - GFA_2_FA(HIFIASM_ONT.out.processed_contigs) - GFA_2_FA.out.contigs_fasta.set { ch_assembly } - - ch_versions = ch_versions.mix(HIFIASM_ONT.out.versions).mix(GFA_2_FA.out.versions) + HIFIASM_ONT(hifiasm_inputs, [[], [], []], [[], [], []], [[], []]) + GFA_2_FA_ONT(HIFIASM_ONT.out.processed_unitigs) + GFA_2_FA_ONT.out.contigs_fasta.set { ch_assembly } + ch_versions = ch_versions.mix(HIFIASM_ONT.out.versions).mix(GFA_2_FA_ONT.out.versions) } // HiFI reads only if (params.hifi && !params.ont) { hifi_reads .map { meta, ontreads -> [meta, 
ontreads, []] } .set { hifiasm_inputs } - HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []]) + HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []], [[], []]) - GFA_2_FA(HIFIASM.out.processed_contigs) - GFA_2_FA.out.contigs_fasta.set { ch_assembly } + GFA_2_FA_HIFI(HIFIASM.out.processed_unitigs) + GFA_2_FA_HIFI.out.contigs_fasta.set { ch_assembly } - ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA.out.versions) + ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA_HIFI.out.versions) } } - if (params.assembler == "flye_on_hifiasm") { + if (params.assembler == "flye_on_hifiasm" | params.assembler == "hifiasm_on_hifiasm") { // Run hifiasm hifi_reads .map { meta, hifireads -> [meta, hifireads, []] } .set { hifiasm_inputs } - HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []]) + HIFIASM(hifiasm_inputs, [[], [], []], [[], [], []], [[], []]) - GFA_2_FA(HIFIASM.out.processed_contigs) + GFA_2_FA_HIFI(HIFIASM.out.processed_unitigs) - ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA.out.versions) + ch_versions = ch_versions.mix(HIFIASM.out.versions).mix(GFA_2_FA_HIFI.out.versions) + if(params.assembler == "flye_on_hifiasm") { // Run flye - ont_reads - .join(genome_size) - .map { meta, reads, genomesize -> [[id: meta.id, genome_size: genomesize], reads]} - .set { flye_inputs } + ont_reads + .join(genome_size) + .map { meta, reads, genomesize -> [[id: meta.id, genome_size: genomesize], reads]} + .set { flye_inputs } - FLYE(flye_inputs, params.flye_mode) - FLYE.out.fasta - .map { meta, assembly -> [[id: meta.id], assembly] } - .join( - GFA_2_FA.out.contigs_fasta - ) - .set { ragtag_in } - RAGTAG_SCAFFOLD(ragtag_in) - // takes: meta, assembly (flye), reference (hifi) - RAGTAG_SCAFFOLD.out.corrected_assembly.set { ch_assembly } - ch_versions = ch_versions.mix(FLYE.out.versions).mix(RAGTAG_SCAFFOLD.out.versions) + FLYE(flye_inputs, params.flye_mode) + FLYE.out.fasta + .map { meta, assembly -> [[id: meta.id], assembly] } + .join( + 
GFA_2_FA_HIFI.out.contigs_fasta + ) + .multiMap { meta, flye_fasta, hifiasm_fasta -> + target: [meta, flye_fasta] + query: [meta, hifiasm_fasta] + } + .set { ragtag_in } + ch_versions = ch_versions.mix(FLYE.out.versions) + } + if(params.assembler == "hifiasm_on_hifiasm") { + // Run hifiasm --ont + ont_reads + .map { meta, ontreads -> [meta, ontreads, []] } + .set { hifiasm_inputs } + HIFIASM_ONT(hifiasm_inputs,[[], [], []], [[], [], []], [[], []]) + GFA_2_FA_ONT(HIFIASM_ONT.out.processed_unitigs) + GFA_2_FA_ONT.out.contigs_fasta + .join( + GFA_2_FA_HIFI.out.contigs_fasta + ) + .multiMap { meta, ont_assembly, hifi_assembly -> + target: [meta, ont_assembly] + query: [meta, hifi_assembly] + } + .set { ragtag_in } + ch_versions = ch_versions.mix(HIFIASM_ONT.out.versions).mix(GFA_2_FA_ONT.out.versions) + } + + RAGTAG_PATCH(ragtag_in.target, ragtag_in.query, [[], []], [[], []] ) + // takes: meta, assembly (ont), reference (hifi) + RAGTAG_PATCH.out.patch_fasta.set { ch_assembly } + ch_versions = ch_versions.mix(RAGTAG_PATCH.out.versions) } } /* @@ -154,7 +174,7 @@ workflow ASSEMBLE { .map { meta, reads -> [[id: meta.id], reads] } .set { longreads } } - if (params.assembler == "hifiasm" || params.assembler == "flye_on_hifiasm") { + if (params.assembler == "hifiasm" || params.assembler == "flye_on_hifiasm" || params.assembler == "hifiasm_on_hifiasm") { hifiasm_inputs .map { meta, long_reads, _ultralong -> [meta, long_reads] } .set { longreads } @@ -172,49 +192,20 @@ workflow ASSEMBLE { } } } - if (params.quast) { + if (params.quast) { if (params.use_ref) { MAP_TO_REF(longreads, ch_refs) MAP_TO_REF.out.ch_aln_to_ref_bam.set { ch_ref_bam } } - - MAP_TO_ASSEMBLY(longreads, ch_assembly) - MAP_TO_ASSEMBLY.out.aln_to_assembly_bam.set { ch_assembly_bam } - - RUN_QUAST(ch_assembly, ch_input, ch_ref_bam, ch_assembly_bam) - RUN_QUAST.out.quast_tsv.set { assembly_quast_reports } - - ch_versions = ch_versions.mix(MAP_TO_ASSEMBLY.out.versions).mix(RUN_QUAST.out.versions) - } } /* QC on 
initial assembly */ - if (params.busco) { - RUN_BUSCO(ch_assembly) - RUN_BUSCO.out.batch_summary.set { assembly_busco_reports } - ch_versions = ch_versions.mix(RUN_BUSCO.out.versions) - } - - if (params.short_reads) { - MERQURY_QC(ch_assembly, meryl_kmers) - MERQURY_QC.out.stats - .join( - MERQURY_QC.out.spectra_asm_hist - ) - .join( - MERQURY_QC.out.spectra_cn_hist - ) - .join( - MERQURY_QC.out.assembly_qv - ) - .set { assembly_merqury_reports } - - ch_versions = ch_versions.mix(MERQURY_QC.out.versions) - } + QC(ch_input, longreads, ch_assembly, ch_ref_bam, meryl_kmers) + ch_versions = ch_versions.mix(QC.out.versions) if (params.lift_annotations) { RUN_LIFTOFF(ch_assembly, ch_input) @@ -222,11 +213,11 @@ workflow ASSEMBLE { } emit: - assembly = ch_assembly - ref_bam = ch_ref_bam + assembly = ch_assembly + ref_bam = ch_ref_bam longreads - assembly_quast_reports - assembly_busco_reports - assembly_merqury_reports - versions = ch_versions + assembly_quast_reports = QC.out.quast_out + assembly_busco_reports = QC.out.busco_out + assembly_merqury_reports = QC.out.merqury_report_files + versions = ch_versions } diff --git a/subworkflows/local/polishing/medaka/polish_medaka/main.nf b/subworkflows/local/polishing/medaka/polish_medaka/main.nf index cdf52e23..e4d459fa 100644 --- a/subworkflows/local/polishing/medaka/polish_medaka/main.nf +++ b/subworkflows/local/polishing/medaka/polish_medaka/main.nf @@ -1,9 +1,6 @@ include { RUN_MEDAKA } from '../run_medaka/main' -include { MAP_TO_ASSEMBLY } from '../../../mapping/map_to_assembly/main' -include { RUN_BUSCO } from '../../../qc/busco/main' -include { RUN_QUAST } from '../../../qc/quast/main' +include { QC } from '../../../qc/main.nf' include { RUN_LIFTOFF } from '../../../liftoff/main' -include { MERQURY_QC } from '../../../qc/merqury/main' workflow POLISH_MEDAKA { take: @@ -24,35 +21,9 @@ workflow POLISH_MEDAKA { ch_versions = ch_versions.mix(RUN_MEDAKA.out.versions) - MAP_TO_ASSEMBLY(in_reads, polished_assembly) + 
QC(ch_input, in_reads, polished_assembly, ch_aln_to_ref, meryl_kmers) - ch_versions = ch_versions.mix(MAP_TO_ASSEMBLY.out.versions) - - RUN_QUAST(polished_assembly, ch_input, ch_aln_to_ref, MAP_TO_ASSEMBLY.out.aln_to_assembly_bam) - RUN_QUAST.out.quast_tsv.set { quast_out } - - ch_versions = ch_versions.mix(RUN_QUAST.out.versions) - - RUN_BUSCO(polished_assembly) - RUN_BUSCO.out.batch_summary.set { busco_out } - - ch_versions = ch_versions.mix(RUN_BUSCO.out.versions) - - if (params.short_reads) { - MERQURY_QC(polished_assembly, meryl_kmers) - MERQURY_QC.out.stats - .join( - MERQURY_QC.out.spectra_asm_hist - ) - .join( - MERQURY_QC.out.spectra_cn_hist - ) - .join( - MERQURY_QC.out.assembly_qv - ) - .set { merqury_report_files } - ch_versions = ch_versions.mix(MERQURY_QC.out.versions) - } + ch_versions = ch_versions.mix(QC.out.versions) if (params.lift_annotations) { RUN_LIFTOFF(polished_assembly, ch_input) @@ -63,8 +34,8 @@ workflow POLISH_MEDAKA { emit: polished_assembly - quast_out - busco_out - merqury_report_files + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + merqury_report_files = QC.out.merqury_report_files versions } diff --git a/subworkflows/local/polishing/pilon/polish_pilon/main.nf b/subworkflows/local/polishing/pilon/polish_pilon/main.nf index cde1ec4a..3b7df47a 100644 --- a/subworkflows/local/polishing/pilon/polish_pilon/main.nf +++ b/subworkflows/local/polishing/pilon/polish_pilon/main.nf @@ -1,10 +1,7 @@ include { RUN_PILON } from '../run_pilon/main' include { MAP_SR } from '../../../mapping/map_sr/main' -include { MAP_TO_ASSEMBLY } from '../../../mapping/map_to_assembly/main' -include { RUN_BUSCO } from '../../../qc/busco/main' -include { RUN_QUAST } from '../../../qc/quast/main' include { RUN_LIFTOFF } from '../../../liftoff/main' -include { MERQURY_QC } from '../../../qc/merqury/main' +include { QC } from '../../../qc/main.nf' workflow POLISH_PILON { take: @@ -17,9 +14,6 @@ workflow POLISH_PILON { main: Channel.empty().set { 
ch_versions } - Channel.empty().set { quast_out } - Channel.empty().set { busco_out } - Channel.empty().set { merqury_report_files } MAP_SR(shortreads, assembly) @@ -31,35 +25,9 @@ workflow POLISH_PILON { ch_versions = ch_versions.mix(RUN_PILON.out.versions) - MAP_TO_ASSEMBLY(in_reads, pilon_polished) + QC(ch_input, in_reads, pilon_polished, ch_aln_to_ref, meryl_kmers) - ch_versions = ch_versions.mix(MAP_TO_ASSEMBLY.out.versions) - - RUN_QUAST(pilon_polished, ch_input, ch_aln_to_ref, MAP_TO_ASSEMBLY.out.aln_to_assembly_bam) - RUN_QUAST.out.quast_tsv.set { quast_out } - - ch_versions = ch_versions.mix(RUN_QUAST.out.versions) - - RUN_BUSCO(pilon_polished) - RUN_BUSCO.out.batch_summary.set { busco_out } - - ch_versions = ch_versions.mix(RUN_BUSCO.out.versions) - - if (params.short_reads) { - MERQURY_QC(pilon_polished, meryl_kmers) - MERQURY_QC.out.stats - .join( - MERQURY_QC.out.spectra_asm_hist - ) - .join( - MERQURY_QC.out.spectra_cn_hist - ) - .join( - MERQURY_QC.out.assembly_qv - ) - .set { merqury_report_files } - ch_versions = ch_versions.mix(MERQURY_QC.out.versions) - } + ch_versions = ch_versions.mix(QC.out.versions) if (params.lift_annotations) { RUN_LIFTOFF(pilon_polished, ch_input) @@ -70,8 +38,8 @@ workflow POLISH_PILON { emit: pilon_polished - quast_out - busco_out - merqury_report_files + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + merqury_report_files = QC.out.merqury_report_files versions } diff --git a/subworkflows/local/qc/busco/main.nf b/subworkflows/local/qc/busco/main.nf index 59d30544..93d93c5e 100644 --- a/subworkflows/local/qc/busco/main.nf +++ b/subworkflows/local/qc/busco/main.nf @@ -11,7 +11,7 @@ workflow RUN_BUSCO { Channel.empty().set { short_summary_json } if (params.busco) { - BUSCO(assembly, 'genome', params.busco_lineage, params.busco_db ? file(params.busco_db, checkIfExists: true) : [], []) + BUSCO(assembly, 'genome', params.busco_lineage, params.busco_db ? 
file(params.busco_db, checkIfExists: true) : [], [], true) BUSCO.out.batch_summary.set { batch_summary } BUSCO.out.short_summaries_txt.set { short_summary_txt } BUSCO.out.short_summaries_json.set { short_summary_json } diff --git a/subworkflows/local/qc/main.nf b/subworkflows/local/qc/main.nf new file mode 100644 index 00000000..bba6b31d --- /dev/null +++ b/subworkflows/local/qc/main.nf @@ -0,0 +1,61 @@ +include { MAP_TO_ASSEMBLY } from '../mapping/map_to_assembly/main' +include { RUN_BUSCO } from './busco/main.nf' +include { RUN_QUAST } from './quast/main.nf' +include { MERQURY_QC } from './merqury/main.nf' + +workflow QC { + take: + inputs + in_reads + scaffolds + aln_to_ref + meryl_kmers + + main: + Channel.empty().set { ch_versions } + Channel.empty().set { quast_out } + Channel.empty().set { busco_out } + Channel.empty().set { merqury_report_files } + Channel.empty().set { map_to_assembly } + + if (params.quast) { + MAP_TO_ASSEMBLY(in_reads, scaffolds) + MAP_TO_ASSEMBLY.out.aln_to_assembly_bam.set { map_to_assembly } + ch_versions = ch_versions.mix(MAP_TO_ASSEMBLY.out.versions) + } + + RUN_QUAST(scaffolds, inputs, aln_to_ref, map_to_assembly) + RUN_QUAST.out.quast_tsv.set { quast_out } + + ch_versions = ch_versions.mix(RUN_QUAST.out.versions) + + RUN_BUSCO(scaffolds) + RUN_BUSCO.out.batch_summary.set { busco_out } + + ch_versions = ch_versions.mix(RUN_BUSCO.out.versions) + + if (params.short_reads) { + MERQURY_QC(scaffolds, meryl_kmers) + MERQURY_QC.out.stats + .join( + MERQURY_QC.out.spectra_asm_hist + ) + .join( + MERQURY_QC.out.spectra_cn_hist + ) + .join( + MERQURY_QC.out.assembly_qv + ) + .set { merqury_report_files } + + ch_versions = ch_versions.mix(MERQURY_QC.out.versions) + } + + versions = ch_versions + + emit: + quast_out + busco_out + merqury_report_files + versions +} diff --git a/subworkflows/local/scaffolding/links/main.nf b/subworkflows/local/scaffolding/links/main.nf index 8119427c..4493e4c4 100644 --- 
a/subworkflows/local/scaffolding/links/main.nf +++ b/subworkflows/local/scaffolding/links/main.nf @@ -1,9 +1,6 @@ -include { LINKS } from '../../../../modules/local/links/main' -include { MAP_TO_ASSEMBLY } from '../../mapping/map_to_assembly/main' -include { RUN_QUAST } from '../../qc/quast/main' -include { RUN_BUSCO } from '../../qc/busco/main' +include { LINKS } from '../../../../modules/nf-core/links/main' +include { QC } from '../../qc/main' include { RUN_LIFTOFF } from '../../liftoff/main' -include { MERQURY_QC } from '../../qc/merqury/main' workflow RUN_LINKS { take: @@ -16,49 +13,23 @@ workflow RUN_LINKS { main: Channel.empty().set { ch_versions } - Channel.empty().set { quast_out } - Channel.empty().set { busco_out } - Channel.empty().set { merqury_report_files } assembly .join(in_reads) + .multiMap { meta, assembly_fa, reads -> + assembly: [meta, assembly_fa] + reads: [meta, reads] + } .set { links_in } - LINKS(links_in) - LINKS.out.scaffolds.set { scaffolds } + LINKS(links_in.assembly, links_in.reads) + LINKS.out.scaffolds_fasta.set { scaffolds } ch_versions = ch_versions.mix(LINKS.out.versions) - MAP_TO_ASSEMBLY(in_reads, scaffolds) + QC(inputs, in_reads, scaffolds, ch_aln_to_ref, meryl_kmers) - ch_versions = ch_versions.mix(MAP_TO_ASSEMBLY.out.versions) - - RUN_QUAST(scaffolds, inputs, ch_aln_to_ref, MAP_TO_ASSEMBLY.out.aln_to_assembly_bam) - RUN_QUAST.out.quast_tsv.set { quast_out } - - ch_versions = ch_versions.mix(RUN_QUAST.out.versions) - - RUN_BUSCO(scaffolds) - RUN_BUSCO.out.batch_summary.set { busco_out } - - ch_versions = ch_versions.mix(RUN_BUSCO.out.versions) - - if (params.short_reads) { - MERQURY_QC(scaffolds, meryl_kmers) - MERQURY_QC.out.stats - .join( - MERQURY_QC.out.spectra_asm_hist - ) - .join( - MERQURY_QC.out.spectra_cn_hist - ) - .join( - MERQURY_QC.out.assembly_qv - ) - .set { merqury_report_files } - - ch_versions = ch_versions.mix(MERQURY_QC.out.versions) - } + ch_versions = ch_versions.mix(QC.out.versions) if 
(params.lift_annotations) { RUN_LIFTOFF(scaffolds, inputs) @@ -69,8 +40,8 @@ workflow RUN_LINKS { emit: scaffolds - quast_out - busco_out - merqury_report_files + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + merqury_report_files = QC.out.merqury_report_files versions } diff --git a/subworkflows/local/scaffolding/longstitch/main.nf b/subworkflows/local/scaffolding/longstitch/main.nf index c7fcc16b..8756225d 100644 --- a/subworkflows/local/scaffolding/longstitch/main.nf +++ b/subworkflows/local/scaffolding/longstitch/main.nf @@ -1,9 +1,6 @@ include { LONGSTITCH } from '../../../../modules/local/longstitch/main' -include { MAP_TO_ASSEMBLY } from '../../mapping/map_to_assembly/main' -include { RUN_QUAST } from '../../qc/quast/main' -include { RUN_BUSCO } from '../../qc/busco/main' +include { QC } from '../../qc/main' include { RUN_LIFTOFF } from '../../liftoff/main' -include { MERQURY_QC } from '../../qc/merqury/main' workflow RUN_LONGSTITCH { take: @@ -17,9 +14,7 @@ workflow RUN_LONGSTITCH { main: Channel.empty().set { ch_versions } - Channel.empty().set { quast_out } - Channel.empty().set { busco_out } - Channel.empty().set { merqury_report_files } + assembly .join(in_reads) .join(genome_size) @@ -30,36 +25,10 @@ workflow RUN_LONGSTITCH { ch_versions = ch_versions.mix(LONGSTITCH.out.versions) - MAP_TO_ASSEMBLY(in_reads, scaffolds) - - ch_versions = ch_versions.mix(MAP_TO_ASSEMBLY.out.versions) - - RUN_QUAST(scaffolds, inputs, ch_aln_to_ref, MAP_TO_ASSEMBLY.out.aln_to_assembly_bam) - RUN_QUAST.out.quast_tsv.set { quast_out } - - ch_versions = ch_versions.mix(RUN_QUAST.out.versions) + QC(inputs, in_reads, scaffolds, ch_aln_to_ref, meryl_kmers) - RUN_BUSCO(scaffolds) - RUN_BUSCO.out.batch_summary.set { busco_out } + ch_versions = ch_versions.mix(QC.out.versions) - ch_versions = ch_versions.mix(RUN_BUSCO.out.versions) - - if (params.short_reads) { - MERQURY_QC(scaffolds, meryl_kmers) - MERQURY_QC.out.stats - .join( - MERQURY_QC.out.spectra_asm_hist - ) - 
.join( - MERQURY_QC.out.spectra_cn_hist - ) - .join( - MERQURY_QC.out.assembly_qv - ) - .set { merqury_report_files } - - ch_versions = ch_versions.mix(MERQURY_QC.out.versions) - } if (params.lift_annotations) { RUN_LIFTOFF(LONGSTITCH.out.ntlLinks_arks_scaffolds, inputs) ch_versions = ch_versions.mix(RUN_LIFTOFF.out.versions) @@ -69,8 +38,8 @@ workflow RUN_LONGSTITCH { emit: scaffolds - quast_out - busco_out - merqury_report_files + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + merqury_report_files = QC.out.merqury_report_files versions } diff --git a/subworkflows/local/scaffolding/ragtag/main.nf b/subworkflows/local/scaffolding/ragtag/main.nf index 83574b93..518afb87 100644 --- a/subworkflows/local/scaffolding/ragtag/main.nf +++ b/subworkflows/local/scaffolding/ragtag/main.nf @@ -1,9 +1,6 @@ -include { RAGTAG_SCAFFOLD } from '../../../../modules/local/ragtag/main' -include { MAP_TO_ASSEMBLY } from '../../mapping/map_to_assembly/main' -include { RUN_QUAST } from '../../qc/quast/main' -include { RUN_BUSCO } from '../../qc/busco/main' +include { RAGTAG_SCAFFOLD } from '../../../../modules/nf-core/ragtag/scaffold/main' +include { QC } from '../../qc/main' include { RUN_LIFTOFF } from '../../liftoff/main' -include { MERQURY_QC } from '../../qc/merqury/main' workflow RUN_RAGTAG { @@ -17,14 +14,16 @@ workflow RUN_RAGTAG { main: Channel.empty().set { ch_versions } - Channel.empty().set { quast_out } - Channel.empty().set { busco_out } - Channel.empty().set { merqury_report_files } + assembly .join(references) + .multiMap { meta, assembly_fasta, reference_fasta -> + assembly: [meta, assembly_fasta] + reference: [meta, reference_fasta] + } .set { ragtag_in } - RAGTAG_SCAFFOLD(ragtag_in) + RAGTAG_SCAFFOLD(ragtag_in.assembly, ragtag_in.reference, [[], []], [[], [], []]) RAGTAG_SCAFFOLD.out.corrected_assembly.set { ragtag_scaffold_fasta } @@ -32,37 +31,9 @@ workflow RUN_RAGTAG { ch_versions = ch_versions.mix(RAGTAG_SCAFFOLD.out.versions) - 
MAP_TO_ASSEMBLY(in_reads, ragtag_scaffold_fasta) - - ch_versions = ch_versions.mix(MAP_TO_ASSEMBLY.out.versions) - - - RUN_QUAST(ragtag_scaffold_fasta, inputs, ch_aln_to_ref, MAP_TO_ASSEMBLY.out.aln_to_assembly_bam) - RUN_QUAST.out.quast_tsv.set { quast_out } - - ch_versions = ch_versions.mix(RUN_QUAST.out.versions) + QC(inputs, in_reads, ragtag_scaffold_fasta, ch_aln_to_ref, meryl_kmers) - RUN_BUSCO(ragtag_scaffold_fasta) - RUN_BUSCO.out.batch_summary.set { busco_out } - - ch_versions = ch_versions.mix(RUN_BUSCO.out.versions) - - if (params.short_reads) { - MERQURY_QC(ragtag_scaffold_fasta, meryl_kmers) - MERQURY_QC.out.stats - .join( - MERQURY_QC.out.spectra_asm_hist - ) - .join( - MERQURY_QC.out.spectra_cn_hist - ) - .join( - MERQURY_QC.out.assembly_qv - ) - .set { merqury_report_files } - - ch_versions = ch_versions.mix(MERQURY_QC.out.versions) - } + ch_versions = ch_versions.mix(QC.out.versions) if (params.lift_annotations) { RUN_LIFTOFF(RAGTAG_SCAFFOLD.out.corrected_assembly, inputs) @@ -74,8 +45,8 @@ workflow RUN_RAGTAG { emit: ragtag_scaffold_fasta ragtag_scaffold_agp - quast_out - busco_out - merqury_report_files + quast_out = QC.out.quast_out + busco_out = QC.out.busco_out + merqury_report_files = QC.out.merqury_report_files versions } diff --git a/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf b/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf index ad5059aa..8acd6fab 100644 --- a/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_genomeassembler_pipeline/main.nf @@ -43,7 +43,7 @@ workflow PIPELINE_INITIALISATION { version, true, outdir, - workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1, ) // @@ -52,7 +52,7 @@ workflow PIPELINE_INITIALISATION { UTILS_NFSCHEMA_PLUGIN( workflow, validate_params, - null + null, ) // @@ -67,22 +67,27 @@ workflow PIPELINE_INITIALISATION { // 
Channel.empty().set { ch_refs } - Channel - .fromPath(params.input) + Channel.fromPath(params.input) .splitCsv(header: true) .map { it -> [meta: [id: it.sample], ontreads: it.ontreads, hifireads: it.hifireads, ref_fasta: it.ref_fasta, ref_gff: it.ref_gff, shortread_F: it.shortread_F, shortread_R: it.shortread_R, paired: it.paired] } .set { ch_samplesheet } if (params.use_ref) { ch_samplesheet - .map { it -> [it.meta, it.ref_fasta] } + .map { it -> [it.meta, file(it.ref_fasta, checkIfExists: true)] } .set { ch_refs } } + if (params.lift_annotations) { + ch_samplesheet + .map { it -> [it.meta, file(it.ref_gff, checkIfExists: true)] } + } // check for assembler / read combination def hifi_only = params.hifi && !params.ont ? true : false - if (params.assembler == "flye") { - if (params.hifi) { - if (!hifi_only) { - error('Cannot combine hifi and ont reads with flye') + if (!params.skip_assembly) { + if (params.assembler == "flye") { + if (params.hifi) { + if (!hifi_only) { + error('Cannot combine hifi and ont reads with flye') + } } } } @@ -96,10 +101,12 @@ workflow PIPELINE_INITIALISATION { if (params.scaffold_longstitch) { // If genomesize is not provided, and if ONT is not used in combination with jellyfish // Throw an error - if ( !params.genome_size && (!params.ont && !params.jellyfish) ) { + if (!params.genome_size && (!params.ont && !params.jellyfish)) { error("Scaffolding with longstitch requires genome size.\n Either provide a genome size with --genome_size or estimate from ONT reads using jellyfish and genomescope") } } + + emit: samplesheet = ch_samplesheet refs = ch_refs @@ -169,7 +176,7 @@ def validateInputSamplesheet(input) { error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. 
single-end or paired-end: ${metas[0].id}") } - return [metas[0], fastqs] + return [ metas[0], fastqs ] } // // Generate methods description for MultiQC diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml deleted file mode 100644 index 30b69d6a..00000000 --- a/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/bam_sort_stats_samtools: - - subworkflows/nf-core/bam_sort_stats_samtools/** diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml deleted file mode 100644 index ec2f2d68..00000000 --- a/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/bam_stats_samtools: - - subworkflows/nf-core/bam_stats_samtools/** diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config index 0907ac58..09ef842a 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config @@ -1,5 +1,5 @@ plugins { - id "nf-schema@2.1.0" + id "nf-schema@2.4.2" } validation { diff --git a/tests/.nftignore b/tests/.nftignore new file mode 100644 index 00000000..e8128b21 --- /dev/null +++ b/tests/.nftignore @@ -0,0 +1,6 @@ +.DS_Store +fastqc/*_fastqc.{html,zip} +pipeline_info/*.{html,json,txt,yml} +*/*/*/*.{log,bin,gz,gff3,fasta,agp} +*/*/*.{log,bin,gz,gff3,txt} +*/*/*/*.assembly_info.txt diff --git a/tests/default.nf.test b/tests/default.nf.test new file mode 100644 index 00000000..67fb220f --- /dev/null +++ b/tests/default.nf.test @@ -0,0 +1,35 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + tag "pipeline" + + test("-profile test") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with 
a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_genomeassembler_software_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap new file mode 100644 index 00000000..73ab05f2 --- /dev/null +++ b/tests/default.nf.test.snap @@ -0,0 +1,89 @@ +{ + "-profile test": { + "content": [ + 6, + { + "FLYE": { + "flye": "2.9.5-b1801" + }, + "GFA_2_FA_HIFI": { + "awk": "mawk 1.3.4", + "gzip": 1.13 + }, + "HIFIASM": { + "hifiasm": "0.25.0-r726" + }, + "LIFTOFF": { + "liftoff": "v1.6.3" + }, + "NANOQ": { + "nanoq": "0.10.0" + }, + "RAGTAG_PATCH": { + "ragtag": "2.1.0" + }, + "Workflow": { + "nf-core/genomeassembler": "v1.1.0" + } + }, + [ + "Col-0_2MB", + "Col-0_2MB/QC", + "Col-0_2MB/QC/nanoq", + "Col-0_2MB/QC/nanoq/Col-0_2MB_report.json", + "Col-0_2MB/QC/nanoq/Col-0_2MB_stats.json", + "Col-0_2MB/assembly", + "Col-0_2MB/assembly/Col-0_2MB_assembly.gff3", + "Col-0_2MB/assembly/Col-0_2MB_assembly.unmapped.txt", + "Col-0_2MB/assembly/flye", + "Col-0_2MB/assembly/flye/Col-0_2MB.assembly.fasta.gz", + "Col-0_2MB/assembly/flye/Col-0_2MB.assembly_graph.gfa.gz", + "Col-0_2MB/assembly/flye/Col-0_2MB.assembly_graph.gv.gz", + "Col-0_2MB/assembly/flye/Col-0_2MB.assembly_info.txt", + "Col-0_2MB/assembly/flye/Col-0_2MB.flye.log", + 
"Col-0_2MB/assembly/flye/Col-0_2MB.params.json", + "Col-0_2MB/assembly/hifiasm", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.bp.hap1.p_ctg.gfa", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.bp.hap2.p_ctg.gfa", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.bp.p_ctg.gfa", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.bp.p_utg.gfa", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.bp.r_utg.gfa", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.ec.bin", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.ovlp.reverse.bin", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.ovlp.source.bin", + "Col-0_2MB/assembly/hifiasm/Col-0_2MB.stderr.log", + "Col-0_2MB/assembly/hifiasm/fasta", + "Col-0_2MB/assembly/hifiasm/fasta/Col-0_2MB.bp.p_utg.fa.gz", + "Col-0_2MB/assembly/ragtag", + "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.comps.fasta", + "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.ctg.agp", + "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.ctg.fasta", + "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.patch.agp", + "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.patch.err", + "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.patch.fasta", + "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.rename.agp", + "Col-0_2MB/assembly/ragtag/Col-0_2MB_assembly_patch.rename.fasta", + "pipeline_info", + "pipeline_info/nf_core_genomeassembler_software_versions.yml", + "pipeline_info/nf_core_pipeline_software_versions.yml" + ], + [ + "Col-0_2MB_report.json:md5,25d7ae5780b2f565cb46df7c9e09388a", + "Col-0_2MB_stats.json:md5,d41d8cd98f00b204e9800998ecf8427e", + "Col-0_2MB.params.json:md5,afa91c041bce5e190f4a699d11b69db6", + "Col-0_2MB.bp.hap1.p_ctg.gfa:md5,46ee70869884ad585165bd48081414e9", + "Col-0_2MB.bp.hap2.p_ctg.gfa:md5,7792865547989d6d284f640425c4e36c", + "Col-0_2MB.bp.p_ctg.gfa:md5,8fe65466d76815ffe1663ff6d8f2e8d1", + "Col-0_2MB.bp.p_utg.gfa:md5,ba2c77ebdb2ad3e6060f5574e890c6eb", + "Col-0_2MB.bp.r_utg.gfa:md5,ba2c77ebdb2ad3e6060f5574e890c6eb", + "Col-0_2MB.bp.p_utg.fa.gz:md5,812a3a16dc68bb409deb69f0aef7e6a8", + 
"Col-0_2MB_assembly_patch.patch.err:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-07-02T11:25:42.487154678" + } +} \ No newline at end of file diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 00000000..f5eb58fd --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,10 @@ +/* +======================================================================================== + Nextflow config file for running nf-test tests +======================================================================================== +*/ + +params.modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' +params.pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/genomeassembler' + +aws.client.anonymous = true // fixes S3 access issues on self-hosted runners