From 6ce88a0e80be64c4baea4d679cf6a28b5157dc65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20Ba=CC=88uerle?= Date: Tue, 25 Nov 2025 12:17:19 +0100 Subject: [PATCH 1/9] perf: bump to dna-seq-benchmark 1.14.1 --- workflow/Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 7c07716..62ef004 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -18,7 +18,7 @@ module benchmark: github( "snakemake-workflows/dna-seq-benchmark", path="workflow/Snakefile", - tag="v1.13.0", + tag="v1.14.1", ) config: config From 45f3fbf1636a0b694c72b701bc197b82541f6fd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bianca=20St=C3=B6cker?= Date: Thu, 18 Dec 2025 12:23:33 +0100 Subject: [PATCH 2/9] chore: Update snakemake-github-action to version 2.0.3 --- .github/workflows/main.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6397d07..60e937b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -58,7 +58,7 @@ jobs: uses: actions/checkout@v4 - name: Download reference genome - uses: snakemake/snakemake-github-action@v2 + uses: snakemake/snakemake-github-action@v2.0.3 with: directory: "." snakefile: "workflow/Snakefile" @@ -68,7 +68,7 @@ jobs: pip install snakemake-storage-plugin-http - name: Download truthsets - uses: snakemake/snakemake-github-action@v2 + uses: snakemake/snakemake-github-action@v2.0.3 with: directory: "." snakefile: "workflow/Snakefile" @@ -82,7 +82,7 @@ jobs: # because their modification dates are too new or too old. # (as git does not preserve modification dates) - name: Fix modification dates - uses: snakemake/snakemake-github-action@v2 + uses: snakemake/snakemake-github-action@v2.0.3 with: directory: "." snakefile: "workflow/Snakefile" @@ -92,7 +92,7 @@ jobs: pip install snakemake-storage-plugin-http - name: Run analysis - uses: snakemake/snakemake-github-action@v2 + uses: snakemake/snakemake-github-action@v2.0.3 with: directory: "." snakefile: "workflow/Snakefile" @@ -103,7 +103,7 @@ jobs: pip install snakemake-storage-plugin-http - name: Create report - uses: snakemake/snakemake-github-action@v2 + uses: snakemake/snakemake-github-action@v2.0.3 with: directory: "." snakefile: "workflow/Snakefile" From 5c11a721812653daa68efb1d5805a5710cec661a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20Ba=CC=88uerle?= Date: Fri, 23 Jan 2026 14:16:37 +0100 Subject: [PATCH 3/9] try: dividing into subcategories --- .github/workflows/main.yml | 115 +++++++++++++++++++++++++++---------- workflow/rules/common.smk | 10 ++++ 2 files changed, 95 insertions(+), 30 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 60e937b..dc44492 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -7,46 +7,22 @@ on: pull_request: workflow_dispatch: - concurrency: # Cancel concurrent flows group: ci-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: - evaluate: + # First job: Download shared resources once to avoid multiple Zenodo requests + prepare: runs-on: ubuntu-latest - permissions: - actions: write env: - FTP_PASSWORD: ${{ secrets.FTP_PASSWORD }} - ZENODO_TOKEN: ${{ secrets.ZENODO_TOKEN }} - BENCHMARK_GIAB_NA12878_AGILENT_TOKEN: ${{ secrets.BENCHMARK_GIAB_NA12878_AGILENT_TOKEN }} - BO_AGILENT_TOKEN: ${{ secrets.BO_AGILENT_TOKEN }} - CO_AGILENT_TOKEN: ${{ secrets.CO_AGILENT_TOKEN }} - BO_CORE_UNIT_TOKEN: ${{ secrets.BO_CORE_UNIT_TOKEN }} - BENCHMARK_GIAB_NA12878_TWIST_TOKEN: ${{ secrets.BENCHMARK_GIAB_NA12878_TWIST_TOKEN }} SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN: ${{ secrets.SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN }} steps: - - uses: 8BitJonny/gh-get-current-pr@3.0.0 - id: pr - with: - sha: ${{ github.event.pull_request.head.sha }} - filterOutClosed: true - - - run: | - echo "is PR: ${{ steps.pr.outputs.pr_found }}" - echo "current branch: ${{ github.ref }}" - - name: Free Disk Space (Ubuntu) uses: jlumbroso/free-disk-space@v1.3.1 with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB tool-cache: false - - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow android: true dotnet: true haskell: true @@ -77,6 +53,51 @@ jobs: pip install snakemake-storage-plugin-zenodo pip install snakemake-storage-plugin-http + - name: Upload shared resources + uses: actions/upload-artifact@v4 + with: + name: shared-resources + path: | + resources/reference/ + resources/truthsets/ + retention-days: 1 + + # Matrix job: Run benchmarks in parallel per subcategory + evaluate: + needs: prepare + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + subcategory: + - NA12878-agilent + - NA12878-twist + - CHM-eval + permissions: + actions: write + env: + SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN: ${{ secrets.SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN }} + steps: + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@v1.3.1 + with: + tool-cache: false + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: true + docker-images: false + + - name: Check out repository code + uses: actions/checkout@v4 + + - name: Download shared resources + uses: actions/download-artifact@v4 + with: + name: shared-resources + path: resources/ + # This step is necessary (after downloading the truthsets above) to ensure # that the files coming from the git repo are not triggering reruns # because their modification dates are too new or too old. @@ -91,17 +112,52 @@ jobs: pip install snakemake-storage-plugin-zenodo pip install snakemake-storage-plugin-http - - name: Run analysis - uses: snakemake/snakemake-github-action@v2.0.3 + - name: Run analysis for ${{ matrix.subcategory }} + uses: snakemake/snakemake-github-action@v2 with: directory: "." snakefile: "workflow/Snakefile" args: > - --cores 4 --sdm conda --conda-cleanup-pkgs cache --rerun-triggers mtime --all-temp + --cores 2 --sdm conda --conda-cleanup-pkgs cache --rerun-triggers mtime --all-temp + --config subcategory=${{ matrix.subcategory }} stagein: | pip install snakemake-storage-plugin-zenodo pip install snakemake-storage-plugin-http + - name: Upload results + uses: actions/upload-artifact@v4 + with: + name: results-${{ matrix.subcategory }} + path: results/ + retention-days: 1 + + # Final job: Combine results and create report + report: + needs: evaluate + runs-on: ubuntu-latest + permissions: + actions: write + steps: + - uses: 8BitJonny/gh-get-current-pr@3.0.0 + id: pr + with: + sha: ${{ github.event.pull_request.head.sha }} + filterOutClosed: true + + - run: | + echo "is PR: ${{ steps.pr.outputs.pr_found }}" + echo "current branch: ${{ github.ref }}" + + - name: Check out repository code + uses: actions/checkout@v4 + + - name: Download all results + uses: actions/download-artifact@v4 + with: + pattern: results-* + path: results/ + merge-multiple: true + - name: Create report uses: snakemake/snakemake-github-action@v2.0.3 with: @@ -113,7 +169,6 @@ jobs: pip install snakemake-storage-plugin-http - name: Upload report as artifact - uses: actions/upload-artifact@v4 with: name: report diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 8ebc811..3764dce 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -5,6 +5,16 @@ def get_zenodo_tag(entry): return "zenodo" +# Filter callsets by subcategory if specified via --config subcategory=... +# This allows running benchmarks in parallel across multiple CI runners +if "subcategory" in config: + config["variant-calls"] = { + key: callset + for key, callset in config["variant-calls"].items() + if callset.get("subcategory") == config["subcategory"] + } + + # add path to callsets for key, callset in config["variant-calls"].items(): if "zenodo" in callset: From 6c50241aaf0752057dc1caa22fbc5d8981d88441 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20Ba=CC=88uerle?= Date: Fri, 23 Jan 2026 14:26:26 +0100 Subject: [PATCH 4/9] test: add back secrets --- .github/workflows/main.yml | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index dc44492..9b25b59 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,6 +19,14 @@ jobs: env: SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN: ${{ secrets.SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN }} steps: + - name: Debug - Check if token is set + run: | + if [ -n "$SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN" ]; then + echo "Token is set (length: ${#SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN})" + else + echo "WARNING: Token is NOT set!" + fi + - name: Free Disk Space (Ubuntu) uses: jlumbroso/free-disk-space@v1.3.1 with: @@ -38,7 +46,7 @@ jobs: with: directory: "." snakefile: "workflow/Snakefile" - args: "--cores 1 --sdm conda --conda-cleanup-pkgs cache resources/reference/genome.fasta" + args: "--cores 1 --sdm conda --conda-cleanup-pkgs cache --retries 3 --resources downloads=1 resources/reference/genome.fasta" stagein: | pip install snakemake-storage-plugin-zenodo pip install snakemake-storage-plugin-http @@ -48,7 +56,7 @@ jobs: with: directory: "." snakefile: "workflow/Snakefile" - args: "--sdm conda --cores 1 --conda-cleanup-pkgs cache --until benchmark_get_truth" + args: "--sdm conda --cores 1 --conda-cleanup-pkgs cache --retries 3 --resources downloads=1 --until benchmark_get_truth" stagein: | pip install snakemake-storage-plugin-zenodo pip install snakemake-storage-plugin-http @@ -76,6 +84,13 @@ jobs: permissions: actions: write env: + FTP_PASSWORD: ${{ secrets.FTP_PASSWORD }} + ZENODO_TOKEN: ${{ secrets.ZENODO_TOKEN }} + BENCHMARK_GIAB_NA12878_AGILENT_TOKEN: ${{ secrets.BENCHMARK_GIAB_NA12878_AGILENT_TOKEN }} + BO_AGILENT_TOKEN: ${{ secrets.BO_AGILENT_TOKEN }} + CO_AGILENT_TOKEN: ${{ secrets.CO_AGILENT_TOKEN }} + BO_CORE_UNIT_TOKEN: ${{ secrets.BO_CORE_UNIT_TOKEN }} + BENCHMARK_GIAB_NA12878_TWIST_TOKEN: ${{ secrets.BENCHMARK_GIAB_NA12878_TWIST_TOKEN }} SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN: ${{ secrets.SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN }} steps: - name: Free Disk Space (Ubuntu) @@ -119,6 +134,7 @@ jobs: snakefile: "workflow/Snakefile" args: > --cores 2 --sdm conda --conda-cleanup-pkgs cache --rerun-triggers mtime --all-temp + --retries 3 --resources downloads=1 --config subcategory=${{ matrix.subcategory }} stagein: | pip install snakemake-storage-plugin-zenodo From 307f3a16f9fc8d2161d0156ede77fb30bf40aaae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20Ba=CC=88uerle?= Date: Fri, 23 Jan 2026 14:31:13 +0100 Subject: [PATCH 5/9] fix: args syntax --- .github/workflows/main.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9b25b59..ab4df5e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -132,10 +132,7 @@ jobs: with: directory: "." snakefile: "workflow/Snakefile" - args: > - --cores 2 --sdm conda --conda-cleanup-pkgs cache --rerun-triggers mtime --all-temp - --retries 3 --resources downloads=1 - --config subcategory=${{ matrix.subcategory }} + args: "--cores 2 --sdm conda --conda-cleanup-pkgs cache --rerun-triggers mtime --all-temp --retries 3 --resources downloads=1 --config subcategory=${{ matrix.subcategory }}" stagein: | pip install snakemake-storage-plugin-zenodo pip install snakemake-storage-plugin-http From 66b6c3b7097f7e493a7e8678d9867a6af7757e22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20Ba=CC=88uerle?= Date: Fri, 23 Jan 2026 14:39:31 +0100 Subject: [PATCH 6/9] fix: args syntax --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ab4df5e..edcfc6f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -132,7 +132,7 @@ jobs: with: directory: "." snakefile: "workflow/Snakefile" - args: "--cores 2 --sdm conda --conda-cleanup-pkgs cache --rerun-triggers mtime --all-temp --retries 3 --resources downloads=1 --config subcategory=${{ matrix.subcategory }}" + args: "--cores 2 --sdm conda --conda-cleanup-pkgs cache --rerun-triggers mtime --all-temp --retries 3 --config subcategory=${{ matrix.subcategory }}" stagein: | pip install snakemake-storage-plugin-zenodo pip install snakemake-storage-plugin-http From 939d4e64640e964047eabeaca14426464f5fb0fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20Ba=CC=88uerle?= Date: Fri, 23 Jan 2026 14:58:18 +0100 Subject: [PATCH 7/9] fix: action version --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index edcfc6f..71edb1f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -128,7 +128,7 @@ jobs: pip install snakemake-storage-plugin-http - name: Run analysis for ${{ matrix.subcategory }} - uses: snakemake/snakemake-github-action@v2 + uses: snakemake/snakemake-github-action@v2.0.3 with: directory: "." snakefile: "workflow/Snakefile" From de17e289792fc00edfb5cb34e92b5983a78bd58f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20Ba=CC=88uerle?= Date: Fri, 23 Jan 2026 15:10:23 +0100 Subject: [PATCH 8/9] fix: dl reference --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 71edb1f..24b62cd 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -46,7 +46,7 @@ jobs: with: directory: "." snakefile: "workflow/Snakefile" - args: "--cores 1 --sdm conda --conda-cleanup-pkgs cache --retries 3 --resources downloads=1 resources/reference/genome.fasta" + args: "--cores 1 --sdm conda --conda-cleanup-pkgs cache resources/reference/genome.fasta" stagein: | pip install snakemake-storage-plugin-zenodo pip install snakemake-storage-plugin-http From 6e73b80aaa0ac1a0aae4426c51e77bd6cf53f7f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20Ba=CC=88uerle?= Date: Fri, 23 Jan 2026 16:14:08 +0100 Subject: [PATCH 9/9] test: split matrix by benchmark --- .github/workflows/main.yml | 115 ++++++++++++------------------------- workflow/rules/common.smk | 10 ++++ 2 files changed, 47 insertions(+), 78 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 24b62cd..f0bdf14 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,74 +13,18 @@ concurrency: cancel-in-progress: true jobs: - # First job: Download shared resources once to avoid multiple Zenodo requests - prepare: - runs-on: ubuntu-latest - env: - SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN: ${{ secrets.SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN }} - steps: - - name: Debug - Check if token is set - run: | - if [ -n "$SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN" ]; then - echo "Token is set (length: ${#SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN})" - else - echo "WARNING: Token is NOT set!" - fi - - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@v1.3.1 - with: - tool-cache: false - android: true - dotnet: true - haskell: true - large-packages: true - swap-storage: true - docker-images: false - - - name: Check out repository code - uses: actions/checkout@v4 - - - name: Download reference genome - uses: snakemake/snakemake-github-action@v2.0.3 - with: - directory: "." - snakefile: "workflow/Snakefile" - args: "--cores 1 --sdm conda --conda-cleanup-pkgs cache resources/reference/genome.fasta" - stagein: | - pip install snakemake-storage-plugin-zenodo - pip install snakemake-storage-plugin-http - - - name: Download truthsets - uses: snakemake/snakemake-github-action@v2.0.3 - with: - directory: "." - snakefile: "workflow/Snakefile" - args: "--sdm conda --cores 1 --conda-cleanup-pkgs cache --retries 3 --resources downloads=1 --until benchmark_get_truth" - stagein: | - pip install snakemake-storage-plugin-zenodo - pip install snakemake-storage-plugin-http - - - name: Upload shared resources - uses: actions/upload-artifact@v4 - with: - name: shared-resources - path: | - resources/reference/ - resources/truthsets/ - retention-days: 1 - - # Matrix job: Run benchmarks in parallel per subcategory + # Matrix job: Run benchmarks in parallel per benchmark evaluate: - needs: prepare runs-on: ubuntu-latest strategy: fail-fast: false + max-parallel: 2 matrix: - subcategory: - - NA12878-agilent - - NA12878-twist - - CHM-eval + benchmark: + - giab-NA12878-agilent-75M + - giab-NA12878-agilent-200M + - giab-NA12878-twist + - chm-eval permissions: actions: write env: @@ -107,40 +51,41 @@ jobs: - name: Check out repository code uses: actions/checkout@v4 - - name: Download shared resources - uses: actions/download-artifact@v4 + - name: Restore caches (conda + snakemake) + uses: actions/cache@v4 with: - name: shared-resources - path: resources/ + path: | + /home/runner/miniconda3/pkgs + /home/runner/miniconda3/envs/snakemake + /home/runner/.cache/pip + /home/runner/.cache/snakemake + .snakemake + key: ${{ runner.os }}-snakemake-${{ hashFiles('workflow/Snakefile', 'workflow/rules/**/*.smk', 'config/config.yaml') }} + restore-keys: | + ${{ runner.os }}-snakemake- # This step is necessary (after downloading the truthsets above) to ensure # that the files coming from the git repo are not triggering reruns # because their modification dates are too new or too old. # (as git does not preserve modification dates) - name: Fix modification dates - uses: snakemake/snakemake-github-action@v2.0.3 - with: - directory: "." - snakefile: "workflow/Snakefile" - args: "--cores 1 --sdm conda --touch resources/regions/*/test-regions.cov-*.bed" - stagein: | - pip install snakemake-storage-plugin-zenodo - pip install snakemake-storage-plugin-http + run: touch resources/regions/*/test-regions.cov-*.bed - - name: Run analysis for ${{ matrix.subcategory }} + - name: Run analysis for ${{ matrix.benchmark }} uses: snakemake/snakemake-github-action@v2.0.3 with: directory: "." snakefile: "workflow/Snakefile" - args: "--cores 2 --sdm conda --conda-cleanup-pkgs cache --rerun-triggers mtime --all-temp --retries 3 --config subcategory=${{ matrix.subcategory }}" + args: "--cores 2 --sdm conda --conda-cleanup-pkgs cache --rerun-triggers mtime --all-temp --retries 3 --resources downloads=1 --config benchmark=${{ matrix.benchmark }}" stagein: | pip install snakemake-storage-plugin-zenodo pip install snakemake-storage-plugin-http + show-disk-usage-on-error: true - name: Upload results uses: actions/upload-artifact@v4 with: - name: results-${{ matrix.subcategory }} + name: results-${{ matrix.benchmark }} path: results/ retention-days: 1 @@ -164,6 +109,19 @@ jobs: - name: Check out repository code uses: actions/checkout@v4 + - name: Restore caches (conda + snakemake) + uses: actions/cache@v4 + with: + path: | + /home/runner/miniconda3/pkgs + /home/runner/miniconda3/envs/snakemake + /home/runner/.cache/pip + /home/runner/.cache/snakemake + .snakemake + key: ${{ runner.os }}-snakemake-${{ hashFiles('workflow/Snakefile', 'workflow/rules/**/*.smk', 'config/config.yaml') }} + restore-keys: | + ${{ runner.os }}-snakemake- + - name: Download all results uses: actions/download-artifact@v4 with: @@ -180,6 +138,7 @@ jobs: stagein: | pip install snakemake-storage-plugin-zenodo pip install snakemake-storage-plugin-http + show-disk-usage-on-error: true - name: Upload report as artifact uses: actions/upload-artifact@v4 diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 3764dce..2795bf3 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -15,6 +15,16 @@ if "subcategory" in config: } +# Filter callsets by benchmark if specified via --config benchmark=... +# This allows splitting CI work further (e.g. giab-NA12878-agilent-75M vs 200M). +if "benchmark" in config: + config["variant-calls"] = { + key: callset + for key, callset in config["variant-calls"].items() + if callset.get("benchmark") == config["benchmark"] + } + + # add path to callsets for key, callset in config["variant-calls"].items(): if "zenodo" in callset: