diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6397d07..96858d7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -7,15 +7,24 @@ on: pull_request: workflow_dispatch: - concurrency: # Cancel concurrent flows group: ci-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: + # Full job: Run full benchmark suite in parallel evaluate: runs-on: ubuntu-latest + strategy: + fail-fast: false + max-parallel: 2 + matrix: + benchmark: + - giab-NA12878-agilent-75M + - giab-NA12878-agilent-200M + - giab-NA12878-twist + - chm-eval permissions: actions: write env: @@ -27,26 +36,12 @@ jobs: BO_CORE_UNIT_TOKEN: ${{ secrets.BO_CORE_UNIT_TOKEN }} BENCHMARK_GIAB_NA12878_TWIST_TOKEN: ${{ secrets.BENCHMARK_GIAB_NA12878_TWIST_TOKEN }} SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN: ${{ secrets.SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN }} + TMPDIR: /mnt/tmp steps: - - uses: 8BitJonny/gh-get-current-pr@3.0.0 - id: pr - with: - sha: ${{ github.event.pull_request.head.sha }} - filterOutClosed: true - - - run: | - echo "is PR: ${{ steps.pr.outputs.pr_found }}" - echo "current branch: ${{ github.ref }}" - - name: Free Disk Space (Ubuntu) uses: jlumbroso/free-disk-space@v1.3.1 with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB tool-cache: false - - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow android: true dotnet: true haskell: true @@ -57,53 +52,93 @@ jobs: - name: Check out repository code uses: actions/checkout@v4 - - name: Download reference genome - uses: snakemake/snakemake-github-action@v2 - with: - directory: "." 
- snakefile: "workflow/Snakefile" - args: "--cores 1 --sdm conda --conda-cleanup-pkgs cache resources/reference/genome.fasta" - stagein: | - pip install snakemake-storage-plugin-zenodo - pip install snakemake-storage-plugin-http + - name: Prepare /mnt workspace + run: | + sudo mkdir -p /mnt/conda /mnt/tmp + sudo chown -R $USER:$USER /mnt/conda /mnt/tmp - - name: Download truthsets - uses: snakemake/snakemake-github-action@v2 + - name: Restore caches (conda + snakemake) + uses: actions/cache@v4 with: - directory: "." - snakefile: "workflow/Snakefile" - args: "--sdm conda --cores 1 --conda-cleanup-pkgs cache --until benchmark_get_truth" - stagein: | - pip install snakemake-storage-plugin-zenodo - pip install snakemake-storage-plugin-http + path: | + /home/runner/miniconda3/pkgs + /home/runner/miniconda3/envs/snakemake + /home/runner/.cache/pip + /home/runner/.cache/snakemake + .snakemake + key: ${{ runner.os }}-snakemake-${{ hashFiles('workflow/Snakefile', 'workflow/rules/**/*.smk', 'config/config.yaml') }} + restore-keys: | + ${{ runner.os }}-snakemake- # This step is necessary (after downloading the truthsets above) to ensure # that the files coming from the git repo are not triggering reruns # because their modification dates are too new or too old. # (as git does not preserve modification dates) - name: Fix modification dates - uses: snakemake/snakemake-github-action@v2 + run: touch resources/regions/*/test-regions.cov-*.bed + + - name: Run analysis for ${{ matrix.benchmark }} + uses: snakemake/snakemake-github-action@v2.0.3 with: directory: "." 
snakefile: "workflow/Snakefile" - args: "--cores 1 --sdm conda --touch resources/regions/*/test-regions.cov-*.bed" + args: "--cores 2 --sdm conda --conda-prefix /mnt/conda --conda-cleanup-pkgs cache --conda-cleanup-envs --rerun-triggers mtime --retries 3 --resources downloads=1 --config benchmark=${{ matrix.benchmark }}" stagein: | pip install snakemake-storage-plugin-zenodo pip install snakemake-storage-plugin-http + show-disk-usage-on-error: true - - name: Run analysis - uses: snakemake/snakemake-github-action@v2 + - name: Upload results + uses: actions/upload-artifact@v4 with: - directory: "." - snakefile: "workflow/Snakefile" - args: > - --cores 4 --sdm conda --conda-cleanup-pkgs cache --rerun-triggers mtime --all-temp - stagein: | - pip install snakemake-storage-plugin-zenodo - pip install snakemake-storage-plugin-http + name: results-${{ matrix.benchmark }} + path: results/ + retention-days: 1 + + # Final job: Combine results and create report + report: + needs: evaluate + runs-on: ubuntu-latest + permissions: + actions: write + env: + SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN: ${{ secrets.SNAKEMAKE_STORAGE_ZENODO_ACCESS_TOKEN }} + steps: + - uses: 8BitJonny/gh-get-current-pr@3.0.0 + id: pr + with: + sha: ${{ github.event.pull_request.head.sha }} + filterOutClosed: true + + - run: | + echo "is PR: ${{ steps.pr.outputs.pr_found }}" + echo "current branch: ${{ github.ref }}" + + - name: Check out repository code + uses: actions/checkout@v4 + + - name: Restore caches (conda + snakemake) + uses: actions/cache@v4 + with: + path: | + /home/runner/miniconda3/pkgs + /home/runner/miniconda3/envs/snakemake + /home/runner/.cache/pip + /home/runner/.cache/snakemake + .snakemake + key: ${{ runner.os }}-snakemake-${{ hashFiles('workflow/Snakefile', 'workflow/rules/**/*.smk', 'config/config.yaml') }} + restore-keys: | + ${{ runner.os }}-snakemake- + + - name: Download all results + uses: actions/download-artifact@v4 + with: + pattern: results-* + path: results/ + 
merge-multiple: true - name: Create report - uses: snakemake/snakemake-github-action@v2 + uses: snakemake/snakemake-github-action@v2.0.3 with: directory: "." snakefile: "workflow/Snakefile" @@ -111,9 +146,10 @@ jobs: stagein: | pip install snakemake-storage-plugin-zenodo pip install snakemake-storage-plugin-http + show-disk-usage-on-error: true - name: Upload report as artifact uses: actions/upload-artifact@v4 with: name: report diff --git a/workflow/Snakefile b/workflow/Snakefile index 7c07716..62ef004 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -18,7 +18,7 @@ module benchmark: github( "snakemake-workflows/dna-seq-benchmark", path="workflow/Snakefile", - tag="v1.13.0", + tag="v1.14.1", ) config: config diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 8ebc811..2795bf3 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -5,6 +5,26 @@ def get_zenodo_tag(entry): return "zenodo" +# Filter callsets by subcategory if specified via --config subcategory=... +# This allows running benchmarks in parallel across multiple CI runners +if "subcategory" in config: + config["variant-calls"] = { + key: callset + for key, callset in config["variant-calls"].items() + if callset.get("subcategory") == config["subcategory"] + } + + +# Filter callsets by benchmark if specified via --config benchmark=... +# This allows splitting CI work further (e.g. giab-NA12878-agilent-75M vs 200M). +if "benchmark" in config: + config["variant-calls"] = { + key: callset + for key, callset in config["variant-calls"].items() + if callset.get("benchmark") == config["benchmark"] + } + + # add path to callsets for key, callset in config["variant-calls"].items(): if "zenodo" in callset: