diff --git a/.circleci/config.yml b/.circleci/config.yml index da38e0592..9014546f5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -5,7 +5,7 @@ variables: # default settings for all steps defaults: &defaults docker: - - image: ubuntu:20.04 + - image: ubuntu:latest # -------------------------------------------------------------------------- # The caching dramatically speeds up testing time, because we can do the @@ -26,9 +26,9 @@ variables: save_cache: &save_cache save_cache: - key: v5-{{ checksum "env.yml" }}-{{ checksum "env-r.yml" }} + key: v0-{{ checksum "env.yml" }}-{{ checksum "env-r.yml" }} paths: - - /opt/mambaforge + - /opt/miniforge # this file is created by sra-tools upon installation by conda, and so # needs to be included in the cache otherwise fastq-dump thinks it's @@ -38,7 +38,7 @@ variables: restore_cache: &restore_cache restore_cache: keys: - - v5-{{ checksum "env.yml" }}-{{ checksum "env-r.yml" }} + - v0-{{ checksum "env.yml" }}-{{ checksum "env-r.yml" }} # -------------------------------------------------------------------------- # The path needs to be set each time; in jobs below this will be called as @@ -48,6 +48,7 @@ variables: name: Set path command: | # x11-utils required to avoid R::png() segfaulting + export DEBIAN_FRONTEND=noninteractive apt update && apt install -y \ curl \ git \ @@ -73,7 +74,7 @@ variables: # Note that if we don't escape \$PATH, we'll be stuck with the exact # PATH defined here, which will break anything needing conda envs. - echo "export PATH=\$PATH:/opt/mambaforge/bin" >> $BASH_ENV + echo "export PATH=\$PATH:/opt/miniforge/bin" >> $BASH_ENV source $BASH_ENV @@ -85,28 +86,16 @@ variables: command: | source $BASH_ENV echo $PATH - # /opt/mambaforge will only exist if there was a cache restore; otherwise we'll make it here. + # /opt/miniforge will only exist if there was a cache restore; otherwise we'll make it here. # - # Use mambaforge which comes with mamba. - if [ ! 
-e /opt/mambaforge ]; then - curl -L https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh > mambaforge.sh - bash mambaforge.sh -b -p /opt/mambaforge - source "/opt/mambaforge/etc/profile.d/conda.sh" - source "/opt/mambaforge/etc/profile.d/mamba.sh" + if [ ! -e /opt/miniforge ]; then + curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" + bash Miniforge3-$(uname)-$(uname -m).sh -b -p /opt/miniforge + source "/opt/miniforge/etc/profile.d/conda.sh" conda activate which conda - which mamba - mamba --version - - # Note that mambaforge doesn't come with the defaults channel, but - # we're adding it here at the beginning to simulate what most users - # probably have locally (and following the bioconda docs). Using - # strict channel priority means we should [theoretically] never - # pull packages from defaults because they all exist on - # conda-forge. - conda config --system --add channels defaults - + conda --version conda config --system --add channels bioconda conda config --system --add channels conda-forge conda config --system --set channel_priority strict @@ -115,10 +104,12 @@ variables: # https://docs.conda.io/projects/conda-build/en/latest/resources/link-scripts.html, # post-link scripts should not depend on any installed or # to-be-installed conda packages...but they do. 
- mamba install -n base r-base yq + # conda install -n base r-base yq - time mamba env create -n $LCDBWF_ENV --file env.yml - time mamba env create -n $LCDBWF_ENV_R --file env-r.yml + time conda env create -n $LCDBWF_ENV --file env.yml + time conda env create -n $LCDBWF_ENV_R --file env-r.yml + conda env export -n $LCDBWF_ENV > /opt/miniforge/env.yml + conda env export -n $LCDBWF_ENV_R > /opt/miniforge/env.yml fi # -------------------------------------------------------------------------- @@ -127,7 +118,7 @@ variables: run: name: Download example data command: | - source /opt/mambaforge/etc/profile.d/conda.sh + source /opt/miniforge/etc/profile.d/conda.sh conda activate $LCDBWF_ENV conda info --envs conda config --show @@ -146,45 +137,40 @@ variables: tree $ORIG set +x - # Separately copy over some test-specific files + # Separately copy over some test-specific files that are not part of deploying cp $ORIG/workflows/chipseq/run_test.sh $DEPLOY/workflows/chipseq/run_test.sh cp $ORIG/workflows/rnaseq/run_test.sh $DEPLOY/workflows/rnaseq/run_test.sh cp $ORIG/workflows/rnaseq/run_downstream_test.sh $DEPLOY/workflows/rnaseq/run_downstream_test.sh - cp $ORIG/workflows/references/run_test.sh $DEPLOY/workflows/references/run_test.sh - cp $ORIG/workflows/colocalization/run_test.sh $DEPLOY/workflows/colocalization/run_test.sh mkdir $DEPLOY/ci mkdir $DEPLOY/test - cp $ORIG/test/lcdb-wf-test $DEPLOY/test/lcdb-wf-test - cp $ORIG/test/workflow_test_params.yaml $DEPLOY/test/workflow_test_params.yaml cp $ORIG/ci/get-data.py $DEPLOY/ci/get-data.py # the ./run_test.sh scripts run this cp $ORIG/ci/preprocessor.py $DEPLOY/ci/preprocessor.py - # download example data + # Now we can download example data cd $DEPLOY - test/lcdb-wf-test data --kind=all --verbose + ci/get-data.py # -------------------------------------------------------------------------- # Run the doctests across the included modules pytest-step: &pytest-step run: - name: Run pytest suite and testthat suite + name: Run 
pytest suite and R testthat suite command: | - source /opt/mambaforge/etc/profile.d/conda.sh + source /opt/miniforge/etc/profile.d/conda.sh conda activate $LCDBWF_ENV + # run unit tests and doctests for the modules in lib - test/lcdb-wf-test unit_tests --pytest + pytest --doctest-modules lib # Ensure that the chunks in rnaseq.Rmd have matching documentation - test/lcdb-wf-test unit_tests --ensure-docs - - # find all URLs in reference configs and make sure they exist - test/lcdb-wf-test unit_tests --url-check + (cd ci && ./ensure_docs.py) # run R package unit tests using the R env - test/lcdb-wf-test unit_tests --r-test + conda activate $LCDBWF_ENV_R + Rscript -e "devtools::test('lib/lcdbwf', reporter=c('summary', 'fail'), export_all=TRUE)" # -------------------------------------------------------------------------- @@ -194,10 +180,11 @@ variables: name: chipseq workflow command: | cd $DEPLOY/workflows/chipseq - source /opt/mambaforge/etc/profile.d/conda.sh + source /opt/miniforge/etc/profile.d/conda.sh conda activate $LCDBWF_ENV - $DEPLOY/test/lcdb-wf-test chipseq --run-workflow --use-conda -j2 -k -p -r - $DEPLOY/test/lcdb-wf-test chipseq --trackhub + cd $DEPLOY/workflows/chipseq + ./run_test.sh --use-conda -j2 -k -p + python chipseq_trackhub.py config/config.yaml config/hub_config.yaml # -------------------------------------------------------------------------- # Previous versions had an error where chipseq peaks needed to be defined for @@ -207,11 +194,10 @@ variables: run: name: chipseq misc command: | - cd $DEPLOY/workflows/chipseq - source /opt/mambaforge/etc/profile.d/conda.sh + source /opt/miniforge/etc/profile.d/conda.sh conda activate $LCDBWF_ENV - - ./run_test.sh --use-conda -j2 -k -p -r \ + cd $DEPLOY/workflows/chipseq + ./run_test.sh --use-conda -j2 -k -p \ --configfile $ORIG/test/test_configs/test_chipseq_regression.yaml \ --config sampletable=$ORIG/test/test_configs/chipseq_one_run.tsv \ merged_bigwigs="{}" \ @@ -231,16 +217,6 @@ variables: --until 
bed_to_bigbed fi - # -------------------------------------------------------------------------- - # Standard references workflow. - references-step: &references-step - run: - name: references workflow - command: | - source /opt/mambaforge/etc/profile.d/conda.sh - conda activate $LCDBWF_ENV - $DEPLOY/test/lcdb-wf-test references --run-workflow --configfile=config/config.yaml -j2 -p -r -k --orig $ORIG - # -------------------------------------------------------------------------- # Standard RNA-seq workflow rnaseq-step: &rnaseq-step @@ -248,24 +224,29 @@ variables: name: rnaseq workflow command: | cd $DEPLOY - source /opt/mambaforge/etc/profile.d/conda.sh + source /opt/miniforge/etc/profile.d/conda.sh conda activate $LCDBWF_ENV - $DEPLOY/test/lcdb-wf-test rnaseq --run-workflow -n - $DEPLOY/test/lcdb-wf-test rnaseq --run-workflow --use-conda -j2 -k -p -r --orig $ORIG - $DEPLOY/test/lcdb-wf-test rnaseq --trackhub --orig $ORIG + cd workflows/rnaseq + + ./run_test.sh -n \ + --configfile $ORIG/test/test_configs/test_rnaseq_config.yaml + + ./run_test.sh --use-conda -j2 -k -p \ + --configfile $ORIG/test/test_configs/test_rnaseq_config.yaml + + python rnaseq_trackhub.py \ + config/config.yaml config/hub_config.yaml \ + --additional-configs $ORIG/test/test_configs/test_rnaseq_config.yaml - # This run the preprocessor on the Rmd files and stores them - # in a new download-test directory (see the comments in the script - # for details) - $DEPLOY/test/lcdb-wf-test rnaseq --downstream + conda activate $LCDBWF_ENV_R + + # This creates files in `workflows/rnaseq/downstream-test`: + ./run_downstream_test.sh # bundle up the entire directory to be used as an artifact - tar -zcf /tmp/downstream.tar.gz workflows/rnaseq/downstream-test/ - cp workflows/rnaseq/downstream-test/rnaseq.html /tmp/rnaseq.html - cp workflows/rnaseq/downstream-test/functional-enrichment.html /tmp/functional-enrichment.html - cp workflows/rnaseq/downstream-test/gene-patterns.html /tmp/gene-patterns.html - cp 
workflows/rnaseq/data/rnaseq_aggregation/multiqc.html /tmp/rnaseq.html + tar -zcf /tmp/downstream.tar.gz downstream-test/ + cp data/rnaseq_aggregation/multiqc.html /tmp/rnaseq.html # -------------------------------------------------------------------------- # Various tests on RNA-seq workflow that don't warrant the overhead of a new @@ -276,32 +257,27 @@ variables: command: | ORIG=$(pwd) cd $DEPLOY - source /opt/mambaforge/etc/profile.d/conda.sh + source /opt/miniforge/etc/profile.d/conda.sh conda activate $LCDBWF_ENV - # Check the help for test/lcdb-wf-test to see what args these - # provide; some of them use the --until argument to restrict the - # rules that are run. Note the use of --orig $ORIG to use the test - # configs from the original clone rather than the deployed directory. - $DEPLOY/test/lcdb-wf-test rnaseq --run-workflow --sra-pe -k -r -p -j2 --use-conda --orig $ORIG - $DEPLOY/test/lcdb-wf-test rnaseq --run-workflow --sra-se -k -r -p -j2 --use-conda --orig $ORIG - $DEPLOY/test/lcdb-wf-test rnaseq --run-workflow --strandedness-pe -k -r -p -j2 --use-conda --orig $ORIG - $DEPLOY/test/lcdb-wf-test rnaseq --run-workflow --star-2pass -k -r -p -j2 --use-conda --orig $ORIG - $DEPLOY/test/lcdb-wf-test rnaseq --run-workflow --star-1pass -k -r -p -j2 --use-conda --orig $ORIG - $DEPLOY/test/lcdb-wf-test rnaseq --run-workflow --pe -k -r -p -j2 --use-conda --orig $ORIG + cd workflows/rnaseq + # SRA test + ./run_test.sh -k -p -j2 --use-conda \ + --configfile $ORIG/test/test_configs/test_rnaseq_config.yaml \ + --config sampletable=$ORIG/test/test_configs/test_sra_sampletable.csv + + # SRA SE only + ./run_test.sh -k -p -j2 --use-conda \ + --configfile $ORIG/test/test_configs/test_rnaseq_config.yaml \ + --config sampletable=$ORIG/test/test_configs/test_sra_sampletable_SE_only.tsv + + # PE + ./run_test.sh -k -p -j2 --use-conda \ + --configfile $ORIG/test/test_configs/test_rnaseq_config.yaml \ + --config sampletable=$ORIG/test/test_configs/test_pe_sampletable.tsv - # 
-------------------------------------------------------------------------- - # Standard colocalization workflow - colocalization-step: &colocalization-step - run: - name: colocalization workflow - command: | - cd $DEPLOY/workflows/colocalization - source /opt/mambaforge/etc/profile.d/conda.sh - conda activate $LCDBWF_ENV - $DEPLOY/test/lcdb-wf-test colocalization --run-workflow -k -r -p -j2 --use-conda --orig $ORIG # -------------------------------------------------------------------------- # Syntax note: All of the steps above, with their "&step-name" labels, can be @@ -342,10 +318,13 @@ jobs: # themselves. - *save_cache + # These files were created during conda setup, and become part of the + # cache. So we should get them as artifacts regardless of if the conda + # setup ran this time. - store_artifacts: - path: /tmp/lcdb-wf-test/env.yaml + path: /opt/miniforge/env.yml - store_artifacts: - path: /tmp/lcdb-wf-test/env-r.yaml + path: /opt/miniforge/env-r.yml pytest: <<: *defaults resource_class: small @@ -365,7 +344,7 @@ jobs: - *get-data - *chipseq-step - store_artifacts: - path: /tmp/lcdb-wf-test/workflows/chipseq/data/chipseq_aggregation/multiqc.html + path: $DEST/workflows/chipseq/data/chipseq_aggregation/multiqc.html chipseq-misc: <<: *defaults @@ -387,19 +366,9 @@ jobs: - store_artifacts: path: /tmp/downstream.tar.gz destination: downstream.tar.gz - - store_artifacts: - path: /tmp/rnaseq.html - destination: rnaseq.html - store_artifacts: path: /tmp/multiqc.html destination: multiqc.html - - store_artifacts: - path: /tmp/functional-enrichment.html - destination: functional-enrichment.html - - store_artifacts: - path: /tmp/gene-patterns.html - destination: gene-patterns.html - rnaseq-misc: <<: *defaults @@ -410,24 +379,6 @@ jobs: - *get-data - *rnaseq-misc-step - colocalization: - <<: *defaults - steps: - - checkout - - *restore_cache - - *set-path - - *get-data - - *colocalization-step - - references: - <<: *defaults - steps: - - checkout - - *restore_cache - 
- *set-path - - *get-data - - *references-step - build-docs: <<: *defaults resource_class: small @@ -438,9 +389,9 @@ jobs: - run: name: Install sphinx command: | - source /opt/mambaforge/etc/profile.d/conda.sh + source /opt/miniforge/etc/profile.d/conda.sh conda activate lcdb-wf-test - mamba install -y sphinx make yaml + conda install -y sphinx make yaml - run: name: OK for unknown github host command: mkdir -p ~/.ssh/ && echo -e "Host github.com\n\tStrictHostKeyChecking no\n" > ~/.ssh/config @@ -450,30 +401,12 @@ jobs: - run: name: Build and upload docs command: | - source /opt/mambaforge/etc/profile.d/conda.sh + source /opt/miniforge/etc/profile.d/conda.sh conda activate lcdb-wf-test ci/build-docs.sh - store_artifacts: path: /tmp/docs.tar.gz - report-env: - <<: *defaults - resource_class: small - steps: - - checkout - - *restore_cache - - *set-path - - run: - name: Report environment - command: | - source /opt/mambaforge/etc/profile.d/conda.sh - conda env export -n lcdb-wf-test > /tmp/env.yaml - conda env export -n lcdb-wf-test-r > /tmp/env-r.yaml - - store_artifacts: - path: /tmp/env.yaml - - store_artifacts: - path: /tmp/env-r.yaml - # ---------------------------------------------------------------------------- # This section configures the dependencies across jobs. 
workflows: @@ -492,6 +425,7 @@ workflows: requires: - initial-setup - pytest + - chipseq - rnaseq: requires: - initial-setup @@ -500,22 +434,7 @@ workflows: requires: - initial-setup - pytest - - references: - requires: - - initial-setup - - pytest - - colocalization: - requires: - - initial-setup - - pytest + - rnaseq - build-docs: requires: - initial-setup - - report-env: - requires: - - rnaseq - - rnaseq-misc - - chipseq - - chipseq-misc - - references - - colocalization diff --git a/.gitignore b/.gitignore index ab3fd51ea..b1f7c8ca1 100644 --- a/.gitignore +++ b/.gitignore @@ -66,3 +66,6 @@ workflows/rnaseq/downstream/rnaseq.html ._* Rplots.pdf /lib/include/* + +workflows/*/references + diff --git a/ci/get-data.py b/ci/get-data.py index cd2d356b1..984ed9de8 100755 --- a/ci/get-data.py +++ b/ci/get-data.py @@ -1,37 +1,112 @@ #!/usr/bin/env python +import argparse import os + from snakemake.shell import shell from snakemake.utils import makedirs -shell.executable('/bin/bash') -BRANCH = 'master' -URL = 'https://github.com/lcdb/lcdb-test-data/blob/{0}/data/{{}}?raw=true'.format(BRANCH) +BRANCH = "master" +URL = "https://github.com/lcdb/lcdb-test-data/blob/{0}/data/{{}}?raw=true".format( + BRANCH +) + +TOPLEVEL = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -def _download_file(fn, dest=None): +def _download_file(fn, dest=None, verbose=False): url = URL.format(fn) if dest is None: dest = fn + dest = os.path.join(TOPLEVEL, dest) makedirs(os.path.dirname(dest)) - basename = os.path.basename(fn) - shell('wget -q -O- {url} > {dest}') + if not verbose: + q = "-q" + else: + q = "" + shell(f"wget {q} -O- {url} > {dest}") + if verbose: + print(f"Saved {dest}") return dest -_download_file('rnaseq_samples/sample1/sample1.small_R1.fastq.gz', 'workflows/rnaseq/data/example_data/rnaseq_sample1.fq.gz') -_download_file('rnaseq_samples/sample2/sample2.small_R1.fastq.gz', 'workflows/rnaseq/data/example_data/rnaseq_sample2.fq.gz') 
-_download_file('rnaseq_samples/sample3/sample3.small_R1.fastq.gz', 'workflows/rnaseq/data/example_data/rnaseq_sample3.fq.gz') -_download_file('rnaseq_samples/sample4/sample4.small_R1.fastq.gz', 'workflows/rnaseq/data/example_data/rnaseq_sample4.fq.gz') +ap = argparse.ArgumentParser() +ap.add_argument("-v", "--verbose", action="store_true", help="Be verbose when downloading") +args = ap.parse_args() + +_download_file( + "rnaseq_samples/sample1/sample1.small_R1.fastq.gz", + "workflows/rnaseq/data/example_data/rnaseq_sample1.fq.gz", + args.verbose, +) +_download_file( + "rnaseq_samples/sample2/sample2.small_R1.fastq.gz", + "workflows/rnaseq/data/example_data/rnaseq_sample2.fq.gz", + args.verbose, +) +_download_file( + "rnaseq_samples/sample3/sample3.small_R1.fastq.gz", + "workflows/rnaseq/data/example_data/rnaseq_sample3.fq.gz", + args.verbose, +) +_download_file( + "rnaseq_samples/sample4/sample4.small_R1.fastq.gz", + "workflows/rnaseq/data/example_data/rnaseq_sample4.fq.gz", + args.verbose, +) -_download_file('rnaseq_samples/sample1/sample1.small_R1.fastq.gz', 'workflows/rnaseq/data/example_data/rnaseq_sample1PE_1.fq.gz') -_download_file('rnaseq_samples/sample1/sample1.small_R2.fastq.gz', 'workflows/rnaseq/data/example_data/rnaseq_sample1PE_2.fq.gz') -_download_file('rnaseq_samples/sample2/sample2.small_R1.fastq.gz', 'workflows/rnaseq/data/example_data/rnaseq_sample2PE_1.fq.gz') -_download_file('rnaseq_samples/sample2/sample2.small_R2.fastq.gz', 'workflows/rnaseq/data/example_data/rnaseq_sample2PE_2.fq.gz') +_download_file( + "rnaseq_samples/sample1/sample1.small_R1.fastq.gz", + "workflows/rnaseq/data/example_data/rnaseq_sample1PE_1.fq.gz", + args.verbose, +) +_download_file( + "rnaseq_samples/sample1/sample1.small_R2.fastq.gz", + "workflows/rnaseq/data/example_data/rnaseq_sample1PE_2.fq.gz", + args.verbose, +) +_download_file( + "rnaseq_samples/sample2/sample2.small_R1.fastq.gz", + "workflows/rnaseq/data/example_data/rnaseq_sample2PE_1.fq.gz", + args.verbose, +) 
+_download_file( + "rnaseq_samples/sample2/sample2.small_R2.fastq.gz", + "workflows/rnaseq/data/example_data/rnaseq_sample2PE_2.fq.gz", + args.verbose, +) -_download_file('chipseq_samples/input_1/input_1.tiny_R1.fastq.gz', 'workflows/chipseq/data/example_data/chipseq_input1.fq.gz') -_download_file('chipseq_samples/ip_1/ip_1.tiny_R1.fastq.gz', 'workflows/chipseq/data/example_data/chipseq_ip1.fq.gz') -_download_file('chipseq_samples/input_2/input_2.tiny_R1.fastq.gz', 'workflows/chipseq/data/example_data/chipseq_input2.fq.gz') -_download_file('chipseq_samples/ip_2/ip_2.tiny_R1.fastq.gz', 'workflows/chipseq/data/example_data/chipseq_ip2.fq.gz') -_download_file('chipseq_samples/ip_3/ip_3.tiny_R1.fastq.gz', 'workflows/chipseq/data/example_data/chipseq_ip3.fq.gz') -_download_file('chipseq_samples/ip_4/ip_4.tiny_R1.fastq.gz', 'workflows/chipseq/data/example_data/chipseq_ip4.fq.gz') -_download_file('chipseq_samples/input_3/input_3.tiny_R1.fastq.gz', 'workflows/chipseq/data/example_data/chipseq_input3.fq.gz') +_download_file( + "chipseq_samples/input_1/input_1.tiny_R1.fastq.gz", + "workflows/chipseq/data/example_data/chipseq_input1.fq.gz", + args.verbose, +) +_download_file( + "chipseq_samples/ip_1/ip_1.tiny_R1.fastq.gz", + "workflows/chipseq/data/example_data/chipseq_ip1.fq.gz", + args.verbose, +) +_download_file( + "chipseq_samples/input_2/input_2.tiny_R1.fastq.gz", + "workflows/chipseq/data/example_data/chipseq_input2.fq.gz", + args.verbose, +) +_download_file( + "chipseq_samples/ip_2/ip_2.tiny_R1.fastq.gz", + "workflows/chipseq/data/example_data/chipseq_ip2.fq.gz", + args.verbose, +) +_download_file( + "chipseq_samples/ip_3/ip_3.tiny_R1.fastq.gz", + "workflows/chipseq/data/example_data/chipseq_ip3.fq.gz", + args.verbose, +) +_download_file( + "chipseq_samples/ip_4/ip_4.tiny_R1.fastq.gz", + "workflows/chipseq/data/example_data/chipseq_ip4.fq.gz", + args.verbose, +) +_download_file( + "chipseq_samples/input_3/input_3.tiny_R1.fastq.gz", + 
"workflows/chipseq/data/example_data/chipseq_input3.fq.gz", + args.verbose, +) diff --git a/ci/preprocessor.py b/ci/preprocessor.py index 042bee332..1cd7e5dac 100644 --- a/ci/preprocessor.py +++ b/ci/preprocessor.py @@ -7,54 +7,16 @@ in production. Rather than require users edit files to remove those test-specific patterns, here we keep the test settings commented out and only un-comment when running tests. - -First, we look for any line that matches "# [test settings]" (case insensitive, -with optional surrounding spacing) and an optional signed integer. Any of these -would work: - - >>> assert matches('# [test settings]') - >>> assert matches('#[test settings]') - >>> assert matches('# [ test settings ]') - >>> assert matches('# [ test settings -1]') - >>> assert matches('# [ test settings +2]') - >>> assert matches('# [ TEST SETTINGS +2]') - >>> assert matches('# [ TeSt SeTTiNgS +2 ]') - -If a lines does not match, output it as-is. - -If a line matches, then uncomment it. Specifically, remove the first "#" in the -line; if it was followed by exactly one space, then remove that too. - -If a line matches and a signed integer was provided, then consider it -a relative location, and then comment-out the referred-to line. Example: - - >>> preprocess(''' - ... use this for production - ... # use this for tests # [test settings -1] - ... '''.splitlines(True)) - - # use this for production - use this for tests # [test settings -1] - - -If the matched special string creates the first "#" in the line, then do -nothing to that line but still respect the relative locations. Useful for just -commenting out nearby lines for tests: - - >>> preprocess(''' - ... # [TEST SETTINGS +1] - ... 
comment out for testing'''.splitlines(True)) - - # [TEST SETTINGS +1] - # comment out for testing """ + import re -regexp = re.compile(r'#\s?\[\s?test settings\s?(?P[-+]*\d)?\s*\]') +regexp = re.compile(r"#\s?\[\s?(enable|disable) for test\s?\]") -def matches(line): - return regexp.search(line.lower()) is not None + +def is_commented(line): + return line.strip().startswith("#") def comment_line(line): @@ -66,87 +28,75 @@ def comment_line(line): """ x = [] for i, character in enumerate(line): - if character == ' ': + if character == " ": x.append(character) else: break - x.append('# ') + x.append("# ") x.extend(line[i:]) - return ''.join(x) + return "".join(x) def uncomment_line(line): """ Removes the first instance of "#" from a line; if it was followed by - exactly one space then remove that too. + exactly one space then remove that too . . . UNLESS the *only* comment is the + special character that triggers this behavior, in which case we do nothing. >>> assert uncomment_line('# asdf') == 'asdf' >>> assert uncomment_line('#asdf') == 'asdf' >>> assert uncomment_line('# asdf # but this should be kept') == 'asdf # but this should be kept' >>> assert uncomment_line('# asdf') == ' asdf' >>> assert uncomment_line(' # asdf') == ' asdf' + >>> assert uncomment_line('do nothing') == 'do nothing' + >>> assert uncomment_line('do nothing # [disable for test]') == 'do nothing # [disable for test]' + >>> assert uncomment_line('#uncomment # [disable for test]') == 'uncomment # [disable for test]' """ - first = line.find('#') + first = line.find("#") - # If the first comment is the one that flag the line, then do nothing. + # If the first comment is the one that flagged the line, then do nothing. 
m = regexp.search(line.lower()) if m: if m.start() == first: return line - if line[first + 1] == ' ' and line[first + 2] != ' ': - pattern = '# ' + if line[first + 1] == " " and line[first + 2] != " ": + pattern = "# " else: - pattern = '#' - return line.replace(pattern, '', 1) + pattern = "#" + return line.replace(pattern, "", 1) def preprocess(lines): + result = [] if isinstance(lines, str): lines = [lines] - # These lists will keep track of whether a line should be changed. We need to - # create them ahead of time so that we can use relative indexing from line N to - # modify the state of lines N-1 or N+1 - uncomment = [False for i in range(len(lines))] - comment = [False for i in range(len(lines))] - - for i, line in enumerate(lines): + for line in lines: m = regexp.search(line.lower()) - if m: - # There as at least a "[ test settings ]", so remove comment - uncomment[i] = True - - # Figure out if there was also a relative location to uncomment, - # and keep track of it in the `comment` list. - rel = m.group('rel') - if rel is not None: - rel = int(rel) - comment[i + rel] = True + if not m: + result.append(line) + continue - result = [] - for (c, u, line) in zip(comment, uncomment, lines): - # E.g., in this situation, unclear what should happen: - # - # # [test settings] - # # [test settings -1] - # - if c and u: - raise ValueError("Line {0} is trying to be both commented and uncommented".format(line)) - if c: - result.append(comment_line(line)) - elif u: + action = m.group(1) + if action == "enable" and is_commented(line): result.append(uncomment_line(line)) + elif action == "disable" and not is_commented(line): + result.append(comment_line(line)) else: - result.append(line) - print(''.join(result)) + raise ValueError(f"Inconsistent commenting and action:\n{line}") + + print("".join(result)) if __name__ == "__main__": import argparse + ap = argparse.ArgumentParser(usage=__doc__) - ap.add_argument('infile', help='Input file to modify. 
Modified file printed to stdout.') + ap.add_argument( + "infile", help="Input file to modify. Modified file printed to stdout." + ) args = ap.parse_args() lines = open(args.infile).readlines() preprocess(lines) diff --git a/deploy.py b/deploy.py index 7ad7e1ace..6e98596b2 100755 --- a/deploy.py +++ b/deploy.py @@ -8,14 +8,13 @@ import subprocess as sp import datetime import json -import fnmatch import logging import hashlib from pathlib import Path from distutils import filelist # Determine default staging area, used in help -default_staging = "/tmp/{0}-lcdb-wf-staging".format(os.getenv('USER')) +default_staging = "/tmp/{0}-lcdb-wf-staging".format(os.getenv("USER")) usage = f""" This script assists in the deployment of relevant code from the lcdb-wf @@ -74,52 +73,50 @@ def error(s): logging.error(RED + s + RESET) -def write_include_file(source, flavor='all'): +def write_include_file(source, flavor="all"): # Patterns follow that of MANIFEST.in # (https://packaging.python.org/en/latest/guides/using-manifest-in/), # and distutils.filelist is used below to parse them. 
PATTERN_DICT = { - 'rnaseq': [ - 'include workflows/rnaseq/Snakefile', - 'recursive-include workflows/rnaseq/config *', - 'include workflows/rnaseq/rnaseq_trackhub.py', - 'recursive-include workflows/rnaseq/downstream *.Rmd', - 'recursive-include workflows/rnaseq/downstream *.yaml', + "rnaseq": [ + "include workflows/rnaseq/Snakefile", + "recursive-include workflows/rnaseq/config *", + "include workflows/rnaseq/rnaseq_trackhub.py", + "recursive-include workflows/rnaseq/downstream *.Rmd", + "recursive-include workflows/rnaseq/downstream *.yaml", ], - 'chipseq': [ - 'include workflows/chipseq/Snakefile', - 'recursive-include workflows/chipseq/config *', - 'include workflows/chipseq/chipseq_trackhub.py', + "chipseq": [ + "include workflows/chipseq/Snakefile", + "recursive-include workflows/chipseq/config *", + "include workflows/chipseq/chipseq_trackhub.py", ], - 'all': [ - 'recursive-include wrappers *', - 'recursive-include include *', - 'recursive-include lib *', - 'include env.yml env-r.yml .gitignore', - 'include workflows/references/Snakefile', - 'recursive-include workflows/references/config *', - 'global-exclude __pycache__', + "all": [ + "recursive-include wrappers *", + "recursive-include include *", + "recursive-include lib *", + "include env.yml env-r.yml .gitignore", + "recursive-include scripts *", + "global-exclude __pycache__", + ], + "full": [ + "include workflows/colocalization/Snakefile", + "recursive-include workflows/colocalization/config *", + "recursive-include workflows/colocalization/scripts *", + "recursive-include workflows/figures *", + "recursive-include workflows/external *", ], - 'full': [ - 'include workflows/colocalization/Snakefile', - 'recursive-include workflows/colocalization/config *', - 'recursive-include workflows/colocalization/scripts *', - 'recursive-include workflows/figures *', - 'recursive-include workflows/external *', - ] - } patterns = [] - if flavor in ('full', 'rnaseq'): - patterns.extend(PATTERN_DICT['rnaseq']) - if 
flavor in ('full', 'chipseq'): - patterns.extend(PATTERN_DICT['chipseq']) - if flavor == 'full': - patterns.extend(PATTERN_DICT['full']) - patterns.extend(PATTERN_DICT['all']) + if flavor in ("full", "rnaseq"): + patterns.extend(PATTERN_DICT["rnaseq"]) + if flavor in ("full", "chipseq"): + patterns.extend(PATTERN_DICT["chipseq"]) + if flavor == "full": + patterns.extend(PATTERN_DICT["full"]) + patterns.extend(PATTERN_DICT["all"]) def fastwalk(path): """ @@ -128,13 +125,13 @@ def fastwalk(path): """ path = str(path) for root, dirs, files in os.walk(path, topdown=True): - if 'conda-meta' in dirs: + if "conda-meta" in dirs: dirs[:] = [] files[:] = [] for d in dirs: - yield os.path.join(root, d).replace(path + '/', '') + yield os.path.join(root, d).replace(path + "/", "") for f in files: - yield os.path.join(root, f).replace(path + '/', '') + yield os.path.join(root, f).replace(path + "/", "") f = filelist.FileList() f.allfiles = list(fastwalk(source)) @@ -153,9 +150,9 @@ def fastwalk(path): to_transfer = list(set(under_version_control).intersection(f.files)) include = tempfile.NamedTemporaryFile(delete=False).name - with open(include, 'w') as fout: - fout.write('\n\n') - fout.write('\n'.join(to_transfer)) + with open(include, "w") as fout: + fout.write("\n\n") + fout.write("\n".join(to_transfer)) return include @@ -188,8 +185,8 @@ def check_md5(f): full_here = Path(__file__).resolve() full_there = Path(dest) / "deploy.py" error( - "Files {full_here} and {full_there} do not match! ".format(**locals()) + - "The deploy script you are running appears to be out of date. " + f"Files {full_here} and {full_there} do not match! " + + "The deploy script you are running appears to be out of date. 
" "Please get an updated copy from https://github.com/lcdb/lcdb-wf, perhaps " "with 'wget https://raw.githubusercontent.com/lcdb/lcdb-wf/master/deploy.py'" ) @@ -267,7 +264,7 @@ def deployment_json(source, dest): info("Wrote details of deployment to {log}".format(**locals())) -def build_envs(dest, conda_frontend="mamba"): +def build_envs(dest, additional_main=None, additional_r=None, conda_frontend="conda"): """ Build conda environments. @@ -279,14 +276,22 @@ def build_envs(dest, conda_frontend="mamba"): the command line with --dest) in which the env and env-r yaml files should already exist. Envs will be created in here. + additional_main : list + Other packages to install, e.g., a snakemake plugin needed for + a cluster profile, into the main environment. + + additional_r : list + Other packages to install into the R environment. + conda_frontend : 'mamba' | 'conda' Which front-end to use (terminology borrowed from Snakemake) + """ mapping = [ - ("./env", "env.yml"), - ("./env-r", "env-r.yml"), + ("./env", "env.yml", additional_main), + ("./env-r", "env-r.yml", additional_r), ] - for env, yml in mapping: + for env, yml, additional in mapping: info("Building environment " + os.path.join(dest, env)) try: @@ -308,27 +313,42 @@ def build_envs(dest, conda_frontend="mamba"): p = sp.Popen(cmds, universal_newlines=True, cwd=dest) p.wait() + if additional: + info(f"Adding {additional} to environment") + cmds = [conda_frontend, "install", "-y", "-p", env] + additional + p = sp.Popen(cmds, universal_newlines=True, cwd=dest) + p.wait() + except KeyboardInterrupt: print("") - error("Killing running {conda_frontend} job, '".format(**locals()) + " ".join(cmds)) + error( + "Killing running {conda_frontend} job, '".format(**locals()) + + " ".join(cmds) + ) p.kill() sys.exit(1) if p.returncode: - error("Error running {conda_frontend}, '".format(**locals()) + " ".join(cmds)) + error( + "Error running {conda_frontend}, '".format(**locals()) + " ".join(cmds) + ) sys.exit(1) full_env 
= Path(dest) / env - info("Created env {full_env}".format(**locals())) + info(f"Created env {full_env}") if __name__ == "__main__": + additional_main_from_env_var = os.getenv("LCDBWF_ADDITIONAL_MAIN", []) + ap = argparse.ArgumentParser(usage=usage) ap.add_argument( "--flavor", default="full", - help="""Options are {0}. Default is full.""".format(['full', 'rnaseq', 'chipseq']), + help="""Options are {0}. Default is full.""".format( + ["full", "rnaseq", "chipseq"] + ), ) ap.add_argument( "--dest", help="""Destination directory in which to copy files""", required=True @@ -340,7 +360,7 @@ def build_envs(dest, conda_frontend="mamba"): help=f"""Make a new clone to a staging area (at the location specified by --staging which defaults to {default_staging}) and deploy from there. Useful if using this script as a standalone tool. You can also - use --branch to configure which branch to deploy from that clone.""" + use --branch to configure which branch to deploy from that clone.""", ) ap.add_argument( @@ -367,25 +387,40 @@ def build_envs(dest, conda_frontend="mamba"): ap.add_argument( "--conda-frontend", help="Set program (conda or mamba) to use when creating environments. Default is %(default)s.", - default="mamba", + default="conda", ) ap.add_argument( "--rsync-args", help="Options for rsync when deploying to a new directory. Default is %(default)s.", - default="-rlt" + default="-rlt", ) ap.add_argument( - "--mismatch-ok", - action="store_true", - help="Used for testing") + "--additional-main", + help="""Additional packages to install in main environment (only + relevant with --build-envs). For example, + 'snakemake-executor-plugin-cluster-generic' to support a cluster + profile. 
You can use the env var LCDBWF_ADDITIONAL_MAIN to supply this + argument automatically instead.""", + nargs="+", + ) + ap.add_argument( + "--additional-r", + help="Additional packages to install in R environment (only relevant with --build-envs)", + nargs="+", + ) + + ap.add_argument("--mismatch-ok", action="store_true", help="Used for testing") args = ap.parse_args() dest = args.dest flavor = args.flavor if args.staging and not args.clone: - print("ERROR: --staging was specified but --clone was not. Did you want to use --clone?", file=sys.stderr) - sys.exit(1) + print( + "ERROR: --staging was specified but --clone was not. Did you want to use --clone?", + file=sys.stderr, + ) + sys.exit(1) if args.clone: if args.staging is None: args.staging = default_staging @@ -398,7 +433,22 @@ def build_envs(dest, conda_frontend="mamba"): rsync(include, source, dest, args.rsync_args) deployment_json(source, dest) + if additional_main_from_env_var: + if args.additional_main: + print( + "ERROR: Unset LCDBWF_ADDITIONAL_MAIN env var if you want to use the --additional-main argument." + ) + sys.exit(1) + additional_main = [additional_main_from_env_var] + else: + additional_main = args.additional_main + if args.build_envs: - build_envs(dest, conda_frontend=args.conda_frontend) + build_envs( + dest, + additional_main=additional_main, + additional_r=args.additional_r, + conda_frontend=args.conda_frontend, + ) warning("Deployment complete in {args.dest}".format(**locals())) diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 45ed38711..000000000 --- a/docs/README.md +++ /dev/null @@ -1,30 +0,0 @@ -This documentation uses [sphinx](http://www.sphinx-doc.org) to buid the documentation. - -The built documentation from the master branch can be found at -https://lcdb.github.io/lcdb-wf. 
If you want to build a local copy of the -documentation: - -- create an environment from the `docs/docs-requirements.txt` file -- activate it -- run the Makefile in `docs` - - -That is: - -```bash -# Create env -conda create -n lcdb-wf-docs \ - --file docs/docs-requirements.txt \ - --channel bioconda \ - --channel conda-forge \ - --channel lcdb - -# activate it -source activate lcdb-wf-docs - -# build the docs -cd docs -make html -``` - -The locally-built docs will be in `docs/_build/html/toc.html`. diff --git a/docs/_static/balloon.min.css b/docs/_static/balloon.min.css deleted file mode 100644 index 268c8a8e4..000000000 --- a/docs/_static/balloon.min.css +++ /dev/null @@ -1 +0,0 @@ -[data-balloon]{position:relative}[data-balloon]:after,[data-balloon]:before{-ms-filter:"progid:DXImageTransform.Microsoft.Alpha(Opacity=0)";filter:alpha(opacity=0);-khtml-opacity:0;-moz-opacity:0;opacity:0;pointer-events:none;-webkit-transition:all .18s ease-out .18s;transition:all .18s ease-out .18s;bottom:100%;left:50%;position:absolute;z-index:10;-webkit-transform:translate(-50%,10px);-ms-transform:translate(-50%,10px);transform:translate(-50%,10px);-webkit-transform-origin:top;-ms-transform-origin:top;transform-origin:top}[data-balloon]:after{background:rgba(17,17,17,.9);border-radius:4px;color:#fff;content:attr(data-balloon);font-size:12px;padding:.5em 1em;white-space:nowrap;margin-bottom:11px}[data-balloon]:before{background:url('data:image/svg+xml;utf8,') no-repeat;background-size:100% 
auto;height:6px;width:18px;content:"";margin-bottom:5px}[data-balloon]:hover:after,[data-balloon]:hover:before{-ms-filter:"progid:DXImageTransform.Microsoft.Alpha(Opacity=100)";filter:alpha(opacity=100);-khtml-opacity:1;-moz-opacity:1;opacity:1;pointer-events:auto;-webkit-transform:translate(-50%,0);-ms-transform:translate(-50%,0);transform:translate(-50%,0)}[data-balloon][data-balloon-break]:after{white-space:normal}[data-balloon-pos=down]:after,[data-balloon-pos=down]:before{bottom:auto;left:50%;top:100%;-webkit-transform:translate(-50%,-10px);-ms-transform:translate(-50%,-10px);transform:translate(-50%,-10px)}[data-balloon-pos=down]:after{margin-top:11px}[data-balloon-pos=down]:before{background:url('data:image/svg+xml;utf8,') no-repeat;background-size:100% auto;height:6px;width:18px;margin-top:5px;margin-bottom:0}[data-balloon-pos=down]:hover:after,[data-balloon-pos=down]:hover:before{-webkit-transform:translate(-50%,0);-ms-transform:translate(-50%,0);transform:translate(-50%,0)}[data-balloon-pos=left]:after,[data-balloon-pos=left]:before{bottom:auto;left:auto;right:100%;top:50%;-webkit-transform:translate(10px,-50%);-ms-transform:translate(10px,-50%);transform:translate(10px,-50%)}[data-balloon-pos=left]:after{margin-right:11px}[data-balloon-pos=left]:before{background:url('data:image/svg+xml;utf8,') no-repeat;background-size:100% auto;height:18px;width:6px;margin-right:5px;margin-bottom:0}[data-balloon-pos=left]:hover:after,[data-balloon-pos=left]:hover:before{-webkit-transform:translate(0,-50%);-ms-transform:translate(0,-50%);transform:translate(0,-50%)}[data-balloon-pos=right]:after,[data-balloon-pos=right]:before{bottom:auto;left:100%;top:50%;-webkit-transform:translate(-10px,-50%);-ms-transform:translate(-10px,-50%);transform:translate(-10px,-50%)}[data-balloon-pos=right]:after{margin-left:11px}[data-balloon-pos=right]:before{background:url('data:image/svg+xml;utf8,') no-repeat;background-size:100% 
auto;height:18px;width:6px;margin-bottom:0;margin-left:5px}[data-balloon-pos=right]:hover:after,[data-balloon-pos=right]:hover:before{-webkit-transform:translate(0,-50%);-ms-transform:translate(0,-50%);transform:translate(0,-50%)}[data-balloon-length]:after{white-space:normal}[data-balloon-length=small]:after{width:80px}[data-balloon-length=medium]:after{width:150px}[data-balloon-length=large]:after{width:260px}[data-balloon-length=xlarge]:after{width:90vw}@media screen and (min-width:768px){[data-balloon-length=xlarge]:after{width:380px}}[data-balloon-length=fit]:after{width:100%} \ No newline at end of file diff --git a/docs/_static/custom.css b/docs/_static/custom.css deleted file mode 100644 index b83f5902d..000000000 --- a/docs/_static/custom.css +++ /dev/null @@ -1,30 +0,0 @@ -pre { - font-size: 0.7em; -} - - -h3 { - font-style: italic; -} - -h2 { - /* text-decoration: underline; */ -} - -code { - background-color: #fff; - font-size: 0.8em; - color: #444; -} - -code.file { - font-style: italic; -} - -/* make fixed sidebar scrollable - from: https://stackoverflow.com/questions/57031848/sphinx-alabaster-theme-scroll-inside-of-fixed-sidebar -*/ -div.sphinxsidebar { - max-height: 90%; - overflow-y: auto; -} diff --git a/docs/autodoc.rst b/docs/autodoc.rst deleted file mode 100644 index 7217f828b..000000000 --- a/docs/autodoc.rst +++ /dev/null @@ -1,9 +0,0 @@ -Module documentation -==================== - -.. toctree:: - :maxdepth: 2 - - lib.common - lib.chipseq - lib.patterns_targets diff --git a/docs/changelog.rst b/docs/changelog.rst index 220399444..9583eafcc 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,17 @@ Changelog ========= +v2.0 +---- + +Major release, heavily focused on simplification where possible. This includes +the reference configurations, more streamlined config files, and many fixes and +improvements that have been requested over the years. 
+ +- Requires Snakemake 8+ +- Removed colocalization workflow + + v1.10.3 ------- diff --git a/docs/chipseq.png b/docs/chipseq.png deleted file mode 100644 index 051e0df12..000000000 Binary files a/docs/chipseq.png and /dev/null differ diff --git a/docs/chipseq.rst b/docs/chipseq.rst deleted file mode 100644 index 202e0375c..000000000 --- a/docs/chipseq.rst +++ /dev/null @@ -1,33 +0,0 @@ -.. _chipseq: - -ChIP-seq workflow ------------------ -The ChIP-seq workflow starts with raw FASTQ files and performs various QC steps. It -aligns and prepares BAM and bigWig files, performs peak-calling, and combines -everything together into a track hub for visualization. - -Specifically, the workflow does the following: - - - trims reads with cutadapt - - maps reads with Bowtie2 - - runs FastQC on raw, trimmed, and aligned reads - - Removes multimappers (samtools) and duplicates (Picard MarkDuplicates) - - performs fastq_screen on multiple configured genomes to look for evidence of - cross-contamination - - QC aggregation using MultiQC, along with a custom table for library sizes - - merges technical replicates and then re-deduplicates them - - creates bigWigs from unique, no-dups BAM files - - optionally merges bigWigs to create one signal track for all replicates - - runs deepTools plotFingerprint on grouped IP and input for QC and - evaluation of enrichment - - calls peaks using macs2, spp, and/or sicer, with support for multiple - peak-calling runs using different parameters to assist with assessing - performance and to help make decisions for downstream analysis - - optionally runs a template diffBind RMarkdown file used for differential binding analysis - - converts BED files into bigBed (or bigNarrowPeak where possible) - - builds and optionally uploads a track hub of bigWigs and bigBeds to - visualize peak-calling in UCSC Genome Browser - -To configure a ChIP-seq experiment, see :ref:`config-yaml`. - -.. 
image:: chipseq.png diff --git a/docs/conda.rst b/docs/conda.rst deleted file mode 100644 index 1cf44f84d..000000000 --- a/docs/conda.rst +++ /dev/null @@ -1,209 +0,0 @@ -.. _conda-envs: - -conda and conda envs in `lcdb-wf` -================================= - -Conda basics ------------- - -If you're not familiar with ``conda``, it is a way of keeping software isolated -on a computer in an "environment" (basically a directory with the executables -for all the software you want to use). When you "activate" the environment, it -places that location at the beginning of your ``$PATH`` variable, so that any -executables there are found first. It does not affect any existing installation -of any software on your machine and does not need root privileges. - -If you don't already have conda installed and the Bioconda channel set up, see -the `Bioconda docs `_ for details. - -You'll also probably want `mamba `_. Mamba -is a drop-in replacement for conda that is faster and more robust. In fact, it -is now the default conda front-end for Snakemake. If you don't already have -mamba, you can install it into your base conda environment with: - -.. code-block:: bash - - conda install -n base -c conda-forge mamba - -It's recommended that you install mamba into the base env (just like conda -itself is) so that it behaves like conda. It does *not* need to be installed -into each individual environment. - - -Building the environments -------------------------- - -**It is recommended that you create a separate environment directory for -each project**, rather than a single environment for all projects. That way you -can update packages in each project independently of any others, and yet the -environment will always be close at hand. This is an especially good practice -in shared space as others can easily find and activate the environment specific -to the project. - -.. 
note:: - - We recommend using mamba rather than conda for the speed increase and - ability to more correctly solve environments. See the `snakemake docs - `_ - for more info. - - -If you use the ``--build-envs`` argument when deploying lcdb-wf to a project -directory (see :ref:`setup-proj`), two conda environments will be built in the -directories: ``env``, which has all of the non-R requirements, and ``env-r`` -which has the R packages used in particular for downstream RNA-seq analysis. -These environments will use the fully-pinned environments in ``env.yml`` and -``env-r.yml``. If you've already deployed but didn't use the ``--build-envs`` -argument, then then the equivalent command to run in the deployed directory is: - -.. code-block:: bash - - mamba env create -p ./env --file env.yml - mamba env create -p ./env-r --file env-r.yml - - -.. _conda-troubleshooting: - -Troubleshooting environments ----------------------------- - -Sometimes there is a problem with creating an environment. For example, the -exact package specified in the env yaml might not be available for some reason -(this should not happen, but in practice sometimes it does in corner cases). - -If this happens, you can try a couple things. - -First, some terminology with how packages are specified in the environment -yamls. Here's an example for ``libpng`` version 1.6.37:: - - libpng=1.6.37=hed695b0_2 - |____| |____| |________| - | | | - name | | - version | - build string - -The package name (libpng) and version (1.6.37) are pretty standard and -self-explanatory. The `build` string refers to different built versions of the -*conda package*, but for the same version (1.6.37 in this case) of the package. -For example, if a conda package was built for version 1.1 of a tool, but that -package itself had an error unrelated to the tool, then a fixed build would be -made. The package version would remain the same (1.1) but the build string -would change. 
- -In this example, the build string contains a hash ``hed695b0`` which is a hash -of all the pinned dependencies for this package at packaging time. The -`conda-forge pinning docs -`_ give more detail -on what this pinning is about, but basically if that pinning changes then this -hash will change. The ``_2`` on the end of the build string hash indicates that -this is the third built package (build numbers start at zero) for this version -of ``libpng`` using the same pinning. In other words, there also likely exists -``libpng=1.6.37=hed695b0_1`` and ``libpng=1.6.37=hed695b0_0``. At the time of -this writing, there is also ``libpng-1.6.37-h21135ba_2`` (notice the different -hash) which is the same libpng version but uses different pinnings. - -What does this mean for troubleshooting? - -For any package that seems to be problematic, try editing the respective -environment yaml (e.g., ``env.yml``) to remove the build string (so in the -example above, you would try changing it to just ``libpng=1.6.37``) and try -building the environment again. If that doesn't work, try removing the version -as well (so just ``libpng``). - -Alternatively for very problematic cases or cases where there are multiple -problematic packages, you can try creating an environment with the "loose" -pinning in ``include/requirements.txt`` which effectively does not require any -particular versions with the exception of a few corner cases. Keep in mind that -using that file may cause the environment to take a while to build as conda (or -mamba) solves the dependencies of all the specified packages. - - -Conda envs in lcdb-wf ---------------------- - -Given all of the software used across all of `lcdb-wf`, the environments can -take a lot of time to build because the solver needs to figure out the entire -dependency tree and come up with a solution that works to satisfy the entire -set of specified requirements. 
- -We chose to split the conda environments in two: the **main** environment and the **R** -environment (see :ref:`conda-design-decisions`). These environments are -described by both "strict" and "loose" files. By default we use the "strict" -version, which pins all versions of all packages exactly. This is preferred -wherever possible. However we also provide a "loose" version that is not -specific about versions. The following table describes these files: - -+----------------+--------------------------------+----------------------------------+ -| strict version | loose version | used for | -+================+================================+==================================+ -| ``env.yml`` | ``include/requirements.txt`` | Main Snakefiles | -+----------------+--------------------------------+----------------------------------+ -| ``env-r.yaml`` | ``include/requirements-r.txt`` | Downstream RNA-seq analysis in R | -+----------------+--------------------------------+----------------------------------+ - -When deploying new instances, use the ``--build-envs`` argument which will use -the strict version. Or use the following commands in a deployed directory: - -.. code-block:: bash - - mamba env create -p ./env --file env.yml - mamba env create -p ./env-r --file env-r.yml - -When getting ready to release a new lcdb-wf version, create a new environment -using the loose version to prepare the env and then when tests pass, export it -to yaml. That is: - -.. code-block:: bash - - # use loose version when preparing a new version of lcdb-wf - mamba create -p ./env --file include/requirements.txt - mamba create -p ./env-r --file include/requirements-r.txt - - # then do testing.... - - # when tests pass, export the envs - conda env export -p ./env > env.yml - conda env export -p ./env-r > env-r.yaml - - # commit, push, finalize release - - -.. 
_conda-design-decisions: - -Design decisions ----------------- - -We made the design decision to split the conda envs into two different -environments -- one for R, one for non-R. We found that by by removing the -entire sub-DAG of R packages from the main environment we can dramatically -reduce the creation time. - -We also made the decision to use large top-level environments rather than -smaller environments created for each rule using the ``conda:`` directive. -There are two reasons for this choice. First, it allows us to activate a single -environment to give us access to all the tools used. This streamlines -troubleshooting because we don't have to dig through the ``.snakemake/conda`` -directory to figure out which hash corresponds to which file, but comes with -the up-front cost of creating the environment initially. Second, it simplifies -running the tests on CircleCI, allowing us to cache the env directories as -a whole to be re-used for multiple tests rather than caching the individual -.snakemake directories for each tested workflow. - -Given that the conda and snakemake ecosystem are in flux, this may change in -the future to using small conda environments for each rule separately if it -turns out to be more beneficial to do so. - -.. note:: - - Prior to v1.7, we used requirements.txt files with loose pinning. Moving to - yaml files allows us the option of also installing pip packages if needed. - It also allows us to specify channels directly in the yaml file for - streamlined installation. - - Using strictly-pinned yaml files that are consistently tested will - hopefully result in a more stable experience for users. For example, if you - happen to create an environment around the time of a new R/Bioconductor - release, the environment may not build correctly using a loose pinning. - Other transient issues in the packaging ecosystem can similarly cause - issues. 
diff --git a/docs/conf.py b/docs/conf.py index a8c11dc93..047fd82fa 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,180 +1,35 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- +# Configuration file for the Sphinx documentation builder. # -# lcdb-wf documentation build configuration file, created by -# sphinx-quickstart on Tue Apr 11 11:06:34 2017. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import os -import sys -sys.path.insert(0, os.path.abspath('.')) -sys.path.insert(0, os.path.abspath('..')) -sys.path.insert(0, os.path.abspath('../lib')) - - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'generate_guide', - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.doctest', - 'sphinx.ext.napoleon', - 'sphinx.ext.todo', - 'sphinx.ext.viewcode', - 'sphinx.ext.githubpages'] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. 
-# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html -# The master toctree document. -master_doc = 'toc' +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -# General information about the project. project = 'lcdb-wf' -copyright = '2017, Ryan Dale, Justin Fear' -author = 'Ryan Dale, Justin Fear' +copyright = '2025, Ryan Dale' +author = 'Ryan Dale' +release = '2.0' -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = '1.9' -# The full version, including alpha/beta/rc tags. -release = '1.9' +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = "en" +extensions = [] -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This patterns also effect to html_static_path and html_extra_path +templates_path = ['_templates'] exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# If true, `todo` and `todoList` produce output, else they produce nothing. 
-todo_include_todos = True -autoclass_content = "both" -autosummary_generate = True -# -- Options for HTML output ---------------------------------------------- +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = 'alabaster' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# Default options here: https://github.com/bitprophet/alabaster/blob/master/alabaster/theme.conf -# -html_theme_options = { - 'description': 'Customizable workflows for high-throughput sequencing analysis', - 'show_related': 'true', - 'fixed_sidebar': 'true', - 'sidebar_width': '300px', -} - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". +html_theme = 'shibuya' html_static_path = ['_static'] +master_doc = 'toc' -html_sidebars = { - "*": [ - 'about.html', - 'navigation.html', - 'relations.html', - 'searchbox.html', - ] -} -# -- Options for HTMLHelp output ------------------------------------------ - -# Output file base name for HTML help builder. -htmlhelp_basename = 'lcdb-wfdoc' - - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. 
- # - # 'preamble': '', - - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', +html_theme_options = { + "globaltoc_expand_depth": 1, + "toctree_titles_only": False, + "accent_color": "gold", } - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, 'lcdb-wf.tex', 'lcdb-wf Documentation', - 'Ryan Dale, Justin Fear', 'manual'), -] - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'lcdb-wf', 'lcdb-wf Documentation', - [author], 1) -] - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'lcdb-wf', 'lcdb-wf Documentation', - author, 'lcdb-wf', 'One line description of project.', - 'Miscellaneous'), -] diff --git a/docs/config-yaml.rst b/docs/config-yaml.rst deleted file mode 100644 index c80263253..000000000 --- a/docs/config-yaml.rst +++ /dev/null @@ -1,607 +0,0 @@ -.. _config-yaml: - -Config YAML -=========== - -This page details the various configuration options and describes how to -configure a new workflow. - -Note that the ``references:`` section is detailed separately, at -:ref:`references-config`. - -Config files are expected to be in a ``config`` directory next to the -the Snakefile. For example, the RNA-seq workflow at -``workflows/rnaseq/Snakefile`` expects the config file -``workflows/rnaseq/config/config.yaml``. 
- -While it is possible to use Snakemake mechanisms such as ``--config`` to -override a particular config value and ``--configfile`` to update the config -with a different file, it is easiest to edit the existing -``config/config.yaml`` in place. This has the additional benefit of reproducibity -because all of the config information is stored in one place. - -The following table summarizes the config fields, which ones are use for which -workflow, and under what conditions, if any, they are required. Each option -links to a section below with more details on how to use it. - -================================================================================== =================== ================ ================= ========= -Field Used for References Used for RNA-seq Used for ChIP-seq Required -================================================================================== =================== ================ ================= ========= -:ref:`references ` and/or :ref:`include_references ` yes yes yes yes -:ref:`references_dir ` yes yes yes if `REFERENCES_DIR` env var not set -:ref:`sampletable ` . yes yes always -:ref:`organism ` . yes yes always -:ref:`aligner ` . yes yes always -:ref:`stranded ` . yes no usually (see :ref:`stranded `) -:ref:`fastq_screen ` . yes yes if using `fastq_screen` -:ref:`merged_bigwigs ` . yes yes if you want to merge bigwigs -:ref:`gtf ` . yes . always for RNA-seq -:ref:`rrna ` . yes . if rRNA screening desired -:ref:`salmon ` . yes . if Salmon quantification will be run -:ref:`chipseq ` . . yes always for ChIP-seq -================================================================================== =================== ================ ================= ========= - -Example configs ---------------- - -To provide an overview, here are some example config files. 
More detail is -provided later; this is just to provide some context: - -RNA-seq -~~~~~~~ - -The config file for RNA-seq is expected to be in -``workflows/rnaseq/config/config.yaml``: - -.. code-block:: yaml - - references_dir: "/data/references" - sampletable: "config/sampletable.tsv" - organism: 'human' - aligner: - tag: 'gencode-v25' - index: 'hisat2' - rrna: - tag: 'rRNA' - index: 'bowtie2' - gtf: - tag: 'gencode-v25' - - fastq_screen: - - label: Human - organism: human - tag: gencode-v25 - - label: rRNA - organism: human - tag: rRNA - - # Portions have been omitted from "references" section below for - # simplicity; see references config section for details. - - references: - human: - gencode-v25: - genome: - url: 'ftp://.../genome.fa.gz' - indexes: - - 'hisat2' - - 'bowtie2' - annotation: - url: 'ftp://.../annotation.gtf.gz' - - transcriptome: - indexes: - - 'salmon' - - rRNA: - genome: - url: 'https://...' - indexes: - - 'bowtie2' - -ChIP-seq -~~~~~~~~ - -The config file for ChIP-seq is expected to be in -``workflows/chipseq/config/config.yaml``. - -The major differences between ChIP-seq and RNA-seq configs are: - -- ChIP-seq has no ``annotation`` or ``rrna`` fields -- ChIP-seq has an addition section ``chipseq: peak_calling:`` - -.. 
code-block:: yaml - - sampletable: 'config/sampletable.tsv' - organism: 'dmel' - genome: 'dm6' - - aligner: - index: 'bowtie2' - tag: 'test' - - chipseq: - peak_calling: - - - label: gaf-embryo-1 - algorithm: macs2 - ip: - - gaf-embryo-1 - control: - - input-embryo-1 - - - label: gaf-embryo-1 - algorithm: spp - ip: - - gaf-embryo-1 - control: - - input-embryo-1 - - - label: gaf-wingdisc-pooled - algorithm: macs2 - ip: - - gaf-wingdisc-1 - - gaf-wingdisc-2 - control: - - input-wingdisc-1 - - input-wingdisc-2 - - - label: gaf-wingdisc-pooled - algorithm: spp - ip: - - gaf-wingdisc-1 - - gaf-wingdisc-2 - control: - - input-wingdisc-1 - - input-wingdisc-2 - - - label: gaf-wingdisc-pooled-1 - algorithm: epic2 - ip: - - gaf-wingdisc-1 - control: - - input-wingdisc-1 - extra: '' - - - label: gaf-wingdisc-pooled-2 - algorithm: epic2 - ip: - - gaf-wingdisc-2 - control: - - input-wingdisc-2 - extra: '' - - fastq_screen: - - label: Human - organism: human - tag: gencode-v25 - - merged_bigwigs: - input-wingdisc: - - input-wingdisc-1 - - input-wingdisc-2 - gaf-wingdisc: - - gaf-wingdisc-1 - - gaf-wingdisc-2 - gaf-embryo: - - gaf-embryo-1 - - - # Portions have been omitted from "references" section below for - # simplicity; see references config section for details. - - references: - human: - gencode-v25: - genome: - url: 'ftp://.../genome.fa.gz' - indexes: - - 'hisat2' - - 'bowtie2' - annotation: - url: 'ftp://.../annotation.gtf.gz' - - fly: - test: - genome: - url: "https://raw.githubusercontent.com/lcdb/lcdb-test-data/master/data/seq/dm6.small.fa" - postprocess: 'lib.common.gzipped' - indexes: - - 'bowtie2' - - 'hisat2' - - - -Field descriptions ------------------- -Required for references, RNA-seq and ChIP-seq -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. _cfg-references: - -``references`` -`````````````` - This section defines labels for references, where to get FASTA and GTF - files and (optionally) post-process them, and which indexes to build. 
- - Briefly, the example above has a single organism configured ("human"). That - organism has two tags ("gencode-v25" and "rRNA"). - - This is the most complex section and is documented elsewhere (see - :ref:`references-config`). - - -.. _cfg-inc-refs: - -``include_references`` -`````````````````````` - - This section can be used to supplement the ``references`` section with - other reference sections stored elsewhere in files. It's a convenient way - of managing a large amount of references without cluttering the config - file. - - See :ref:`references-config` for more. - - -.. _cfg-references-dir: - -``references_dir`` -`````````````````` - Top-level directory in which to create references. - - If not specified, uses the environment variable ``REFERENCES_DIR``. - - If specified and ``REFERENCES_DIR`` also exists, ``REFERENCES_DIR`` takes - precedence. - - This is useful when multiple people in a group share the same references to - avoid duplicating commonly-used references. Simply point references_dir to - an existing references directory to avoid having to rebuild references. - -Required for RNA-seq and ChIP-seq -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. _cfg-sampletable: - -``sampletable`` field -````````````````````` - Path to sampletable file which, at minimum, list sample names and paths to - FASTQ files. The path of this filename is relative to the Snakefile. See - :ref:`sampletable` for more info on the expected contents of the file. - - Example: - - .. code-block:: yaml - - sampletable: "config/sampletable.tsv" - -.. _cfg-organism: - -``organism`` field -`````````````````` - This field selects the top-level section of the ``references`` section that - will be used for the analysis. In RNA-seq example above, "human" is the - only organism configured. In the ChIP-seq example, there is "human" as well - as "fly". - - Example: - - .. code-block:: yaml - - organism: "human" - -.. 
_cfg-aligner: - -``aligner`` config section -`````````````````````````` - This field has two sub-fields, and automatically uses the configured - ``organism`` to select the top-level entry in the references section. - ``tag`` selects the tag from the organism to use, and ``index`` selects - which aligner index to use. The relevant option from the example above - would be "gencode-v25", which configures both bowtie2 and hisat2 indexes to - be built. For RNA-seq we would likely choose "hisat2"; for ChIP-seq - "bowtie2". - - Currently-configured options are ``hisat2``, ``bowtie2``, and ``star``. - - Example: - - .. code-block:: yaml - - aligner: - tag: "gencode-v25" - index: "hisat2" - -Required for RNA-seq -~~~~~~~~~~~~~~~~~~~~ - -.. _cfg-stranded: - -``stranded`` field -`````````````````` - This field specifies the strandedness of the library. This is used by - various rules to set the parameters correctly. For example, - ``featureCounts`` will use ``-s0``, ``-s1``, or ``-s2`` accordingly; - ``kallisto`` will use ``--fr-stranded`` if needed, and so on. - - This field can take the following options: - - =================== =========== - value description - =================== =========== - ``unstranded`` The strand that R1 reads align to has no information about the strand of the gene. - ``fr-firststrand`` R1 reads from plus-strand genes align to the *minus* strand. Also called reverse stranded, dUTP-based. - ``fr-secondstrand`` R1 reads from plus-strand genes align to the *plus* strand. Also called forward stranded. - =================== =========== - - Example: - - .. code-block:: yaml - - stranded: "fr-firststrand" - - Rules that require information about strand will check the config file at - run time and raise an error if this field doesn't exist. - - If you don't know the strandedness of the library, run the Snakefile in - such a way to only run the ``strand_check`` rule: - - .. 
code-block:: bash - - snakemake -j 2 strand_check - - Or, when using the Slurm wrapper on cluster, - - .. code-block:: bash - - sbatch ../../include/WRAPPER_SLURM strand_check - - When complete, there will be a MultiQC HTML file in the ``strand_check/`` - directory that you can inspect to make your choice. - - This will align the first 10,000 reads to the specified reference and run - RSeQC's ``infer_experiment.py`` on the results and then run MultiQC on just - those output files. - - .. versionadded:: 1.8 - -Optional fields -~~~~~~~~~~~~~~~ - -.. _cfg-fastq-screen: - -``fastq_screen`` config section -``````````````````````````````` - - This section configures which Bowtie2 indexes should be used with - `fastq_screen`. It takes the form of a list of dictionaries. Each - dictionary has the keys: - - - `label`: how to label the genome in the output - - `organism`: a configured organism. In the example above, there is only a single configured organism, "human". - - `tag`: a configured tag for that organism. - - Each entry in the list must have a Bowtie2 index configured to be built. - - Example: - - .. code-block:: yaml - - fastq_screen: - - label: Human - organism: human - tag: gencode-v25 - - label: rRNA - organism: human - tag: rRNA - - The above example configures two different indexes to use for fastq_screen: - the human gencode-v25 reference, and the human rRNA reference. - -.. _cfg-merged-bigwigs: - -``merged_bigwigs`` config section -````````````````````````````````` - This section controls optional merging of signal files in bigWig format. - Its format differs depending on RNA-seq or ChIP-seq, due to how strands are - handled in those workflows. - - Here is an RNA-seq example: - - .. 
code-block:: yaml - - merged_bigwigs: - arbitrary_label_to_use: - pos: - - 'sample1' - - 'sample2' - neg: - - 'sample1' - - 'sample2' - - This will result in a single bigWig file called - `arbitrary_label_to_use.bigwig` in the directory - `data/rnaseq_aggregation/merged_bigwigs` (by default; this is configured - using ``config/rnaseq_patterns.yaml``). That file merges together both the - positive and negative signal strands of two samples, `sample1` and `sample2`. The - names "sample1" and "sample2" are sample names defined in the :ref:`sample - table <sampletable>`. - - In other words, if samples 1 and 2 are replicates for a condition, this - gets us a single merged (averaged) track for that condition. - - Here's another RNA-seq example, where we merge the samples again but keep - the strands separate. This will result in two output bigwigs. - - .. code-block:: yaml - - merged_bigwigs: - merged_sense: - sense: - - 'sample1' - - 'sample2' - merged_antisense: - antisense: - - 'sample1' - - 'sample2' - - Here is a ChIP-seq example: - - .. code-block:: yaml - - merged_bigwigs: - arbitrary_label_to_use: - - 'label1' - - 'label2' - - This will result in a single bigWig file called - `arbitrary_label_to_use.bigwig` in the directory - `data/chipseq_aggregation/merged_bigwigs` (by default; this is configured - using ``config/chipseq_patterns.yaml``) that merges together the "label1" - and "label2" bigwigs. - - See :ref:`sampletable` for more info on the relationship between a *sample* - and a *label* when working with ChIP-seq. - - -RNA-seq-only fields -~~~~~~~~~~~~~~~~~~~ -.. _cfg-rrna: - -``rrna`` field -``````````````` - - This field selects the reference tag to use for screening rRNA reads. - Similar to the ``aligner`` field, it takes both a ``tag`` and ``index`` - key. The specified index must have been configured to be built for the - specified tag. It uses the already configured ``organism``. - - Example: - - .. code-block:: yaml - - rrna: - tag: 'rRNA' - index: 'bowtie2' - - -.. 
_cfg-gtf: - -``gtf`` field -````````````` - - This field selects the reference tag to use for counting reads in features. - The tag must have had a ``gtf:`` section specified; see - :ref:`references-config` for details. - - The organism is inherited from the ``organism:`` field. - - Example: - - .. code-block:: yaml - - gtf: - tag: "gencode-v25" - -.. _cfg-salmon: - -``salmon`` field -```````````````` - This field selects the reference tag to use for the Salmon index (if used). - The tag must have had a FASTA configured, and an index for "salmon" must - have been configured to be built for the organism selected with the - ``organism`` config option. - - -ChIP-seq-only fields -~~~~~~~~~~~~~~~~~~~~ - -.. _cfg-chipseq: - -``chipseq`` config section -`````````````````````````` - This section configures the peak-calling stage of the ChIP-seq workflow. It - currently expects a single key, ``peak_calling``, which is a list of - peak-calling runs. - - A peak-calling run is a dictionary configuring a single execution of - a peak-caller which results in a single BED file of called peaks. - A peak-calling run is uniquely described by its ``label`` and - ``algorithm``. This way, we can use the same label (e.g., `gaf-embryo-1`) - across multiple peak-callers to help organize the output. - - The currently-supported peak-callers are ``macs2``, ``spp``, and ``sicer``. - They each have corresponding wrappers in the ``wrappers`` directory. To add - other peak-callers, see :ref:`new-peak-caller`. - - The track hubs will include all of these called peaks which helps with - assessing the peak-calling performance. - - Here is a minimal example of a peak-calling config section. It defines - a single peak-calling run using the `macs2` algorithm. Note that the - ``ip:`` and ``control:`` keys are lists of **labels** from the ChIP-seq - sample table's ``label`` column, **not sample IDs** from the first column. - - .. 
code-block:: yaml - - chipseq: - peak_calling: - - - label: gaf-embryo-1 - algorithm: macs2 - ip: - - gaf-embryo-1 - control: - - input-embryo-1 - - The above peak-calling config will result in a file - ``data/chipseq_peaks/macs2/gaf-embryo-1/peaks.bed`` (that pattern is - defined in ``chipseq_patterns.yaml`` if you need to change it). - - We can specify additional command-line arguments that are passed verbatim - to `macs2` with the ``extra:`` section, for example: - - .. code-block:: yaml - - chipseq: - peak_calling: - - - label: gaf-embryo-1 - algorithm: macs2 - ip: - - gaf-embryo-1 - control: - - input-embryo-1 - extra: '--nomodel --extsize 147' - - - `macs2` supports multiple IP and input files, which internally are merged - by `macs2`. We can supply multiple IP and input labels for biological - replicates to get a set of peaks called on pooled samples. Note that we - give it a different label so it doesn't overwrite the other peak-calling - run we already have configured. - - .. code-block:: yaml - - chipseq: - peak_calling: - - - label: gaf-embryo-1 - algorithm: macs2 - ip: - - gaf-embryo-1 - control: - - input-embryo-1 - extra: '--nomodel --extsize 147' - - - - label: gaf-embryo-pooled - algorithm: macs2 - ip: - - gaf-embryo-1 - - gaf-embryo-2 - control: - - input-embryo-1 - - input-embryo-2 - - - diff --git a/docs/config.rst b/docs/config.rst index 649a3cabc..107fbe288 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -5,75 +5,549 @@ Configuration ============= -General configuration -~~~~~~~~~~~~~~~~~~~~~ +Configuration happens in two places: + +**Config file:** + +- :ref:`rnaseq-config` +- :ref:`chipseq-config` + +**Sampletable:** + +- :ref:`rnaseq-sampletable` +- :ref:`chipseq-sampletable` + + +.. _configfiles: + +Config file +----------- + +Within a workflow directory, the default config file is expected to be at :file:`config/config.yaml`. + +Config files, at a minimum, specify which reference FASTA to use (:ref:`reference-config`). 
+ +For RNA-seq (:ref:`rnaseq-config`) the config file also specifies a GTF +reference and strandedness of the libraries. + +For ChIP-seq (:ref:`chipseq-config`) the config file specifies peak-calling runs. + +You can override the default config file location when calling snakemake like +this:: + + snakemake --configfile="otherdir/myconfig.yaml" ... + +Snakemake will merge the config file(s) given on the command line with the +default config file (:file:`config/config.yaml`). + +.. _reference-config: + +References section +~~~~~~~~~~~~~~~~~~ + +This section is just about the references part of the config; see +:ref:`rnaseq-config` and :ref:`chipseq-config` for any additional config for +those workflows. + +Using included reference config templates +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The repository includes pre-configured reference genome and annotation +templates in :file:`include/reference_config_templates/` for common model +organisms. These templates provide organism name, genome FASTA URL, and +annotation GTF URL (for RNA-seq). They can be used for both ChIP-seq and +RNA-seq to conveniently fill in the references part of the config. + +This is the easiest way to configure references. There are two ways to use +these templates: + +1. Command-line: Point to the template using ``--configfile`` when calling Snakemake:: + + snakemake --configfile=../../include/reference_config_templates/Homo_sapiens/GENCODE.yaml ... + + This merges the template with your default :file:`config/config.yaml`, + creating new or replacing existing keys. + +2. Copy-paste: Copy the contents from a template file into your + :file:`config/config.yaml` file. + +Otherwise, see the next section for customizing the references section. + +Configuring references +^^^^^^^^^^^^^^^^^^^^^^ + +Both RNA-seq and ChIP-seq need a reference fasta configured, like this: + +.. code-block:: yaml + + genome: + url: + + +RNA-seq also needs a GTF annotation configured, which works similarly: + +.. 
code-block:: yaml + + annotation: + url: + + +The value of ``url`` can be a file (like +``file:///data/references/Homo_sapiens/gencode.fa.gz``) or any FTP or HTTP URL. + +This is useful if you have existing reference files you want to use. + +By default, reference files will be downloaded to the :file:`references` +directory within the current workflow. Aligner indexes will be built here as well. + +For ChIP-seq, the references directory will look like: + +.. code-block:: text + + references/ + ├── genome.fa.gz # Downloaded FASTA + ├── bowtie2/ # bowtie2 index + │   └── genome.*.bt2 + └── genome.chromsizes # chromsizes from fasta + +For RNA-seq, it will look like: + +.. code-block:: text + + references/ + ├── bowtie2/ # bowtie2 index for rRNA + │   └── rrna.*.bt2 + ├── salmon/ # salmon index + ├── star/ # STAR index + ├── annotation.gtf.gz # downloaded GTF + ├── annotation.refflat # GTF converted to refflat + ├── annotation.bed12 # GTF converted to bed12 + ├── annotation.mapping.tsv.gz # TSV of attributes from GTF + ├── genome.fa.gz # downloaded FASTA + ├── genome.fa.fai # chrom sizes + ├── rrna.fa.gz # rRNA sequence for organism from SILVA + └── transcriptome.fa.gz # created from genome FASTA and GTF + + +See :ref:`decisions-references` for a discussion on why it's done this way. You +can control this behavior by using the optional ``references`` entry in the +config file, which will instead look for (and create if needed) the specified +directory. If you do this, keep in mind that each reference directory uses +generic labels like ``genome``, ``annotation``, etc, so using the same +directory for different organisms will cause the files to be overwritten for +the last-run organism. So if you use this approach you should consider putting +your references in directories named after organisms and the versions of +aligners used. + + + +.. 
_rnaseq-config: + +RNA-seq config +~~~~~~~~~~~~~~ + +For RNA-seq, in addition to the genome fasta file described above, you also need: + +- ``annotation``, structured similar to ``genome``, which specifies a gzipped + GTF file. A transcriptome fasta is automatically built from the genome fasta + and this GTF. +- ``organism`` which will be used to screen ribosomal RNA. Technically, this is + searching for the string in the SILVA rRNA database's fasta records. +- ``stranded`` of the libraries, which is used for automatically + configuring strand-specific tools. The options are: + - ``fr-firststrand`` for dUTP libraries + - ``fr-secondstrand`` for ligation libraries + - ``unstranded`` for libraries without strand specificity. + +See https://rnabio.org/module-09-appendix/0009/12/01/StrandSettings for more +info on strandedness. If you don't know ahead of time, you can use +``fr-firststrand`` and inspect the results for RSeQC's infer_experiment in the +MultiQC output. Correct the strandedness in the config, and re-run. Only the +jobs affected by strandedness will be re-run. + +Here is an example for human: + +.. code-block:: yaml + + organism: "Homo sapiens" + genome: + url: "https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_49/GRCh38.primary_assembly.genome.fa.gz" + annotation: + url: "https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_49/gencode.v49.primary_assembly.annotation.gtf.gz" + stranded: "fr-firststrand" + +In :file:`include/reference_config_templates` you can find configs for common +model organisms. These have both genome and annotation, so you can point +Snakemake to them on the command line. You would still need to specify +strandedness, which can be a config entry in +:file:`config/config.yaml`. Or it could be specified directly on the command +line, like this: + +.. 
code-block:: bash + + snakemake \ + --configfile=../../include/reference_config_templates/Homo_sapiens/GENCODE.yaml \ + --config stranded=fr-firststrand + +(in this case a separate :file:`config/config.yaml` would not be needed, as +long as you use the default :file:`config/sampletable.tsv` as your sampletable) + +.. _chipseq-config: + +ChIP-seq config +~~~~~~~~~~~~~~~ + +For ChIP-seq, in addition to the genome fasta file described above, you also +need a peak-calling section if you want to to run peak-calling. + +The idea is that the ``peak_calling:`` entry in the config is a list. Each item +in the list is a dictionary with the following keys: + +- ``label`` for the peak-calling run. This is intentionally free-form since you + may want to run the same samples through multiple algorithms or different + parameters. Output will be in :file:`data/peak_calling//