diff --git a/.circleci/config.yml b/.circleci/config.yml index d1ceee31d..e09514f46 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -217,15 +217,36 @@ jobs: name: Build and upload docs command: ci/build-docs.sh + report-env: + <<: *defaults + steps: + - add_ssh_keys: + fingerprints: + - 99:b4:dd:2c:82:9a:27:07:ca:b4:eb:bf:9c:49:4a:72 + - checkout + - *restore_cache + - *set-path + - run: + name: Report environment + command: conda env export -n lcdb-wf-test + workflows: version: 2 test-suite: jobs: - - initial-setup + - initial-setup: + filters: + branches: + ignore: + - gitlab-runner-config - pytest: requires: - initial-setup + filters: + branches: + ignore: + - gitlab-runner-config - chipseq: requires: - initial-setup @@ -233,7 +254,7 @@ workflows: filters: branches: ignore: - - master + - gitlab-runner-config - rnaseq: requires: - initial-setup @@ -241,7 +262,7 @@ workflows: filters: branches: ignore: - - master + - gitlab-runner-config - rnaseq-star: requires: - initial-setup @@ -249,7 +270,7 @@ workflows: filters: branches: ignore: - - master + - gitlab-runner-config - references: requires: - initial-setup @@ -257,7 +278,7 @@ workflows: filters: branches: ignore: - - master + - gitlab-runner-config - colocalization: requires: - initial-setup @@ -265,7 +286,19 @@ workflows: filters: branches: ignore: - - master + - gitlab-runner-config - build-docs: requires: - initial-setup + + - report-env: + requires: + - rnaseq + - rnaseq-star + - chipseq + - references + - colocalization + filters: + branches: + ignore: + - gitlab-runner-config diff --git a/.circleci/setup.sh b/.circleci/setup.sh index 0e372b7cf..aba20c175 100755 --- a/.circleci/setup.sh +++ b/.circleci/setup.sh @@ -1,42 +1,22 @@ #!/bin/bash set -e -WORKSPACE=`pwd` -MINICONDA_VER=4.3.21 +MINICONDA_VER=latest +tag="Linux" -# Set path -echo "export PATH=$WORKSPACE/miniconda/bin:$PATH" >> $BASH_ENV -source $BASH_ENV +if ! 
[ -x "$(command -v conda)" ]; then + apt-get update + apt-get install -y curl + curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-$MINICONDA_VER-$tag-x86_64.sh + bash miniconda.sh -b -p $CI_PROJECT_DIR/miniconda + conda update -y conda + export PATH=$CI_PROJECT_DIR/miniconda/bin:$PATH -if ! type conda > /dev/null; then - echo "Setting up conda..." - # setup conda if not loaded from cache - mkdir -p $WORKSPACE - - # step 1: download and install miniconda - if [[ $OSTYPE == darwin* ]]; then - tag="MacOSX" - elif [[ $OSTYPE == linux* ]]; then - tag="Linux" - else - echo "Unsupported OS: $OSTYPE" - exit 1 - fi - curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-$MINICONDA_VER-$tag-x86_64.sh - bash miniconda.sh -b -p $WORKSPACE/miniconda - - conda config --system --add channels defaults - conda config --system --add channels conda-forge - conda config --system --add channels bioconda - conda config --system --add channels lcdb - - # After SSHing in, for some reason this seems to fix it... 
- conda install -y r-base=3.4.1 bioconductor-genomeinfodbdata bioconductor-annotationhub - conda update -y conda - conda create -n lcdb-wf-test -y --file requirements.txt - conda remove -y r-base - - yum install -y git + conda create -n lcdb-wf-test -y --file requirements.txt fi +conda config --system --add channels defaults +conda config --system --add channels bioconda +conda config --system --add channels conda-forge + diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 000000000..ef2b1ab77 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,35 @@ +stages: + - init + - workflows + +before_script: + - export PATH="$CI_PROJECT_DIR/miniconda/bin:$PATH" + +initialize-conda: + stage: "init" + image: "ubuntu:latest" + script: + - "bash .circleci/setup.sh" + - "conda env export -n lcdb-wf-test > env.yaml" + cache: + key: "v1" + paths: + - "$CI_PROJECT_DIR/miniconda/" + artifacts: + paths: + - '$CI_PROJECT_DIR/env.yaml' + + +run-chipseq: + stage: "workflows" + image: "ubuntu:latest" + script: + - source activate lcdb-wf-test + - python ci/get-data.py + - cd workflows/chipseq + - ./run_test.sh --use-conda -j8 -k -p -r + cache: + key: "v1" + paths: + - "$CI_PROJECT_DIR/miniconda/" + policy: "pull" diff --git a/deploy.py b/deploy.py new file mode 100644 index 000000000..2f993a4bb --- /dev/null +++ b/deploy.py @@ -0,0 +1,129 @@ +import os +import tempfile +import argparse +import subprocess as sp +import datetime +import json + +HERE = os.path.dirname(__file__) + +usage = """ +This script assists in the deployment of lcdb-wf to working directories. + +The lcdb-wf repository contains infrastructure for testing that is not +typically needed when using it in practice. Furthermore, you might not need all +possible workflows. 
+ +This script copies over only the files requred for each "flavor" of analysis +(rnaseq, chipseq, colocalization, full) and also stores a file, +`.lcdb-wf-deployment.yaml`, containing details about the git commit that was +used and the timestamp. This can be used to compare changes and stay +up-to-date. +""" + +ap = argparse.ArgumentParser(usage=usage) +ap.add_argument('--flavor', default='full', help='''Options are rnaseq, chipseq, colocalization, full. Default is full.''') +ap.add_argument('--dest', help='''Destination directory in which to copy files''') +args = ap.parse_args() +dest = args.dest +flavor = args.flavor + +flavors = { + 'all': { + 'include': [ + 'wrappers/wrappers', + 'include', + 'lib', + 'requirements.txt', + ], + 'exclude': [ + 'wrappers/wrappers/demo', + 'workflows/*/run_test.sh', + + # The following files to exclude are those that are created from + # a test run. + 'lib/__pycache__', + 'lib/postprocess/__pycache__', + 'include/AnnotationHubCache', + 'workflows/*/Snakefile.test', + 'workflows/*/references_data', + 'workflows/*/.snakemake', + 'workflows/*/data', + 'workflows/rnaseq/downstream/rnaseq_cache', + 'workflows/rnaseq/downstream/rnaseq_files', + 'workflows/rnaseq/downstream/final_clusters', + 'workflows/rnaseq/downstream/*.tsv*', + 'workflows/rnaseq/downstream/*log', + 'workflows/rnaseq/downstream/*html', + 'workflows/colocalization/results', + ], + }, + 'chipseq': [ + 'workflows/chipseq', + 'workflows/references', + ], + 'rnaseq': [ + 'workflows/rnaseq', + 'workflows/rnaseq/downstream/', + 'workflows/references', + ], + 'colocalization': [ + 'workflows/colocalization', + ], + + 'full': [ + 'workflows', + ], +} + +paths = set(flavors['all']['include']) +paths = paths | set(flavors[flavor]) + +exclude = tempfile.NamedTemporaryFile(delete=False).name +with open(exclude, 'w') as fout: + fout.write('\n'.join(flavors['all']['exclude'])) + +include = tempfile.NamedTemporaryFile(delete=False).name +with open(include, 'w') as fout: + 
fout.write('\n'.join(paths) + '\n') + + +sp.check_call([ + 'rsync', + '--relative', + '-ar', + '--files-from={}'.format(include), + '--exclude-from={}'.format(exclude), + HERE, + dest]) + + +commit, message = sp.check_output( + ['git', 'log', '--oneline', '-1'], + universal_newlines=True +).strip().split(' ', 1) +now = datetime.datetime.strftime(datetime.datetime.now(), '%Y%m%d%H%M') +remotes = sp.check_output( + ['git', 'remote', '-v'], + universal_newlines=True +) +remotes = [i.strip() for i in remotes.splitlines()] +branch = sp.check_output([ + 'git', 'branch'], universal_newlines=True) +branch = [i for i in branch.splitlines() if i.startswith('*')] +assert len(branch) == 1 +branch = branch[0] +branch = branch.split('* ')[1] + +d = { + 'git': { + 'commit': commit, + 'message': message, + 'remotes': remotes, + 'branch': branch, + }, + 'timestamp': now} +log = os.path.join(dest, '.lcdb-wf-deployment.json') +with open(log, 'w') as fout: + fout.write(json.dumps(d) + '\n') +os.chmod(log, 0o440) diff --git a/docs/changelog.rst b/docs/changelog.rst index 4059694c7..5e766c623 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,14 @@ Changelog Changes since v1.2 ------------------ +Infrastructure +~~~~~~~~~~~~~~ +- new deploy script to copy over only the files necessary for an analysis, + avoiding the clutter of testing infrastructure. +- lcdblib, an external package, is no longer a dependency. In the interest of + transparency, the relevant code was copied over to the ``lib`` directory in + this repository. + ChIP-seq and RNA-seq ~~~~~~~~~~~~~~~~~~~~ @@ -236,4 +244,4 @@ Both RNA-seq and ChIP-seq v1.0 ---- -First full release. \ No newline at end of file +First full release. 
diff --git a/docs/deploy.rst b/docs/deploy.rst new file mode 100644 index 000000000..da8bc9ac9 --- /dev/null +++ b/docs/deploy.rst @@ -0,0 +1,34 @@ +Deploying ``lcdb-wf`` and staying up-to-date +============================================ +The repository comes with lots of infrastructure for testing that is not +necessarily needed in practice when using lcdb-wf for a project. To get +a simplified version: + +.. code-block:: bash + + python deploy.py --flavor rnaseq --dest project-dir + + +The script will use ``rsync`` to copy over files to ``project-dir``, excluding +various test files and excluding any files that may have been created in the +process of testing. For "flavor", choose ``chipseq``, ``rnaseq``, +``colocalization``, or ``full`` to get everything. + +This script also writes a file in the destination called +``.lcdb-wf-deployment.json`` which stores details about what commit was used to +deploy and the timestamp. This can come in handy later when comparing +a deployed directory with the main repository to decide whether to update. + +Updating +-------- +The most straightforward approach to updating is to use a diff tool like `meld +<http://meldmerge.org/>`_ to visually compare differences between a deployed +project and a freshly-cloned version of lcdb-wf: + +.. code-block:: bash + + git clone https://github.com/lcdb/lcdb-wf.git comparison-directory + meld project-dir comparison-directory + +This way you can pick and choose which updates are relevant without having to +resort to arcane git commands and difficult merges. diff --git a/docs/getting-started.rst b/docs/getting-started.rst index d7e40d923..b155ac9cc 100644 --- a/docs/getting-started.rst +++ b/docs/getting-started.rst @@ -29,10 +29,9 @@ Otherwise, install `Miniconda `_. .. 
code-block:: bash - conda config --add channels lcdb conda config --add channels defaults - conda config --add channels conda-forge conda config --add channels bioconda + conda config --add channels conda-forge Setup required once per project @@ -77,11 +76,7 @@ use anything. Note that here we specify the channels to use, which include :: - conda create -n lcdb-wf \ - --file requirements.txt \ - --channel bioconda \ - --channel conda-forge \ - --channel lcdb + conda create -n lcdb-wf --file requirements.txt Then activate the environment:: @@ -93,6 +88,16 @@ You might want to hold off on this for now if you'll be running the tests:: source deactivate +.. note:: + + An alternative approach is to create an environment at a specific path, for + example inside a project directory: + + .. code-block:: bash + + conda create -p ./env --file requirements.txt + source activate ./env + Next steps ---------- diff --git a/docs/index.rst b/docs/index.rst index 861b88f3e..f19efd245 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -89,6 +89,7 @@ See :ref:`getting-started` to get started. getting-started tests + deploy workflows config references diff --git a/lib/patterns_targets.py b/lib/patterns_targets.py index 643d8316a..0b837932f 100644 --- a/lib/patterns_targets.py +++ b/lib/patterns_targets.py @@ -161,6 +161,7 @@ def __init__(self, config, patterns, workdir=None): # First, the samples... 
self.patterns_by_sample = self.patterns['patterns_by_sample'] self.fill_by_sample = dict( + n=[1,2], sample=self.samples.values, label=self.sampletable.label.values, ip_label=self.sampletable.label[ diff --git a/requirements.txt b/requirements.txt index 9f5486176..c6c4b35ab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,8 +17,10 @@ cutadapt deeptools >=3.0.1 fastqc fastq-screen + # for fastqc running on circleci font-ttf-dejavu-sans-mono + gat gffutils >=0.8.7.1 ghostscript diff --git a/workflows/chipseq/config/chipseq_patterns.yaml b/workflows/chipseq/config/chipseq_patterns.yaml index 94130b546..1e7a4fd63 100644 --- a/workflows/chipseq/config/chipseq_patterns.yaml +++ b/workflows/chipseq/config/chipseq_patterns.yaml @@ -1,7 +1,7 @@ patterns_by_sample: - fastq: 'data/chipseq_samples/{sample}/{sample}_R1.fastq.gz' - cutadapt: 'data/chipseq_samples/{sample}/{sample}_R1.cutadapt.fastq.gz' + fastq: 'data/chipseq_samples/{sample}/{sample}_R{n}.fastq.gz' + cutadapt: 'data/chipseq_samples/{sample}/{sample}_R{n}.cutadapt.fastq.gz' bam: 'data/chipseq_samples/{sample}/{sample}.cutadapt.bam' fastqc: