From 6e6d74945318f3e4a7207412ca1fb441387a6047 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Wed, 2 Sep 2020 13:07:16 -0500 Subject: [PATCH 1/6] stub alakazam app --- apps/alakazam/4.1/common/alakazam_common.sh | 132 ++++++++++++++++ apps/alakazam/4.1/ls5/alakazam.json | 161 ++++++++++++++++++++ apps/alakazam/4.1/ls5/alakazam.sh | 69 +++++++++ apps/alakazam/4.1/ls5/test/test-app.json | 17 +++ apps/alakazam/4.1/ls5/test/test.sh | 0 apps/alakazam/4.1/ls5/upload-bundle.sh | 33 ++++ 6 files changed, 412 insertions(+) create mode 100644 apps/alakazam/4.1/common/alakazam_common.sh create mode 100644 apps/alakazam/4.1/ls5/alakazam.json create mode 100644 apps/alakazam/4.1/ls5/alakazam.sh create mode 100644 apps/alakazam/4.1/ls5/test/test-app.json create mode 100644 apps/alakazam/4.1/ls5/test/test.sh create mode 100644 apps/alakazam/4.1/ls5/upload-bundle.sh diff --git a/apps/alakazam/4.1/common/alakazam_common.sh b/apps/alakazam/4.1/common/alakazam_common.sh new file mode 100644 index 0000000..865ac8e --- /dev/null +++ b/apps/alakazam/4.1/common/alakazam_common.sh @@ -0,0 +1,132 @@ +# +# Alakazam common functions +# +# This script relies upon global variables +# source alakazam_common.sh +# +# Author: Scott Christley +# Date: Aug 17, 2020 +# + +# required global variables: +# PYTHON +# AGAVE_JOB_ID +# and... +# The agave app input and parameters + +# the app +export APP_NAME=alakazam + +# ---------------------------------------------------------------------------- +function expandfile () { + fileBasename="${1%.*}" # file.txt.gz -> file.txt + fileExtension="${1##*.}" # file.txt.gz -> gz + + if [ ! 
-f $1 ]; then + echo "Could not find input file $1" 1>&2 + exit 1 + fi + + if [ "$fileExtension" == "gz" ]; then + gunzip $1 + export file=$fileBasename + # don't archive the intermediate file + elif [ "$fileExtension" == "bz2" ]; then + bunzip2 $1 + export file=$fileBasename + elif [ "$fileExtension" == "zip" ]; then + unzip -o $1 + export file=$fileBasename + else + export file=$1 + fi +} + +# prevent Agave from archiving the file +function noArchive() { + echo $1 >> .agave.archive +} + +# ---------------------------------------------------------------------------- +# Analysis provenance +function initProvenance() { + # nothing yet + echo "initProvenance" +} + +# ---------------------------------------------------------------------------- +# Workflow + +function print_versions() { + echo "VERSIONS:" + #echo " $(DefineClones.py --version 2>&1)" + singularity exec ${singularity_image} versions report + echo -e "\nSTART at $(date)" +} + +function print_parameters() { + echo "Input files:" + echo "singularity_image=${singularity_image}" + echo "rearrangement_file=${rearrangement_file}" + echo "" + echo "Application parameters:" + echo "single_flag=${single_flag}" + echo "optional_number=${optional_number}" + echo "optional_enum=${optional_enum}" +} + +function run_alakazam_workflow() { + initProvenance + + # launcher job file + if [ -f joblist ]; then + echo "Warning: removing file 'joblist'. That filename is reserved." 
1>&2 + rm joblist + touch joblist + fi + noArchive "joblist" + + # for each file + # decompress if necessary + # generate commands to run + fileList=($rearrangement_file) + count=0 + while [ "x${fileList[count]}" != "x" ] + do + file=${fileList[count]} + noArchive $file + expandfile $file + filename="${file##*/}" + fileBasename="${file%.*}" # file.airr.tsv -> file.airr + fileOutname=${fileBasename}.clones.tsv + noArchive $fileOutname + + #ARGS="--format airr --act set --model ham --sym min --norm len --dist 0.165" + #if [ -n "$define_clones_mode" ]; then + # ARGS="$ARGS --mode $define_clones_mode" + #fi + #if [ -n "$define_clones_nproc" ]; then + # ARGS="$ARGS --nproc $define_clones_nproc" + #fi + + # Define Clones + #if [[ $define_clones -eq 1 ]]; then + # echo "DefineClones.py -d ${filename} -o ${fileOutname} $ARGS" >> joblist + #fi + + count=$(( $count + 1 )) + done + + # check number of jobs to be run + numJobs=$(cat joblist | wc -l) + export LAUNCHER_PPN=$LAUNCHER_LOW_PPN + if [ $numJobs -lt $LAUNCHER_PPN ]; then + export LAUNCHER_PPN=$numJobs + fi + + # run launcher + #$LAUNCHER_DIR/paramrun + + ls -l + +} diff --git a/apps/alakazam/4.1/ls5/alakazam.json b/apps/alakazam/4.1/ls5/alakazam.json new file mode 100644 index 0000000..f58c9eb --- /dev/null +++ b/apps/alakazam/4.1/ls5/alakazam.json @@ -0,0 +1,161 @@ +{ + "name": "irplus-alakazam-ls5", + "version": "4.1", + "label": "toy", + "defaultQueue": "normal", + "defaultNodeCount": 1, + "defaultProcessorsPerNode": 40, + "defaultRequestedTime": "1:00:00", + "shortDescription": "Alakazam app on Lonestar5", + "longDescription": "iReceptor+ Alakazam app", + "executionType": "HPC", + "executionSystem": "irplus-ls5.tacc.utexas.edu", + "parallelism": "PARALLEL", + "checkpointable": false, + "deploymentPath": "/irplus/apps/alakazam/4.1/ls5/", + "deploymentSystem": "data.vdjserver.org", + "templatePath": "alakazam.sh", + "testPath": "test/test.sh", + "ontology": [ + "http://sswapmeet.sswap.info/agave/apps/Application" + 
], + "modules": [ + "purge", + "load TACC" + ], + "inputs": [ + { + "id": "singularity_image", + "details": { + "label": "", + "description": "Singularity image file", + "showAttribute": false + }, + "semantics": { + "minCardinality": 1, + "maxCardinality": 1, + "ontology": [ + "http://sswapmeet.sswap.info/mime/application/Json" + ], + "fileTypes": [ + "text-0" + ] + }, + "value": { + "default": "", + "visible": true, + "required": true + } + }, + { + "id": "rearrangement_file", + "details": { + "label": "", + "description": "Rearrangement AIRR TSV file", + "showAttribute": false + }, + "semantics": { + "minCardinality": 1, + "maxCardinality": 1, + "ontology": [ + "http://sswapmeet.sswap.info/mime/application/Json" + ], + "fileTypes": [ + "text-0" + ] + }, + "value": { + "default": "", + "visible": true, + "required": true + } + } + ], + "parameters": [ + { + "id": "creator", + "value": { + "visible": true, + "required": true, + "type": "string", + "default": "" + }, + "details": { + "label": "Creator", + "description": "Username that created this job." 
+ }, + "semantics": { + "minCardinality": 1, + "maxCardinality": 1, + "ontology": [ + "xsd:string" + ] + } + }, + { + "id": "single_flag", + "value": { + "visible": true, + "required": true, + "type": "bool", + "order": 0, + "enquote": false, + "default": false, + "validator": null + }, + "details": { + "label": "", + "description": "Boolean flag parameter", + "argument": null, + "showArgument": false, + "repeatArgument": false + }, + "semantics": { + "minCardinality": 1, + "maxCardinality": 1, + "ontology": [ + "xsd:boolean" + ] + } + }, + { + "id": "optional_number", + "value": { + "visible": true, + "required": false, + "type": "number" + }, + "details": { + "label": "", + "description": "Optional number parameter" + }, + "semantics": { + "minCardinality": 0, + "maxCardinality": 1, + "ontology": [ + "xsd:integer" + ] + } + }, + { + "id": "optional_enum", + "value": { + "visible": true, + "required": false, + "type": "enumeration", + "enumValues": [ "allele", "gene" ] + }, + "details": { + "label": "", + "description": "Optional enum parameter" + }, + "semantics": { + "minCardinality": 0, + "maxCardinality": 1, + "ontology": [ + "xsd:string" + ] + } + } + ] +} diff --git a/apps/alakazam/4.1/ls5/alakazam.sh b/apps/alakazam/4.1/ls5/alakazam.sh new file mode 100644 index 0000000..224472e --- /dev/null +++ b/apps/alakazam/4.1/ls5/alakazam.sh @@ -0,0 +1,69 @@ +# +# wrapper script +# for Lonestar5 +# + +# Configuration settings + +# These get set by Tapis + +# input files +singularity_image="${singularity_image}" +rearrangement_file="${rearrangement_file}" + +# application parameters +single_flag=${single_flag} +optional_number="${optional_number}" +optional_enum="${optional_enum}" + +# Agave info +AGAVE_JOB_ID=${AGAVE_JOB_ID} +AGAVE_JOB_NAME=${AGAVE_JOB_NAME} +AGAVE_LOG_NAME=${AGAVE_JOB_NAME}-${AGAVE_JOB_ID} + +# ---------------------------------------------------------------------------- +# unpack local executables +#tar zxf binaries.tgz + +# modules +module load 
python3 +module load launcher/3.4 +module load tacc-singularity + +PYTHON=python3 + +export PATH="$PWD/bin:${PATH}" +export PYTHONPATH=$PWD/lib/python3.7/site-packages:$PYTHONPATH + +# bring in common functions +source ./toy_common.sh + +# ---------------------------------------------------------------------------- +# Launcher to use multicores on node +export LAUNCHER_WORKDIR=$PWD +export LAUNCHER_LOW_PPN=1 +export LAUNCHER_MID_PPN=8 +export LAUNCHER_MAX_PPN=25 +export LAUNCHER_PPN=1 +export LAUNCHER_JOB_FILE=joblist +export LAUNCHER_SCHED=interleaved + +# Start +printf "START at $(date)\n\n" + +# If you want to tell Tapis that the job failed +export JOB_ERROR=0 + +print_parameters +print_versions +run_alakazam_workflow + +# End +printf "DONE at $(date)\n\n" + +# remove binaries before archiving +rm -rf bin lib + +if [[ $JOB_ERROR -eq 1 ]]; then + ${AGAVE_JOB_CALLBACK_FAILURE} +fi diff --git a/apps/alakazam/4.1/ls5/test/test-app.json b/apps/alakazam/4.1/ls5/test/test-app.json new file mode 100644 index 0000000..fb04d72 --- /dev/null +++ b/apps/alakazam/4.1/ls5/test/test-app.json @@ -0,0 +1,17 @@ +{ + "name":"test_alakazam", + "appId": "irplus-alakazam-ls5-4.1", + "batchQueue": "normal", + "maxRunTime": "01:00:00", + "nodeCount": 1, + "archive": false, + "archiveSystem": "data.vdjserver.org", + "inputs": { + "singularity_image": "agave://data.vdjserver.org//irplus/images/immcantation_suite-4.1.0.sif", + "rearrangement_file": "agave://data.vdjserver.org//irplus/data/ipa1-668.tsv" + }, + "parameters": { + "creator": "schristley", + "single_flag": true + } +} diff --git a/apps/alakazam/4.1/ls5/test/test.sh b/apps/alakazam/4.1/ls5/test/test.sh new file mode 100644 index 0000000..e69de29 diff --git a/apps/alakazam/4.1/ls5/upload-bundle.sh b/apps/alakazam/4.1/ls5/upload-bundle.sh new file mode 100644 index 0000000..1deed64 --- /dev/null +++ b/apps/alakazam/4.1/ls5/upload-bundle.sh @@ -0,0 +1,33 @@ +# +TOOL=alakazam +SYSTEM=ls5 +VER=4.1 + +# Copy all of the object files 
to the bundle directory +# and create a binaries.tgz +# +# For example: +# cd bundle + +# tar zcvf binaries.tgz bin lib + +# delete old working area in tapis +tapis files delete agave:///irplus/apps/$TOOL/$VER/$SYSTEM + +# create directory structure +tapis files mkdir agave:///irplus/apps $TOOL +tapis files mkdir agave:///irplus/apps/$TOOL $VER +tapis files mkdir agave:///irplus/apps/$TOOL/$VER $SYSTEM +tapis files mkdir agave:///irplus/apps/$TOOL/$VER/$SYSTEM test + +# upload app assets +tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM alakazam.sh +tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM alakazam.json +tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM ../common/alakazam_common.sh +tapis files list agave:///irplus/apps/$TOOL/$VER/$SYSTEM + +# upload test assets +tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM/test test/test.sh +tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM/test test/test-app.json +tapis files list agave:///irplus/apps/$TOOL/$VER/$SYSTEM/test + From 556d1d17a90c04f5c446feefc88231dd81e18a76 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Wed, 2 Sep 2020 18:00:16 -0500 Subject: [PATCH 2/6] gene usage calculation --- apps/alakazam/4.1/common/alakazam_common.sh | 57 +++----------- apps/alakazam/4.1/common/create_r_scripts.py | 81 ++++++++++++++++++++ apps/alakazam/4.1/ls5/alakazam.json | 6 +- apps/alakazam/4.1/ls5/alakazam.sh | 4 +- apps/alakazam/4.1/ls5/test/test-app.json | 4 +- apps/alakazam/4.1/ls5/upload-bundle.sh | 1 + 6 files changed, 98 insertions(+), 55 deletions(-) create mode 100644 apps/alakazam/4.1/common/create_r_scripts.py diff --git a/apps/alakazam/4.1/common/alakazam_common.sh b/apps/alakazam/4.1/common/alakazam_common.sh index 865ac8e..ee654c0 100644 --- a/apps/alakazam/4.1/common/alakazam_common.sh +++ b/apps/alakazam/4.1/common/alakazam_common.sh @@ -70,7 +70,7 @@ function print_parameters() { echo "rearrangement_file=${rearrangement_file}" echo "" echo "Application 
parameters:" - echo "single_flag=${single_flag}" + echo "gene_usage_flag=${gene_usage_flag}" echo "optional_number=${optional_number}" echo "optional_enum=${optional_enum}" } @@ -78,55 +78,16 @@ function print_parameters() { function run_alakazam_workflow() { initProvenance - # launcher job file - if [ -f joblist ]; then - echo "Warning: removing file 'joblist'. That filename is reserved." 1>&2 - rm joblist - touch joblist - fi - noArchive "joblist" - - # for each file - # decompress if necessary - # generate commands to run - fileList=($rearrangement_file) - count=0 - while [ "x${fileList[count]}" != "x" ] - do - file=${fileList[count]} - noArchive $file - expandfile $file - filename="${file##*/}" - fileBasename="${file%.*}" # file.airr.tsv -> file.airr - fileOutname=${fileBasename}.clones.tsv - noArchive $fileOutname - - #ARGS="--format airr --act set --model ham --sym min --norm len --dist 0.165" - #if [ -n "$define_clones_mode" ]; then - # ARGS="$ARGS --mode $define_clones_mode" - #fi - #if [ -n "$define_clones_nproc" ]; then - # ARGS="$ARGS --nproc $define_clones_nproc" - #fi + # Gene Usage + if [[ $gene_usage_flag -eq 1 ]]; then + # expand rearrangement file if its compressed + expandfile $rearrangement_file - # Define Clones - #if [[ $define_clones -eq 1 ]]; then - # echo "DefineClones.py -d ${filename} -o ${fileOutname} $ARGS" >> joblist - #fi + # generate R script + $PYTHON ./create_r_scripts.py --rearrangement_file $file --gene gene_usage.R - count=$(( $count + 1 )) - done - - # check number of jobs to be run - numJobs=$(cat joblist | wc -l) - export LAUNCHER_PPN=$LAUNCHER_LOW_PPN - if [ $numJobs -lt $LAUNCHER_PPN ]; then - export LAUNCHER_PPN=$numJobs + # run it + singularity exec -B $PWD:/data ${singularity_image} R --no-save < gene_usage.R fi - # run launcher - #$LAUNCHER_DIR/paramrun - - ls -l - } diff --git a/apps/alakazam/4.1/common/create_r_scripts.py b/apps/alakazam/4.1/common/create_r_scripts.py new file mode 100644 index 0000000..9c575b3 --- 
/dev/null +++ b/apps/alakazam/4.1/common/create_r_scripts.py @@ -0,0 +1,81 @@ +# +# Generate R scripts for Alakazam +# +# Author: Scott Christley +# Date: Sep 2, 2020 +# + +from __future__ import print_function +import json +import argparse + +# +# main routine +# + +parser = argparse.ArgumentParser(description='Generate R scripts.') +parser.add_argument('--gene', type=str, help='Output R script for Alakazam gene usage') +parser.add_argument('--clonal', type=str, help='Output R script for Alakazam clonal analysis') +parser.add_argument('--diversity', type=str, help='Output R script for Alakazam diversity analysis') +parser.add_argument('--lineage', type=str, help='Output R script for Alakazam lineage reconstruction') +parser.add_argument('--rearrangement_file', type=str, required=True, help='Rearrangment input file name') + +args = parser.parse_args() +if (args): + + # gene usage R script, we just calculate all of them + if (args.gene): + with open(args.gene, 'w') as r_file: + r_file.write('library(alakazam)\n') + r_file.write('db <- readChangeoDb("/data/' + args.rearrangement_file + '")\n') + + # allele + r_file.write("genes <- countGenes(db, gene='v_call', group='repertoire_id', mode='allele', copy='duplicate_count')\n") + r_file.write("write.table(genes, row.names=F, sep='\\t', file='v_allele_usage.tsv')\n") + r_file.write("genes <- countGenes(db, gene='d_call', group='repertoire_id', mode='allele', copy='duplicate_count')\n") + r_file.write("write.table(genes, row.names=F, sep='\\t', file='d_allele_usage.tsv')\n") + r_file.write("genes <- countGenes(db, gene='j_call', group='repertoire_id', mode='allele', copy='duplicate_count')\n") + r_file.write("write.table(genes, row.names=F, sep='\\t', file='j_allele_usage.tsv')\n") + # TODO: Alakazam throws an error and stops execution if no data in c_call field + # We need to figure out to check for this + #r_file.write("genes <- countGenes(db, gene='c_call', group='repertoire_id', mode='allele', copy='duplicate_count', 
fill=T)\n") + #r_file.write("write.table(genes, row.names=F, sep='\\t', file='c_allele_usage.tsv')\n") + + # gene + r_file.write("genes <- countGenes(db, gene='v_call', group='repertoire_id', mode='gene', copy='duplicate_count')\n") + r_file.write("write.table(genes, row.names=F, sep='\\t', file='v_gene_usage.tsv')\n") + r_file.write("genes <- countGenes(db, gene='d_call', group='repertoire_id', mode='gene', copy='duplicate_count')\n") + r_file.write("write.table(genes, row.names=F, sep='\\t', file='d_gene_usage.tsv')\n") + r_file.write("genes <- countGenes(db, gene='j_call', group='repertoire_id', mode='gene', copy='duplicate_count')\n") + r_file.write("write.table(genes, row.names=F, sep='\\t', file='j_gene_usage.tsv')\n") + #r_file.write("genes <- countGenes(db, gene='c_call', group='repertoire_id', mode='gene', copy='duplicate_count')\n") + #r_file.write("write.table(genes, row.names=F, sep='\\t', file='c_gene_usage.tsv')\n") + + # family/subgroup + r_file.write("genes <- countGenes(db, gene='v_call', group='repertoire_id', mode='family', copy='duplicate_count')\n") + r_file.write("write.table(genes, row.names=F, sep='\\t', file='v_subgroup_usage.tsv')\n") + r_file.write("genes <- countGenes(db, gene='d_call', group='repertoire_id', mode='family', copy='duplicate_count')\n") + r_file.write("write.table(genes, row.names=F, sep='\\t', file='d_subgroup_usage.tsv')\n") + r_file.write("genes <- countGenes(db, gene='j_call', group='repertoire_id', mode='family', copy='duplicate_count')\n") + r_file.write("write.table(genes, row.names=F, sep='\\t', file='j_subgroup_usage.tsv')\n") + #r_file.write("genes <- countGenes(db, gene='c_call', group='repertoire_id', mode='family', copy='duplicate_count')\n") + #r_file.write("write.table(genes, row.names=F, sep='\\t', file='c_subgroup_usage.tsv')\n") + + r_file.write("q()\n") + + # clonal abundance R script + if (args.clonal): + pass + + # diversity curve R script + if (args.diversity): + pass + + # lineage reconstruction R 
script + if (args.lineage): + pass + +else: + # invalid arguments + parser.print_help() + diff --git a/apps/alakazam/4.1/ls5/alakazam.json b/apps/alakazam/4.1/ls5/alakazam.json index f58c9eb..344f688 100644 --- a/apps/alakazam/4.1/ls5/alakazam.json +++ b/apps/alakazam/4.1/ls5/alakazam.json @@ -1,7 +1,7 @@ { "name": "irplus-alakazam-ls5", "version": "4.1", - "label": "toy", + "label": "alakazam", "defaultQueue": "normal", "defaultNodeCount": 1, "defaultProcessorsPerNode": 40, @@ -93,7 +93,7 @@ } }, { - "id": "single_flag", + "id": "gene_usage_flag", "value": { "visible": true, "required": true, @@ -105,7 +105,7 @@ }, "details": { "label": "", - "description": "Boolean flag parameter", + "description": "Calculate gene usage", "argument": null, "showArgument": false, "repeatArgument": false diff --git a/apps/alakazam/4.1/ls5/alakazam.sh b/apps/alakazam/4.1/ls5/alakazam.sh index 224472e..fa073ec 100644 --- a/apps/alakazam/4.1/ls5/alakazam.sh +++ b/apps/alakazam/4.1/ls5/alakazam.sh @@ -12,7 +12,7 @@ singularity_image="${singularity_image}" rearrangement_file="${rearrangement_file}" # application parameters -single_flag=${single_flag} +gene_usage_flag=${gene_usage_flag} optional_number="${optional_number}" optional_enum="${optional_enum}" @@ -36,7 +36,7 @@ export PATH="$PWD/bin:${PATH}" export PYTHONPATH=$PWD/lib/python3.7/site-packages:$PYTHONPATH # bring in common functions -source ./toy_common.sh +source ./alakazam_common.sh # ---------------------------------------------------------------------------- # Launcher to use multicores on node diff --git a/apps/alakazam/4.1/ls5/test/test-app.json b/apps/alakazam/4.1/ls5/test/test-app.json index fb04d72..b8c8813 100644 --- a/apps/alakazam/4.1/ls5/test/test-app.json +++ b/apps/alakazam/4.1/ls5/test/test-app.json @@ -8,10 +8,10 @@ "archiveSystem": "data.vdjserver.org", "inputs": { "singularity_image": "agave://data.vdjserver.org//irplus/images/immcantation_suite-4.1.0.sif", - "rearrangement_file": 
"agave://data.vdjserver.org//irplus/data/ipa1-668.tsv" + "rearrangement_file": "agave://data.vdjserver.org//irplus/data/TCR/vdjserver1.airr.tsv.gz" }, "parameters": { "creator": "schristley", - "single_flag": true + "gene_usage_flag": true } } diff --git a/apps/alakazam/4.1/ls5/upload-bundle.sh b/apps/alakazam/4.1/ls5/upload-bundle.sh index 1deed64..9fd2050 100644 --- a/apps/alakazam/4.1/ls5/upload-bundle.sh +++ b/apps/alakazam/4.1/ls5/upload-bundle.sh @@ -24,6 +24,7 @@ tapis files mkdir agave:///irplus/apps/$TOOL/$VER/$SYSTEM test tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM alakazam.sh tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM alakazam.json tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM ../common/alakazam_common.sh +tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM ../common/create_r_scripts.py tapis files list agave:///irplus/apps/$TOOL/$VER/$SYSTEM # upload test assets From fa03f61c7965e2717f8ce91228323e3635e92d84 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Thu, 3 Sep 2020 14:54:20 -0500 Subject: [PATCH 3/6] generic R scripts that accept parameters, add AA properties calculation --- apps/alakazam/4.1/common/aa_properties.R | 36 +++++++++ apps/alakazam/4.1/common/alakazam_common.sh | 19 +++-- apps/alakazam/4.1/common/create_r_scripts.py | 81 ------------------- apps/alakazam/4.1/common/gene_usage.R | 59 ++++++++++++++ apps/alakazam/4.1/ls5/alakazam.json | 52 ++++++++++++ apps/alakazam/4.1/ls5/alakazam.sh | 7 +- .../4.1/ls5/test/test-aa-properties.json | 18 +++++ apps/alakazam/4.1/ls5/test/test-app.json | 5 +- apps/alakazam/4.1/ls5/upload-bundle.sh | 3 +- 9 files changed, 186 insertions(+), 94 deletions(-) create mode 100755 apps/alakazam/4.1/common/aa_properties.R delete mode 100644 apps/alakazam/4.1/common/create_r_scripts.py create mode 100755 apps/alakazam/4.1/common/gene_usage.R create mode 100644 apps/alakazam/4.1/ls5/test/test-aa-properties.json diff --git 
a/apps/alakazam/4.1/common/aa_properties.R b/apps/alakazam/4.1/common/aa_properties.R new file mode 100755 index 0000000..683b842 --- /dev/null +++ b/apps/alakazam/4.1/common/aa_properties.R @@ -0,0 +1,36 @@ +#!/usr/bin/env Rscript + +# Alakazam AA properties +# +# Author: Scott Christley +# Date: Sep 3, 2020 +# + +# based upon this script for parsing args with optparse +# https://bitbucket.org/kleinstein/immcantation/src/master/pipelines/shazam-threshold.R + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("alakazam")) +suppressPackageStartupMessages(library("airr")) + +# Define command-line arguments +opt_list <- list(make_option(c("-d", "--db"), dest="DB", + help="Tabulated data file, in AIRR format (TSV)."), + make_option(c("-t", "--trim"), dest="TRIM", default=FALSE, + help=paste("Trim conserved residues.", + "\n\t\tDefaults to FALSE."))) + +# Parse arguments +opt <- parse_args(OptionParser(option_list=opt_list)) + +# Check input file +if (!("DB" %in% names(opt))) { + stop("You must provide a database file with the -d option.") +} + +# Read rearrangement data +db <- airr::read_rearrangement(opt$DB) + +aa_db <- aminoAcidProperties(db) + +airr::write_rearrangement(aa_db, 'aa_properties.airr.tsv') diff --git a/apps/alakazam/4.1/common/alakazam_common.sh b/apps/alakazam/4.1/common/alakazam_common.sh index ee654c0..95744a0 100644 --- a/apps/alakazam/4.1/common/alakazam_common.sh +++ b/apps/alakazam/4.1/common/alakazam_common.sh @@ -71,23 +71,26 @@ function print_parameters() { echo "" echo "Application parameters:" echo "gene_usage_flag=${gene_usage_flag}" - echo "optional_number=${optional_number}" - echo "optional_enum=${optional_enum}" + echo "aa_properties_flag=${aa_properties_flag}" + echo "aa_properties_trim=${aa_properties_trim}" } function run_alakazam_workflow() { initProvenance + # expand rearrangement file if its compressed + expandfile $rearrangement_file + noArchive $file + # Gene Usage if [[ $gene_usage_flag 
-eq 1 ]]; then - # expand rearrangement file if its compressed - expandfile $rearrangement_file - - # generate R script - $PYTHON ./create_r_scripts.py --rearrangement_file $file --gene gene_usage.R + singularity exec -B $PWD:/data ${singularity_image} /data/gene_usage.R -d $file + fi + # Amino Acid properties + if [[ $aa_properties_flag -eq 1 ]]; then # run it - singularity exec -B $PWD:/data ${singularity_image} R --no-save < gene_usage.R + singularity exec -B $PWD:/data ${singularity_image} /data/aa_properties.R -d $file fi } diff --git a/apps/alakazam/4.1/common/create_r_scripts.py b/apps/alakazam/4.1/common/create_r_scripts.py deleted file mode 100644 index 9c575b3..0000000 --- a/apps/alakazam/4.1/common/create_r_scripts.py +++ /dev/null @@ -1,81 +0,0 @@ -# -# Generate R scripts for Alakazam -# -# Author: Scott Christley -# Date: Sep 2, 2020 -# - -from __future__ import print_function -import json -import argparse - -# -# main routine -# - -parser = argparse.ArgumentParser(description='Generate R scripts.') -parser.add_argument('--gene', type=str, help='Output R script for Alakazam gene usage') -parser.add_argument('--clonal', type=str, help='Output R script for Alakazam clonal analysis') -parser.add_argument('--diversity', type=str, help='Output R script for Alakazam diversity analysis') -parser.add_argument('--lineage', type=str, help='Output R script for Alakazam lineage reconstruction') -parser.add_argument('--rearrangement_file', type=str, required=True, help='Rearrangment input file name') - -args = parser.parse_args() -if (args): - - # gene usage R script, we just calculate all of them - if (args.gene): - with open(args.gene, 'w') as r_file: - r_file.write('library(alakazam)\n') - r_file.write('db <- readChangeoDb("/data/' + args.rearrangement_file + '")\n') - - # allele - r_file.write("genes <- countGenes(db, gene='v_call', group='repertoire_id', mode='allele', copy='duplicate_count')\n") - r_file.write("write.table(genes, row.names=F, sep='\\t', 
file='v_allele_usage.tsv')\n") - r_file.write("genes <- countGenes(db, gene='d_call', group='repertoire_id', mode='allele', copy='duplicate_count')\n") - r_file.write("write.table(genes, row.names=F, sep='\\t', file='d_allele_usage.tsv')\n") - r_file.write("genes <- countGenes(db, gene='j_call', group='repertoire_id', mode='allele', copy='duplicate_count')\n") - r_file.write("write.table(genes, row.names=F, sep='\\t', file='j_allele_usage.tsv')\n") - # TODO: Alakazam throws an error and stops execution if no data in c_call field - # We need to figure out to check for this - #r_file.write("genes <- countGenes(db, gene='c_call', group='repertoire_id', mode='allele', copy='duplicate_count', fill=T)\n") - #r_file.write("write.table(genes, row.names=F, sep='\\t', file='c_allele_usage.tsv')\n") - - # gene - r_file.write("genes <- countGenes(db, gene='v_call', group='repertoire_id', mode='gene', copy='duplicate_count')\n") - r_file.write("write.table(genes, row.names=F, sep='\\t', file='v_gene_usage.tsv')\n") - r_file.write("genes <- countGenes(db, gene='d_call', group='repertoire_id', mode='gene', copy='duplicate_count')\n") - r_file.write("write.table(genes, row.names=F, sep='\\t', file='d_gene_usage.tsv')\n") - r_file.write("genes <- countGenes(db, gene='j_call', group='repertoire_id', mode='gene', copy='duplicate_count')\n") - r_file.write("write.table(genes, row.names=F, sep='\\t', file='j_gene_usage.tsv')\n") - #r_file.write("genes <- countGenes(db, gene='c_call', group='repertoire_id', mode='gene', copy='duplicate_count')\n") - #r_file.write("write.table(genes, row.names=F, sep='\\t', file='c_gene_usage.tsv')\n") - - # family/subgroup - r_file.write("genes <- countGenes(db, gene='v_call', group='repertoire_id', mode='family', copy='duplicate_count')\n") - r_file.write("write.table(genes, row.names=F, sep='\\t', file='v_subgroup_usage.tsv')\n") - r_file.write("genes <- countGenes(db, gene='d_call', group='repertoire_id', mode='family', copy='duplicate_count')\n") - 
r_file.write("write.table(genes, row.names=F, sep='\\t', file='d_subgroup_usage.tsv')\n") - r_file.write("genes <- countGenes(db, gene='j_call', group='repertoire_id', mode='family', copy='duplicate_count')\n") - r_file.write("write.table(genes, row.names=F, sep='\\t', file='j_subgroup_usage.tsv')\n") - #r_file.write("genes <- countGenes(db, gene='c_call', group='repertoire_id', mode='family', copy='duplicate_count')\n") - #r_file.write("write.table(genes, row.names=F, sep='\\t', file='c_subgroup_usage.tsv')\n") - - r_file.write("q()\n") - - # clonal abundance R script - if (args.clonal): - pass - - # diversity curve R script - if (args.diversity): - pass - - # lineage reconstruction R script - if (args.lineage): - pass - -else: - # invalid arguments - parser.print_help() - diff --git a/apps/alakazam/4.1/common/gene_usage.R b/apps/alakazam/4.1/common/gene_usage.R new file mode 100755 index 0000000..a05fe56 --- /dev/null +++ b/apps/alakazam/4.1/common/gene_usage.R @@ -0,0 +1,59 @@ +#!/usr/bin/env Rscript + +# Alakazam gene usage +# +# Author: Scott Christley +# Date: Sep 3, 2020 +# + +# based upon this script for parsing args with optparse +# https://bitbucket.org/kleinstein/immcantation/src/master/pipelines/shazam-threshold.R + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("alakazam")) +suppressPackageStartupMessages(library("airr")) + +# Define commmandline arguments +opt_list <- list(make_option(c("-d", "--db"), dest="DB", + help="Tabulated data file, in AIRR format (TSV).")) + +# Parse arguments +opt <- parse_args(OptionParser(option_list=opt_list)) + +# Check input file +if (!("DB" %in% names(opt))) { + stop("You must provide a database file with the -d option.") +} + +# Read rearrangement data +db <- airr::read_rearrangement(opt$DB) + +# allele +genes <- countGenes(db, gene='v_call', group='repertoire_id', mode='allele', copy='duplicate_count') +write.table(genes, row.names=F, sep='\t', file='v_allele_usage.tsv') 
+genes <- countGenes(db, gene='d_call', group='repertoire_id', mode='allele', copy='duplicate_count') +write.table(genes, row.names=F, sep='\t', file='d_allele_usage.tsv') +genes <- countGenes(db, gene='j_call', group='repertoire_id', mode='allele', copy='duplicate_count') +write.table(genes, row.names=F, sep='\t', file='j_allele_usage.tsv') +# TODO: Alakazam throws an error and stops execution if no data in c_call field +# We need to figure out to check for this +#genes <- countGenes(db, gene='c_call', group='repertoire_id', mode='allele', copy='duplicate_count', fill=T) +#write.table(genes, row.names=F, sep='\t', file='c_allele_usage.tsv') + +# gene +genes <- countGenes(db, gene='v_call', group='repertoire_id', mode='gene', copy='duplicate_count') +write.table(genes, row.names=F, sep='\t', file='v_gene_usage.tsv') +genes <- countGenes(db, gene='d_call', group='repertoire_id', mode='gene', copy='duplicate_count') +write.table(genes, row.names=F, sep='\t', file='d_gene_usage.tsv') +genes <- countGenes(db, gene='j_call', group='repertoire_id', mode='gene', copy='duplicate_count') +write.table(genes, row.names=F, sep='\t', file='j_gene_usage.tsv') +#genes <- countGenes(db, gene='c_call', group='repertoire_id', mode='gene', copy='duplicate_count') +#write.table(genes, row.names=F, sep='\t', file='c_gene_usage.tsv') + +# family/subgroup +genes <- countGenes(db, gene='v_call', group='repertoire_id', mode='family', copy='duplicate_count') +write.table(genes, row.names=F, sep='\t', file='v_subgroup_usage.tsv') +genes <- countGenes(db, gene='d_call', group='repertoire_id', mode='family', copy='duplicate_count') +write.table(genes, row.names=F, sep='\t', file='d_subgroup_usage.tsv') +genes <- countGenes(db, gene='j_call', group='repertoire_id', mode='family', copy='duplicate_count') +write.table(genes, row.names=F, sep='\t', file='j_subgroup_usage.tsv') diff --git a/apps/alakazam/4.1/ls5/alakazam.json b/apps/alakazam/4.1/ls5/alakazam.json index 344f688..d16453d 100644 --- 
a/apps/alakazam/4.1/ls5/alakazam.json +++ b/apps/alakazam/4.1/ls5/alakazam.json @@ -118,6 +118,58 @@ ] } }, + { + "id": "aa_properties_flag", + "value": { + "visible": true, + "required": true, + "type": "bool", + "order": 0, + "enquote": false, + "default": false, + "validator": null + }, + "details": { + "label": "", + "description": "Calculate amino acid properties", + "argument": null, + "showArgument": false, + "repeatArgument": false + }, + "semantics": { + "minCardinality": 1, + "maxCardinality": 1, + "ontology": [ + "xsd:boolean" + ] + } + }, + { + "id": "aa_properties_trim", + "value": { + "visible": true, + "required": false, + "type": "bool", + "order": 0, + "enquote": false, + "default": false, + "validator": null + }, + "details": { + "label": "", + "description": "Trim parameter for calculate amino acid properties", + "argument": null, + "showArgument": false, + "repeatArgument": false + }, + "semantics": { + "minCardinality": 1, + "maxCardinality": 1, + "ontology": [ + "xsd:boolean" + ] + } + }, { "id": "optional_number", "value": { diff --git a/apps/alakazam/4.1/ls5/alakazam.sh b/apps/alakazam/4.1/ls5/alakazam.sh index fa073ec..4906783 100644 --- a/apps/alakazam/4.1/ls5/alakazam.sh +++ b/apps/alakazam/4.1/ls5/alakazam.sh @@ -13,8 +13,8 @@ rearrangement_file="${rearrangement_file}" # application parameters gene_usage_flag=${gene_usage_flag} -optional_number="${optional_number}" -optional_enum="${optional_enum}" +aa_properties_flag="${aa_properties_flag}" +aa_properties_trim="${aa_properties_trim}" # Agave info AGAVE_JOB_ID=${AGAVE_JOB_ID} @@ -25,6 +25,9 @@ AGAVE_LOG_NAME=${AGAVE_JOB_NAME}-${AGAVE_JOB_ID} # unpack local executables #tar zxf binaries.tgz +chmod +x gene_usage.R +chmod +x aa_properties.R + # modules module load python3 module load launcher/3.4 diff --git a/apps/alakazam/4.1/ls5/test/test-aa-properties.json b/apps/alakazam/4.1/ls5/test/test-aa-properties.json new file mode 100644 index 0000000..5c2ebe6 --- /dev/null +++ 
b/apps/alakazam/4.1/ls5/test/test-aa-properties.json @@ -0,0 +1,18 @@ +{ + "name":"aa_properties", + "appId": "irplus-alakazam-ls5-4.1", + "batchQueue": "normal", + "maxRunTime": "01:00:00", + "nodeCount": 1, + "archive": false, + "archiveSystem": "data.vdjserver.org", + "inputs": { + "singularity_image": "agave://data.vdjserver.org//irplus/images/immcantation_suite-4.1.0.sif", + "rearrangement_file": "agave://data.vdjserver.org//irplus/data/TCR/vdjserver1.airr.tsv.gz" + }, + "parameters": { + "creator": "schristley", + "gene_usage_flag": false, + "aa_properties_flag": true + } +} diff --git a/apps/alakazam/4.1/ls5/test/test-app.json b/apps/alakazam/4.1/ls5/test/test-app.json index b8c8813..fd36d50 100644 --- a/apps/alakazam/4.1/ls5/test/test-app.json +++ b/apps/alakazam/4.1/ls5/test/test-app.json @@ -1,5 +1,5 @@ { - "name":"test_alakazam", + "name":"gene_usage", "appId": "irplus-alakazam-ls5-4.1", "batchQueue": "normal", "maxRunTime": "01:00:00", @@ -12,6 +12,7 @@ }, "parameters": { "creator": "schristley", - "gene_usage_flag": true + "gene_usage_flag": true, + "aa_properties_flag": false } } diff --git a/apps/alakazam/4.1/ls5/upload-bundle.sh b/apps/alakazam/4.1/ls5/upload-bundle.sh index 9fd2050..f9d9a84 100644 --- a/apps/alakazam/4.1/ls5/upload-bundle.sh +++ b/apps/alakazam/4.1/ls5/upload-bundle.sh @@ -24,7 +24,8 @@ tapis files mkdir agave:///irplus/apps/$TOOL/$VER/$SYSTEM test tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM alakazam.sh tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM alakazam.json tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM ../common/alakazam_common.sh -tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM ../common/create_r_scripts.py +tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM ../common/gene_usage.R +tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM ../common/aa_properties.R tapis files list agave:///irplus/apps/$TOOL/$VER/$SYSTEM # upload test assets From 
751c01170bd862edfad2833206ea59837f5b39a7 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Fri, 25 Sep 2020 13:24:52 -0500 Subject: [PATCH 4/6] add metadata_file --- apps/alakazam/4.1/common/alakazam_common.sh | 2 +- apps/alakazam/4.1/ls5/alakazam.json | 23 +++++++++++++++++++++ apps/alakazam/4.1/ls5/alakazam.sh | 1 + 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/apps/alakazam/4.1/common/alakazam_common.sh b/apps/alakazam/4.1/common/alakazam_common.sh index 95744a0..c4945b7 100644 --- a/apps/alakazam/4.1/common/alakazam_common.sh +++ b/apps/alakazam/4.1/common/alakazam_common.sh @@ -59,7 +59,6 @@ function initProvenance() { function print_versions() { echo "VERSIONS:" - #echo " $(DefineClones.py --version 2>&1)" singularity exec ${singularity_image} versions report echo -e "\nSTART at $(date)" } @@ -67,6 +66,7 @@ function print_versions() { function print_parameters() { echo "Input files:" echo "singularity_image=${singularity_image}" + echo "metadata_file=${metadata_file}" echo "rearrangement_file=${rearrangement_file}" echo "" echo "Application parameters:" diff --git a/apps/alakazam/4.1/ls5/alakazam.json b/apps/alakazam/4.1/ls5/alakazam.json index d16453d..073cb3e 100644 --- a/apps/alakazam/4.1/ls5/alakazam.json +++ b/apps/alakazam/4.1/ls5/alakazam.json @@ -47,6 +47,29 @@ "required": true } }, + { + "id": "metadata_file", + "details": { + "label": "", + "description": "AIRR Repertoire metadata file", + "showAttribute": false + }, + "semantics": { + "minCardinality": 1, + "maxCardinality": 1, + "ontology": [ + "http://sswapmeet.sswap.info/mime/application/Json" + ], + "fileTypes": [ + "text-0" + ] + }, + "value": { + "default": "", + "visible": true, + "required": true + } + }, { "id": "rearrangement_file", "details": { diff --git a/apps/alakazam/4.1/ls5/alakazam.sh b/apps/alakazam/4.1/ls5/alakazam.sh index 4906783..d358f1f 100644 --- a/apps/alakazam/4.1/ls5/alakazam.sh +++ b/apps/alakazam/4.1/ls5/alakazam.sh @@ -9,6 +9,7 @@ # input files 
singularity_image="${singularity_image}" +metadata_file="${metadata_file}" rearrangement_file="${rearrangement_file}" # application parameters From fde0e002885b53bb7654a809a9da1ff55f091a3d Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Sat, 19 Dec 2020 04:49:34 -0600 Subject: [PATCH 5/6] add metadata file --- apps/alakazam/4.1/ls5/test/test-app.json | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/alakazam/4.1/ls5/test/test-app.json b/apps/alakazam/4.1/ls5/test/test-app.json index fd36d50..e45b95f 100644 --- a/apps/alakazam/4.1/ls5/test/test-app.json +++ b/apps/alakazam/4.1/ls5/test/test-app.json @@ -8,6 +8,7 @@ "archiveSystem": "data.vdjserver.org", "inputs": { "singularity_image": "agave://data.vdjserver.org//irplus/images/immcantation_suite-4.1.0.sif", + "metadata_file": "agave://data.vdjserver.org//irplus/data/TCR/vdjserver1-metadata.airr.json", "rearrangement_file": "agave://data.vdjserver.org//irplus/data/TCR/vdjserver1.airr.tsv.gz" }, "parameters": { From a403cda2c6a5558ff9fe7446c228c5b63558db2d Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Tue, 6 Apr 2021 10:45:21 -0500 Subject: [PATCH 6/6] specify sequence --- apps/alakazam/4.1/common/aa_properties.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/alakazam/4.1/common/aa_properties.R b/apps/alakazam/4.1/common/aa_properties.R index 683b842..7effef2 100755 --- a/apps/alakazam/4.1/common/aa_properties.R +++ b/apps/alakazam/4.1/common/aa_properties.R @@ -31,6 +31,6 @@ if (!("DB" %in% names(opt))) { # Read rearrangement data db <- airr::read_rearrangement(opt$DB) -aa_db <- aminoAcidProperties(db) +aa_db <- aminoAcidProperties(db, seq="junction_aa", nt=FALSE, label="junction") airr::write_rearrangement(aa_db, 'aa_properties.airr.tsv')