diff --git a/apps/alakazam/4.1/common/aa_properties.R b/apps/alakazam/4.1/common/aa_properties.R
new file mode 100755
index 0000000..7effef2
--- /dev/null
+++ b/apps/alakazam/4.1/common/aa_properties.R
@@ -0,0 +1,45 @@
+#!/usr/bin/env Rscript
+
+# Alakazam AA properties
+#
+# Reads an AIRR rearrangement TSV, computes amino acid properties of the
+# junction_aa field and writes the annotated table to
+# aa_properties.airr.tsv in the current working directory.
+#
+# Author: Scott Christley
+# Date: Sep 3, 2020
+#
+
+# based upon this script for parsing args with optparse
+# https://bitbucket.org/kleinstein/immcantation/src/master/pipelines/shazam-threshold.R
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("alakazam"))
+suppressPackageStartupMessages(library("airr"))
+
+# Define command-line arguments
+opt_list <- list(
+  make_option(c("-d", "--db"), dest = "DB",
+              help = "Tabulated data file, in AIRR format (TSV)."),
+  make_option(c("-t", "--trim"), dest = "TRIM", default = FALSE,
+              help = paste("Trim conserved residues.",
+                           "\n\t\tDefaults to FALSE."))
+)
+
+# Parse arguments
+opt <- parse_args(OptionParser(option_list = opt_list))
+
+# Check input file
+if (!("DB" %in% names(opt))) {
+  stop("You must provide a database file with the -d option.")
+}
+
+# Read rearrangement data
+db <- airr::read_rearrangement(opt$DB)
+
+# junction_aa already holds amino acids, so translation is disabled (nt
+# defaults to TRUE); --trim is forwarded, anything but TRUE means no trimming.
+aa_db <- aminoAcidProperties(db, seq = "junction_aa", nt = FALSE,
+                             trim = isTRUE(opt$TRIM), label = "junction")
+
+airr::write_rearrangement(aa_db, "aa_properties.airr.tsv")
diff --git a/apps/alakazam/4.1/common/alakazam_common.sh b/apps/alakazam/4.1/common/alakazam_common.sh
new file mode 100644
index 0000000..c4945b7
--- /dev/null
+++ b/apps/alakazam/4.1/common/alakazam_common.sh
@@ -0,0 +1,96 @@
+#
+# Alakazam common functions
+#
+# This script relies upon global variables
+# source alakazam_common.sh
+#
+# Author: Scott Christley
+# Date: Aug 17, 2020
+#
+
+# required global variables:
+# PYTHON
+# AGAVE_JOB_ID
+# and...
+# The agave app input and parameters
+
+# the app
+export APP_NAME=alakazam
+
+# ----------------------------------------------------------------------------
+# Decompress $1 (gz/bz2/zip) if needed; exports the usable path as $file
+function expandfile () {
+    fileBasename="${1%.*}" # file.txt.gz -> file.txt
+    fileExtension="${1##*.}" # file.txt.gz -> gz
+
+    # quoted so that an empty or space-containing path is caught here
+    if [ ! -f "$1" ]; then
+        echo "Could not find input file $1" 1>&2
+        exit 1
+    fi
+
+    if [ "$fileExtension" == "gz" ]; then
+        gunzip "$1"
+        export file="$fileBasename"
+        # don't archive the intermediate file
+    elif [ "$fileExtension" == "bz2" ]; then
+        bunzip2 "$1"
+        export file="$fileBasename"
+    elif [ "$fileExtension" == "zip" ]; then
+        unzip -o "$1"
+        export file="$fileBasename"
+    else
+        export file="$1"
+    fi
+}
+
+# prevent Agave from archiving the file
+function noArchive() {
+    echo "$1" >> .agave.archive
+}
+
+# ----------------------------------------------------------------------------
+# Analysis provenance
+function initProvenance() {
+    # nothing yet
+    echo "initProvenance"
+}
+
+# ----------------------------------------------------------------------------
+# Workflow
+function print_versions() {
+    echo "VERSIONS:"
+    singularity exec "${singularity_image}" versions report
+    echo -e "\nSTART at $(date)"
+}
+
+function print_parameters() {
+    echo "Input files:"
+    echo "singularity_image=${singularity_image}"
+    echo "metadata_file=${metadata_file}"
+    echo "rearrangement_file=${rearrangement_file}"
+    echo ""
+    echo "Application parameters:"
+    echo "gene_usage_flag=${gene_usage_flag}"
+    echo "aa_properties_flag=${aa_properties_flag}"
+    echo "aa_properties_trim=${aa_properties_trim}"
+}
+
+function run_alakazam_workflow() {
+    initProvenance
+
+    # expand rearrangement file if it is compressed
+    expandfile "$rearrangement_file"
+    noArchive "$file"
+
+    # Gene Usage (a failing step sets JOB_ERROR=1 instead of being ignored)
+    if [[ $gene_usage_flag -eq 1 ]]; then
+        singularity exec -B "$PWD:/data" "${singularity_image}" /data/gene_usage.R -d "$file" || export JOB_ERROR=1
+    fi
+
+    # Amino Acid properties
+    if [[ $aa_properties_flag -eq 1 ]]; then
+        # TODO: aa_properties_trim is not passed on to the script yet (-t option)
+        singularity exec -B "$PWD:/data" "${singularity_image}" /data/aa_properties.R -d "$file" || export JOB_ERROR=1
+    fi
+}
diff --git a/apps/alakazam/4.1/common/gene_usage.R b/apps/alakazam/4.1/common/gene_usage.R
new file mode 100755
index 0000000..a05fe56
--- /dev/null
+++ b/apps/alakazam/4.1/common/gene_usage.R
@@ -0,0 +1,52 @@
+#!/usr/bin/env Rscript
+
+# Alakazam gene usage
+#
+# Reads an AIRR rearrangement TSV and writes V/D/J usage tables per
+# repertoire_id at allele, gene and family (subgroup) resolution to
+# <segment>_<level>_usage.tsv in the current working directory.
+#
+# Author: Scott Christley
+# Date: Sep 3, 2020
+#
+
+# based upon this script for parsing args with optparse
+# https://bitbucket.org/kleinstein/immcantation/src/master/pipelines/shazam-threshold.R
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("alakazam"))
+suppressPackageStartupMessages(library("airr"))
+
+# Define command-line arguments
+opt_list <- list(make_option(c("-d", "--db"), dest = "DB",
+                             help = "Tabulated data file, in AIRR format (TSV)."))
+
+# Parse arguments
+opt <- parse_args(OptionParser(option_list = opt_list))
+
+# Check input file
+if (!("DB" %in% names(opt))) {
+  stop("You must provide a database file with the -d option.")
+}
+
+# Read rearrangement data
+db <- airr::read_rearrangement(opt$DB)
+
+# countGenes mode -> label used in the output file name
+# (the "family" mode is written to *_subgroup_usage.tsv)
+usage_levels <- c(allele = "allele", gene = "gene", family = "subgroup")
+
+# TODO: Alakazam throws an error and stops execution if no data in c_call field
+# We need to figure out how to check for this before adding "c" here
+segments <- c("v", "d", "j")
+
+# Levels run allele -> gene -> family; within each level V, D, then J
+for (mode in names(usage_levels)) {
+  for (segment in segments) {
+    genes <- countGenes(db, gene = paste0(segment, "_call"),
+                        group = "repertoire_id", mode = mode,
+                        copy = "duplicate_count")
+    write.table(genes, row.names = FALSE, sep = "\t",
+                file = paste0(segment, "_", usage_levels[[mode]], "_usage.tsv"))
+  }
+}
diff --git a/apps/alakazam/4.1/ls5/alakazam.json b/apps/alakazam/4.1/ls5/alakazam.json
new file mode 100644
index 0000000..073cb3e
--- /dev/null
+++ b/apps/alakazam/4.1/ls5/alakazam.json
@@ -0,0 +1,236 @@
+{
+    "name": "irplus-alakazam-ls5",
+    "version": "4.1",
+    "label": "alakazam",
+    "defaultQueue": "normal",
+    "defaultNodeCount": 1,
+    "defaultProcessorsPerNode": 40,
+    "defaultRequestedTime": "1:00:00",
+    "shortDescription": "Alakazam app on Lonestar5",
+    "longDescription": "iReceptor+ Alakazam app",
+    "executionType": "HPC",
+    "executionSystem": "irplus-ls5.tacc.utexas.edu",
+    "parallelism": "PARALLEL",
+    "checkpointable": false,
+    "deploymentPath": "/irplus/apps/alakazam/4.1/ls5/",
+    "deploymentSystem": "data.vdjserver.org",
+    "templatePath": "alakazam.sh",
+    "testPath": "test/test.sh",
+    "ontology": [
+
"http://sswapmeet.sswap.info/agave/apps/Application" + ], + "modules": [ + "purge", + "load TACC" + ], + "inputs": [ + { + "id": "singularity_image", + "details": { + "label": "", + "description": "Singularity image file", + "showAttribute": false + }, + "semantics": { + "minCardinality": 1, + "maxCardinality": 1, + "ontology": [ + "http://sswapmeet.sswap.info/mime/application/Json" + ], + "fileTypes": [ + "text-0" + ] + }, + "value": { + "default": "", + "visible": true, + "required": true + } + }, + { + "id": "metadata_file", + "details": { + "label": "", + "description": "AIRR Repertoire metadata file", + "showAttribute": false + }, + "semantics": { + "minCardinality": 1, + "maxCardinality": 1, + "ontology": [ + "http://sswapmeet.sswap.info/mime/application/Json" + ], + "fileTypes": [ + "text-0" + ] + }, + "value": { + "default": "", + "visible": true, + "required": true + } + }, + { + "id": "rearrangement_file", + "details": { + "label": "", + "description": "Rearrangement AIRR TSV file", + "showAttribute": false + }, + "semantics": { + "minCardinality": 1, + "maxCardinality": 1, + "ontology": [ + "http://sswapmeet.sswap.info/mime/application/Json" + ], + "fileTypes": [ + "text-0" + ] + }, + "value": { + "default": "", + "visible": true, + "required": true + } + } + ], + "parameters": [ + { + "id": "creator", + "value": { + "visible": true, + "required": true, + "type": "string", + "default": "" + }, + "details": { + "label": "Creator", + "description": "Username that created this job." 
+            },
+            "semantics": {
+                "minCardinality": 1,
+                "maxCardinality": 1,
+                "ontology": [
+                    "xsd:string"
+                ]
+            }
+        },
+        {
+            "id": "gene_usage_flag",
+            "value": {
+                "visible": true,
+                "required": true,
+                "type": "bool",
+                "order": 0,
+                "enquote": false,
+                "default": false,
+                "validator": null
+            },
+            "details": {
+                "label": "",
+                "description": "Calculate gene usage",
+                "argument": null,
+                "showArgument": false,
+                "repeatArgument": false
+            },
+            "semantics": {
+                "minCardinality": 1,
+                "maxCardinality": 1,
+                "ontology": [
+                    "xsd:boolean"
+                ]
+            }
+        },
+        {
+            "id": "aa_properties_flag",
+            "value": {
+                "visible": true,
+                "required": true,
+                "type": "bool",
+                "order": 0,
+                "enquote": false,
+                "default": false,
+                "validator": null
+            },
+            "details": {
+                "label": "",
+                "description": "Calculate amino acid properties",
+                "argument": null,
+                "showArgument": false,
+                "repeatArgument": false
+            },
+            "semantics": {
+                "minCardinality": 1,
+                "maxCardinality": 1,
+                "ontology": [
+                    "xsd:boolean"
+                ]
+            }
+        },
+        {
+            "id": "aa_properties_trim",
+            "value": {
+                "visible": true,
+                "required": false,
+                "type": "bool",
+                "order": 0,
+                "enquote": false,
+                "default": false,
+                "validator": null
+            },
+            "details": {
+                "label": "",
+                "description": "Trim parameter for calculate amino acid properties",
+                "argument": null,
+                "showArgument": false,
+                "repeatArgument": false
+            },
+            "semantics": {
+                "minCardinality": 1,
+                "maxCardinality": 1,
+                "ontology": [
+                    "xsd:boolean"
+                ]
+            }
+        },
+        {
+            "id": "optional_number",
+            "value": {
+                "visible": true,
+                "required": false,
+                "type": "number"
+            },
+            "details": {
+                "label": "",
+                "description": "Optional number parameter"
+            },
+            "semantics": {
+                "minCardinality": 0,
+                "maxCardinality": 1,
+                "ontology": [
+                    "xsd:integer"
+                ]
+            }
+        },
+        {
+            "id": "optional_enum",
+            "value": {
+                "visible": true,
+                "required": false,
+                "type": "enumeration",
+                "enumValues": [ "allele", "gene" ]
+            },
+            "details": {
+                "label": "",
+                "description": "Optional enum parameter"
+            },
+            "semantics": {
+                "minCardinality": 0,
+                "maxCardinality": 1,
+                "ontology": [
+                    "xsd:string"
+                ]
+            }
+        }
+    ]
+}
diff --git a/apps/alakazam/4.1/ls5/alakazam.sh b/apps/alakazam/4.1/ls5/alakazam.sh
new file mode 100644
index 0000000..d358f1f
--- /dev/null
+++ b/apps/alakazam/4.1/ls5/alakazam.sh
@@ -0,0 +1,76 @@
+#
+# wrapper script
+# for Lonestar5
+#
+# The ${...} settings below get set by Tapis; the script then loads the
+# modules, sources alakazam_common.sh and runs the alakazam workflow.
+#
+
+# Configuration settings
+
+# These get set by Tapis
+
+# input files
+singularity_image="${singularity_image}"
+metadata_file="${metadata_file}"
+rearrangement_file="${rearrangement_file}"
+
+# application parameters
+gene_usage_flag="${gene_usage_flag}"
+aa_properties_flag="${aa_properties_flag}"
+aa_properties_trim="${aa_properties_trim}"
+
+# Agave info
+AGAVE_JOB_ID=${AGAVE_JOB_ID}
+AGAVE_JOB_NAME=${AGAVE_JOB_NAME}
+AGAVE_LOG_NAME=${AGAVE_JOB_NAME}-${AGAVE_JOB_ID}
+
+# ----------------------------------------------------------------------------
+# unpack local executables
+#tar zxf binaries.tgz
+
+chmod +x gene_usage.R
+chmod +x aa_properties.R
+
+# modules
+module load python3
+module load launcher/3.4
+module load tacc-singularity
+
+PYTHON=python3
+
+export PATH="$PWD/bin:${PATH}"
+export PYTHONPATH="$PWD/lib/python3.7/site-packages:$PYTHONPATH"
+
+# bring in common functions
+source ./alakazam_common.sh
+
+# ----------------------------------------------------------------------------
+# Launcher to use multicores on node
+export LAUNCHER_WORKDIR="$PWD"
+export LAUNCHER_LOW_PPN=1
+export LAUNCHER_MID_PPN=8
+export LAUNCHER_MAX_PPN=25
+export LAUNCHER_PPN=1
+export LAUNCHER_JOB_FILE=joblist
+export LAUNCHER_SCHED=interleaved
+
+# Start (date is passed as an argument, not as part of the printf format)
+printf "START at %s\n\n" "$(date)"
+
+# If you want to tell Tapis that the job failed
+export JOB_ERROR=0
+
+print_parameters
+print_versions
+run_alakazam_workflow
+
+# End
+printf "DONE at %s\n\n" "$(date)"
+
+# remove binaries before archiving
+rm -rf bin lib
+
+if [[ $JOB_ERROR -eq 1 ]]; then
+    ${AGAVE_JOB_CALLBACK_FAILURE}
+fi
diff --git a/apps/alakazam/4.1/ls5/test/test-aa-properties.json b/apps/alakazam/4.1/ls5/test/test-aa-properties.json
new file mode 100644
index 0000000..5c2ebe6
--- /dev/null
+++ b/apps/alakazam/4.1/ls5/test/test-aa-properties.json
@@ -0,0 +1,19 @@
+{
+    "name":"aa_properties",
+    "appId": "irplus-alakazam-ls5-4.1",
+    "batchQueue": "normal",
+    "maxRunTime": "01:00:00",
+    "nodeCount": 1,
+    "archive": false,
+    "archiveSystem": "data.vdjserver.org",
+    "inputs": {
+        "singularity_image": "agave://data.vdjserver.org//irplus/images/immcantation_suite-4.1.0.sif",
+        "metadata_file": "agave://data.vdjserver.org//irplus/data/TCR/vdjserver1-metadata.airr.json",
+        "rearrangement_file": "agave://data.vdjserver.org//irplus/data/TCR/vdjserver1.airr.tsv.gz"
+    },
+    "parameters": {
+        "creator": "schristley",
+        "gene_usage_flag": false,
+        "aa_properties_flag": true
+    }
+}
diff --git a/apps/alakazam/4.1/ls5/test/test-app.json b/apps/alakazam/4.1/ls5/test/test-app.json
new file mode 100644
index 0000000..e45b95f
--- /dev/null
+++ b/apps/alakazam/4.1/ls5/test/test-app.json
@@ -0,0 +1,19 @@
+{
+    "name":"gene_usage",
+    "appId": "irplus-alakazam-ls5-4.1",
+    "batchQueue": "normal",
+    "maxRunTime": "01:00:00",
+    "nodeCount": 1,
+    "archive": false,
+    "archiveSystem": "data.vdjserver.org",
+    "inputs": {
+        "singularity_image": "agave://data.vdjserver.org//irplus/images/immcantation_suite-4.1.0.sif",
+        "metadata_file": "agave://data.vdjserver.org//irplus/data/TCR/vdjserver1-metadata.airr.json",
+        "rearrangement_file": "agave://data.vdjserver.org//irplus/data/TCR/vdjserver1.airr.tsv.gz"
+    },
+    "parameters": {
+        "creator": "schristley",
+        "gene_usage_flag": true,
+        "aa_properties_flag": false
+    }
+}
diff --git a/apps/alakazam/4.1/ls5/test/test.sh b/apps/alakazam/4.1/ls5/test/test.sh
new file mode 100644
index 0000000..e69de29
diff --git a/apps/alakazam/4.1/ls5/upload-bundle.sh b/apps/alakazam/4.1/ls5/upload-bundle.sh
new file mode 100644
index 0000000..f9d9a84
--- /dev/null
+++ b/apps/alakazam/4.1/ls5/upload-bundle.sh
@@ -0,0 +1,36 @@
+#
+TOOL=alakazam
+SYSTEM=ls5
+VER=4.1
+
+# Copy all of the object files to the bundle directory
+# and create a binaries.tgz
+#
+# For example:
+# cd bundle
+
+# tar zcvf binaries.tgz bin lib
+
+# delete old working area in tapis
+tapis files delete agave:///irplus/apps/$TOOL/$VER/$SYSTEM
+
+# create directory structure
+tapis files mkdir agave:///irplus/apps $TOOL
+tapis files mkdir agave:///irplus/apps/$TOOL $VER
+tapis files mkdir agave:///irplus/apps/$TOOL/$VER $SYSTEM
+tapis files mkdir agave:///irplus/apps/$TOOL/$VER/$SYSTEM test
+
+# upload app assets
+tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM alakazam.sh
+tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM alakazam.json
+tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM ../common/alakazam_common.sh
+tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM ../common/gene_usage.R
+tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM ../common/aa_properties.R
+tapis files list agave:///irplus/apps/$TOOL/$VER/$SYSTEM
+
+# upload test assets (one job file per analysis)
+tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM/test test/test.sh
+tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM/test test/test-app.json
+tapis files upload agave:///irplus/apps/$TOOL/$VER/$SYSTEM/test test/test-aa-properties.json
+tapis files list agave:///irplus/apps/$TOOL/$VER/$SYSTEM/test
+