Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions apps/alakazam/4.1/common/aa_properties.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env Rscript

# Alakazam AA properties
#
# Author: Scott Christley
# Date: Sep 3, 2020
#

# based upon this script for parsing args with optparse
# https://bitbucket.org/kleinstein/immcantation/src/master/pipelines/shazam-threshold.R

suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("alakazam"))
suppressPackageStartupMessages(library("airr"))

# Define commmandline arguments
opt_list <- list(make_option(c("-d", "--db"), dest="DB",
help="Tabulated data file, in AIRR format (TSV)."),
make_option(c("-t", "--trim"), dest="TRIM", default=FALSE,
help=paste("Trim conserved residues.",
"\n\t\tDefaults to FALSE.")),

# Parse arguments
opt <- parse_args(OptionParser(option_list=opt_list))

# Check input file
if (!("DB" %in% names(opt))) {
stop("You must provide a database file with the -d option.")
}

# Read rearrangement data
db <- airr::read_rearrangement(opt$DB)

aa_db <- aminoAcidProperties(db, seq="junction_aa", label="junction")

airr::write_rearrangement(aa_db, 'aa_properties.airr.tsv')
96 changes: 96 additions & 0 deletions apps/alakazam/4.1/common/alakazam_common.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#
# Alakazam common functions
#
# This script relies upon global variables
# source alakazam_common.sh
#
# Author: Scott Christley
# Date: Aug 17, 2020
#

# required global variables:
# PYTHON
# AGAVE_JOB_ID
# and...
# The agave app input and parameters

# the app
export APP_NAME=alakazam

# ----------------------------------------------------------------------------
function expandfile () {
fileBasename="${1%.*}" # file.txt.gz -> file.txt
fileExtension="${1##*.}" # file.txt.gz -> gz

if [ ! -f $1 ]; then
echo "Could not find input file $1" 1>&2
exit 1
fi

if [ "$fileExtension" == "gz" ]; then
gunzip $1
export file=$fileBasename
# don't archive the intermediate file
elif [ "$fileExtension" == "bz2" ]; then
bunzip2 $1
export file=$fileBasename
elif [ "$fileExtension" == "zip" ]; then
unzip -o $1
export file=$fileBasename
else
export file=$1
fi
}

# prevent Agave from archiving the file
function noArchive() {
echo $1 >> .agave.archive
}

# ----------------------------------------------------------------------------
# Analysis provenance
function initProvenance() {
# nothing yet
echo "initProvenance"
}

# ----------------------------------------------------------------------------
# Workflow

function print_versions() {
echo "VERSIONS:"
singularity exec ${singularity_image} versions report
echo -e "\nSTART at $(date)"
}

function print_parameters() {
echo "Input files:"
echo "singularity_image=${singularity_image}"
echo "metadata_file=${metadata_file}"
echo "rearrangement_file=${rearrangement_file}"
echo ""
echo "Application parameters:"
echo "gene_usage_flag=${gene_usage_flag}"
echo "aa_properties_flag=${aa_properties_flag}"
echo "aa_properties_trim=${aa_properties_trim}"
}

function run_alakazam_workflow() {
initProvenance

# expand rearrangement file if its compressed
expandfile $rearrangement_file
noArchive $file

# Gene Usage
if [[ $gene_usage_flag -eq 1 ]]; then
singularity exec -B $PWD:/data ${singularity_image} /data/gene_usage.R -d $file
fi

# Amino Acid properties
if [[ $aa_properties_flag -eq 1 ]]; then
# run it
singularity exec -B $PWD:/data ${singularity_image} /data/aa_properties.R -d $file
fi

}
59 changes: 59 additions & 0 deletions apps/alakazam/4.1/common/gene_usage.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env Rscript

# Alakazam gene usage
#
# Author: Scott Christley
# Date: Sep 3, 2020
#

# based upon this script for parsing args with optparse
# https://bitbucket.org/kleinstein/immcantation/src/master/pipelines/shazam-threshold.R

suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("alakazam"))
suppressPackageStartupMessages(library("airr"))

# Define commmandline arguments
opt_list <- list(make_option(c("-d", "--db"), dest="DB",
help="Tabulated data file, in AIRR format (TSV)."))

# Parse arguments
opt <- parse_args(OptionParser(option_list=opt_list))

# Check input file
if (!("DB" %in% names(opt))) {
stop("You must provide a database file with the -d option.")
}

# Read rearrangement data
db <- airr::read_rearrangement(opt$DB)

# allele
genes <- countGenes(db, gene='v_call', group='repertoire_id', mode='allele', copy='duplicate_count')
write.table(genes, row.names=F, sep='\t', file='v_allele_usage.tsv')
genes <- countGenes(db, gene='d_call', group='repertoire_id', mode='allele', copy='duplicate_count')
write.table(genes, row.names=F, sep='\t', file='d_allele_usage.tsv')
genes <- countGenes(db, gene='j_call', group='repertoire_id', mode='allele', copy='duplicate_count')
write.table(genes, row.names=F, sep='\t', file='j_allele_usage.tsv')
# TODO: Alakazam throws an error and stops execution if no data in c_call field
# We need to figure out to check for this
#genes <- countGenes(db, gene='c_call', group='repertoire_id', mode='allele', copy='duplicate_count', fill=T)
#write.table(genes, row.names=F, sep='\t', file='c_allele_usage.tsv')

# gene
genes <- countGenes(db, gene='v_call', group='repertoire_id', mode='gene', copy='duplicate_count')
write.table(genes, row.names=F, sep='\t', file='v_gene_usage.tsv')
genes <- countGenes(db, gene='d_call', group='repertoire_id', mode='gene', copy='duplicate_count')
write.table(genes, row.names=F, sep='\t', file='d_gene_usage.tsv')
genes <- countGenes(db, gene='j_call', group='repertoire_id', mode='gene', copy='duplicate_count')
write.table(genes, row.names=F, sep='\t', file='j_gene_usage.tsv')
#genes <- countGenes(db, gene='c_call', group='repertoire_id', mode='gene', copy='duplicate_count')
#write.table(genes, row.names=F, sep='\t', file='c_gene_usage.tsv')

# family/subgroup
genes <- countGenes(db, gene='v_call', group='repertoire_id', mode='family', copy='duplicate_count')
write.table(genes, row.names=F, sep='\t', file='v_subgroup_usage.tsv')
genes <- countGenes(db, gene='d_call', group='repertoire_id', mode='family', copy='duplicate_count')
write.table(genes, row.names=F, sep='\t', file='d_subgroup_usage.tsv')
genes <- countGenes(db, gene='j_call', group='repertoire_id', mode='family', copy='duplicate_count')
write.table(genes, row.names=F, sep='\t', file='j_subgroup_usage.tsv')
Loading