diff --git a/.travis.yml b/.travis.yml
index 54c2f5e1..d446bcd4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -48,6 +48,7 @@ before_script:
script:
- pylint $TRAVIS_BUILD_DIR/docker/vcf_combiner/usr/bin/combine_vcf.py
- pytest $TRAVIS_BUILD_DIR/docker/vcf_combiner/usr/bin/combine_vcf.py
+ - bash -n $TRAVIS_BUILD_DIR/docker/genomicsdb_builder/usr/bin/build_genomicsdb
- cd $GENOMICSDB_BUILD_DIR
- cmake $TRAVIS_BUILD_DIR -DBUILD_JAVA=1 -DCMAKE_BUILD_TYPE=Coverage -DCMAKE_INSTALL_PREFIX=$GENOMICSDB_INSTALL_DIR -DLIBCSV_DIR=$TRAVIS_BUILD_DIR/dependencies/libcsv -DGENOMICSDB_RELEASE_VERSION=$GENOMICSDB_RELEASE_VERSION -DMAVEN_QUIET=True
- ln -s $TRAVIS_BUILD_DIR/tests
diff --git a/docker/genomicsdb_builder/.dockerignore b/docker/genomicsdb_builder/.dockerignore
new file mode 100644
index 00000000..b084cee3
--- /dev/null
+++ b/docker/genomicsdb_builder/.dockerignore
@@ -0,0 +1,2 @@
+# no build
+/build_src/
diff --git a/docker/genomicsdb_builder/Dockerfile b/docker/genomicsdb_builder/Dockerfile
new file mode 100644
index 00000000..b52947db
--- /dev/null
+++ b/docker/genomicsdb_builder/Dockerfile
@@ -0,0 +1,45 @@
+# The MIT License (MIT)
+# Copyright (c) 2016-2017 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# The code utilized some features of RedHat pre-built container
+
+FROM centos:7
+
+LABEL vendor="Intel Inc." name="GenomicsDB Builder" version="1.0" description="Build GenomicsDB"
+# Enable SCL and EPEL, then install devtoolset-4 and the build dependencies (rsync is called by build_genomicsdb).
+RUN yum install -y --setopt=tsflags=nodocs centos-release-scl && \
+    yum-config-manager --enable rhel-server-rhscl-7-rpms && \
+    yum install -y devtoolset-4 && \
+    yum install -y --setopt=tsflags=nodocs epel-release && \
+    yum repolist && \
+    yum install -y --setopt=tsflags=nodocs cmake git.x86_64 libcsv libcsv-devel mpich-devel openssl-devel rsync zlib-devel unzip.x86_64 && \
+    yum install -y python34.x86_64 && \
+    yum clean all
+
+ENV BASH_ENV=/etc/profile.d/cont-env.sh PATH=$PATH:/usr/lib64/mpich/bin HOME=/home/default
+
+WORKDIR /home/default
+# Overlay the container scripts; COPY is preferred over ADD for plain local files.
+COPY ./usr /usr
+COPY ./etc /etc
+COPY ./root /root
+
+ENTRYPOINT ["/usr/bin/container-entrypoint"]
+CMD ["build_genomicsdb"]
diff --git a/docker/genomicsdb_builder/README.md b/docker/genomicsdb_builder/README.md
new file mode 100644
index 00000000..2fd51a95
--- /dev/null
+++ b/docker/genomicsdb_builder/README.md
@@ -0,0 +1,23 @@
+### GenomicsDB Building Docker
+
+This directory contains the source for a Docker image that provides a GenomicsDB build environment. Details about the GenomicsDB build environment can be found in the [GenomicsDB Wiki](https://github.com/Intel-HLS/GenomicsDB/wiki/Compiling-GenomicsDB#building).
+
+To create a GenomicsDB building environment Docker image, run the following command:
+
+docker build -t name_of_the_builder_image --no-cache .
+
+Once the image is created, you can build the latest GenomicsDB executables by running:
+
+docker run -it -v /path/2/output/:/output/ [-e GDB_BRANCH=branch_name_default_is_master] name_of_the_builder_image
+
+In this case, the docker container builds GenomicsDB with the following options:
+
+* CMAKE_BUILD_TYPE=Release
+* DO_PROFILING=False
+* DISABLE_OPENMP=True
+* BUILD_JAVA=False
+* DO_PROFILING=False
+* CMAKE_INSTALL_PREFIX=/output/
+* PROTOBUF_LIBRARY=dont_worry_the_docker_manage_it
+
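+You can pass GenomicsDB build options as `docker run` command arguments placed after the `build_genomicsdb` command name (for example `docker run ... name_of_the_builder_image build_genomicsdb -DCMAKE_BUILD_TYPE=Debug`); the container passes them on to cmake. When you pass your own options, only CMAKE_INSTALL_PREFIX and PROTOBUF_LIBRARY are still set for you; the other defaults listed above are not applied. Because the container builds the protobuf library internally, you do not need to set the PROTOBUF_LIBRARY option.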
diff --git a/docker/genomicsdb_builder/etc/profile.d/cont-env.sh b/docker/genomicsdb_builder/etc/profile.d/cont-env.sh
new file mode 100644
index 00000000..01a215ef
--- /dev/null
+++ b/docker/genomicsdb_builder/etc/profile.d/cont-env.sh
@@ -0,0 +1 @@
+source /usr/share/cont-lib/cont-env.sh  # this file is what BASH_ENV points at in the Dockerfile, so bash scripts started non-interactively load the container environment
diff --git a/docker/genomicsdb_builder/root/.bashrc b/docker/genomicsdb_builder/root/.bashrc
new file mode 100644
index 00000000..01a215ef
--- /dev/null
+++ b/docker/genomicsdb_builder/root/.bashrc
@@ -0,0 +1 @@
+source /usr/share/cont-lib/cont-env.sh  # run the container environment hooks (cont_source_hooks env common)
diff --git a/docker/genomicsdb_builder/usr/bin/build_genomicsdb b/docker/genomicsdb_builder/usr/bin/build_genomicsdb
new file mode 100755
index 00000000..b21c7c27
--- /dev/null
+++ b/docker/genomicsdb_builder/usr/bin/build_genomicsdb
@@ -0,0 +1,99 @@
+#! /bin/bash
+# The MIT License (MIT)
+# Copyright (c) 2017 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+if [ ! -d /output ]; then
+    echo 'can not find /output/ exit...'
+    exit 2  # this file is executed, not sourced: 'return' would fail here and the build would carry on
+fi
+export BUILD_ROOT=${HOME}/build_src
+export PROTOBUF_LIBRARY=$BUILD_ROOT/protobuf_build
+echo "BUILD_ROOT=$BUILD_ROOT, PROTOBUF_LIBRARY=$PROTOBUF_LIBRARY, GenomicsDB_HOME=$BUILD_ROOT/GenomicsDB"
+# Both protobuf and GenomicsDB are installed into /output.
+protobuf_to_dir=/output
+genomicsdb_to_dir=/output
+
+# Build protobuf 3.0.x under $PROTOBUF_LIBRARY and install it into $protobuf_to_dir; returns 0 on success, 1 otherwise.
+build_proto_buf() {
+    local rc=1  # only a successful 'make && make install' flips this to 0
+    echo "+++ Building protobuf at ${PROTOBUF_LIBRARY}..."
+    mkdir -p /output/protobuf
+    mkdir -p "${PROTOBUF_LIBRARY}" && pushd "${PROTOBUF_LIBRARY}" >/dev/null 2>&1 || return 1
+    git clone https://github.com/google/protobuf.git
+    cd protobuf && git checkout 3.0.x || { echo "ERROR: build_proto_buf cannot check out protobuf 3.0.x"; popd >/dev/null 2>&1; return 1; }
+    ./autogen.sh
+    ./configure --prefix=$protobuf_to_dir --with-pic
+    if [ ! -f ./Makefile ]; then
+        echo "ERROR: build_proto_buf not find Makefile"
+    elif make && make install; then
+        basename $(ls $protobuf_to_dir/bin/protoc)
+        find $protobuf_to_dir/lib/ -name 'libproto*' -type f -exec basename {} \;
+        echo "--- Done building protobuf"
+        rc=0
+    else
+        echo "ERROR: build_proto_buf make failed"
+    fi
+    popd >/dev/null 2>&1
+    return $rc
+}
+
+# Clone GenomicsDB, check out $GDB_BRANCH (default master), configure with cmake, build, install and run the tests.
+build_gdb() {
+    local rc=1 ws=GenomicsDB/build  # rc becomes 0 only after 'make && make install' succeeds
+    echo
+    echo "+++ Building GenomicsDB at ${PWD}/GenomicsDB..."
+    git clone --recursive https://github.com/Intel-HLS/GenomicsDB.git
+    mkdir -p $ws && pushd $ws >/dev/null 2>&1 || return 1
+    branch=${GDB_BRANCH:=master}
+    git checkout "$branch"
+    git branch
+    if [ $# -gt 0 ]; then
+        # Caller-supplied cmake options replace the defaults below; "$@" keeps each option as one word.
+        cmake --warn-uninitialized --debug-output .. -DCMAKE_INSTALL_PREFIX=$genomicsdb_to_dir -DPROTOBUF_LIBRARY=$protobuf_to_dir "$@"
+    else
+        cmake --warn-uninitialized --debug-output .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$genomicsdb_to_dir -DDO_PROFILING=False -DPROTOBUF_LIBRARY=$protobuf_to_dir -DDISABLE_OPENMP=True -DBUILD_JAVA=False -DDO_PROFILING=False
+    fi
+    if [ ! -f ./Makefile ]; then
+        echo "ERROR: build_gdb not find Makefile"
+    elif make && make install; then
+        echo "INFO: Successfully built GenomicsDB... run test"
+        ../tests/run.py $PWD $genomicsdb_to_dir
+        rc=0
+    else
+        echo "ERROR: build_gdb make failed"
+    fi
+    popd >/dev/null 2>&1
+    return $rc
+}
+
+#source /opt/rh/devtoolset-4/enable
+gcc --version
+
+mkdir -p "${BUILD_ROOT}" && pushd "${BUILD_ROOT}" >/dev/null 2>&1 || exit 2
+build_proto_buf
+retst=$?
+if [ $retst -eq 0 ]; then
+    build_gdb "$@"
+    retst=$?
+    rsync -az $genomicsdb_to_dir /usr  # NOTE(review): no trailing slash on the source, so this creates /usr/output -- confirm intent
+fi
+[[ $retst -eq 0 ]] && echo "DONE: built GenomicsDB" || echo "FAIL: cannot build GenomicsDB"
+popd >/dev/null 2>&1
+exit $retst  # 'return' is only valid in a function or a sourced script; this file is executed
diff --git a/docker/genomicsdb_builder/usr/bin/container-entrypoint b/docker/genomicsdb_builder/usr/bin/container-entrypoint
new file mode 100755
index 00000000..d21b6fa0
--- /dev/null
+++ b/docker/genomicsdb_builder/usr/bin/container-entrypoint
@@ -0,0 +1,10 @@
+#!/bin/bash
+# Image entry point: load the container library, run the entry hooks, then exec the requested command.
+. /usr/share/cont-lib/cont-lib.sh
+
+cont_debug "command: $*"
+# Source every readable /usr/share/cont-entry/*.sh before the command starts.
+__cont_source_scripts "/usr/share/cont-entry"
+# With no arguments fall back to a shell; exec replaces this script with the command.
+test -z "$*" && set -- bash
+exec "$@"
diff --git a/docker/genomicsdb_builder/usr/bin/container-usage b/docker/genomicsdb_builder/usr/bin/container-usage
new file mode 100755
index 00000000..062e5200
--- /dev/null
+++ b/docker/genomicsdb_builder/usr/bin/container-usage
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Print the container help: concatenate every *.txt under /usr/share/cont-docs, discarding error output.
+# TODO: support API for executable help scripts
+cat /usr/share/cont-docs/*.txt 2>/dev/null
+
+
diff --git a/docker/genomicsdb_builder/usr/share/cont-docs/70-general.txt b/docker/genomicsdb_builder/usr/share/cont-docs/70-general.txt
new file mode 100644
index 00000000..4fa749cc
--- /dev/null
+++ b/docker/genomicsdb_builder/usr/share/cont-docs/70-general.txt
@@ -0,0 +1,20 @@
+General container help
+----------------------
+
+Run `docker run THIS_IMAGE container-usage` to get this help.
+
+Run `docker run -ti THIS_IMAGE bash` to obtain interactive shell.
+
+Run `docker exec -ti CONTAINERID container-entrypoint` to access already running container.
+
+In order to get the container ID after running the image, pass `--cidfile=`
+option to the `docker run` command. That will instruct Docker to write
+a file with the container ID.
+
+You may try `-e CONT_DEBUG=VAL` with VAL up to 3 to get more verbose debugging
+info.
+
+
+Report bugs to https://github.com/Intel-HLS/GenomicsDB/issues.
+
+
diff --git a/docker/genomicsdb_builder/usr/share/cont-docs/README b/docker/genomicsdb_builder/usr/share/cont-docs/README
new file mode 100644
index 00000000..511e0c9b
--- /dev/null
+++ b/docker/genomicsdb_builder/usr/share/cont-docs/README
@@ -0,0 +1 @@
+Files '*.txt' are automatically read and added to 'container-usage' output.
diff --git a/docker/genomicsdb_builder/usr/share/cont-entry/cont-env.sh b/docker/genomicsdb_builder/usr/share/cont-entry/cont-env.sh
new file mode 100644
index 00000000..01a215ef
--- /dev/null
+++ b/docker/genomicsdb_builder/usr/share/cont-entry/cont-env.sh
@@ -0,0 +1 @@
+source /usr/share/cont-lib/cont-env.sh  # files in /usr/share/cont-entry are sourced by container-entrypoint before it execs the command
diff --git a/docker/genomicsdb_builder/usr/share/cont-layer/common/env/enabledevtoolset-4.sh b/docker/genomicsdb_builder/usr/share/cont-layer/common/env/enabledevtoolset-4.sh
new file mode 100755
index 00000000..3677466c
--- /dev/null
+++ b/docker/genomicsdb_builder/usr/share/cont-layer/common/env/enabledevtoolset-4.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# M:R: this file was part of Red Hat samples
+source scl_source enable devtoolset-4  # presumably switches this shell to the devtoolset-4 toolchain -- confirm against scl-utils docs
diff --git a/docker/genomicsdb_builder/usr/share/cont-lib/autoload/README b/docker/genomicsdb_builder/usr/share/cont-lib/autoload/README
new file mode 100644
index 00000000..b52db739
--- /dev/null
+++ b/docker/genomicsdb_builder/usr/share/cont-lib/autoload/README
@@ -0,0 +1,2 @@
+All '*.sh' files in this directory will be automatically sourced together with
+cont-lib.sh script.
diff --git a/docker/genomicsdb_builder/usr/share/cont-lib/cont-env.sh b/docker/genomicsdb_builder/usr/share/cont-lib/cont-env.sh
new file mode 100644
index 00000000..74682e1c
--- /dev/null
+++ b/docker/genomicsdb_builder/usr/share/cont-lib/cont-env.sh
@@ -0,0 +1,6 @@
+. /usr/share/cont-lib/cont-lib.sh
+
+cont_debug "changing environment variables"
+# Source *.sh from /usr/share/cont-layer/common/env and /usr/share/cont-volume/common/env.
+cont_source_hooks env common
+
diff --git a/docker/genomicsdb_builder/usr/share/cont-lib/cont-lib.sh b/docker/genomicsdb_builder/usr/share/cont-lib/cont-lib.sh
new file mode 100644
index 00000000..8b76a382
--- /dev/null
+++ b/docker/genomicsdb_builder/usr/share/cont-lib/cont-lib.sh
@@ -0,0 +1,97 @@
+__cont_source_scripts()
+{
+    local i                # loop variable: path of the script being considered
+    local dir="$1"         # directory whose '*.sh' files are sourced
+    for i in "$dir"/*.sh; do
+        if test -r "$i"; then  # with default globbing an unmatched pattern stays literal and is skipped here
+            . "$i"
+        fi
+    done
+}
+
+
+# CONT_SOURCE_HOOKS HOOKDIR [PROJECT]
+# -----------------------------------
+# Source '*.sh' files from the following directories (in this order):
+# a. /usr/share/cont-layer/PROJECT/HOOK/
+# b. /usr/share/cont-volume/PROJECT/HOOK/
+#
+# The PROJECT argument is optional because it may be set globally by
+# $CONT_PROJECT environment variable. The need for PROJECT argument is
+# basically to push people to install scripts into their own directories,
+# which will allow easier multi-project containers maintenance.
+cont_source_hooks()
+{
+    local i dir
+    local hook="$1"
+    local project="$CONT_PROJECT"
+
+    # Nothing to do without a HOOKDIR; an explicit PROJECT argument overrides $CONT_PROJECT.
+    test -z "$hook" && return
+    test -n "$2" && project="$2"
+
+    for dir in /usr/share/cont-layer /usr/share/cont-volume; do
+        dir="$dir/$project/$hook"
+        cont_debug2 "loading scripts from $dir"
+        __cont_source_scripts "$dir"
+    done
+}
+# __cont_msg TEXT...: write the arguments, joined into one line, to standard error.
+__cont_msg()
+{
+    echo "$*" >&2
+}
+
+# __cont_dbg LEVEL MSG...: print MSG via __cont_msg unless $CONT_DEBUG is lower than LEVEL.
+__cont_dbg()
+{
+    local lvl="$1"
+    : "${CONT_DEBUG:=0}"
+    test "$CONT_DEBUG" -lt "$lvl" && return
+    shift
+    __cont_msg "debug_$lvl: $*"
+}
+
+# Logging helpers: warn/error always print to stderr; cont_debugN prints unless $CONT_DEBUG is lower than N.
+cont_warn() { __cont_msg "warn: $*" ; }
+cont_error() { __cont_msg "error: $*"; }
+cont_debug() { __cont_dbg 1 "$*" ; }
+cont_debug2() { __cont_dbg 2 "$*" ; }
+cont_debug3() { __cont_dbg 3 "$*" ; }
+
+
+__cont_encode_env()
+{
+    local i val
+    for i in $1   # deliberately unquoted: $1 is a space separated list of variable names
+    do
+        val="${!i}"   # indirect expansion instead of eval, so a name is never executed as code
+        printf ": \${%s=%q}\n" "$i" "$val"
+    done
+}
+
+
+# CONT_STORE_ENV VARIABLES FILENAME
+# ---------------------------------
+# Create source-able script conditionally setting specified VARIABLES by
+# inheriting the values from current environment; Create the file on path
+# FILENAME. Already existing variables will not be changed by sourcing the
+# resulting script. The argument VARIABLES expects list of space separated
+# variable names.
+#
+# Usage:
+# $ my_var=my_value
+# $ my_var2="my value2"
+# $ cont_store_env "my_var my_var2" ~/.my-environment
+# $ cat ~/.my-environment
+# : ${my_var=my_value}
+# : ${my_var2=my\ value2}
+cont_store_env()
+{
+    cont_debug "creating env file '$2'"
+    # If encoding or opening the output file fails, only a warning is emitted.
+    __cont_encode_env "$1" > "$2" \
+        || cont_warn "can't store environment $1 into $2 file"
+}
+
+# Load library extensions: every readable '*.sh' in the autoload directory is sourced with this file.
+__cont_source_scripts "/usr/share/cont-lib/autoload"
diff --git a/docker/genomicsdb_builder/usr/share/cont-lib/parser-simple-config.sh b/docker/genomicsdb_builder/usr/share/cont-lib/parser-simple-config.sh
new file mode 100644
index 00000000..4455957d
--- /dev/null
+++ b/docker/genomicsdb_builder/usr/share/cont-lib/parser-simple-config.sh
@@ -0,0 +1,83 @@
+. "/usr/share/cont-lib/cont-lib.sh"
+
+# SEMICOLON_SPLIT VAR
+# -------------------
+# Split the contents of string variable VAR into list of strings (on separate
+# line), each of those strings will be printed to standard output. The ';' and
+# newline characters are used as split separators. You can use quadrigraph @.,@
+# for ';' character not splitting the VAR (and use @&t@ to expand into empty
+# string). More info about quadrigraphs may be found in autoconf info page.
+cont_semicolon_split()
+{
+    eval set -- "\"\$$1\""    # $1 is a variable NAME; replace it with that variable's value
+    test x = x"$1" && return 0    # empty value: print nothing
+    # First sed splits on ';' and strips leading/trailing whitespace; second sed expands the quadrigraphs.
+    echo "$1" \
+        | sed \
+            -e 's/[[:space:]]*;[[:space:]]*/\n/g' \
+            -e 's/^[[:space:]]*//g' \
+            -e 's/\([^\\]\)[[:space:]]*$/\1/g' \
+        | sed \
+            -e 's|@.,@|;|g' \
+            -e 's|@&t@||g'
+}
+
+# CONT_PARSER_SIMPLE_CONFIG CONFIG_VAR CALLBACK [ARGS]
+# ----------------------------------------------------
+# Parse contents of variable of name CONFIG_VAR, call CALLBACK function (or
+# command) for each parsed configuration option.
+#
+# The format of configuration file is:
+#
+# KEY = VALUE [; ...]
+#
+# Content of KEY is not limited, but keep it sane please (lets say we support
+# the C syntax of identifiers). The VALUE must be single-line string. Should
+# the VALUE contain ';' or '\' character, it must be escaped by '\'.
+#
+# The semantics of CALLBACK you *must* provide is:
+#
+# CALLBACK KEY VALUE [ARGS]
+# -------------------------
+# KEY and VALUE are strings with parsed result. ARGS is additional payload
+# you may provide during CONT_PARSER_SIMPLE_CONFIG call.
+#
+# Example of usage:
+#
+# $ cat script.sh
+# callback()
+# {
+# local var="$1" val="$2"
+# shift 2
+# test -n "$*" && local payload=" [$*]"
+# echo "$var=$val$payload"
+# }
+# config='URL = "http://example.com"; semicolon = "@.,@"'
+# cont_parser_simple_config config callback additional data
+#
+# $ ./script
+# URL="http://example.com" [additional data]
+# semicolon=";" [additional data]
+cont_parser_simple_config()
+{
+    local conf_var="$1"
+    local callback="$2"
+    shift 2
+    # The remaining arguments ("$@") are handed to every CALLBACK invocation.
+    while read line; do  # NOTE(review): 'line' is not declared local, so it leaks into the caller's scope
+        test -z "$line" && continue
+        if [[ $line =~ ^([^[:space:]]+)[[:space:]]*=[[:space:]]*(.*)$ ]]
+        then
+            local k="${BASH_REMATCH[1]}" v="${BASH_REMATCH[2]}"
+            cont_debug3 "calling callback with: $k = $v"
+            "$callback" "$k" "$v" "$@" || {
+                cont_error "$FUNCNAME: callback failed"
+                return 1
+            }
+        else
+            cont_warn "wrong config: $line"
+        fi
+    done < <(cont_semicolon_split "$conf_var")
+    # Reached only when no callback failed; malformed entries were warned about and skipped.
+    return 0
+}
diff --git a/docker/requirements.txt b/docker/requirements.txt
deleted file mode 100644
index 06c44966..00000000
--- a/docker/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-pytest ~= 3.0.7
-pylint ~= 1.7.1
-
diff --git a/docker/vcf_combiner/Dockerfile b/docker/vcf_combiner/Dockerfile
index bc033407..105cb920 100644
--- a/docker/vcf_combiner/Dockerfile
+++ b/docker/vcf_combiner/Dockerfile
@@ -1,5 +1,5 @@
# The MIT License (MIT)
-# Copyright (c) 2016-2017 Intel Corporation
+# Copyright (c) 2017 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
@@ -41,4 +41,11 @@ WORKDIR /tmp
ENV BASH_ENV=/etc/profile.d/cont-env.sh HOME=/home/default PATH=$PATH:/usr/lib64/mpich/bin
+RUN groupadd -r default -f -g 5658 && \
+ useradd -u 5658 -g default -o -c "Default User" default -s /sbin/nologin
+
+#USER default
+
+ENTRYPOINT ["/usr/bin/container-entrypoint"]
+
CMD ["container-usage"]
diff --git a/docker/vcf_combiner/usr/bin/combine_vcf.py b/docker/vcf_combiner/usr/bin/combine_vcf.py
index 4dd9f5b1..0bdd4bfa 100755
--- a/docker/vcf_combiner/usr/bin/combine_vcf.py
+++ b/docker/vcf_combiner/usr/bin/combine_vcf.py
@@ -2,25 +2,25 @@
# pylint: disable=missing-docstring, invalid-name, broad-except, too-many-branches, too-many-locals, line-too-long
"""
- * The MIT License (MIT)
- * Copyright (c) 2016-2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy of
- * this software and associated documentation files (the "Software"), to deal in
- * the Software without restriction, including without limitation the rights to
- * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
- * the Software, and to permit persons to whom the Software is furnished to do so,
- * subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
- * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
- * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
- * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ The MIT License (MIT)
+ Copyright (c) 2016-2017 Intel Corporation
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
+ this software and associated documentation files (the "Software"), to deal in
+ the Software without restriction, including without limitation the rights to
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ the Software, and to permit persons to whom the Software is furnished to do so,
+ subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
import sys
import os
@@ -41,7 +41,6 @@
COL_PARTITION_SIZE_UNIT = 16384
DefaultVIDFile = "/usr/share/cont-intel/vid.json"
-DefaultVIDEnv = 'HOST_VID_PATH' # to run the script at host, specify the path to a vid file
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
##### loader config file tags
@@ -58,7 +57,7 @@
"offload_vcf_output_processing" : True,
"produce_GT_field" : False,
"size_per_column_partition" : COL_PARTITION_SIZE_UNIT,
- "vid_mapping_file": ""
+ "vid_mapping_file": ''
}
cp_pos = namedtuple('pos_only', "begin, vcf_output_filename")
cp_chr = namedtuple('chromosome', 'begin, end, vcf_output_filename')
@@ -83,17 +82,15 @@ class CombineVCF(object):
''' VCF file combiner '''
logger = logging.getLogger("CombineVCF")
brief_options = "i:o:R:c:p"
- full_options = ['samples=', 'output=', 'reference=', 'callsets=', 'produce_GT_field', \
- 'chromosome=', 'begin=', 'end=', 'dryrun', 'version']
+ full_options = ['samples=', 'output=', 'reference=', 'callsets=', 'vid_mapping_file=', \
+ 'produce_GT_field', 'chromosome=', 'begin=', 'end=', 'dryrun']
def __init__(self):
self.dryrun = False
self.output_file = None
+ self.vid_mapping_file = DefaultVIDFile
def _parse_args(self, args):
- def get_vid_mapping_file():
- return DefaultVIDFile if os.path.isfile(DefaultVIDFile) else os.environ[DefaultVIDEnv]
- vmf_args = {"vid_mapping_file": get_vid_mapping_file}
def check_chromosome():
assert begin, 'No begin position is given'
if end:
@@ -106,14 +103,12 @@ def check_chromosome():
chromosome = None
begin = None
end = None
+ vid_mapping_file = DefaultVIDFile
for opt, user_input in myopts:
if opt == '-p' or opt == '--produce_GT_field':
produce_GT_field = True
elif opt == '--dryrun':
self.dryrun = True
- elif opt == '--version':
- print(self.get_version())
- exit()
else:
assert user_input, 'specify a value for option %s' % opt
if opt == '-i' or opt == '--samples':
@@ -128,6 +123,9 @@ def check_chromosome():
assert os.path.isfile(user_input), "specify a valid callset file name"
callset_mapping_file = user_input
num_part_units = self.__check_callset(callset_mapping_file)
+ elif opt == '--vid_mapping_file':
+ assert os.path.isfile(user_input), "specify a valid vid mapping file"
+ vid_mapping_file = user_input
elif opt == '--chromosome':
chromosome = user_input
elif opt == '--begin':
@@ -135,8 +133,7 @@ def check_chromosome():
elif opt == '--end':
end = int(user_input)
else:
- print(self.get_version())
- exit()
+                    print("WARN: unknown option %s, ignored" % opt)
if not callset_mapping_file:
callset_mapping_file = "callsets_%s.json" % datetime.now().strftime("%y%m%d%H%M")
@@ -148,12 +145,13 @@ def check_chromosome():
col_par_setting = check_chromosome() if chromosome \
else get_col_partition(self.output_file, begin if begin else 0)
- loader_cfg = get_loader_cfg(**vmf_args)
+ loader_cfg = get_loader_cfg()
loader_cfg = loader_cfg._replace(reference_genome=reference_genome, \
vcf_output_filename=self.output_file, \
column_partitions=[col_par_setting], \
size_per_column_partition=int(abs(num_part_units)) * COL_PARTITION_SIZE_UNIT, \
callset_mapping_file=callset_mapping_file, \
+ vid_mapping_file=vid_mapping_file, \
produce_GT_field=True if produce_GT_field else False)
return loader_cfg
@@ -214,9 +212,9 @@ def __get_inputs(self, inputfiles):
inputs = [line.strip() for line in inputfiles if os.path.isfile(line)]
return inputs if inputs else RuntimeError("No valid samples input files found")
- @staticmethod
- def generate_loader_config(nt_loader):
- json_fname = "lc_%s" % datetime.now().strftime("%y%m%d%H%M")
+ def generate_loader_config(self, nt_loader):
+ json_fname = os.path.join(os.path.dirname(self.output_file), \
+ "loader_config_%s.json" % datetime.now().strftime("%y%m%d%H%M"))
with open(json_fname, 'w') as ofd:
json.dump(nt_loader._asdict(), ofd)
return json_fname
@@ -254,18 +252,19 @@ def run(self):
raise CombineVCFException("Failed to combining VCF files: %s" % (err))
@staticmethod
- def get_version():
- return "%s %s" % (ProductName, Version)
+ def get_my_name():
+ return ProductName
-def test_quick_test():
+def test_code_runs_after_pylint():
''' a quick test for github check-in '''
- x = CombineVCF().get_version()
+ print("Run a quick test ...")
+ x = CombineVCF().get_my_name()
assert x
- x = x.split()
- assert len(x) == 2
- x = x[1]
- assert x == Version
+ assert x == ProductName
if __name__ == "__main__":
combiner = CombineVCF()
- combiner.run()
+ if len(sys.argv) > 1:
+ combiner.run()
+ else:
+ combiner.get_my_name()
diff --git a/docker/vcf_combiner/usr/bin/container-entrypoint b/docker/vcf_combiner/usr/bin/container-entrypoint
index a26a32bc..10fe7bd0 100755
--- a/docker/vcf_combiner/usr/bin/container-entrypoint
+++ b/docker/vcf_combiner/usr/bin/container-entrypoint
@@ -1,10 +1,25 @@
#!/bin/bash
+. /usr/share/cont-lib/cont-lib.sh
-source /usr/share/cont-lib/cont-lib.sh
+ls /usr/bin/vcf2tiledb > /dev/null 2>&1
+test $? -ne 0 && cont_error "cannot find vcf2tiledb... exiting" && exit 2
-cont_debug "command: $*"
-
-__cont_source_scripts "/usr/share/cont-entry"
+if [ "$#" -gt 0 -a "$1" == "--version" ]; then
+ __cont_msg "info vcf2tiledb version is $(vcf2tiledb --version)"
+ exit 0
+fi
+my_uid=$(id -u)
test -z "$*" && set -- bash
+if [ "$1" == "combine_vcf" ]; then
+ __cont_msg "info run combine_vcf as user id $my_uid, vcf2tiledb version is $(vcf2tiledb --version)"
+ echo
+fi
+
+if [ $my_uid == 5658 ]; then
+ cd $HOME
+else
+ export HOME=/
+fi
+
exec "$@"
diff --git a/docker/vcf_combiner/usr/share/cont-lib/autoload/genomicsdb-env.sh b/docker/vcf_combiner/usr/share/cont-lib/autoload/genomicsdb-env.sh
index bbe1fe83..c79945ca 100644
--- a/docker/vcf_combiner/usr/share/cont-lib/autoload/genomicsdb-env.sh
+++ b/docker/vcf_combiner/usr/share/cont-lib/autoload/genomicsdb-env.sh
@@ -1,2 +1,6 @@
-export HOME=/home/default
+if [ $(id -u) == 5658 ]; then
+ export HOME=/home/default
+else
+ export HOME=/
+fi
export PATH=$PATH:/usr/lib64/mpich/bin