Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 18 additions & 35 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,48 +1,31 @@
# select operating system
FROM ubuntu:16.04
FROM alpine

# install operating system packages
RUN apt-get update -y && apt-get install git curl gettext unzip wget software-properties-common python python-software-properties python-pip python3-pip dnsutils make -y

## add more packages, if necessary
# install Java8
RUN add-apt-repository ppa:webupd8team/java -y && apt-get update && apt-get -y install openjdk-8-jdk-headless

# install boto3 library for PySpark applications to connect to S3
RUN pip install boto3==1.9


# use bpkg to handle complex bash entrypoints
RUN curl -Lo- "https://raw.githubusercontent.com/bpkg/bpkg/master/setup.sh" | bash
RUN bpkg install cha87de/bashutil -g
## add more bash dependencies, if necessary
# install operating system packages

# add config, init and source files
# entrypoint
ADD init /opt/docker-init
ADD conf /opt/docker-conf

# folders
RUN mkdir /opt/apache-livy
RUN mkdir /var/apache-spark-binaries/

# binaries
# apache livy
RUN wget http://mirror.23media.de/apache/incubator/livy/0.5.0-incubating/livy-0.5.0-incubating-bin.zip -O /tmp/livy.zip
RUN unzip /tmp/livy.zip -d /opt/
# Logging dir
RUN mkdir /opt/livy-0.5.0-incubating-bin/logs

# apache spark
RUN wget https://archive.apache.org/dist/spark/spark-2.3.1/spark-2.3.1-bin-hadoop2.7.tgz -O /tmp/spark-2.3.1-bin-hadoop2.7.tgz
RUN tar -xvzf /tmp/spark-2.3.1-bin-hadoop2.7.tgz -C /opt/

# set Python3 as default
RUN rm /usr/bin/python
RUN ln -s /usr/bin/python3 /usr/bin/python

# Install the runtime packages and fetch Apache Livy, Apache Spark and the extra
# Spark jars in ONE layer, so the downloaded archives can be deleted again
# before the layer is committed.
#  * --no-cache fetches the package index on the fly and leaves no apk cache
#    behind, which replaces the separate (and late) `apk update`.
#  * Both archives come from archive.apache.org, which keeps old releases;
#    the regular mirrors only carry the current ones.
RUN apk add --no-cache \
        bash \
        coreutils \
        curl \
        gettext \
        git \
        libc6-compat \
        make \
        openjdk8 \
        procps \
        unzip \
        wget \
# glibc loader path for glibc-linked binaries; -f so the build does not abort
# if libc6-compat already provides a file at this location
    && ln -sf /lib/libc.musl-x86_64.so.1 /lib/ld-linux-x86-64.so.2 \
    && wget https://archive.apache.org/dist/incubator/livy/0.7.0-incubating/apache-livy-0.7.0-incubating-bin.zip -O /tmp/livy.zip \
    && wget https://archive.apache.org/dist/spark/spark-2.4.3/spark-2.4.3-bin-hadoop2.7.tgz -O /tmp/spark.tgz \
    && unzip -q /tmp/livy.zip -d /opt/ \
    && tar -xzf /tmp/spark.tgz -C /opt/ \
# drop the archives in the same layer that downloaded them
    && rm -f /tmp/livy.zip /tmp/spark.tgz \
    && spark_jars=/opt/spark-2.4.3-bin-hadoop2.7/jars \
# postgres jar
    && wget https://jdbc.postgresql.org/download/postgresql-42.2.10.jar -P "$spark_jars" \
# spark-excel support
    && wget https://oss.sonatype.org/content/repositories/public/com/crealytics/spark-excel_2.11/0.13.1/spark-excel_2.11-0.13.1.jar -P "$spark_jars" \
    && wget https://repo1.maven.org/maven2/org/apache/commons/commons-collections4/4.1/commons-collections4-4.1.jar -P "$spark_jars" \
    && wget https://repo1.maven.org/maven2/org/apache/xmlbeans/xmlbeans/3.1.0/xmlbeans-3.1.0.jar -P "$spark_jars" \
    && wget https://repo1.maven.org/maven2/org/apache/poi/poi-ooxml-schemas/4.1.2/poi-ooxml-schemas-4.1.2.jar -P "$spark_jars" \
# bash helpers (retry, bgo, bgowait) sourced by init/entrypoint from /bashutil;
# the explicit target keeps that path independent of the working directory
    && git clone --depth 1 https://github.com/cha87de/bashutil.git /bashutil


# expose ports
EXPOSE 8998

Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ Based on the latest release of the [Apache Livy project](https://livy.incubator.

### Supported Versions:

Livy-Server: 0.5.0
Livy-Server: 0.7.0

Apache Spark: 2.3.1
Apache Spark: 2.4.3

Python: Python3 (including boto3 1.9)

Expand All @@ -37,6 +37,6 @@ Livy Server start on default port 8998
### Usage:

```bash
docker run -d -p 8998:8998 -e SPARK_MASTER_ENDPOINT=1.2.3.4 -e SPARK_MASTER_PORT=7077 -v /tmp:/tmp cloudiator/livy-server-docker:latest
docker run -d -p 8998:8998 -e SPARK_MASTER_ENDPOINT=1.2.3.4 -e SPARK_MASTER_PORT=7077 -v /tmp:/tmp cloudiator/livy-server:latest
```

47 changes: 0 additions & 47 deletions examples/pi.py

This file was deleted.

31 changes: 17 additions & 14 deletions init/entrypoint
Original file line number Diff line number Diff line change
@@ -1,41 +1,44 @@
#!/bin/bash

# include bpkg dependencies
source /usr/local/bin/retry
source /usr/local/bin/bgo
source /usr/local/bin/bgowait

# global variables
GLOBAL_VAR="xyz"
source /bashutil/retry
source /bashutil/bgo
source /bashutil/bgowait

[[ -z "${LIVY_EXECUTION_MODE}" ]] && mode='local' || mode="${LIVY_EXECUTION_MODE}"

##############################################################################
# validate if all container variables are set
#
# Reads the global $mode, which is derived from LIVY_EXECUTION_MODE at the top
# of this script and defaults to 'local'.
# Returns 0 when the configuration is usable; returns 1 after printing a
# message when a required variable is missing or the mode is not supported.
##############################################################################
function validate(){
# local mode: nothing is required
if [[ $mode == "local" ]]; then
return 0
elif [[ $mode == "cluster" ]]; then
# cluster mode: each of these names must be present in the environment
vars="SPARK_MASTER_ENDPOINT SPARK_MASTER_PORT DEPLOY_MODE"
for var in $vars; do
# compare against the variable NAMES printed by `env` (text before the first
# '='), so a variable that is exported but empty still counts as set
if [[ $(env | awk -F "=" '{print $1}' | grep "^$var$") != "$var" ]]; then
echo "$var not set but required."
return 1
fi
done
# ${GLOBAL_VAR+x} expands to "x" only if GLOBAL_VAR is set, so this fails when
# the variable is unset.
# NOTE(review): this check looks like a leftover of the removed side of the
# diff this text was taken from - confirm whether it is still wanted.
if [[ -z ${GLOBAL_VAR+x} ]]; then
echo "GLOBAL_VAR variable cannot be looked up."
return 1
fi
else
# any mode other than "local" or "cluster" is rejected
echo -e "Unsupported value $LIVY_EXECUTION_MODE"
return 1
fi
}

##############################################################################
# write config vars with configfile template
#
# Only acts when the global $mode is "cluster": renders the template
# /opt/docker-conf/livy.conf through envsubst into the Livy conf directory.
# In any other mode the function does nothing.
##############################################################################
function writeConfigOptions(){
if [ $mode == "cluster" ]; then
echo "write config options"
# export the values so the envsubst child process can see them
export SPARK_MASTER_ENDPOINT=$SPARK_MASTER_ENDPOINT
export SPARK_MASTER_PORT=$SPARK_MASTER_PORT
export DEPLOY_MODE=$DEPLOY_MODE

# NOTE(review): the template is rendered twice, once into the Livy 0.5.0
# directory and once into the 0.7.0 directory. The server is started from the
# 0.7.0 path, so the 0.5.0 line is presumably the removed side of the diff -
# confirm and keep only one.
cat /opt/docker-conf/livy.conf | envsubst > /opt/livy-0.5.0-incubating-bin/conf/livy.conf

cat /opt/docker-conf/livy.conf | envsubst > /opt/apache-livy-0.7.0-incubating-bin/conf/livy.conf
fi
}

function init(){
Expand All @@ -58,9 +61,9 @@ function init(){

function livy_server_service(){

export SPARK_HOME=/opt/spark-2.3.1-bin-hadoop2.7/
export SPARK_HOME=/opt/spark-2.4.3-bin-hadoop2.7/
echo "starting Livy Server!"
/opt/livy-0.5.0-incubating-bin/bin/livy-server start
/opt/apache-livy-0.7.0-incubating-bin/bin/livy-server start

# whatever blocking call
tail -f /dev/null
Expand Down