From e4fe7f4ac140cd2d70e1742deaa88dde14c37d1b Mon Sep 17 00:00:00 2001
From: Andrew Yochum
Date: Fri, 13 Sep 2019 15:46:08 -0700
Subject: [PATCH 1/2] Change Hadoop config to use default AWS credential chain

---
 Dockerfile                | 2 ++
 conf/hadoop/core-site.xml | 8 ++++++++
 2 files changed, 10 insertions(+)
 create mode 100644 conf/hadoop/core-site.xml

diff --git a/Dockerfile b/Dockerfile
index 78b0c1a..d15d3ab 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -46,6 +46,8 @@ RUN curl -sL --retry 3 \
   | tar -x -C /usr/ \
  && rm -rf $HADOOP_HOME/share/doc \
  && chown -R root:root $HADOOP_HOME
+COPY conf/hadoop/core-site.xml /tmp/core-site.xml
+RUN cp /tmp/core-site.xml $HADOOP_HOME/etc/hadoop

 # SPARK
 ENV SPARK_VERSION 2.4.1
diff --git a/conf/hadoop/core-site.xml b/conf/hadoop/core-site.xml
new file mode 100644
index 0000000..fb3f538
--- /dev/null
+++ b/conf/hadoop/core-site.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+  <property>
+    <name>fs.s3a.aws.credentials.provider</name>
+    <value>com.amazonaws.auth.DefaultAWSCredentialsProviderChain</value>
+  </property>
+</configuration>

From 7b022a94fccee95123c20bc8f5183e72f5f85de8 Mon Sep 17 00:00:00 2001
From: Andrew Yochum
Date: Fri, 13 Sep 2019 15:46:33 -0700
Subject: [PATCH 2/2] Add example usage for AWS_PROFILE env var

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index 5679220..c70f7c1 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,10 @@ To start `spark-shell` with your AWS credentials:

     docker run --rm -it -e "AWS_ACCESS_KEY_ID=YOURKEY" -e "AWS_SECRET_ACCESS_KEY=YOURSECRET" -p 4040:4040 gettyimages/spark bin/spark-shell

+To start `spark-shell` with your desired AWS_PROFILE from ${HOME}/.aws/credentials:
+
+    docker run --rm -it -e "AWS_PROFILE=YOURPROFILE" -v "${HOME}/.aws":/root/.aws -p 4040:4040 gettyimages/spark bin/spark-shell
+
 To do a thing with Pyspark

     echo -e "import pyspark\n\nprint(pyspark.SparkContext().parallelize(range(0, 10)).count())" > count.py