diff --git a/Dockerfile b/Dockerfile
index 78b0c1a..d15d3ab 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -46,6 +46,8 @@ RUN curl -sL --retry 3 \
   | tar -x -C /usr/ \
  && rm -rf $HADOOP_HOME/share/doc \
  && chown -R root:root $HADOOP_HOME
+COPY conf/hadoop/core-site.xml /tmp/core-site.xml
+RUN cp /tmp/core-site.xml $HADOOP_HOME/etc/hadoop

 # SPARK
 ENV SPARK_VERSION 2.4.1
diff --git a/README.md b/README.md
index 5679220..c70f7c1 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,10 @@ To start `spark-shell` with your AWS credentials:

     docker run --rm -it -e "AWS_ACCESS_KEY_ID=YOURKEY" -e "AWS_SECRET_ACCESS_KEY=YOURSECRET" -p 4040:4040 gettyimages/spark bin/spark-shell

+To start `spark-shell` with your desired AWS_PROFILE from ${HOME}/.aws/credentials:
+
+    docker run --rm -it -e "AWS_PROFILE=YOURPROFILE" -v "${HOME}/.aws":/root/.aws -p 4040:4040 gettyimages/spark bin/spark-shell
+
 To do a thing with Pyspark

     echo -e "import pyspark\n\nprint(pyspark.SparkContext().parallelize(range(0, 10)).count())" > count.py
diff --git a/conf/hadoop/core-site.xml b/conf/hadoop/core-site.xml
new file mode 100644
index 0000000..fb3f538
--- /dev/null
+++ b/conf/hadoop/core-site.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+  <property>
+    <name>fs.s3a.aws.credentials.provider</name>
+    <value>com.amazonaws.auth.DefaultAWSCredentialsProviderChain</value>
+  </property>
+</configuration>
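
As a quick smoke test of the new credential path, something like the following can be pasted into a `spark-shell` started with the AWS_PROFILE invocation added to the README above. This is a minimal sketch, not part of the patch: the path `s3a://my-bucket/data/` is a hypothetical placeholder and should be replaced with a bucket the chosen profile can actually read.

    // Verify that S3A resolves credentials from the mounted
    // ~/.aws/credentials profile rather than explicit key/secret env vars.
    // "s3a://my-bucket/data/" is a placeholder path; substitute your own.
    val df = spark.read.text("s3a://my-bucket/data/")
    println(df.count())  // forces an S3 read; fails fast if credential resolution is broken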
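For debugging, the provider chain named in core-site.xml can also be exercised directly from `spark-shell`, since the image ships the AWS SDK v1 classes alongside hadoop-aws. This is an optional diagnostic sketch under that classpath assumption:

    // Resolve credentials through the chain configured in core-site.xml:
    // env vars, system properties, the shared ~/.aws/credentials file,
    // and instance/container credentials, in that order.
    import com.amazonaws.auth.DefaultAWSCredentialsProviderChain
    val creds = new DefaultAWSCredentialsProviderChain().getCredentials()
    // Print a redacted access key id to confirm which credentials were found.
    println(creds.getAWSAccessKeyId.take(4) + "***")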