From 52c12f42f0870323d9d47400bf8d868e286cd3ab Mon Sep 17 00:00:00 2001
From: John Field
Date: Wed, 6 Feb 2019 11:06:20 +0000
Subject: [PATCH 1/3] basic dockerize

---
 README.md          | 12 +++++++++++-
 docker-compose.yml |  9 +++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 docker-compose.yml

diff --git a/README.md b/README.md
index a87d760..51af04f 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 #### What?
 
-* We want to be able to structure our dataset (see "Campaign Lab Data Inventory").
+* We want to be able to structure our dataset - see [Campaign Lab Data Inventory](https://docs.google.com/spreadsheets/d/1s5zWhdXi0-YBUMkK2Le3cfENBsfc29vOnFhnfn8N6dU).
 * In order to do this, we first should define what the structure (schema) of the different data sources are.
 * This will help us down the line to create modules that transform our raw data into our target data, for later export into a database, R package, or any other tools for utilising the data in a highly structured and annotated format.
 
@@ -55,3 +55,13 @@
 * *source* is a link (if available) to the actual dataset.
 * The *description* is a one liner that describes the dataset
 * *properties* is a list of the *datapoints* that we want to *end up with after transforming the raw dataset*.
+
+
+### Dockerized
+I'm learning my way around data science and Python, so I'm working with Docker to improve reproducibility, among other benefits.
+For now, the entire repo is mounted into the image's workspace.
+
+* `docker-compose up`
+* Get a login URL (localhost:8888?token=...) from the output
+* `docker exec -it jupyter-notebook /bin/bash`
+* `python -c 'from london_election_results import get_data; print(get_data())'`
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..7352482
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,9 @@
+version: '3.1'
+services:
+  notebook:
+    image: jupyter/datascience-notebook
+    container_name: jupyter-notebook
+    ports:
+      - "8888:8888"
+    volumes:
+      - ./:/home/jovyan/work

From 0955ab0d2e9f8360c41bf2c1eeffe4d61d6596d4 Mon Sep 17 00:00:00 2001
From: John Field
Date: Thu, 7 Feb 2019 06:58:37 +0000
Subject: [PATCH 2/3] Add basic elasticsearch for dev ETL

---
 docker-compose.yml | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/docker-compose.yml b/docker-compose.yml
index 7352482..a1169cf 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -7,3 +7,26 @@ services:
       - "8888:8888"
     volumes:
       - ./:/home/jovyan/work
+  dejavu:
+    image: appbaseio/dejavu
+    container_name: dejavu
+    ports:
+      - "1358:1358"
+    volumes:
+      - ./:/home/jovyan/work
+  elasticsearch:
+    image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.5.2
+    container_name: elasticsearch
+    ports:
+      - "9200:9200"
+    #volumes:
+    #  - ./env/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml
+    environment:
+      discovery.type: "single-node"
+      ES_JAVA_OPTS: "-Xmx256m -Xms256m"
+      http.port: "9200"
+      http.cors.enabled: "true"
+      http.cors.allow-origin: "http://localhost:1358,http://127.0.0.1:1358"
+      http.cors.allow-headers: "X-Requested-With,X-Auth-Token,Content-Type,Content-Length,Authorization"
+      http.cors.allow-methods: "OPTIONS, HEAD, GET, PUT, POST, DELETE"
+      http.cors.allow-credentials: "true"

From 61abbd1a660c5c0fd9d8f690255d423dcb838d3e Mon Sep 17 00:00:00 2001
From: John Field
Date: Thu, 7 Feb 2019 13:01:34 +0000
Subject: [PATCH 3/3] Actually, just use kibana

---
 README.md          | 27 +++++++++++++++++++--------
 docker-compose.yml | 43 ++++++++++++++++++++++---------------------
 2 files changed, 41 insertions(+), 29 deletions(-)

diff --git a/README.md b/README.md
index 51af04f..4b36bab 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,10 @@
 ## Campaign Lab Data Pipeline
 
+For context, see [Campaign Lab Guide](https://github.com/CampaignLab/Campaign-Lab-Guide/blob/master/Campaign%20Lab%20Guide.md).
+
 #### What?
 
-* We want to be able to structure our dataset - see [Campaign Lab Data Inventory](https://docs.google.com/spreadsheets/d/1s5zWhdXi0-YBUMkK2Le3cfENBsfc29vOnFhnfn8N6dU).
+* We want to be able to structure our dataset from the Data Inventory.
 * In order to do this, we first should define what the structure (schema) of the different data sources are.
 * This will help us down the line to create modules that transform our raw data into our target data, for later export into a database, R package, or any other tools for utilising the data in a highly structured and annotated format.
 
@@ -57,11 +59,20 @@
 * *properties* is a list of the *datapoints* that we want to *end up with after transforming the raw dataset*.
 
 
-### Dockerized
-I'm learning my way around data science and Python, so I'm working with Docker to improve reproducibility, among other benefits.
-For now, the entire repo is mounted into the image's workspace.
+### Toolset
+(The author is learning his way around data science and Python; better approaches are welcome.)
+Datasets are expected to be largely static; transformers are intended to be run manually and eyeballed as needed, rather than automated.
+They can be run in a local environment.
+For reproducibility and dev tooling, they can also be run in a container environment via Docker.
+
+Run a specific command:
+`docker-compose run datascience python -c 'from london_election_results import get_data; print(get_data())'`
+
+Running the environment:
 
-* `docker-compose up`
-* Get a login URL (localhost:8888?token=...) from the output
-* `docker exec -it jupyter-notebook /bin/bash`
-* `python -c 'from london_election_results import get_data; print(get_data())'`
\ No newline at end of file
+* `docker-compose up`
+* `http://localhost:9200` (Elasticsearch)
+* `http://localhost:5601` (Kibana)
+* Import a CSV with e.g.
+* `docker-compose exec datascience elasticsearch_loader --es-host http://elasticsearch:9200 --index campaignlab --type campaignlab csv ../schemas/local_election_results_2018-05-03.csv`
+* Follow https://www.elastic.co/guide/en/kibana/current/tutorial-build-dashboard.html to visualise.
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index a1169cf..29b932c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,32 +1,33 @@
 version: '3.1'
 services:
-  notebook:
-    image: jupyter/datascience-notebook
-    container_name: jupyter-notebook
+  datascience:
+    image: civisanalytics/datascience-python:4.2.0
+    container_name: datascience-python
     ports:
       - "8888:8888"
     volumes:
-      - ./:/home/jovyan/work
-  dejavu:
-    image: appbaseio/dejavu
-    container_name: dejavu
-    ports:
-      - "1358:1358"
-    volumes:
-      - ./:/home/jovyan/work
+      - ./:/pipeline
+    working_dir: "/pipeline/transformers"
+    tty: true
+    # Keep the container running idle, so commands can be exec'd against it.
+ command: [ "/bin/sh", "-c", "pip install elasticsearch-loader; tail -f /dev/null"] elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.5.2 + image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.6.0 container_name: elasticsearch ports: - "9200:9200" - #volumes: - # - ./env/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml environment: - discovery.type: "single-node" + CLUSTER_NAME: "campaignlab" + HTTP_PORT: "9200" + DISCOVERY_TYPE: "single-node" ES_JAVA_OPTS: "-Xmx256m -Xms256m" - http.port: "9200" - http.cors.enabled: "true" - http.cors.allow-origin: "http://localhost:1358,http://127.0.0.1:1358" - http.cors.allow-headers: "X-Requested-With,X-Auth-Token,Content-Type,Content-Length,Authorization" - http.cors.allow-methods: "OPTIONS, HEAD, GET, PUT, POST, DELETE" - http.cors.allow-credentials: "true" + + kibana: + image: docker.elastic.co/kibana/kibana-oss:6.6.0 + container_name: kibana + ports: + - "5601:5601" + - "8080:8080" + environment: + SERVER_NAME: "kibana" + ELASTICSEARCH_HOSTS: "http://elasticsearch:9200"