Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions collection.mk
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
commit-collection\
clobber-today

include makerules/docker.mk

ifeq ($(COLLECTION_DIR),)
COLLECTION_DIR=collection/
endif
Expand Down Expand Up @@ -35,23 +37,31 @@ first-pass:: collect
second-pass:: collection

collect:: $(SOURCE_CSV) $(ENDPOINT_CSV)
digital-land collect $(ENDPOINT_CSV)
$(digital-land) collect $(ENDPOINT_CSV) --collection-dir $(COLLECTION_DIR)

collection::
digital-land collection-save-csv
$(digital-land) collection-save-csv --collection-dir $(COLLECTION_DIR)

clobber-today::
rm -rf $(LOG_FILES_TODAY) $(COLLECTION_INDEX)

makerules::
curl -qfsL '$(SOURCE_URL)/makerules/main/collection.mk' > makerules/collection.mk

# These will run as usual if DEVELOPMENT isn't explicitly set to 1
ifeq ($(DEVELOPMENT),0)
commit-dataset::
mkdir -p $(DATASET_DIRS)
git add $(DATASET_DIRS)
git diff --quiet && git diff --staged --quiet || (git commit -m "Data $(shell date +%F)"; git push origin $(BRANCH))

commit-collection::
git add collection
git diff --quiet && git diff --staged --quiet || (git commit -m "Collection $(shell date +%F)"; git push origin $(BRANCH))

save-resources::
aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(RESOURCE_DIR) $(RESOURCE_DIR)
endif

load-resources::
aws s3 sync $(RESOURCE_DIR) s3://collection-dataset/$(REPOSITORY)/$(RESOURCE_DIR)
Expand Down
89 changes: 89 additions & 0 deletions docker.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# docker.mk — run pipeline commands either natively or inside the
# digital-land-python Docker image, selected by DOCKERISED/DEVELOPMENT.
# Provides: $(dockerised), $(shell_cmd), $(digital-land), docker-pull, debug_shell.

# Normalise the mode flags: anything other than an explicit DOCKERISED=1
# means "not dockerised" (development mode is meaningless without Docker).
ifneq ($(DOCKERISED),1)
DOCKERISED = 0
DEVELOPMENT = 0
else
# Run in development mode by default for now: any value other than an
# explicit DEVELOPMENT=0 enables it (collapses the previous redundant
# ifeq/else that assigned 0 in both arms of the 0-test).
ifneq ($(DEVELOPMENT),0)
DEVELOPMENT = 1
endif
endif
$(info DOCKERISED is $(DOCKERISED))
$(info DEVELOPMENT is $(DEVELOPMENT))

EXTRA_MOUNTS :=
EXTRA_DL_ARGS :=
ifeq ($(DOCKERISED),1)
ifeq ($(DEVELOPMENT),1)
# In development, keep collector output on the host under local_collection/
EXTRA_MOUNTS += -v $(PWD)/local_collection/collection/log:/pipeline/collection/log
EXTRA_MOUNTS += -v $(PWD)/local_collection/collection/resource:/pipeline/collection/resource
ifneq (,$(wildcard ./fixed))
EXTRA_MOUNTS += -v $(PWD)/local_collection/fixed:/pipeline/fixed
endif
ifneq (,$(wildcard ./harmonised))
EXTRA_MOUNTS += -v $(PWD)/local_collection/harmonised:/pipeline/harmonised
endif
# FIX: previously re-tested ./harmonised here; the transformed mount is
# gated on the ./transformed directory existing.
ifneq (,$(wildcard ./transformed))
EXTRA_MOUNTS += -v $(PWD)/local_collection/transformed:/pipeline/transformed
endif

# Mount a local specification checkout: an explicit path takes priority,
# otherwise the sibling ../specification when LOCAL_SPECIFICATION=1.
# FIX: was `ifdef ($(LOCAL_SPECIFICATION_PATH),)` — ifdef takes a bare
# variable name, so that branch could never fire.
ifneq ($(LOCAL_SPECIFICATION_PATH),)
EXTRA_MOUNTS += -v $(LOCAL_SPECIFICATION_PATH)/specification:/collection/specification
else ifeq ($(LOCAL_SPECIFICATION),1)
EXTRA_MOUNTS += -v $(PWD)/../specification/specification:/collection/specification
endif

# Likewise for a local digital-land-python checkout mounted over /src.
# FIX: same invalid ifdef syntax, and the explicit-path branch mounted
# `/Src` (capital S) instead of `/src`, so the override never took effect.
ifneq ($(LOCAL_DL_PYTHON_PATH),)
EXTRA_MOUNTS += -v $(LOCAL_DL_PYTHON_PATH):/src
else ifeq ($(LOCAL_DL_PYTHON),1)
EXTRA_MOUNTS += -v $(PWD)/../digital-land-python:/src
endif

endif
$(info EXTRA_MOUNTS is $(EXTRA_MOUNTS))

DOCKER_TAG=latest
ECR_URL=public.ecr.aws/l6z6v3j6/

EXTRA_DL_ARGS += --specification-dir /collection/specification

# Container-internal paths: declare empty rules so make on the host never
# tries (and fails) to build them when they appear as prerequisites.
/pipeline/collection/resource.csv:

/pipeline/collection/source.csv:

/pipeline/collection/endpoint.csv:

# Run a command inside the pipeline container, forwarding AWS credentials
# from the environment and mounting the working directory at /pipeline.
dockerised = docker run -t \
	-e LOCAL_USER_ID=$(shell id -u) \
	-e AWS_ACCESS_KEY_ID \
	-e AWS_DEFAULT_REGION \
	-e AWS_REGION \
	-e AWS_SECRET_ACCESS_KEY \
	-e AWS_SECURITY_TOKEN \
	-e AWS_SESSION_EXPIRATION \
	-e AWS_SESSION_TOKEN \
	-v $(PWD):/pipeline \
	$(EXTRA_MOUNTS) \
	$(ECR_URL)digital-land-python:$(DOCKER_TAG)

shell_cmd = $(dockerised) bash

digital-land = $(dockerised) \
	digital-land \
	$(EXTRA_DL_ARGS)

.PHONY: docker-pull debug_shell

docker-pull::
# Set DISABLE_DOCKER_PULL to any non-empty value to skip the pull (e.g.
# when iterating on a locally-built image).
# FIX: was `ifndef ($(DISABLE_DOCKER_PULL),)`, which is invalid ifndef
# syntax and always evaluated the same way regardless of the variable.
ifeq ($(DISABLE_DOCKER_PULL),)
	docker pull $(ECR_URL)digital-land-python:$(DOCKER_TAG)
endif

init:: docker-pull
else
# Not dockerised: run everything directly on the host.
shell_cmd = $(SHELL)
digital-land = digital-land
endif

# Open an interactive shell in whichever environment is selected.
debug_shell:
	$(shell_cmd)
3 changes: 2 additions & 1 deletion makerules.mk
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
SOURCE_URL=https://raw.githubusercontent.com/digital-land/

include makerules/docker.mk
# deduce the repository
ifeq ($(REPOSITORY),)
REPOSITORY=$(shell basename -s .git `git config --get remote.origin.url`)
Expand Down Expand Up @@ -58,7 +59,7 @@ second-pass::
@:

# initialise
ifeq (,$(wildcard /.dockerenv ))
ifeq ($(DOCKERISED),0)
init::
pip install --upgrade pip
ifneq (,$(wildcard requirements.txt))
Expand Down
59 changes: 32 additions & 27 deletions pipeline.mk
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
dataset\
commit-dataset

include makerules/docker.mk

# data sources
# collected resources
ifeq ($(COLLECTION_DIR),)
Expand Down Expand Up @@ -53,21 +55,21 @@ endif

define run-pipeline =
mkdir -p $(@D) $(ISSUE_DIR)$(notdir $(@D))
digital-land --pipeline-name $(notdir $(@D)) $(DIGITAL_LAND_FLAGS) pipeline --issue-dir $(ISSUE_DIR)$(notdir $(@D)) $(PIPELINE_FLAGS) $< $@
$(digital-land) --pipeline-name $(notdir $(@D)) $(DIGITAL_LAND_FLAGS) pipeline --issue-dir $(ISSUE_DIR)$(notdir $(@D)) $(PIPELINE_FLAGS) $< $@
endef

define build-dataset =
mkdir -p $(@D)
time digital-land --pipeline-name $(notdir $(basename $@)) load-entries --output-path $(basename $@).sqlite3 $(^)
time digital-land --pipeline-name $(notdir $(basename $@)) build-dataset $(basename $@).sqlite3 $@
time $(digital-land) --pipeline-name $(notdir $(basename $@)) load-entries --output-path $(basename $@).sqlite3 $(^)
time $(digital-land) --pipeline-name $(notdir $(basename $@)) build-dataset $(basename $@).sqlite3 $@
endef

collection:: collection/pipeline.mk

-include collection/pipeline.mk

collection/pipeline.mk: collection/resource.csv collection/source.csv
digital-land collection-pipeline-makerules > collection/pipeline.mk
$(shell_cmd) -c "digital-land $(EXTRA_DL_ARGS) collection-pipeline-makerules > collection/pipeline.mk"

# restart the make process to pick-up collected resource files
second-pass::
Expand All @@ -86,46 +88,49 @@ endif
endif

clobber::
rm -rf $(TRANSFORMED_DIR) $(ISSUE_DIR) $(DATASET_DIR)
$(shell_cmd) rm -rf $(TRANSFORMED_DIR) $(ISSUE_DIR) $(DATASET_DIR)

clean::
rm -rf ./var
$(shell_cmd) rm -rf ./var

# local copies of the organisation dataset needed by harmonise
init::
@mkdir -p $(CACHE_DIR)
curl -qfs "https://raw.githubusercontent.com/digital-land/organisation-dataset/main/collection/organisation.csv" > $(CACHE_DIR)organisation.csv
$(shell_cmd) curl -qfs "https://raw.githubusercontent.com/digital-land/organisation-dataset/main/collection/organisation.csv" > $(CACHE_DIR)organisation.csv

makerules::
curl -qfsL '$(SOURCE_URL)/makerules/main/pipeline.mk' > makerules/pipeline.mk
$(shell_cmd) curl -qfsL '$(SOURCE_URL)/makerules/main/pipeline.mk' > makerules/pipeline.mk

fetch-s3::
$(shell_cmd) aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(RESOURCE_DIR) $(RESOURCE_DIR) --no-progress

fetch-transformed-s3::
$(shell_cmd) aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(ISSUE_DIR) $(ISSUE_DIR) --no-progress
$(shell_cmd) aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(TRANSFORMED_DIR) $(TRANSFORMED_DIR) --no-progress
$(shell_cmd) aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(DATASET_DIR) $(DATASET_DIR) --no-progress

# These will run as usual if DEVELOPMENT isn't explicitly set to 1
ifeq ($(DEVELOPMENT),0)
commit-dataset::
mkdir -p $(DATASET_DIRS)
git add $(DATASET_DIRS)
git diff --quiet && git diff --staged --quiet || (git commit -m "Data $(shell date +%F)"; git push origin $(BRANCH))

fetch-s3::
aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(RESOURCE_DIR) $(RESOURCE_DIR) --no-progress

fetch-transformed-s3::
aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(ISSUE_DIR) $(ISSUE_DIR) --no-progress
aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(TRANSFORMED_DIR) $(TRANSFORMED_DIR) --no-progress
aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(DATASET_DIR) $(DATASET_DIR) --no-progress

push-collection-s3::
aws s3 sync $(RESOURCE_DIR) s3://collection-dataset/$(REPOSITORY)/$(RESOURCE_DIR) --no-progress
aws s3 cp $(COLLECTION_DIR)/log.csv s3://collection-dataset/$(REPOSITORY)/$(COLLECTION_DIR) --no-progress
aws s3 cp $(COLLECTION_DIR)/resource.csv s3://collection-dataset/$(REPOSITORY)/$(COLLECTION_DIR) --no-progress
aws s3 cp $(COLLECTION_DIR)/source.csv s3://collection-dataset/$(REPOSITORY)/$(COLLECTION_DIR) --no-progress
aws s3 cp $(COLLECTION_DIR)/endpoint.csv s3://collection-dataset/$(REPOSITORY)/$(COLLECTION_DIR) --no-progress
$(shell_cmd) aws s3 sync $(RESOURCE_DIR) s3://collection-dataset/$(REPOSITORY)/$(RESOURCE_DIR) --no-progress
$(shell_cmd) aws s3 cp $(COLLECTION_DIR)/log.csv s3://collection-dataset/$(REPOSITORY)/$(COLLECTION_DIR) --no-progress
$(shell_cmd) aws s3 cp $(COLLECTION_DIR)/resource.csv s3://collection-dataset/$(REPOSITORY)/$(COLLECTION_DIR) --no-progress
$(shell_cmd) aws s3 cp $(COLLECTION_DIR)/source.csv s3://collection-dataset/$(REPOSITORY)/$(COLLECTION_DIR) --no-progress
$(shell_cmd) aws s3 cp $(COLLECTION_DIR)/endpoint.csv s3://collection-dataset/$(REPOSITORY)/$(COLLECTION_DIR) --no-progress

push-dataset-s3::
@mkdir -p $(TRANSFORMED_DIR)
aws s3 sync $(TRANSFORMED_DIR) s3://collection-dataset/$(REPOSITORY)/$(TRANSFORMED_DIR) --no-progress
@mkdir -p $(ISSUE_DIR)
aws s3 sync $(ISSUE_DIR) s3://collection-dataset/$(REPOSITORY)/$(ISSUE_DIR) --no-progress
@mkdir -p $(DATASET_DIR)
aws s3 sync $(DATASET_DIR) s3://collection-dataset/$(REPOSITORY)/$(DATASET_DIR) --no-progress
@$(shell_cmd) mkdir -p $(TRANSFORMED_DIR)
$(shell_cmd) aws s3 sync $(TRANSFORMED_DIR) s3://collection-dataset/$(REPOSITORY)/$(TRANSFORMED_DIR) --no-progress
@$(shell_cmd) mkdir -p $(ISSUE_DIR)
$(shell_cmd) aws s3 sync $(ISSUE_DIR) s3://collection-dataset/$(REPOSITORY)/$(ISSUE_DIR) --no-progress
@$(shell_cmd) mkdir -p $(DATASET_DIR)
$(shell_cmd) aws s3 sync $(DATASET_DIR) s3://collection-dataset/$(REPOSITORY)/$(DATASET_DIR) --no-progress
endif

pipeline-run::
aws batch submit-job --job-name $(REPOSITORY)-$(shell date '+%Y-%m-%d-%H-%M-%S') --job-queue dl-batch-queue --job-definition dl-batch-def --container-overrides '{"environment": [{"name":"BATCH_FILE_URL","value":"https://raw.githubusercontent.com/digital-land/docker-builds/main/pipeline_run.sh"}, {"name" : "REPOSITORY","value" : "$(REPOSITORY)"}]}'
Expand Down
7 changes: 4 additions & 3 deletions render.mk
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ ifeq ($(VIEW_MODEL),)
VIEW_MODEL=$(DATASET_DIR)view_model.sqlite3
endif

include makerules/docker.mk

TEMPLATE_FILES=$(wildcard templates/*)

Expand All @@ -53,7 +54,7 @@ render:: $(TEMPLATE_FILES) $(SPECIFICATION_FILES) $(DATASET_FILES) $(DATASET_PAT
ifneq ($(RENDER_COMMAND),)
$(RENDER_COMMAND)
else
digital-land --pipeline-name $(DATASET) render --dataset-path $(DATASET_PATH) $(RENDER_FLAGS)
$(digital-land) --pipeline-name $(DATASET) render --dataset-path $(DATASET_PATH) $(RENDER_FLAGS)
endif
@touch ./docs/.nojekyll

Expand All @@ -62,10 +63,10 @@ server:
cd docs && python3 -m http.server

clobber clean:: clobber-dataset clobber-docs

clobber-dataset::
rm -rf $(DATASET_PATH)

clobber-docs::
rm -rf $(DOCS_DIR)

Expand Down