diff --git a/collection.mk b/collection.mk index e4cf268..7bf0908 100644 --- a/collection.mk +++ b/collection.mk @@ -4,6 +4,8 @@ commit-collection\ clobber-today +include makerules/docker.mk + ifeq ($(COLLECTION_DIR),) COLLECTION_DIR=collection/ endif @@ -35,10 +37,10 @@ first-pass:: collect second-pass:: collection collect:: $(SOURCE_CSV) $(ENDPOINT_CSV) - digital-land collect $(ENDPOINT_CSV) + $(digital-land) collect $(ENDPOINT_CSV) --collection-dir $(COLLECTION_DIR) collection:: - digital-land collection-save-csv + $(digital-land) collection-save-csv --collection-dir $(COLLECTION_DIR) clobber-today:: rm -rf $(LOG_FILES_TODAY) $(COLLECTION_INDEX) @@ -46,12 +48,20 @@ clobber-today:: makerules:: curl -qfsL '$(SOURCE_URL)/makerules/main/collection.mk' > makerules/collection.mk +# These will run as usual if DEVELOPMENT isn't explicitly set to 1 +ifeq ($(DEVELOPMENT),0) +commit-dataset:: + mkdir -p $(DATASET_DIRS) + git add $(DATASET_DIRS) + git diff --quiet && git diff --staged --quiet || (git commit -m "Data $(shell date +%F)"; git push origin $(BRANCH)) + commit-collection:: git add collection git diff --quiet && git diff --staged --quiet || (git commit -m "Collection $(shell date +%F)"; git push origin $(BRANCH)) save-resources:: aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(RESOURCE_DIR) $(RESOURCE_DIR) +endif load-resources:: aws s3 sync $(RESOURCE_DIR) s3://collection-dataset/$(REPOSITORY)/$(RESOURCE_DIR) diff --git a/docker.mk b/docker.mk new file mode 100644 index 0000000..1e5e95f --- /dev/null +++ b/docker.mk @@ -0,0 +1,89 @@ +ifneq ($(DOCKERISED),1) +DOCKERISED = 0 +DEVELOPMENT = 0 +else +# Run in development mode by default for now +ifeq ($(DEVELOPMENT),0) +DEVELOPMENT = 0 +else +DEVELOPMENT = 1 +endif +endif +$(info DOCKERISED is $(DOCKERISED)) +$(info DEVELOPMENT is $(DEVELOPMENT)) + +EXTRA_MOUNTS := +EXTRA_DL_ARGS := +# ifeq ($(and $(DOCKERISED),$(DEVELOPMENT))) +ifeq ($(DOCKERISED),1) +ifeq ($(DEVELOPMENT),1) +EXTRA_MOUNTS += -v 
$(PWD)/local_collection/collection/log:/pipeline/collection/log +EXTRA_MOUNTS += -v $(PWD)/local_collection/collection/resource:/pipeline/collection/resource +ifneq (,$(wildcard ./fixed)) +EXTRA_MOUNTS += -v $(PWD)/local_collection/fixed:/pipeline/fixed +endif +ifneq (,$(wildcard ./harmonised)) +EXTRA_MOUNTS += -v $(PWD)/local_collection/harmonised:/pipeline/harmonised +endif +ifneq (,$(wildcard ./transformed)) +EXTRA_MOUNTS += -v $(PWD)/local_collection/transformed:/pipeline/transformed +endif + +ifneq ($(LOCAL_SPECIFICATION_PATH),) +EXTRA_MOUNTS += -v $(LOCAL_SPECIFICATION_PATH)/specification:/collection/specification +else ifeq ($(LOCAL_SPECIFICATION),1) +EXTRA_MOUNTS += -v $(PWD)/../specification/specification:/collection/specification +endif + +ifneq ($(LOCAL_DL_PYTHON_PATH),) +EXTRA_MOUNTS += -v $(LOCAL_DL_PYTHON_PATH):/src +else ifeq ($(LOCAL_DL_PYTHON),1) +EXTRA_MOUNTS += -v $(PWD)/../digital-land-python:/src +endif + +endif +$(info EXTRA_MOUNTS is $(EXTRA_MOUNTS)) + +DOCKER_TAG=latest +ECR_URL=public.ecr.aws/l6z6v3j6/ + +EXTRA_DL_ARGS += --specification-dir /collection/specification + +/pipeline/collection/resource.csv: + +/pipeline/collection/source.csv: + +/pipeline/collection/endpoint.csv: + +dockerised = docker run -t \ + -e LOCAL_USER_ID=$(shell id -u) \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_DEFAULT_REGION \ + -e AWS_REGION \ + -e AWS_SECRET_ACCESS_KEY \ + -e AWS_SECURITY_TOKEN \ + -e AWS_SESSION_EXPIRATION \ + -e AWS_SESSION_TOKEN \ + -v $(PWD):/pipeline \ + $(EXTRA_MOUNTS) \ + $(ECR_URL)digital-land-python:$(DOCKER_TAG) + +shell_cmd = $(dockerised) bash + +digital-land = $(dockerised) \ + digital-land \ + $(EXTRA_DL_ARGS) + +docker-pull:: +ifeq ($(DISABLE_DOCKER_PULL),) + docker pull $(ECR_URL)digital-land-python:$(DOCKER_TAG) +endif + +init:: docker-pull +else +shell_cmd = $(SHELL) +digital-land = digital-land +endif + +debug_shell: + $(shell_cmd) diff --git a/makerules.mk b/makerules.mk index 8b0a7cb..15c8861 100644 --- a/makerules.mk +++ 
b/makerules.mk @@ -1,5 +1,6 @@ SOURCE_URL=https://raw.githubusercontent.com/digital-land/ +include makerules/docker.mk # deduce the repository ifeq ($(REPOSITORY),) REPOSITORY=$(shell basename -s .git `git config --get remote.origin.url`) @@ -58,7 +59,7 @@ second-pass:: @: # initialise -ifeq (,$(wildcard /.dockerenv )) +ifeq ($(DOCKERISED),0) init:: pip install --upgrade pip ifneq (,$(wildcard requirements.txt)) diff --git a/pipeline.mk b/pipeline.mk index 801ba59..748c5fa 100644 --- a/pipeline.mk +++ b/pipeline.mk @@ -3,6 +3,8 @@ dataset\ commit-dataset +include makerules/docker.mk + # data sources # collected resources ifeq ($(COLLECTION_DIR),) @@ -53,13 +55,13 @@ endif define run-pipeline = mkdir -p $(@D) $(ISSUE_DIR)$(notdir $(@D)) - digital-land --pipeline-name $(notdir $(@D)) $(DIGITAL_LAND_FLAGS) pipeline --issue-dir $(ISSUE_DIR)$(notdir $(@D)) $(PIPELINE_FLAGS) $< $@ + $(digital-land) --pipeline-name $(notdir $(@D)) $(DIGITAL_LAND_FLAGS) pipeline --issue-dir $(ISSUE_DIR)$(notdir $(@D)) $(PIPELINE_FLAGS) $< $@ endef define build-dataset = mkdir -p $(@D) - time digital-land --pipeline-name $(notdir $(basename $@)) load-entries --output-path $(basename $@).sqlite3 $(^) - time digital-land --pipeline-name $(notdir $(basename $@)) build-dataset $(basename $@).sqlite3 $@ + time $(digital-land) --pipeline-name $(notdir $(basename $@)) load-entries --output-path $(basename $@).sqlite3 $(^) + time $(digital-land) --pipeline-name $(notdir $(basename $@)) build-dataset $(basename $@).sqlite3 $@ endef collection:: collection/pipeline.mk @@ -67,7 +69,7 @@ collection:: collection/pipeline.mk -include collection/pipeline.mk collection/pipeline.mk: collection/resource.csv collection/source.csv - digital-land collection-pipeline-makerules > collection/pipeline.mk + $(shell_cmd) -c "digital-land $(EXTRA_DL_ARGS) collection-pipeline-makerules > collection/pipeline.mk" # restart the make process to pick-up collected resource files second-pass:: @@ -86,46 +88,49 @@ endif endif 
clobber:: - rm -rf $(TRANSFORMED_DIR) $(ISSUE_DIR) $(DATASET_DIR) + $(shell_cmd) -c "rm -rf $(TRANSFORMED_DIR) $(ISSUE_DIR) $(DATASET_DIR)" clean:: - rm -rf ./var + $(shell_cmd) -c "rm -rf ./var" # local copies of the organisation dataset needed by harmonise init:: @mkdir -p $(CACHE_DIR) - curl -qfs "https://raw.githubusercontent.com/digital-land/organisation-dataset/main/collection/organisation.csv" > $(CACHE_DIR)organisation.csv + $(shell_cmd) -c 'curl -qfs "https://raw.githubusercontent.com/digital-land/organisation-dataset/main/collection/organisation.csv" > $(CACHE_DIR)organisation.csv' makerules:: - curl -qfsL '$(SOURCE_URL)/makerules/main/pipeline.mk' > makerules/pipeline.mk + $(shell_cmd) -c "curl -qfsL '$(SOURCE_URL)/makerules/main/pipeline.mk' > makerules/pipeline.mk" + +fetch-s3:: + $(shell_cmd) -c "aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(RESOURCE_DIR) $(RESOURCE_DIR) --no-progress" + +fetch-transformed-s3:: + $(shell_cmd) -c "aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(ISSUE_DIR) $(ISSUE_DIR) --no-progress" + $(shell_cmd) -c "aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(TRANSFORMED_DIR) $(TRANSFORMED_DIR) --no-progress" + $(shell_cmd) -c "aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(DATASET_DIR) $(DATASET_DIR) --no-progress" +# These will run as usual if DEVELOPMENT isn't explicitly set to 1 +ifeq ($(DEVELOPMENT),0) commit-dataset:: mkdir -p $(DATASET_DIRS) git add $(DATASET_DIRS) git diff --quiet && git diff --staged --quiet || (git commit -m "Data $(shell date +%F)"; git push origin $(BRANCH)) -fetch-s3:: - aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(RESOURCE_DIR) $(RESOURCE_DIR) --no-progress - -fetch-transformed-s3:: - aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(ISSUE_DIR) $(ISSUE_DIR) --no-progress - aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(TRANSFORMED_DIR) $(TRANSFORMED_DIR) --no-progress - aws s3 sync s3://collection-dataset/$(REPOSITORY)/$(DATASET_DIR) $(DATASET_DIR) --no-progress - push-collection-s3:: - aws s3 sync 
$(RESOURCE_DIR) s3://collection-dataset/$(REPOSITORY)/$(RESOURCE_DIR) --no-progress - aws s3 cp $(COLLECTION_DIR)/log.csv s3://collection-dataset/$(REPOSITORY)/$(COLLECTION_DIR) --no-progress - aws s3 cp $(COLLECTION_DIR)/resource.csv s3://collection-dataset/$(REPOSITORY)/$(COLLECTION_DIR) --no-progress - aws s3 cp $(COLLECTION_DIR)/source.csv s3://collection-dataset/$(REPOSITORY)/$(COLLECTION_DIR) --no-progress - aws s3 cp $(COLLECTION_DIR)/endpoint.csv s3://collection-dataset/$(REPOSITORY)/$(COLLECTION_DIR) --no-progress + $(shell_cmd) -c "aws s3 sync $(RESOURCE_DIR) s3://collection-dataset/$(REPOSITORY)/$(RESOURCE_DIR) --no-progress" + $(shell_cmd) -c "aws s3 cp $(COLLECTION_DIR)/log.csv s3://collection-dataset/$(REPOSITORY)/$(COLLECTION_DIR) --no-progress" + $(shell_cmd) -c "aws s3 cp $(COLLECTION_DIR)/resource.csv s3://collection-dataset/$(REPOSITORY)/$(COLLECTION_DIR) --no-progress" + $(shell_cmd) -c "aws s3 cp $(COLLECTION_DIR)/source.csv s3://collection-dataset/$(REPOSITORY)/$(COLLECTION_DIR) --no-progress" + $(shell_cmd) -c "aws s3 cp $(COLLECTION_DIR)/endpoint.csv s3://collection-dataset/$(REPOSITORY)/$(COLLECTION_DIR) --no-progress" push-dataset-s3:: - @mkdir -p $(TRANSFORMED_DIR) - aws s3 sync $(TRANSFORMED_DIR) s3://collection-dataset/$(REPOSITORY)/$(TRANSFORMED_DIR) --no-progress - @mkdir -p $(ISSUE_DIR) - aws s3 sync $(ISSUE_DIR) s3://collection-dataset/$(REPOSITORY)/$(ISSUE_DIR) --no-progress - @mkdir -p $(DATASET_DIR) - aws s3 sync $(DATASET_DIR) s3://collection-dataset/$(REPOSITORY)/$(DATASET_DIR) --no-progress + @$(shell_cmd) -c "mkdir -p $(TRANSFORMED_DIR)" + $(shell_cmd) -c "aws s3 sync $(TRANSFORMED_DIR) s3://collection-dataset/$(REPOSITORY)/$(TRANSFORMED_DIR) --no-progress" + @$(shell_cmd) -c "mkdir -p $(ISSUE_DIR)" + $(shell_cmd) -c "aws s3 sync $(ISSUE_DIR) s3://collection-dataset/$(REPOSITORY)/$(ISSUE_DIR) --no-progress" + @$(shell_cmd) -c "mkdir -p $(DATASET_DIR)" + $(shell_cmd) -c "aws s3 sync $(DATASET_DIR) s3://collection-dataset/$(REPOSITORY)/$(DATASET_DIR) --no-progress" +endif 
pipeline-run:: aws batch submit-job --job-name $(REPOSITORY)-$(shell date '+%Y-%m-%d-%H-%M-%S') --job-queue dl-batch-queue --job-definition dl-batch-def --container-overrides '{"environment": [{"name":"BATCH_FILE_URL","value":"https://raw.githubusercontent.com/digital-land/docker-builds/main/pipeline_run.sh"}, {"name" : "REPOSITORY","value" : "$(REPOSITORY)"}]}' diff --git a/render.mk b/render.mk index 7dc304c..fc68053 100644 --- a/render.mk +++ b/render.mk @@ -33,6 +33,7 @@ ifeq ($(VIEW_MODEL),) VIEW_MODEL=$(DATASET_DIR)view_model.sqlite3 endif +include makerules/docker.mk TEMPLATE_FILES=$(wildcard templates/*) @@ -53,7 +54,7 @@ render:: $(TEMPLATE_FILES) $(SPECIFICATION_FILES) $(DATASET_FILES) $(DATASET_PAT ifneq ($(RENDER_COMMAND),) $(RENDER_COMMAND) else - digital-land --pipeline-name $(DATASET) render --dataset-path $(DATASET_PATH) $(RENDER_FLAGS) + $(digital-land) --pipeline-name $(DATASET) render --dataset-path $(DATASET_PATH) $(RENDER_FLAGS) endif @touch ./docs/.nojekyll @@ -62,10 +63,10 @@ server: cd docs && python3 -m http.server clobber clean:: clobber-dataset clobber-docs - + clobber-dataset:: rm -rf $(DATASET_PATH) - + clobber-docs:: rm -rf $(DOCS_DIR)