From d8e8324d6da1088400ad20da2d7818ed2ce83c94 Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Thu, 21 Nov 2024 12:52:43 +0300 Subject: [PATCH 01/25] fix(#182): add dbt-local-path --- dbt/dbt-run.py | 5 +++++ docker-compose.local.yml | 43 ++++++++++++++++++++++++++++++++++++++++ env.template | 3 +++ 3 files changed, 51 insertions(+) create mode 100644 docker-compose.local.yml diff --git a/dbt/dbt-run.py b/dbt/dbt-run.py index 7906e94..f5e9f2e 100644 --- a/dbt/dbt-run.py +++ b/dbt/dbt-run.py @@ -61,6 +61,11 @@ def get_package(): "revision": init_package.fragment }]}) + if os.getenv("DBT_LOCAL_PATH"): + package_json = json.dumps({"packages": [{ + "local": '/dbt/local/' + }]}) + with open("/dbt/packages.yml", "w") as f: f.write(package_json) diff --git a/docker-compose.local.yml b/docker-compose.local.yml new file mode 100644 index 0000000..fa62659 --- /dev/null +++ b/docker-compose.local.yml @@ -0,0 +1,43 @@ +name: ${COMPOSE_PROJECT_NAME:-cht-sync} +services: + couch2pg: + build: ./couch2pg/ + extra_hosts: + - "host.docker.internal:host-gateway" + logging: + driver: "json-file" + options: + max-size: "512m" + environment: + - COUCHDB_USER=${COUCHDB_USER} + - COUCHDB_PASSWORD=${COUCHDB_PASSWORD} + - COUCHDB_HOST=${COUCHDB_HOST} + - COUCHDB_DBS=${COUCHDB_DBS} + - COUCHDB_PORT=${COUCHDB_PORT} + - COUCHDB_SECURE=${COUCHDB_SECURE:-true} + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} + - POSTGRES_HOST=${POSTGRES_HOST} + - POSTGRES_DB=${POSTGRES_DB} + - POSTGRES_PORT=5432 + - POSTGRES_SCHEMA=${POSTGRES_SCHEMA} + - POSTGRES_TABLE=${POSTGRES_TABLE} + restart: always + + dbt: + build: ./dbt/ + working_dir: /dbt/ + volumes: + - "${DBT_LOCAL_PATH:-dbt}:/dbt/local/" + environment: + - POSTGRES_HOST=${POSTGRES_HOST} + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} + - POSTGRES_DB=${POSTGRES_DB} + - POSTGRES_TABLE=${POSTGRES_TABLE} + - POSTGRES_SCHEMA=${POSTGRES_SCHEMA} + - ROOT_POSTGRES_SCHEMA=${POSTGRES_SCHEMA} + - 
CHT_PIPELINE_BRANCH_URL=${CHT_PIPELINE_BRANCH_URL} + - DATAEMON_INTERVAL=${DATAEMON_INTERVAL} + - DBT_PACKAGE_TARBALL_URL=${DBT_PACKAGE_TARBALL_URL} + - DBT_LOCAL_PATH=${DBT_LOCAL_PATH} diff --git a/env.template b/env.template index eb25151..96206c6 100644 --- a/env.template +++ b/env.template @@ -11,6 +11,9 @@ POSTGRES_PORT=5432 CHT_PIPELINE_BRANCH_URL="https://github.com/medic/cht-pipeline.git#main" DATAEMON_INTERVAL=5 +# if running locally, path to pipeline +DBT_LOCAL_PATH="/path/to/cht-pipeline/" + # couchdb COUCHDB_USER=medic COUCHDB_PASSWORD=password From c7bf41f12c3a72370cb3caf2d440e7dedb4c99a6 Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Thu, 21 Nov 2024 13:01:41 +0300 Subject: [PATCH 02/25] fix(#182): changing path name --- dbt/dbt-run.py | 2 +- docker-compose.local.yml | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/dbt/dbt-run.py b/dbt/dbt-run.py index f5e9f2e..8f30dd1 100644 --- a/dbt/dbt-run.py +++ b/dbt/dbt-run.py @@ -63,7 +63,7 @@ def get_package(): if os.getenv("DBT_LOCAL_PATH"): package_json = json.dumps({"packages": [{ - "local": '/dbt/local/' + "local": '/dbt/package/' }]}) with open("/dbt/packages.yml", "w") as f: diff --git a/docker-compose.local.yml b/docker-compose.local.yml index fa62659..cae6b09 100644 --- a/docker-compose.local.yml +++ b/docker-compose.local.yml @@ -28,7 +28,7 @@ services: build: ./dbt/ working_dir: /dbt/ volumes: - - "${DBT_LOCAL_PATH:-dbt}:/dbt/local/" + - "${DBT_LOCAL_PATH}:/dbt/package/" environment: - POSTGRES_HOST=${POSTGRES_HOST} - POSTGRES_USER=${POSTGRES_USER} @@ -37,7 +37,5 @@ services: - POSTGRES_TABLE=${POSTGRES_TABLE} - POSTGRES_SCHEMA=${POSTGRES_SCHEMA} - ROOT_POSTGRES_SCHEMA=${POSTGRES_SCHEMA} - - CHT_PIPELINE_BRANCH_URL=${CHT_PIPELINE_BRANCH_URL} - DATAEMON_INTERVAL=${DATAEMON_INTERVAL} - - DBT_PACKAGE_TARBALL_URL=${DBT_PACKAGE_TARBALL_URL} - DBT_LOCAL_PATH=${DBT_LOCAL_PATH} From 6e546621ffd6c7dc6f1ae18d249aa2cc5be0895c Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Thu, 21 Nov 2024 
13:18:30 +0300 Subject: [PATCH 03/25] chore(#182): changing tests to use local path --- package.json | 6 +++--- tests/.e2e-env | 2 +- tests/dbt/Dockerfile | 15 --------------- tests/dbt/docker-compose.yml | 5 ----- tests/dbt/nginx.conf | 21 --------------------- 5 files changed, 4 insertions(+), 45 deletions(-) delete mode 100644 tests/dbt/Dockerfile delete mode 100644 tests/dbt/docker-compose.yml delete mode 100644 tests/dbt/nginx.conf diff --git a/package.json b/package.json index d3ecf66..8558d9b 100644 --- a/package.json +++ b/package.json @@ -5,10 +5,10 @@ "main": "", "scripts": { "postinstall": "cd couch2pg && npm ci", - "test:e2e": "npm run test:e2e-data && npm run test:e2e-containers && mocha tests/**/*.spec.js --timeout 50000; npm run test:e2e-stop-containers ", + "test:e2e": "npm run test:e2e-data && npm run test:e2e-containers && mocha tests/**/*.spec.js --timeout 50000 && npm run test:e2e-stop-containers ", "lint": "eslint --color --cache .", - "test:e2e-stop-containers": "docker compose --env-file ./tests/.e2e-env -f docker-compose.yml -f docker-compose.couchdb.yml -f docker-compose.postgres.yml down -v", - "test:e2e-containers": "docker compose --env-file ./tests/.e2e-env -f docker-compose.yml -f docker-compose.couchdb.yml -f docker-compose.postgres.yml -f tests/dbt/docker-compose.yml up -d --build --force-recreate", + "test:e2e-stop-containers": "docker compose --env-file ./tests/.e2e-env -f docker-compose.local.yml -f docker-compose.couchdb.yml -f docker-compose.postgres.yml down -v", + "test:e2e-containers": "docker compose --env-file ./tests/.e2e-env -f docker-compose.local.yml -f docker-compose.couchdb.yml -f docker-compose.postgres.yml up -d --build --force-recreate", "test:e2e-data": "cd tests/data && rm -rf ./json_docs && cht csv-to-docs", "test": "cd couch2pg && npm run test" }, diff --git a/tests/.e2e-env b/tests/.e2e-env index 9d57327..51f0892 100644 --- a/tests/.e2e-env +++ b/tests/.e2e-env @@ -7,7 +7,7 @@ DBT_POSTGRES_USER="postgres" 
DBT_POSTGRES_PASSWORD="postgres" DBT_POSTGRES_SCHEMA="dbt" DBT_POSTGRES_HOST="postgres" -DBT_PACKAGE_TARBALL_URL="http://dbt-package/dbt/package.tar.gz" +DBT_LOCAL_PATH="./tests/dbt/package" DATAEMON_INTERVAL=0 COUCHDB_USER="medic" COUCHDB_PASSWORD="password" diff --git a/tests/dbt/Dockerfile b/tests/dbt/Dockerfile deleted file mode 100644 index 5d90812..0000000 --- a/tests/dbt/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM nginx:1.25.1-alpine as base_nginx - -RUN apk add --update --no-cache \ - curl \ - socat \ - sed \ - bash \ - tar - -WORKDIR /dbt - -COPY nginx.conf /etc/nginx/nginx.conf -COPY package ./package -RUN tar -czvf ./package.tar.gz ./package - diff --git a/tests/dbt/docker-compose.yml b/tests/dbt/docker-compose.yml deleted file mode 100644 index 32f0378..0000000 --- a/tests/dbt/docker-compose.yml +++ /dev/null @@ -1,5 +0,0 @@ -services: - dbt-package: - build: ./tests/dbt/ - ports: - - "10080:80" diff --git a/tests/dbt/nginx.conf b/tests/dbt/nginx.conf deleted file mode 100644 index 2e743aa..0000000 --- a/tests/dbt/nginx.conf +++ /dev/null @@ -1,21 +0,0 @@ -user nginx; -worker_processes auto; - -events { - worker_connections 10240; -} - -http { - sendfile on; - keepalive_timeout 3600; - - server { - listen 80; - listen [::]:80; - - root /; - location / { - try_files $uri $uri/ =404; - } - } -} From 6f7fc59d7f308c8520261b475c8e22fcf143d4a5 Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Wed, 15 Jan 2025 13:22:20 +0300 Subject: [PATCH 04/25] fix: adding dbt-thread-count to local docker-compose --- docker-compose.local.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.local.yml b/docker-compose.local.yml index cae6b09..202a249 100644 --- a/docker-compose.local.yml +++ b/docker-compose.local.yml @@ -39,3 +39,4 @@ services: - ROOT_POSTGRES_SCHEMA=${POSTGRES_SCHEMA} - DATAEMON_INTERVAL=${DATAEMON_INTERVAL} - DBT_LOCAL_PATH=${DBT_LOCAL_PATH} + - DBT_THREAD_COUNT=${DBT_THREAD_COUNT} From 4fc4018d12367b159c18100aaf4c0696638fa3b6 Mon Sep 17 
00:00:00 2001 From: Tom Wier Date: Wed, 15 Jan 2025 14:32:04 +0300 Subject: [PATCH 05/25] feat: using dbt selectors to test separating dbt runs in k8s --- dbt/dbt-run.py | 7 ++++++- deploy/cht_sync/templates/dbt.yaml | 33 +++++++++++++++++------------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/dbt/dbt-run.py b/dbt/dbt-run.py index 7906e94..4521a21 100644 --- a/dbt/dbt-run.py +++ b/dbt/dbt-run.py @@ -131,7 +131,12 @@ def update_models(): def run_incremental_models(): # update incremental models (and tables if there are any) - subprocess.run(["dbt", "run", "--profiles-dir", ".dbt", "--exclude", "config.materialized:view"]) + args = ["dbt", "run", "--profiles-dir", ".dbt", "--exclude", "config.materialized:view"] + selector = os.getenv("DBT_SELECTOR") + if selector: + args.append('--select') + args.append(selector) + subprocess.run(args) if __name__ == "__main__": diff --git a/deploy/cht_sync/templates/dbt.yaml b/deploy/cht_sync/templates/dbt.yaml index 8d2151f..05f4404 100644 --- a/deploy/cht_sync/templates/dbt.yaml +++ b/deploy/cht_sync/templates/dbt.yaml @@ -1,3 +1,6 @@ +{{- $dbt_selectors := .Values.dbt_selectors -}} +{{- range $index, $dbt_selector := $dbt_selectors }} +--- apiVersion: apps/v1 kind: Deployment metadata: @@ -15,30 +18,32 @@ spec: automountServiceAccountToken: false containers: - name: dbt - image: medicmobile/dataemon:{{ .Values.image_tag | default "latest" }} + image: medicmobile/dataemon:{{ $.Values.image_tag | default "latest" }} resources: limits: - cpu: {{ (.Values.dbt).cpu_limit | default "500m" }} - memory: {{ (.Values.dbt).memory_limit | default "1Gi" }} + cpu: {{ ($.Values.dbt).cpu_limit | default "500m" }} + memory: {{ ($.Values.dbt).memory_limit | default "1Gi" }} env: - name: POSTGRES_HOST - value: {{ .Values.postgres.host | default "postgres" }} + value: {{ $.Values.postgres.host | default "postgres" }} - name: POSTGRES_USER - value: {{ .Values.postgres.user }} + value: {{ $.Values.postgres.user }} - name: 
POSTGRES_PORT - value: {{ .Values.postgres.port | default "5432" | quote }} + value: {{ $.Values.postgres.port | default "5432" | quote }} - name: POSTGRES_PASSWORD - value: {{ .Values.postgres.password }} + value: {{ $.Values.postgres.password }} - name: POSTGRES_DB - value: {{ .Values.postgres.db }} + value: {{ $.Values.postgres.db }} - name: POSTGRES_TABLE - value: {{ .Values.postgres.table }} + value: {{ $.Values.postgres.table }} - name: POSTGRES_SCHEMA - value: {{ .Values.postgres.schema }} + value: {{ $.Values.postgres.schema }} - name: ROOT_POSTGRES_SCHEMA - value: {{ .Values.postgres.schema }} + value: {{ $.Values.postgres.schema }} - name: CHT_PIPELINE_BRANCH_URL - value: {{ .Values.cht_pipeline_branch_url }} + value: {{ $.Values.cht_pipeline_branch_url }} - name: DBT_THREAD_COUNT - value: {{ .Values.dbt_thread_count | default "1" | quote }} - + value: {{ $.Values.dbt_thread_count | default "1" | quote }} + - name: DBT_SELECTOR + value: {{ $dbt_selector }} +{{- end }} From e2ee2ffa471d3d861600a799dbddd4d1508c4bea Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Thu, 30 Jan 2025 11:28:12 +0300 Subject: [PATCH 06/25] fix(#172): adding selector to manifest query --- dbt/dbt-run.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/dbt/dbt-run.py b/dbt/dbt-run.py index 4521a21..bd8d7ff 100644 --- a/dbt/dbt-run.py +++ b/dbt/dbt-run.py @@ -6,6 +6,7 @@ from urllib.parse import urlparse +dbt_selector = os.getenv("DBT_SELECTOR") def connection(): for attempt in range(5): @@ -37,7 +38,9 @@ def setup(): CREATE TABLE IF NOT EXISTS {os.getenv('POSTGRES_SCHEMA')}._dataemon ( inserted_on TIMESTAMP DEFAULT NOW(), - packages jsonb, manifest jsonb + packages jsonb, + manifest jsonb, + dbt_selector text ) """) conn.commit() @@ -74,8 +77,9 @@ def get_manifest(): cur.execute(f""" SELECT manifest FROM {os.getenv('POSTGRES_SCHEMA')}._dataemon + WHERE dbt_selector = %s OR (dbt_selector IS NULL AND %s IS NULL) ORDER BY inserted_on DESC 
- """) + """, (dbt_selector,dbt_selector)) manifest = cur.fetchone() # save to file if found @@ -97,14 +101,14 @@ def save_package_manifest(package_json, manifest_json): with conn.cursor() as cur: # because manifest is large, delete old entries # we only want the current/latest data - cur.execute( - f"DELETE FROM {os.getenv('POSTGRES_SCHEMA')}._dataemon " - ) - cur.execute( - f"INSERT INTO {os.getenv('POSTGRES_SCHEMA')}._dataemon " - "(packages, manifest) VALUES (%s, %s);", - [package_json, manifest_json] - ) + cur.execute(f""" + DELETE FROM {os.getenv('POSTGRES_SCHEMA')}._dataemon + WHERE dbt_selector = %s OR (dbt_selector IS NULL AND %s IS NULL) + """, (dbt_selector,dbt_selector)) + cur.execute(f""" + INSERT INTO {os.getenv('POSTGRES_SCHEMA')}._dataemon + (packages, manifest, dbt_selector) VALUES (%s, %s, %s); + """, (package_json, manifest_json, dbt_selector)) conn.commit() @@ -132,10 +136,9 @@ def update_models(): def run_incremental_models(): # update incremental models (and tables if there are any) args = ["dbt", "run", "--profiles-dir", ".dbt", "--exclude", "config.materialized:view"] - selector = os.getenv("DBT_SELECTOR") - if selector: + if dbt_selector: args.append('--select') - args.append(selector) + args.append(dbt_selector) subprocess.run(args) From 5c872f7bc6934f2940a07bbc956b0da14eceb6cc Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Thu, 30 Jan 2025 12:22:46 +0300 Subject: [PATCH 07/25] fate(#172): adding source column to couch table --- couch2pg/src/importer.js | 21 ++++++++------- couch2pg/src/setup.js | 6 +++++ couch2pg/tests/unit/importer.spec.js | 39 +++++++++++++++++++++------- couch2pg/tests/unit/setup.spec.js | 6 ++++- 4 files changed, 52 insertions(+), 20 deletions(-) diff --git a/couch2pg/src/importer.js b/couch2pg/src/importer.js index 97b1fb6..bad3f11 100644 --- a/couch2pg/src/importer.js +++ b/couch2pg/src/importer.js @@ -15,11 +15,12 @@ const UPDATE_SEQ_STMT = ` SET seq = $1, pending = $2, updated_at = CURRENT_TIMESTAMP WHERE source = 
$3 `; -const INSERT_DOCS_STMT = `INSERT INTO ${db.postgresTable} (saved_timestamp, _id, _deleted, doc) VALUES`; +const INSERT_DOCS_STMT = `INSERT INTO ${db.postgresTable} (saved_timestamp, _id, _deleted, source, doc) VALUES`; const ON_CONFLICT_STMT = ` ON CONFLICT (_id) DO UPDATE SET saved_timestamp = EXCLUDED.saved_timestamp, _deleted = EXCLUDED._deleted, + source = EXCLUDED.source, doc = EXCLUDED.doc `; @@ -63,7 +64,7 @@ const storeSeq = async (seq, pending, source) => { await client.end(); }; -const buildBulkInsertQuery = (allDocs) => { +const buildBulkInsertQuery = (allDocs, source) => { const now = new Date().toISOString(); let idx = 1; @@ -72,8 +73,8 @@ const buildBulkInsertQuery = (allDocs) => { allDocs.rows.forEach((row) => { removeSecurityDetails(row.doc); - insertStmts.push(`($${idx++}, $${idx++}, $${idx++}, $${idx++})`); - docsToInsert.push(now, row.id, !!row.deleted, sanitise(JSON.stringify(row.doc))); + insertStmts.push(`($${idx++}, $${idx++}, $${idx++}, $${idx++}, $${idx++})`); + docsToInsert.push(now, row.id, !!row.deleted, source, sanitise(JSON.stringify(row.doc))); }); return { @@ -97,7 +98,7 @@ const addDeletesToResult = (deletedDocs, allDocs) => { Downloads all given documents from couchdb and stores them in Postgres, in batches. We presume if a document is on this list it has changed, and thus needs updating. 
*/ -const loadAndStoreDocs = async (couchdb, docsToDownload) => { +const loadAndStoreDocs = async (couchdb, docsToDownload, source) => { if (!docsToDownload.length) { return; } @@ -109,13 +110,13 @@ const loadAndStoreDocs = async (couchdb, docsToDownload) => { const docsToStore = addDeletesToResult(deletedDocs, allDocsResult); - await storeDocs(docsToStore); + await storeDocs(docsToStore, source); }; -const storeDocs = async (allDocsResult) => { +const storeDocs = async (allDocsResult, source) => { let client; try { - const { query, values } = buildBulkInsertQuery(allDocsResult); + const { query, values } = buildBulkInsertQuery(allDocsResult, source); client = await db.getPgClient(); await client.query(query, values); @@ -124,7 +125,7 @@ const storeDocs = async (allDocsResult) => { if (err.code === '40P01') { // deadlock detected await client.end(); - return storeDocs(allDocsResult); + return storeDocs(allDocsResult, source); } throw err; } @@ -154,7 +155,7 @@ const importChangesBatch = async (couchDb, source) => { `in ${dbName}`); console.log(`There are approximately ${pending} changes left in ${dbName}`); - await loadAndStoreDocs(couchDb, changes.results); + await loadAndStoreDocs(couchDb, changes.results, source); await storeSeq(changes.last_seq, pending, source); return changes.results.length; diff --git a/couch2pg/src/setup.js b/couch2pg/src/setup.js index ee154d5..e76213d 100644 --- a/couch2pg/src/setup.js +++ b/couch2pg/src/setup.js @@ -6,6 +6,7 @@ CREATE TABLE IF NOT EXISTS ${db.postgresTable} ( saved_timestamp TIMESTAMP, _id VARCHAR PRIMARY KEY, _deleted BOOLEAN, + source varchar, doc jsonb )`; @@ -25,12 +26,17 @@ const createTimestampIndex = ` CREATE INDEX CONCURRENTLY IF NOT EXISTS saved_timestamp ON ${db.postgresTable}(saved_timestamp); `; +const createSourceIndex = ` +CREATE INDEX CONCURRENTLY IF NOT EXISTS source ON ${db.postgresTable}(source); +`; + export const createDatabase = async () => { const client = await db.getPgClient(); await 
client.query(createSchema); await client.query(createTable); await client.query(createDeleteIndex); await client.query(createTimestampIndex); + await client.query(createSourceIndex); await client.query(createProgressTable); await client.end(); }; diff --git a/couch2pg/tests/unit/importer.spec.js b/couch2pg/tests/unit/importer.spec.js index b73571d..bbcef6c 100644 --- a/couch2pg/tests/unit/importer.spec.js +++ b/couch2pg/tests/unit/importer.spec.js @@ -24,12 +24,13 @@ const updateSeqMatch = () => ` WHERE source = $3 `; -const insertDocsMatch = () => `INSERT INTO ${db.postgresTable} (saved_timestamp, _id, _deleted, doc) VALUES`; +const insertDocsMatch = () => `INSERT INTO ${db.postgresTable} (saved_timestamp, _id, _deleted, source, doc) VALUES`; const ON_CONFLICT_STMT = ` ON CONFLICT (_id) DO UPDATE SET saved_timestamp = EXCLUDED.saved_timestamp, _deleted = EXCLUDED._deleted, + source = EXCLUDED.source, doc = EXCLUDED.doc `; @@ -156,22 +157,25 @@ describe('importer', () => { expect(couchDb.allDocs.args).to.deep.equal([[{ include_docs: true, keys: ['doc1', 'doc2', 'doc3'] }]]); expect(pgClient.query.withArgs(sinon.match(insertDocsMatch())).args).to.deep.equal([[ - 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, doc) VALUES ' + + 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, source, doc) VALUES ' + '($1, $2, $3, $4),($5, $6, $7, $8),($9, $10, $11, $12) ' + ON_CONFLICT_STMT, [ now.toISOString(), 'doc1', false, + 'thehost/medic', JSON.stringify(docs[0].doc), now.toISOString(), 'doc2', false, + 'thehost/medic', JSON.stringify(docs[1].doc), now.toISOString(), 'doc3', false, + 'thehost/medic', JSON.stringify(docs[2].doc), ] ]]); @@ -234,64 +238,73 @@ describe('importer', () => { expect(pgClient.query.withArgs(sinon.match(insertDocsMatch())).callCount).to.equal(3); expect(pgClient.query.withArgs(sinon.match(insertDocsMatch())).args[0]).to.deep.equal([ - 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, doc) VALUES ' + + 'INSERT INTO v1.whatever 
(saved_timestamp, _id, _deleted, source, doc) VALUES ' + '($1, $2, $3, $4),($5, $6, $7, $8),($9, $10, $11, $12) ' + ON_CONFLICT_STMT, [ now.toISOString(), iterationOne.docs[0].id, false, + 'thehost/medic', JSON.stringify(iterationOne.docs[0].doc), now.toISOString(), iterationOne.docs[1].id, false, + 'thehost/medic', JSON.stringify(iterationOne.docs[1].doc), now.toISOString(), iterationOne.docs[2].id, false, + 'thehost/medic', JSON.stringify(iterationOne.docs[2].doc), ] ]); expect(pgClient.query.withArgs(sinon.match(insertDocsMatch())).args[1]).to.deep.equal([ - 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, doc) VALUES ' + + 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, source, doc) VALUES ' + '($1, $2, $3, $4),($5, $6, $7, $8),($9, $10, $11, $12) ' + ON_CONFLICT_STMT, [ now.toISOString(), iterationTwo.docs[0].id, false, + 'thehost/medic', JSON.stringify(iterationTwo.docs[0].doc), now.toISOString(), iterationTwo.docs[1].id, false, + 'thehost/medic', JSON.stringify(iterationTwo.docs[1].doc), now.toISOString(), iterationTwo.docs[2].id, false, + 'thehost/medic', JSON.stringify(iterationTwo.docs[2].doc), ] ]); expect(pgClient.query.withArgs(sinon.match(insertDocsMatch())).args[2]).to.deep.equal([ - 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, doc) VALUES ' + + 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, source, doc) VALUES ' + '($1, $2, $3, $4),($5, $6, $7, $8),($9, $10, $11, $12) ' + ON_CONFLICT_STMT, [ now.toISOString(), iterationThree.docs[0].id, false, + 'thehost/medic', JSON.stringify(iterationThree.docs[0].doc), now.toISOString(), iterationThree.docs[1].id, false, + 'thehost/medic', JSON.stringify(iterationThree.docs[1].doc), now.toISOString(), iterationThree.docs[2].id, false, + 'thehost/medic', JSON.stringify(iterationThree.docs[2].doc), ] ]); @@ -315,12 +328,13 @@ describe('importer', () => { await importer(couchDb); expect(pgClient.query.withArgs(sinon.match(insertDocsMatch())).args).to.deep.equal([[ - 'INSERT 
INTO v1.whatever (saved_timestamp, _id, _deleted, doc) VALUES ' + + 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, source, doc) VALUES ' + '($1, $2, $3, $4) ' + ON_CONFLICT_STMT, [ new Date().toISOString(), 'change', false, + 'thehost/medic', JSON.stringify({ _id: 'change', _rev: '1', @@ -352,12 +366,13 @@ describe('importer', () => { await importer(couchDb); expect(pgClient.query.withArgs(sinon.match(insertDocsMatch())).args).to.deep.equal([[ - 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, doc) VALUES ' + + 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, source, doc) VALUES ' + '($1, $2, $3, $4) ' + ON_CONFLICT_STMT, [ new Date().toISOString(), 'change', false, + 'thehost/medic', JSON.stringify({ _id: 'org.couchdb.user:paul', _rev: '1', @@ -392,22 +407,25 @@ describe('importer', () => { expect(couchDb.allDocs.args).to.deep.equal([[{ include_docs: true, keys: ['doc2'] }]]); expect(pgClient.query.withArgs(sinon.match(insertDocsMatch())).args).to.deep.equal([[ - 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, doc) VALUES ' + + 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, source, doc) VALUES ' + '($1, $2, $3, $4),($5, $6, $7, $8),($9, $10, $11, $12) ' + ON_CONFLICT_STMT, [ now.toISOString(), 'doc2', false, + 'thehost/medic', JSON.stringify(docs[0].doc), now.toISOString(), 'doc1', true, + 'thehost/medic', JSON.stringify({ _id: 'doc1', _rev: 1, _deleted: true }), now.toISOString(), 'doc3', true, + 'thehost/medic', JSON.stringify({ _id: 'doc3', _rev: undefined, _deleted: true }), ] ]]); @@ -565,22 +583,25 @@ describe('importer', () => { expect(couchDb.allDocs.args).to.deep.equal([[{ include_docs: true, keys: ['doc1', 'doc2', 'doc3'] }]]); const queryArgs = [ - 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, doc) VALUES ' + + 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, source, doc) VALUES ' + '($1, $2, $3, $4),($5, $6, $7, $8),($9, $10, $11, $12) ' + ON_CONFLICT_STMT, [ now.toISOString(), 
'doc1', false, + 'thehost/medic', JSON.stringify(docs[0].doc), now.toISOString(), 'doc2', false, + 'thehost/medic', JSON.stringify(docs[1].doc), now.toISOString(), 'doc3', false, + 'thehost/medic', JSON.stringify(docs[2].doc), ] ]; diff --git a/couch2pg/tests/unit/setup.spec.js b/couch2pg/tests/unit/setup.spec.js index 083c37f..c775c3e 100644 --- a/couch2pg/tests/unit/setup.spec.js +++ b/couch2pg/tests/unit/setup.spec.js @@ -32,7 +32,7 @@ describe('setup', () => { await setup.createDatabase(); expect(db.getPgClient.calledOnce).to.equal(true); - expect(pgClient.query.callCount).to.equal(5); + expect(pgClient.query.callCount).to.equal(6); expect(pgClient.end.calledOnce).to.equal(true); expect(pgClient.query.args[0]).to.deep.equal(['CREATE SCHEMA IF NOT EXISTS v1']); expect(pgClient.query.args[1]).to.deep.equal([ ` @@ -40,6 +40,7 @@ CREATE TABLE IF NOT EXISTS v1.whatever ( saved_timestamp TIMESTAMP, _id VARCHAR PRIMARY KEY, _deleted BOOLEAN, + source varchar, doc jsonb )`]); expect(pgClient.query.args[2]).to.deep.equal([` @@ -49,6 +50,9 @@ CREATE INDEX CONCURRENTLY IF NOT EXISTS _deleted ON v1.whatever(_deleted); CREATE INDEX CONCURRENTLY IF NOT EXISTS saved_timestamp ON v1.whatever(saved_timestamp); `]); expect(pgClient.query.args[4]).to.deep.equal([` +CREATE INDEX CONCURRENTLY IF NOT EXISTS source ON v1.whatever(source); +`]); + expect(pgClient.query.args[5]).to.deep.equal([` CREATE TABLE IF NOT EXISTS v1.couchdb_progress ( seq varchar, pending integer, From 8a6feb468abbb26e3bf0781938bad96a165a0b76 Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Thu, 30 Jan 2025 12:41:56 +0300 Subject: [PATCH 08/25] fix(#172): fixing test --- couch2pg/tests/unit/importer.spec.js | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/couch2pg/tests/unit/importer.spec.js b/couch2pg/tests/unit/importer.spec.js index bbcef6c..7496f3e 100644 --- a/couch2pg/tests/unit/importer.spec.js +++ b/couch2pg/tests/unit/importer.spec.js @@ -158,7 +158,7 @@ 
describe('importer', () => { expect(pgClient.query.withArgs(sinon.match(insertDocsMatch())).args).to.deep.equal([[ 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, source, doc) VALUES ' + - '($1, $2, $3, $4),($5, $6, $7, $8),($9, $10, $11, $12) ' + ON_CONFLICT_STMT, + '($1, $2, $3, $4, $5),($6, $7, $8, $9, $10),($11, $12, $13, $14, $15) ' + ON_CONFLICT_STMT, [ now.toISOString(), 'doc1', @@ -239,7 +239,7 @@ describe('importer', () => { expect(pgClient.query.withArgs(sinon.match(insertDocsMatch())).callCount).to.equal(3); expect(pgClient.query.withArgs(sinon.match(insertDocsMatch())).args[0]).to.deep.equal([ 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, source, doc) VALUES ' + - '($1, $2, $3, $4),($5, $6, $7, $8),($9, $10, $11, $12) ' + ON_CONFLICT_STMT, + '($1, $2, $3, $4, $5),($6, $7, $8, $9, $10),($11, $12, $13, $14, $15) ' + ON_CONFLICT_STMT, [ now.toISOString(), iterationOne.docs[0].id, @@ -263,7 +263,7 @@ describe('importer', () => { expect(pgClient.query.withArgs(sinon.match(insertDocsMatch())).args[1]).to.deep.equal([ 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, source, doc) VALUES ' + - '($1, $2, $3, $4),($5, $6, $7, $8),($9, $10, $11, $12) ' + ON_CONFLICT_STMT, + '($1, $2, $3, $4, $5),($6, $7, $8, $9, $10),($11, $12, $13, $14, $15) ' + ON_CONFLICT_STMT, [ now.toISOString(), iterationTwo.docs[0].id, @@ -287,7 +287,7 @@ describe('importer', () => { expect(pgClient.query.withArgs(sinon.match(insertDocsMatch())).args[2]).to.deep.equal([ 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, source, doc) VALUES ' + - '($1, $2, $3, $4),($5, $6, $7, $8),($9, $10, $11, $12) ' + ON_CONFLICT_STMT, + '($1, $2, $3, $4, $5),($6, $7, $8, $9, $10),($11, $12, $13, $14, $15) ' + ON_CONFLICT_STMT, [ now.toISOString(), iterationThree.docs[0].id, @@ -329,7 +329,7 @@ describe('importer', () => { expect(pgClient.query.withArgs(sinon.match(insertDocsMatch())).args).to.deep.equal([[ 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, 
source, doc) VALUES ' + - '($1, $2, $3, $4) ' + ON_CONFLICT_STMT, + '($1, $2, $3, $4, $5) ' + ON_CONFLICT_STMT, [ new Date().toISOString(), 'change', @@ -367,7 +367,7 @@ describe('importer', () => { expect(pgClient.query.withArgs(sinon.match(insertDocsMatch())).args).to.deep.equal([[ 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, source, doc) VALUES ' + - '($1, $2, $3, $4) ' + ON_CONFLICT_STMT, + '($1, $2, $3, $4, $5) ' + ON_CONFLICT_STMT, [ new Date().toISOString(), 'change', @@ -408,7 +408,7 @@ describe('importer', () => { expect(pgClient.query.withArgs(sinon.match(insertDocsMatch())).args).to.deep.equal([[ 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, source, doc) VALUES ' + - '($1, $2, $3, $4),($5, $6, $7, $8),($9, $10, $11, $12) ' + ON_CONFLICT_STMT, + '($1, $2, $3, $4, $5),($6, $7, $8, $9, $10),($11, $12, $13, $14, $15) ' + ON_CONFLICT_STMT, [ now.toISOString(), 'doc2', @@ -584,7 +584,7 @@ describe('importer', () => { const queryArgs = [ 'INSERT INTO v1.whatever (saved_timestamp, _id, _deleted, source, doc) VALUES ' + - '($1, $2, $3, $4),($5, $6, $7, $8),($9, $10, $11, $12) ' + ON_CONFLICT_STMT, + '($1, $2, $3, $4, $5),($6, $7, $8, $9, $10),($11, $12, $13, $14, $15) ' + ON_CONFLICT_STMT, [ now.toISOString(), 'doc1', From 8731421a87114f733ac514773c463001a0580101 Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Fri, 31 Jan 2025 13:08:17 +0300 Subject: [PATCH 09/25] fix(#172): list affected models --- dbt/dbt-run.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dbt/dbt-run.py b/dbt/dbt-run.py index bd8d7ff..15ce73c 100644 --- a/dbt/dbt-run.py +++ b/dbt/dbt-run.py @@ -88,7 +88,11 @@ def get_manifest(): f.write(json.dumps(manifest[0])); # run dbt ls to make sure current manifest is generated - subprocess.run(["dbt", "ls", "--profiles-dir", ".dbt"]) + args = ["dbt", "ls", "--profiles-dir", ".dbt"] + if dbt_selector: + args.append('--select') + args.append(dbt_selector) + subprocess.run(args) new_manifest = '{}' with 
open("/dbt/target/manifest.json", "r") as f: From fdc48a7a7398e005fe118c24152c3836a14cf345 Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Fri, 7 Feb 2025 15:49:48 +0300 Subject: [PATCH 10/25] chore(#172): add tests and docker profiles --- docker-compose.yml | 95 +++++++++++++------ package.json | 2 +- .../dbt/package/models/contacts/contacts.yml | 2 + tests/dbt/package/models/reports/reports.yml | 2 + tests/e2e-test.spec.js | 27 ++++++ 5 files changed, 99 insertions(+), 29 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index c39352c..2826f15 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,23 @@ name: ${COMPOSE_PROJECT_NAME:-cht-sync} +version: '3.8' + +x-dbt-base: &dbt-common + build: ./dbt/ + working_dir: /dbt/ + environment: &dbt-env + POSTGRES_HOST: ${POSTGRES_HOST} + POSTGRES_PORT: ${POSTGRES_PORT:-5432} + POSTGRES_USER: ${POSTGRES_USER} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + POSTGRES_DB: ${POSTGRES_DB} + POSTGRES_TABLE: ${POSTGRES_TABLE} + POSTGRES_SCHEMA: ${POSTGRES_SCHEMA} + ROOT_POSTGRES_SCHEMA: ${POSTGRES_SCHEMA} + CHT_PIPELINE_BRANCH_URL: ${CHT_PIPELINE_BRANCH_URL} + DATAEMON_INTERVAL: ${DATAEMON_INTERVAL} + DBT_PACKAGE_TARBALL_URL: ${DBT_PACKAGE_TARBALL_URL} + DBT_THREAD_COUNT: ${DBT_THREAD_COUNT} + services: couch2pg: build: ./couch2pg/ @@ -9,34 +28,54 @@ services: options: max-size: "512m" environment: - - COUCHDB_USER=${COUCHDB_USER} - - COUCHDB_PASSWORD=${COUCHDB_PASSWORD} - - COUCHDB_HOST=${COUCHDB_HOST} - - COUCHDB_DBS=${COUCHDB_DBS} - - COUCHDB_PORT=${COUCHDB_PORT} - - COUCHDB_SECURE=${COUCHDB_SECURE:-true} - - POSTGRES_USER=${POSTGRES_USER} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} - - POSTGRES_HOST=${POSTGRES_HOST} - - POSTGRES_DB=${POSTGRES_DB} - - POSTGRES_PORT=${POSTGRES_PORT:-5432} - - POSTGRES_SCHEMA=${POSTGRES_SCHEMA} - - POSTGRES_TABLE=${POSTGRES_TABLE} - restart: always + <<: *dbt-env + COUCHDB_USER: ${COUCHDB_USER} + COUCHDB_PASSWORD: ${COUCHDB_PASSWORD} + COUCHDB_HOST: ${COUCHDB_HOST} + 
COUCHDB_DBS: ${COUCHDB_DBS} + COUCHDB_PORT: ${COUCHDB_PORT} + COUCHDB_SECURE: ${COUCHDB_SECURE:-true} dbt: - build: ./dbt/ - working_dir: /dbt/ + <<: *dbt-common + profiles: + - default + environment: + <<: *dbt-env + DBT_SELECTOR: '' + + dbt-base: + <<: *dbt-common + profiles: + - base-and-package + environment: + <<: *dbt-env + DBT_SELECTOR: tag:base + + dbt-models: + <<: *dbt-common + profiles: + - base-and-package + depends_on: + - dbt-base + environment: + <<: *dbt-env + DBT_SELECTOR: package:${DBT_PACKAGE_NAME} + + dbt-contacts: + <<: *dbt-common + profiles: + - test + environment: + <<: *dbt-env + DBT_SELECTOR: tag:contacts + + dbt-reports: + <<: *dbt-common + profiles: + - test + depends_on: + - dbt-contacts environment: - - POSTGRES_HOST=${POSTGRES_HOST} - - POSTGRES_PORT=${POSTGRES_PORT:-5432} - - POSTGRES_USER=${POSTGRES_USER} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} - - POSTGRES_DB=${POSTGRES_DB} - - POSTGRES_TABLE=${POSTGRES_TABLE} - - POSTGRES_SCHEMA=${POSTGRES_SCHEMA} - - ROOT_POSTGRES_SCHEMA=${POSTGRES_SCHEMA} - - CHT_PIPELINE_BRANCH_URL=${CHT_PIPELINE_BRANCH_URL} - - DATAEMON_INTERVAL=${DATAEMON_INTERVAL} - - DBT_PACKAGE_TARBALL_URL=${DBT_PACKAGE_TARBALL_URL} - - DBT_THREAD_COUNT=${DBT_THREAD_COUNT} + <<: *dbt-env + DBT_SELECTOR: tag:reports diff --git a/package.json b/package.json index 43385ef..bc8c94c 100644 --- a/package.json +++ b/package.json @@ -9,7 +9,7 @@ "test:e2e-mocha": "mocha tests/**/*.spec.js --timeout 70000", "lint": "eslint --color --cache .", "test:e2e-stop-containers": "docker compose --env-file ./tests/.e2e-env -f docker-compose.yml -f docker-compose.couchdb.yml -f docker-compose.postgres.yml -f tests/dbt/docker-compose.yml -f docker-compose.bastion.yml kill && docker compose --env-file ./tests/.e2e-env -f docker-compose.yml -f docker-compose.couchdb.yml -f docker-compose.postgres.yml -f tests/dbt/docker-compose.yml -f docker-compose.bastion.yml down -v", - "test:e2e-containers": "docker compose --env-file ./tests/.e2e-env -f 
docker-compose.yml -f docker-compose.couchdb.yml -f docker-compose.postgres.yml -f tests/dbt/docker-compose.yml -f docker-compose.bastion.yml up -d --build --force-recreate && npm run wait-for-couchdb", + "test:e2e-containers": "docker compose --profile test --env-file ./tests/.e2e-env -f docker-compose.yml -f docker-compose.couchdb.yml -f docker-compose.postgres.yml -f tests/dbt/docker-compose.yml -f docker-compose.bastion.yml up -d --build --force-recreate && npm run wait-for-couchdb", "test:e2e-data": "cd tests/data && rm -rf ./json_docs && cht csv-to-docs", "test": "cd couch2pg && npm run test", "wait-for-couchdb": "bash -c 'until nc -z localhost 5984; do sleep 1; done; echo \"CouchDB is ready\"'" diff --git a/tests/dbt/package/models/contacts/contacts.yml b/tests/dbt/package/models/contacts/contacts.yml index 0e8881e..9f50338 100644 --- a/tests/dbt/package/models/contacts/contacts.yml +++ b/tests/dbt/package/models/contacts/contacts.yml @@ -3,6 +3,7 @@ version: 2 models: - name: contacts config: + tags: ["contacts"] contract: enforced: true columns: @@ -39,6 +40,7 @@ models: - name: persons config: + tags: ["contacts"] contract: enforced: true columns: diff --git a/tests/dbt/package/models/reports/reports.yml b/tests/dbt/package/models/reports/reports.yml index 53d0686..66d4191 100644 --- a/tests/dbt/package/models/reports/reports.yml +++ b/tests/dbt/package/models/reports/reports.yml @@ -2,6 +2,8 @@ version: 1 models: - name: reports + config: + tags: ["reports"] columns: - name: _id tests: diff --git a/tests/e2e-test.spec.js b/tests/e2e-test.spec.js index e77b31e..e881bc7 100644 --- a/tests/e2e-test.spec.js +++ b/tests/e2e-test.spec.js @@ -328,6 +328,33 @@ describe('Main workflow Test Suite', () => { }); }); + describe('DBT Selector Tests', () => { + it('should maintain separate manifests for different selectors', async () => { + // Test contacts selector + await delay(6); + + const contactsManifest = await client.query( + `SELECT manifest, dbt_selector FROM 
${POSTGRES_SCHEMA}._dataemon WHERE dbt_selector = $1`, + ['tag:contacts'] + ); + expect(contactsManifest.rows.length).to.be.greaterThan(0); + expect(contactsManifest.rows[0].dbt_selector).to.equal('tag:contacts'); + + // Test reports selector + await delay(6); + + const reportsManifest = await client.query( + `SELECT manifest, dbt_selector FROM ${POSTGRES_SCHEMA}._dataemon WHERE dbt_selector = $1`, + ['tag:reports'] + ); + expect(reportsManifest.rows.length).to.be.greaterThan(0); + expect(reportsManifest.rows[0].dbt_selector).to.equal('tag:reports'); + + // Verify manifests are different + expect(contactsManifest.rows[0].manifest).to.not.deep.equal(reportsManifest.rows[0].manifest); + }); + }); + describe('Downtime handles', () => { after(async () => { const isAlive = await isPostgresConnectionAlive(client); From 3d6b50ea9616a0713367b74e160e2ca28e4c65fe Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Mon, 10 Feb 2025 11:36:18 +0300 Subject: [PATCH 11/25] fix(#172): adding back restart always --- docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yml b/docker-compose.yml index 2826f15..0977505 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -35,6 +35,7 @@ services: COUCHDB_DBS: ${COUCHDB_DBS} COUCHDB_PORT: ${COUCHDB_PORT} COUCHDB_SECURE: ${COUCHDB_SECURE:-true} + restart: always dbt: <<: *dbt-common From 2ce148b3e8c619cfcf9e7f7d975a2bf2228c83b1 Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Tue, 11 Feb 2025 10:26:01 +0300 Subject: [PATCH 12/25] fix(#172): try increasing timeout to fix test --- tests/e2e-test.spec.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e-test.spec.js b/tests/e2e-test.spec.js index e881bc7..d07c3a7 100644 --- a/tests/e2e-test.spec.js +++ b/tests/e2e-test.spec.js @@ -157,7 +157,7 @@ describe('Main workflow Test Suite', () => { await editDoc({ ...report, edited: 1 }); await editDoc({ ...contact, edited: 1 }); - await delay(6); // wait for CHT-Sync + await delay(24); // wait 
for CHT-Sync const pgTableDataRecord = await client.query(`SELECT * from ${PGTABLE} where _id = $1`, [report._id]); expect(pgTableDataRecord.rows[0].doc.edited).to.equal(1); From fd8bd63e63db51eb6d7f12368332299d801afe50 Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Tue, 11 Feb 2025 11:56:32 +0300 Subject: [PATCH 13/25] feat(#156): add batch size --- dbt/dbt-run.py | 12 +++++++++++- deploy/cht_sync/templates/dbt.yaml | 2 ++ deploy/cht_sync/values.yaml.template | 1 + docker-compose.yml | 1 + env.template | 3 ++- 5 files changed, 17 insertions(+), 2 deletions(-) diff --git a/dbt/dbt-run.py b/dbt/dbt-run.py index 7906e94..ab91f87 100644 --- a/dbt/dbt-run.py +++ b/dbt/dbt-run.py @@ -131,7 +131,17 @@ def update_models(): def run_incremental_models(): # update incremental models (and tables if there are any) - subprocess.run(["dbt", "run", "--profiles-dir", ".dbt", "--exclude", "config.materialized:view"]) + args = ["dbt", "run", + "--profiles-dir", + ".dbt", + "--exclude", "config.materialized:view"] + + batch_size = int(os.getenv("DBT_BATCH_SIZE") or 0) + if batch_size: + args.append("--vars") + args.append(f'{{batch_size: {batch_size}}}') + + subprocess.run(args) if __name__ == "__main__": diff --git a/deploy/cht_sync/templates/dbt.yaml b/deploy/cht_sync/templates/dbt.yaml index 8d2151f..a7ecacf 100644 --- a/deploy/cht_sync/templates/dbt.yaml +++ b/deploy/cht_sync/templates/dbt.yaml @@ -41,4 +41,6 @@ spec: value: {{ .Values.cht_pipeline_branch_url }} - name: DBT_THREAD_COUNT value: {{ .Values.dbt_thread_count | default "1" | quote }} + - name: DBT_BATCH_SIZE + value: {{ .Values.dbt_batch_size | default "" | quote }} diff --git a/deploy/cht_sync/values.yaml.template b/deploy/cht_sync/values.yaml.template index 9afcb2e..a8b8449 100644 --- a/deploy/cht_sync/values.yaml.template +++ b/deploy/cht_sync/values.yaml.template @@ -11,6 +11,7 @@ postgres: cht_pipeline_branch_url: "https://github.com/medic/cht-pipeline.git#main" dbt_thread_count: 1 +dbt_batch_size: 100000 # values 
shared by all couchdb instances # can be omitted if couchdb instances do not share any values diff --git a/docker-compose.yml b/docker-compose.yml index c39352c..c6cf242 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -40,3 +40,4 @@ services: - DATAEMON_INTERVAL=${DATAEMON_INTERVAL} - DBT_PACKAGE_TARBALL_URL=${DBT_PACKAGE_TARBALL_URL} - DBT_THREAD_COUNT=${DBT_THREAD_COUNT} + - DBT_BATCH_SIZE=${DBT_BATCH_SIZE} diff --git a/env.template b/env.template index e6aca3f..7e21684 100644 --- a/env.template +++ b/env.template @@ -11,6 +11,7 @@ POSTGRES_PORT=5432 CHT_PIPELINE_BRANCH_URL="https://github.com/medic/cht-pipeline.git#main" DATAEMON_INTERVAL=5 DBT_THREAD_COUNT=1 +DBT_BATCH_SIZE=100000 # couchdb COUCHDB_USER=medic @@ -24,4 +25,4 @@ COUCHDB_SECURE=false #COMPOSE_PROJECT_NAME=cht-sync #BASTION_PORT=22222 # default is 22222 uncomment to change -#BASTION_AUTHORIZED_KEYS_FILE= # uncomment to change \ No newline at end of file +#BASTION_AUTHORIZED_KEYS_FILE= # uncomment to change From 53bd60fb613d7daf15f6b24d90ae160d21d24346 Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Mon, 17 Feb 2025 12:49:53 +0300 Subject: [PATCH 14/25] fix(#182): fix tests using local path --- docker-compose.local.yml | 42 ---------------------------------------- docker-compose.yml | 11 +++++++++-- package.json | 4 ++-- tests/.e2e-env | 11 +++++------ 4 files changed, 16 insertions(+), 52 deletions(-) delete mode 100644 docker-compose.local.yml diff --git a/docker-compose.local.yml b/docker-compose.local.yml deleted file mode 100644 index 202a249..0000000 --- a/docker-compose.local.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: ${COMPOSE_PROJECT_NAME:-cht-sync} -services: - couch2pg: - build: ./couch2pg/ - extra_hosts: - - "host.docker.internal:host-gateway" - logging: - driver: "json-file" - options: - max-size: "512m" - environment: - - COUCHDB_USER=${COUCHDB_USER} - - COUCHDB_PASSWORD=${COUCHDB_PASSWORD} - - COUCHDB_HOST=${COUCHDB_HOST} - - COUCHDB_DBS=${COUCHDB_DBS} - - 
COUCHDB_PORT=${COUCHDB_PORT} - - COUCHDB_SECURE=${COUCHDB_SECURE:-true} - - POSTGRES_USER=${POSTGRES_USER} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} - - POSTGRES_HOST=${POSTGRES_HOST} - - POSTGRES_DB=${POSTGRES_DB} - - POSTGRES_PORT=5432 - - POSTGRES_SCHEMA=${POSTGRES_SCHEMA} - - POSTGRES_TABLE=${POSTGRES_TABLE} - restart: always - - dbt: - build: ./dbt/ - working_dir: /dbt/ - volumes: - - "${DBT_LOCAL_PATH}:/dbt/package/" - environment: - - POSTGRES_HOST=${POSTGRES_HOST} - - POSTGRES_USER=${POSTGRES_USER} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} - - POSTGRES_DB=${POSTGRES_DB} - - POSTGRES_TABLE=${POSTGRES_TABLE} - - POSTGRES_SCHEMA=${POSTGRES_SCHEMA} - - ROOT_POSTGRES_SCHEMA=${POSTGRES_SCHEMA} - - DATAEMON_INTERVAL=${DATAEMON_INTERVAL} - - DBT_LOCAL_PATH=${DBT_LOCAL_PATH} - - DBT_THREAD_COUNT=${DBT_THREAD_COUNT} diff --git a/docker-compose.yml b/docker-compose.yml index 0977505..cb71e71 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,9 +13,7 @@ x-dbt-base: &dbt-common POSTGRES_TABLE: ${POSTGRES_TABLE} POSTGRES_SCHEMA: ${POSTGRES_SCHEMA} ROOT_POSTGRES_SCHEMA: ${POSTGRES_SCHEMA} - CHT_PIPELINE_BRANCH_URL: ${CHT_PIPELINE_BRANCH_URL} DATAEMON_INTERVAL: ${DATAEMON_INTERVAL} - DBT_PACKAGE_TARBALL_URL: ${DBT_PACKAGE_TARBALL_URL} DBT_THREAD_COUNT: ${DBT_THREAD_COUNT} services: @@ -44,6 +42,7 @@ services: environment: <<: *dbt-env DBT_SELECTOR: '' + CHT_PIPELINE_BRANCH_URL: ${CHT_PIPELINE_BRANCH_URL} dbt-base: <<: *dbt-common @@ -52,6 +51,7 @@ services: environment: <<: *dbt-env DBT_SELECTOR: tag:base + CHT_PIPELINE_BRANCH_URL: ${CHT_PIPELINE_BRANCH_URL} dbt-models: <<: *dbt-common @@ -62,17 +62,23 @@ services: environment: <<: *dbt-env DBT_SELECTOR: package:${DBT_PACKAGE_NAME} + CHT_PIPELINE_BRANCH_URL: ${CHT_PIPELINE_BRANCH_URL} dbt-contacts: <<: *dbt-common + volumes: + - "${DBT_LOCAL_PATH}:/dbt/package/" profiles: - test environment: <<: *dbt-env DBT_SELECTOR: tag:contacts + DBT_LOCAL_PATH: ${DBT_LOCAL_PATH} dbt-reports: <<: *dbt-common + volumes: + 
- "${DBT_LOCAL_PATH}:/dbt/package/" profiles: - test depends_on: @@ -80,3 +86,4 @@ services: environment: <<: *dbt-env DBT_SELECTOR: tag:reports + DBT_LOCAL_PATH: ${DBT_LOCAL_PATH} diff --git a/package.json b/package.json index bc8c94c..c4fe57f 100644 --- a/package.json +++ b/package.json @@ -8,8 +8,8 @@ "test:e2e": "npm run test:e2e-data && npm run test:e2e-stop-containers && npm run test:e2e-containers && npm run test:e2e-mocha && npm run test:e2e-stop-containers ", "test:e2e-mocha": "mocha tests/**/*.spec.js --timeout 70000", "lint": "eslint --color --cache .", - "test:e2e-stop-containers": "docker compose --env-file ./tests/.e2e-env -f docker-compose.yml -f docker-compose.couchdb.yml -f docker-compose.postgres.yml -f tests/dbt/docker-compose.yml -f docker-compose.bastion.yml kill && docker compose --env-file ./tests/.e2e-env -f docker-compose.yml -f docker-compose.couchdb.yml -f docker-compose.postgres.yml -f tests/dbt/docker-compose.yml -f docker-compose.bastion.yml down -v", - "test:e2e-containers": "docker compose --profile test --env-file ./tests/.e2e-env -f docker-compose.yml -f docker-compose.couchdb.yml -f docker-compose.postgres.yml -f tests/dbt/docker-compose.yml -f docker-compose.bastion.yml up -d --build --force-recreate && npm run wait-for-couchdb", + "test:e2e-stop-containers": "docker compose --env-file ./tests/.e2e-env -f docker-compose.yml -f docker-compose.couchdb.yml -f docker-compose.postgres.yml -f docker-compose.bastion.yml kill && docker compose --env-file ./tests/.e2e-env -f docker-compose.yml -f docker-compose.couchdb.yml -f docker-compose.postgres.yml -f docker-compose.bastion.yml down -v", + "test:e2e-containers": "docker compose --profile test --env-file ./tests/.e2e-env -f docker-compose.yml -f docker-compose.couchdb.yml -f docker-compose.postgres.yml -f docker-compose.bastion.yml up -d --build --force-recreate && npm run wait-for-couchdb", "test:e2e-data": "cd tests/data && rm -rf ./json_docs && cht csv-to-docs", "test": "cd 
couch2pg && npm run test", "wait-for-couchdb": "bash -c 'until nc -z localhost 5984; do sleep 1; done; echo \"CouchDB is ready\"'" diff --git a/tests/.e2e-env b/tests/.e2e-env index 51f0892..a9af4bf 100644 --- a/tests/.e2e-env +++ b/tests/.e2e-env @@ -1,12 +1,9 @@ POSTGRES_USER="postgres" POSTGRES_PASSWORD="postgres" -POSTGRES_DB="data" -POSTGRES_TABLE="medic" +POSTGRES_DB="cht_sync" +POSTGRES_TABLE="couchdb" POSTGRES_SCHEMA="v1" -DBT_POSTGRES_USER="postgres" -DBT_POSTGRES_PASSWORD="postgres" -DBT_POSTGRES_SCHEMA="dbt" -DBT_POSTGRES_HOST="postgres" +POSTGRES_PORT=5432 DBT_LOCAL_PATH="./tests/dbt/package" DATAEMON_INTERVAL=0 COUCHDB_USER="medic" @@ -16,3 +13,5 @@ COUCHDB_HOST="host.docker.internal" COUCHDB_PORT=5984 COUCHDB_SECURE=false POSTGRES_HOST=postgres +BASTION_AUTHORIZED_KEYS_FILE=./tests/utils/bastion-ssh-key.pub +DBT_THREAD_COUNT=1 From 2f1ad5ad32a1f5125310fdcfabb2f5b7877bd138 Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Wed, 31 Jul 2024 12:10:15 +0300 Subject: [PATCH 15/25] fix(#127): removing unused service and changing app names --- deploy/cht_sync/templates/couch2pg.yaml | 4 ++-- deploy/cht_sync/templates/dbt.yaml | 4 ++-- deploy/cht_sync/templates/postgres-service.yaml | 2 +- deploy/cht_sync/templates/postgres.yaml | 6 ++---- deploy/cht_sync/templates/postgrest-service.yaml | 10 ---------- 5 files changed, 7 insertions(+), 19 deletions(-) delete mode 100644 deploy/cht_sync/templates/postgrest-service.yaml diff --git a/deploy/cht_sync/templates/couch2pg.yaml b/deploy/cht_sync/templates/couch2pg.yaml index 2b2e106..b3c7710 100644 --- a/deploy/cht_sync/templates/couch2pg.yaml +++ b/deploy/cht_sync/templates/couch2pg.yaml @@ -9,11 +9,11 @@ spec: replicas: 1 selector: matchLabels: - app: cht-sync + app: cht-sync-couch2pg-{{ $service.host | replace "." "-" }} template: metadata: labels: - app: cht-sync + app: cht-sync-couch2pg-{{ $service.host | replace "." 
"-" }} spec: automountServiceAccountToken: false containers: diff --git a/deploy/cht_sync/templates/dbt.yaml b/deploy/cht_sync/templates/dbt.yaml index 44bd48e..ef43cb8 100644 --- a/deploy/cht_sync/templates/dbt.yaml +++ b/deploy/cht_sync/templates/dbt.yaml @@ -9,11 +9,11 @@ spec: replicas: 1 selector: matchLabels: - app: cht-sync + app: cht-sync-dbt template: metadata: labels: - app: cht-sync + app: cht-sync-dbt spec: automountServiceAccountToken: false containers: diff --git a/deploy/cht_sync/templates/postgres-service.yaml b/deploy/cht_sync/templates/postgres-service.yaml index 9e3d862..046d8ff 100644 --- a/deploy/cht_sync/templates/postgres-service.yaml +++ b/deploy/cht_sync/templates/postgres-service.yaml @@ -5,7 +5,7 @@ metadata: name: postgres spec: selector: - inner.service: postgres + app: cht-sync-postgres ports: - protocol: TCP port: {{ .Values.postgres.port | default "5432" }} diff --git a/deploy/cht_sync/templates/postgres.yaml b/deploy/cht_sync/templates/postgres.yaml index 09c7eec..4b77a94 100644 --- a/deploy/cht_sync/templates/postgres.yaml +++ b/deploy/cht_sync/templates/postgres.yaml @@ -10,13 +10,11 @@ spec: replicas: 1 selector: matchLabels: - app: cht-sync - inner.service: postgres + app: cht-sync-postgres template: metadata: labels: - app: cht-sync - inner.service: postgres + app: cht-sync-postgres spec: automountServiceAccountToken: false containers: diff --git a/deploy/cht_sync/templates/postgrest-service.yaml b/deploy/cht_sync/templates/postgrest-service.yaml deleted file mode 100644 index 94ad911..0000000 --- a/deploy/cht_sync/templates/postgrest-service.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: postgrest -spec: - selector: - inner.service: postgrest - ports: - - protocol: TCP - port: 3000 From 0be1d0255fdb36c95cdb9eaececdcb422160711a Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Tue, 18 Feb 2025 12:37:36 +0300 Subject: [PATCH 16/25] fix: heml chart fixes and inclusions from Moh Ke --- 
deploy/cht_sync/templates/dbt.yaml | 9 ++++----- deploy/cht_sync/templates/postgres-service.yaml | 3 +++ deploy/cht_sync/templates/postgres.yaml | 6 ++++++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/deploy/cht_sync/templates/dbt.yaml b/deploy/cht_sync/templates/dbt.yaml index ef43cb8..ecfac73 100644 --- a/deploy/cht_sync/templates/dbt.yaml +++ b/deploy/cht_sync/templates/dbt.yaml @@ -4,16 +4,16 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: cht-sync-dbt + name: cht-sync-dbt-{{ $index }} spec: replicas: 1 selector: matchLabels: - app: cht-sync-dbt + app: cht-sync-dbt-{{ $index }} template: metadata: labels: - app: cht-sync-dbt + app: cht-sync-dbt-{{ $index }} spec: automountServiceAccountToken: false containers: @@ -46,7 +46,6 @@ spec: value: {{ $.Values.dbt_thread_count | default "1" | quote }} - name: DBT_SELECTOR value: {{ $dbt_selector }} - value: {{ .Values.dbt_thread_count | default "1" | quote }} - name: DBT_BATCH_SIZE - value: {{ .Values.dbt_batch_size | default "" | quote }} + value: {{ $.Values.dbt_batch_size | default "" | quote }} {{- end }} diff --git a/deploy/cht_sync/templates/postgres-service.yaml b/deploy/cht_sync/templates/postgres-service.yaml index 046d8ff..86d8000 100644 --- a/deploy/cht_sync/templates/postgres-service.yaml +++ b/deploy/cht_sync/templates/postgres-service.yaml @@ -10,4 +10,7 @@ spec: - protocol: TCP port: {{ .Values.postgres.port | default "5432" }} targetPort: 5432 + {{- if .Values.postgres.nodePort }} + nodePort: {{ .Values.postgres.nodePort }} + {{- end }} {{- end }} diff --git a/deploy/cht_sync/templates/postgres.yaml b/deploy/cht_sync/templates/postgres.yaml index 4b77a94..0bae577 100644 --- a/deploy/cht_sync/templates/postgres.yaml +++ b/deploy/cht_sync/templates/postgres.yaml @@ -38,8 +38,14 @@ spec: volumeMounts: - name: postgres-data mountPath: /var/lib/postgresql/data + - name: postgres-shm + mountPath: /dev/shm volumes: - name: postgres-data persistentVolumeClaim: claimName: postgres-pvc 
+ - name: postgres-shm + emptyDir: + medium: Memory + sizeLimit: {{ (.Values.postgres).memory_limit | default "2Gi" }} {{- end }} From e1677e0339584316f8333401d0df5d4f60f36443 Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Tue, 18 Feb 2025 14:15:42 +0300 Subject: [PATCH 17/25] chore: adding migrations scripts for v2 --- .../upgrade/add-manifest-and-source.sh | 17 ++++++ deploy/scripts/upgrade/lib/migration.sh | 53 +++++++++++++++++++ .../scripts/upgrade/upgrade-84-monitoring.sh | 51 ++---------------- 3 files changed, 74 insertions(+), 47 deletions(-) create mode 100755 deploy/scripts/upgrade/add-manifest-and-source.sh create mode 100755 deploy/scripts/upgrade/lib/migration.sh diff --git a/deploy/scripts/upgrade/add-manifest-and-source.sh b/deploy/scripts/upgrade/add-manifest-and-source.sh new file mode 100755 index 0000000..7047716 --- /dev/null +++ b/deploy/scripts/upgrade/add-manifest-and-source.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# Source the migration template +SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" +source "$SCRIPT_DIR/lib/migration.sh" + +# SQL statements to execute +SQL_STATEMENTS=( + "ALTER TABLE _dataemon ADD COLUMN IF NOT EXISTS manifest jsonb;" + "ALTER TABLE couchdb ADD COLUMN IF NOT EXISTS source varchar;" + "CREATE INDEX IF NOT EXISTS source ON couchdb(source);" +) + +# Execute each SQL statement +for sql in "${SQL_STATEMENTS[@]}"; do + execute_sql "$sql" +done diff --git a/deploy/scripts/upgrade/lib/migration.sh b/deploy/scripts/upgrade/lib/migration.sh new file mode 100755 index 0000000..9a81eb5 --- /dev/null +++ b/deploy/scripts/upgrade/lib/migration.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# Check for required arguments +if [ "$#" -lt 3 ] || { [ "$1" == "kubectl" ] && [ "$#" -ne 4 ]; }; then + echo "Usage: $0 <docker|kubectl> <username> <dbname> [namespace]" + exit 1 +fi + +# Assign arguments to variables +ENVIRONMENT=$1 +USERNAME=$2 +DBNAME=$3 + +# Function to execute SQL statement +execute_sql() { + local sql_statement=$1 + + if [ "$ENVIRONMENT" == "docker" ]; then + # Find the 
container ID or name automatically + CONTAINER_ID=$(docker ps --filter "name=postgres" --format "{{.ID}}" | head -n 1) + + if [ -z "$CONTAINER_ID" ]; then + echo "No running Postgres container found." + exit 1 + fi + + # Run the SQL statement using docker exec + docker exec "$CONTAINER_ID" psql -U "$USERNAME" "$DBNAME" -c "$sql_statement" + + elif [ "$ENVIRONMENT" == "kubectl" ]; then + # Assign namespace argument + NAMESPACE=$4 + + # Find the Postgres pod automatically + POD_NAME=$(kubectl get pods -n "$NAMESPACE" -l app.name=postgres -o jsonpath="{.items[0].metadata.name}") + + if [ -z "$POD_NAME" ]; then + # Try alternative label + POD_NAME=$(kubectl get pods -n "$NAMESPACE" -l inner.service=postgres -o jsonpath="{.items[0].metadata.name}") + if [ -z "$POD_NAME" ]; then + echo "No running Postgres pod found in namespace $NAMESPACE." + exit 1 + fi + fi + + # Run the SQL statement using kubectl exec + kubectl -n "$NAMESPACE" exec "$POD_NAME" -- psql -U "$USERNAME" "$DBNAME" -c "$sql_statement" + + else + echo "Invalid environment specified. Use 'kubectl' or 'docker'." 
+ exit 1 + fi +} diff --git a/deploy/scripts/upgrade/upgrade-84-monitoring.sh b/deploy/scripts/upgrade/upgrade-84-monitoring.sh index fc6a4e4..9ca048a 100755 --- a/deploy/scripts/upgrade/upgrade-84-monitoring.sh +++ b/deploy/scripts/upgrade/upgrade-84-monitoring.sh @@ -1,51 +1,8 @@ #!/bin/bash -# Check for required arguments -if [ "$#" -lt 3 ] || { [ "$1" == "kubectl" ] && [ "$#" -ne 4 ]; }; then - echo "Usage: $0 [namespace]" - exit 1 -fi +# Source the migration template +SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" +source "$SCRIPT_DIR/lib/migration.sh" -# Assign arguments to variables -ENVIRONMENT=$1 -USERNAME=$2 -DBNAME=$3 ALTER_STATEMENT='ALTER TABLE v1.couchdb_progress ADD pending integer, ADD updated_at timestamptz'; - -if [ "$ENVIRONMENT" == "docker" ]; then - # Find the container ID or name automatically - CONTAINER_ID=$(docker ps --filter "name=postgres" --format "{{.ID}}" | head -n 1) - - if [ -z "$CONTAINER_ID" ]; then - echo "No running Postgres container found." - exit 1 - fi - - # Run the ALTER TABLE statement using docker exec - docker exec "$CONTAINER_ID" psql -U "$USERNAME" "$DBNAME" -c "$ALTER_STATEMENT" - -elif [ "$ENVIRONMENT" == "kubectl" ]; then - # Assign namespace argument - NAMESPACE=$4 - - # Find the Postgres pod automatically - POD_NAME=$(kubectl get pods -n "$NAMESPACE" -l app.name=postgres -o jsonpath="{.items[0].metadata.name}") - - if [ -z "$POD_NAME" ]; then - # Find the Postgres pod automatically - POD_NAME=$(kubectl get pods -n "$NAMESPACE" -l inner.service=postgres -o jsonpath="{.items[0].metadata.name}") - if [ -z "$POD_NAME" ]; then - echo "No running Postgres pod found in namespace $NAMESPACE." - exit 1 - fi - fi - - # echo $POD_NAME - # Run the ALTER TABLE statement using kubectl exec - kubectl -n "$NAMESPACE" exec "$POD_NAME" -- psql -U "$USERNAME" "$DBNAME" -c "$ALTER_STATEMENT" - -else - echo "Invalid environment specified. Use 'kubectl' or 'docker'." 
- exit 1 -fi - +execute_sql "$ALTER_STATEMENT" From 26f43030583b90d74efa05b638e59c017da97df8 Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Wed, 19 Feb 2025 14:46:44 +0300 Subject: [PATCH 18/25] fix: changing dockerhub username --- .github/workflows/test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 95810ae..6588cf0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,6 +4,7 @@ on: [push, pull_request] env: INTERNAL_CONTRIBUTOR: ${{ secrets.DOCKERHUB_USERNAME && 'true' }} + DOCKERHUB_USERNAME: 'dockermedic' jobs: unit-tests: @@ -31,7 +32,7 @@ jobs: - name: Login to Docker Hub uses: docker/login-action@v3 with: - username: ${{ secrets.DOCKERHUB_USERNAME }} + username: ${{ env.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} if: ${{ env.INTERNAL_CONTRIBUTOR }} - run: npm ci From fab35ce7f1c945a13eea1d62932746f92d696e0b Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Fri, 28 Feb 2025 16:54:57 +0300 Subject: [PATCH 19/25] fix: changing from multiple files to profiles --- deploy/cht_sync/templates/dbt.yaml | 2 +- docker-compose.bastion.yml | 12 ---- docker-compose.couchdb.yml | 12 ---- docker-compose.pgadmin.yml | 13 ---- docker-compose.postgres.yml | 12 ---- docker-compose.yml | 102 +++++++++++++++++++++-------- 6 files changed, 76 insertions(+), 77 deletions(-) delete mode 100644 docker-compose.bastion.yml delete mode 100644 docker-compose.couchdb.yml delete mode 100644 docker-compose.pgadmin.yml delete mode 100644 docker-compose.postgres.yml diff --git a/deploy/cht_sync/templates/dbt.yaml b/deploy/cht_sync/templates/dbt.yaml index ecfac73..4e23598 100644 --- a/deploy/cht_sync/templates/dbt.yaml +++ b/deploy/cht_sync/templates/dbt.yaml @@ -1,4 +1,4 @@ -{{- $dbt_selectors := .Values.dbt_selectors -}} +{{- $dbt_selectors := .Values.dbt_selectors | default (list "") -}} {{- range $index, $dbt_selector := $dbt_selectors }} --- apiVersion: apps/v1 
diff --git a/docker-compose.bastion.yml b/docker-compose.bastion.yml deleted file mode 100644 index 6147b17..0000000 --- a/docker-compose.bastion.yml +++ /dev/null @@ -1,12 +0,0 @@ -services: - bastion: - build: ./bastion/ - restart: unless-stopped - ports: - - ${BASTION_PORT:-22222}:22/tcp - volumes: - - ${BASTION_AUTHORIZED_KEYS_FILE:-$PWD/bastion/authorized_keys}:/var/lib/bastion/authorized_keys-tmp - - bastion:/usr/etc/ssh:rw - -volumes: - bastion: diff --git a/docker-compose.couchdb.yml b/docker-compose.couchdb.yml deleted file mode 100644 index 65b52cd..0000000 --- a/docker-compose.couchdb.yml +++ /dev/null @@ -1,12 +0,0 @@ -services: - couchdb: - image: public.ecr.aws/medic/cht-couchdb:4.15.0 - restart: always - ports: - - "5984:5984" - environment: - - COUCHDB_USER=${COUCHDB_USER} - - COUCHDB_PASSWORD=${COUCHDB_PASSWORD} - - COUCHDB_SECRET=9c0d6034-0f19-45df-8fcd-5fec9c473c73 - - COUCHDB_UUID=4c6bffc8-a5ac-4f98-a34c-6fb0e63964e5 - - SVC_NAME=couchdb diff --git a/docker-compose.pgadmin.yml b/docker-compose.pgadmin.yml deleted file mode 100644 index e1ec7e1..0000000 --- a/docker-compose.pgadmin.yml +++ /dev/null @@ -1,13 +0,0 @@ -services: - postgres: - ports: - - 5432:${POSTGRES_PORT:-5432} - - pgadmin: - image: dpage/pgadmin4 - environment: - PGADMIN_DEFAULT_EMAIL: ${PGADMIN_DEFAULT_EMAIL:-pgadmin4@pgadmin.org} - PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD:-admin} - PGADMIN_CONFIG_SERVER_MODE: 'False' - ports: - - "${PGADMIN_PORT:-5050}:80" diff --git a/docker-compose.postgres.yml b/docker-compose.postgres.yml deleted file mode 100644 index e4b2327..0000000 --- a/docker-compose.postgres.yml +++ /dev/null @@ -1,12 +0,0 @@ -services: - postgres: - image: postgres:16 - restart: always - volumes: - - ./postgres/init-dbt-resources.sh:/docker-entrypoint-initdb.d/init-dbt-resources.sh:z - environment: - - POSTGRES_USER=${POSTGRES_USER} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} - - POSTGRES_DB=${POSTGRES_DB} - - POSTGRES_TABLES=${COUCHDB_DBS} - - 
POSTGRES_SCHEMA=${POSTGRES_SCHEMA} \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 5fcdb13..320aa03 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -36,55 +36,103 @@ services: COUCHDB_SECURE: ${COUCHDB_SECURE:-true} restart: always - dbt: +#### Test ############################################################################################## + + dbt-test-contacts: <<: *dbt-common + volumes: + - "${DBT_LOCAL_PATH}:/dbt/package/" profiles: - - default + - test environment: <<: *dbt-env - DBT_SELECTOR: '' - CHT_PIPELINE_BRANCH_URL: ${CHT_PIPELINE_BRANCH_URL} + DBT_SELECTOR: tag:contacts + DBT_LOCAL_PATH: ${DBT_LOCAL_PATH} - dbt-base: + dbt-test-reports: <<: *dbt-common + volumes: + - "${DBT_LOCAL_PATH}:/dbt/package/" profiles: - - base-and-package + - test + depends_on: + - dbt-contacts environment: <<: *dbt-env - DBT_SELECTOR: tag:base - CHT_PIPELINE_BRANCH_URL: ${CHT_PIPELINE_BRANCH_URL} + DBT_SELECTOR: tag:reports + DBT_LOCAL_PATH: ${DBT_LOCAL_PATH} - dbt-models: - <<: *dbt-common + couchdb: + image: public.ecr.aws/medic/cht-couchdb:4.15.0 + restart: always profiles: - - base-and-package - depends_on: - - dbt-base + - test + ports: + - "5984:5984" environment: - <<: *dbt-env - DBT_SELECTOR: package:${DBT_PACKAGE_NAME} - CHT_PIPELINE_BRANCH_URL: ${CHT_PIPELINE_BRANCH_URL} + - COUCHDB_USER=${COUCHDB_USER} + - COUCHDB_PASSWORD=${COUCHDB_PASSWORD} + - COUCHDB_SECRET=9c0d6034-0f19-45df-8fcd-5fec9c473c73 + - COUCHDB_UUID=4c6bffc8-a5ac-4f98-a34c-6fb0e63964e5 + - SVC_NAME=couchdb - dbt-contacts: + postgres: + image: postgres:16 + restart: always + profiles: + - test + - local + volumes: + - ./postgres/init-dbt-resources.sh:/docker-entrypoint-initdb.d/init-dbt-resources.sh:z + environment: + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} + - POSTGRES_DB=${POSTGRES_DB} + - POSTGRES_TABLES=${COUCHDB_DBS} + - POSTGRES_SCHEMA=${POSTGRES_SCHEMA} + +#### Local 
############################################################################################## + + dbt-local: <<: *dbt-common volumes: - "${DBT_LOCAL_PATH}:/dbt/package/" profiles: - - test + - local environment: <<: *dbt-env - DBT_SELECTOR: tag:contacts + DBT_SELECTOR: '' DBT_LOCAL_PATH: ${DBT_LOCAL_PATH} - dbt-reports: + pgadmin: + image: dpage/pgadmin4 + environment: + PGADMIN_DEFAULT_EMAIL: ${PGADMIN_DEFAULT_EMAIL:-pgadmin4@pgadmin.org} + PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD:-admin} + PGADMIN_CONFIG_SERVER_MODE: 'False' + ports: + - "${PGADMIN_PORT:-5050}:80" + + +#### Production ########################################################################################## + + dbt-production: <<: *dbt-common - volumes: - - "${DBT_LOCAL_PATH}:/dbt/package/" profiles: - - test - depends_on: - - dbt-contacts + - production environment: <<: *dbt-env - DBT_SELECTOR: tag:reports - DBT_LOCAL_PATH: ${DBT_LOCAL_PATH} + DBT_SELECTOR: '' + CHT_PIPELINE_BRANCH_URL: ${CHT_PIPELINE_BRANCH_URL} + + bastion: + build: ./bastion/ + restart: unless-stopped + profiles: + - production + ports: + - ${BASTION_PORT:-22222}:22/tcp + volumes: + - ${BASTION_AUTHORIZED_KEYS_FILE:-$PWD/bastion/authorized_keys}:/var/lib/bastion/authorized_keys-tmp + - bastion:/usr/etc/ssh:rw + From 5a684ec653abfe8934b4add7670e189261c12563 Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Mon, 3 Mar 2025 16:20:41 +0300 Subject: [PATCH 20/25] fix: fixing tests --- docker-compose.yml | 9 ++++++--- package.json | 4 ++-- tests/utils/docker-utils.js | 2 -- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 320aa03..ec331b9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,8 +14,8 @@ x-dbt-base: &dbt-common POSTGRES_SCHEMA: ${POSTGRES_SCHEMA} ROOT_POSTGRES_SCHEMA: ${POSTGRES_SCHEMA} DATAEMON_INTERVAL: ${DATAEMON_INTERVAL} - DBT_THREAD_COUNT: ${DBT_THREAD_COUNT} - DBT_BATCH_SIZE: ${DBT_BATCH_SIZE} + DBT_THREAD_COUNT: 
${DBT_THREAD_COUNT:-1} + DBT_BATCH_SIZE: ${DBT_BATCH_SIZE:-0} services: couch2pg: @@ -56,7 +56,7 @@ services: profiles: - test depends_on: - - dbt-contacts + - dbt-test-contacts environment: <<: *dbt-env DBT_SELECTOR: tag:reports @@ -130,9 +130,12 @@ services: restart: unless-stopped profiles: - production + - test ports: - ${BASTION_PORT:-22222}:22/tcp volumes: - ${BASTION_AUTHORIZED_KEYS_FILE:-$PWD/bastion/authorized_keys}:/var/lib/bastion/authorized_keys-tmp - bastion:/usr/etc/ssh:rw +volumes: + bastion: diff --git a/package.json b/package.json index c4fe57f..1f01bba 100644 --- a/package.json +++ b/package.json @@ -8,8 +8,8 @@ "test:e2e": "npm run test:e2e-data && npm run test:e2e-stop-containers && npm run test:e2e-containers && npm run test:e2e-mocha && npm run test:e2e-stop-containers ", "test:e2e-mocha": "mocha tests/**/*.spec.js --timeout 70000", "lint": "eslint --color --cache .", - "test:e2e-stop-containers": "docker compose --env-file ./tests/.e2e-env -f docker-compose.yml -f docker-compose.couchdb.yml -f docker-compose.postgres.yml -f docker-compose.bastion.yml kill && docker compose --env-file ./tests/.e2e-env -f docker-compose.yml -f docker-compose.couchdb.yml -f docker-compose.postgres.yml -f docker-compose.bastion.yml down -v", - "test:e2e-containers": "docker compose --profile test --env-file ./tests/.e2e-env -f docker-compose.yml -f docker-compose.couchdb.yml -f docker-compose.postgres.yml -f docker-compose.bastion.yml up -d --build --force-recreate && npm run wait-for-couchdb", + "test:e2e-stop-containers": "docker compose --profile test --env-file ./tests/.e2e-env -f docker-compose.yml kill && docker compose --profile test --env-file ./tests/.e2e-env -f docker-compose.yml down -v", + "test:e2e-containers": "docker compose --profile test --env-file ./tests/.e2e-env -f docker-compose.yml up -d --build --force-recreate && npm run wait-for-couchdb", "test:e2e-data": "cd tests/data && rm -rf ./json_docs && cht csv-to-docs", "test": "cd couch2pg && 
npm run test", "wait-for-couchdb": "bash -c 'until nc -z localhost 5984; do sleep 1; done; echo \"CouchDB is ready\"'" diff --git a/tests/utils/docker-utils.js b/tests/utils/docker-utils.js index 125a177..19716e3 100644 --- a/tests/utils/docker-utils.js +++ b/tests/utils/docker-utils.js @@ -2,8 +2,6 @@ import { execSync } from 'child_process'; const composeFiles = [ 'docker-compose.yml', - 'docker-compose.couchdb.yml', - 'docker-compose.postgres.yml' ].map(file => `-f ${file}`).join(' '); const execDockerCommand = (command) => { From bc19b825b2f671ae0b09d28c2e645e079a8d4e7b Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Wed, 5 Mar 2025 17:41:41 +0300 Subject: [PATCH 21/25] fix: allowing local profile w/o DBT_LOCAL_PATH --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index ec331b9..5433c5b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,4 @@ name: ${COMPOSE_PROJECT_NAME:-cht-sync} -version: '3.8' x-dbt-base: &dbt-common build: ./dbt/ @@ -103,6 +102,7 @@ services: <<: *dbt-env DBT_SELECTOR: '' DBT_LOCAL_PATH: ${DBT_LOCAL_PATH} + CHT_PIPELINE_BRANCH_URL: ${CHT_PIPELINE_BRANCH_URL} pgadmin: image: dpage/pgadmin4 From 57a8d1563ac17676a700c52f2d048a76e677d632 Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Fri, 7 Mar 2025 13:27:27 +0300 Subject: [PATCH 22/25] fix: small fixes for docker-compose --- docker-compose.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 5433c5b..ac2620c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -73,7 +73,8 @@ services: - COUCHDB_PASSWORD=${COUCHDB_PASSWORD} - COUCHDB_SECRET=9c0d6034-0f19-45df-8fcd-5fec9c473c73 - COUCHDB_UUID=4c6bffc8-a5ac-4f98-a34c-6fb0e63964e5 - - SVC_NAME=couchdb + +#### Local ############################################################################################## postgres: image: postgres:16 @@ -90,8 +91,6 @@ services: - 
POSTGRES_TABLES=${COUCHDB_DBS} - POSTGRES_SCHEMA=${POSTGRES_SCHEMA} -#### Local ############################################################################################## - dbt-local: <<: *dbt-common volumes: @@ -102,10 +101,11 @@ services: <<: *dbt-env DBT_SELECTOR: '' DBT_LOCAL_PATH: ${DBT_LOCAL_PATH} - CHT_PIPELINE_BRANCH_URL: ${CHT_PIPELINE_BRANCH_URL} pgadmin: image: dpage/pgadmin4 + profiles: + - local environment: PGADMIN_DEFAULT_EMAIL: ${PGADMIN_DEFAULT_EMAIL:-pgadmin4@pgadmin.org} PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD:-admin} From f7b12ae29a754af6f5b2f1ad6864949eb7cee80e Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Fri, 7 Mar 2025 17:00:57 +0300 Subject: [PATCH 23/25] fix: add dbt_selector to migration script --- deploy/scripts/upgrade/add-manifest-and-source.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/deploy/scripts/upgrade/add-manifest-and-source.sh b/deploy/scripts/upgrade/add-manifest-and-source.sh index 7047716..424087d 100755 --- a/deploy/scripts/upgrade/add-manifest-and-source.sh +++ b/deploy/scripts/upgrade/add-manifest-and-source.sh @@ -7,6 +7,7 @@ source "$SCRIPT_DIR/lib/migration.sh" # SQL statements to execute SQL_STATEMENTS=( "ALTER TABLE _dataemon ADD COLUMN IF NOT EXISTS manifest jsonb;" + "ALTER TABLE _dataemon ADD COLUMN IF NOT EXISTS dbt_selector text;" "ALTER TABLE couchdb ADD COLUMN IF NOT EXISTS source varchar;" "CREATE INDEX IF NOT EXISTS source ON couchdb(source);" ) From bf342393cabd0217f454286aa83b83002e54f6e6 Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Fri, 14 Mar 2025 11:55:55 +0300 Subject: [PATCH 24/25] Revert "fix: changing dockerhub username" This reverts commit 26f43030583b90d74efa05b638e59c017da97df8. 
--- .github/workflows/test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6588cf0..95810ae 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,7 +4,6 @@ on: [push, pull_request] env: INTERNAL_CONTRIBUTOR: ${{ secrets.DOCKERHUB_USERNAME && 'true' }} - DOCKERHUB_USERNAME: 'dockermedic' jobs: unit-tests: @@ -32,7 +31,7 @@ jobs: - name: Login to Docker Hub uses: docker/login-action@v3 with: - username: ${{ env.DOCKERHUB_USERNAME }} + username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} if: ${{ env.INTERNAL_CONTRIBUTOR }} - run: npm ci From 3d232680f7df488c92dc5078b4c875c30a192dfc Mon Sep 17 00:00:00 2001 From: Tom Wier Date: Fri, 28 Mar 2025 12:54:21 +0300 Subject: [PATCH 25/25] fix: add selector to model change command --- dbt/dbt-run.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/dbt/dbt-run.py b/dbt/dbt-run.py index 72353c6..6e423fe 100644 --- a/dbt/dbt-run.py +++ b/dbt/dbt-run.py @@ -132,15 +132,21 @@ def update_models(): # save the new manifest and package for the next run save_package_manifest(package_json, manifest_json) - # anything that changed, run a full refresh - subprocess.run(["dbt", "run", + args = ["dbt", "run", "--profiles-dir", ".dbt", - "--select", - "state:modified", "--full-refresh", "--state", - "./old_manifest"]) + "./old_manifest", + "--select"] + + if dbt_selector: + args.append(f"{dbt_selector},state:modified") + else: + args.append("state:modified") + + # anything that changed, run a full refresh + subprocess.run(args) def run_incremental_models(): # update incremental models (and tables if there are any)