forked from sustainability-software-lab/ca-biositing
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathpixi.toml
More file actions
437 lines (374 loc) · 16.4 KB
/
pixi.toml
File metadata and controls
437 lines (374 loc) · 16.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
# The version embedded in the schema URL (v0.55.0) is the minimum supported
# `pixi` version; it matches the lower bound of `requires-pixi` below.
"$schema" = "https://pixi.sh/v0.55.0/schema/manifest/schema.json"

# Workspace identity, package channel, and the platforms the lock file targets.
[workspace]
name = "ca-biositing"
version = "0.1.0"
authors = ["SSEC Team"]
channels = ["conda-forge"]
platforms = [
    "osx-arm64",
    "osx-64",
    "linux-64",
    "linux-aarch64",
    "win-64",
]
requires-pixi = ">=0.55,<1.0"
# Named environments, each assembled from the `[feature.*]` tables in this file.
# Entries written as a plain list use only `features`; the inline-table entries
# additionally join the "default" solve group and/or drop the default feature.
# NOTE(review): the `gis` environment is built from qgis/raster/vector/kernel and
# does not include the `gis` feature; only `default` pulls that feature in.
# Confirm this is intentional.
[environments]
default = ["datamodels", "pipeline", "webservice", "kernel", "gis"]
py312 = ["py312", "datamodels", "pipeline", "webservice"]
py313 = ["py313", "datamodels", "pipeline", "webservice"]
gis = { features = ["qgis", "raster", "vector", "kernel"], solve-group = "default" }
etl = { features = ["datamodels", "pipeline"], solve-group = "default" }
webservice = { features = ["datamodels", "webservice"], solve-group = "default" }
frontend = { features = ["frontend"], no-default-feature = true, solve-group = "default" }
docs = { features = ["docs"], no-default-feature = true, solve-group = "default" }
deployment = { features = ["cloud"], solve-group = "default" }
# Conda dependencies of the default feature, grouped by purpose.
[dependencies]
# Interpreter and packaging
python = "~=3.12"
pip = ">=25.2,<26"

# Developer tooling and tests
pre-commit = ">=4.2.0,<5"
pytest = ">=8.4.2,<9"
pytest-cov = ">=7.0.0,<8"
testcontainers = ">=4.13.2,<5"

# Docker tooling used by the compose-based tasks
docker-cli = ">=28.3.1,<29"
docker-compose = ">=2.39.2,<3"

# Runtime libraries
asyncpg = ">=0.29.0,<0.30"
pydrive2 = ">=1.21.3,<2"
python-dotenv = ">=1.2.1,<2"

# Data and geospatial stack
pandas = ">=2.2,<3"
geopandas = ">=1.1.2,<2"
pyogrio = ">=0.10.0"
proj = "*"

# Pin these to versions compatible with prefect 3.x (pypi)
importlib-metadata = "<8.8"
packaging = "<25.1"
pytz = "<2026"
# Workspace-level tasks (run with `pixi run <task>`).
[tasks]
# Git hook helpers.
pre-commit-all = "pre-commit run --all-files"
pre-commit-install = "pre-commit install"

# Runs `alembic revision --autogenerate` in a throw-away `setup-db` compose
# container; `outputs` names the migration file the task is expected to produce.
initial-migration = { cmd = [
    "docker-compose", "-f", "resources/docker/docker-compose.yml",
    "run", "--rm", "setup-db",
    "alembic", "revision", "--autogenerate", "-m", "Initial migration",
], description = "Create the initial database migration script.", outputs = [
    "alembic/versions/*_initial_migration.py",
] }
# Docker Compose lifecycle tasks. All of them target
# resources/docker/docker-compose.yml.
start-services = { cmd = [
    "docker-compose", "--file", "resources/docker/docker-compose.yml",
    "up", "-d",
], description = "Start required services using Docker Compose." }

teardown-services = { cmd = [
    "docker-compose", "-f", "resources/docker/docker-compose.yml",
    "down",
], description = "Teardown services started by Docker Compose." }

# Same as teardown-services, plus `-v` to drop the compose volumes.
teardown-services-volumes = { cmd = [
    "docker-compose", "-f", "resources/docker/docker-compose.yml",
    "down", "-v",
], description = "Teardown services and remove volumes (deletes all data)." }

service-status = { cmd = [
    "docker-compose", "-f", "resources/docker/docker-compose.yml",
    "ps",
], description = "Check status of running services." }

# `-f` after `logs` follows the log stream.
service-logs = { cmd = [
    "docker-compose", "-f", "resources/docker/docker-compose.yml",
    "logs", "-f",
], description = "View logs from all services." }

rebuild-services = { cmd = [
    "docker-compose", "-f", "resources/docker/docker-compose.yml",
    "build", "--no-cache",
], description = "Rebuild Docker images without cache." }
# `docker-compose exec` wrappers. The command to run inside the container is
# supplied by the caller after the task name (see each description).
exec-prefect-worker = { cmd = [
    "docker-compose", "-f", "resources/docker/docker-compose.yml",
    "exec", "prefect-worker",
], description = "Execute commands in the Prefect worker container. Usage: pixi run exec-prefect-worker <command>" }

exec-db = { cmd = [
    "docker-compose", "-f", "resources/docker/docker-compose.yml",
    "exec", "db",
], description = "Execute commands in the database container. Usage: pixi run exec-db <command>" }

# Sources /shell-hook.sh inside the worker container before invoking the
# Prefect CLI; `start-services` runs first via `depends-on`.
run-etl = { cmd = [
    "docker-compose", "-f", "resources/docker/docker-compose.yml",
    "exec", "prefect-worker",
    "/bin/bash", "-c",
    "source /shell-hook.sh && prefect deployment run 'Master ETL Flow/master-etl-deployment'",
], description = "Trigger the master ETL flow deployment.", depends-on = ["start-services"] }
# Database helpers. Each one runs a PostgreSQL client tool inside the `db`
# compose service as `biocirv_user`.
check-db-health = { cmd = [
    "docker-compose", "-f", "resources/docker/docker-compose.yml",
    "exec", "db",
    "pg_isready", "-U", "biocirv_user", "-d", "biocirv_db",
], description = "Check PostgreSQL database health." }

access-db = { cmd = [
    "docker-compose", "-f", "resources/docker/docker-compose.yml",
    "exec", "db",
    "psql", "-U", "biocirv_user", "-d", "biocirv_db",
], description = "Access the application database with psql." }

# Identical to access-db except for the target database (`prefect_db`).
access-prefect-db = { cmd = [
    "docker-compose", "-f", "resources/docker/docker-compose.yml",
    "exec", "db",
    "psql", "-U", "biocirv_user", "-d", "prefect_db",
], description = "Access the Prefect metadata database with psql." }
# Restart everything, or only the Prefect worker.
restart-services = { cmd = [
    "docker-compose", "-f", "resources/docker/docker-compose.yml",
    "restart",
], description = "Restart all services." }

restart-prefect-worker = { cmd = [
    "docker-compose", "-f", "resources/docker/docker-compose.yml",
    "restart", "prefect-worker",
], description = "Restart the Prefect worker service." }

# Alembic tasks. These call `alembic` directly rather than through docker-compose.
migrate = { cmd = ["alembic", "upgrade", "head"], description = "Apply database migrations locally to the Docker database." }
migrate-autogenerate = { cmd = "alembic revision --autogenerate", description = "Auto-generate a new Alembic migration from model changes." }
# pgschema CLI (pre-built binary, not on conda-forge)
# See https://www.pgschema.com/installation#pre-built-binary
[tasks.install-pgschema]
cmd = "bash scripts/install-pgschema.sh"
description = "Install pgschema CLI binary into the pixi environment."

# SQL-First Workflow Tasks (pgschema)
# These tasks manage the database schema using raw SQL files as the source of truth.
# Each pgschema task sets PGSSLMODE=disable and connects to localhost:5432.
[tasks.schema-dump]
cmd = "bash -c 'pgschema dump --host localhost --port 5432 --user biocirv_user --db biocirv_db --password biocirv_dev_password > src/ca_biositing/datamodels/ca_biositing/datamodels/sql_schemas/main.sql'"
description = "Dump the current database schema to the reference SQL file."
env = { PGSSLMODE = "disable" }

# Public Schema Management (validation only - operational tasks replaced by Alembic)
[tasks.schema-plan]
cmd = "pgschema plan --host localhost --port 5432 --user biocirv_user --db biocirv_db --file src/ca_biositing/datamodels/ca_biositing/datamodels/sql_schemas/main.sql --password biocirv_dev_password --plan-db biocirv_db_shadow --plan-host localhost --plan-user biocirv_user --plan-password biocirv_dev_password --schema public"
description = "Plan changes for the 'public' schema."
env = { PGSSLMODE = "disable" }

# Analytics Schema Management (validation only - operational tasks replaced by Alembic)
[tasks.schema-analytics-plan]
cmd = "pgschema plan --host localhost --port 5432 --user biocirv_user --db biocirv_db --file src/ca_biositing/datamodels/ca_biositing/datamodels/sql_schemas/main.sql --password biocirv_dev_password --plan-db biocirv_db_shadow --plan-host localhost --plan-user biocirv_user --plan-password biocirv_dev_password --schema ca_biositing"
description = "Plan changes for the 'ca_biositing' analytics schema."
env = { PGSSLMODE = "disable" }

# Materialized View Utilities
# Re-enters pixi to reuse the `access-db` task, passing a query to psql via `-c`.
[tasks.schema-analytics-list]
cmd = "pixi run access-db -c \"SELECT schemaname, matviewname, ispopulated FROM pg_matviews\""
description = "List all materialized views in the database."

# DATABASE_URL is only given the local default when it is not already set
# (`os.environ.setdefault`).
[tasks.refresh-views]
cmd = "python -c \"import os; os.environ.setdefault('DATABASE_URL', 'postgresql://biocirv_user:biocirv_dev_password@localhost:5432/biocirv_db'); from ca_biositing.datamodels.views import refresh_all_views; from ca_biositing.datamodels.database import get_engine; refresh_all_views(get_engine())\""
description = "Refresh all materialized views using SQLAlchemy."
# Alias task with no command of its own: it only triggers `run-qgis` in the
# `gis` environment.
[tasks.qgis]
depends-on = [
    { task = "run-qgis", environment = "gis" },
]

# Serves the webservice app with uvicorn in auto-reload mode, from `src/`.
[tasks.start-webservice]
cmd = "uvicorn ca_biositing.webservice.main:app --reload"
cwd = "src"
# Test runners. Both collect from `tests/`.
[tasks.test]
description = "Run all tests"
cmd = "python -m pytest tests/ --verbose"

# Coverage is measured over src/ca_biositing and reported as HTML plus a
# terminal listing of missing lines.
[tasks.test-cov]
description = "Run all tests with coverage and HTML report"
cmd = "python -m pytest tests/ --cov=src/ca_biositing --cov-report=html --cov-report=term-missing"

[tasks.submodule-frontend-init]
description = "Initialize only the frontend submodule."
cmd = "git submodule update --init frontend"
# GIS Dependencies
# Three separate features (qgis, raster, vector) that the `gis` environment
# combines.
[feature.qgis.dependencies]
qgis = "*"

[feature.qgis.tasks.run-qgis]
cmd = "qgis"
description = "Run QGIS for geospatial analysis and visualization."

[feature.raster.dependencies]
rasterio = "*"
xarray = "*"

[feature.vector.dependencies]
geopandas = "*"
proj = "*"
pyproj = "*"
shapely = "*"
# Python version pins, one feature per minor version. The `py312` and `py313`
# environments select them.
[feature.py312.dependencies]
python = "3.12.*"

[feature.py313.dependencies]
python = "3.13.*"
# Frontend Dependencies
# Only Node.js is provided here; the npm packages themselves are declared in
# the frontend repository. All tasks run from the `frontend/` directory.
[feature.frontend.dependencies]
nodejs = ">=18,<21"

[feature.frontend.tasks.frontend-install]
cmd = "npm install"
cwd = "frontend"
description = "Install frontend dependencies."

[feature.frontend.tasks.frontend-dev]
cmd = "npm run dev"
cwd = "frontend"
description = "Run frontend in development mode."

[feature.frontend.tasks.frontend-build]
cmd = "npm run build"
cwd = "frontend"
description = "Build the production frontend bundle."
# Kernel Dependencies
# pixi-kernel supplies a Jupyter kernel that executes inside a pixi
# environment, which makes every installed package (our namespace packages
# included) importable with no PYTHONPATH changes.
[feature.kernel.pypi-dependencies]
pixi-kernel = "*"

# Datamodels Dependencies
# The local datamodels package, installed in editable mode from the source tree.
[feature.datamodels.pypi-dependencies]
ca-biositing-datamodels = { path = "./src/ca_biositing/datamodels", editable = true }
# Pipeline Dependencies
[feature.pipeline.pypi-dependencies]
# Local pipeline package, installed in editable mode.
ca-biositing-pipeline = { path = "./src/ca_biositing/pipeline", editable = true }
# Alembic is required for generating migrations inside the worker container
alembic = ">=1.13.2,<2"
# Prefect is required for `prefect flow-run execute` inside Cloud Run flow jobs
prefect = ">=3,<4"

# Runs resources/prefect/deploy.py after `start-services`. Both arguments are
# optional and fall back to the defaults declared in `args`.
[feature.pipeline.tasks.deploy]
description = "Deploy the ETL pipeline to Prefect."
cwd = "resources/prefect"
args = [
    { arg = "deployment_name", default = "master-etl-deployment" },
    { arg = "env_file", default = "../docker/.env" },
]
cmd = ["python", "deploy.py", "--env-file", "{{ env_file }}", "{{ deployment_name }}"]
depends-on = [{ task = "start-services" }]
# Webservice Dependencies
# The local webservice package (editable) plus its PyPI requirements.
[feature.webservice.pypi-dependencies]
ca-biositing-webservice = { path = "./src/ca_biositing/webservice", editable = true }
fastapi = ">=0.111.0,<0.115.0"
pydantic = ">=2.5,<3.0"
pwdlib = { version = ">=0.2.0", extras = ["argon2"] }
pyjwt = ">=2.0,<3"
python-multipart = ">=0.0.9"

# DATABASE_URL is set for this task to the local development database on
# localhost:5432.
[feature.webservice.tasks.create-admin]
description = "Create an admin user for API authentication"
cmd = "python scripts/create_admin.py"
env = { DATABASE_URL = "postgresql://biocirv_user:biocirv_dev_password@localhost:5432/biocirv_db" }
# Documentation feature (MkDocs). The `docs` environment uses it without the
# default feature, so it declares its own `python` and `pip`.
[feature.docs.dependencies]
python = ">=3.11,<3.14"
pip = "*"
mkdocs = "*"
mkdocs-git-revision-date-localized-plugin = "*"
mkdocs-material = "*"
pymdown-extensions = "*"

# MkDocs plugins taken from PyPI rather than from the conda channel.
[feature.docs.pypi-dependencies]
mkdocs-awesome-pages-plugin = "*"
mkdocstrings = { version = "~=1.0", extras = ["python"] }

[feature.docs.tasks.docs-serve]
cmd = "mkdocs serve"
description = "Run MkDocs locally for previewing documentation."

[feature.docs.tasks.docs-build]
cmd = "mkdocs build"
description = "Build static documentation site."
# Workspace-level task that re-enters pixi with `-e docs` so the build runs in
# the docs environment; the site is written under $READTHEDOCS_OUTPUT/html.
[tasks.rtd-publish]
description = "Build documentation for ReadTheDocs"
cmd = "pixi run -e docs python -m mkdocs build --clean --site-dir $READTHEDOCS_OUTPUT/html --config-file mkdocs.yml"
# ==== Deployment environment ====
# The `cloud` feature backs the `deployment` environment.
[feature.cloud.activation]
scripts = ["scripts/activate-gcloud.sh"]

# Both installers download a vendor script with curl and install into
# $CONDA_PREFIX. `outputs` names the binary each task is expected to produce.
[feature.cloud.tasks.install-gcloud]
description = "Install the Google Cloud CLI into the pixi environment."
cmd = "curl -fsSL https://sdk.cloud.google.com | bash -s -- --disable-prompts --install-dir=$CONDA_PREFIX"
outputs = ["**/google-cloud-sdk/bin/gcloud"]

# `${PULUMI_VERSION:+...}` adds `--version` only when PULUMI_VERSION is set and
# non-empty; the expansion is evaluated by the inner `bash -c`.
[feature.cloud.tasks.install-pulumi]
description = "Install Pulumi CLI. Set PULUMI_VERSION env var to install a specific version (e.g. PULUMI_VERSION=3.220.0 pixi run install-pulumi)."
cmd = "bash -c 'curl -fsSL https://get.pulumi.com | bash -s -- --install-root $CONDA_PREFIX ${PULUMI_VERSION:+--version $PULUMI_VERSION}'"
outputs = ["**/bin/pulumi"]

[feature.cloud.dependencies]
postgresql = "*"

[feature.cloud.pypi-dependencies]
prefect = ">=3,<4"
pulumi = ">=3,<4"
pulumi-gcp = ">=9,<10"
pulumi-random = ">=4,<5"
# Pulumi tasks (run via Docker to avoid macOS gRPC/fork crash)
# The plan/deploy/destroy/outputs/refresh tasks all go through
# scripts/pulumi-docker.sh with a different subcommand.

# Creates the bucket, then enables versioning on it in a second gcloud call.
[feature.cloud.tasks.cloud-bootstrap]
description = "Create the Pulumi state bucket in GCS (one-time setup)."
cmd = "gcloud storage buckets create gs://biocirv-470318-pulumi-state --location=us-west1 --uniform-bucket-level-access --public-access-prevention && gcloud storage buckets update gs://biocirv-470318-pulumi-state --versioning"

[feature.cloud.tasks.cloud-build]
description = "Build the Pulumi Docker image for infrastructure deployment."
cmd = "docker build -t ca-biositing-pulumi deployment/cloud/gcp/infrastructure/"

[feature.cloud.tasks.cloud-migrate]
description = "Refresh the migration job image digest and apply Alembic migrations. Set DEPLOY_ENV for target environment."
cmd = "bash scripts/cloud-migrate-ci.sh"

[feature.cloud.tasks.cloud-seed-admin]
description = "Seed the admin user in the database. Set DEPLOY_ENV for target environment."
cmd = "bash scripts/cloud-seed-admin.sh"

[feature.cloud.tasks.cloud-plan]
description = "Preview pending infrastructure changes. Set DEPLOY_ENV for target environment."
cmd = "bash scripts/pulumi-docker.sh preview"

[feature.cloud.tasks.cloud-deploy]
description = "Deploy infrastructure changes to GCP. Set DEPLOY_ENV for target environment."
cmd = "bash scripts/pulumi-docker.sh up"

[feature.cloud.tasks.cloud-destroy]
description = "Destroy all managed GCP infrastructure. Set DEPLOY_ENV for target environment."
cmd = "bash scripts/pulumi-docker.sh destroy"

[feature.cloud.tasks.cloud-outputs]
description = "Show Pulumi stack outputs. Set DEPLOY_ENV for target environment."
cmd = "bash scripts/pulumi-docker.sh outputs"

[feature.cloud.tasks.cloud-refresh]
description = "Refresh Pulumi state from live GCP resources. Set DEPLOY_ENV for target environment."
cmd = "bash scripts/pulumi-docker.sh refresh"
# --- CI / Linux-native tasks (no Docker wrapper) ---
# These tasks run directly on Linux (GitHub Actions or Linux workstations).
# The image tasks read IMAGE_TAG from the environment (default: "latest").

# The four `*-direct` tasks call deploy.py with a different subcommand and
# always run with an empty PULUMI_CONFIG_PASSPHRASE.
[feature.cloud.tasks.cloud-deploy-direct]
cmd = "python deployment/cloud/gcp/infrastructure/deploy.py up"
env = { PULUMI_CONFIG_PASSPHRASE = "" }
description = "Deploy infrastructure via Pulumi (direct, no Docker). Set DEPLOY_ENV for target environment. For Linux/CI use."

[feature.cloud.tasks.cloud-plan-direct]
cmd = "python deployment/cloud/gcp/infrastructure/deploy.py preview"
env = { PULUMI_CONFIG_PASSPHRASE = "" }
description = "Preview infrastructure changes via Pulumi (direct, no Docker). Set DEPLOY_ENV for target environment. For Linux/CI use."

[feature.cloud.tasks.cloud-refresh-direct]
cmd = "python deployment/cloud/gcp/infrastructure/deploy.py refresh"
env = { PULUMI_CONFIG_PASSPHRASE = "" }
description = "Refresh Pulumi state from live GCP resources (direct, no Docker). Set DEPLOY_ENV for target environment. For Linux/CI use."

[feature.cloud.tasks.cloud-outputs-direct]
cmd = "python deployment/cloud/gcp/infrastructure/deploy.py outputs"
env = { PULUMI_CONFIG_PASSPHRASE = "" }
description = "Show Pulumi stack outputs (direct, no Docker). Set DEPLOY_ENV for target environment. For Linux/CI use."

# IMAGE_TAG defaults to "latest" inside bash instead of through the task `env`
# table. Recent pixi releases give task `env` priority over variables exported
# by the caller, which would pin the tag to "latest" and silently ignore
# `IMAGE_TAG=abc1234 pixi run cloud-migrate-ci`. `${IMAGE_TAG:-latest}` keeps a
# caller-supplied value and only falls back when it is unset or empty.
[feature.cloud.tasks.cloud-migrate-ci]
cmd = "bash -c 'IMAGE_TAG=${IMAGE_TAG:-latest} bash scripts/cloud-migrate-ci.sh'"
description = "Update migration job image with IMAGE_TAG and execute. Usage: DEPLOY_ENV=production IMAGE_TAG=abc1234 pixi run cloud-migrate-ci"

# Same IMAGE_TAG handling as cloud-migrate-ci: the default is applied in bash
# so that a tag exported by the caller is honoured.
[feature.cloud.tasks.cloud-update-services]
cmd = "bash -c 'IMAGE_TAG=${IMAGE_TAG:-latest} bash scripts/cloud-update-services.sh'"
description = "Force new Cloud Run revisions with IMAGE_TAG. Usage: DEPLOY_ENV=production IMAGE_TAG=abc1234 pixi run cloud-update-services"

[feature.cloud.tasks.cloud-validate]
cmd = "bash scripts/validate-deployment.sh"
description = "Validate deployment health. Set DEPLOY_ENV for target environment."

[feature.cloud.tasks.cloud-trigger-etl]
cmd = "bash scripts/cloud-trigger-etl.sh"
description = "Trigger the Master ETL flow via Prefect API (fire-and-forget). For CI use."
# PyPI packages for the `gis` feature (part of the `default` environment).
# NOTE(review): geopandas is also declared as a conda dependency of the default
# feature; confirm the PyPI entry is still needed.
[feature.gis.pypi-dependencies]
geopandas = "*"
matplotlib = "*"

# PyPI dependencies of the default feature.
[pypi-dependencies]
sqlalchemy = ">=2.0.44, <3"