Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
267 changes: 267 additions & 0 deletions Cargo.lock

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ license = "AGPL-3.0"

[dependencies]
bcrypt = "0.17.1"
clap = { version = "4.5.38", features = ["derive"] }
chrono = "0.4.41"
color-eyre = "0.6.5"
dotenvy = "0.15.7"
Expand All @@ -15,13 +16,14 @@ petgraph = "0.8.2"
poem = { version = "3.1.12", features = ["embed"] }
poem-openapi = { version = "5.1.16", features = ["chrono", "swagger-ui", "uuid"] }
rand = "0.9.2"
reqwest = { version = "0.12.15", default-features = false, features = ["json", "rustls-tls"] }
rust-embed = "8.7.2"
serde = "1.0.219"
serde_json = "1.0.143"
sqlx = { version = "0.8.6", features = ["chrono", "postgres", "runtime-tokio", "tls-rustls", "uuid"] }
strum = { version = "0.27.2", features = ["derive"] }
thiserror = "2.0.16"
tokio = { version = "1.47.1", features = ["rt-multi-thread"] }
tokio = { version = "1.47.1", features = ["io-util", "macros", "process", "rt-multi-thread"] }
tracing = "0.1.41"
tracing-subscriber = "0.3.20"
uuid = { version = "1.18.0", features = ["v4"] }
Expand Down
13 changes: 13 additions & 0 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,19 @@ fn main() {
)
.expect("Failed to copy viz-global.js from node_modules");

// Populate Assets directory with Nice-DAG core
fs::create_dir_all("assets/nice-dag").expect("Failed to create assets/nice-dag directory");
fs::copy(
"node_modules/@ebay/nice-dag-core/lib/index.umd.cjs",
"assets/nice-dag/nice-dag-core.js",
)
.expect("Failed to copy nice-dag-core.js from node_modules");

// Populate Assets directory with Fletcher JS helpers
fs::create_dir_all("assets/js").expect("Failed to create assets/js directory");
fs::copy("scripts/plan-dag.js", "assets/js/plan-dag.js")
.expect("Failed to copy plan-dag.js into assets");

// Populate Assets directory with Prism.js
fs::create_dir_all("assets/prism").expect("Failed to create assets/prism directory");
fs::copy("node_modules/prismjs/prism.js", "assets/prism/prism.js")
Expand Down
2 changes: 2 additions & 0 deletions migrations/20260410170000_execution_log.down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Revert migration 20260410170000: remove the execution-log index, then the table.
-- IF EXISTS keeps the rollback idempotent if it is re-run.
DROP INDEX IF EXISTS execution_log_dataset_product_run_idx;
DROP TABLE IF EXISTS execution_log;
14 changes: 14 additions & 0 deletions migrations/20260410170000_execution_log.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- Append-only log of output emitted while executing a data product.
-- One row per captured message; log_id gives a stable total order.
CREATE TABLE execution_log (
    log_id BIGSERIAL PRIMARY KEY,
    dataset_id UUID NOT NULL,
    data_product_id UUID NOT NULL,
    -- NOTE(review): run_id is part of the log identity but carries no
    -- referential constraint, so a bad callback could attribute logs to a
    -- nonexistent or mismatched run. TODO: add a FOREIGN KEY tying
    -- (run_id, dataset_id, data_product_id) to the table that owns step runs
    -- once that table's unique key is confirmed.
    run_id UUID NOT NULL,
    -- Which output stream the message came from (e.g. stdout/stderr) —
    -- presumably; confirm against the writer.
    stream TEXT NOT NULL,
    message TEXT NOT NULL,
    created_by TEXT NOT NULL,
    created_at TIMESTAMPTZ NOT NULL,
    FOREIGN KEY(dataset_id, data_product_id) REFERENCES data_product(dataset_id, data_product_id)
);

-- Supports reading one run's log in insertion order.
CREATE INDEX execution_log_dataset_product_run_idx
    ON execution_log (dataset_id, data_product_id, run_id, log_id);
4 changes: 4 additions & 0 deletions migrations/20260410190000_plan_runs.down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Revert migration 20260410190000: drop child snapshot tables first, then the
-- parent. IF EXISTS keeps the rollback idempotent if it is re-run
-- (consistent with execution_log.down.sql).
DROP INDEX IF EXISTS plan_run_dataset_created_idx;
DROP TABLE IF EXISTS plan_run_dependency;
DROP TABLE IF EXISTS plan_run_data_product;
DROP TABLE IF EXISTS plan_run;
Comment on lines +1 to +4
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Make the down migration idempotent with IF EXISTS.

The explicit DROP INDEX on line 3 already precedes DROP TABLE plan_run on line 4, so the ordering itself is safe; even if the table were dropped first, the index would go with it implicitly. The actionable improvement is to add IF EXISTS to each statement, consistent with execution_log.down.sql, so the rollback can be re-run safely. Dropping the index before the parent table (as the suggested fix does) also keeps the statements robust under reordering.

🔧 Proposed fix
-DROP TABLE plan_run_dependency;
-DROP TABLE plan_run_data_product;
-DROP INDEX plan_run_dataset_created_idx;
-DROP TABLE plan_run;
+DROP INDEX IF EXISTS plan_run_dataset_created_idx;
+DROP TABLE IF EXISTS plan_run_dependency;
+DROP TABLE IF EXISTS plan_run_data_product;
+DROP TABLE IF EXISTS plan_run;
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
DROP TABLE plan_run_dependency;
DROP TABLE plan_run_data_product;
DROP INDEX plan_run_dataset_created_idx;
DROP TABLE plan_run;
DROP INDEX IF EXISTS plan_run_dataset_created_idx;
DROP TABLE IF EXISTS plan_run_dependency;
DROP TABLE IF EXISTS plan_run_data_product;
DROP TABLE IF EXISTS plan_run;
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@migrations/20260410190000_plan_runs.down.sql` around lines 1 - 4, The DROPs
are ordered improperly: explicitly drop the index plan_run_dataset_created_idx
before dropping its parent table plan_run, and make the down migration
idempotent by using IF EXISTS on the DROP TABLE and DROP INDEX statements for
plan_run_dependency, plan_run_data_product, plan_run_dataset_created_idx, and
plan_run so the migration can be re-run safely.

43 changes: 43 additions & 0 deletions migrations/20260410190000_plan_runs.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
-- One snapshot of a plan execution for a dataset.
CREATE TABLE plan_run (
    plan_run_id UUID PRIMARY KEY,
    dataset_id UUID NOT NULL,
    status state NOT NULL,
    created_by TEXT NOT NULL,
    created_at TIMESTAMPTZ NOT NULL,
    modified_by TEXT NOT NULL,
    modified_date TIMESTAMPTZ NOT NULL,
    FOREIGN KEY(dataset_id) REFERENCES dataset(dataset_id),
    -- Lets child snapshot tables reference (plan_run_id, dataset_id) as a
    -- pair, so they can never pair a run with the wrong dataset.
    UNIQUE(plan_run_id, dataset_id)
);

-- Supports "latest runs for a dataset" queries.
CREATE INDEX plan_run_dataset_created_idx
    ON plan_run(dataset_id, created_at DESC);

-- Per-run snapshot of each data product's state within the plan.
CREATE TABLE plan_run_data_product (
    plan_run_id UUID NOT NULL,
    dataset_id UUID NOT NULL,
    data_product_id UUID NOT NULL,
    compute compute NOT NULL,
    name TEXT NOT NULL,
    version TEXT NOT NULL,
    eager BOOL NOT NULL,
    state state NOT NULL,
    step_run_id UUID,
    link TEXT,
    passback JSONB,
    modified_by TEXT NOT NULL,
    modified_date TIMESTAMPTZ NOT NULL,
    PRIMARY KEY(plan_run_id, data_product_id),
    -- Composite FK: dataset_id must match the parent plan_run's dataset_id.
    FOREIGN KEY(plan_run_id, dataset_id)
        REFERENCES plan_run(plan_run_id, dataset_id)
        ON DELETE CASCADE
);

-- Supports lookups by the step run that produced a snapshot row.
CREATE INDEX plan_run_data_product_step_run_idx
    ON plan_run_data_product(step_run_id);

-- Per-run snapshot of the parent/child edges of the plan DAG.
CREATE TABLE plan_run_dependency (
    plan_run_id UUID NOT NULL,
    dataset_id UUID NOT NULL,
    parent_id UUID NOT NULL,
    child_id UUID NOT NULL,
    PRIMARY KEY(plan_run_id, parent_id, child_id),
    -- Composite FK: dataset_id must match the parent plan_run's dataset_id.
    FOREIGN KEY(plan_run_id, dataset_id)
        REFERENCES plan_run(plan_run_id, dataset_id)
        ON DELETE CASCADE
);
Comment on lines +15 to +43
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Enforce that child dataset_id values match the parent plan_run.

Both snapshot tables persist dataset_id, but the FK only references plan_run_id. That means a bad insert can write (plan_run_id = run_A, dataset_id = dataset_B) and still satisfy the schema, while your read paths filter on both columns and will silently miss those rows.

Constraint fix
 CREATE TABLE plan_run (
   plan_run_id UUID PRIMARY KEY,
   dataset_id UUID NOT NULL,
   status state NOT NULL,
   created_by TEXT NOT NULL,
   created_at TIMESTAMPTZ NOT NULL,
   modified_by TEXT NOT NULL,
   modified_date TIMESTAMPTZ NOT NULL,
-  FOREIGN KEY(dataset_id) REFERENCES dataset(dataset_id)
+  FOREIGN KEY(dataset_id) REFERENCES dataset(dataset_id),
+  UNIQUE(plan_run_id, dataset_id)
 );
@@
 CREATE TABLE plan_run_data_product (
@@
-  FOREIGN KEY(plan_run_id) REFERENCES plan_run(plan_run_id) ON DELETE CASCADE
+  FOREIGN KEY(plan_run_id, dataset_id)
+    REFERENCES plan_run(plan_run_id, dataset_id)
+    ON DELETE CASCADE
 );
@@
 CREATE TABLE plan_run_dependency (
@@
-  FOREIGN KEY(plan_run_id) REFERENCES plan_run(plan_run_id) ON DELETE CASCADE
+  FOREIGN KEY(plan_run_id, dataset_id)
+    REFERENCES plan_run(plan_run_id, dataset_id)
+    ON DELETE CASCADE
 );
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@migrations/20260410190000_plan_runs.up.sql` around lines 15 - 43, Replace the
single-column foreign key references on plan_run_id with a composite foreign key
(plan_run_id, dataset_id) that references plan_run(plan_run_id, dataset_id) in
both plan_run_data_product and plan_run_dependency so rows cannot pair a
plan_run_id with a different dataset_id; specifically, update the FOREIGN KEY in
plan_run_data_product (currently FOREIGN KEY(plan_run_id) ...) to FOREIGN
KEY(plan_run_id, dataset_id) REFERENCES plan_run(plan_run_id, dataset_id) ON
DELETE CASCADE, and do the same for plan_run_dependency (replace its FOREIGN
KEY(plan_run_id) ... with a composite FK on (plan_run_id, dataset_id)
referencing plan_run), leaving primary keys and other columns unchanged.

92 changes: 92 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"markdownlint-cli2": "^0.18.1"
},
"dependencies": {
"@ebay/nice-dag-core": "^1.0.41",
"@tailwindcss/cli": "^4.1.11",
"@viz-js/viz": "^3.16.0",
"daisyui": "^5.0.50",
Expand Down
Loading