From de7baa08bdf6e7eba6c67c053d9954968466d29c Mon Sep 17 00:00:00 2001 From: Noel Gomez Date: Sun, 15 Mar 2026 10:51:18 -0700 Subject: [PATCH] Feature/add row policies (#274) * enable automatic change tracking with dlt * improve masking policies * update policies and apply to model * add country populations dlt load script * update plan/apply scripts to use account usage * update snowcap resources, and use dbt snowflake native row policies config * apply row access policies conditionally --- load/dlt/country_populations.py | 44 +++ load/dlt/loans_data.py | 8 +- load/dlt/utils/datacoves_utils.py | 15 + secure/snowcap/apply.sh | 140 ++++++--- secure/snowcap/plan.sh | 116 +++++--- secure/snowcap/resources/masking_policies.yml | 10 +- .../object_templates/row_access__roles.yml | 139 +++++++++ .../snowcap/resources/roles__functional.yml | 20 ++ .../resources/row_access__countries.yml | 269 ++++++++++++++++++ .../snowcap/resources/row_access_policies.yml | 17 ++ secure/snowcap/resources/users.yml | 2 + transform/dbt_project.yml | 10 +- transform/macros/snowcap_apply_policies.sql | 59 ++++ .../country_data/stg_country_populations.yml | 7 +- .../stg_country_populations_v1.sql | 4 +- .../stg_country_populations_v2.sql | 4 +- .../stg_country_populations_v3.sql | 4 +- .../L1_inlets/loans/stg_personal_loans.yml | 8 +- transform/package-lock.yml | 7 +- transform/packages.yml | 4 +- 20 files changed, 771 insertions(+), 116 deletions(-) create mode 100755 load/dlt/country_populations.py create mode 100644 secure/snowcap/resources/object_templates/row_access__roles.yml create mode 100644 secure/snowcap/resources/row_access__countries.yml create mode 100644 secure/snowcap/resources/row_access_policies.yml create mode 100644 transform/macros/snowcap_apply_policies.sql diff --git a/load/dlt/country_populations.py b/load/dlt/country_populations.py new file mode 100755 index 000000000..eb2d37296 --- /dev/null +++ b/load/dlt/country_populations.py @@ -0,0 +1,44 @@ +#!/usr/bin/env -S uv run +# /// script +# dependencies = [ +# "dlt[snowflake, parquet]==1.21.0", +# "enlighten~=1.12.4", +# "psutil~=6.0.0", +# "connectorx==0.4.1", +# "pandas==2.2.2", +# ] +# /// +"""Loads world population CSV data to Snowflake RAW database""" +import dlt +import pandas as pd +from utils.datacoves_utils import pipelines_dir + +@dlt.resource(write_disposition="replace", table_name="country_populations") +def country_populations(): + url = "https://raw.githubusercontent.com/datasets/population/master/data/population.csv" + df = pd.read_csv(url) + yield df + +@dlt.source +def country_populations_source(): + return [country_populations] + +if __name__ == "__main__": + datacoves_snowflake = dlt.destinations.snowflake( + destination_name="datacoves_snowflake", + database="raw" + ) + + pipeline = dlt.pipeline( + progress="log", + pipeline_name="world_population_data", + destination=datacoves_snowflake, + pipelines_dir=pipelines_dir, + dataset_name="raw" + ) + + load_info = pipeline.run([ + country_populations_source() + ]) + + print(load_info) diff --git a/load/dlt/loans_data.py b/load/dlt/loans_data.py index d974569ca..c60e1799c 100755 --- a/load/dlt/loans_data.py +++ b/load/dlt/loans_data.py @@ -12,7 +12,7 @@ """Loads a CSV file to Snowflake""" import dlt import pandas as pd -from utils.datacoves_utils import pipelines_dir +from utils.datacoves_utils import pipelines_dir, enable_change_tracking @dlt.resource(write_disposition="replace") def personal_loans(): @@ -28,7 +28,7 @@ def zip_coordinates(): @dlt.source def loans_data(): - return [personal_loans, zip_coordinates] + return [personal_loans(), zip_coordinates()] if __name__ == "__main__": datacoves_snowflake = dlt.destinations.snowflake( @@ -46,5 +46,7 @@ def loans_data(): ) load_info = pipeline.run(loans_data()) - print(load_info) + + # Enable CHANGE_TRACKING for Dynamic Table support + enable_change_tracking(pipeline, ["personal_loans", "zip_coordinates"]) diff --git a/load/dlt/utils/datacoves_utils.py b/load/dlt/utils/datacoves_utils.py index b0cdcf9bf..13e5de93a 100644 --- a/load/dlt/utils/datacoves_utils.py +++ b/load/dlt/utils/datacoves_utils.py @@ -9,3 +9,18 @@ else: pipelines_dir = os.path.join('/tmp','.dlt','pipelines') print(f"pipelines_dir set to: {pipelines_dir}") + + +def enable_change_tracking(pipeline, tables: list[str]): + """Enable CHANGE_TRACKING on Snowflake tables for Dynamic Table support. + + Args: + pipeline: A dlt pipeline instance with a Snowflake destination. + tables: List of table names to enable change tracking on. + """ + with pipeline.sql_client() as client: + for table in tables: + client.execute_sql( + f"ALTER TABLE {pipeline.dataset_name}.{table} SET CHANGE_TRACKING = TRUE" + ) + print(f"CHANGE_TRACKING enabled on: {', '.join(tables)}") diff --git a/secure/snowcap/apply.sh b/secure/snowcap/apply.sh index 5d09b1b38..69c15f5cf 100755 --- a/secure/snowcap/apply.sh +++ b/secure/snowcap/apply.sh @@ -1,63 +1,113 @@ #!/bin/bash -secure_path=/config/workspace/secure/snowcap -cd $secure_path +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +cd "$SCRIPT_DIR" +# Load .env if it exists if [ -f .env ]; then - echo "File .env found." -else - echo "File .env does not exist. Please create a .env file with the following variables:" + set -a + . ./.env + set +a +fi + +# Parse arguments +USE_PII=false +GIT_BRANCH="" +USE_PLAN=false +while [[ $# -gt 0 ]]; do + case $1 in + --pii) + USE_PII=true + shift + ;; + -b|--branch) + GIT_BRANCH="$2" + shift 2 + ;; + --plan) + USE_PLAN=true + break # Stop parsing, rest goes to snowcap (including --plan) + ;; + *) + break # Stop parsing, rest goes to snowcap + ;; + esac +done + +# Validate required variables +missing=() +[ -z "$SNOWFLAKE_ACCOUNT" ] && missing+=("SNOWFLAKE_ACCOUNT") +[ -z "$SNOWFLAKE_USER" ] && missing+=("SNOWFLAKE_USER") +[ -z "$SNOWFLAKE_ROLE" ] && missing+=("SNOWFLAKE_ROLE") +[ -z "$SNOWFLAKE_PRIVATE_KEY_PATH" ] && missing+=("SNOWFLAKE_PRIVATE_KEY_PATH") + +if $USE_PII && [ -z "$SNOWFLAKE_ACCOUNT_PII" ]; then + missing+=("SNOWFLAKE_ACCOUNT_PII") +fi + +if [ ${#missing[@]} -gt 0 ]; then + echo "Error: Missing required environment variables:" + for var in "${missing[@]}"; do + echo " - $var" + done echo "" - echo "SNOWFLAKE_ACCOUNT=" - echo "SNOWFLAKE_ACCOUNT_PII=" - echo "SNOWFLAKE_USER=" - echo "SNOWFLAKE_ROLE=" - echo "SNOWFLAKE_PRIVATE_KEY_PATH=" - echo "SNOWFLAKE_AUTHENTICATOR=SNOWFLAKE_JWT" + echo "Create a .env file with:" echo "" + echo " SNOWFLAKE_ACCOUNT=your_account # Standard account identifier" + echo " SNOWFLAKE_ACCOUNT_PII=your_pii_acct # Enterprise account (for --pii flag)" + echo " SNOWFLAKE_USER=your_user # Service account username" + echo " SNOWFLAKE_ROLE=SECURITYADMIN # Role for applying changes" + echo " SNOWFLAKE_PRIVATE_KEY_PATH=~/.ssh/key # Path to private key" + echo " SNOWFLAKE_AUTHENTICATOR=SNOWFLAKE_JWT" exit 1 fi -# Load env vars safely -set -a -. ./.env -set +a - -# Default to non-PII (standard) account -ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT" -EXCLUDE_RESOURCES="" - -# If -pii flag is passed, switch to PII (enterprise) account -if [[ "$1" == "-pii" ]]; then - ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT_PII" +# Set account based on --pii flag +if $USE_PII; then + ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT_PII" + EXCLUDE_RESOURCES="" + USE_ACCOUNT_USAGE="--use-account-usage" else - # Standard accounts don't support enterprise-only features - EXCLUDE_RESOURCES="--exclude masking_policy,tag,tag_reference,tag_masking_policy_reference" + ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT" + EXCLUDE_RESOURCES="--exclude masking_policy,tag,tag_reference,tag_masking_policy_reference,row_access_policy" + USE_ACCOUNT_USAGE="" fi -# Override SNOWFLAKE_ACCOUNT for the snowcap run export SNOWFLAKE_ACCOUNT="$ACCOUNT_TO_USE" +# Build uvx command based on branch +if [ -n "$GIT_BRANCH" ]; then + UVX_CMD="uvx --from snowcap@git+https://github.com/datacoves/snowcap.git@${GIT_BRANCH} --refresh" +else + UVX_CMD="uvx" +fi + echo "==========" echo "Using SNOWFLAKE_ACCOUNT=$SNOWFLAKE_ACCOUNT" -if [[ -n "$EXCLUDE_RESOURCES" ]]; then - echo "Excluding enterprise-only resources (standard account)" +if [ -n "$GIT_BRANCH" ]; then + echo "Using snowcap from branch: $GIT_BRANCH" +fi +if [ -n "$EXCLUDE_RESOURCES" ]; then + echo "Excluding enterprise-only resources (standard account)" +fi +if [ -n "$USE_ACCOUNT_USAGE" ]; then + echo "Using --use-account-usage flag" fi echo "==========" -# SNOWCAP_LOG_LEVEL=DEBUG -# uvx --from snowcap@git+https://github.com/datacoves/snowcap.git@fix_exclude_cli_param \ -# --refresh \ -# snowcap apply \ -# --config resources/ \ -# --sync_resources role,grant,role_grant,warehouse,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \ -# $EXCLUDE_RESOURCES - # --use-account-usage - -uvx snowcap apply \ - --config resources/ \ - --sync_resources role,grant,role_grant,warehouse,database,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \ - $EXCLUDE_RESOURCES - # --use-account-usage - -# uvx --from snowcap@git+https://github.com/datacoves/snowcap.git \ -uvx snowcap --version +# Build config flag (skip if using --plan) +if $USE_PLAN; then + CONFIG_FLAG="" + SYNC_FLAG="" +else + CONFIG_FLAG="--config resources/" + SYNC_FLAG="--sync_resources role,grant,role_grant,warehouse,user,masking_policy,tag,tag_reference,tag_masking_policy_reference,row_access_policy" +fi + +$UVX_CMD snowcap apply \ + $CONFIG_FLAG \ + $SYNC_FLAG \ + $EXCLUDE_RESOURCES \ + $USE_ACCOUNT_USAGE \ + "$@" + +$UVX_CMD snowcap --version diff --git a/secure/snowcap/plan.sh b/secure/snowcap/plan.sh index 399b520d4..d875fe919 100755 --- a/secure/snowcap/plan.sh +++ b/secure/snowcap/plan.sh @@ -1,57 +1,97 @@ #!/bin/bash -secure_path=/config/workspace/secure/snowcap -cd $secure_path +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +cd "$SCRIPT_DIR" +# Load .env if it exists if [ -f .env ]; then - echo "File .env found." -else - echo "File .env does not exist. Please create a .env file with the following variables:" + set -a + . ./.env + set +a +fi + +# Parse arguments +USE_PII=false +GIT_BRANCH="" +while [[ $# -gt 0 ]]; do + case $1 in + --pii) + USE_PII=true + shift + ;; + -b|--branch) + GIT_BRANCH="$2" + shift 2 + ;; + *) + break # Stop parsing, rest goes to snowcap + ;; + esac +done + +# Validate required variables +missing=() +[ -z "$SNOWFLAKE_ACCOUNT" ] && missing+=("SNOWFLAKE_ACCOUNT") +[ -z "$SNOWFLAKE_USER" ] && missing+=("SNOWFLAKE_USER") +[ -z "$SNOWFLAKE_ROLE" ] && missing+=("SNOWFLAKE_ROLE") +[ -z "$SNOWFLAKE_PRIVATE_KEY_PATH" ] && missing+=("SNOWFLAKE_PRIVATE_KEY_PATH") + +if $USE_PII && [ -z "$SNOWFLAKE_ACCOUNT_PII" ]; then + missing+=("SNOWFLAKE_ACCOUNT_PII") +fi + +if [ ${#missing[@]} -gt 0 ]; then + echo "Error: Missing required environment variables:" + for var in "${missing[@]}"; do + echo " - $var" + done echo "" - echo "SNOWFLAKE_ACCOUNT=" - echo "SNOWFLAKE_ACCOUNT_PII=" - echo "SNOWFLAKE_USER=" - echo "SNOWFLAKE_ROLE=" - echo "SNOWFLAKE_PRIVATE_KEY_PATH=" - echo "SNOWFLAKE_AUTHENTICATOR=SNOWFLAKE_JWT" + echo "Create a .env file with:" echo "" + echo " SNOWFLAKE_ACCOUNT=your_account # Standard account identifier" + echo " SNOWFLAKE_ACCOUNT_PII=your_pii_acct # Enterprise account (for --pii flag)" + echo " SNOWFLAKE_USER=your_user # Service account username" + echo " SNOWFLAKE_ROLE=SECURITYADMIN # Role for applying changes" + echo " SNOWFLAKE_PRIVATE_KEY_PATH=~/.ssh/key # Path to private key" + echo " SNOWFLAKE_AUTHENTICATOR=SNOWFLAKE_JWT" exit 1 fi -# Load env vars safely -set -a -. ./.env -set +a - -# Default to non-PII (standard) account -ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT" -EXCLUDE_RESOURCES="" - -# If -pii flag is passed, switch to PII (enterprise) account -if [[ "$1" == "-pii" ]]; then - ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT_PII" +# Set account based on --pii flag +if $USE_PII; then + ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT_PII" + EXCLUDE_RESOURCES="" + USE_ACCOUNT_USAGE="--use-account-usage" else - # Standard accounts don't support enterprise-only features - EXCLUDE_RESOURCES="--exclude masking_policy,tag,tag_reference,tag_masking_policy_reference" + ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT" + EXCLUDE_RESOURCES="--exclude masking_policy,tag,tag_reference,tag_masking_policy_reference,row_access_policy" + USE_ACCOUNT_USAGE="" fi -# Override SNOWFLAKE_ACCOUNT for the snowcap run export SNOWFLAKE_ACCOUNT="$ACCOUNT_TO_USE" +# Build uvx command based on branch +if [ -n "$GIT_BRANCH" ]; then + UVX_CMD="uvx --from snowcap@git+https://github.com/datacoves/snowcap.git@${GIT_BRANCH} --refresh" +else + UVX_CMD="uvx" +fi + echo "==========" echo "Using SNOWFLAKE_ACCOUNT=$SNOWFLAKE_ACCOUNT" -if [[ -n "$EXCLUDE_RESOURCES" ]]; then - echo "Excluding enterprise-only resources (standard account)" +if [ -n "$GIT_BRANCH" ]; then + echo "Using snowcap from branch: $GIT_BRANCH" +fi +if [ -n "$EXCLUDE_RESOURCES" ]; then + echo "Excluding enterprise-only resources (standard account)" +fi +if [ -n "$USE_ACCOUNT_USAGE" ]; then + echo "Using --use-account-usage flag" fi echo "==========" -# uvx --from snowcap@git+https://github.com/datacoves/snowcap.git@improve_plan_output \ -# --refresh \ -# snowcap plan \ -# --config resources/ \ -# --sync_resources role,grant,role_grant,warehouse,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \ -# $EXCLUDE_RESOURCES - -uvx snowcap plan \ +$UVX_CMD snowcap plan \ --config resources/ \ - --sync_resources role,grant,role_grant,warehouse,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \ - $EXCLUDE_RESOURCES + --sync_resources role,grant,role_grant,warehouse,user,masking_policy,tag,tag_reference,tag_masking_policy_reference,row_access_policy \ + $EXCLUDE_RESOURCES \ + $USE_ACCOUNT_USAGE \ + "$@" diff --git a/secure/snowcap/resources/masking_policies.yml b/secure/snowcap/resources/masking_policies.yml index a50e4dd7e..384dfbf92 100644 --- a/secure/snowcap/resources/masking_policies.yml +++ b/secure/snowcap/resources/masking_policies.yml @@ -5,7 +5,7 @@ masking_policies: - name: val data_type: VARCHAR returns: VARCHAR - body: | + body: |- CASE WHEN IS_ROLE_IN_SESSION('Z_UNMASK__PII') THEN val ELSE '***MASKED***' @@ -18,7 +18,7 @@ masking_policies: - name: val data_type: NUMBER returns: NUMBER - body: | + body: |- CASE WHEN IS_ROLE_IN_SESSION('Z_UNMASK__PII') THEN val ELSE NULL @@ -31,7 +31,7 @@ masking_policies: - name: val data_type: FLOAT returns: FLOAT - body: | + body: |- CASE WHEN IS_ROLE_IN_SESSION('Z_UNMASK__PII') THEN val ELSE NULL @@ -44,7 +44,7 @@ masking_policies: - name: val data_type: DATE returns: DATE - body: | + body: |- CASE WHEN IS_ROLE_IN_SESSION('Z_UNMASK__PII') THEN val ELSE NULL @@ -57,7 +57,7 @@ masking_policies: - name: val data_type: TIMESTAMP_NTZ returns: TIMESTAMP_NTZ - body: | + body: |- CASE WHEN IS_ROLE_IN_SESSION('Z_UNMASK__PII') THEN val ELSE NULL diff --git a/secure/snowcap/resources/object_templates/row_access__roles.yml b/secure/snowcap/resources/object_templates/row_access__roles.yml new file mode 100644 index 000000000..0051d1fc8 --- /dev/null +++ b/secure/snowcap/resources/object_templates/row_access__roles.yml @@ -0,0 +1,139 @@ +roles: + - for_each: var.country_code__iso_3 + name: "z_row_access__country_code__iso_3__{{ each.value }}" + + # Region roles - for regional access + - name: z_row_access__region__na + - name: z_row_access__region__latam + - name: z_row_access__region__emea + - name: z_row_access__region__apac + +# Hierarchy - grant country roles to region roles +role_grants: + - to_role: z_row_access__region__na + roles: + - z_row_access__country_code__iso_3__can + - z_row_access__country_code__iso_3__usa + - z_row_access__country_code__iso_3__mex + + - to_role: z_row_access__region__latam + roles: + - z_row_access__country_code__iso_3__arg + - z_row_access__country_code__iso_3__bol + - z_row_access__country_code__iso_3__bra + - z_row_access__country_code__iso_3__chl + - z_row_access__country_code__iso_3__col + - z_row_access__country_code__iso_3__cri + - z_row_access__country_code__iso_3__ecu + - z_row_access__country_code__iso_3__gtm + - z_row_access__country_code__iso_3__hnd + - z_row_access__country_code__iso_3__nic + - z_row_access__country_code__iso_3__pan + - z_row_access__country_code__iso_3__per + - z_row_access__country_code__iso_3__pry + - z_row_access__country_code__iso_3__slv + - z_row_access__country_code__iso_3__ury + - z_row_access__country_code__iso_3__ven + + - to_role: z_row_access__region__emea + roles: + - z_row_access__country_code__iso_3__alb + - z_row_access__country_code__iso_3__and + - z_row_access__country_code__iso_3__aut + - z_row_access__country_code__iso_3__bel + - z_row_access__country_code__iso_3__bgr + - z_row_access__country_code__iso_3__bih + - z_row_access__country_code__iso_3__blr + - z_row_access__country_code__iso_3__che + - z_row_access__country_code__iso_3__cyp + - z_row_access__country_code__iso_3__cze + - z_row_access__country_code__iso_3__deu + - z_row_access__country_code__iso_3__dnk + - z_row_access__country_code__iso_3__esp + - z_row_access__country_code__iso_3__est + - z_row_access__country_code__iso_3__fin + - z_row_access__country_code__iso_3__fra + - z_row_access__country_code__iso_3__gbr + - z_row_access__country_code__iso_3__geo + - z_row_access__country_code__iso_3__grc + - z_row_access__country_code__iso_3__hrv + - z_row_access__country_code__iso_3__hun + - z_row_access__country_code__iso_3__irl + - z_row_access__country_code__iso_3__isl + - z_row_access__country_code__iso_3__ita + - z_row_access__country_code__iso_3__kaz + - z_row_access__country_code__iso_3__kgz + - z_row_access__country_code__iso_3__kor + - z_row_access__country_code__iso_3__lbn + - z_row_access__country_code__iso_3__lby + - z_row_access__country_code__iso_3__lka + - z_row_access__country_code__iso_3__ltu + - z_row_access__country_code__iso_3__lux + - z_row_access__country_code__iso_3__lva + - z_row_access__country_code__iso_3__mar + - z_row_access__country_code__iso_3__mda + - z_row_access__country_code__iso_3__mkd + - z_row_access__country_code__iso_3__mne + - z_row_access__country_code__iso_3__nld + - z_row_access__country_code__iso_3__nor + - z_row_access__country_code__iso_3__omn + - z_row_access__country_code__iso_3__pol + - z_row_access__country_code__iso_3__prt + - z_row_access__country_code__iso_3__qat + - z_row_access__country_code__iso_3__rou + - z_row_access__country_code__iso_3__rus + - z_row_access__country_code__iso_3__sau + - z_row_access__country_code__iso_3__sdn + - z_row_access__country_code__iso_3__srb + - z_row_access__country_code__iso_3__svk + - z_row_access__country_code__iso_3__svn + - z_row_access__country_code__iso_3__swe + - z_row_access__country_code__iso_3__syr + - z_row_access__country_code__iso_3__tur + - z_row_access__country_code__iso_3__ukr + + - to_role: z_row_access__region__apac + roles: + - z_row_access__country_code__iso_3__afg + - z_row_access__country_code__iso_3__are + - z_row_access__country_code__iso_3__arm + - z_row_access__country_code__iso_3__aze + - z_row_access__country_code__iso_3__bgd + - z_row_access__country_code__iso_3__brn + - z_row_access__country_code__iso_3__btn + - z_row_access__country_code__iso_3__chn + - z_row_access__country_code__iso_3__fji + - z_row_access__country_code__iso_3__hkg + - z_row_access__country_code__iso_3__idn + - z_row_access__country_code__iso_3__ind + - z_row_access__country_code__iso_3__irn + - z_row_access__country_code__iso_3__irq + - z_row_access__country_code__iso_3__jpn + - z_row_access__country_code__iso_3__khm + - z_row_access__country_code__iso_3__kir + - z_row_access__country_code__iso_3__kor + - z_row_access__country_code__iso_3__lao + - z_row_access__country_code__iso_3__lka + - z_row_access__country_code__iso_3__mac + - z_row_access__country_code__iso_3__mdv + - z_row_access__country_code__iso_3__mmr + - z_row_access__country_code__iso_3__mng + - z_row_access__country_code__iso_3__mnp + - z_row_access__country_code__iso_3__mys + - z_row_access__country_code__iso_3__npl + - z_row_access__country_code__iso_3__nru + - z_row_access__country_code__iso_3__nzl + - z_row_access__country_code__iso_3__pak + - z_row_access__country_code__iso_3__phl + - z_row_access__country_code__iso_3__png + - z_row_access__country_code__iso_3__prk + - z_row_access__country_code__iso_3__sgp + - z_row_access__country_code__iso_3__slb + - z_row_access__country_code__iso_3__tha + - z_row_access__country_code__iso_3__tjk + - z_row_access__country_code__iso_3__tkm + - z_row_access__country_code__iso_3__tls + - z_row_access__country_code__iso_3__ton + - z_row_access__country_code__iso_3__tuv + - z_row_access__country_code__iso_3__vnm + - z_row_access__country_code__iso_3__vut diff --git a/secure/snowcap/resources/roles__functional.yml b/secure/snowcap/resources/roles__functional.yml index 213ea7211..7a6787beb 100644 --- a/secure/snowcap/resources/roles__functional.yml +++ b/secure/snowcap/resources/roles__functional.yml @@ -6,6 +6,9 @@ roles: - name: analyst - name: analyst_pii + - name: de__analyst + - name: de__analyst_pii + - name: loader - name: transformer_dbt @@ -79,6 +82,7 @@ role_grants: # APPLY TAGS - z_tag__apply__pii + - z_rap__apply__country__iso_3 # WAREHOUSES - z_wh__wh_transforming @@ -87,6 +91,10 @@ role_grants: - to_role: analyst roles: - z_base__analyst + - z_row_access__region__na + - z_row_access__region__latam + - z_row_access__region__emea + - z_row_access__region__apac - z_tables_views__select - to_role: analyst_pii @@ -94,6 +102,17 @@ role_grants: - analyst - z_unmask__pii + - to_role: de__analyst + roles: + - z_base__analyst + - z_row_access__country_code__iso_3__deu + - z_tables_views__select + + - to_role: de__analyst_pii + roles: + - de__analyst + - z_unmask__pii + - to_role: catalog roles: - z_wh__wh_catalog @@ -130,6 +149,7 @@ role_grants: - z_stage__balboa_apps__resources__streamlit__write - z_tag__apply__pii + - z_rap__apply__country__iso_3 - z_tables_views__select diff --git a/secure/snowcap/resources/row_access__countries.yml b/secure/snowcap/resources/row_access__countries.yml new file mode 100644 index 000000000..e51278fd9 --- /dev/null +++ b/secure/snowcap/resources/row_access__countries.yml @@ -0,0 +1,269 @@ +vars: + - name: country_code__iso_3 + type: list + default: + - abw + - afe + - afg + - afw + - ago + - alb + - and + - arb + - are + - arg + - arm + - asm + - atg + - aus + - aut + - aze + - bdi + - bel + - ben + - bfa + - bgd + - bgr + - bhr + - bhs + - bih + - blr + - blz + - bmu + - bol + - bra + - brb + - brn + - btn + - bwa + - caf + - can + - ceb + - che + - chi + - chl + - chn + - civ + - cmr + - cod + - cog + - col + - com + - cpv + - cri + - css + - cub + - cuw + - cym + - cyp + - cze + - deu + - dji + - dma + - dnk + - dom + - dza + - eap + - ear + - eas + - eca + - ecs + - ecu + - egy + - emu + - eri + - esp + - est + - eth + - euu + - fcs + - fin + - fji + - fra + - fro + - fsm + - gab + - gbr + - geo + - gha + - gib + - gin + - gmb + - gnb + - gnq + - grc + - grd + - grl + - gtm + - gum + - guy + - hic + - hkg + - hnd + - hpc + - hrv + - hti + - hun + - ibd + - ibt + - ida + - idb + - idn + - idx + - imn + - ind + - irl + - irn + - irq + - isl + - isr + - ita + - jam + - jor + - jpn + - kaz + - ken + - kgz + - khm + - kir + - kna + - kor + - kwt + - lac + - lao + - lbn + - lbr + - lby + - lca + - lcn + - ldc + - lic + - lie + - lka + - lmc + - lmy + - lso + - lte + - ltu + - lux + - lva + - mac + - maf + - mar + - mco + - mda + - mdg + - mdv + - mea + - mex + - mhl + - mic + - mkd + - mli + - mlt + - mmr + - mna + - mne + - mng + - mnp + - moz + - mrt + - mus + - mwi + - mys + - nac + - nam + - ncl + - ner + - nga + - nic + - nld + - nor + - npl + - nru + - nzl + - oed + - omn + - oss + - pak + - pan + - per + - phl + - plw + - png + - pol + - pre + - pri + - prk + - prt + - pry + - pse + - pss + - pst + - pyf + - qat + - rou + - rus + - rwa + - sas + - sau + - sdn + - sen + - sgp + - slb + - sle + - slv + - smr + - som + - srb + - ssa + - ssd + - ssf + - sst + - stp + - sur + - svk + - svn + - swe + - swz + - sxm + - syc + - syr + - tca + - tcd + - tea + - tec + - tgo + - tha + - tjk + - tkm + - tla + - tls + - tmn + - ton + - tsa + - tss + - tto + - tun + - tur + - tuv + - tza + - uga + - ukr + - umc + - ury + - usa + - uzb + - vct + - ven + - vgb + - vir + - vnm + - vut + - wld + - wsm + - xkx + - yem + - zaf + - zmb + - zwe diff --git a/secure/snowcap/resources/row_access_policies.yml b/secure/snowcap/resources/row_access_policies.yml new file mode 100644 index 000000000..9e8cf71a5 --- /dev/null +++ b/secure/snowcap/resources/row_access_policies.yml @@ -0,0 +1,17 @@ +row_access_policies: + - name: governance.policies.rap_country__iso_3 + args: + - name: country_val + data_type: VARCHAR + body: |- + country_val IS NOT NULL + AND IS_ROLE_IN_SESSION('Z_ROW_ACCESS__COUNTRY_CODE__ISO_3__' || UPPER(country_val)) + comment: Filters rows by country based on user role + +roles: + - name: z_rap__apply__country__iso_3 + +grants: + - priv: APPLY + on: row_access_policy governance.policies.rap_country__iso_3 + to: z_rap__apply__country__iso_3 diff --git a/secure/snowcap/resources/users.yml b/secure/snowcap/resources/users.yml index 447310236..3eb4f439b 100644 --- a/secure/snowcap/resources/users.yml +++ b/secure/snowcap/resources/users.yml @@ -65,6 +65,8 @@ role_grants: roles: - analyst - analyst_pii + - de__analyst + - de__analyst_pii - finance_team - loader - transformer_dbt diff --git a/transform/dbt_project.yml b/transform/dbt_project.yml index da72af000..3fc3fecaa 100644 --- a/transform/dbt_project.yml +++ b/transform/dbt_project.yml @@ -131,7 +131,7 @@ models: +post-hook: - > {% if flags.WHICH in ('run', 'build') and target.name == 'prd_pii' %} - {{ dbt_tags.apply_column_tags() }} + {{ snowcap_apply_policies() }} {% endif %} # - "{{ dbt_snow_mask.apply_masking_policy('sources') if target.name == 'prd_pii' }}" # - "{{ dbt_snow_mask.apply_masking_policy('models') if target.name == 'prd_pii' }}" @@ -154,9 +154,11 @@ data_tests: vars: 'dbt_date:time_zone': 'America/Los_Angeles' - dbt_tags__opt_in_default_naming_config: false - dbt_tags__database: "GOVERNANCE" - dbt_tags__schema: "TAGS" + # Snowcap governance macros + snowcap_tag_database: "GOVERNANCE" + snowcap_tag_schema: "TAGS" + snowcap_policy_database: "GOVERNANCE" + snowcap_policy_schema: "POLICIES" create_masking_policy_schema: "false" use_common_masking_policy_db: "true" diff --git a/transform/macros/snowcap_apply_policies.sql b/transform/macros/snowcap_apply_policies.sql new file mode 100644 index 000000000..0580d646d --- /dev/null +++ b/transform/macros/snowcap_apply_policies.sql @@ -0,0 +1,59 @@ +{#- Snowcap dbt Governance Macros -#} +{#- Generated by: snowcap generate dbt-macros -#} +{#- Documentation: https://snowcap.datacoves.com -#} + +{% macro snowcap_apply_policies() %} + {{ snowcap_apply_masking_tags() }} +{% endmacro %} + +{% macro snowcap_apply_masking_tags() %} + {% set tag_db = var('snowcap_tag_database', target.database) %} + {% set tag_schema = var('snowcap_tag_schema', 'TAGS') %} + {% set columns = model.columns.values() if model.columns else [] %} + + {#- Build a map of column -> desired masking tag -#} + {% set desired_tags = {} %} + {% for column in columns %} + {% set meta = column.meta if column.meta is mapping else {} %} + {% set masking_tag = meta.get('masking_tag') %} + {% do desired_tags.update({column.name | upper: masking_tag}) %} + {% endfor %} + + {#- Query current tags on this table's columns from our tag schema -#} + {% set current_tags_query %} + SELECT COLUMN_NAME, TAG_NAME + FROM SNOWFLAKE.ACCOUNT_USAGE.TAG_REFERENCES + WHERE OBJECT_DATABASE = '{{ this.database | upper }}' + AND OBJECT_SCHEMA = '{{ this.schema | upper }}' + AND OBJECT_NAME = '{{ this.identifier | upper }}' + AND DOMAIN = 'COLUMN' + AND TAG_DATABASE = '{{ tag_db | upper }}' + AND TAG_SCHEMA = '{{ tag_schema | upper }}' + {% endset %} + {% set current_tags_result = run_query(current_tags_query) %} + + {#- Unset tags that should no longer be present -#} + {% for row in current_tags_result %} + {% set col_name = row['COLUMN_NAME'] %} + {% set current_tag = row['TAG_NAME'] %} + {% set desired_tag = desired_tags.get(col_name) %} + + {#- If column has no desired tag, or has a different tag, unset current -#} + {% if not desired_tag or desired_tag | upper != current_tag | upper %} + ALTER TABLE {{ this }} MODIFY COLUMN {{ col_name }} + UNSET TAG {{ tag_db }}.{{ tag_schema }}.{{ current_tag }}; + {% endif %} + {% endfor %} + + {#- Set desired masking tags -#} + {% for column in columns %} + {% set meta = column.meta if column.meta is mapping else {} %} + {% set masking_tag = meta.get('masking_tag') %} + + {% if masking_tag %} + ALTER TABLE {{ this }} MODIFY COLUMN {{ column.name }} + SET TAG {{ tag_db }}.{{ tag_schema }}.{{ masking_tag }} = 'true'; + {% endif %} + {% endfor %} +{% endmacro %} + diff --git a/transform/models/L1_inlets/country_data/stg_country_populations.yml b/transform/models/L1_inlets/country_data/stg_country_populations.yml index a261b58be..f3e6fd03b 100644 --- a/transform/models/L1_inlets/country_data/stg_country_populations.yml +++ b/transform/models/L1_inlets/country_data/stg_country_populations.yml @@ -3,6 +3,9 @@ version: 2 models: - name: stg_country_populations description: Contains population information from the Github Datasets repository. + config: + access: public + row_access_policy: "{{ 'governance.policies.rap_country__iso_3 ON (country_code)' if target.name == 'prd_pii' else none }}" data_tests: - dbt_utils.unique_combination_of_columns: arguments: @@ -18,7 +21,7 @@ models: - name: value description: The population value for a particular year and country - name: country_code - description: The ISO 3166-1 alpha-2 code for the country + description: The alpha-3 code for the country latest_version: 2 @@ -37,5 +40,3 @@ models: exclude: [last_year] - name: prior_year description: The prior year for which the population value is recorded - config: - access: public diff --git a/transform/models/L1_inlets/country_data/stg_country_populations_v1.sql b/transform/models/L1_inlets/country_data/stg_country_populations_v1.sql index bb6c58aef..bea13e07b 100644 --- a/transform/models/L1_inlets/country_data/stg_country_populations_v1.sql +++ b/transform/models/L1_inlets/country_data/stg_country_populations_v1.sql @@ -9,9 +9,9 @@ final as ( select year, - "COUNTRY NAME" as country_name, + "COUNTRY_NAME" as country_name, value, - "COUNTRY CODE" as country_code + "COUNTRY_CODE" as country_code from raw_source diff --git a/transform/models/L1_inlets/country_data/stg_country_populations_v2.sql b/transform/models/L1_inlets/country_data/stg_country_populations_v2.sql index eac5a2d04..f5aa38e30 100644 --- a/transform/models/L1_inlets/country_data/stg_country_populations_v2.sql +++ b/transform/models/L1_inlets/country_data/stg_country_populations_v2.sql @@ -10,9 +10,9 @@ final as ( select year, year - 1 as last_year, - "COUNTRY NAME" as country_name, + "COUNTRY_NAME" as country_name, value, - "COUNTRY CODE" as country_code + "COUNTRY_CODE" as country_code from raw_source diff --git a/transform/models/L1_inlets/country_data/stg_country_populations_v3.sql b/transform/models/L1_inlets/country_data/stg_country_populations_v3.sql index fd55ff035..4038da1f0 100644 --- a/transform/models/L1_inlets/country_data/stg_country_populations_v3.sql +++ b/transform/models/L1_inlets/country_data/stg_country_populations_v3.sql @@ -10,9 +10,9 @@ final as ( select year, year - 1 as prior_year, - "COUNTRY NAME" as country_name, + "COUNTRY_NAME" as country_name, value, - "COUNTRY CODE" as country_code + "COUNTRY_CODE" as country_code from raw_source diff --git a/transform/models/L1_inlets/loans/stg_personal_loans.yml b/transform/models/L1_inlets/loans/stg_personal_loans.yml index 539b02893..74ccf3daf 100644 --- a/transform/models/L1_inlets/loans/stg_personal_loans.yml +++ b/transform/models/L1_inlets/loans/stg_personal_loans.yml @@ -12,12 +12,12 @@ models: # config: # store_failures_as: view description: The state in which the borrower resides - tags: - - pii + meta: + masking_tag: pii - name: annual_inc description: The borrower's annual income - tags: - - pii + meta: + masking_tag: pii - name: collections_12_mths_ex_med description: Number of collections in the last 12 months excluding medical collections diff --git a/transform/package-lock.yml b/transform/package-lock.yml index 75f58d15b..70e2163cc 100644 --- a/transform/package-lock.yml +++ b/transform/package-lock.yml @@ -10,11 +10,8 @@ packages: version: 0.2.8 - name: dbt_external_tables package: dbt-labs/dbt_external_tables - version: 0.12.0 - - name: dbt_tags - package: infinitelambda/dbt_tags - version: 1.9.0 + version: 0.12.1 - name: dbt_date package: godatadriven/dbt_date version: 0.17.1 -sha1_hash: 46471b8f9824cdf17d80d34f2b94ad2d2662d5b5 +sha1_hash: c22f502b6cd00019053b254ea09eba149cdd1379 diff --git a/transform/packages.yml b/transform/packages.yml index e4d9e4db1..a1aabbc04 100644 --- a/transform/packages.yml +++ b/transform/packages.yml @@ -7,7 +7,7 @@ packages: version: 0.2.8 # for the latest version tag - package: dbt-labs/dbt_external_tables - version: 0.12.0 + version: 0.12.1 # for the latest version tag # - package: data-mie/dbt_profiler # version: 0.8.4 @@ -15,5 +15,3 @@ packages: # version: 0.12.1 # - package: elementary-data/elementary # version: 0.16.2 - - package: infinitelambda/dbt_tags - version: 1.9.0