From bc922f2eb73f0a1de32a602fc9b664b90e9110f6 Mon Sep 17 00:00:00 2001 From: Noel Gomez Date: Tue, 10 Mar 2026 15:24:46 -0700 Subject: [PATCH 1/2] Feature/snowcap improvements (#272) * add claude code install info * add support for tag based masking policies * add tags to dbt model cols --- secure/snowcap/.env.sample | 1 + secure/snowcap/apply.sh | 33 +++++++++- secure/snowcap/plan.sh | 33 +++++++++- secure/snowcap/resources/databases.yml | 3 + secure/snowcap/resources/masking_policies.yml | 65 +++++++++++++++++++ .../resources/object_templates/user.yml | 7 ++ secure/snowcap/resources/roles__base.yml | 11 ++++ .../snowcap/resources/roles__functional.yml | 19 ++++++ secure/snowcap/resources/schemas.yml | 4 ++ .../resources/tag_masking_policies.yml | 16 +++++ secure/snowcap/resources/tags.yml | 11 ++++ secure/snowcap/resources/users.yml | 43 ++++++++++++ training_and_demos/claude/readme.md | 1 + transform/dbt_project.yml | 12 +++- .../L1_inlets/loans/stg_personal_loans.yml | 4 ++ transform/package-lock.yml | 5 +- transform/packages.yml | 2 + 17 files changed, 261 insertions(+), 9 deletions(-) create mode 100644 secure/snowcap/resources/masking_policies.yml create mode 100644 secure/snowcap/resources/object_templates/user.yml create mode 100644 secure/snowcap/resources/tag_masking_policies.yml create mode 100644 secure/snowcap/resources/tags.yml create mode 100644 training_and_demos/claude/readme.md diff --git a/secure/snowcap/.env.sample b/secure/snowcap/.env.sample index dbd2d7eb6..55884aad9 100644 --- a/secure/snowcap/.env.sample +++ b/secure/snowcap/.env.sample @@ -1,4 +1,5 @@ SNOWFLAKE_ACCOUNT= +SNOWFLAKE_ACCOUNT_PII= SNOWFLAKE_USER= SNOWFLAKE_ROLE=securityadmin SNOWFLAKE_PRIVATE_KEY_PATH=/config/.ssl/dev-private.pem diff --git a/secure/snowcap/apply.sh b/secure/snowcap/apply.sh index 58c1f3338..32ed9d3e8 100755 --- a/secure/snowcap/apply.sh +++ b/secure/snowcap/apply.sh @@ -8,6 +8,7 @@ else echo "File .env does not exist. Please create a .env file with the following variables:" echo "" echo "SNOWFLAKE_ACCOUNT=" + echo "SNOWFLAKE_ACCOUNT_PII=" echo "SNOWFLAKE_USER=" echo "SNOWFLAKE_ROLE=" echo "SNOWFLAKE_PRIVATE_KEY_PATH=" @@ -16,14 +17,40 @@ else exit 1 fi -export $(cat .env | xargs) +# Load env vars safely +set -a +. ./.env +set +a + +# Default to non-PII (standard) account +ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT" +EXCLUDE_RESOURCES="" + +# If -pii flag is passed, switch to PII (enterprise) account +if [[ "$1" == "-pii" ]]; then + ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT_PII" +else + # Standard accounts don't support enterprise-only features + EXCLUDE_RESOURCES="--exclude masking_policy,tag,tag_reference,tag_masking_policy_reference" +fi + +# Override SNOWFLAKE_ACCOUNT for the snowcap run +export SNOWFLAKE_ACCOUNT="$ACCOUNT_TO_USE" + +echo "==========" +echo "Using SNOWFLAKE_ACCOUNT=$SNOWFLAKE_ACCOUNT" +if [[ -n "$EXCLUDE_RESOURCES" ]]; then + echo "Excluding enterprise-only resources (standard account)" +fi +echo "==========" # SNOWCAP_LOG_LEVEL=DEBUG -uvx --from snowcap@git+https://github.com/datacoves/snowcap.git \ +uvx --from snowcap@git+https://github.com/datacoves/snowcap.git@masking_policies_support \ --refresh \ snowcap apply \ --config resources/ \ - --sync_resources role,grant,role_grant \ + --sync_resources role,grant,role_grant,warehouse,database,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \ + $EXCLUDE_RESOURCES # --use-account-usage diff --git a/secure/snowcap/plan.sh b/secure/snowcap/plan.sh index 71e6b2371..04d3f5d49 100755 --- a/secure/snowcap/plan.sh +++ b/secure/snowcap/plan.sh @@ -8,6 +8,7 @@ else echo "File .env does not exist. Please create a .env file with the following variables:" echo "" echo "SNOWFLAKE_ACCOUNT=" + echo "SNOWFLAKE_ACCOUNT_PII=" echo "SNOWFLAKE_USER=" echo "SNOWFLAKE_ROLE=" echo "SNOWFLAKE_PRIVATE_KEY_PATH=" @@ -16,10 +17,36 @@ else exit 1 fi -export $(cat .env | xargs) +# Load env vars safely +set -a +. ./.env +set +a -uvx --from snowcap@git+https://github.com/datacoves/snowcap.git \ +# Default to non-PII (standard) account +ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT" +EXCLUDE_RESOURCES="" + +# If -pii flag is passed, switch to PII (enterprise) account +if [[ "$1" == "-pii" ]]; then + ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT_PII" +else + # Standard accounts don't support enterprise-only features + EXCLUDE_RESOURCES="--exclude masking_policy,tag,tag_reference,tag_masking_policy_reference" +fi + +# Override SNOWFLAKE_ACCOUNT for the snowcap run +export SNOWFLAKE_ACCOUNT="$ACCOUNT_TO_USE" + +echo "==========" +echo "Using SNOWFLAKE_ACCOUNT=$SNOWFLAKE_ACCOUNT" +if [[ -n "$EXCLUDE_RESOURCES" ]]; then + echo "Excluding enterprise-only resources (standard account)" +fi +echo "==========" + +uvx --from snowcap@git+https://github.com/datacoves/snowcap.git@masking_policies_support \ --refresh \ snowcap plan \ --config resources/ \ - --sync_resources role,grant,role_grant + --sync_resources role,grant,role_grant,warehouse,database,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \ + $EXCLUDE_RESOURCES diff --git a/secure/snowcap/resources/databases.yml b/secure/snowcap/resources/databases.yml index 7f4f7aa31..7a080873d 100644 --- a/secure/snowcap/resources/databases.yml +++ b/secure/snowcap/resources/databases.yml @@ -2,6 +2,9 @@ vars: - name: databases type: list default: + - name: governance + owner: sysadmin + max_data_extension_time_in_days: 30 - name: raw owner: loader max_data_extension_time_in_days: 10 diff --git a/secure/snowcap/resources/masking_policies.yml b/secure/snowcap/resources/masking_policies.yml new file mode 100644 index 000000000..a50e4dd7e --- /dev/null +++ b/secure/snowcap/resources/masking_policies.yml @@ -0,0 +1,65 @@ +masking_policies: + # String/VARCHAR columns + - name: governance.policies.mask_pii_string + args: + - name: val + data_type: VARCHAR + returns: VARCHAR + body: | + CASE + WHEN IS_ROLE_IN_SESSION('Z_UNMASK__PII') THEN val + ELSE '***MASKED***' + END + comment: Masks PII string data + + # Numeric columns + - name: governance.policies.mask_pii_number + args: + - name: val + data_type: NUMBER + returns: NUMBER + body: | + CASE + WHEN IS_ROLE_IN_SESSION('Z_UNMASK__PII') THEN val + ELSE NULL + END + comment: Masks PII numeric data + + # Float columns + - name: governance.policies.mask_pii_float + args: + - name: val + data_type: FLOAT + returns: FLOAT + body: | + CASE + WHEN IS_ROLE_IN_SESSION('Z_UNMASK__PII') THEN val + ELSE NULL + END + comment: Masks PII float data + + # Date columns + - name: governance.policies.mask_pii_date + args: + - name: val + data_type: DATE + returns: DATE + body: | + CASE + WHEN IS_ROLE_IN_SESSION('Z_UNMASK__PII') THEN val + ELSE NULL + END + comment: Masks PII date data + + # Timestamp columns + - name: governance.policies.mask_pii_timestamp + args: + - name: val + data_type: TIMESTAMP_NTZ + returns: TIMESTAMP_NTZ + body: | + CASE + WHEN IS_ROLE_IN_SESSION('Z_UNMASK__PII') THEN val + ELSE NULL + END + comment: Masks PII timestamp data diff --git a/secure/snowcap/resources/object_templates/user.yml b/secure/snowcap/resources/object_templates/user.yml new file mode 100644 index 000000000..8054136e3 --- /dev/null +++ b/secure/snowcap/resources/object_templates/user.yml @@ -0,0 +1,7 @@ +# Warehouses +users: + - for_each: var.users + name: "{{ each.value.name }}" + owner: SECURITYADMIN + type: "{{ each.value.type }}" + default_secondary_roles: [] diff --git a/secure/snowcap/resources/roles__base.yml b/secure/snowcap/resources/roles__base.yml index 81033aa56..665b31d89 100644 --- a/secure/snowcap/resources/roles__base.yml +++ b/secure/snowcap/resources/roles__base.yml @@ -2,12 +2,17 @@ roles: - name: z_account__create_database - name: z_db__tst_balboa # Used to grant access to TST databases - name: z_db__balboa_dev__create_schema + - name: z_schemas__db__raw + - name: z_schemas__db__governance + - name: z_tables_views__select - name: z_policy_row_region_de - name: z_policy_row_region_all + - name: z_unmask__pii + grants: - priv: "CREATE DATABASE" on: "ACCOUNT" @@ -23,6 +28,12 @@ grants: - "future schemas in database raw" to: z_schemas__db__raw + - priv: "USAGE" + on: + - "all schemas in database governance" + - "future schemas in database governance" + to: z_schemas__db__governance + - for_each: var.databases priv: "SELECT" on: diff --git a/secure/snowcap/resources/roles__functional.yml b/secure/snowcap/resources/roles__functional.yml index ed802d073..213ea7211 100644 --- a/secure/snowcap/resources/roles__functional.yml +++ b/secure/snowcap/resources/roles__functional.yml @@ -5,6 +5,7 @@ roles: - name: bot_integration - name: analyst + - name: analyst_pii - name: loader - name: transformer_dbt @@ -23,6 +24,10 @@ role_grants: - z_db__raw - z_schemas__db__raw + # GOVERNANCE + - z_db__governance + - z_schemas__db__governance + # BALBOA - z_db__balboa - z_schema__dbt_test__audit @@ -72,6 +77,9 @@ role_grants: # STAGES - z_stage__raw__dbt_artifacts__artifacts__read + # APPLY TAGS + - z_tag__apply__pii + # WAREHOUSES - z_wh__wh_transforming - z_wh__wh_transforming_dynamic_tables @@ -81,6 +89,11 @@ role_grants: - z_base__analyst - z_tables_views__select + - to_role: analyst_pii + roles: + - analyst + - z_unmask__pii + - to_role: catalog roles: - z_wh__wh_catalog @@ -100,6 +113,10 @@ role_grants: - z_db__raw - z_schemas__db__raw + # GOVERNANCE + - z_db__governance + - z_schemas__db__governance + # Shared DBs - z_db__covid19_epidemiological_data - z_db__snowflake @@ -112,6 +129,8 @@ role_grants: - z_stage__raw__dbt_artifacts__artifacts__write - z_stage__balboa_apps__resources__streamlit__write + - z_tag__apply__pii + - z_tables_views__select - to_role: bot_integration diff --git a/secure/snowcap/resources/schemas.yml b/secure/snowcap/resources/schemas.yml index a713ee292..96f9e4570 100644 --- a/secure/snowcap/resources/schemas.yml +++ b/secure/snowcap/resources/schemas.yml @@ -9,6 +9,10 @@ vars: - name: RAW.SNAPSHOTS owner: transformer_dbt + # GOVERNANCE DB + - name: GOVERNANCE.TAGS + - name: GOVERNANCE.POLICIES + # BALBOA DB - name: BALBOA.DBT_TEST__AUDIT diff --git a/secure/snowcap/resources/tag_masking_policies.yml b/secure/snowcap/resources/tag_masking_policies.yml new file mode 100644 index 000000000..0cc120eea --- /dev/null +++ b/secure/snowcap/resources/tag_masking_policies.yml @@ -0,0 +1,16 @@ +tag_masking_policy_references: + # Use fully qualified names: database.schema.name + - tag_name: governance.tags.pii + masking_policy_name: governance.policies.mask_pii_string + + - tag_name: governance.tags.pii + masking_policy_name: governance.policies.mask_pii_number + + - tag_name: governance.tags.pii + masking_policy_name: governance.policies.mask_pii_float + + - tag_name: governance.tags.pii + masking_policy_name: governance.policies.mask_pii_date + + - tag_name: governance.tags.pii + masking_policy_name: governance.policies.mask_pii_timestamp diff --git a/secure/snowcap/resources/tags.yml b/secure/snowcap/resources/tags.yml new file mode 100644 index 000000000..eb69f95f8 --- /dev/null +++ b/secure/snowcap/resources/tags.yml @@ -0,0 +1,11 @@ +tags: + - name: governance.tags.pii + comment: Personally Identifiable Information + +roles: + - name: z_tag__apply__pii + +grants: + - priv: APPLY + on: tag governance.tags.pii + to: z_tag__apply__pii diff --git a/secure/snowcap/resources/users.yml b/secure/snowcap/resources/users.yml index edfce2f3d..900728612 100644 --- a/secure/snowcap/resources/users.yml +++ b/secure/snowcap/resources/users.yml @@ -1,3 +1,45 @@ +# Users +vars: + - name: users + type: list + default: + - name: alejandro + type: PERSON + - name: fernando + type: PERSON + - name: jesus + type: PERSON + - name: ian + type: PERSON + - name: gomezn + type: PERSON + - name: sebastian + type: PERSON + - name: stephen + type: PERSON + + # Service Accounts + - name: svc_airbyte + type: SERVICE + - name: svc_airflow + type: SERVICE + - name: svc_datacoves + type: SERVICE + - name: svc_fivetran + type: SERVICE + - name: svc_github_actions + type: SERVICE + - name: svc_balboa_ci + type: SERVICE + - name: svc_dbt_coves_ci + type: SERVICE + - name: svc_datacoves_platform_ci + type: SERVICE + - name: svc_datahub + type: SERVICE + - name: svc_superset + type: LEGACY_SERVICE + role_grants: - to_user: alejandro roles: @@ -22,6 +64,7 @@ role_grants: - to_user: gomezn roles: - analyst + - analyst_pii - finance_team - loader - transformer_dbt diff --git a/training_and_demos/claude/readme.md b/training_and_demos/claude/readme.md new file mode 100644 index 000000000..6d43a8e4c --- /dev/null +++ b/training_and_demos/claude/readme.md @@ -0,0 +1 @@ +curl -fsSL https://claude.ai/install.sh | bash diff --git a/transform/dbt_project.yml b/transform/dbt_project.yml index 4eae35b7c..9f2802cb9 100644 --- a/transform/dbt_project.yml +++ b/transform/dbt_project.yml @@ -129,8 +129,12 @@ models: +transient: "{{ 'false' if target.name == 'prd' else 'true' }}" +post-hook: - - "{{ dbt_snow_mask.apply_masking_policy('sources') if target.name == 'prd_pii' }}" - - "{{ dbt_snow_mask.apply_masking_policy('models') if target.name == 'prd_pii' }}" + - > + {% if flags.WHICH in ('run', 'build') and target.name == 'prd_pii' %} + {{ dbt_tags.apply_column_tags() }} + {% endif %} + # - "{{ dbt_snow_mask.apply_masking_policy('sources') if target.name == 'prd_pii' }}" + # - "{{ dbt_snow_mask.apply_masking_policy('models') if target.name == 'prd_pii' }}" # This macro creates a non_versioned view of a versioned dbt model # Useful if you want to have a versioned mart but dont want BI tool to break if version # is updated. @@ -150,6 +154,10 @@ data_tests: vars: 'dbt_date:time_zone': 'America/Los_Angeles' + dbt_tags__opt_in_default_naming_config: false + dbt_tags__database: "GOVERNANCE" + dbt_tags__schema: "TAGS" + create_masking_policy_schema: "false" use_common_masking_policy_db: "true" common_masking_policy_db: "BALBOA" diff --git a/transform/models/L1_inlets/loans/stg_personal_loans.yml b/transform/models/L1_inlets/loans/stg_personal_loans.yml index cecd78452..539b02893 100644 --- a/transform/models/L1_inlets/loans/stg_personal_loans.yml +++ b/transform/models/L1_inlets/loans/stg_personal_loans.yml @@ -12,8 +12,12 @@ models: # config: # store_failures_as: view description: The state in which the borrower resides + tags: + - pii - name: annual_inc description: The borrower's annual income + tags: + - pii - name: collections_12_mths_ex_med description: Number of collections in the last 12 months excluding medical collections diff --git a/transform/package-lock.yml b/transform/package-lock.yml index 4d8a47e5f..75f58d15b 100644 --- a/transform/package-lock.yml +++ b/transform/package-lock.yml @@ -11,7 +11,10 @@ packages: - name: dbt_external_tables package: dbt-labs/dbt_external_tables version: 0.12.0 + - name: dbt_tags + package: infinitelambda/dbt_tags + version: 1.9.0 - name: dbt_date package: godatadriven/dbt_date version: 0.17.1 -sha1_hash: 095ba32192569d9d62e26f285b16b540e24b633f +sha1_hash: 46471b8f9824cdf17d80d34f2b94ad2d2662d5b5 diff --git a/transform/packages.yml b/transform/packages.yml index 69e04f5ff..e4d9e4db1 100644 --- a/transform/packages.yml +++ b/transform/packages.yml @@ -15,3 +15,5 @@ packages: # version: 0.12.1 # - package: elementary-data/elementary # version: 0.16.2 + - package: infinitelambda/dbt_tags + version: 1.9.0 From a3daa9b3933ae9293f9b59f1f70f655fc4652cae Mon Sep 17 00:00:00 2001 From: Noel Gomez Date: Tue, 10 Mar 2026 16:11:48 -0700 Subject: [PATCH 2/2] update plan and apply to use pypi version of snowcap --- secure/snowcap/apply.sh | 17 +++++++++++------ secure/snowcap/plan.sh | 13 +++++++++---- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/secure/snowcap/apply.sh b/secure/snowcap/apply.sh index 32ed9d3e8..5d09b1b38 100755 --- a/secure/snowcap/apply.sh +++ b/secure/snowcap/apply.sh @@ -45,14 +45,19 @@ fi echo "==========" # SNOWCAP_LOG_LEVEL=DEBUG -uvx --from snowcap@git+https://github.com/datacoves/snowcap.git@masking_policies_support \ - --refresh \ - snowcap apply \ +# uvx --from snowcap@git+https://github.com/datacoves/snowcap.git@fix_exclude_cli_param \ +# --refresh \ +# snowcap apply \ +# --config resources/ \ +# --sync_resources role,grant,role_grant,warehouse,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \ +# $EXCLUDE_RESOURCES + # --use-account-usage + +uvx snowcap apply \ --config resources/ \ --sync_resources role,grant,role_grant,warehouse,database,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \ $EXCLUDE_RESOURCES # --use-account-usage - -uvx --from snowcap@git+https://github.com/datacoves/snowcap.git \ - snowcap --version +# uvx --from snowcap@git+https://github.com/datacoves/snowcap.git \ +uvx snowcap --version diff --git a/secure/snowcap/plan.sh b/secure/snowcap/plan.sh index 04d3f5d49..b3a1e1a4d 100755 --- a/secure/snowcap/plan.sh +++ b/secure/snowcap/plan.sh @@ -44,9 +44,14 @@ if [[ -n "$EXCLUDE_RESOURCES" ]]; then fi echo "==========" -uvx --from snowcap@git+https://github.com/datacoves/snowcap.git@masking_policies_support \ - --refresh \ - snowcap plan \ +# uvx --from snowcap@git+https://github.com/datacoves/snowcap.git@fix_exclude_cli_param \ +# --refresh \ +# snowcap plan \ +# --config resources/ \ +# --sync_resources role,grant,role_grant,warehouse,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \ +# $EXCLUDE_RESOURCES + +uvx snowcap plan \ --config resources/ \ - --sync_resources role,grant,role_grant,warehouse,database,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \ + --sync_resources role,grant,role_grant,warehouse,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \ $EXCLUDE_RESOURCES