diff --git a/secure/snowcap/.env.sample b/secure/snowcap/.env.sample index dbd2d7eb..55884aad 100644 --- a/secure/snowcap/.env.sample +++ b/secure/snowcap/.env.sample @@ -1,4 +1,5 @@ SNOWFLAKE_ACCOUNT= +SNOWFLAKE_ACCOUNT_PII= SNOWFLAKE_USER= SNOWFLAKE_ROLE=securityadmin SNOWFLAKE_PRIVATE_KEY_PATH=/config/.ssl/dev-private.pem diff --git a/secure/snowcap/apply.sh b/secure/snowcap/apply.sh index 58c1f333..5d09b1b3 100755 --- a/secure/snowcap/apply.sh +++ b/secure/snowcap/apply.sh @@ -8,6 +8,7 @@ else echo "File .env does not exist. Please create a .env file with the following variables:" echo "" echo "SNOWFLAKE_ACCOUNT=" + echo "SNOWFLAKE_ACCOUNT_PII=" echo "SNOWFLAKE_USER=" echo "SNOWFLAKE_ROLE=" echo "SNOWFLAKE_PRIVATE_KEY_PATH=" @@ -16,16 +17,47 @@ else exit 1 fi -export $(cat .env | xargs) +# Load env vars safely +set -a +. ./.env +set +a + +# Default to non-PII (standard) account +ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT" +EXCLUDE_RESOURCES="" + +# If -pii flag is passed, switch to PII (enterprise) account +if [[ "$1" == "-pii" ]]; then + ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT_PII" +else + # Standard accounts don't support enterprise-only features + EXCLUDE_RESOURCES="--exclude masking_policy,tag,tag_reference,tag_masking_policy_reference" +fi + +# Override SNOWFLAKE_ACCOUNT for the snowcap run +export SNOWFLAKE_ACCOUNT="$ACCOUNT_TO_USE" + +echo "==========" +echo "Using SNOWFLAKE_ACCOUNT=$SNOWFLAKE_ACCOUNT" +if [[ -n "$EXCLUDE_RESOURCES" ]]; then + echo "Excluding enterprise-only resources (standard account)" +fi +echo "==========" # SNOWCAP_LOG_LEVEL=DEBUG -uvx --from snowcap@git+https://github.com/datacoves/snowcap.git \ - --refresh \ - snowcap apply \ - --config resources/ \ - --sync_resources role,grant,role_grant \ +# uvx --from snowcap@git+https://github.com/datacoves/snowcap.git@fix_exclude_cli_param \ +# --refresh \ +# snowcap apply \ +# --config resources/ \ +# --sync_resources 
role,grant,role_grant,warehouse,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \ +# $EXCLUDE_RESOURCES # --use-account-usage +uvx snowcap apply \ + --config resources/ \ + --sync_resources role,grant,role_grant,warehouse,database,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \ + $EXCLUDE_RESOURCES + # --use-account-usage -uvx --from snowcap@git+https://github.com/datacoves/snowcap.git \ - snowcap --version +# uvx --from snowcap@git+https://github.com/datacoves/snowcap.git \ +uvx snowcap --version diff --git a/secure/snowcap/plan.sh b/secure/snowcap/plan.sh index 71e6b237..b3a1e1a4 100755 --- a/secure/snowcap/plan.sh +++ b/secure/snowcap/plan.sh @@ -8,6 +8,7 @@ else echo "File .env does not exist. Please create a .env file with the following variables:" echo "" echo "SNOWFLAKE_ACCOUNT=" + echo "SNOWFLAKE_ACCOUNT_PII=" echo "SNOWFLAKE_USER=" echo "SNOWFLAKE_ROLE=" echo "SNOWFLAKE_PRIVATE_KEY_PATH=" @@ -16,10 +17,41 @@ else exit 1 fi -export $(cat .env | xargs) +# Load env vars safely +set -a +. 
./.env +set +a -uvx --from snowcap@git+https://github.com/datacoves/snowcap.git \ - --refresh \ - snowcap plan \ +# Default to non-PII (standard) account +ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT" +EXCLUDE_RESOURCES="" + +# If -pii flag is passed, switch to PII (enterprise) account +if [[ "$1" == "-pii" ]]; then + ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT_PII" +else + # Standard accounts don't support enterprise-only features + EXCLUDE_RESOURCES="--exclude masking_policy,tag,tag_reference,tag_masking_policy_reference" +fi + +# Override SNOWFLAKE_ACCOUNT for the snowcap run +export SNOWFLAKE_ACCOUNT="$ACCOUNT_TO_USE" + +echo "==========" +echo "Using SNOWFLAKE_ACCOUNT=$SNOWFLAKE_ACCOUNT" +if [[ -n "$EXCLUDE_RESOURCES" ]]; then + echo "Excluding enterprise-only resources (standard account)" +fi +echo "==========" + +# uvx --from snowcap@git+https://github.com/datacoves/snowcap.git@fix_exclude_cli_param \ +# --refresh \ +# snowcap plan \ +# --config resources/ \ +# --sync_resources role,grant,role_grant,warehouse,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \ +# $EXCLUDE_RESOURCES + +uvx snowcap plan \ --config resources/ \ - --sync_resources role,grant,role_grant + --sync_resources role,grant,role_grant,warehouse,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \ + $EXCLUDE_RESOURCES diff --git a/secure/snowcap/resources/databases.yml b/secure/snowcap/resources/databases.yml index 7f4f7aa3..7a080873 100644 --- a/secure/snowcap/resources/databases.yml +++ b/secure/snowcap/resources/databases.yml @@ -2,6 +2,9 @@ vars: - name: databases type: list default: + - name: governance + owner: sysadmin + max_data_extension_time_in_days: 30 - name: raw owner: loader max_data_extension_time_in_days: 10 diff --git a/secure/snowcap/resources/masking_policies.yml b/secure/snowcap/resources/masking_policies.yml new file mode 100644 index 00000000..a50e4dd7 --- /dev/null +++ b/secure/snowcap/resources/masking_policies.yml @@ -0,0 +1,65 @@ +masking_policies: 
+ # String/VARCHAR columns + - name: governance.policies.mask_pii_string + args: + - name: val + data_type: VARCHAR + returns: VARCHAR + body: | + CASE + WHEN IS_ROLE_IN_SESSION('Z_UNMASK__PII') THEN val + ELSE '***MASKED***' + END + comment: Masks PII string data + + # Numeric columns + - name: governance.policies.mask_pii_number + args: + - name: val + data_type: NUMBER + returns: NUMBER + body: | + CASE + WHEN IS_ROLE_IN_SESSION('Z_UNMASK__PII') THEN val + ELSE NULL + END + comment: Masks PII numeric data + + # Float columns + - name: governance.policies.mask_pii_float + args: + - name: val + data_type: FLOAT + returns: FLOAT + body: | + CASE + WHEN IS_ROLE_IN_SESSION('Z_UNMASK__PII') THEN val + ELSE NULL + END + comment: Masks PII float data + + # Date columns + - name: governance.policies.mask_pii_date + args: + - name: val + data_type: DATE + returns: DATE + body: | + CASE + WHEN IS_ROLE_IN_SESSION('Z_UNMASK__PII') THEN val + ELSE NULL + END + comment: Masks PII date data + + # Timestamp columns + - name: governance.policies.mask_pii_timestamp + args: + - name: val + data_type: TIMESTAMP_NTZ + returns: TIMESTAMP_NTZ + body: | + CASE + WHEN IS_ROLE_IN_SESSION('Z_UNMASK__PII') THEN val + ELSE NULL + END + comment: Masks PII timestamp data diff --git a/secure/snowcap/resources/object_templates/user.yml b/secure/snowcap/resources/object_templates/user.yml new file mode 100644 index 00000000..8054136e --- /dev/null +++ b/secure/snowcap/resources/object_templates/user.yml @@ -0,0 +1,7 @@ +# Users +users: + - for_each: var.users + name: "{{ each.value.name }}" + owner: SECURITYADMIN + type: "{{ each.value.type }}" + default_secondary_roles: [] diff --git a/secure/snowcap/resources/roles__base.yml b/secure/snowcap/resources/roles__base.yml index 81033aa5..665b31d8 100644 --- a/secure/snowcap/resources/roles__base.yml +++ b/secure/snowcap/resources/roles__base.yml @@ -2,12 +2,17 @@ roles: - name: z_account__create_database - name: z_db__tst_balboa # Used to grant
access to TST databases - name: z_db__balboa_dev__create_schema + - name: z_schemas__db__raw + - name: z_schemas__db__governance + - name: z_tables_views__select - name: z_policy_row_region_de - name: z_policy_row_region_all + - name: z_unmask__pii + grants: - priv: "CREATE DATABASE" on: "ACCOUNT" @@ -23,6 +28,12 @@ grants: - "future schemas in database raw" to: z_schemas__db__raw + - priv: "USAGE" + on: + - "all schemas in database governance" + - "future schemas in database governance" + to: z_schemas__db__governance + - for_each: var.databases priv: "SELECT" on: diff --git a/secure/snowcap/resources/roles__functional.yml b/secure/snowcap/resources/roles__functional.yml index ed802d07..213ea721 100644 --- a/secure/snowcap/resources/roles__functional.yml +++ b/secure/snowcap/resources/roles__functional.yml @@ -5,6 +5,7 @@ roles: - name: bot_integration - name: analyst + - name: analyst_pii - name: loader - name: transformer_dbt @@ -23,6 +24,10 @@ role_grants: - z_db__raw - z_schemas__db__raw + # GOVERNANCE + - z_db__governance + - z_schemas__db__governance + # BALBOA - z_db__balboa - z_schema__dbt_test__audit @@ -72,6 +77,9 @@ role_grants: # STAGES - z_stage__raw__dbt_artifacts__artifacts__read + # APPLY TAGS + - z_tag__apply__pii + # WAREHOUSES - z_wh__wh_transforming - z_wh__wh_transforming_dynamic_tables @@ -81,6 +89,11 @@ role_grants: - z_base__analyst - z_tables_views__select + - to_role: analyst_pii + roles: + - analyst + - z_unmask__pii + - to_role: catalog roles: - z_wh__wh_catalog @@ -100,6 +113,10 @@ role_grants: - z_db__raw - z_schemas__db__raw + # GOVERNANCE + - z_db__governance + - z_schemas__db__governance + # Shared DBs - z_db__covid19_epidemiological_data - z_db__snowflake @@ -112,6 +129,8 @@ role_grants: - z_stage__raw__dbt_artifacts__artifacts__write - z_stage__balboa_apps__resources__streamlit__write + - z_tag__apply__pii + - z_tables_views__select - to_role: bot_integration diff --git a/secure/snowcap/resources/schemas.yml 
b/secure/snowcap/resources/schemas.yml index a713ee29..96f9e457 100644 --- a/secure/snowcap/resources/schemas.yml +++ b/secure/snowcap/resources/schemas.yml @@ -9,6 +9,10 @@ vars: - name: RAW.SNAPSHOTS owner: transformer_dbt + # GOVERNANCE DB + - name: GOVERNANCE.TAGS + - name: GOVERNANCE.POLICIES + # BALBOA DB - name: BALBOA.DBT_TEST__AUDIT diff --git a/secure/snowcap/resources/tag_masking_policies.yml b/secure/snowcap/resources/tag_masking_policies.yml new file mode 100644 index 00000000..0cc120ee --- /dev/null +++ b/secure/snowcap/resources/tag_masking_policies.yml @@ -0,0 +1,16 @@ +tag_masking_policy_references: + # Use fully qualified names: database.schema.name + - tag_name: governance.tags.pii + masking_policy_name: governance.policies.mask_pii_string + + - tag_name: governance.tags.pii + masking_policy_name: governance.policies.mask_pii_number + + - tag_name: governance.tags.pii + masking_policy_name: governance.policies.mask_pii_float + + - tag_name: governance.tags.pii + masking_policy_name: governance.policies.mask_pii_date + + - tag_name: governance.tags.pii + masking_policy_name: governance.policies.mask_pii_timestamp diff --git a/secure/snowcap/resources/tags.yml b/secure/snowcap/resources/tags.yml new file mode 100644 index 00000000..eb69f95f --- /dev/null +++ b/secure/snowcap/resources/tags.yml @@ -0,0 +1,11 @@ +tags: + - name: governance.tags.pii + comment: Personally Identifiable Information + +roles: + - name: z_tag__apply__pii + +grants: + - priv: APPLY + on: tag governance.tags.pii + to: z_tag__apply__pii diff --git a/secure/snowcap/resources/users.yml b/secure/snowcap/resources/users.yml index edfce2f3..90072861 100644 --- a/secure/snowcap/resources/users.yml +++ b/secure/snowcap/resources/users.yml @@ -1,3 +1,45 @@ +# Users +vars: + - name: users + type: list + default: + - name: alejandro + type: PERSON + - name: fernando + type: PERSON + - name: jesus + type: PERSON + - name: ian + type: PERSON + - name: gomezn + type: PERSON + - name: 
sebastian + type: PERSON + - name: stephen + type: PERSON + + # Service Accounts + - name: svc_airbyte + type: SERVICE + - name: svc_airflow + type: SERVICE + - name: svc_datacoves + type: SERVICE + - name: svc_fivetran + type: SERVICE + - name: svc_github_actions + type: SERVICE + - name: svc_balboa_ci + type: SERVICE + - name: svc_dbt_coves_ci + type: SERVICE + - name: svc_datacoves_platform_ci + type: SERVICE + - name: svc_datahub + type: SERVICE + - name: svc_superset + type: LEGACY_SERVICE + role_grants: - to_user: alejandro roles: @@ -22,6 +64,7 @@ role_grants: - to_user: gomezn roles: - analyst + - analyst_pii - finance_team - loader - transformer_dbt diff --git a/training_and_demos/claude/readme.md b/training_and_demos/claude/readme.md new file mode 100644 index 00000000..6d43a8e4 --- /dev/null +++ b/training_and_demos/claude/readme.md @@ -0,0 +1 @@ +curl -fsSL https://claude.ai/install.sh | bash diff --git a/transform/dbt_project.yml b/transform/dbt_project.yml index 4eae35b7..9f2802cb 100644 --- a/transform/dbt_project.yml +++ b/transform/dbt_project.yml @@ -129,8 +129,12 @@ models: +transient: "{{ 'false' if target.name == 'prd' else 'true' }}" +post-hook: - - "{{ dbt_snow_mask.apply_masking_policy('sources') if target.name == 'prd_pii' }}" - - "{{ dbt_snow_mask.apply_masking_policy('models') if target.name == 'prd_pii' }}" + - > + {% if flags.WHICH in ('run', 'build') and target.name == 'prd_pii' %} + {{ dbt_tags.apply_column_tags() }} + {% endif %} + # - "{{ dbt_snow_mask.apply_masking_policy('sources') if target.name == 'prd_pii' }}" + # - "{{ dbt_snow_mask.apply_masking_policy('models') if target.name == 'prd_pii' }}" # This macro creates a non_versioned view of a versioned dbt model # Useful if you want to have a versioned mart but dont want BI tool to break if version # is updated. 
@@ -150,6 +154,10 @@ data_tests: vars: 'dbt_date:time_zone': 'America/Los_Angeles' + dbt_tags__opt_in_default_naming_config: false + dbt_tags__database: "GOVERNANCE" + dbt_tags__schema: "TAGS" + create_masking_policy_schema: "false" use_common_masking_policy_db: "true" common_masking_policy_db: "BALBOA" diff --git a/transform/models/L1_inlets/loans/stg_personal_loans.yml b/transform/models/L1_inlets/loans/stg_personal_loans.yml index cecd7845..539b0289 100644 --- a/transform/models/L1_inlets/loans/stg_personal_loans.yml +++ b/transform/models/L1_inlets/loans/stg_personal_loans.yml @@ -12,8 +12,12 @@ models: # config: # store_failures_as: view description: The state in which the borrower resides + tags: + - pii - name: annual_inc description: The borrower's annual income + tags: + - pii - name: collections_12_mths_ex_med description: Number of collections in the last 12 months excluding medical collections diff --git a/transform/package-lock.yml b/transform/package-lock.yml index 4d8a47e5..75f58d15 100644 --- a/transform/package-lock.yml +++ b/transform/package-lock.yml @@ -11,7 +11,10 @@ packages: - name: dbt_external_tables package: dbt-labs/dbt_external_tables version: 0.12.0 + - name: dbt_tags + package: infinitelambda/dbt_tags + version: 1.9.0 - name: dbt_date package: godatadriven/dbt_date version: 0.17.1 -sha1_hash: 095ba32192569d9d62e26f285b16b540e24b633f +sha1_hash: 46471b8f9824cdf17d80d34f2b94ad2d2662d5b5 diff --git a/transform/packages.yml b/transform/packages.yml index 69e04f5f..e4d9e4db 100644 --- a/transform/packages.yml +++ b/transform/packages.yml @@ -15,3 +15,5 @@ packages: # version: 0.12.1 # - package: elementary-data/elementary # version: 0.16.2 + - package: infinitelambda/dbt_tags + version: 1.9.0