Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions load/dlt/country_populations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env -S uv run
# /// script
# dependencies = [
# "dlt[snowflake, parquet]==1.21.0",
# "enlighten~=1.12.4",
# "psutil~=6.0.0",
# "connectorx==0.4.1",
# "pandas==2.2.2",
# ]
# ///
"""Loads world population CSV data to Snowflake RAW database"""
import dlt
import pandas as pd
from utils.datacoves_utils import pipelines_dir

@dlt.resource(table_name="country_populations", write_disposition="replace")
def country_populations():
    """Yield the world population dataset as a single pandas DataFrame.

    The CSV is read over HTTP from the public ``datasets/population`` GitHub
    repository. The resource is declared with the ``replace`` write
    disposition and the table name ``country_populations``.
    """
    source_url = "https://raw.githubusercontent.com/datasets/population/master/data/population.csv"
    population_frame = pd.read_csv(source_url)
    yield population_frame

@dlt.source
def country_populations_source():
    """Return the resources that make up the world population source.

    The resource is called rather than passed as the bare decorated function,
    so the source receives a resource instance.

    Returns:
        A one-element list containing the ``country_populations`` resource.
    """
    return [country_populations()]

if __name__ == "__main__":
    # Snowflake destination registered under the name "datacoves_snowflake",
    # pointed at the "raw" database.
    snowflake_destination = dlt.destinations.snowflake(
        database="raw",
        destination_name="datacoves_snowflake",
    )

    # Pipeline state is kept under the shared pipelines_dir imported from
    # utils.datacoves_utils; progress is reported through the "log" collector.
    population_pipeline = dlt.pipeline(
        pipeline_name="world_population_data",
        destination=snowflake_destination,
        dataset_name="raw",
        pipelines_dir=pipelines_dir,
        progress="log",
    )

    # The source is passed to run() wrapped in a one-element list.
    sources_to_load = [country_populations_source()]
    load_summary = population_pipeline.run(sources_to_load)

    print(load_summary)
8 changes: 5 additions & 3 deletions load/dlt/loans_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"""Loads a CSV file to Snowflake"""
import dlt
import pandas as pd
from utils.datacoves_utils import pipelines_dir
from utils.datacoves_utils import pipelines_dir, enable_change_tracking

@dlt.resource(write_disposition="replace")
def personal_loans():
Expand All @@ -28,7 +28,7 @@ def zip_coordinates():

@dlt.source
def loans_data():
return [personal_loans, zip_coordinates]
return [personal_loans(), zip_coordinates()]

if __name__ == "__main__":
datacoves_snowflake = dlt.destinations.snowflake(
Expand All @@ -46,5 +46,7 @@ def loans_data():
)

load_info = pipeline.run(loans_data())

print(load_info)

# Enable CHANGE_TRACKING for Dynamic Table support
enable_change_tracking(pipeline, ["personal_loans", "zip_coordinates"])
15 changes: 15 additions & 0 deletions load/dlt/utils/datacoves_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,18 @@
else:
pipelines_dir = os.path.join('/tmp','.dlt','pipelines')
print(f"pipelines_dir set to: {pipelines_dir}")


def enable_change_tracking(pipeline, tables: list[str]) -> None:
    """Enable CHANGE_TRACKING on Snowflake tables for Dynamic Table support.

    Issues one ``ALTER TABLE ... SET CHANGE_TRACKING = TRUE`` statement per
    table through the pipeline's SQL client, then prints the list of tables
    that were altered. Any error raised by the SQL client propagates to the
    caller; tables altered before the failure stay altered.

    Args:
        pipeline: A dlt pipeline instance with a Snowflake destination.
        tables: List of table names to enable change tracking on. When empty,
            the function returns without opening a connection.
    """
    if not tables:
        # Nothing to alter: skip the connection and the "enabled on:" message,
        # which would otherwise be printed with an empty table list.
        return

    with pipeline.sql_client() as client:
        for table in tables:
            # Let the SQL client build the qualified, escaped identifier so it
            # matches the dataset/table names dlt actually created, instead of
            # hand-concatenating pipeline.dataset_name and the raw table name.
            qualified_name = client.make_qualified_table_name(table)
            client.execute_sql(
                f"ALTER TABLE {qualified_name} SET CHANGE_TRACKING = TRUE"
            )
    print(f"CHANGE_TRACKING enabled on: {', '.join(tables)}")
140 changes: 95 additions & 45 deletions secure/snowcap/apply.sh
Original file line number Diff line number Diff line change
@@ -1,63 +1,113 @@
#!/bin/bash
secure_path=/config/workspace/secure/snowcap
cd $secure_path
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

# Load .env if it exists
if [ -f .env ]; then
echo "File .env found."
else
echo "File .env does not exist. Please create a .env file with the following variables:"
set -a
. ./.env
set +a
fi

# Parse arguments
USE_PII=false
GIT_BRANCH=""
USE_PLAN=false
while [[ $# -gt 0 ]]; do
case $1 in
--pii)
USE_PII=true
shift
;;
-b|--branch)
GIT_BRANCH="$2"
shift 2
;;
--plan)
USE_PLAN=true
break # Stop parsing, rest goes to snowcap (including --plan)
;;
*)
break # Stop parsing, rest goes to snowcap
;;
esac
done

# Validate required variables
missing=()
[ -z "$SNOWFLAKE_ACCOUNT" ] && missing+=("SNOWFLAKE_ACCOUNT")
[ -z "$SNOWFLAKE_USER" ] && missing+=("SNOWFLAKE_USER")
[ -z "$SNOWFLAKE_ROLE" ] && missing+=("SNOWFLAKE_ROLE")
[ -z "$SNOWFLAKE_PRIVATE_KEY_PATH" ] && missing+=("SNOWFLAKE_PRIVATE_KEY_PATH")

if $USE_PII && [ -z "$SNOWFLAKE_ACCOUNT_PII" ]; then
missing+=("SNOWFLAKE_ACCOUNT_PII")
fi

if [ ${#missing[@]} -gt 0 ]; then
echo "Error: Missing required environment variables:"
for var in "${missing[@]}"; do
echo " - $var"
done
echo ""
echo "SNOWFLAKE_ACCOUNT="
echo "SNOWFLAKE_ACCOUNT_PII="
echo "SNOWFLAKE_USER="
echo "SNOWFLAKE_ROLE="
echo "SNOWFLAKE_PRIVATE_KEY_PATH="
echo "SNOWFLAKE_AUTHENTICATOR=SNOWFLAKE_JWT"
echo "Create a .env file with:"
echo ""
echo " SNOWFLAKE_ACCOUNT=your_account # Standard account identifier"
echo " SNOWFLAKE_ACCOUNT_PII=your_pii_acct # Enterprise account (for --pii flag)"
echo " SNOWFLAKE_USER=your_user # Service account username"
echo " SNOWFLAKE_ROLE=SECURITYADMIN # Role for applying changes"
echo " SNOWFLAKE_PRIVATE_KEY_PATH=~/.ssh/key # Path to private key"
echo " SNOWFLAKE_AUTHENTICATOR=SNOWFLAKE_JWT"
exit 1
fi

# Load env vars safely
set -a
. ./.env
set +a

# Default to non-PII (standard) account
ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT"
EXCLUDE_RESOURCES=""

# If -pii flag is passed, switch to PII (enterprise) account
if [[ "$1" == "-pii" ]]; then
ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT_PII"
# Set account based on --pii flag
if $USE_PII; then
ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT_PII"
EXCLUDE_RESOURCES=""
USE_ACCOUNT_USAGE="--use-account-usage"
else
# Standard accounts don't support enterprise-only features
EXCLUDE_RESOURCES="--exclude masking_policy,tag,tag_reference,tag_masking_policy_reference"
ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT"
EXCLUDE_RESOURCES="--exclude masking_policy,tag,tag_reference,tag_masking_policy_reference,row_access_policy"
USE_ACCOUNT_USAGE=""
fi

# Override SNOWFLAKE_ACCOUNT for the snowcap run
export SNOWFLAKE_ACCOUNT="$ACCOUNT_TO_USE"

# Build uvx command based on branch
if [ -n "$GIT_BRANCH" ]; then
UVX_CMD="uvx --from snowcap@git+https://github.com/datacoves/snowcap.git@${GIT_BRANCH} --refresh"
else
UVX_CMD="uvx"
fi

echo "=========="
echo "Using SNOWFLAKE_ACCOUNT=$SNOWFLAKE_ACCOUNT"
if [[ -n "$EXCLUDE_RESOURCES" ]]; then
echo "Excluding enterprise-only resources (standard account)"
if [ -n "$GIT_BRANCH" ]; then
echo "Using snowcap from branch: $GIT_BRANCH"
fi
if [ -n "$EXCLUDE_RESOURCES" ]; then
echo "Excluding enterprise-only resources (standard account)"
fi
if [ -n "$USE_ACCOUNT_USAGE" ]; then
echo "Using --use-account-usage flag"
fi
echo "=========="

# SNOWCAP_LOG_LEVEL=DEBUG
# uvx --from snowcap@git+https://github.com/datacoves/snowcap.git@fix_exclude_cli_param \
# --refresh \
# snowcap apply \
# --config resources/ \
# --sync_resources role,grant,role_grant,warehouse,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \
# $EXCLUDE_RESOURCES
# --use-account-usage

uvx snowcap apply \
--config resources/ \
--sync_resources role,grant,role_grant,warehouse,database,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \
$EXCLUDE_RESOURCES
# --use-account-usage

# uvx --from snowcap@git+https://github.com/datacoves/snowcap.git \
uvx snowcap --version
# Build config flag (skip if using --plan)
if $USE_PLAN; then
CONFIG_FLAG=""
SYNC_FLAG=""
else
CONFIG_FLAG="--config resources/"
SYNC_FLAG="--sync_resources role,grant,role_grant,warehouse,user,masking_policy,tag,tag_reference,tag_masking_policy_reference,row_access_policy"
fi

$UVX_CMD snowcap apply \
$CONFIG_FLAG \
$SYNC_FLAG \
$EXCLUDE_RESOURCES \
$USE_ACCOUNT_USAGE \
"$@"

$UVX_CMD snowcap --version
116 changes: 78 additions & 38 deletions secure/snowcap/plan.sh
Original file line number Diff line number Diff line change
@@ -1,57 +1,97 @@
#!/bin/bash
secure_path=/config/workspace/secure/snowcap
cd $secure_path
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

# Load .env if it exists
if [ -f .env ]; then
echo "File .env found."
else
echo "File .env does not exist. Please create a .env file with the following variables:"
set -a
. ./.env
set +a
fi

# Parse arguments
USE_PII=false
GIT_BRANCH=""
while [[ $# -gt 0 ]]; do
case $1 in
--pii)
USE_PII=true
shift
;;
-b|--branch)
GIT_BRANCH="$2"
shift 2
;;
*)
break # Stop parsing, rest goes to snowcap
;;
esac
done

# Validate required variables
missing=()
[ -z "$SNOWFLAKE_ACCOUNT" ] && missing+=("SNOWFLAKE_ACCOUNT")
[ -z "$SNOWFLAKE_USER" ] && missing+=("SNOWFLAKE_USER")
[ -z "$SNOWFLAKE_ROLE" ] && missing+=("SNOWFLAKE_ROLE")
[ -z "$SNOWFLAKE_PRIVATE_KEY_PATH" ] && missing+=("SNOWFLAKE_PRIVATE_KEY_PATH")

if $USE_PII && [ -z "$SNOWFLAKE_ACCOUNT_PII" ]; then
missing+=("SNOWFLAKE_ACCOUNT_PII")
fi

if [ ${#missing[@]} -gt 0 ]; then
echo "Error: Missing required environment variables:"
for var in "${missing[@]}"; do
echo " - $var"
done
echo ""
echo "SNOWFLAKE_ACCOUNT="
echo "SNOWFLAKE_ACCOUNT_PII="
echo "SNOWFLAKE_USER="
echo "SNOWFLAKE_ROLE="
echo "SNOWFLAKE_PRIVATE_KEY_PATH="
echo "SNOWFLAKE_AUTHENTICATOR=SNOWFLAKE_JWT"
echo "Create a .env file with:"
echo ""
echo " SNOWFLAKE_ACCOUNT=your_account # Standard account identifier"
echo " SNOWFLAKE_ACCOUNT_PII=your_pii_acct # Enterprise account (for --pii flag)"
echo " SNOWFLAKE_USER=your_user # Service account username"
echo " SNOWFLAKE_ROLE=SECURITYADMIN # Role for applying changes"
echo " SNOWFLAKE_PRIVATE_KEY_PATH=~/.ssh/key # Path to private key"
echo " SNOWFLAKE_AUTHENTICATOR=SNOWFLAKE_JWT"
exit 1
fi

# Load env vars safely
set -a
. ./.env
set +a

# Default to non-PII (standard) account
ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT"
EXCLUDE_RESOURCES=""

# If -pii flag is passed, switch to PII (enterprise) account
if [[ "$1" == "-pii" ]]; then
ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT_PII"
# Set account based on --pii flag
if $USE_PII; then
ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT_PII"
EXCLUDE_RESOURCES=""
USE_ACCOUNT_USAGE="--use-account-usage"
else
# Standard accounts don't support enterprise-only features
EXCLUDE_RESOURCES="--exclude masking_policy,tag,tag_reference,tag_masking_policy_reference"
ACCOUNT_TO_USE="$SNOWFLAKE_ACCOUNT"
EXCLUDE_RESOURCES="--exclude masking_policy,tag,tag_reference,tag_masking_policy_reference,row_access_policy"
USE_ACCOUNT_USAGE=""
fi

# Override SNOWFLAKE_ACCOUNT for the snowcap run
export SNOWFLAKE_ACCOUNT="$ACCOUNT_TO_USE"

# Build uvx command based on branch
if [ -n "$GIT_BRANCH" ]; then
UVX_CMD="uvx --from snowcap@git+https://github.com/datacoves/snowcap.git@${GIT_BRANCH} --refresh"
else
UVX_CMD="uvx"
fi

echo "=========="
echo "Using SNOWFLAKE_ACCOUNT=$SNOWFLAKE_ACCOUNT"
if [[ -n "$EXCLUDE_RESOURCES" ]]; then
echo "Excluding enterprise-only resources (standard account)"
if [ -n "$GIT_BRANCH" ]; then
echo "Using snowcap from branch: $GIT_BRANCH"
fi
if [ -n "$EXCLUDE_RESOURCES" ]; then
echo "Excluding enterprise-only resources (standard account)"
fi
if [ -n "$USE_ACCOUNT_USAGE" ]; then
echo "Using --use-account-usage flag"
fi
echo "=========="

# uvx --from snowcap@git+https://github.com/datacoves/snowcap.git@improve_plan_output \
# --refresh \
# snowcap plan \
# --config resources/ \
# --sync_resources role,grant,role_grant,warehouse,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \
# $EXCLUDE_RESOURCES

uvx snowcap plan \
$UVX_CMD snowcap plan \
--config resources/ \
--sync_resources role,grant,role_grant,warehouse,user,masking_policy,tag,tag_reference,tag_masking_policy_reference \
$EXCLUDE_RESOURCES
--sync_resources role,grant,role_grant,warehouse,user,masking_policy,tag,tag_reference,tag_masking_policy_reference,row_access_policy \
$EXCLUDE_RESOURCES \
$USE_ACCOUNT_USAGE \
"$@"
Loading
Loading