diff --git a/dbt/models/marts/core/dim_products.sql b/dbt/models/marts/core/dim_products.sql index 3dd668c..3c38e83 100644 --- a/dbt/models/marts/core/dim_products.sql +++ b/dbt/models/marts/core/dim_products.sql @@ -25,7 +25,7 @@ final as ( r.positive_reviews, r.negative_reviews, case - when r.avg_rating >= 4.5 then 'Excellent' + when r.avg_rating >= 4.8 then 'Excellent' when r.avg_rating >= 3.5 then 'Good' when r.avg_rating >= 2.5 then 'Average' else 'Poor' diff --git a/dbt/models/marts/core/schema.yml b/dbt/models/marts/core/schema.yml index 6ff43a3..edfc5bf 100644 --- a/dbt/models/marts/core/schema.yml +++ b/dbt/models/marts/core/schema.yml @@ -79,14 +79,121 @@ models: observe_in_airflow: true sla_hours: 2 tier: 1 + data_portal_tests: + - name: "Excellent tier matches >= 4.8 threshold" + type: "business_logic" + sql: "SELECT * FROM {{ model }} WHERE rating_tier = 'Excellent' AND avg_rating < 4.8" + expected_result: "0 rows" + description: "Every product labelled Excellent must have an avg_rating of at least 4.8, consistent with the CASE expression in the model SQL." + + - name: "Good tier is correctly bounded" + type: "business_logic" + sql: "SELECT * FROM {{ model }} WHERE rating_tier = 'Good' AND (avg_rating < 3.5 OR avg_rating >= 4.8)" + expected_result: "0 rows" + description: "Products in the Good tier must have avg_rating in [3.5, 4.8). Any product outside this band has been mis-classified." + + - name: "avg_rating within valid 0–5 scale" + type: "data_quality" + sql: "SELECT * FROM {{ model }} WHERE avg_rating IS NOT NULL AND (avg_rating < 0 OR avg_rating > 5)" + expected_result: "0 rows" + description: "Customer ratings are collected on a 0–5 scale; values outside this range indicate upstream data corruption." 
+ + - name: "No negative product revenue" + type: "data_quality" + sql: "SELECT * FROM {{ model }} WHERE product_total_revenue IS NOT NULL AND product_total_revenue < 0" + expected_result: "0 rows" + description: "Total revenue for a product should never be negative; negative values signal a calculation error in int_product_profitability." + + - name: "Positive plus negative reviews do not exceed total review count" + type: "business_logic" + sql: "SELECT * FROM {{ model }} WHERE (positive_reviews + negative_reviews) > review_count" + expected_result: "0 rows" + description: "The sum of positive and negative reviews must not exceed the total review_count, since the two sub-counts are a subset of all reviews." columns: - name: product_id description: Unique product identifier — primary key + data_type: varchar tests: - unique - not_null + - name: product_name + description: Human-readable name of the product. + data_type: varchar + + - name: product_category + description: Product category grouping (e.g. Electronics, Clothing). + data_type: varchar + + - name: unit_price + description: Listed retail price per unit of the product. + data_type: numeric + + - name: supply_cost + description: Per-unit cost of goods from int_product_profitability. + data_type: numeric + + - name: unit_margin + description: Per-unit gross margin (unit_price minus supply_cost). + data_type: numeric + + - name: margin_pct + description: Gross margin as a percentage of unit_price. + data_type: numeric + + - name: review_count + description: Total number of customer reviews submitted for the product. + data_type: integer + + - name: avg_rating + description: Average customer review rating for the product on a 0–5 scale. + data_type: float + + - name: positive_reviews + description: Count of reviews with a rating of 4 or above. + data_type: integer + + - name: negative_reviews + description: Count of reviews with a rating of 2 or below. 
+ data_type: integer + + - name: rating_tier + description: > + Rating tier classification derived from avg_rating. Segments products + into four quality buckets: 'Excellent' (avg_rating >= 4.8), + 'Good' (avg_rating >= 3.5), 'Average' (avg_rating >= 2.5), and + 'Poor' (avg_rating < 2.5 or NULL). Used as the primary cohort dimension for + product quality analysis, marketing segmentation, and downstream + models such as marketing_excellent_cohort_revenue. + data_type: varchar + tests: + - not_null + - accepted_values: + values: ['Excellent', 'Good', 'Average', 'Poor'] + + - name: total_units_sold + description: Total units of the product sold across all orders. + data_type: integer + + - name: product_total_revenue + description: > + Total gross revenue generated by the product (unit_price × units_sold). + This is the primary revenue metric consumed by the downstream + marketing_excellent_cohort_revenue model, where it is aggregated via + a window function to produce cohort_total_revenue and + pct_of_cohort_revenue for all products classified in the Excellent + rating tier (avg_rating >= 4.8). + data_type: numeric + + - name: product_gross_profit + description: Total gross profit for the product (revenue minus total supply cost). + data_type: numeric + + - name: created_at + description: Timestamp when the product record was originally created in the source system. + data_type: timestamp + # ──────────────────────────────────────────── # dim_dates # ──────────────────────────────────────────── diff --git a/dbt/models/marts/marketing/marketing_excellent_cohort_revenue.sql b/dbt/models/marts/marketing/marketing_excellent_cohort_revenue.sql new file mode 100644 index 0000000..cea4a17 --- /dev/null +++ b/dbt/models/marts/marketing/marketing_excellent_cohort_revenue.sql @@ -0,0 +1,42 @@ +-- Revenue analysis for products in the Excellent rating cohort (avg_rating >= 4.8). 
+-- Each row represents one product; cohort_total_revenue and pct_of_cohort_revenue +-- provide the aggregate view across all Excellent-tier products. + +with excellent_products as ( + + select + product_id, + product_name, + product_category, + avg_rating, + rating_tier, + review_count, + total_units_sold, + product_total_revenue, + product_gross_profit, + margin_pct + from {{ ref('dim_products') }} + where rating_tier = 'Excellent' + +) + +select + product_id, + product_name, + product_category, + avg_rating, + rating_tier, + review_count, + total_units_sold, + product_total_revenue, + product_gross_profit, + margin_pct, + sum(product_total_revenue) over () as cohort_total_revenue, + round( + product_total_revenue * 100.0 + / nullif(sum(product_total_revenue) over (), 0), + 2 + ) as pct_of_cohort_revenue + +from excellent_products +order by product_total_revenue desc diff --git a/dbt/models/marts/marketing/schema.yml b/dbt/models/marts/marketing/schema.yml index 6dd4eac..d4780ce 100644 --- a/dbt/models/marts/marketing/schema.yml +++ b/dbt/models/marts/marketing/schema.yml @@ -173,6 +173,131 @@ models: sla_hours: 4 tier: 2 + # ──────────────────────────────────────────── + # marketing_excellent_cohort_revenue + # ──────────────────────────────────────────── + - name: marketing_excellent_cohort_revenue + description: > + Revenue analysis for products classified in the Excellent rating cohort + (avg_rating >= 4.8). Each row represents one product; cohort_total_revenue + and pct_of_cohort_revenue surface the aggregate view across all + Excellent-tier products. Built on top of dim_products. Use this model to + understand the revenue contribution and profitability of the + highest-rated products. 
+ group: marketing_analytics + access: public + + config: + tags: ["active", "marketing", "tier-2", "cohort"] + materialized: table + + meta: + maintainer_email: sofia.gutierrez@jaffle-shop.com + listeners: + - marketing-team@jaffle-shop.com + - jordan.blake@jaffle-shop.com + business_unit: "Marketing" + model_type: "SQL" + schedule: "Daily at 07:45 AM UTC" + schedule_cron: "45 7 * * *" + status: "active" + revised_state: "RENEWED" + expiry_date: "2027-06-30" + approved: true + approved_by: sofia.gutierrez@jaffle-shop.com + approved_date: "2026-02-23" + observe_in_airflow: true + sla_hours: 4 + tier: 2 + data_portal_tests: + - name: "Only Excellent rating tier products included" + type: "accepted_values" + sql: "SELECT * FROM {{ model }} WHERE rating_tier != 'Excellent'" + expected_result: "0 rows" + description: "All rows must belong to the Excellent rating tier — any other tier value indicates a filter logic regression." + - name: "avg_rating meets Excellent threshold (>= 4.8)" + type: "business_logic" + sql: "SELECT * FROM {{ model }} WHERE avg_rating < 4.8" + expected_result: "0 rows" + description: "Excellent-tier products must have an avg_rating of at least 4.8, consistent with the classification in dim_products." + - name: "No negative product revenue" + type: "data_quality" + sql: "SELECT * FROM {{ model }} WHERE product_total_revenue < 0" + expected_result: "0 rows" + description: "Revenue values should never be negative; a negative value would indicate bad source data from int_product_profitability." + - name: "cohort_total_revenue is positive" + type: "business_logic" + sql: "SELECT * FROM {{ model }} WHERE cohort_total_revenue <= 0" + expected_result: "0 rows" + description: "The cohort-level total revenue window function must always be a positive number as long as Excellent products exist." 
+ - name: "pct_of_cohort_revenue is within valid range" + type: "business_logic" + sql: "SELECT * FROM {{ model }} WHERE pct_of_cohort_revenue < 0 OR pct_of_cohort_revenue > 100" + expected_result: "0 rows" + description: "Each product's revenue share of the Excellent cohort must fall between 0% and 100%." + + columns: + - name: product_id + description: Unique product identifier — primary key sourced from dim_products. + data_type: varchar + tests: + - unique + - not_null + + - name: product_name + description: Human-readable name of the product. + data_type: varchar + + - name: product_category + description: Product category grouping (e.g. Electronics, Clothing). + data_type: varchar + + - name: avg_rating + description: > + Average customer review rating for the product (0–5 scale). + All values in this model are >= 4.8, qualifying the product as Excellent. + data_type: float + + - name: rating_tier + description: > + Rating tier classification from dim_products. Always 'Excellent' in this + model, representing products with avg_rating >= 4.8. + data_type: varchar + + - name: review_count + description: Total number of customer reviews submitted for the product. + data_type: integer + + - name: total_units_sold + description: Total units of the product sold across all orders. + data_type: integer + + - name: product_total_revenue + description: Total gross revenue generated by the product (unit_price × units_sold). + data_type: numeric + + - name: product_gross_profit + description: Total gross profit for the product (revenue minus supply costs). + data_type: numeric + + - name: margin_pct + description: Gross margin percentage — product_gross_profit / product_total_revenue. + data_type: numeric + + - name: cohort_total_revenue + description: > + Sum of product_total_revenue across all Excellent-tier products in this + model. Computed via a window function; repeated on every row for easy + downstream ratio calculations. 
+ data_type: numeric + + - name: pct_of_cohort_revenue + description: > + This product's share of total Excellent cohort revenue, expressed as a + percentage (0–100). Calculated as product_total_revenue * 100 / + cohort_total_revenue, rounded to 2 decimal places. + data_type: numeric + # ──────────────────────────────────────────── # marketing_channel_attribution # ────────────────────────────────────────────