diff --git a/dbt/models/marts/core/schema.yml b/dbt/models/marts/core/schema.yml
index 6ff43a3..df4d109 100644
--- a/dbt/models/marts/core/schema.yml
+++ b/dbt/models/marts/core/schema.yml
@@ -83,9 +83,67 @@ models:
     columns:
       - name: product_id
+        data_type: varchar
         description: Unique product identifier — primary key
         tests:
           - unique
           - not_null
+      # Descriptive attributes
+      - name: product_name
+        data_type: varchar
+        description: Human-readable name of the product
+      - name: product_category
+        data_type: varchar
+        description: Category the product belongs to (e.g. Electronics, Clothing)
+      # Pricing, cost and margin
+      - name: unit_price
+        data_type: numeric
+        description: Listed selling price per unit of the product
+      - name: supply_cost
+        data_type: numeric
+        description: Per-unit cost of goods from stg_supply_costs, used to compute margin
+      - name: unit_margin
+        data_type: numeric
+        description: Absolute margin per unit — unit_price minus supply_cost
+      - name: margin_pct
+        data_type: numeric
+        description: Gross margin percentage — unit_margin / unit_price × 100
+      # Customer review metrics
+      - name: review_count
+        data_type: integer
+        description: Total number of customer reviews submitted for the product
+      - name: avg_rating
+        data_type: numeric
+        description: Average customer review rating on a 1–5 scale, derived from int_product_reviews_agg
+      - name: positive_reviews
+        data_type: integer
+        description: Count of reviews with a rating >= 4 (positive sentiment threshold)
+      - name: negative_reviews
+        data_type: integer
+        description: Count of reviews with a rating <= 2 (negative sentiment threshold)
+      - name: rating_tier
+        data_type: varchar
+        description: >
+          Product quality tier derived from avg_rating using the following scale:
+          'Excellent' — avg_rating >= 4.5 (top-tier, highest customer satisfaction);
+          'Good' — avg_rating >= 3.5 and < 4.5 (above-average satisfaction);
+          'Average' — avg_rating >= 2.5 and < 3.5 (neutral satisfaction);
+          'Poor' — avg_rating < 2.5 (below-average, needs attention).
+          Used for cohort segmentation in downstream models such as
+          marketing_excellent_rating_revenue.
+      # Sales performance
+      - name: total_units_sold
+        data_type: integer
+        description: Cumulative number of units sold across all orders
+      - name: product_total_revenue
+        data_type: numeric
+        description: Total gross revenue generated by the product (units_sold × unit_price)
+      - name: product_gross_profit
+        data_type: numeric
+        description: Total gross profit — product_total_revenue minus total supply cost
+      # Record metadata
+      - name: created_at
+        data_type: timestamp
+        description: Timestamp when the product record was first created in the source system
 
   # ────────────────────────────────────────────
   # dim_dates
diff --git a/dbt/models/marts/marketing/marketing_excellent_rating_revenue.sql b/dbt/models/marts/marketing/marketing_excellent_rating_revenue.sql
new file mode 100644
index 0000000..f36172f
--- /dev/null
+++ b/dbt/models/marts/marketing/marketing_excellent_rating_revenue.sql
@@ -0,0 +1,42 @@
+-- Revenue breakdown for products in the Excellent rating cohort (avg_rating >= 4.5)
+-- Filters dim_products to rating_tier = 'Excellent' and surfaces per-product
+-- revenue alongside cohort-level totals and each product's percentage share.
+with excellent_products as (
+    -- Keep only products in the Excellent rating tier, selecting just the
+    -- columns this mart publishes.
+    select
+        product_id,
+        product_name,
+        product_category,
+        unit_price,
+        avg_rating,
+        rating_tier,
+        review_count,
+        total_units_sold,
+        product_total_revenue,
+        product_gross_profit,
+        margin_pct
+    from {{ ref('dim_products') }}
+    where rating_tier = 'Excellent'
+),
+
+final as (
+    -- Carry every cohort column through unchanged and append the cohort-wide
+    -- revenue total plus each product's share of it.
+    select
+        excellent_products.*,
+        -- An empty OVER () treats the whole result set as one window, so the
+        -- same cohort total is repeated on every row.
+        sum(product_total_revenue) over () as cohort_total_revenue,
+        -- nullif maps a zero cohort total to NULL, so the share comes out
+        -- NULL rather than dividing by zero. Each share is rounded to two
+        -- decimals on its own row, so the shares can sum to slightly more
+        -- or less than exactly 100.
+        round(
+            product_total_revenue / nullif(sum(product_total_revenue) over (), 0) * 100,
+            2
+        ) as pct_of_cohort_revenue
+    from excellent_products
+)
+
+select * from final
diff --git a/dbt/models/marts/marketing/schema.yml b/dbt/models/marts/marketing/schema.yml
index 6dd4eac..a7cca90 100644
--- a/dbt/models/marts/marketing/schema.yml
+++ b/dbt/models/marts/marketing/schema.yml
@@ -173,6 +173,132 @@ models:
       sla_hours: 4
       tier: 2
 
+  # ────────────────────────────────────────────
+  # marketing_excellent_rating_revenue
+  # ────────────────────────────────────────────
+  - name: marketing_excellent_rating_revenue
+    description: >
+      Revenue analysis for products in the Excellent rating cohort (avg_rating >= 4.5).
+      Filters dim_products to rating_tier = 'Excellent' and surfaces per-product revenue
+      alongside cohort-level totals and each product's percentage share of cohort revenue.
+      Useful for understanding the contribution of top-rated products to overall sales.
+    group: marketing_analytics
+    access: public
+
+    config:
+      tags: ["active", "marketing", "tier-2", "cohort"]
+      materialized: table
+
+    meta:
+      maintainer_email: sofia.gutierrez@jaffle-shop.com
+      listeners:
+        - marketing-team@jaffle-shop.com
+        - jordan.blake@jaffle-shop.com
+      business_unit: "Marketing"
+      model_type: "SQL"
+      schedule: "Daily at 07:45 AM UTC"
+      schedule_cron: "45 7 * * *"
+      status: "active"
+      revised_state: "RENEWED"
+      expiry_date: "2027-06-30"
+      approved: true
+      approved_by: sofia.gutierrez@jaffle-shop.com
+      approved_date: "2026-02-23"
+      observe_in_airflow: true
+      sla_hours: 4
+      tier: 2
+      data_portal_tests:
+        - name: "All rows have Excellent rating tier"
+          type: "accepted_values"
+          sql: "SELECT * FROM {{ model }} WHERE rating_tier != 'Excellent'"
+          expected_result: "0 rows"
+          description: "Every row in this model must belong to the Excellent rating cohort (avg_rating >= 4.5)"
+        - name: "No negative product revenue"
+          type: "data_quality"
+          sql: "SELECT * FROM {{ model }} WHERE product_total_revenue < 0"
+          expected_result: "0 rows"
+          description: "Product total revenue must be non-negative for all Excellent cohort products"
+        - name: "Cohort total revenue is consistent across all rows"
+          type: "business_logic"
+          sql: "SELECT * FROM {{ model }} WHERE cohort_total_revenue != (SELECT SUM(product_total_revenue) FROM {{ model }})"
+          expected_result: "0 rows"
+          description: "The cohort_total_revenue window value must equal the sum of all product_total_revenue in the model"
+        # Each share is rounded to 2 decimals on its own row, so the total can be
+        # off by up to 0.005 per row (e.g. 6 equal products give 6 x 16.67 = 100.02).
+        # The tolerance therefore scales with the row count instead of a fixed 0.01.
+        - name: "Percentage of cohort revenue sums to 100"
+          type: "business_logic"
+          sql: "SELECT * FROM (SELECT ABS(SUM(pct_of_cohort_revenue) - 100) AS diff, COUNT(pct_of_cohort_revenue) * 0.005 AS tolerance FROM {{ model }}) t WHERE diff > tolerance"
+          expected_result: "0 rows"
+          description: "The sum of pct_of_cohort_revenue across all Excellent products must equal 100, within a rounding allowance of 0.005 per product row"
+        # NOTE(review): the model SQL filters only on rating_tier, not on
+        # total_units_sold, so this check depends on upstream data — confirm intended.
+        - name: "No products with zero units sold"
+          type: "data_quality"
+          sql: "SELECT * FROM {{ model }} WHERE total_units_sold IS NULL OR total_units_sold = 0"
+          expected_result: "0 rows"
+          description: "All products in the Excellent cohort must have sold at least one unit to be included in revenue analysis"
+
+    columns:
+      - name: product_id
+        description: Unique product identifier — primary key, inherited from dim_products
+        data_type: varchar
+        tests:
+          - unique
+          - not_null
+      - name: product_name
+        description: Human-readable name of the product
+        data_type: varchar
+        tests:
+          - not_null
+      - name: product_category
+        description: Category the product belongs to (e.g. Electronics, Clothing)
+        data_type: varchar
+      - name: unit_price
+        description: Listed selling price per unit of the product
+        data_type: numeric
+      - name: avg_rating
+        description: Average customer review rating for the product; always >= 4.5 in this model
+        data_type: numeric
+        tests:
+          - not_null
+      - name: rating_tier
+        description: >
+          Product quality tier derived from avg_rating. Always 'Excellent' in this model
+          (avg_rating >= 4.5). The full tier scale in dim_products is:
+          Excellent (>=4.5), Good (>=3.5), Average (>=2.5), Poor (<2.5).
+        data_type: varchar
+        tests:
+          - not_null
+      - name: review_count
+        description: Total number of customer reviews submitted for the product
+        data_type: integer
+      - name: total_units_sold
+        description: Cumulative number of units sold across all orders
+        data_type: integer
+      - name: product_total_revenue
+        description: Total gross revenue generated by this product (units_sold × unit_price)
+        data_type: numeric
+      - name: product_gross_profit
+        description: Total gross profit for the product (revenue minus supply cost)
+        data_type: numeric
+      - name: margin_pct
+        description: Gross margin percentage — unit_margin / unit_price × 100, passed through unchanged from dim_products
+        data_type: numeric
+      - name: cohort_total_revenue
+        description: >
+          Sum of product_total_revenue across ALL products in the Excellent rating cohort.
+          Computed as a window function over the full result set; the value is identical
+          on every row and represents the cohort's aggregate revenue contribution.
+        data_type: numeric
+      - name: pct_of_cohort_revenue
+        description: >
+          This product's share of the Excellent cohort's total revenue, expressed as a
+          percentage (0–100). Calculated as product_total_revenue / cohort_total_revenue × 100,
+          rounded to 2 decimal places; NULL when the cohort total is zero. Because each row is
+          rounded independently, the values sum to approximately (not exactly) 100 across the cohort.
+        data_type: numeric
+
   # ────────────────────────────────────────────
   # marketing_channel_attribution
   # ────────────────────────────────────────────