From 0b62fcf104882e97ddbec27aed534dabec6c645d Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 23 Feb 2026 08:22:41 +0000 Subject: [PATCH] dataportal: accept SQL changes for finance_excellent_cohort_revenue --- dbt/models/marts/core/schema.yml | 9 ++ .../finance_excellent_cohort_revenue.sql | 49 ++++++ dbt/models/marts/finance/schema.yml | 149 ++++++++++++++++++ 3 files changed, 207 insertions(+) create mode 100644 dbt/models/marts/finance/finance_excellent_cohort_revenue.sql diff --git a/dbt/models/marts/core/schema.yml b/dbt/models/marts/core/schema.yml index 6ff43a3..0348d23 100644 --- a/dbt/models/marts/core/schema.yml +++ b/dbt/models/marts/core/schema.yml @@ -83,10 +83,19 @@ models: columns: - name: product_id description: Unique product identifier — primary key + data_type: varchar tests: - unique - not_null + - name: rating_tier + description: > + Business-logic classification of product quality derived from avg_rating. + Tiers are defined as: Excellent (avg_rating >= 4.5), Good (avg_rating >= 3.5), + Average (avg_rating >= 2.5), and Poor (avg_rating < 2.5). Used as the cohort + key for rating-based revenue and margin analysis (e.g. finance_excellent_cohort_revenue). + data_type: varchar + # ──────────────────────────────────────────── # dim_dates # ──────────────────────────────────────────── diff --git a/dbt/models/marts/finance/finance_excellent_cohort_revenue.sql b/dbt/models/marts/finance/finance_excellent_cohort_revenue.sql new file mode 100644 index 0000000..d6e0fb7 --- /dev/null +++ b/dbt/models/marts/finance/finance_excellent_cohort_revenue.sql @@ -0,0 +1,49 @@ +-- Revenue metrics for the Excellent rating cohort (avg_rating >= 4.5) +-- Filters dim_products to only Excellent-rated products and surfaces +-- per-product revenue alongside cohort-level totals and share metrics. 
+
+with excellent_products as (
+    select
+        product_id,
+        product_name,
+        product_category,
+        avg_rating,
+        rating_tier,
+        review_count,
+        positive_reviews,
+        negative_reviews,
+        unit_price,
+        unit_margin,
+        margin_pct,
+        total_units_sold,
+        product_total_revenue,
+        product_gross_profit
+    from {{ ref('dim_products') }}
+    where rating_tier = 'Excellent'  -- cohort filter: only rows already labelled Excellent in dim_products
+),
+
+final as (
+    select
+        product_id,
+        product_name,
+        product_category,
+        avg_rating,
+        rating_tier,
+        review_count,
+        positive_reviews,
+        negative_reviews,
+        unit_price,
+        unit_margin,
+        margin_pct,
+        total_units_sold,
+        product_total_revenue,
+        product_gross_profit,
+        sum(product_total_revenue) over () as cohort_total_revenue,  -- empty OVER (): one total over all cohort rows, repeated on every row
+        round(
+            product_total_revenue / nullif(sum(product_total_revenue) over (), 0) * 100,  -- NULLIF: a zero cohort total gives a NULL share instead of dividing by zero
+            2  -- each row is rounded to 2 decimals independently, so shares may not sum to exactly 100
+        ) as pct_of_cohort_revenue
+    from excellent_products
+)
+
+select * from final
diff --git a/dbt/models/marts/finance/schema.yml b/dbt/models/marts/finance/schema.yml
index cf809a1..64f9a1f 100644
--- a/dbt/models/marts/finance/schema.yml
+++ b/dbt/models/marts/finance/schema.yml
@@ -106,6 +106,155 @@ models:
       sla_hours: 3
       tier: 2
 
+  # ────────────────────────────────────────────
+  # finance_excellent_cohort_revenue
+  # ────────────────────────────────────────────
+  - name: finance_excellent_cohort_revenue
+    description: >
+      Revenue breakdown for the Excellent rating cohort — products with avg_rating >= 4.5
+      sourced from dim_products. Surfaces per-product revenue, gross profit, and margin
+      alongside the cohort's aggregate total revenue and each product's percentage share
+      of that total. Intended for finance and product teams to identify top-rated
+      revenue drivers and measure the financial weight of the highest-quality SKUs.
+    group: finance_analytics
+    access: public
+
+    config:
+      tags: ["active", "finance", "tier-1", "cohort"]
+      materialized: table
+
+    meta:
+      maintainer_email: marcus.chen@jaffle-shop.com
+      listeners:
+        - finance-team@jaffle-shop.com
+        - jordan.blake@jaffle-shop.com
+        - data-alerts@jaffle-shop.com
+      business_unit: "Finance"
+      model_type: "SQL"
+      schedule: "Daily at 07:00 AM UTC"
+      schedule_cron: "0 7 * * *"
+      status: "active"
+      revised_state: "RENEWED"
+      expiry_date: "2027-06-30"
+      approved: true
+      approved_by: marcus.chen@jaffle-shop.com
+      approved_date: "2024-06-01"
+      observe_in_airflow: true
+      sla_hours: 2
+      tier: 1
+      data_portal_tests:  # each test passes when its query returns zero rows
+        - name: "All products are Excellent rating tier"
+          type: "accepted_values"
+          sql: "SELECT * FROM {{ model }} WHERE rating_tier != 'Excellent'"
+          expected_result: "0 rows"
+          description: "Ensures only Excellent-rated products (avg_rating >= 4.5) are present; any leakage from lower tiers would corrupt the cohort definition."
+        - name: "No negative product revenue"
+          type: "data_quality"
+          sql: "SELECT * FROM {{ model }} WHERE product_total_revenue < 0"
+          expected_result: "0 rows"
+          description: "Validates that no product carries a negative total revenue value, which would indicate a data pipeline error upstream."
+        - name: "avg_rating meets Excellent threshold on every row"
+          type: "accepted_values"
+          sql: "SELECT * FROM {{ model }} WHERE avg_rating < 4.5"
+          expected_result: "0 rows"
+          description: "Confirms that every product's avg_rating is at least 4.5, consistent with the Excellent tier definition in dim_products."
+        - name: "pct_of_cohort_revenue sums to 100"
+          type: "business_logic"
+          sql: "SELECT * FROM (SELECT ABS(SUM(pct_of_cohort_revenue) - 100) AS diff, COUNT(pct_of_cohort_revenue) AS n FROM {{ model }}) t WHERE diff > 0.005 * n"  # tolerance grows with row count: ROUND(..., 2) can shift each row by up to 0.005
+          expected_result: "0 rows"
+          description: "Validates that pct_of_cohort_revenue values sum to 100% across all products. Each share is rounded to 2 decimals independently, so the allowed drift scales with row count (at most 0.005 per row) rather than a fixed 0.01."
+        - name: "cohort_total_revenue is consistent across all rows"
+          type: "business_logic"
+          sql: "SELECT * FROM {{ model }} WHERE ABS(cohort_total_revenue - (SELECT SUM(product_total_revenue) FROM {{ model }})) > 0.01"
+          expected_result: "0 rows"
+          description: "Verifies that the cohort_total_revenue window value is identical on every row and equals the true sum of product_total_revenue."
+
+    columns:
+      - name: product_id
+        description: Unique product identifier — primary key, sourced from dim_products.
+        data_type: varchar
+        tests:
+          - unique
+          - not_null
+
+      - name: product_name
+        description: Human-readable product name, sourced from dim_products.
+        data_type: varchar
+
+      - name: product_category
+        description: Product category grouping (e.g. Electronics, Apparel), sourced from dim_products.
+        data_type: varchar
+
+      - name: avg_rating
+        description: Average customer review rating for the product on a 0–5 scale. All values in this model are >= 4.5 (Excellent tier).
+        data_type: float
+        tests:
+          - not_null
+
+      - name: rating_tier
+        description: >
+          Rating tier classification derived from avg_rating in dim_products
+          (Excellent >= 4.5, Good >= 3.5, Average >= 2.5, Poor < 2.5).
+          All rows in this model have rating_tier = 'Excellent'.
+        data_type: varchar
+        tests:
+          - not_null
+
+      - name: review_count
+        description: Total number of customer reviews submitted for the product.
+        data_type: integer
+
+      - name: positive_reviews
+        description: Count of reviews with a rating >= 4, indicating positive customer sentiment.
+        data_type: integer
+
+      - name: negative_reviews
+        description: Count of reviews with a rating <= 2, indicating negative customer sentiment.
+        data_type: integer
+
+      - name: unit_price
+        description: Per-unit selling price in USD.
+        data_type: numeric
+
+      - name: unit_margin
+        description: Per-unit profit margin in USD (unit_price minus supply_cost).
+        data_type: numeric
+
+      - name: margin_pct
+        description: Gross margin percentage calculated as (unit_price - supply_cost) / unit_price * 100.
+        data_type: numeric
+
+      - name: total_units_sold
+        description: Total quantity of this product sold across all historical orders.
+        data_type: integer
+
+      - name: product_total_revenue
+        description: Total revenue generated by this product across all orders (units_sold × unit_price).
+        data_type: numeric
+        tests:
+          - not_null
+
+      - name: product_gross_profit
+        description: Total gross profit for the product (product_total_revenue minus total cost of goods sold).
+        data_type: numeric
+
+      - name: cohort_total_revenue
+        description: >
+          Sum of product_total_revenue across all Excellent-rated products in this model.
+          Computed as a window function; the value is identical on every row and represents
+          the full revenue contribution of the Excellent cohort.
+        data_type: numeric
+        tests:
+          - not_null
+
+      - name: pct_of_cohort_revenue
+        description: >
+          Each product's share of the Excellent cohort's total revenue, expressed as a
+          percentage (0–100). Calculated as product_total_revenue / cohort_total_revenue * 100,
+          rounded to 2 decimal places per row, so shares may not sum to exactly 100. NULL when
+          cohort_total_revenue is 0. Useful for identifying the cohort's largest revenue contributors.
+        data_type: numeric
+
   # ────────────────────────────────────────────
   # finance_product_margins
   # ────────────────────────────────────────────