diff --git a/02_activities/assignments/Assignment2_Prompt3.txt b/02_activities/assignments/Assignment2_Prompt3.txt new file mode 100644 index 000000000..49a77ba14 --- /dev/null +++ b/02_activities/assignments/Assignment2_Prompt3.txt @@ -0,0 +1 @@ +As per the slowly changing Dimensions (SCD), out of these two proposed architectures for the CUSTOMER_ADDRESS table, the architecture that overwrites and stores only the latest data is the type 1 SCD. On the other hand, the architecture that retains changes is the type 2 SCD. \ No newline at end of file diff --git a/02_activities/assignments/Assignment2_section1.png b/02_activities/assignments/Assignment2_section1.png new file mode 100644 index 000000000..14b3652f6 Binary files /dev/null and b/02_activities/assignments/Assignment2_section1.png differ diff --git a/02_activities/assignments/assignment2.sql b/02_activities/assignments/assignment2.sql index 5ad40748a..e84167c3c 100644 --- a/02_activities/assignments/assignment2.sql +++ b/02_activities/assignments/assignment2.sql @@ -21,6 +21,9 @@ Edit the appropriate columns -- you're making two edits -- and the NULL rows wil All the other rows will remain the same.) */ +SELECT +product_name || ', ' || coalesce(product_size, '') || ' (' || coalesce(product_qty_type, 'unit') || ')' +FROM product; --Windowed Functions /* 1. Write a query that selects from the customer_purchases table and numbers each customer’s @@ -32,18 +35,44 @@ each new market date for each customer, or select only the unique market dates p (without purchase details) and number those visits. HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK(). */ +SELECT +customer_id, +market_date, + + ROW_NUMBER()OVER(PARTITION BY customer_id ORDER BY market_date ASC) AS customer_visits +FROM customer_purchases /* 2. Reverse the numbering of the query from a part so each customer’s most recent visit is labeled 1, then write another query that uses this one as a subquery (or temp table) and filters the results to only the customer’s most recent visit. */ +SELECT * +FROM +( + SELECT + customer_id, + market_date, + + ROW_NUMBER()OVER(PARTITION BY customer_id ORDER BY market_date DESC) AS customer_visits + + FROM customer_purchases +)x +WHERE x.customer_visits = 1; /* 3. Using a COUNT() window function, include a value along with each row of the customer_purchases table that indicates how many different times that customer has purchased that product_id. */ +SELECT +customer_id, +market_date, +product_id, +transaction_time, + + COUNT(*)OVER(PARTITION BY customer_id, product_id ORDER BY market_date DESC) AS [purchases] +FROM customer_purchases; -- String manipulations /* 1. Some product names in the product table have descriptions like "Jar" or "Organic". @@ -57,11 +86,27 @@ Remove any trailing or leading whitespaces. Don't just use a case statement for Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column. */ +SELECT +product_name, +CASE + WHEN INSTR(product_name, '-') + THEN + TRIM(SUBSTR(product_name, INSTR(product_name, '-')+1)) + ELSE + NULL +END AS description + +FROM product; /* 2. Filter the query to show any product_size value that contain a number with REGEXP. */ +SELECT +product_name, +product_size +FROM product +WHERE product_size REGEXP '[0-9]'; -- UNION /* 1. Using a UNION, write a query that displays the market dates with the highest and lowest total sales. @@ -73,6 +118,50 @@ HINT: There are a possibly a few ways to do this query, but if you're struggling 3) Query the second temp table twice, once for the best day, once for the worst day, with a UNION binding them. */ +DROP TABLE IF EXISTS temp.sales_value; +CREATE TABLE temp.sales_value AS + +SELECT +market_date, +SUM(quantity*cost_to_customer_per_qty)as sales + +FROM customer_purchases + +GROUP BY market_date; +--SELECT * FROM temp.sales_value + + + +DROP TABLE IF EXISTS temp.sales_rank; +CREATE TABLE temp.sales_rank AS + +SELECT +market_date, +sales, + RANK() OVER (ORDER BY sales DESC) AS best_day, + RANK() OVER (ORDER BY sales ASC) AS worst_day + +FROM temp.sales_value; +--SELECT * from temp.sales_rank + + +SELECT +market_date, +sales, +'Highest Sales Day' as day_type + +FROM temp.sales_rank +WHERE best_day = 1 + +UNION + +SELECT +market_date, +sales, +'Lowest Sales Day' as day_type + +FROM temp.sales_rank +WHERE worst_day = 1; @@ -89,6 +178,32 @@ Think a bit about the row counts: how many distinct vendors, product names are t How many customers are there (y). Before your final group by you should have the product of those two queries (x*y). */ +SELECT +vendor_name, +product_name, +SUM(5 * original_price) AS total_money + +FROM ( + SELECT + vendor.vendor_id, + product_id, + original_price, + vendor_name + FROM vendor_inventory + INNER JOIN vendor + ON vendor_inventory.vendor_id = vendor.vendor_id +) x + +CROSS JOIN ( + SELECT + customer_id + FROM customer +) y + +INNER JOIN product + ON x.product_id = product.product_id + +GROUP BY vendor_name, product_name; -- INSERT @@ -97,11 +212,30 @@ This table will contain only products where the `product_qty_type = 'unit'`. It should use all of the columns from the product table, as well as a new column for the `CURRENT_TIMESTAMP`. Name the timestamp column `snapshot_timestamp`. */ +DROP TABLE IF EXISTS temp.product_units; + +CREATE TEMP TABLE temp.product_units AS +SELECT *, +CURRENT_TIMESTAMP AS snapshot_timestamp + +FROM product +WHERE 1 = 0; + +INSERT INTO product_units +SELECT *, +CURRENT_TIMESTAMP AS snapshot_timestamp + +FROM product +WHERE product_qty_type = 'unit'; + +SELECT * FROM product_units /*2. Using `INSERT`, add a new row to the product_units table (with an updated timestamp). This can be any product you desire (e.g. add another record for Apple Pie). */ +INSERT INTO product_units +VALUES(24,'Apple Vinegar','1 ltr','2','unit',CURRENT_TIMESTAMP); -- DELETE @@ -109,6 +243,16 @@ This can be any product you desire (e.g. add another record for Apple Pie). */ HINT: If you don't specify a WHERE clause, you are going to have a bad time.*/ +DELETE FROM product_units +WHERE product_name = 'Apple Vinegar' +AND snapshot_timestamp IN +( +SELECT MIN (snapshot_timestamp) +FROM product_units +WHERE product_name = 'Apple Vinegar' +); + +SELECT * FROM product_units WHERE product_name = 'Apple Vinegar' -- UPDATE @@ -129,5 +273,15 @@ Finally, make sure you have a WHERE statement to update the right row, When you have all of these components, you can run the update statement. */ +ALTER TABLE product_units +ADD current_quantity INT; +UPDATE product_units +SET current_quantity = COALESCE(( + SELECT quantity + FROM vendor_inventory + WHERE vendor_inventory.product_id = product_units.product_id + ORDER BY market_date DESC +), 0) +select * from product_units \ No newline at end of file