diff --git a/02_activities/assignments/Assignment2.md b/02_activities/assignments/Assignment2.md index 6cc76d916..614a06039 100644 --- a/02_activities/assignments/Assignment2.md +++ b/02_activities/assignments/Assignment2.md @@ -55,6 +55,78 @@ The store wants to keep customer addresses. Propose two architectures for the CU ``` Your answer... + +Here's my proposal for two CUSTOMER_ADDRESS tables: + +TYPE 1 - This would overwrite the customer address directly; it would not retain history, but it would keep table record counts lean. + +In practice, a customer would have 1 record (not showing all fields): +__________________________________ +| customer_id | customer_address | +|-------------|------------------| +| 123 |Address1 | + + +When a new address is added (for example Address 2), the existing record is updated via an UPDATE statement on the applicable address/country/postal etc. fields. + + +After the update, it would look like: +__________________________________ +| customer_id | customer_address | +|-------------|------------------| +| 123 |Address2 | + + +The table itself would be: +- customer_id (int) +- customer_address (varchar(255)) +- customer_address_unit (varchar(80)) +- postal_zip (varchar(14)) +- state_province (varchar(3)) +- country (varchar(80)) +- country_code (varchar(3)) + + + +TYPE 2 - This would add a new row/record for every new address a customer has. This would maintain existing rows for historical purposes. 
Compared to the Type 1 proposal, this has two additional fields: + - effective_date would show when the new address is added + - current_flag would indicate if that record is the current address of the customer + +In practice, a customer could have 3 addresses (not showing all fields): +__________________________________________________________________ +| customer_id | customer_address | effective_date | current_flag | +|-------------|------------------|----------------|--------------| +| 123 |Address1 | 2024-01-01 | NULL | +| 123 |Address2 | 2024-02-01 | NULL | +| 123 |Address3 | 2025-01-01 | Y | + +When a new address is added (for example Address 4): +1. The existing current_flag would be changed to NULL for Address 3 +2. A new record is inserted for customer 123 with Address 4, today's date, and current_flag = 'Y' + +After the update, it would look like: +__________________________________________________________________ +| customer_id | customer_address | effective_date | current_flag | +|-------------|------------------|----------------|--------------| +| 123 |Address1 | 2024-01-01 | NULL | +| 123 |Address2 | 2024-02-01 | NULL | +| 123 |Address3 | 2025-01-01 | NULL | +| 123 |Address4 | 2025-04-27 | Y | + + +The table itself would be: +- customer_id (int) +- customer_address (varchar(255)) +- customer_address_unit (varchar(80)) +- postal_zip (varchar(14)) +- state_province (varchar(3)) +- country (varchar(80)) +- country_code (varchar(3)) +- effective_date (date) +- current_flag (varchar(1)) + + + ``` *** diff --git a/02_activities/assignments/SQL Assignment 2 Section 1 - Prompt 1.png b/02_activities/assignments/SQL Assignment 2 Section 1 - Prompt 1.png new file mode 100644 index 000000000..396e6c6fc Binary files /dev/null and b/02_activities/assignments/SQL Assignment 2 Section 1 - Prompt 1.png differ diff --git a/02_activities/assignments/SQL Assignment 2 Section 1 - Prompt 2.png b/02_activities/assignments/SQL Assignment 2 Section 1 - Prompt 2.png new file 
-- Prompt (excerpt visible in the diff context):
--   The `||` values concatenate the columns into strings.
--   Edit the appropriate columns -- you're making two edits -- and the NULL rows will be fixed.
--   All the other rows will remain the same.)

-- Builds one "name, size (qty_type)" label per product.
-- A NULL product_size is replaced by an empty string and a NULL product_qty_type
-- by 'unit', so neither of those two columns can turn the concatenation into NULL.
SELECT
    product_name
        || ', '
        || COALESCE(product_size, '')
        || ' ('
        || COALESCE(product_qty_type, 'unit')
        || ')'
FROM product;


--Windowed Functions
-- Prompt (excerpt visible in the diff context):
--   each new market date for each customer, or select only the unique market dates p...
--   (without purchase details) and number those visits.
--   HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK().

-- Numbers every customer's visits from oldest to newest.
-- The CTE first reduces customer_purchases to one row per customer and market date,
-- so ROW_NUMBER() counts visits rather than individual purchase rows.
WITH visit_dates AS (
    SELECT DISTINCT
        customer_id
        ,market_date
    FROM customer_purchases
)
SELECT
    customer_id
    ,market_date
    ,ROW_NUMBER() OVER (
        PARTITION BY customer_id
        ORDER BY market_date
    ) AS customer_visits
FROM visit_dates;


-- Prompt:
--   2. Reverse the numbering of the query from a part so each customer’s most recent visit is labeled 1,
--   then write another query that uses this one as a subquery (or temp table) and filters the results to
--   only the customer’s most recent visit.

-- Recent visit is 1 (reverse order).
-- Just add DESC to the ORDER BY on market_date so that each customer's most
-- recent visit is numbered 1. The inner SELECT DISTINCT keeps one row per
-- customer and market date, so visits are numbered, not purchase rows.
SELECT
    customer_id
    ,market_date
    ,ROW_NUMBER() OVER (
        PARTITION BY customer_id
        ORDER BY market_date DESC
    ) AS customer_visits
FROM (
    SELECT DISTINCT
        customer_id
        ,market_date
    FROM customer_purchases
    );


-- Only get most recent visit: wrap the reverse-numbered query and keep visit 1.
SELECT *
FROM (
    SELECT
        customer_id
        ,market_date
        ,ROW_NUMBER() OVER (
            PARTITION BY customer_id
            ORDER BY market_date DESC
        ) AS customer_visits
    FROM (
        SELECT DISTINCT
            customer_id
            ,market_date
        FROM customer_purchases
        )
    )
WHERE customer_visits = 1;


-- Prompt:
--   3. Using a COUNT() window function, include a value along with each row of the
--   customer_purchases table that indicates how many different times that customer
--   has purchased that product_id.

-- Adds to every purchase row the total number of purchase rows for the same
-- customer and product. The window deliberately has no ORDER BY: with one, the
-- default frame ends at the current row and COUNT(*) becomes a running count
-- instead of the per-customer, per-product total.
SELECT
    *
    ,COUNT(*) OVER (
        PARTITION BY customer_id, product_id
    ) AS product_purchase_count
FROM customer_purchases
ORDER BY customer_id, product_id, market_date, transaction_time;


-- String manipulations
-- Prompt (excerpt visible in the diff context):
--   Remove any trailing or leading whitespaces. Don't just use a case statement for...
--   Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column.

-- description = the text after the first hyphen in product_name, or NULL when
-- the name has no hyphen. SUBSTR starts directly after the hyphen (+1) and TRIM
-- removes the surrounding whitespace, so the first character of the description
-- is kept even when no space follows the hyphen.
SELECT
    *
    ,IIF(
        INSTR(product_name, '-') > 0
        ,TRIM(SUBSTR(product_name, INSTR(product_name, '-') + 1))
        ,NULL
    ) AS description
FROM product;


-- Prompt:
--   2. Filter the query to show any product_size value that contain a number with REGEXP.

-- Same description column, restricted to rows whose product_size contains a digit.
-- NOTE(review): REGEXP relies on a regexp() function being registered in the
-- SQLite environment that runs this script — confirm it is available.
SELECT
    *
    ,IIF(
        INSTR(product_name, '-') > 0
        ,TRIM(SUBSTR(product_name, INSTR(product_name, '-') + 1))
        ,NULL
    ) AS description
FROM product
WHERE product_size REGEXP '[0-9]';


-- UNION
-- Prompt (excerpt visible in the diff context):
--   HINT: There are a possibly a few ways to do this query, but if you're struggling...
--   3) Query the second temp table twice, once for the best day, once for the worst day,
--   with a UNION binding them.
-- Returns two labeled rows: the market date with the lowest total sales
-- ('worst day') and the one with the highest ('best day').
-- daily_sales totals quantity * cost_to_customer_per_qty per market_date once,
-- and both UNION arms read from it. ORDER BY / LIMIT live inside derived tables
-- because in a compound SELECT they would otherwise apply to the whole UNION.
-- NOTE(review): when several dates tie for best or worst, LIMIT 1 keeps an
-- arbitrary one of them.
WITH daily_sales AS (
    SELECT
        market_date
        ,SUM(quantity * cost_to_customer_per_qty) AS total_sales
    FROM customer_purchases
    GROUP BY market_date
)
SELECT
    market_date
    ,total_sales
    ,sales_performance
FROM
    (--Get worst date (lowest sales)
    SELECT
        market_date
        ,total_sales
        ,'worst day' AS sales_performance
    FROM daily_sales
    ORDER BY total_sales ASC
    LIMIT 1
    )

UNION

SELECT
    market_date
    ,total_sales
    ,sales_performance
FROM
    (--Get best date (most sales)
    SELECT
        market_date
        ,total_sales
        ,'best day' AS sales_performance
    FROM daily_sales
    ORDER BY total_sales DESC
    LIMIT 1
    );


-- Prompt (excerpt visible in the diff context):
--   Think a bit about the row counts: how many distinct vendors, product names are t...
--   How many customers are there (y).
--   Before your final group by you should have the product of those two queries (x*y).

-- There are 3 vendors in vendor_inventory (vendor_id 4, 7, 8) and they have 8 products altogether.
-- There are 26 customers. We should expect the total records of customers purchasing to be (8*26) = 208.

-- Aggregate to find total sales per vendor and product, assuming every customer
-- buys 5 of each product (original_price * 5 per customer row).
-- Both selected label columns are in the GROUP BY so vendor_name is determined
-- by the group instead of being an arbitrary bare column.
SELECT
    vendor_name
    ,product_name
    ,SUM(amount_to_sell) AS total_sales
FROM (
    -- Get all valid combinations of products and vendors from vendor_inventory
    SELECT
        v.vendor_id
        ,v.vendor_name
        ,vi.product_id
        ,cp.product_name
        ,vi.original_price
        ,(vi.original_price * 5) AS amount_to_sell
    FROM vendor AS v
    INNER JOIN (
        -- NOTE(review): if a product's original_price differs between market
        -- dates, DISTINCT keeps one row per price and the totals are inflated —
        -- confirm prices are constant per vendor/product.
        SELECT DISTINCT
            vendor_id
            ,product_id
            ,original_price
        FROM vendor_inventory
        ) AS vi
        ON v.vendor_id = vi.vendor_id

    -- Get all combinations of customers and products and join on relevant product_ids.
    -- Only the product columns are needed; the CROSS JOIN with customer exists
    -- to repeat each product once per customer.
    INNER JOIN (
        SELECT
            p.product_id
            ,p.product_name
        FROM customer AS c
        CROSS JOIN product AS p
        ) AS cp
        ON vi.product_id = cp.product_id
    )
GROUP BY
    vendor_name
    ,product_name;


-- INSERT
-- Prompt (excerpt visible in the diff context):
--   This table will contain only products where the `product_qty_type = 'unit'`.
--   It should use all of the columns from the product table, as well as a new column for the `CURRENT_TIMESTAMP`.
--   Name the timestamp column `snapshot_timestamp`.
--Get table format for product
PRAGMA table_info(product);

--From this, we see the names and types as:
-- name                 type
-- product_id           int(11)
-- product_name         varchar(45)
-- product_size         varchar(45)
-- product_category_id  int(11)
-- product_qty_type     varchar(45)


-- Delete table in case it already exists, so the script can be re-run
DROP TABLE IF EXISTS product_units;

--Copy table format/columns of product, include snapshot_timestamp column
CREATE TABLE product_units (
    product_id INT(11)
    ,product_name VARCHAR(45)
    ,product_size VARCHAR(45)
    ,product_category_id INT(11)
    ,product_qty_type VARCHAR(45)
    ,snapshot_timestamp TEXT
    );

--Insert 'unit' products, stamped with the time of the snapshot.
-- Columns are listed explicitly on both sides so the statement does not depend
-- on the physical column order of product or product_units.
-- CURRENT_TIMESTAMP is stored as 'YYYY-MM-DD HH:MM:SS' text (UTC).
INSERT INTO product_units (
    product_id
    ,product_name
    ,product_size
    ,product_category_id
    ,product_qty_type
    ,snapshot_timestamp
    )
    SELECT
        product_id
        ,product_name
        ,product_size
        ,product_category_id
        ,product_qty_type
        ,CURRENT_TIMESTAMP
    FROM product
    WHERE product_qty_type = 'unit';

-- Checking if results look good
SELECT * FROM product_units;


-- Prompt:
--   2. Using `INSERT`, add a new row to the product_units table (with an updated timestamp).
--   This can be any product you desire (e.g. add another record for Apple Pie).

-- Adds one extra product row with a fresh timestamp. The column list keeps the
-- statement valid even if product_units later gains additional columns.
INSERT INTO product_units (
    product_id
    ,product_name
    ,product_size
    ,product_category_id
    ,product_qty_type
    ,snapshot_timestamp
    )
VALUES (
    24
    ,'Anchovy Martini'
    ,'30 oz'
    ,3
    ,'unit'
    ,CURRENT_TIMESTAMP
    );

-- Checking if results look good
SELECT * FROM product_units;


-- DELETE
-- Prompt (excerpt visible in the diff context):
--   HINT: If you don't specify a WHERE clause, you are going to have a bad time.

-- Removes only the row(s) added above; the WHERE clause limits the delete to
-- that product name.
DELETE FROM product_units
WHERE product_name = 'Anchovy Martini';

-- Checking if results look good
SELECT * FROM product_units;


-- UPDATE
-- Prompt (excerpt visible in the diff context):
--   Finally, make sure you have a WHERE statement to update the right row,
--   you'll need to use product_units.product_id to refer to the correct row within the product_units table.
--   When you have all of these components, you can run the update statement.
-- Add the column that will receive each product's latest inventory quantity.
-- NOTE(review): this ALTER fails if current_quantity already exists — it
-- assumes product_units was freshly re-created before this section runs.
ALTER TABLE product_units
ADD COLUMN current_quantity INT;

DROP TABLE IF EXISTS temp_latest_inventory;

-- Temp table with exactly one value per product_units row to copy back:
-- product_id plus latest_quantity.
-- latest_quantity is the vendor_inventory quantity on the single most recent
-- market_date in vendor_inventory (the original author notes this is Oct 13, 2023);
-- products with no inventory row on that date get 0 via COALESCE.
-- Only the two columns the UPDATE needs are selected, which avoids duplicate
-- product_id columns in the temp table.
-- NOTE(review): the latest date is global, not per product. If "last quantity"
-- should mean each product's own most recent date, compute MAX(market_date)
-- grouped by product_id instead — confirm the intended meaning.
-- NOTE(review): if several vendors list the same product on the latest date,
-- this yields several rows for that product_id and the UPDATE below picks one
-- of them — verify against the data.
CREATE TEMP TABLE temp_latest_inventory AS
    SELECT
        pu.product_id
        ,COALESCE(vi_latest.quantity, 0) AS latest_quantity
    FROM product_units AS pu
    LEFT JOIN (
        -- Inventory of latest date. If no record, means they were not available on that date
        SELECT
            vi.product_id
            ,vi.quantity
        FROM vendor_inventory AS vi
        WHERE vi.market_date = (
            -- Most recent market_date across all of vendor_inventory
            SELECT MAX(market_date)
            FROM vendor_inventory
            )
        ) AS vi_latest
        ON pu.product_id = vi_latest.product_id;

SELECT * FROM temp_latest_inventory;

-- Update statement for current_quantity, matched on product_id.
-- The correlated subquery's WHERE ties each product_units row to its own
-- temp-table row; every product_units row has one because the temp table was
-- built with a LEFT JOIN starting from product_units.
UPDATE product_units
SET current_quantity = (
    SELECT latest_quantity
    FROM temp_latest_inventory
    WHERE product_units.product_id = temp_latest_inventory.product_id
    );

-- Checking if results look good
SELECT * FROM product_units;