diff --git a/02_activities/assignments/Assignment 2 ERD - Ts'ui Toy.png b/02_activities/assignments/Assignment 2 ERD - Ts'ui Toy.png new file mode 100644 index 000000000..75329e4cb Binary files /dev/null and b/02_activities/assignments/Assignment 2 ERD - Ts'ui Toy.png differ diff --git a/02_activities/assignments/Assignment 2 ERD with employee shifts - Ts'ui Toy.png b/02_activities/assignments/Assignment 2 ERD with employee shifts - Ts'ui Toy.png new file mode 100644 index 000000000..8f4b1e865 Binary files /dev/null and b/02_activities/assignments/Assignment 2 ERD with employee shifts - Ts'ui Toy.png differ diff --git a/02_activities/assignments/Assignment2.md b/02_activities/assignments/Assignment2.md index 6cc76d916..0455e592b 100644 --- a/02_activities/assignments/Assignment2.md +++ b/02_activities/assignments/Assignment2.md @@ -22,7 +22,7 @@ Checklist: If you encounter any difficulties or have questions, please don't hesitate to reach out to our team via our Slack at `#cohort-6-help`. Our Technical Facilitators and Learning Support staff are here to help you navigate any challenges. *** - +## Participant Name: Ts'ui Toy (Empress-star) ## Section 1: You can start this section following *session 1*, but you may want to wait until you feel comfortable wtih basic SQL query writing. @@ -54,7 +54,9 @@ The store wants to keep customer addresses. Propose two architectures for the CU **HINT:** search type 1 vs type 2 slowly changing dimensions. ``` -Your answer... +Option 1: A customer_addresses table that includes the following columns: customer_id, customer_street_address, customer_city, customer_province, customer_country, customer_postal_code. This would be the Type 1 table, which overwrites the previous address on every change and therefore keeps no history. + +Option 2: A customer_addresses table that includes the following columns: record_date, customer_id, customer_street_address, customer_city, customer_province, customer_country, customer_postal_code. 
This would be the Type 2 table, as it would keep the full history by having one entry per customer address per day it was updated. ``` *** diff --git a/02_activities/assignments/Assignment_one_SQL.drawio.png b/02_activities/assignments/Assignment_one_SQL.drawio.png new file mode 100644 index 000000000..8826a84d7 Binary files /dev/null and b/02_activities/assignments/Assignment_one_SQL.drawio.png differ diff --git a/02_activities/assignments/assignment1.sql b/02_activities/assignments/assignment1.sql index 2e89fa7af..681a06036 100644 --- a/02_activities/assignments/assignment1.sql +++ b/02_activities/assignments/assignment1.sql @@ -1,24 +1,34 @@ +/*Participant Name: Ts'ui Toy (Empress-star)*/ + /* ASSIGNMENT 1 */ /* SECTION 2 */ --SELECT /* 1. Write a query that returns everything in the customer table. */ +SELECT* +FROM customer; - -/* 2. Write a query that displays all of the columns and 10 rows from the cus- tomer table, +/* 2. Write a query that displays all of the columns and 10 rows from the customer table, sorted by customer_last_name, then customer_first_ name. */ - +SELECT* +FROM customer +ORDER BY customer_last_name, customer_first_name +LIMIT 10; --WHERE /* 1. Write a query that returns all customer purchases of product IDs 4 and 9. */ -- option 1 - +SELECT* +FROM customer_purchases +WHERE product_id=4 OR product_id=9; /* numeric literals, matching option 2; avoids implicit string-to-number coercion */ -- option 2 - +SELECT* +FROM customer_purchases +WHERE product_id IN (4,9); /*2. Write a query that returns all customer purchases and a new calculated column 'price' (quantity * cost_to_customer_per_qty), @@ -27,10 +37,17 @@ filtered by vendor IDs between 8 and 10 (inclusive) using either: 2. 
one condition using BETWEEN */ -- option 1 +SELECT*, +quantity*cost_to_customer_per_qty as price +FROM customer_purchases +WHERE vendor_id >=8 AND vendor_id <=10; -- option 2 - +SELECT*, +quantity*cost_to_customer_per_qty as price +FROM customer_purchases +WHERE vendor_id BETWEEN 8 AND 10; --CASE @@ -38,19 +55,39 @@ filtered by vendor IDs between 8 and 10 (inclusive) using either: Using the product table, write a query that outputs the product_id and product_name columns and add a column called prod_qty_type_condensed that displays the word “unit” if the product_qty_type is “unit,” and otherwise displays the word “bulk.” */ - +SELECT +product_id, +product_name, + CASE WHEN product_qty_type = 'unit' THEN 'unit' + ELSE 'bulk' + END as prod_qty_type_condensed /* column name as requested by the task */ +FROM product; /* 2. We want to flag all of the different types of pepper products that are sold at the market. add a column to the previous query called pepper_flag that outputs a 1 if the product_name contains the word “pepper” (regardless of capitalization), and otherwise outputs 0. */ +SELECT +product_id, +product_name, + CASE WHEN product_qty_type = 'unit' THEN 'unit' + ELSE 'bulk' + END as prod_qty_type_condensed, + CASE WHEN lower(product_name) LIKE '%pepper%' THEN 1 /* lower() makes the case-insensitive match explicit instead of relying on the LIKE default */ + ELSE 0 + END as pepper_flag +FROM product; --JOIN /* 1. Write a query that INNER JOINs the vendor table to the vendor_booth_assignments table on the vendor_id field they both have in common, and sorts the result by vendor_name, then market_date. */ - +SELECT* +FROM vendor_booth_assignments as vba +INNER JOIN vendor as v + ON vba.vendor_id = v.vendor_id +ORDER BY vendor_name,market_date; @@ -59,7 +96,11 @@ vendor_id field they both have in common, and sorts the result by vendor_name, t -- AGGREGATE /* 1. Write a query that determines how many times each vendor has rented a booth at the farmer’s market by counting the vendor booth assignments per vendor_id. 
*/ - +SELECT +count(booth_number) AS total_booths_rented, +vendor_id +FROM vendor_booth_assignments +GROUP BY vendor_id; /* 2. The Farmer’s Market Customer Appreciation Committee wants to give a bumper @@ -67,6 +108,18 @@ sticker to everyone who has ever spent more than $2000 at the market. Write a qu of customers for them to give stickers to, sorted by last name, then first name. HINT: This query requires you to join two tables, use an aggregate function, and use the HAVING keyword. */ +SELECT +c.customer_first_name, +c.customer_last_name, +SUM(quantity*cost_to_customer_per_qty) as total_spend +FROM customer_purchases as cp +LEFT JOIN customer as c + ON cp.customer_id=c.customer_id +GROUP BY cp.customer_id, c.customer_first_name, c.customer_last_name /* every non-aggregated output column is grouped */ +HAVING SUM(quantity*cost_to_customer_per_qty)>2000 /* strictly "more than $2000"; exactly 2000 does not qualify */ +ORDER BY c.customer_last_name,c.customer_first_name; + + @@ -82,7 +135,23 @@ When inserting the new vendor, you need to appropriately align the columns to be VALUES(col1,col2,col3,col4,col5) */ +--drop table if needed +DROP TABLE IF EXISTS temp.new_vendor; + +--creating temp table +CREATE TABLE temp.new_vendor AS +--defining temp table +SELECT* +FROM vendor; + +--adding 10th vendor to temp.new_vendor +INSERT INTO temp.new_vendor (vendor_id,vendor_name,vendor_type,vendor_owner_first_name,vendor_owner_last_name) +VALUES (10,'Thomas'' Superfood Store','Fresh Focused','Thomas','Rosenthal'); /* single-quoted literal with the apostrophe doubled; double quotes denote identifiers */ + +--verifying it worked +SELECT * +FROM temp.new_vendor; -- Date /*1. Get the customer_id, month, and year (in separate columns) of every purchase in the customer_purchases table. @@ -90,6 +159,10 @@ VALUES(col1,col2,col3,col4,col5) HINT: you might need to search for strfrtime modifers sqlite on the web to know what the modifers for month and year are! */ +SELECT customer_id, +strftime('%m',market_date) AS 'Month' ,strftime('%Y',market_date) AS 'Year' +FROM customer_purchases; + /* 2. Using the previous query as a base, determine how much money each customer spent in April 2022. 
@@ -98,3 +171,11 @@ Remember that money spent is quantity*cost_to_customer_per_qty. HINTS: you will need to AGGREGATE, GROUP BY, and filter... but remember, STRFTIME returns a STRING for your WHERE statement!! */ +SELECT +customer_id, +strftime('%m',market_date) AS 'Month', +strftime('%Y',market_date) AS 'Year', +SUM(quantity*cost_to_customer_per_qty) AS total_spend /* summed per customer; a bare expression under GROUP BY yields only one arbitrary purchase */ +FROM customer_purchases +WHERE strftime('%m',market_date) = '04' AND strftime('%Y',market_date) = '2022' /* filter on the expressions rather than select-list aliases; strftime returns strings */ +GROUP BY customer_id; diff --git a/02_activities/assignments/assignment2.sql b/02_activities/assignments/assignment2.sql index 5ad40748a..c97654423 100644 --- a/02_activities/assignments/assignment2.sql +++ b/02_activities/assignments/assignment2.sql @@ -8,7 +8,9 @@ We tell them, no problem! We can produce a list with all of the appropriate deta Using the following syntax you create our super cool and not at all needy manager a list: SELECT -product_name || ', ' || product_size|| ' (' || product_qty_type || ')' + +product_name|| ', ' || product_size|| ' (' || product_qty_type || ')' + FROM product But wait! The product table has some bad data (a few NULL values). @@ -20,7 +22,11 @@ The `||` values concatenate the columns into strings. Edit the appropriate columns -- you're making two edits -- and the NULL rows will be fixed. All the other rows will remain the same.) */ +SELECT + +product_name|| ', ' || coalesce(product_size,'')|| ' (' || coalesce(product_qty_type,'unit') || ')' +FROM product; --Windowed Functions /* 1. Write a query that selects from the customer_purchases table and numbers each customer’s @@ -32,18 +38,42 @@ each new market date for each customer, or select only the unique market dates p (without purchase details) and number those visits. HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK(). */ - +SELECT +customer_id +,market_date +,dense_rank() OVER (PARTITION BY customer_id ORDER BY market_date ASC) as num_of_visit /* all purchases on the same market_date share one visit number */ +FROM customer_purchases; /* 2. 
Reverse the numbering of the query from a part so each customer’s most recent visit is labeled 1, then write another query that uses this one as a subquery (or temp table) and filters the results to only the customer’s most recent visit. */ - +SELECT* +FROM +( + SELECT + customer_id + ,market_date + ,row_number() OVER (PARTITION BY customer_id ORDER BY market_date DESC) as num_of_visit + FROM customer_purchases + ) as x +WHERE x.num_of_visit = 1; /* 3. Using a COUNT() window function, include a value along with each row of the customer_purchases table that indicates how many different times that customer has purchased that product_id. */ +--product count of products by customer + +SELECT DISTINCT +customer_id, +product_id, +count(product_id) over(PARTITION by product_id,customer_id) as times_purchased +FROM customer_purchases +ORDER BY customer_id; + + + -- String manipulations /* 1. Some product names in the product table have descriptions like "Jar" or "Organic". @@ -57,10 +87,29 @@ Remove any trailing or leading whitespaces. Don't just use a case statement for Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column. */ - +SELECT *, +CASE WHEN + product_name like'%-%' + THEN + trim(substr(product_name,(instr(product_name,'-')+1))) /* start right after the hyphen; trim() removes any following space, so no character is lost when none follows */ + ELSE + null + END as description + +FROM product; /* 2. Filter the query to show any product_size value that contain a number with REGEXP. */ - +SELECT *, +CASE WHEN + product_name like'%-%' + THEN + trim(substr(product_name,(instr(product_name,'-')+1))) /* start right after the hyphen; trim() removes any following space */ + ELSE + null + END as description + +FROM product +WHERE product_size REGEXP '\d'; -- UNION @@ -73,6 +122,33 @@ HINT: There are a possibly a few ways to do this query, but if you're struggling 3) Query the second temp table twice, once for the best day, once for the worst day, with a UNION binding them. 
*/ +--max QUERY +SELECT* + +FROM +( + SELECT market_date + ,sum(quantity*cost_to_customer_per_qty) as sales + ,row_number() OVER (ORDER BY sum(quantity*cost_to_customer_per_qty) DESC) as [row_number] + FROM customer_purchases + GROUP BY market_date +)as x +WHERE x.[row_number]=1 + +UNION + +--min QUERY +SELECT* + +FROM +( + SELECT market_date + ,sum(quantity*cost_to_customer_per_qty) as sales + ,row_number() OVER (ORDER BY sum(quantity*cost_to_customer_per_qty) ASC) as [row_number] + FROM customer_purchases + GROUP BY market_date +)as x +WHERE x.[row_number]=1; @@ -89,6 +165,34 @@ Think a bit about the row counts: how many distinct vendors, product names are t How many customers are there (y). Before your final group by you should have the product of those two queries (x*y). */ +--create cross joined temp table +DROP TABLE IF EXISTS temp.vendor_sales; + +CREATE TEMP TABLE IF NOT EXISTS temp.vendor_sales as + SELECT DISTINCT vendor_id, + product_id, + original_price*5 as total_price, + customer_id + FROM vendor_inventory + CROSS JOIN + customer; + +--check temp table + +SELECT* FROM TEMP.vendor_sales; + + +--filter table +SELECT +vendor_name, +product_name, +sum(total_price) as total_sales /* named output column instead of the raw expression text */ +FROM temp.vendor_sales as vs +LEFT JOIN vendor as v + ON vs.vendor_id=v.vendor_id +LEFT JOIN product as p + ON vs.product_id=p.product_id +GROUP BY vendor_name,product_name; -- INSERT @@ -97,10 +201,21 @@ This table will contain only products where the `product_qty_type = 'unit'`. It should use all of the columns from the product table, as well as a new column for the `CURRENT_TIMESTAMP`. Name the timestamp column `snapshot_timestamp`. */ +--creating table + +CREATE TABLE IF NOT EXISTS product_units as + SELECT*, + CURRENT_TIMESTAMP as snapshot_timestamp + FROM product + WHERE product_qty_type = 'unit'; /* exact match as specified; a '%unit%' pattern would also accept any value merely containing "unit" */ + + /*2. Using `INSERT`, add a new row to the product_units table (with an updated timestamp). This can be any product you desire (e.g. add another record for Apple Pie). 
*/ +INSERT INTO product_units +VALUES (10,'Eggs','1 dozen',6,'unit',CURRENT_TIMESTAMP); @@ -108,21 +223,54 @@ This can be any product you desire (e.g. add another record for Apple Pie). */ /* 1. Delete the older record for the whatever product you added. HINT: If you don't specify a WHERE clause, you are going to have a bad time.*/ +DELETE FROM product_units +WHERE product_id=10 and snapshot_timestamp