These exercises are in PostgreSQL V14, all completed tasks should be compatible with PostgreSQL V14
This needs to return quickly and should not scan the entire table (seq scan), please include any indexes in your script
For this challenge we have two tables of products and prices. Every product has lots of prices published throughout the year. We would like to see for product ids 100, 200, 300 and 400 the most recently published price which was published on or before the 10th June 2022.
If a price was published on the 11th June 2022 it would not be valid as it was published after the requested date. If it was published on or before the 10th June 2022 it is valid. The query should return a single price per product.
The product may or may not have a price that was published on the 10th June 2022.
We would like to see the Product Name, Product Price, Published Date.
DROP TABLE IF EXISTS example_1.product_prices;
DROP TABLE IF EXISTS example_1.products;
DROP SCHEMA IF EXISTS example_1;
CREATE SCHEMA example_1;
CREATE TABLE example_1.products (
id int4 NOT NULL GENERATED BY DEFAULT AS IDENTITY,
name text NOT NULL,
CONSTRAINT pk_products PRIMARY KEY (id)
);
CREATE TABLE example_1.product_prices (
id int4 NOT NULL GENERATED BY DEFAULT AS IDENTITY,
product_id int4 NOT NULL,
product_price numeric NOT NULL,
published_date date NOT NULL,
CONSTRAINT pk_product_prices PRIMARY KEY (id)
);
ALTER TABLE example_1.product_prices ADD CONSTRAINT fk_product_prices_product FOREIGN KEY (product_id) REFERENCES example_1.products(id) ON DELETE CASCADE;
INSERT INTO example_1.products (name)
SELECT concat('Product-', id)
FROM generate_series(1, 10000, 1) AS id;
INSERT INTO example_1.product_prices (product_id, product_price, published_date)
SELECT p.id, ROUND((random() * 100 + 1)::numeric, 2), ('2022-12-31'::date - random() * INTERVAL '365 days')::date
FROM example_1.products p
CROSS JOIN
(
SELECT generate_series(1, 100, 1) d
) AS dates;This needs to return quickly and should not scan the entire table (seq scan), please include any indexes in your script
For this challenge we have a "large" table of transactions. We need to ensure that the query below completes as quickly as possible and performs an index only scan selecting:
id, product_id, transaction_type, unit_of_measure
WHERE the product_id is 50 and the date is in June
DROP TABLE IF EXISTS example_2.transactions;
DROP SCHEMA IF EXISTS example_2;
CREATE SCHEMA example_2;
CREATE TABLE example_2.transactions (
id int4 NOT NULL GENERATED BY DEFAULT AS IDENTITY,
product_id int NULL,
ship_date date NOT NULL,
transaction_type text NOT NULL,
unit_of_measure text NOT NULL,
CONSTRAINT pk_transactions PRIMARY KEY (id)
);
INSERT INTO example_2.transactions (product_id, ship_date, transaction_type, unit_of_measure)
SELECT ROUND((random() * 100 + 1)::numeric),
('2022-12-31'::date - random() * INTERVAL '365 days')::date,
CASE WHEN ROUND((random())::numeric) = 0 THEN 'SALE' ELSE 'PURCHASE' END,
CASE WHEN ROUND((random())::numeric) = 0 THEN 'GA' ELSE 'KG' END
FROM generate_series(1, 10000, 1) AS id
CROSS JOIN
(
SELECT generate_series(1, 100, 1) d
) AS dates;This needs to return quickly, please include any indexes in your script
For this challenge we have two tables of products and prices. Every product has lots of prices published throughout the year. We would like to create view which returns only valid prices for a product.
A price is valid if the unit of measure on the price matches the unit of measure on the product.
DROP TABLE IF EXISTS example_3.product_prices;
DROP TABLE IF EXISTS example_3.products;
DROP SCHEMA IF EXISTS example_3;
CREATE SCHEMA example_3;
CREATE TABLE example_3.products (
id int4 NOT NULL GENERATED BY DEFAULT AS IDENTITY,
name text NOT NULL,
unit_of_measure text NOT NULL,
CONSTRAINT pk_products PRIMARY KEY (id)
);
CREATE TABLE example_3.product_prices (
id int4 NOT NULL GENERATED BY DEFAULT AS IDENTITY,
product_id int4 NOT NULL,
product_price numeric NOT NULL,
published_date date NOT NULL,
unit_of_measure text NOT NULL,
CONSTRAINT pk_product_prices PRIMARY KEY (id)
);
ALTER TABLE example_3.product_prices ADD CONSTRAINT fk_product_prices_product FOREIGN KEY (product_id) REFERENCES example_3.products(id) ON DELETE CASCADE;
INSERT INTO example_3.products (name, unit_of_measure)
SELECT concat('Product-', id),
CASE WHEN ROUND((random())::numeric) = 0 THEN 'GA' ELSE 'KG' END
FROM generate_series(1, 1000, 1) AS id;
INSERT INTO example_3.product_prices (product_id, product_price, published_date, unit_of_measure)
SELECT p.id,
ROUND((random() * 100 + 1)::numeric, 2),
('2022-12-31'::date - random() * INTERVAL '365 days')::date,
CASE WHEN ROUND((random())::numeric) = 0 THEN 'GA' ELSE 'KG' END
FROM example_3.products p
CROSS JOIN
(
SELECT generate_series(1, 100, 1) d
) AS dates;This needs to return quickly and should not scan the entire table (seq scan), please include any indexes in your script
For this challenge we have a "large" table of transactions. We need to know which organizations have transaction data. To query this a distinct won't work regardless of indexes as it will still need to do a full sequence scan.
What are the alternatives you could use to find the distinct collection of organization ids
DROP TABLE IF EXISTS example_4.transactions;
DROP SCHEMA IF EXISTS example_4;
CREATE SCHEMA example_4;
CREATE TABLE example_4.transactions (
id int4 NOT NULL GENERATED BY DEFAULT AS IDENTITY,
organization_id int NULL,
CONSTRAINT pk_transactions PRIMARY KEY (id)
);
INSERT INTO example_4.transactions (organization_id)
SELECT ROUND((random() * 100 + 1)::numeric)
FROM generate_series(1, 1000000, 1) AS id;