From 8e2796c5aa045eb89135710c5a5e4b1043bcb472 Mon Sep 17 00:00:00 2001
From: Amandine0424 <138607470+Amandine0424@users.noreply.github.com>
Date: Sat, 25 Nov 2023 15:41:57 +0100
Subject: [PATCH 1/2] Add files via upload

---
 your-code/solution.sql | 82 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 your-code/solution.sql

diff --git a/your-code/solution.sql b/your-code/solution.sql
new file mode 100644
index 0000000..d08af40
--- /dev/null
+++ b/your-code/solution.sql
@@ -0,0 +1,82 @@
+USE Publications2;
+
+-- Challenge 1 - Who Have Published What At Where?
+
+-- A view stores the query as a virtual table that later statements can
+-- reference. CREATE OR REPLACE keeps this script safe to re-run.
+CREATE OR REPLACE VIEW AuthorTitlePublisher AS
+SELECT
+    Authors.au_id AS `AUTHOR ID`,
+    Authors.au_lname AS `LAST NAME`,
+    Authors.au_fname AS `FIRST NAME`,
+    Titles.title AS `TITLE`,
+    Publishers.pub_name AS `PUBLISHER`
+FROM Authors
+INNER JOIN TitleAuthor ON Authors.au_id = TitleAuthor.au_id
+INNER JOIN Titles ON TitleAuthor.title_id = Titles.title_id
+INNER JOIN Publishers ON Titles.pub_id = Publishers.pub_id;
+
+-- Sanity check: the view and TitleAuthor should both have 25 rows.
+SELECT COUNT(*) AS `Total View Records` FROM AuthorTitlePublisher;
+SELECT COUNT(*) AS `Total TitleAuthor Records` FROM TitleAuthor;
+
+-- Challenge 2 - Who Have Published How Many At Where?
+
+-- Group by author and publisher only: also grouping by title (as before)
+-- yields one group per title, so every TITLE COUNT would be 1.
+CREATE OR REPLACE VIEW AuthorTitlePublisherWithCount AS
+SELECT
+    Authors.au_id AS `AUTHOR ID`,
+    Authors.au_lname AS `LAST NAME`,
+    Authors.au_fname AS `FIRST NAME`,
+    Publishers.pub_name AS `PUBLISHER`,
+    COUNT(*) AS `TITLE COUNT`
+FROM Authors
+INNER JOIN TitleAuthor ON Authors.au_id = TitleAuthor.au_id
+INNER JOIN Titles ON TitleAuthor.title_id = Titles.title_id
+INNER JOIN Publishers ON Titles.pub_id = Publishers.pub_id
+GROUP BY
+    Authors.au_id,
+    Authors.au_lname,
+    Authors.au_fname,
+    Publishers.pub_name;
+
+SELECT `AUTHOR ID`, `LAST NAME`, `FIRST NAME`, `PUBLISHER`, `TITLE COUNT`
+FROM AuthorTitlePublisherWithCount
+ORDER BY `TITLE COUNT` DESC, `AUTHOR ID`;
+
+-- Sanity check: the counts should add up to the 25 TitleAuthor records.
+SELECT SUM(`TITLE COUNT`) AS `TOTAL TITLE COUNT`
+FROM AuthorTitlePublisherWithCount;
+
+-- Challenge 3 - Best Selling Authors
+
+-- TOTAL is the number of copies sold (sum of Sales.qty over the author's
+-- titles), not the number of titles the author has written.
+CREATE OR REPLACE VIEW BestSellingAuthors AS
+SELECT
+    Authors.au_id AS `AUTHOR ID`,
+    Authors.au_lname AS `LAST NAME`,
+    Authors.au_fname AS `FIRST NAME`,
+    SUM(Sales.qty) AS `TOTAL`
+FROM Authors
+INNER JOIN TitleAuthor ON Authors.au_id = TitleAuthor.au_id
+INNER JOIN Sales ON TitleAuthor.title_id = Sales.title_id
+GROUP BY Authors.au_id, Authors.au_lname, Authors.au_fname
+ORDER BY `TOTAL` DESC
+LIMIT 3;
+
+-- Challenge 4 - Best Selling Authors Ranking
+
+-- LEFT JOINs keep every author in the result; COALESCE reports 0 instead of
+-- NULL for authors with no recorded sales.
+SELECT
+    Authors.au_id AS `AUTHOR ID`,
+    Authors.au_lname AS `LAST NAME`,
+    Authors.au_fname AS `FIRST NAME`,
+    COALESCE(SUM(Sales.qty), 0) AS `TOTAL`
+FROM Authors
+LEFT JOIN TitleAuthor ON Authors.au_id = TitleAuthor.au_id
+LEFT JOIN Sales ON TitleAuthor.title_id = Sales.title_id
+GROUP BY Authors.au_id, Authors.au_lname, Authors.au_fname
+ORDER BY `TOTAL` DESC;

From 0264f3ee5871bfc9ed150ac5d12e5c0f48c7f1a7 Mon Sep 17 00:00:00 2001
From: Amandine0424 <138607470+Amandine0424@users.noreply.github.com>
Date: Sat, 25 Nov 2023 15:45:02 +0100
Subject: [PATCH 2/2] Add files via upload

---
 .../lab_mysql_publications_db_import.ipynb    | 1726 +++++++++++++++++
 1 file changed, 1726 insertions(+)
 create mode 100644 your-code/lab_mysql_publications_db_import.ipynb

diff --git
a/your-code/lab_mysql_publications_db_import.ipynb b/your-code/lab_mysql_publications_db_import.ipynb new file mode 100644 index 0000000..c4c3fd0 --- /dev/null +++ b/your-code/lab_mysql_publications_db_import.ipynb @@ -0,0 +1,1726 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "9857c3c1", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "480634d8", + "metadata": {}, + "outputs": [], + "source": [ + "authors = pd.read_csv('authors.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7ec677c1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
au_idau_lnameau_fnamephoneaddresscitystatezipcontract
0172-32-1176WhiteJohnson408 496-722310932 Bigge Rd.Menlo ParkCA940251
1213-46-8915GreenMarjorie415 986-7020309 63rd St. #411OaklandCA946181
2238-95-7766CarsonCheryl415 548-7723589 Darwin Ln.BerkeleyCA947051
3267-41-2394O'LearyMichael408 286-242822 Cleveland Av. #14San JoseCA951281
4274-80-9391StraightDean415 834-29195420 College Av.OaklandCA946091
\n", + "
" + ], + "text/plain": [ + " au_id au_lname au_fname phone address \\\n", + "0 172-32-1176 White Johnson 408 496-7223 10932 Bigge Rd. \n", + "1 213-46-8915 Green Marjorie 415 986-7020 309 63rd St. #411 \n", + "2 238-95-7766 Carson Cheryl 415 548-7723 589 Darwin Ln. \n", + "3 267-41-2394 O'Leary Michael 408 286-2428 22 Cleveland Av. #14 \n", + "4 274-80-9391 Straight Dean 415 834-2919 5420 College Av. \n", + "\n", + " city state zip contract \n", + "0 Menlo Park CA 94025 1 \n", + "1 Oakland CA 94618 1 \n", + "2 Berkeley CA 94705 1 \n", + "3 San Jose CA 95128 1 \n", + "4 Oakland CA 94609 1 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "authors.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "119374be", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
discounttypestor_idlowqtyhighqtydiscount
0Initial CustomerNaNNaNNaN10.5
1Volume DiscountNaN100.01000.06.7
2Customer Discount8042.0NaNNaN5.0
\n", + "
" + ], + "text/plain": [ + " discounttype stor_id lowqty highqty discount\n", + "0 Initial Customer NaN NaN NaN 10.5\n", + "1 Volume Discount NaN 100.0 1000.0 6.7\n", + "2 Customer Discount 8042.0 NaN NaN 5.0" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "discounts = pd.read_csv('discounts.csv')\n", + "discounts.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d119ff80", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
emp_idfnameminitlnamejob_idjob_lvlpub_idhire_date
0A-C71970FAriaNaNCruz108713891991-10-26 00:00:00
1A-R89858FAnnetteNaNRoulet615299991990-02-21 00:00:00
2AMD15433FAnnMDevon320099521991-07-16 00:00:00
3ARD36773FAnabelaRDomingues81008771993-01-27 00:00:00
4CFH28514MCarlosFHernadez521199991989-04-21 00:00:00
\n", + "
" + ], + "text/plain": [ + " emp_id fname minit lname job_id job_lvl pub_id \\\n", + "0 A-C71970F Aria NaN Cruz 10 87 1389 \n", + "1 A-R89858F Annette NaN Roulet 6 152 9999 \n", + "2 AMD15433F Ann M Devon 3 200 9952 \n", + "3 ARD36773F Anabela R Domingues 8 100 877 \n", + "4 CFH28514M Carlos F Hernadez 5 211 9999 \n", + "\n", + " hire_date \n", + "0 1991-10-26 00:00:00 \n", + "1 1990-02-21 00:00:00 \n", + "2 1991-07-16 00:00:00 \n", + "3 1993-01-27 00:00:00 \n", + "4 1989-04-21 00:00:00 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employee = pd.read_csv('employee.csv')\n", + "employee.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ecf61ea0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "emp_id object\n", + "fname object\n", + "minit object\n", + "lname object\n", + "job_id int64\n", + "job_lvl int64\n", + "pub_id int64\n", + "hire_date object\n", + "dtype: object" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employee.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f7708b10", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
job_idjob_descmin_lvlmax_lvl
01New Hire - Job not specified1010
12Chief Executive Officer200250
23Business Operations Manager175225
34Chief Financial Officier175250
45Publisher150250
\n", + "
" + ], + "text/plain": [ + " job_id job_desc min_lvl max_lvl\n", + "0 1 New Hire - Job not specified 10 10\n", + "1 2 Chief Executive Officer 200 250\n", + "2 3 Business Operations Manager 175 225\n", + "3 4 Chief Financial Officier 175 250\n", + "4 5 Publisher 150 250" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "jobs = pd.read_csv('jobs.csv')\n", + "jobs.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5ee34881", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pub_idlogopr_info
0736474946383961D3001F00B30F0000000080000000800080...This is sample text data for New Moon Books, p...
18774749463839618B002F00B30F0000000080000000800080...This is sample text data for Binnet & Hardley,...
21389474946383961C2001D00B30F0000000080000000800080...This is sample text data for Algodata Infosyst...
31622474946383961F5003400B30F0000000080000000800080...This is sample text data for Five Lakes Publis...
41756474946383961E3002500B30F0000000080000000800080...This is sample text data for Ramona Publishers...
\n", + "
" + ], + "text/plain": [ + " pub_id logo \\\n", + "0 736 474946383961D3001F00B30F0000000080000000800080... \n", + "1 877 4749463839618B002F00B30F0000000080000000800080... \n", + "2 1389 474946383961C2001D00B30F0000000080000000800080... \n", + "3 1622 474946383961F5003400B30F0000000080000000800080... \n", + "4 1756 474946383961E3002500B30F0000000080000000800080... \n", + "\n", + " pr_info \n", + "0 This is sample text data for New Moon Books, p... \n", + "1 This is sample text data for Binnet & Hardley,... \n", + "2 This is sample text data for Algodata Infosyst... \n", + "3 This is sample text data for Five Lakes Publis... \n", + "4 This is sample text data for Ramona Publishers... " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pub_info = pd.read_csv('pub_info.csv')\n", + "pub_info.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "2f87e20c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 1286\n", + "1 1046\n", + "2 932\n", + "3 1828\n", + "4 1878\n", + "5 792\n", + "6 1602\n", + "7 1068\n", + "Name: logo, dtype: int64\n" + ] + } + ], + "source": [ + "# Check the length of each 'logo' value\n", + "logo_lengths = pub_info['logo'].apply(len)\n", + "\n", + "# Display the results\n", + "print(logo_lengths)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d8df519d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pub_idpub_namecitystatecountry
0736New Moon BooksBostonMAUSA
1877Binnet & HardleyWashingtonDCUSA
21389Algodata InfosystemsBerkeleyCAUSA
31622Five Lakes PublishingChicagoILUSA
41756Ramona PublishersDallasTXUSA
\n", + "
" + ], + "text/plain": [ + " pub_id pub_name city state country\n", + "0 736 New Moon Books Boston MA USA\n", + "1 877 Binnet & Hardley Washington DC USA\n", + "2 1389 Algodata Infosystems Berkeley CA USA\n", + "3 1622 Five Lakes Publishing Chicago IL USA\n", + "4 1756 Ramona Publishers Dallas TX USA" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "publishers = pd.read_csv ('publishers.csv')\n", + "publishers.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "db8f4689", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
title_idlorangehirangeroyalty
0BU10320500010
1BU103250015000012
2PC10350200010
3PC10352001300012
4PC10353001400014
\n", + "
" + ], + "text/plain": [ + " title_id lorange hirange royalty\n", + "0 BU1032 0 5000 10\n", + "1 BU1032 5001 50000 12\n", + "2 PC1035 0 2000 10\n", + "3 PC1035 2001 3000 12\n", + "4 PC1035 3001 4000 14" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "roysched = pd.read_csv('roysched.csv')\n", + "roysched.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "2a74de30", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stor_idord_numord_dateqtypaytermstitle_id
0638068711994-09-14 00:00:005Net 60BU1032
16380722a1994-09-13 00:00:003Net 60PS2091
27066A29761993-05-24 00:00:0050Net 30PC8888
37066QA7442.31994-09-13 00:00:0075ON invoicePS2091
47067D44821994-09-14 00:00:0010Net 60PS2091
\n", + "
" + ], + "text/plain": [ + " stor_id ord_num ord_date qty payterms title_id\n", + "0 6380 6871 1994-09-14 00:00:00 5 Net 60 BU1032\n", + "1 6380 722a 1994-09-13 00:00:00 3 Net 60 PS2091\n", + "2 7066 A2976 1993-05-24 00:00:00 50 Net 30 PC8888\n", + "3 7066 QA7442.3 1994-09-13 00:00:00 75 ON invoice PS2091\n", + "4 7067 D4482 1994-09-14 00:00:00 10 Net 60 PS2091" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sales = pd.read_csv('sales.csv')\n", + "sales.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "4846e13e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stor_idstor_namestor_addresscitystatezip
06380Eric the Read Books788 Catamaugus Ave.SeattleWA98056
17066Barnum's567 Pasadena Ave.TustinCA92789
27067News & Brews577 First St.Los GatosCA96745
37131Doc-U-Mat: Quality Laundry and Books24-A Avogadro WayRemuladeWA98014
47896Fricative Bookshop89 Madison St.FremontCA90019
\n", + "
" + ], + "text/plain": [ + " stor_id stor_name stor_address \\\n", + "0 6380 Eric the Read Books 788 Catamaugus Ave. \n", + "1 7066 Barnum's 567 Pasadena Ave. \n", + "2 7067 News & Brews 577 First St. \n", + "3 7131 Doc-U-Mat: Quality Laundry and Books 24-A Avogadro Way \n", + "4 7896 Fricative Bookshop 89 Madison St. \n", + "\n", + " city state zip \n", + "0 Seattle WA 98056 \n", + "1 Tustin CA 92789 \n", + "2 Los Gatos CA 96745 \n", + "3 Remulade WA 98014 \n", + "4 Fremont CA 90019 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stores = pd.read_csv('stores.csv')\n", + "stores.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "3204f81b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
au_idtitle_idau_ordroyaltyper
0172-32-1176PS33331100
1213-46-8915BU1032240
2213-46-8915BU20751100
3238-95-7766PC10351100
4267-41-2394BU1111240
\n", + "
" + ], + "text/plain": [ + " au_id title_id au_ord royaltyper\n", + "0 172-32-1176 PS3333 1 100\n", + "1 213-46-8915 BU1032 2 40\n", + "2 213-46-8915 BU2075 1 100\n", + "3 238-95-7766 PC1035 1 100\n", + "4 267-41-2394 BU1111 2 40" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titleauthor = pd.read_csv('titleauthor.csv')\n", + "titleauthor.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "bfc608ef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
title_idtitletypepub_idpriceadvanceroyaltyytd_salesnotespubdate
0BU1032The Busy Executive's Database Guidebusiness138919.995000.010.04095.0An overview of available database systems with...1991-06-12 00:00:00
1BU1111Cooking with Computers: Surreptitious Balance ...business138911.955000.010.03876.0Helpful hints on how to use your electronic re...1991-06-09 00:00:00
2BU2075You Can Combat Computer Stress!business7362.9910125.024.018722.0The latest medical and psychological technique...1991-06-30 00:00:00
3BU7832Straight Talk About Computersbusiness138919.995000.010.04095.0Annotated analysis of what computers can do fo...1991-06-22 00:00:00
4MC2222Silicon Valley Gastronomic Treatsmod_cook87719.990.012.02032.0Favorite recipes for quick, easy, and elegant ...1991-06-09 00:00:00
\n", + "
" + ], + "text/plain": [ + " title_id title type \\\n", + "0 BU1032 The Busy Executive's Database Guide business \n", + "1 BU1111 Cooking with Computers: Surreptitious Balance ... business \n", + "2 BU2075 You Can Combat Computer Stress! business \n", + "3 BU7832 Straight Talk About Computers business \n", + "4 MC2222 Silicon Valley Gastronomic Treats mod_cook \n", + "\n", + " pub_id price advance royalty ytd_sales \\\n", + "0 1389 19.99 5000.0 10.0 4095.0 \n", + "1 1389 11.95 5000.0 10.0 3876.0 \n", + "2 736 2.99 10125.0 24.0 18722.0 \n", + "3 1389 19.99 5000.0 10.0 4095.0 \n", + "4 877 19.99 0.0 12.0 2032.0 \n", + "\n", + " notes pubdate \n", + "0 An overview of available database systems with... 1991-06-12 00:00:00 \n", + "1 Helpful hints on how to use your electronic re... 1991-06-09 00:00:00 \n", + "2 The latest medical and psychological technique... 1991-06-30 00:00:00 \n", + "3 Annotated analysis of what computers can do fo... 1991-06-22 00:00:00 \n", + "4 Favorite recipes for quick, easy, and elegant ... 
1991-06-09 00:00:00 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titles = pd.read_csv('titles.csv')\n", + "titles.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "32dce367", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "title_id object\n", + "title object\n", + "type object\n", + "pub_id int64\n", + "price float64\n", + "advance float64\n", + "royalty float64\n", + "ytd_sales float64\n", + "notes object\n", + "pubdate object\n", + "dtype: object" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titles.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "6ea13d40", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DataFrame 1 Column Names:\n", + "au_id\n", + "au_lname\n", + "au_fname\n", + "phone\n", + "address\n", + "city\n", + "state\n", + "zip\n", + "contract\n", + "\n", + "\n", + "DataFrame 2 Column Names:\n", + "discounttype\n", + "stor_id\n", + "lowqty\n", + "highqty\n", + "discount\n", + "\n", + "\n", + "DataFrame 3 Column Names:\n", + "emp_id\n", + "fname\n", + "minit\n", + "lname\n", + "job_id\n", + "job_lvl\n", + "pub_id\n", + "hire_date\n", + "\n", + "\n", + "DataFrame 4 Column Names:\n", + "job_id\n", + "job_desc\n", + "min_lvl\n", + "max_lvl\n", + "\n", + "\n", + "DataFrame 5 Column Names:\n", + "pub_id\n", + "logo\n", + "pr_info\n", + "\n", + "\n", + "DataFrame 6 Column Names:\n", + "pub_id\n", + "pub_name\n", + "city\n", + "state\n", + "country\n", + "\n", + "\n", + "DataFrame 7 Column Names:\n", + "title_id\n", + "lorange\n", + "hirange\n", + "royalty\n", + "\n", + "\n", + "DataFrame 8 Column Names:\n", + "stor_id\n", + "ord_num\n", + "ord_date\n", + "qty\n", + "payterms\n", + "title_id\n", + "\n", + "\n", + "DataFrame 9 Column Names:\n", + "stor_id\n", + "stor_name\n", + "stor_address\n", + 
"city\n", + "state\n", + "zip\n", + "\n", + "\n", + "DataFrame 10 Column Names:\n", + "au_id\n", + "title_id\n", + "au_ord\n", + "royaltyper\n", + "\n", + "\n", + "DataFrame 11 Column Names:\n", + "title_id\n", + "title\n", + "type\n", + "pub_id\n", + "price\n", + "advance\n", + "royalty\n", + "ytd_sales\n", + "notes\n", + "pubdate\n", + "\n", + "\n" + ] + } + ], + "source": [ + "# List of dataframes\n", + "dataframes = [authors, discounts, employee, jobs, pub_info, publishers, roysched, sales, stores, titleauthor, titles]\n", + "\n", + "# Loop through each dataframe and print its column names\n", + "for i, df in enumerate(dataframes, start=1):\n", + " print(f\"DataFrame {i} Column Names:\")\n", + " for column_name in df.columns:\n", + " print(column_name)\n", + " print(\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "b7d766ee", + "metadata": {}, + "outputs": [], + "source": [ + "import mysql.connector\n", + "import getpass #create new input area, useful to give password" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "6115b4df", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "········\n" + ] + } + ], + "source": [ + "# Step 3 : Enter password\n", + "saved_password = getpass.getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "c01bfb39", + "metadata": {}, + "outputs": [], + "source": [ + "cnx = mysql.connector.connect(user='root',\n", + " password=saved_password,\n", + " port = 3306) #indicate local instance here " + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "1851b55e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cnx.is_connected()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "a5f62486", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream",
+     "text": [
+      "Table 'Authors' created successfully.\n",
+      "Table 'Discounts' created successfully.\n",
+      "Table 'Employee' created successfully.\n",
+      "Table 'Jobs' created successfully.\n",
+      "Table 'Pub_Info' created successfully.\n",
+      "Table 'Publishers' created successfully.\n",
+      "Table 'Roysched' created successfully.\n",
+      "Table 'Sales' created successfully.\n",
+      "Table 'Stores' created successfully.\n",
+      "Table 'TitleAuthor' created successfully.\n",
+      "Table 'Titles' created successfully.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create the database (no-op when it already exists)\n",
+    "create_db_statement = \"CREATE DATABASE IF NOT EXISTS Publications2;\"\n",
+    "cursor = cnx.cursor()\n",
+    "cursor.execute(create_db_statement)\n",
+    "\n",
+    "# Switch to the created database so the tables below land in it\n",
+    "use_db_statement = \"USE Publications2;\"\n",
+    "cursor.execute(use_db_statement)\n",
+    "\n",
+    "# Dataframe: authors\n",
+    "create_table_authors = \"\"\"\n",
+    "CREATE TABLE IF NOT EXISTS Authors (\n",
+    "    au_id VARCHAR(15) PRIMARY KEY,\n",
+    "    au_lname VARCHAR(255),\n",
+    "    au_fname VARCHAR(255),\n",
+    "    phone VARCHAR(20),\n",
+    "    address VARCHAR(255),\n",
+    "    city VARCHAR(255),\n",
+    "    state VARCHAR(2),\n",
+    "    zip VARCHAR(10),\n",
+    "    contract INT\n",
+    ");\n",
+    "\"\"\"\n",
+    "cursor.execute(create_table_authors)\n",
+    "print(\"Table 'Authors' created successfully.\")\n",
+    "\n",
+    "# Dataframe: discounts\n",
+    "create_table_discounts = \"\"\"\n",
+    "CREATE TABLE IF NOT EXISTS Discounts (\n",
+    "    discounttype VARCHAR(255),\n",
+    "    stor_id INT,\n",
+    "    lowqty INT,\n",
+    "    highqty INT,\n",
+    "    discount DECIMAL(5, 2)  -- exact decimal: FLOAT cannot store 6.7 exactly\n",
+    ");\n",
+    "\"\"\"\n",
+    "cursor.execute(create_table_discounts)\n",
+    "print(\"Table 'Discounts' created successfully.\")\n",
+    "\n",
+    "# Dataframe: employee\n",
+    "create_table_employee = \"\"\"\n",
+    "CREATE TABLE IF NOT EXISTS Employee (\n",
+    "    emp_id VARCHAR(255) PRIMARY KEY,\n",
+    "    fname VARCHAR(255),\n",
+    "    minit VARCHAR(1),\n",
+    "    lname VARCHAR(255),\n",
+    "    job_id INT,\n",
+    "    job_lvl INT,\n",
+    "    pub_id INT,\n",
+    "    hire_date DATE\n",
+    ");\n",
+    "\"\"\"\n",
+    "cursor.execute(create_table_employee)\n",
+    "print(\"Table 'Employee' created successfully.\")\n",
+    "\n",
+    "# Dataframe: jobs\n",
+    "create_table_jobs = \"\"\"\n",
+    "CREATE TABLE IF NOT EXISTS Jobs (\n",
+    "    job_id INT PRIMARY KEY,\n",
+    "    job_desc VARCHAR(255),\n",
+    "    min_lvl INT,\n",
+    "    max_lvl INT\n",
+    ");\n",
+    "\"\"\"\n",
+    "cursor.execute(create_table_jobs)\n",
+    "print(\"Table 'Jobs' created successfully.\")\n",
+    "\n",
+    "# Dataframe: pub_info\n",
+    "create_table_pub_info = \"\"\"\n",
+    "CREATE TABLE IF NOT EXISTS Pub_Info (\n",
+    "    pub_id INT PRIMARY KEY,\n",
+    "    logo VARCHAR(2000),\n",
+    "    pr_info TEXT\n",
+    ");\n",
+    "\"\"\"\n",
+    "cursor.execute(create_table_pub_info)\n",
+    "print(\"Table 'Pub_Info' created successfully.\")\n",
+    "\n",
+    "# Dataframe: publishers\n",
+    "create_table_publishers = \"\"\"\n",
+    "CREATE TABLE IF NOT EXISTS Publishers (\n",
+    "    pub_id INT PRIMARY KEY,\n",
+    "    pub_name VARCHAR(255),\n",
+    "    city VARCHAR(255),\n",
+    "    state VARCHAR(255),\n",
+    "    country VARCHAR(255)\n",
+    ");\n",
+    "\"\"\"\n",
+    "cursor.execute(create_table_publishers)\n",
+    "print(\"Table 'Publishers' created successfully.\")\n",
+    "\n",
+    "# Dataframe: roysched\n",
+    "create_table_roysched = \"\"\"\n",
+    "CREATE TABLE IF NOT EXISTS Roysched (\n",
+    "    title_id VARCHAR(255),\n",
+    "    lorange INT,\n",
+    "    hirange INT,\n",
+    "    royalty FLOAT\n",
+    ");\n",
+    "\"\"\"\n",
+    "cursor.execute(create_table_roysched)\n",
+    "print(\"Table 'Roysched' created successfully.\")\n",
+    "\n",
+    "# Dataframe: sales\n",
+    "create_table_sales = \"\"\"\n",
+    "CREATE TABLE IF NOT EXISTS Sales (\n",
+    "    stor_id INT,\n",
+    "    ord_num VARCHAR(255),\n",
+    "    ord_date DATE,\n",
+    "    qty INT,\n",
+    "    payterms VARCHAR(255),\n",
+    "    title_id VARCHAR(255)\n",
+    ");\n",
+    "\"\"\"\n",
+    "cursor.execute(create_table_sales)\n",
+    "print(\"Table 'Sales' created successfully.\")\n",
+    "\n",
+    "# Dataframe: stores\n",
+    "create_table_stores = \"\"\"\n",
+    "CREATE TABLE IF NOT EXISTS Stores (\n",
+    "    stor_id INT PRIMARY KEY,\n",
+    "    stor_name VARCHAR(255),\n",
+    "    stor_address VARCHAR(255),\n",
+    "    city VARCHAR(255),\n",
+    "    state VARCHAR(255),\n",
+    "    zip VARCHAR(10)\n",
+    ");\n",
+    "\"\"\"\n",
+    "cursor.execute(create_table_stores)\n",
+    "print(\"Table 'Stores' created successfully.\")\n",
+    "\n",
+    "# Dataframe: titleauthor\n",
+    "create_table_titleauthor = \"\"\"\n",
+    "CREATE TABLE IF NOT EXISTS TitleAuthor (\n",
+    "    au_id VARCHAR(15),  -- same type as Authors.au_id, which it joins to\n",
+    "    title_id VARCHAR(255),\n",
+    "    au_ord INT,\n",
+    "    royaltyper INT\n",
+    ");\n",
+    "\"\"\"\n",
+    "cursor.execute(create_table_titleauthor)\n",
+    "print(\"Table 'TitleAuthor' created successfully.\")\n",
+    "\n",
+    "# Dataframe: titles\n",
+    "create_table_titles = \"\"\"\n",
+    "CREATE TABLE IF NOT EXISTS Titles (\n",
+    "    title_id VARCHAR(255) PRIMARY KEY,\n",
+    "    title VARCHAR(255),\n",
+    "    type VARCHAR(255),\n",
+    "    pub_id INT,\n",
+    "    price DECIMAL(10, 2),  -- money: exact decimal instead of FLOAT\n",
+    "    advance DECIMAL(12, 2),  -- money: exact decimal instead of FLOAT\n",
+    "    royalty FLOAT,\n",
+    "    ytd_sales FLOAT,\n",
+    "    notes TEXT,\n",
+    "    pubdate DATE\n",
+    ");\n",
+    "\"\"\"\n",
+    "cursor.execute(create_table_titles)\n",
+    "print(\"Table 'Titles' created successfully.\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "13475c04",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "23 rows inserted into 'Authors' table.\n",
+      "3 rows inserted into 'Discounts' table.\n",
+      "43 rows inserted into 'Employee' table.\n",
+      "14 rows inserted into 'Jobs' table.\n",
+      "8 rows inserted into 'Pub_info' table.\n",
+      "8 rows inserted into 'Publishers' table.\n",
+      "86 rows inserted into 'Roysched' table.\n",
+      "21 rows inserted into 'Sales' table.\n",
+      "6 rows inserted into 'Stores' table.\n",
+      "25 rows inserted into 'TitleAuthor' table.\n",
+      "18 rows inserted into 'Titles' table.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Connect to MySQL server\n",
+
"cnx = mysql.connector.connect(user='root', password=saved_password, port=3306, database='Publications2')\n",
+    "cursor = cnx.cursor()\n",
+    "\n",
+    "# Map each table name (spelled exactly as in its CREATE TABLE) to its dataframe\n",
+    "dataframes = {\n",
+    "    'Authors': authors,\n",
+    "    'Discounts': discounts,\n",
+    "    'Employee': employee,\n",
+    "    'Jobs': jobs,\n",
+    "    'Pub_Info': pub_info,\n",
+    "    'Publishers': publishers,\n",
+    "    'Roysched': roysched,\n",
+    "    'Sales': sales,\n",
+    "    'Stores': stores,\n",
+    "    'TitleAuthor': titleauthor,\n",
+    "    'Titles': titles\n",
+    "}\n",
+    "\n",
+    "# Bulk-insert every dataframe; the typed tables were created in the previous cell\n",
+    "try:\n",
+    "    for table_name, df in dataframes.items():\n",
+    "        # Name the target columns so the insert does not rely on column order\n",
+    "        columns = ', '.join(f'`{col}`' for col in df.columns)\n",
+    "        placeholders = ', '.join(['%s'] * len(df.columns))\n",
+    "        insert_data_statement = f\"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})\"\n",
+    "\n",
+    "        # NaN is not a valid MySQL value: send missing cells as NULL (None)\n",
+    "        rows = [tuple(None if pd.isna(value) else value for value in row)\n",
+    "                for row in df.astype(object).itertuples(index=False, name=None)]\n",
+    "        cursor.executemany(insert_data_statement, rows)\n",
+    "        print(f\"{len(df)} rows inserted into '{table_name}' table.\")\n",
+    "\n",
+    "    cnx.commit()\n",
+    "finally:\n",
+    "    cursor.close()\n",
+    "    cnx.close()\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}