diff --git a/01-06-demo-jupyter.py b/01-06-demo-jupyter.py
new file mode 100644
index 0000000..c3505b3
--- /dev/null
+++ b/01-06-demo-jupyter.py
@@ -0,0 +1,107 @@
+1. Create a function that returns a list of all the names.
+2. Create a function that counts the total number of males and females. Return both numbers
+3. Create a function that returns a list of all the characters that appear in more than x number of films, where 'x' is the number of films you are checking for and is passed into the function.
+4. Create a function that returns a tuple of the min, max, and avg height for all characters.
+5. Create a function that accepts two arguments: eye color and hair color. Return a dictionary with the min, max, and avg height based on the parameters.
+
+Challenge 1: Get all of the response data into a list using a "While Loop" instead of a "For Loop"
+Challenge 2: For problem number 4, convert the centimeters to feet
+Challenge 3: Create a function that returns a list of all the names that start with a certain letter.
+
+
+
+
+
+We have an endpoint with 82 people records. We have 10 records per page. We have 9 total pages.
+
+We have a base url: https://swapi.dev/api/people/?page=
+
+import requests
+import json
+from pprint import pp
+We need to get all of the pages into one Python List.
+
+Pseudo-Code
+
+I want to make a for loop that loops through every page -I need to increase the page number of the URL by 1 every time I move through the loop -pagenum + 1 -pagenum + i
+Concept: I need to extract the data I am looking for into a data structure (list) -need to create a list
+
+Make a get request -I need to save the response to the request in a variable
+
+Need to turn our response into a data type we can use later .loads() .json()
+
+.append the converted response into the list
+
+WAIT!
+
+Before we even mess with the loop.....let's make sure we can do it for one page
+
+url= 'https://swapi.dev/api/people/?page=1'
+#Send one GET request, for page 1 only, and keep the response object.
+response= requests.get(url)
+#.json() converts the JSON body of the response into Python data.
+r= response.json()
+#Pretty-print it so the structure of a single page can be inspected.
+pp(r)
+=======================================================================================
+
+We are now going to try to get all of the pages.
+
+
+baseurl= 'https://swapi.dev/api/people/?page='
+total_pages= 9
+
+#I need to perform my get request, 9 times.
+#I want to use a for loop to do that.
+
+#I can hardcode the values in a for loop, by setting the range with integers
+#for i in range(1, 10)
+data= []
+
+for i in range(1, total_pages+1):
+    #Builds 'https://swapi.dev/api/people/?page=1', '...?page=2', and so on.
+    response= requests.get(baseurl + str(i), timeout=10)
+    #Stop on an HTTP error instead of storing an error payload that has
+    #no 'results' key for the later steps to read.
+    response.raise_for_status()
+    data.append(response.json())
+
+pp(data)
+============================================================================================
+
+
+We now have everything in one list!
+
+But there are keys that we do not need for the questions that are being asked.
+
+Let's get the values of the results keys in one list.
+
+We need to loop through the list. Then you call the key for that list index; in this case it is the 'results' key. We want to store it in a separate list so all we have left to work with is our character data.
+
+#Checking to see how to get the information that we want.
+print(data[0]['results'])
+
+cleaned_data= []
+
+#Each item in data is one page of the API response: a dictionary whose
+#'results' key holds the list of character records for that page.
+#
+#Looping over the pages themselves (instead of over range(0, 9)) keeps
+#this working when the number of pages changes, and avoids an IndexError
+#if fewer than 9 pages were downloaded.
+#
+#.extend() adds every character from the page to cleaned_data one by one,
+#so we end up with one flat list of characters rather than a list of
+#lists (which is what .append() would give us).
+
+for list_page in data:
+    cleaned_data.extend(list_page['results'])
+
+print(cleaned_data)
+
+============================================================================================
+
+
+
+
+
diff --git a/12-14-xml-practice.py b/12-14-xml-practice.py
new file mode 100644
index 0000000..22392eb
--- /dev/null
+++ b/12-14-xml-practice.py
@@ -0,0 +1,36 @@
+import xml.etree.ElementTree as ET
+
+tree = ET.parse("movies1.xml")
+root = tree.getroot()
+
+# Debugging aids for inspecting the parsed document:
+# print(tree)
+# print(root)
+# print(root.tag)
+# print(root.attrib)
+
+# Add a brand-new <genre> under the root (instead of looking one up with find()).
+anime = ET.SubElement(root,'genre')
+print("Added new Genre")
+# Label the new genre so the XPath query further down can select it.
+anime.attrib["category"] = 'Anime'
+print("added new genre attribute category=Anime")
+# Give the Anime genre an empty <decade> child to hold the movie.
+new_dec = ET.SubElement(anime, 'decade')
+print("added new decade")
+# Label the decade; the query below matches on years='1990s'.
+new_dec.attrib["years"] = '1990s'
+print("added decade attribute year = 1990s")
+# Look up the movie under any genre/decade, and the decade created above.
+batman = root.find("./genre/decade/movie[@title='Batman Returns']")
+dec1990s = root.find("./genre[@category='Anime']/decade[@years='1990s']")
+# NOTE(review): find() returns None when nothing matches - confirm that movies1.xml has this title.
+dec1990s.append(batman)
+print("copy movie element Batman from Action genre to Anime genre ")
+# dec1990s is rebound to the Action genre's 1990s decade before the removal.
+dec1990s = root.find("./genre[@category='Action']/decade[@years='1990s']")
+dec1990s.remove(batman)
+print("remove Batman movie element from Action genre ")
+# NOTE(review): remove() needs batman to be a direct child of that decade - verify against movies1.xml.
+# Save the modified tree over the same file that was parsed at the top.
+tree.write("movies1.xml")
diff --git a/12_09_practice.py b/12_09_practice.py
index 7914563..8fbab03 100644
--- a/12_09_practice.py
+++ b/12_09_practice.py
@@ -1,29 +1,206 @@
-#Biggie Size - Given a list, write a function that changes all positive numbers in the list to "big". Example: make_it_big([-1, 3, 5, -5]) returns that same list, #changed to [-1, "big", "big", -5].
-#Count Positives - Given a list of numbers, create a function to replace last value with number of positive values. Example, count_positives([-1,1,1,1]) changes list #to [-1,1,1,3] and returns it. (Note that zero is not considered to be a positive number).
+#Biggie Size - Given a list, write a function that changes all positive numbers in the list to "big".
+# Example: make_it_big([-1, 3, 5, -5]) returns that same list, #changed to [-1, "big", "big", -5].
-#SumTotal - Create a function that takes a list as an argument and returns the sum of all the values in the list. For example sum_total([1,2,3,4]) should return 10
+def make_it_big(mylist):  # new list with every value > 0 replaced by "big"; 0 and negatives stay
+    return ["big" if each > 0 else each for each in mylist]
-#Average - Create a function that takes a list as an argument and returns the average of all the values in the list. For example multiples([1,2,3,4]) should return #2.5
+print(make_it_big([-1, 3, 5, -5]))
-#Length - Create a function that takes a list as an argument and returns the length of the list. For example length([1,2,3,4]) should return 4
+#Count Positives - Given a list of numbers, create a function to replace last value with number of positive values.
+# Example, count_positives([-1,1,1,1]) changes list #to [-1,1,1,3] and returns it.
+# (Note that zero is not considered to be a positive number).
-#Minimum - Create a function that takes a list as an argument and returns the minimum value in the list. If the passed list is empty, have the function return false. #For example minimum([1,2,3,4]) should return 1; minimum([-1,-2,-3]) should return -3.
-#
-#Maximum - Create a function that takes a list as an argument and returns the maximum value in the list. If the passed list is empty, have the function return false. #For example maximum([1,2,3,4]) should return 4; maximum([-1,-2,-3]) should return -1.
+def count_positives(mylist):
+    """Replace the last item of mylist with its count of values > 0, in place."""
+    if not mylist:
+        # Nothing to overwrite: mylist[-1] would raise IndexError on [].
+        return mylist
+    mylist[-1] = sum(1 for each in mylist if each > 0)
+    return mylist
-#Ultimateaalyze - Create a function that takes a list as an argument and returns a dictionary that has the sumTotal, average, minimum, maximum ad length of the list.
+print(count_positives([-1,1,1,1]))
-#ReverseList - Create a function that takes a list as a argument and return a list in a reversed order. Do this without creating a empty temporary list. For example #reverse([1,2,3,4]) should return [4,3,2,1]. This challenge is known to appear during basic technical interviews.
+#SumTotal - Create a function that takes a list as an argument and returns the sum of all the values in the list.
+#For example sum_total([1,2,3,4]) should return 10
+
+def sum_total(mylist):  # add up every value in mylist; an empty list gives 0
+    return sum(mylist)
+
+print(sum_total([1,2,3,4]))
+
+#Average - Create a function that takes a list as an argument and returns the average of all the values in the list.
+# For example multiples([1,2,3,4]) should return #2.5
+def multiples(mylist):  # arithmetic mean of mylist; an empty list raises ZeroDivisionError
+    return (sum(mylist)/len(mylist))
+
+print(multiples([1,2,3,4]))
+
+#Length - Create a function that takes a list as an argument and returns the length of the list.
+# For example length([1,2,3,4]) should return 4
+def length(mylist):  # number of items in mylist
+    return len(mylist)
+
+print(length([1,2,3,4]))
+
+# Minimum - Create a function that takes a list as an argument and returns the minimum value in the list.
+# If the passed list is empty, have the function return false.
+# For example minimum([1,2,3,4]) should return 1; minimum([-1,-2,-3]) should return -3.
+
+def minimum(mylist):
+    """Return the smallest value in mylist, or False for an empty list."""
+    if not mylist:
+        return False
+    return min(mylist)
+
+print(minimum([1,2,3,4]))
+
+
+# Maximum - Create a function that takes a list as an argument and returns the maximum value in the list.
+# If the passed list is empty, have the function return false.
+# For example maximum([1,2,3,4]) should return 4; maximum([-1,-2,-3]) should return -1.
+
+def maximum(mylist):
+    """Return the largest value in mylist, or False when mylist is empty."""
+    if not mylist:
+        return False
+    return max(mylist)
+
+print(maximum([1,2,3,4]))
+
+
+# UltimateAnalyze - Create a function that takes a list as an argument and
+# returns a dictionary that has the sumTotal, average, minimum, maximum and length of the list.
+
+def create_dict(mylist):
+    """Return a dict with the sumtotal, average, minimum, maximum and length."""
+    total, count = sum(mylist), len(mylist)
+    return {
+        "sumtotal": total, "average": total/count,
+        "minimum": min(mylist), "maximum": max(mylist),
+        "length": count,
+    }
+
+print(create_dict([1,2,3,4]))
+
+# ReverseList - Create a function that takes a list as a argument and return a list in a reversed order.
+# Do this without creating a empty temporary list. For example #reverse([1,2,3,4]) should return [4,3,2,1].
+# This challenge is known to appear during basic technical interviews.
+
+def reverse(mylist):  # new list with the items of mylist in reverse order; mylist is untouched
+    return [*reversed(mylist)]
+
+print(reverse([1,2,3,4]))
+
+#Ispalindrome- Given a string, write a python function to check if it is palindrome or not.
+# A string is said to be palindrome if the reverse of the string is the same as string.
+# For example, “radar” is a palindrome, but “radix” is not a palindrome.
+
+def palindrome(mystr):
+    """Return True when mystr reads the same forwards and backwards.
+
+    The comparison is exact: case, spaces and punctuation all count, so
+    "Radar" is not treated as a palindrome. The empty string is one.
+    """
+    # A reversed slice replaces the character-by-character loop, and the
+    # comparison already yields the bool, so no if/else is needed.
+    return mystr == mystr[::-1]
+
+print(palindrome("soon"))
-#Ispalindrome- Given a string, write a python function to check if it is palindrome or not. A string is said to be palindrome if the reverse of the string is the same as string. For example, “radar” is a palindrome, but “radix” is not a palindrome.
#Fizzbuzz- Create a function that will print numbers from 1 to 100, with certain exceptions:
#If the number is a multiple of 3, print “Fizz” instead of the number.
#If the number is a multiple of 5, print “Buzz” instead of the number.
#If the number is a multiple of 3 and 5, print “FizzBuzz” instead of the number.
-#Fibonacci- The Fibonacci numbers, commonly denoted F(n) form a sequence, called the Fibonacci sequence, such that each number is the sum of the two preceding ones, #starting from 0 and 1. That is,
- #F(0) = 0, F(1) = 1
- #F(n) = F(n - 1) + F(n - 2), for n > 1.
- #Create a function that accepts any number and will create a sequence based on the fibonacci sequence.
\ No newline at end of file
+def myfunc(n):
+    """Print the FizzBuzz word for the single number n, or n itself."""
+    label = ""
+    if n % 3 == 0:
+        label += "Fizz"
+    if n % 5 == 0:
+        label += "Buzz"
+    # An empty label means n is a multiple of neither 3 nor 5.
+    print(label or n)
+    return
+
+myfunc(30)
+
+#Fibonacci- The Fibonacci numbers, commonly denoted F(n) form a sequence, called the Fibonacci sequence,
+# such that each number is the sum of the two preceding ones, #starting from 0 and 1. That is,
+#F(0) = 0, F(1) = 1
+#F(n) = F(n - 1) + F(n - 2), for n > 1.
+#Create a function that accepts any number and will create a sequence based on the fibonacci sequence.
+
+def fibonacci(n):  # first n Fibonacci numbers as a list; [] when n <= 0
+    fib_list = [0, 1][:max(n, 0)]  # trim the seeds: n < 2 used to return [0, 1]
+    for _ in range(2, n):
+        fib_list.append(fib_list[-2] + fib_list[-1])
+    return fib_list
+
+print(fibonacci(10))
+
+#########################################################################################################
+############# 12-11-python-practice #####################################################################
+
+####### slide 14 #######
+#Create a generator, primes_gen that generates prime numbers starting from 2
+def primes_gen():  # endless generator of primes from 2; a list cannot be passed to next()
+    from itertools import count  # local import keeps this block self-contained
+    return (n for n in count(2) if all(n % j for j in range(2, int(n ** 0.5) + 1)))
+gen = primes_gen()
+for _ in range(10):
+ print(next(gen), end=' ')
+
+############ slide 19 and 20 ##################
+#Consider the list:
+prog_lang = [('Python', 3.8), ('Java', 13), ('JavaScript', 2019), ('Scala', 2.13)]
+
+#1. Sort the list by each language's version in ascending order.
+def version_sort(pl):
+    """Return a new list of the (name, version) tuples, lowest version first."""
+    return sorted(pl, key=lambda entry: entry[1])
+
+print(version_sort(prog_lang))
+
+#2. Sort the list by the length of the name of each language in descending order.
+
+def lname_len_sort(pl):
+    """Return a new list ordered by language-name length, longest first."""
+    return sorted(pl, key=lambda entry: len(entry[0]), reverse=True)
+
+print(lname_len_sort(prog_lang))
+
+#3. Filter the list so that it only contains languages with 'a' in it.
+
+def filter_alang_pl(pl):
+    """Keep only the entries whose language name contains an 'a' (any case)."""
+    return [entry for entry in pl if "a" in entry[0].lower()]
+
+print(filter_alang_pl(prog_lang))
+
+
+#4. Filter the list so that it only contains languages whose version is in integer form
+
+def filter_intver_pl(pl):
+    """Keep only the entries whose version is exactly an int (not a float)."""
+    return [entry for entry in pl if type(entry[1]) is int]
+
+print(filter_intver_pl(prog_lang))
+
+#5. Transform the list so that it contains the tuples in the form, ("language in all lower case", length of the language string)
+
+def tuple_lang_len(pl):
+    """Return a list of (lower-cased name, name length) tuples, one per entry."""
+    return [(entry[0].lower(), len(entry[0])) for entry in pl]
+
+print(tuple_lang_len(prog_lang))
+
+#6. Generate a tuple in the form, ("All languages separated by commas", "All versions separated by commas")
+
+def tuple_lang_ver(pl):
+    """Return ("names joined by commas", "versions joined by commas")."""
+    return (",".join(t[0] for t in pl), ",".join(str(t[1]) for t in pl))
+
+print(tuple_lang_ver(prog_lang))
diff --git a/12_11_demo.py b/12_11_demo.py
new file mode 100644
index 0000000..66ec7e3
--- /dev/null
+++ b/12_11_demo.py
@@ -0,0 +1,107 @@
+from functools import reduce
+print([i for i in range(2,101) if all(i%j != 0 for j in range(2,i))])  # primes up to 100; starts at 2 because 1 is not prime
+
+#prime = [i for i in range(1,51) if (i%2 != 0 and i%3!=0 and i%5!=0 and i%7!=0) or (i==1 or i==2 or i==3 or i==5 or i==7)]
+#print(prime)
+
+#print(["FizzBuzz" if n%3==0 and n%5==0 else "Buzz" if n%5==0 else "Fizz" if n%3==0 else n for n in range(1,16)])
+
+
+numbers =[1,2,3,4]
+#print(list(filter(lambda x : x**2, numbers)))
+#print(map(lambda x : x**2, numbers))
+print("Fibbonacci Numbers")
+print([reduce(lambda x, y : x+y, numbers)] )
+
+my_list = ['python', 'java', 'scala', 'javascript']
+
+#Consider the list:
+prog_lang = [('Python', 3.8), ('Java', 13), ('JavaScript', 2019), ('Scala', 2.13)]
+
+#1. Sort the list by each language's version in ascending order.
+def version_sort(pl):
+    """Return a new list of the (name, version) tuples, lowest version first."""
+    return sorted(pl, key=lambda entry: entry[1])
+
+print(version_sort(prog_lang))
+
+#2. Sort the list by the length of the name of each language in descending order.
+
+def lname_len_sort(pl):
+    """Return a new list ordered by language-name length, longest first."""
+    return sorted(pl, key=lambda entry: len(entry[0]), reverse=True)
+
+print(lname_len_sort(prog_lang))
+
+#3. Filter the list so that it only contains languages with 'a' in it.
+
+def filter_alang_pl(pl):
+    """Keep only the entries whose language name contains an 'a' (any case)."""
+    return [entry for entry in pl if "a" in entry[0].lower()]
+
+print(filter_alang_pl(prog_lang))
+
+
+#4. Filter the list so that it only contains languages whose version is in integer form
+
+def filter_intver_pl(pl):
+    """Keep only the entries whose version is exactly an int (not a float)."""
+    return [entry for entry in pl if type(entry[1]) is int]
+
+print(filter_intver_pl(prog_lang))
+
+#5. Transform the list so that it contains the tuples in the form, ("language in all lower case", length of the language string)
+
+def tuple_lang_len(pl):
+    """Return a list of (lower-cased name, name length) tuples, one per entry."""
+    return [(entry[0].lower(), len(entry[0])) for entry in pl]
+
+print(tuple_lang_len(prog_lang))
+
+#6. Generate a tuple in the form, ("All languages separated by commas", "All versions separated by commas")
+
+def tuple_lang_ver(pl):
+    """Return ("names joined by commas", "versions joined by commas")."""
+    return (",".join(t[0] for t in pl), ",".join(str(t[1]) for t in pl))
+
+print(tuple_lang_ver(prog_lang))
+
+def fun(s):  # print the whitespace-separated pieces of the string s
+    print(s.split())  # split is a str method; the bare name split() was a NameError
+
+print(reduce(lambda x,y :(f'{x[0]},{y[0]}',f'{x[1]},{y[1]}'),prog_lang))
+
+
+print(fun("sushama.cardozo@gmail.com"))
+
+def outer(msg):  # closure demo: returns a function that remembers msg and lang
+    lang = 'Python'
+    def inner():
+        print(lang, msg)  # both names are read from outer()'s scope when inner() runs
+    return inner  # the function object itself is returned; it is not called here
+
+my_func = outer('is fun!!!')
+my_func() # output: 'Python is fun!!!'
+
+##############################################################################
+############### create a closure #############################################
+def multiples_of(n):  # closure factory: the returned function remembers n
+    def multiply(k):
+        return list(range(n, k, n))  # multiples of n below k; returned, not printed, so callers can store them
+    return multiply
+
+m3 = multiples_of(3)
+m5 = multiples_of(5)
+m3_under30 = m3(30)
+m7_under30 = multiples_of(7)(30)
+
+print(m3(60))
+print(type(m3))
+
+print(m5(50))
+print(type(m5))
+
+print(m3_under30)
+print(type(m3_under30))
+
+
diff --git a/2022_12_14_demo-files.py b/2022_12_14_demo-files.py
new file mode 100644
index 0000000..89538fc
--- /dev/null
+++ b/2022_12_14_demo-files.py
@@ -0,0 +1,19 @@
+with open("demo.txt", mode='w', encoding='utf-8') as file:  # 'w' creates or truncates demo.txt
+    file.write("This is the first line\n")
+    file.write("This is the second line\n")
+    file.write("This is the last line\n")
+
+with open("demo.txt", mode='r', encoding='utf-8') as file2:
+    print(file2.read(3))       # the first 3 characters: "Thi"
+    print(file2.readline())    # the rest of line one, up to and including its newline
+    print(file2.read())        # everything that is left, to the end of the file
+    file2.seek(0)              # move back to the start of the file
+    print(file2.tell())        # current position; 0 right after seek(0)
+    print(file2.readlines())   # all lines as a list of strings
+    file2.seek(0)
+    print(list(file2))         # iterating a file also yields it line by line
+
+with open("courses.txt", mode='w', encoding='utf-8') as course:  # 'w' because this block writes the file
+    course.write("Course Id,Course Name,Instructor\n")  # header row; use this block's own handle
+    course.write("C1,Intro to A+,Valerie Boss\n")
+    course.write("C2,Intro to Python,Sammi G\n")
diff --git a/2023_01_03_demp.jpynb b/2023_01_03_demp.jpynb
new file mode 100644
index 0000000..6c90075
--- /dev/null
+++ b/2023_01_03_demp.jpynb
@@ -0,0 +1,2 @@
+import mysql.connector as mariadb
+
diff --git a/Jupyter-Notebooks/.cache b/Jupyter-Notebooks/.cache
new file mode 100644
index 0000000..73e6158
--- /dev/null
+++ b/Jupyter-Notebooks/.cache
@@ -0,0 +1 @@
+{"access_token": "BQAr382mIA4vqu7Aziaa7255kByOS9f0napSFEDzget90NpHMknDEp-_oqZad2JC86PiQZPBvFSvdsToKHYI6mGc-mCm7GGEkEGpxWE1zxHwUEj4r7L1qRantSQ642hvw6xXcbHS4VEvKQ_mhrCr0dXtQe5XTRMVvKXKO2iFSy8dG17gV16yvppNXKyB0H_ow6m8Ug", "token_type": "Bearer", "expires_in": 3600, "expires_at": 1673766031, "scope": null, "refresh_token": "AQBoIYUM48u-gyEm0Pjgq7ZE2dEec_OECd-jTzyp-75xR5j5D439v0tBkgG4tfqLE1nPz7EwstIlKzn36zPzboVvewMNN9xXSbkPNXsTvZOwie94iwGOJXuiwHjDcWRv_J0"}
\ No newline at end of file
diff --git a/Jupyter-Notebooks/hackernews_api_practice.ipynb b/Jupyter-Notebooks/hackernews_api_practice.ipynb
new file mode 100644
index 0000000..d3402e5
--- /dev/null
+++ b/Jupyter-Notebooks/hackernews_api_practice.ipynb
@@ -0,0 +1,358 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://hacker-news.firebaseio.com/v0/item/34384789.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384788.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384787.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384786.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384785.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384784.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384783.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384782.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384781.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384780.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384779.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384778.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384777.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384776.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384775.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384774.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384773.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384772.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384771.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384770.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384769.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384768.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384767.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384766.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384765.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384764.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384763.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384762.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384761.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384760.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384759.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384758.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384757.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384756.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384755.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384754.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384753.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384752.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384751.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384750.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384749.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384748.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384747.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384746.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384745.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384744.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384743.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384742.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384741.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384740.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384739.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384738.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384737.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384736.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384735.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384734.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384733.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384732.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384731.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384730.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384729.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384728.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384727.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384726.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384725.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384724.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384723.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384722.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384721.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384720.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384719.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384718.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384717.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384716.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384715.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384714.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384713.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384712.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384711.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384710.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384709.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384708.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384707.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384706.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384705.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384704.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384703.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384702.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384701.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384700.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384699.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384698.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384697.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384696.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384695.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384694.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384693.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384692.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384691.json\n",
+ "https://hacker-news.firebaseio.com/v0/item/34384690.json\n"
+ ]
+ }
+ ],
+ "source": [
+ "import requests\n",
+ "\n",
+ "url=\"https://hacker-news.firebaseio.com/v0/maxitem.json\"\n",
+ "\n",
+ "baseurl=\"https://hacker-news.firebaseio.com/v0/item/\"\n",
+ "data = []\n",
+ "request = requests.get(url)\n",
+ "max_id = request.json()\n",
+ "\n",
+ "\n",
+ "for i in range(100):\n",
+ " url = baseurl + str(max_id) + \".json\"\n",
+ " max_id = max_id - 1\n",
+ " url = baseurl + str(max_id) + \".json\"\n",
+ " print(url)\n",
+ " try:\n",
+ " request = requests.get(url)\n",
+ " response = request.json()\n",
+ " data.append(response)\n",
+ " except:\n",
+ " print(\"Story does not exist anymore!\")\n",
+ " continue\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'by': '75dvtwin', 'id': 34384789, 'parent': 34384386, 'text': 'we can also assume that their vaccine related death are under-reported.', 'time': 1673735546, 'type': 'comment'}\n",
+ "{'by': 'TazeTSchnitzel', 'id': 34384788, 'parent': 34382927, 'text': 'ATI's Imageon became Qualcomm's Adreno, Falanx's Mali became Arm Mali, and Imagination and Nvidia are still around in some form. I wonder what happened to the others?', 'time': 1673735523, 'type': 'comment'}\n",
+ "{'by': 'Mountain_Skies', 'id': 34384787, 'parent': 34382476, 'text': 'Probably useful for moving materials between stations and delivering supplies to worksites on the tracks (which they always seem to be repairing).', 'time': 1673735499, 'type': 'comment'}\n",
+ "{'by': 'brnewd', 'id': 34384786, 'parent': 34382603, 'text': 'I use a stepper while working on my laptop, outdoor in the garden. When trigging a compilation/build I do a couple of situps or pushups. Standing up while working keeps me active and alert. I get more work done sitting down, but love the outdoor sessions better, especially for work that doesn't require extensive typing.', 'time': 1673735495, 'type': 'comment'}\n",
+ "{'by': 'arcatech', 'id': 34384785, 'parent': 34383202, 'text': 'The “starting line” was a long long time ago.', 'time': 1673735493, 'type': 'comment'}\n",
+ "{'by': 'globalreset', 'id': 34384784, 'parent': 34380913, 'text': 'So? Do you enjoy it? Because that's how it is and that's how it is going to be. It take hundreds, thousands hours and even if you make progress, it will still feel like you don't know enough.
If you enjoy it, just give yourself more time. If not then...', 'time': 1673735474, 'type': 'comment'}\n",
+ "{'by': 'Dylan16807', 'id': 34384783, 'parent': 34384554, 'text': 'And it would be bad for a submarine salesman to go to people that think swimming is very special and try to get them believing that submarines do swim.', 'time': 1673735474, 'type': 'comment'}\n",
+ "{'by': 'kevin_thibedeau', 'id': 34384782, 'parent': 34381090, 'text': 'The ones in Indianapolis suck because heavy traffic flows can blow through as a train, completely blocking entry from other directions.', 'time': 1673735471, 'type': 'comment'}\n",
+ "{'by': 'hanniabu', 'id': 34384781, 'parent': 34384432, 'text': 'This has "it's our planet to ruin" vibes', 'time': 1673735467, 'type': 'comment'}\n",
+ "{'by': 'goldenshale', 'id': 34384780, 'parent': 34377910, 'text': 'Lame. These are scared opportunists who are going to screw things up for everyone because they are afraid of the unknown. Seriously, do something new and creative if you are an artist. Design your way into relevance.', 'time': 1673735447, 'type': 'comment'}\n",
+ "{'by': 'based2', 'descendants': 0, 'id': 34384779, 'score': 1, 'time': 1673735437, 'title': 'Juniper Green', 'type': 'story', 'url': 'https://fr.wikipedia.org/wiki/Juniper_Green_(jeu)'}\n",
+ "{'by': 'zxcvbn4038', 'id': 34384778, 'parent': 34383720, 'text': 'Like anything else you need to diversify your risk. Have multiple payment processors running so that an outage at one, regardless of reason, doesn’t take you down entirely. These days there is really no barrier to doing this.
Reversing all of your charges is pretty extreme, I suspect there is some detail being omitted.
A polite - and brief - letter to the CEO can work wonders. I had a problem with my Goldman Sachs once where they locked my account, their security people blew me off for three weeks, I sent an email to David Solomon and an hour later the issue was resolved. David Solomon probably never saw my email but it got routed to someone who could help.
If you e-mail the CEO of US Bank it gets routed to a review committee, they investigate and respond in ten days.
Again the key is to be polite and brief. If your in full Karen mode or sending three pages of text then you will get passed over. Include enough information so the issue can be routed to the right person, please help, thank you.', 'time': 1673735429, 'type': 'comment'}\n",
+ "{'by': 'up2isomorphism', 'id': 34384777, 'parent': 34381884, 'text': 'It is all about people. When company grows, there is a choice needed to be made between holding the hiring standard and maximizing short term revenue, for public traded companies there is little chance of choosing the former. And it is always exponentially harder to get rid of bad hire than hiring bad people.
For google, at least before 2006, I did not know personally anybody does write bad code got in there. Until I start to see someone will always copy and paste code from internet and try to make some code barely works made to Google by just consistently practicing coding questions, the trend is quite obvious that none of stuff coming out there has a distinct quality differences compared to other technology companies.', 'time': 1673735424, 'type': 'comment'}\n",
+ "{'by': 'yumswiss', 'id': 34384776, 'parent': 34378460, 'text': 'Approach has helped:\\n- RSS Feed. Check only once a week\\n- NextDNS block distracting sites during the week', 'time': 1673735423, 'type': 'comment'}\n",
+ "{'by': 'UncleMeat', 'id': 34384775, 'parent': 34384177, 'text': 'Damore did much more than simply present the best available data regarding various personality distributions between men and women.', 'time': 1673735409, 'type': 'comment'}\n",
+ "{'by': 'Natsu', 'id': 34384774, 'parent': 34381697, 'text': '* * *', 'time': 1673735389, 'type': 'comment'}\n",
+ "{'by': 'iask', 'id': 34384773, 'parent': 34381895, 'text': 'Nice work. How about X12 to XML and X12 to JSON?', 'time': 1673735384, 'type': 'comment'}\n",
+ "{'by': 'Desmond45', 'id': 34384772, 'parent': 34382695, 'text': 'Do you need help on how to remotely spy and monitor your cheating partner’s phone to gain vivid proof and evidences of their cheating ways? Lookup REMOTESPYTECH at g m a i |', 'time': 1673735378, 'type': 'comment'}\n",
+ "{'by': 'mooreds', 'descendants': 0, 'id': 34384771, 'score': 1, 'time': 1673735372, 'title': 'I think Go is more verbose than Java', 'type': 'story', 'url': 'https://www.sivalabs.in/why-go-is-more-verbose-than-java/'}\n",
+ "{'by': 'Waterluvian', 'id': 34384770, 'parent': 34384711, 'text': 'Countless species of life have come, changed, and went over billions of years. But we arrive in the last few hundred thousand years and declare, “stop the cycling!”
I completely see (and share) your reasoning. But I think it’s very anthropocentric.', 'time': 1673735366, 'type': 'comment'}\n",
+ "{'by': 'alasdair_', 'id': 34384769, 'parent': 34379965, 'text': 'There was also the Jolly Rodger Cookbook (http://www.textfiles.com/anarchy/JOLLYROGER/) that was thee first set of "philez" that I read as a kid. Finding more like that led to me frequenting a bunch of interesting BBS, then learning how to, um, creatively pay, for those international calls and later into setting up my own board. It all kind of died off as the web became a thing but it was still a lot of fun.', 'time': 1673735353, 'type': 'comment'}\n",
+ "{'by': '__s', 'id': 34384768, 'parent': 34384656, 'text': 'Yes
Just because you have staging doesn't mean you don't need unit tests. Similarly, test in stage, then test in prod. Ideally in a way isolated from real prod users (eg, in an insurance system we had fake dealer accounts for testing)', 'time': 1673735353, 'type': 'comment'}\n",
+ "{'by': 'networked', 'descendants': 0, 'id': 34384767, 'score': 1, 'time': 1673735351, 'title': \"DragonFlyBSD's HAMMER2 File-System Being Ported to NetBSD\", 'type': 'story', 'url': 'https://www.phoronix.com/news/NetBSD-HAMMER2-Port'}\n",
+ "{'by': 'kevin_hu', 'descendants': 0, 'id': 34384766, 'score': 1, 'time': 1673735350, 'title': 'Is the Fed hiking too fast?', 'type': 'story', 'url': 'https://noahpinion.substack.com/p/is-the-fed-hiking-too-fast'}\n",
+ "{'by': 'cloudking', 'id': 34384765, 'parent': 34377538, 'text': 'It's faster and cheaper to use an off the shelf framework and template than building something original from scratch these days.', 'time': 1673735326, 'type': 'comment'}\n",
+ "{'by': 'Jcowell', 'id': 34384764, 'parent': 34381029, 'text': '> Risk sharing is best done through insurance. The government has no need to be involved in healthcare, beyond subsidizing premiums to some extent for those who are too poor to pay the full cost.
I disagree on the basis that insurance skew to not having to pay as much as possible. This conflicts with healthcare because if Insurance companies are able to determine that you are high risk for something, they would either outright deny you coverage or give you a price that outright means denial anyway.
The only entity capable of stopping this is one with the monopoly of power, I.e The Government.', 'time': 1673735324, 'type': 'comment'}\n",
+ "{'by': 'WalterBright', 'id': 34384763, 'parent': 34384707, 'text': 'It was Microsoft that holed IBM below the waterline. IBM's slide had already started by 1989.', 'time': 1673735303, 'type': 'comment'}\n",
+ "{'by': 'hulitu', 'id': 34384762, 'parent': 34382258, 'text': 'It depends on what audio setup you do the \\nlistening. On laptop speakeakers it will be no difference.', 'time': 1673735298, 'type': 'comment'}\n",
+ "{'by': 'mpro', 'descendants': 0, 'id': 34384761, 'score': 1, 'text': 'Hi, want to ask the community where to get start for prepare myself for metaverse programming. \\nI am currently a ranking engineer with 1.5 years of experience.', 'time': 1673735297, 'title': 'Ask HN: How to learn stuff for metaverse programming', 'type': 'story'}\n",
+ "{'by': 'KMag', 'id': 34384760, 'parent': 34383060, 'text': 'A few years ahead of me in high school, the wrestling team had a one-handed wrestler. A friend of mine on the wrestling team told me it was because the guy was helping a guy make a pipe bomb, using a hammer and a wooden dowel to pack match heads in a pipe. One guy was holding the pipe, and the other was swinging the hammer.', 'time': 1673735290, 'type': 'comment'}\n",
+ "{'by': 'superkuh', 'id': 34384759, 'parent': 34383203, 'text': 'I do appreciate you linking to articles but these articles are pretty terrible.
re #1: The WHO adding internet addiction reflects the amount of sway China's political processes have over WHO declarations more than anything else. Their internal political narrative is that this is a problem and the WHO is being used to support that. The repetition of the falsified blue light hypothesis re: sleep is also informative re: the quality of citation #1. As for "changes in glutamatergic and gabaergic" signalling... if that doesn't happen when it means you're brain dead. Glutamate or GABA expressing neurons literally make up ~3/4 of the neuronal cells in the brain. And both are regulated extraceullary by glial cells too. You cannot do anything without changing this. If they'd done fMRI or PET or something and could shown long term abberant changes in the glutamergic signalling in the shell of the nucelus accumbens then maybe it'd be saying something. But they don't and I'm getting ahead of myself.
Citation #2 shows that when people are doing something relaxing and then they stop doing it they aren't as relaxed. That's hardly surprising. The arguments seem to be pop-sci level characterizations of the brain where any change is seen as significant or having a valance, good or bad.
And then they go and cite obviously false out-dated concepts like the idea of dopaminergic cells being neccessary or sufficient for expressions of pleasure/reward,
> Dopamine plays a critical role in this circuitry, for the subjective pleasure associated with positive rewards, and the motivation or drive-related reinforcements associated with eating, drinking, or drugs [73,74]
>The initially pleasant, so-called rewarding effects of the drug are relayed by the release of dopamine in the nucleus accumbens (NA) by the synaptic endings from the neurons of the ventral tegmental area (VTA) of the mesocorticolimbic circuitry [79,80].
It's actually glutamergic cells in the shell of the nucleus accumbens that are necessary and sufficient (but not all encompassing) for pleasure expression in mammals. Dopaminergic neurons can be blocked off with antagonists and the expression is still complete. The modern understanding is that mesolimbic dopaminergic populations encode for wanting and reward prediction. Glutamergic cells encode for reward/pleasure. I'd hope that someone writing a policy paper like this would cite up to date knowledge but it is excusable and a side point.
The real problem with #2 is that it doesn't actually talk about withdrawl symptoms in "digital addicts". It talks about widthdrawl symptoms and neurochemistry known in actual drug addicts and then just implicitly applies that all these statements must apply to the behavior "digital addiction" too. They don't show data about "digital addiction" withdrawl.
The third article is behind a cloudflare wall and I cannot access it.', 'time': 1673735285, 'type': 'comment'}\n",
+ "{'by': 'lanstin', 'id': 34384758, 'parent': 34383356, 'text': 'That isnt the problem he is describing tho, it is that you have no primitives in emacs that correspond to a rectangle of text entry boxes like the semantics of csv. You only have sequence of chars.
The author wants people to rewrite emacs as a design system for arbitrary structures of data, with hooks for moving around the structure and editing the structure and i guess contents', 'time': 1673735280, 'type': 'comment'}\n",
+ "{'by': 'turtledragonfly', 'id': 34384757, 'parent': 34384084, 'text': 'As far as I know, the main thing is to do a lot of it, and with purpose.
Depending on your personality, this may be a big ask — if you get discouraged or distracted easily, for instance.
On the other hand, it's a very accessible skill: pen and paper are easy to come by.
Personally, I've always felt rewarded by doing it. It soothes something in me, so I haven't had the struggle with self-motivation that some report.
I think that's a key to getting good at many things, really: find some emotional satisfaction from what you're doing, and the motivation will follow. As opposed to simply saying "I want to be good at X" and mechanically pushing yourself to do it. If you don't have some baseline love for the activity, you may learn to dread it, by that approach.', 'time': 1673735276, 'type': 'comment'}\n",
+ "{'by': 'kevin_hu', 'descendants': 0, 'id': 34384756, 'score': 1, 'time': 1673735273, 'title': 'How to think well and understand things', 'type': 'story', 'url': 'https://bitsofwonder.substack.com/p/how-to-think-well-and-understand'}\n",
+ "{'by': 'throwaway82388', 'id': 34384755, 'parent': 34384322, 'text': 'Whether the acting party is a friend or enemy distinguishes the guilty from the blameless. Anything can be rationalized post facto. It’s how contemporary politics work and it’s breathtaking to witness.', 'time': 1673735270, 'type': 'comment'}\n",
+ "{'by': 'voisin', 'id': 34384754, 'parent': 34384716, 'text': 'Search is a disaster now, with the screen resembling a late 90’s teenager’s website with ads taking over everything visually. They’ve increased noise at the cost of signal and that itself will be the downfall. Now they are letting the ad cancer spread to YouTube with multiple unskippable ada before each video.
Ads will go to wherever they are most effective. Google to Facebook, then Tik Tok, etc.
Android - does anything about Android these days seem like it is a driving ambition for a company the scale of Google? And what does it contribute to their bottom line apart from ads via the platforms above that are eating themselves?', 'time': 1673735267, 'type': 'comment'}\n",
+ "{'by': 'eternalban', 'id': 34384753, 'parent': 34381906, 'text': 'Here's a podcast to go with that: https://podcast.clearerthinking.org/episode/138
The perennial issue for competent organizations as they scale is the quality of the middle management. Alex didn't write anything about hierarchies of molds (because it most certainly isn't moldy all the way to the top and never was). His pithy slides also don't discuss hiring practices at Google for managers. [We all know how they grill us poor doers.] He also failed to mention our dear friend Peter of the principle fame.
p(goal) = f(p(planners), p(managers), p(doers)) is a more realistic equation. What that f() looks like depends less on organizational structure than on the quality of the workers that mediate planning and building.
https://hbr.org/2021/06/the-real-value-of-middle-managers', 'time': 1673735266, 'type': 'comment'}\n",
+ "{'by': 'brianwawok', 'id': 34384752, 'parent': 34384656, 'text': 'Anytime you need to talk to a third party API, you need to test in prod.
Some people have sandbox apis. They are generally broken and not worth it. See eBay for super in depth sandbox API that never works.
You can read the docs 100 times over. At the end of the day, the API is going to work like it works. So you kind of “have to” test in prod for these guys.', 'time': 1673735262, 'type': 'comment'}\n",
+ "{'by': 'WalterBright', 'id': 34384751, 'parent': 34384716, 'text': 'I heard the exact same thing about every previous giant that failed. It was always "this time it's different!"', 'time': 1673735245, 'type': 'comment'}\n",
+ "{'by': 'djtriptych', 'id': 34384750, 'parent': 34384718, 'text': 'I want to say that Gmail, Google Maps, and Google's office suite represented jumps in usability akin to a total revolution for users. The first two I know for sure were developed in house, and are of course still running strong.
To me this is similar to Apple coming in to established markets and dominating them in short order. Not something to be dismissed IMO.', 'time': 1673735229, 'type': 'comment'}\n",
+ "{'by': 'cloudking', 'id': 34384749, 'parent': 34384656, 'text': 'I think it depends on how your application works. If you have the concept of customers, then you can have a test customer in production with test data that doesn't affect real customers for example. You can reset the test customer data each time you want to test.', 'time': 1673735196, 'type': 'comment'}\n",
+ "{'by': 'Dylan16807', 'id': 34384748, 'parent': 34384489, 'text': 'Did the author tell it which way or by how much?
If I say to discriminate on some feature and it consistently does it the same way, that's still a pretty bad bias. It probably shows up in other ways.', 'time': 1673735182, 'type': 'comment'}\n",
+ "{'by': 'chki', 'id': 34384747, 'parent': 34384711, 'text': 'But why? Why are algae better than rocks and ants better than algae if you take humans out of the equation?', 'time': 1673735172, 'type': 'comment'}\n",
+ "{'by': 'WalterBright', 'id': 34384746, 'parent': 34384733, 'text': 'Throughout the SO anti-trust trial, SO was losing market share steadily. It was being eaten away by smaller, nimbler competitors who had learned how to attack SO.
"Titan" by Ron Chernow', 'time': 1673735167, 'type': 'comment'}\n",
+ "{'by': 'rapind', 'id': 34384745, 'parent': 34384635, 'text': 'The biggest brain fart of humanity is our tendency towards monoculture (for scale). Nature is very obviously showing us how to sustain an ecosystem and yet in our incredible hubris we’re like “nah, we got this”. We just want to swim upstream for the hell of it.', 'time': 1673735163, 'type': 'comment'}\n",
+ "{'by': 'visarga', 'id': 34384744, 'parent': 34384403, 'text': 'chatGPT being able to write OpenAI API code is great, and all companies should prepare samples so future models can correctly interface with their systems.
But what will be needed is to create an AI that implements scientific papers. About 30% of papers have code implementation. That's a sizeable dataset to train a Codex model on.
You can have AI generating papers, and AI implementing papers, then learning to predict experimental results. This is how you bootstrap a self improving AI.
It does not learn only how to recreate itself, it learns how to solve all problems at the same time. A data engineering approach to AI: search and learn / solve and learn / evolve and learn.', 'time': 1673735162, 'type': 'comment'}\n",
+ "{'by': 'caseyross', 'id': 34384743, 'parent': 34384608, 'text': 'Owning the world, and being slow and bureaucratic, are not mutually exclusive. In fact, I imagine they're highly correlated.', 'time': 1673735137, 'type': 'comment'}\n",
+ "{'by': 'int_19h', 'id': 34384742, 'parent': 34377961, 'text': 'The point of my original comment was precisely that SGML was optimized for text documents. I agree that adopting it for configs was a mistake, but the complaint that "IBM lawyer who invented the SGML syntax had never heard of S-expression" doesn't make any sense in that context.', 'time': 1673735137, 'type': 'comment'}\n",
+ "{'by': 'moloch-hai', 'id': 34384741, 'parent': 34378615, 'text': 'It would have sufficed to put the legend in as one of the in-line images.
When looking with my phone, the big PDF does not display.', 'time': 1673735120, 'type': 'comment'}\n",
+ "{'by': 'weaksauce', 'id': 34384740, 'parent': 34384299, 'text': 'even doing < 5 min a day of learning a new language sees results in a year. drawing probably has a higher startup cost to get into things but i'd expect 10min to be sufficient.', 'time': 1673735112, 'type': 'comment'}\n",
+ "{'by': 'Ultimatt', 'id': 34384739, 'parent': 34366891, 'text': 'Go read what I said, the premise was you can afford the item outright but it still doesn't make sense to if you have interest free credit available.', 'time': 1673735093, 'type': 'comment'}\n",
+ "{'by': 'kstrauser', 'id': 34384738, 'parent': 34384389, 'text': 'No, 20 years ago was well back in...
Oh dear.', 'time': 1673735078, 'type': 'comment'}\n",
+ "{'by': 'didntreadarticl', 'id': 34384737, 'parent': 34383398, 'text': 'I dont think you understand mate', 'time': 1673735075, 'type': 'comment'}\n",
+ "{'by': 'cratermoon', 'id': 34384736, 'parent': 34381988, 'text': 'True, but not worthy of conspiracy-theory levels of speculation.', 'time': 1673735068, 'type': 'comment'}\n",
+ "{'by': 'ncraig', 'id': 34384735, 'parent': 34384524, 'text': 'Some might say that abstracts are the original clickbait.', 'time': 1673735059, 'type': 'comment'}\n",
+ "{'by': 'Waterluvian', 'id': 34384734, 'parent': 34384682, 'text': '> I would wager than as soon as a technological civilization is born, it's morally reprehensible to let it go extinct, since it no longer is at the full whims of evolution.
I see the logic here. But why, though? And technology is everywhere in non-human nature. While I think our technology and civilization is unlike anything else here, it’s still on the spectrum of nature’s exercise of community and technology. Are we really that special?
I think this heads towards the whole “Prime Directive” line of reasoning.
P.S. Eloi > Morlocks, of course. ;)', 'time': 1673735059, 'type': 'comment'}\n",
+ "{'by': 'coliveira', 'id': 34384733, 'kids': [34384746], 'parent': 34384608, 'text': 'I may agree with this on technology, but Standard Oil is still the same: they only changed names, slitted and merged back due to government.', 'time': 1673735057, 'type': 'comment'}\n",
+ "{'by': 'dv35z', 'descendants': 0, 'id': 34384732, 'score': 1, 'text': 'I’ve had several recent issues where videos I sent from my iPhone ended up in “tiny” low-res mode for the recipient. It got me thinking- I ought to be able to easyly upload a video to my own server, and them a link to it. It is 2023. We have the technology.
So that brings us to: Media hosting question -\\nI recently setup a knowledge base (Markdown/Obsidian/MkDocs/GitHub/Render.com/iCloud/Working Copy), and I’m looking for a similar setup for media hosting. I’d like a public domain (media.example.com), easy for me to upload into from iPhone - ideally using the Files app. Basically, Dropbox, but its using my own personal cloud infrastructure. Having password protected areas would be great (family photo albums?), and expiring links could be useful too
My current files situation is a scatter between iCloud, Google Drive, archives in a (currently offline) Synology NAS. Relatedly, would love to hear about peoples personal IT file system setup, who are thinking at a 10 year+ horizon… and don’t want to be fulltime sysadmin as a 3rd job.', 'time': 1673735051, 'title': 'Ask HN: Static Site Generator for photo and video sharing', 'type': 'story'}\n",
+ "{'by': 'deanCommie', 'id': 34384731, 'parent': 34384529, 'text': 'I'm with you on everything except the Twitter part.
Google doesn't leave most acquisitions alone - YouTube is a notable exception, and I'm really curious how they've been able to more independently than most Google acquisitions. (Perhaps implicit trust placed in Susan Wojcicki?)
We have no reason to think that Twitter wouldn't have become the same kind of mess Google Plus did under Google's stewardship.', 'time': 1673735044, 'type': 'comment'}\n",
+ "{'by': 'exolymph', 'descendants': 0, 'id': 34384730, 'score': 2, 'time': 1673735034, 'title': '9 Things I Learned from My 2 Year Old Baby Girl', 'type': 'story', 'url': 'https://madeincosmos.substack.com/p/9-things-i-learned-from-my-2-year'}\n",
+ "{'by': 'InCityDreams', 'id': 34384729, 'parent': 34382359, 'text': 'Not being Catholic, I looked up 'the rosary'. \\nWhich lasted longer...it, or the turbulence?
https://www.theholyrosary.org/howtoprayrosary (for example) was an interesting read, as was your comment.', 'time': 1673735025, 'type': 'comment'}\n",
+ "{'deleted': True, 'id': 34384728, 'parent': 34384238, 'time': 1673735019, 'type': 'comment'}\n",
+ "{'deleted': True, 'id': 34384727, 'parent': 34384524, 'time': 1673735016, 'type': 'comment'}\n",
+ "{'by': 'Zigurd', 'id': 34384726, 'parent': 34384322, 'text': 'You write as if we all saw "...months of BLM & Antifa rioting."
Did you see it? Did it affect you? Do you know the extent to which police departments in the PNW (an all over the US) are infiltrated with brownshirt neonazi gamgs?
Your post reeks of bad faith concern trolling.', 'time': 1673734997, 'type': 'comment'}\n",
+ "{'by': 'petre', 'id': 34384725, 'parent': 34384398, 'text': 'I am unconfortable without my seatbelt in the car. The same goes on an airplane. It's only a matter of getting used to it.', 'time': 1673734996, 'type': 'comment'}\n",
+ "{'by': 'dctoedt', 'id': 34384724, 'parent': 34384537, 'text': '> I’m not sure there’s solid argument for why one is more deserving of existence than the other.
Maybe we're helping to build a universe, and we can do more in that regard than single-celled organisms, cockroaches, etc.?
https://www.questioningchristian.org/2006/06/metanarratives_... (self-cite)', 'time': 1673734986, 'type': 'comment'}\n",
+ "{'by': 'thdespou', 'id': 34384723, 'parent': 34383925, 'text': 'They are doubling down on propaganda operations, throwing bodies as cannon fodder, threatening and committing genocidal acts against Ukrainians. I think there would be a need for greater investment from the west to push them back as they are trying to consolidate the invaded territories. It's just sad to see the Russian people becoming a degenerate nation.', 'time': 1673734984, 'type': 'comment'}\n",
+ "{'by': 'ShamelessC', 'id': 34384722, 'parent': 34384627, 'text': 'Assuming a motivated “attacker”, yes. The average user will have no such notion of “jailbreaks”, and it’s at least clear when one _is_ attempting to “jailbreak” a model (given a full log of the conversation and a competent human investigator).
I think the class of problems that remain are basically outliers that are misaligned and don’t trip up the model’s detection mechanism. Given the nature of language and culture (not to mention that they both change over time), I imagine there are a lot of these. I don’t have any examples (and I don’t think yelling “time’s up” when such outliers are found is at all helpful).', 'time': 1673734978, 'type': 'comment'}\n",
+ "{'by': 'williamcotton', 'id': 34384721, 'parent': 34384582, 'text': 'Yes, and we are now using the artistic definition of “derived” and not the legal definition.
You cannot copyright “any image that resembles Joe Biden”.', 'time': 1673734967, 'type': 'comment'}\n",
+ "{'by': 'MaxBarraclough', 'id': 34384720, 'parent': 34384300, 'text': 'The point of copyleft is to dictate the licence you must use, if you wish to (roughly speaking) link with the copyleft-licensed work. There are plenty of libraries that you cannot use if you wish to distribute your program without making its source-code available.
The unusual thing here is that the creators of a linker are apparently trying to have the copyleft licence propagate to code that is input to the linker. Others have pointed out that GCC has exceptions for this kind of thing, despite that it is released under a strong copyleft licence (GPLv3+).', 'time': 1673734957, 'type': 'comment'}\n",
+ "{'by': 'puzzlingcaptcha', 'descendants': 0, 'id': 34384719, 'score': 2, 'time': 1673734942, 'title': 'Use.GPU Goes Trad', 'type': 'story', 'url': 'https://acko.net/blog/use-gpu-goes-trad/'}\n",
+ "{'by': 'coliveira', 'id': 34384718, 'kids': [34384750], 'parent': 34381884, 'text': 'It is funny because 20 years ago Google was selling itself as the company of the future that would dominate everything in technology. Their "genius" founders were spending billions on new endeavors. In fact, other than the original search engine and associated browser, they only got youtube and other businesses as acquisition (invented by others) and android as a reaction to iOS. Everything else they tried has basically failed or being inexpressive.', 'time': 1673734941, 'type': 'comment'}\n",
+ "{'by': 'dilyevsky', 'id': 34384717, 'parent': 34381459, 'text': 'Everyone onboard got six flags experience without the lines', 'time': 1673734932, 'type': 'comment'}\n",
+ "{'by': 'ChuckNorris89', 'id': 34384716, 'kids': [34384754, 34384751], 'parent': 34384608, 'text': 'Google's monopoly over search, email, maps, ads, Android and Youtube will be hard to beat by smaller and nimble competitors. It's far too entrenched and the moat it has built is too tough and impossibly expensive for newcomers to beat. And if newcomers do turn into a threat, they will be swiftly acquired and absorbed into the machine.
If it was possible, it would have happened already.
Same for the likes of Nvidia.
The only real thereat to Google is government regulators breaking their products up into separate entities.', 'time': 1673734925, 'type': 'comment'}\n",
+ "{'by': 'Dracophoenix', 'id': 34384715, 'parent': 34384584, 'text': 'There already is a Netflix anime loosely-based on the historical events, although it wasn't a very good adaptation in my opinion.
Another anime, Afro Samurai, which is even more loosely-based, is a lot more interesting, albeit quite over-the-top.
https://myanimelist.net/anime/1292/Afro_Samurai', 'time': 1673734920, 'type': 'comment'}\n",
+ "{'by': 'kstrauser', 'id': 34384714, 'parent': 34382820, 'text': 'Could you explain the “through paper” bit? I can’t picture this.', 'time': 1673734918, 'type': 'comment'}\n",
+ "{'by': 'MilnerRoute', 'descendants': 0, 'id': 34384713, 'score': 2, 'time': 1673734914, 'title': \"CDC now says it's 'very unlikely' Pfizer booster has stroke risk after review\", 'type': 'story', 'url': 'https://www.cnbc.com/2023/01/13/pfizer-covid-booster-likely-doesnt-carry-stroke-risk-for-seniors-cdc-says.html'}\n",
+ "{'by': 'closewith', 'id': 34384712, 'parent': 34384537, 'text': 'Total sterilisation is a guaranteed outcome on a long enough time line.', 'time': 1673734912, 'type': 'comment'}\n",
+ "{'by': 'TylerLives', 'id': 34384711, 'kids': [34384770, 34384747], 'parent': 34384537, 'text': 'Given how long it takes for complex life forms to develop, and how other planets known to us don't seem to have any life at all, I think it's reasonable to prefer the existing life to the future potential life, even from a non-anthropocentric perspective.', 'time': 1673734906, 'type': 'comment'}\n",
+ "{'by': 'willjp', 'id': 34384710, 'parent': 34381051, 'text': 'I get the impression that most people's chemistry experience was much different than mine.\\nOurs was about rote memorization, period. I kind of feel like I missed out.', 'time': 1673734901, 'type': 'comment'}\n",
+ "{'by': 'chedoku', 'id': 34384709, 'parent': 34384572, 'text': 'It was promoted from a pawn ;)
joke aside, there can be: \\n1- any arbitrary number of pieces on the board\\n2- pawns in the first and last rows\\n3- missing king\\n4- missing opposite color pieces', 'time': 1673734900, 'type': 'comment'}\n",
+ "{'by': 'strangattractor', 'id': 34384708, 'parent': 34252302, 'text': 'Looky here Mips is mmaking a high performance multicore CPU for the server environment. That certainly didn't take that long.
https://www.mips.com/products/risc-v/', 'time': 1673734900, 'type': 'comment'}\n",
+ "{'by': 'Spooky23', 'id': 34384707, 'kids': [34384763], 'parent': 34384616, 'text': 'That’s a direct result of the Telecommunications Act of 1996. That law will shape this century. It gave us the blessing of the internet as we know it and the curse of the media consolidation and assault on political speech that was accelerated by the elimination of restrictions on media ownership and the eventual Supreme Court decisions that gutted campaign finance rules.
Without that law, the alignment of companies like IBM and AT&T’s derivatives would have dominated big business and prevented the startup ecosystem from developing.', 'time': 1673734875, 'type': 'comment'}\n",
+ "{'by': 'turtledragonfly', 'id': 34384706, 'parent': 34384477, 'text': 'I do, at times, somewhat.
I think software is often seen as a "rapid development" process, esp. when compared to hardware development. But somewhat ironically, I find actually writing software to be pretty slow compared to just doodling out ideas on paper, so I tend to do a lot of that first, especially when treading new territory.
On occasion, in a REPL-style environment, I can get some amount of that same freedom of exploration and experimentation at a keyboard, but it's still hard to come close to pencil and paper, for me.
Sometimes I wonder what it would be to have the mind of someone like Tesla, who could reportedly design complex objects in his head, down to the details. The freedom of experimentation in one's own mind is even better than pencil-and-paper, but I have trouble holding on to the details in that mode.', 'time': 1673734871, 'type': 'comment'}\n",
+ "{'by': 'anthomtb', 'id': 34384705, 'parent': 34382974, 'text': 'Trial and error. I wish I knew a way to find a good therapist without that process. But I do not. And unlike dentists, doctors, counter installers, pet sitters, window cleaners and most other services, folks will not fall over themselves to give you a therapist recommendation (maybe that is a peculiarity of my circle of acquaintances).
To find a therapist, Psychology Today worked well enough. To me, having one is better than having none.', 'time': 1673734870, 'type': 'comment'}\n",
+ "{'by': 'codeflo', 'id': 34384704, 'parent': 34383792, 'text': 'So you're saying "if I put the made-up straw man argument that I intend to knock down in quotation marks, it's less obvious that it's not actually what the other person wrote"?', 'time': 1673734858, 'type': 'comment'}\n",
+ "{'by': 'luckylion', 'id': 34384703, 'parent': 34381802, 'text': 'I'm sure it gets fuzzy on the edges, but "I don't heat and it's still warm" isn't in that zone.
What I'm talking about is: given you want ~18°c/60°f in rooms on average (20°c/68°f for more comfort), and you have a fixed volume of space inside a building, you're not magically saving energy by turning your heat down, because your neighbors need to turn theirs up to achieve the average temperature because insulation is mostly on the outside, not between flats.
You could counteract that by insulating your rooms towards your neighbors, which would also achieve your goal of not going above 10°c/50°f.', 'time': 1673734855, 'type': 'comment'}\n",
+ "{'by': 'mooreds', 'descendants': 0, 'id': 34384702, 'score': 1, 'time': 1673734838, 'title': 'Avoiding the trap in your 2023 Strategy (video)', 'type': 'story', 'url': 'https://www.youtube.com/watch?v=7s4610orrFA'}\n",
+ "{'by': 'toomuchtodo', 'id': 34384701, 'parent': 34384652, 'text': 'The launch is typical, the twin boosters flying back to land together is a sight to see at least once.', 'time': 1673734837, 'type': 'comment'}\n",
+ "{'by': 'dvzk', 'id': 34384700, 'parent': 34381410, 'text': 'Maybe it’s just me, but I would never go through a random certificate authority like “K Software”. You are not cryptographically verifying the developer’s signing identity, you are trusting KSoftware’s attestation that the signed binary is authentic.', 'time': 1673734825, 'type': 'comment'}\n",
+ "{'by': 'musicale', 'id': 34384699, 'parent': 34381574, 'text': 'I have found many abstract-type paintings (and non-abstract for that matter) to be much more interesting in person, where you can experience the actual scale, texture, colors, multiple viewing angles and perspectives, reflections/interaction with the environment and other viewers, etc..
Perhaps that's an argument for high-resolution VR museums, with better scans of paintings to capture 3D texture, layers, transparency, reflectivity, etc.. ;-)', 'time': 1673734814, 'type': 'comment'}\n",
+ "{'by': '0x64', 'id': 34384698, 'parent': 34383126, 'text': 'I do overnight oats; in the morning, all you have to do is pop the lid.
* Wholegrain oats (1.5 to 2 dl)
* Greek yogurt (1.5 to 2 dl)
* Milk, skimmed/whole (3 dl)
Then, throw in whatever. A sliced banana or berries, honey, a pinch of vanilla extract. Prep it in the evening, chuck it into the fridge, and it's done in the morning.', 'time': 1673734790, 'type': 'comment'}\n",
+ "{'by': 'TurkishPoptart', 'id': 34384697, 'parent': 34380809, 'text': 'Sadly, the vaccines were not designed to prevent infection, only to prevent severe illness. I can understand if you were misinformed, even the US President shared this untruth.', 'time': 1673734785, 'type': 'comment'}\n",
+ "{'by': 'sph', 'id': 34384696, 'parent': 34381524, 'text': 'Your comment I replied to is overbearing, overprotective advice about something titled the Anarchist Cookbook, for God's sake.
I tried to restore a bit of that reckless spirit with a cheeky comment, but I am very sad to see the nanny state is out in force today. Gah, so boring.', 'time': 1673734743, 'type': 'comment'}\n",
+ "{'by': '9dev', 'id': 34384695, 'parent': 34375842, 'text': 'Yeah, but I would want to know which library function could possibly throw an error and not be caught off guard by that after deploying to prod :-/', 'time': 1673734736, 'type': 'comment'}\n",
+ "{'by': 'samspenc', 'descendants': 0, 'id': 34384694, 'score': 2, 'time': 1673734727, 'title': 'Apple Card responsible for more than $1.2B loss for Goldman Sachs', 'type': 'story', 'url': 'https://9to5mac.com/2023/01/13/apple-card-billion-dollars-plus-loss/'}\n",
+ "{'by': 'Apocryphon', 'id': 34384693, 'parent': 34382710, 'text': 'Which books of his would you recommend one start with?', 'time': 1673734722, 'type': 'comment'}\n",
+ "{'by': 'tyingq', 'id': 34384692, 'parent': 34382751, 'text': 'Ah, interesting. I saw "analog" and was expecting something more like controlling crosspoint switch ICs[1] to cross connect lots of handsets in arbitrary ways. But this is multiplexing, digital routing, etc.
About a year and half ago I decided to start volunteer with Marine Corps Scholarship Foundation. (I been out of the Marines for about 10 years.) For reason out of my control I'm now the President of the Colorado chapter.
Let me know if any wants to come to the Gold Tournament or volunteer. ;)
Money is great, but volunteering is really giving back.
http://www.mcsf.org', 'time': 1673734710, 'type': 'comment'}\n",
+ "{'by': 'some-mthfka', 'id': 34384690, 'parent': 34384420, 'text': 'Yep. I will have to add it to the article (and why it's insufficient as well).', 'time': 1673734692, 'type': 'comment'}\n"
+ ]
+ }
+ ],
+ "source": [
+ "for d in data:\n",
+ " print(d)\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#2. Write a function to display all the items written by a certain user\n",
+ "def items_by_user(user):\n",
+ " for d in data:\n",
+ " if \"by\" in d:\n",
+ " if d[\"by\"] == user:\n",
+ " return d[\"id\"]\n",
+ "print(items_by_user(\"Genbox\"))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#3. Write a function to display the average score of all items\n",
+ "def avg_score_all_items():\n",
+ " total = 0\n",
+ " score = 0\n",
+ " for d in data:\n",
+ " if \"score\" in d:\n",
+ " total += 1\n",
+ " score += d[\"score\"]\n",
+ " avg = round(score/total, 2)\n",
+ " return avg\n",
+ "\n",
+ "print(avg_score_all_items())\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#4. Write a function to display the average score of the items by a certain user\n",
+ "import re\n",
+ "def avg_score_by_user(user):\n",
+ " total = 0\n",
+ " score = 0\n",
+ " if user == \"\":\n",
+ " return 0\n",
+ " for d in data:\n",
+ " if \"by\" in d and \"score\" in d:\n",
+ " if re.search(user, d[\"by\"]):\n",
+ " print(d[\"by\"], d[\"score\"])\n",
+ " total += 1\n",
+ " score += d[\"score\"]\n",
+ " if total == 0:\n",
+ " avg = 0\n",
+ " else:\n",
+ " avg = round(score/total, 2)\n",
+ " return avg\n",
+ "\n",
+ "print(avg_score_by_user(\"y\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "329e74eab13e1efbefcaeabff40e1514f85405b91e84e04e6869e473b0154ff3"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Jupyter-Notebooks/pandas_demos b/Jupyter-Notebooks/pandas_demos
new file mode 160000
index 0000000..ecc3d91
--- /dev/null
+++ b/Jupyter-Notebooks/pandas_demos
@@ -0,0 +1 @@
+Subproject commit ecc3d911195264b5a65edab134e5612c534e76bb
diff --git a/Jupyter-Notebooks/pandas_demos-main.zip b/Jupyter-Notebooks/pandas_demos-main.zip
new file mode 100644
index 0000000..62dc309
Binary files /dev/null and b/Jupyter-Notebooks/pandas_demos-main.zip differ
diff --git a/Jupyter-Notebooks/pandas_demos-main/.gitignore b/Jupyter-Notebooks/pandas_demos-main/.gitignore
new file mode 100644
index 0000000..6fbb2ea
--- /dev/null
+++ b/Jupyter-Notebooks/pandas_demos-main/.gitignore
@@ -0,0 +1,4 @@
+my_environment/
+secrets1.py
+.env
+__pycache__
\ No newline at end of file
diff --git a/Jupyter-Notebooks/pandas_demos-main/74 or so Exercises-checkpoint.ipynb b/Jupyter-Notebooks/pandas_demos-main/74 or so Exercises-checkpoint.ipynb
new file mode 100644
index 0000000..d7cd84a
--- /dev/null
+++ b/Jupyter-Notebooks/pandas_demos-main/74 or so Exercises-checkpoint.ipynb
@@ -0,0 +1,1776 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "10746d30",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fc4bd4f0",
+ "metadata": {},
+ "source": [
+ "1) Demonstrate how to import Pandas and check the version of the installation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3a3bc97b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(pd.show_versions())\n",
+ "print(pd.__version__)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b8fc4f4a",
+ "metadata": {},
+ "source": [
+ "2) Transform this list, numpy array, and dictionary into a series."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "21b30b4e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "pandas.core.series.Series"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "mylist = list('abcedfghijklmnopqrstuvwxyz')\n",
+ "mylist_series = pd.Series(mylist)\n",
+ "#mylist_series\n",
+ "myarr = np.arange(26)\n",
+ "myarr_series = pd.Series(myarr)\n",
+ "#myarr_series\n",
+ "mydict = dict(zip(mylist, myarr))\n",
+ "mydict_series = pd.Series(mydict)\n",
+ "mydict_series"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a17155a8",
+ "metadata": {},
+ "source": [
+ "3) Convert the index of your previous series into a column of the dataframe."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "db0f1395",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "pandas.core.frame.DataFrame"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#pd.DataFrame(mydict_series)\n",
+ "#mydict_series.to_frame(name=\"col1\")\n",
+ "df = mydict_series.reset_index()\n",
+ "type(df)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6f625f24",
+ "metadata": {},
+ "source": [
+ "4) Combine the series below into one dataframe."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "d268747c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
0
\n",
+ "
1
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "
\n",
+ "
0
\n",
+ "
a
\n",
+ "
0
\n",
+ "
\n",
+ "
\n",
+ "
1
\n",
+ "
b
\n",
+ "
1
\n",
+ "
\n",
+ "
\n",
+ "
2
\n",
+ "
c
\n",
+ "
2
\n",
+ "
\n",
+ "
\n",
+ "
3
\n",
+ "
e
\n",
+ "
3
\n",
+ "
\n",
+ "
\n",
+ "
4
\n",
+ "
d
\n",
+ "
4
\n",
+ "
\n",
+ "
\n",
+ "
5
\n",
+ "
f
\n",
+ "
5
\n",
+ "
\n",
+ "
\n",
+ "
6
\n",
+ "
g
\n",
+ "
6
\n",
+ "
\n",
+ "
\n",
+ "
7
\n",
+ "
h
\n",
+ "
7
\n",
+ "
\n",
+ "
\n",
+ "
8
\n",
+ "
i
\n",
+ "
8
\n",
+ "
\n",
+ "
\n",
+ "
9
\n",
+ "
j
\n",
+ "
9
\n",
+ "
\n",
+ "
\n",
+ "
10
\n",
+ "
k
\n",
+ "
10
\n",
+ "
\n",
+ "
\n",
+ "
11
\n",
+ "
l
\n",
+ "
11
\n",
+ "
\n",
+ "
\n",
+ "
12
\n",
+ "
m
\n",
+ "
12
\n",
+ "
\n",
+ "
\n",
+ "
13
\n",
+ "
n
\n",
+ "
13
\n",
+ "
\n",
+ "
\n",
+ "
14
\n",
+ "
o
\n",
+ "
14
\n",
+ "
\n",
+ "
\n",
+ "
15
\n",
+ "
p
\n",
+ "
15
\n",
+ "
\n",
+ "
\n",
+ "
16
\n",
+ "
q
\n",
+ "
16
\n",
+ "
\n",
+ "
\n",
+ "
17
\n",
+ "
r
\n",
+ "
17
\n",
+ "
\n",
+ "
\n",
+ "
18
\n",
+ "
s
\n",
+ "
18
\n",
+ "
\n",
+ "
\n",
+ "
19
\n",
+ "
t
\n",
+ "
19
\n",
+ "
\n",
+ "
\n",
+ "
20
\n",
+ "
u
\n",
+ "
20
\n",
+ "
\n",
+ "
\n",
+ "
21
\n",
+ "
v
\n",
+ "
21
\n",
+ "
\n",
+ "
\n",
+ "
22
\n",
+ "
w
\n",
+ "
22
\n",
+ "
\n",
+ "
\n",
+ "
23
\n",
+ "
x
\n",
+ "
23
\n",
+ "
\n",
+ "
\n",
+ "
24
\n",
+ "
y
\n",
+ "
24
\n",
+ "
\n",
+ "
\n",
+ "
25
\n",
+ "
z
\n",
+ "
25
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1\n",
+ "0 a 0\n",
+ "1 b 1\n",
+ "2 c 2\n",
+ "3 e 3\n",
+ "4 d 4\n",
+ "5 f 5\n",
+ "6 g 6\n",
+ "7 h 7\n",
+ "8 i 8\n",
+ "9 j 9\n",
+ "10 k 10\n",
+ "11 l 11\n",
+ "12 m 12\n",
+ "13 n 13\n",
+ "14 o 14\n",
+ "15 p 15\n",
+ "16 q 16\n",
+ "17 r 17\n",
+ "18 s 18\n",
+ "19 t 19\n",
+ "20 u 20\n",
+ "21 v 21\n",
+ "22 w 22\n",
+ "23 x 23\n",
+ "24 y 24\n",
+ "25 z 25"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))\n",
+ "ser2 = pd.Series(np.arange(26))\n",
+ "df1=pd.concat([ser1, ser2], axis=1)\n",
+ "df1\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4bdd8618",
+ "metadata": {},
+ "source": [
+ "5) Give the series below a name."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "b18f9c01",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 a\n",
+ "1 b\n",
+ "2 c\n",
+ "3 e\n",
+ "4 d\n",
+ "5 f\n",
+ "6 g\n",
+ "7 h\n",
+ "8 i\n",
+ "9 j\n",
+ "10 k\n",
+ "11 l\n",
+ "12 m\n",
+ "13 n\n",
+ "14 o\n",
+ "15 p\n",
+ "16 q\n",
+ "17 r\n",
+ "18 s\n",
+ "19 t\n",
+ "20 u\n",
+ "21 v\n",
+ "22 w\n",
+ "23 x\n",
+ "24 y\n",
+ "25 z\n",
+ "Name: list1, dtype: object"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))\n",
+ "ser.name = \"list1\"\n",
+ "ser"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cdc8d244",
+ "metadata": {},
+ "source": [
+ "6) Find the elements in the first series (ser1) not in the second series (ser2)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "f1276978",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 1\n",
+ "1 2\n",
+ "2 3\n",
+ "3 6\n",
+ "4 7\n",
+ "5 8\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ser1 = pd.Series([1, 2, 3, 4, 5])\n",
+ "ser2 = pd.Series([4, 5, 6, 7, 8])\n",
+ "#ser1[~ser1.isin(ser2)]\n",
+ "#pd.Series(np.intersect1d(ser1, ser2))\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d336f7dd",
+ "metadata": {},
+ "source": [
+ "7) Find the elements in both series that are not in common (remove them if they exist in both)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "28768232",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 1\n",
+ "1 2\n",
+ "2 3\n",
+ "3 6\n",
+ "4 7\n",
+ "5 8\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 50,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ser1 = pd.Series([1, 2, 3, 4, 5])\n",
+ "ser2 = pd.Series([4, 5, 6, 7, 8])\n",
+ "#ser1.compare(ser2, keep_equal=False)\n",
+ "#ser1.isin(ser2)\n",
+ "#ser1[~ser1.isin(ser2)]\n",
+ "#ser1 = pd.Series(['a', 'b', 'c', 'd', 'e'])\n",
+ "#ser2 = pd.Series(['d', 'e', 'f', 'g', 'h'])\n",
+ "\n",
+ "#ser_u = pd.Series(np.union1d(ser1, ser2)) # union\n",
+ "#ser_i = pd.Series(np.intersect1d(ser1, ser2)) # intersect\n",
+ "#ser_u[~ser_u.isin(ser_i)]\n",
+ "\n",
+ "pd.Series(np.setxor1d(ser1,ser2))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5d7188b5",
+ "metadata": {},
+ "source": [
+ "8) Find the minimum, max, 25th percentile, and 75th percentile in the series."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "5a1a3ce7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 25.000000\n",
+ "mean 10.174127\n",
+ "std 4.238628\n",
+ "min 2.378357\n",
+ "25% 6.565532\n",
+ "50% 10.896988\n",
+ "75% 12.494911\n",
+ "max 18.607074\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ser = pd.Series(np.random.normal(10, 5, 25))\n",
+ "ser.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2e3c1803",
+ "metadata": {},
+ "source": [
+ "9) Obtain the count of each unique item in the series."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "16a2880a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "449036ea",
+ "metadata": {},
+ "source": [
+ "10) Keep the two most frequent items in the series and change all items that are not those two into \"Other\"."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e86f2a9e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "np.random.RandomState(100)\n",
+ "ser = pd.Series(np.random.randint(1, 5, [12]))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b5928875",
+ "metadata": {},
+ "source": [
+ "11) Bin the series below into 10 equal deciles and replace the values with the bin name."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "979b3d36",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser = pd.Series(np.random.random(20))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bee79a87",
+ "metadata": {},
+ "source": [
+ "12) Reshape the series ser into a dataframe with 7 rows and 5 columns."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "17a3513a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser = pd.Series(np.random.randint(1, 10, 35))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "28c62e92",
+ "metadata": {},
+ "source": [
+ "13) Find the positions of numbers that are multiples of 3 from ser."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7ae884a9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser = pd.Series(np.random.randint(1, 10, 7))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7a60f247",
+ "metadata": {},
+ "source": [
+ "14) From ser, extract the items at positions in list pos."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "42338aff",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))\n",
+ "pos = [0, 4, 8, 14, 20]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5da1836c",
+ "metadata": {},
+ "source": [
+ "15) Stack ser1 and ser2 vertically and horizontally (to form a dataframe)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5dfea534",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser1 = pd.Series(range(5))\n",
+ "ser2 = pd.Series(list('abcde'))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c252d7bb",
+ "metadata": {},
+ "source": [
+ "16) Get the positions of items of ser2 in ser1 as a list."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "da51891e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser1 = pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])\n",
+ "ser2 = pd.Series([1, 3, 10, 13])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "098e34c8",
+ "metadata": {},
+ "source": [
+ "17) Compute the mean squared error of truth and pred series."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "df7ff599",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "truth = pd.Series(range(10))\n",
+ "pred = pd.Series(range(10)) + np.random.random(10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7acf6f79",
+ "metadata": {},
+ "source": [
+ "18) Change the first character of each word in ser to upper case."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c210bbe9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser = pd.Series(['how', 'to', 'kick', 'ass?'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "053f198a",
+ "metadata": {},
+ "source": [
+ "19) Calculate the number of characters for each element in the series."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "216f9a3e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser = pd.Series(['how', 'to', 'kick', 'ass?'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a26bcf59",
+ "metadata": {},
+ "source": [
+ "20) Calculate the difference of differences between the consecutive numbers of ser."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c6dba0cb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser = pd.Series([1, 3, 6, 10, 15, 21, 27, 35])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c64a2901",
+ "metadata": {},
+ "source": [
+ "21) Convert the date-strings to a timeseries."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "db82de89",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303', '2013/04/04', '2014-05-05', '2015-06-06T12:20'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5a5f83c0",
+ "metadata": {},
+ "source": [
+ "22) Get the day of month, week number, day of year and day of week from ser."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4bcc124e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303', '2013/04/04', '2014-05-05', '2015-06-06T12:20'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c79f97b7",
+ "metadata": {},
+ "source": [
+ "23) Change ser to dates that start with 4th of the respective months."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a6384074",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser = pd.Series(['Jan 2010', 'Feb 2011', 'Mar 2012'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3dbb2f54",
+ "metadata": {},
+ "source": [
+ "24) From ser, extract words that contain at least 2 vowels."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "32dec09a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser = pd.Series(['Apple', 'Orange', 'Plan', 'Python', 'Money'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "44177063",
+ "metadata": {},
+ "source": [
+ "25) Extract the valid emails from the series emails. The regex pattern for valid emails is provided as reference."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3e85fe14",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "emails = pd.Series(['buying books at amazom.com', 'rameses@egypt.com', 'matt@t.co', 'narendra@modi.com'])\n",
+ "pattern ='[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\\\.[A-Za-z]{2,4}'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "66fadf17",
+ "metadata": {},
+ "source": [
+ "26) Compute the mean of weights of each fruit."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "75855cd1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fruit = pd.Series(np.random.choice(['apple', 'banana', 'carrot'], 10))\n",
+ "weights = pd.Series(np.linspace(1, 10, 10))\n",
+ "print(weights.tolist())\n",
+ "print(fruit.tolist())\n",
+ "#examples\n",
+ "#> [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]\n",
+ "#> ['banana', 'carrot', 'apple', 'carrot', 'carrot', 'apple', 'banana', 'carrot', 'apple', 'carrot']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "41c8ebe7",
+ "metadata": {},
+ "source": [
+ "27) Compute the Euclidean distance between series (points) p and q, without using a packaged formula."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0c9cbfc2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "p = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])\n",
+ "q = pd.Series([10, 9, 8, 7, 6, 5, 4, 3, 2, 1])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dc8326a3",
+ "metadata": {},
+ "source": [
+ "28) Get the positions of peaks (values surrounded by smaller values on both sides) in ser."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3ea30aeb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser = pd.Series([2, 10, 3, 4, 9, 10, 2, 7, 3])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "04f5675f",
+ "metadata": {},
+ "source": [
+ "29) Replace the spaces in my_str with the least frequent character."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1d4aff8d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "my_str = 'dbc deb abed gade'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "146b95f0",
+ "metadata": {},
+ "source": [
+ "30) Create a time series that starts at ‘2000-01-01’ and continues for 10 weekends (Saturdays) after that, with random numbers as values."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e4cd8dd6",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b8a17176",
+ "metadata": {},
+ "source": [
+ "31) Series ser has missing dates and values. Make all missing dates appear and fill up with value from previous date."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c8a10d5d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser = pd.Series([1,10,3,np.nan], index=pd.to_datetime(['2000-01-01', '2000-01-03', '2000-01-06', '2000-01-08']))\n",
+ "print(ser)\n",
+ "#> 2000-01-01 1.0\n",
+ "#> 2000-01-03 10.0\n",
+ "#> 2000-01-06 3.0\n",
+ "#> 2000-01-08 NaN\n",
+ "#> dtype: float64"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "95b261ac",
+ "metadata": {},
+ "source": [
+ "32) Compute autocorrelations for the first 10 lags of ser. Find out which lag has the largest correlation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "052982b3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser = pd.Series(np.arange(20) + np.random.normal(1, 10, 20))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1ae48ebc",
+ "metadata": {},
+ "source": [
+ "33) Import every 50th row of BostonHousing dataset as a dataframe.\n",
+ " https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "09888bc6",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0f10cbec",
+ "metadata": {},
+ "source": [
+ "34) Import the Boston housing dataset, but while importing change the 'medv' (median house value) column so that values < 25 become ‘Low’ and values > 25 become ‘High’."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c9c39256",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a83078db",
+ "metadata": {},
+ "source": [
+ "35) Create a dataframe with rows as strides from the series \"L\"."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ea9dfeba",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "L = pd.Series(range(15))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4b36f890",
+ "metadata": {},
+ "source": [
+ "36) Import ‘crim’ and ‘medv’ columns of the BostonHousing dataset as a dataframe."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "33ecddbf",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d830c591",
+ "metadata": {},
+ "source": [
+ "37) Get the number of rows, columns, datatype and summary statistics of each column of the Cars93 dataset. Also get the numpy array and list equivalent of the dataframe.\n",
+ "\n",
+ "https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2c1fe83a",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e458e997",
+ "metadata": {},
+ "source": [
+ "38) Which manufacturer, model and type has the highest Price? What is the row and column number of the cell with the highest Price value?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f70df19d",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f4e81856",
+ "metadata": {},
+ "source": [
+ "39) Rename the column \"Type\" as \"CarType\" in the previous data and replace the ‘.’ in column names with ‘_’."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "33e517e5",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5989022c",
+ "metadata": {},
+ "source": [
+ "40) Check if the data from #37 has any missing values."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e9f7e0ce",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ed881438",
+ "metadata": {},
+ "source": [
+ "41) Count the number of missing values in each column of the previous data. Which column has the maximum number of missing values?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "abf14525",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b5abbc84",
+ "metadata": {},
+ "source": [
+ "42) Replace missing values in Min.Price and Max.Price columns with their respective mean."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "91acfe3c",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c1d26407",
+ "metadata": {},
+ "source": [
+ "43) In the previous data, use apply method to replace the missing values in Min.Price with the column’s mean and those in Max.Price with the column’s median."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cdb6209b",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "80c193bc",
+ "metadata": {},
+ "source": [
+ "44) Get the first column (a) in the data as a dataframe (rather than as a Series)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8be2fbaa",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "83334c4f",
+ "metadata": {},
+ "source": [
+ "45) Actually 3 questions.\n",
+ "\n",
+ "In the data, interchange columns 'a' and 'c'.\n",
+ "\n",
+ "Create a generic function to interchange two columns, without hardcoding column names.\n",
+ "\n",
+ "Sort the columns in reverse alphabetical order, that is column 'e' first through column 'a' last."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8706e8d3",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bd957988",
+ "metadata": {},
+ "source": [
+ "46) Change the pandas display settings on printing the dataframe so it shows a maximum of 10 rows and 10 columns."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c8bd512a",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f69a323b",
+ "metadata": {},
+ "source": [
+ "47) Suppress scientific notation like ‘e-03’ in df and print up to 4 digits after the decimal point."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "485aa0b2",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bfa22aec",
+ "metadata": {},
+ "source": [
+ "48) Format the values in column 'random' of df as percentages."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e3d4f4d9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.random.random(4), columns=['random'])\n",
+ "df\n",
+ "#> random\n",
+ "#> 0 .689723\n",
+ "#> 1 .957224\n",
+ "#> 2 .159157\n",
+ "#> 3 .21082"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "829030ce",
+ "metadata": {},
+ "source": [
+ "49) From the cars data, filter the 'Manufacturer', 'Model' and 'Type' for every 20th row starting from 1st (row 0)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b439593c",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d38155b7",
+ "metadata": {},
+ "source": [
+ "50) In the cars data, replace NaNs with ‘missing’ in columns 'Manufacturer', 'Model' and 'Type', create an index as a combination of these three columns, and check if the index is a primary key."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8cf131e7",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d01c9b73",
+ "metadata": {},
+ "source": [
+ "51) Find the row position of the 5th largest value of column 'a' in df."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "15321302",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.random.randint(1, 30, 30).reshape(10,-1), columns=list('abc'))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9df76d19",
+ "metadata": {},
+ "source": [
+ "52) In ser, find the position of the 2nd largest value greater than the mean."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "61330d9f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser = pd.Series(np.random.randint(1, 100, 15))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0aaae1b8",
+ "metadata": {},
+ "source": [
+ "53) Get the last two rows of df whose row sum is greater than 100."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4ab30612",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.random.randint(10, 40, 60).reshape(-1, 4))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ac4dac7e",
+ "metadata": {},
+ "source": [
+ "54) Replace all values of ser below the 5th percentile and above the 95th percentile with the respective 5th and 95th percentile values."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "78bc91ec",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ser = pd.Series(np.logspace(-2, 2, 30))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "407723d9",
+ "metadata": {},
+ "source": [
+ "55) Reshape df to the largest possible square with negative values removed. Drop the smallest values if need be. The order of the positive numbers in the result should remain the same as the original."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2ebc8dec",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.random.randint(-20, 50, 100).reshape(10,-1))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5aa891f3",
+ "metadata": {},
+ "source": [
+ "56) Swap rows 1 and 2 in df."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2ca60052",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.arange(25).reshape(5, -1))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "97144928",
+ "metadata": {},
+ "source": [
+ "57) Reverse all the rows of dataframe df."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aac3cc14",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.arange(25).reshape(5, -1))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3f09f84f",
+ "metadata": {},
+ "source": [
+ "58) Get one-hot encodings for column 'a' in the dataframe df and append it as columns."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "59860b7a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.arange(25).reshape(5,-1), columns=list('abcde'))\n",
+ "#     a   b   c   d   e\n",
+ "# 0   0   1   2   3   4\n",
+ "# 1   5   6   7   8   9\n",
+ "# 2  10  11  12  13  14\n",
+ "# 3  15  16  17  18  19\n",
+ "# 4  20  21  22  23  24"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b5295b8b",
+ "metadata": {},
+ "source": [
+ "59) Obtain the column name with the highest number of row-wise maxima in df."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5083160c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.random.randint(1,100, 40).reshape(10, -1))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "15fcf24b",
+ "metadata": {},
+ "source": [
+ "60) Create a new column in which each row contains the row number of its nearest row-record by Euclidean distance."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0719c737",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.random.randint(1,100, 40).reshape(10, -1), columns=list('pqrs'), index=list('abcdefghij'))\n",
+ "df\n",
+ "# p q r s\n",
+ "# a 57 77 13 62\n",
+ "# b 68 5 92 24\n",
+ "# c 74 40 18 37\n",
+ "# d 80 17 39 60\n",
+ "# e 93 48 85 33\n",
+ "# f 69 55 8 11\n",
+ "# g 39 23 88 53\n",
+ "# h 63 28 25 61\n",
+ "# i 18 4 73 7\n",
+ "# j 79 12 45 34"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f1eb1791",
+ "metadata": {},
+ "source": [
+ "61) Compute maximum possible absolute correlation value of each column against other columns in df."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "baee6e5f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.random.randint(1,100, 80).reshape(8, -1), columns=list('pqrstuvwxy'), index=list('abcdefgh'))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "828ef471",
+ "metadata": {},
+ "source": [
+ "62) Compute the minimum-by-maximum for every row of df."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c7368340",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.random.randint(1,100, 80).reshape(8, -1))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "971d2276",
+ "metadata": {},
+ "source": [
+ "63) Create a new column 'penultimate' which has the second largest value of each row of df."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "66c6865b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.random.randint(1,100, 80).reshape(8, -1))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "529c1042",
+ "metadata": {},
+ "source": [
+ "64) Normalize all columns of df by subtracting the column mean and dividing by the standard deviation.\n",
+ "Also rescale all columns of df so that the minimum value in each column is 0 and the maximum is 1."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5da28502",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.random.randint(1,100, 80).reshape(8, -1))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "af26e808",
+ "metadata": {},
+ "source": [
+ "65) Compute the correlation of each row of df with its succeeding row."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d5fca730",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.random.randint(1,100, 80).reshape(8, -1))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "57341a09",
+ "metadata": {},
+ "source": [
+ "66) Replace the values in both diagonals of df with 0."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "721432ca",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.random.randint(1,100, 100).reshape(10, -1))\n",
+ "df\n",
+ "# 0 1 2 3 4 5 6 7 8 9\n",
+ "# 0 11 46 26 44 11 62 18 70 68 26\n",
+ "# 1 87 71 52 50 81 43 83 39 3 59\n",
+ "# 2 47 76 93 77 73 2 2 16 14 26\n",
+ "# 3 64 18 74 22 16 37 60 8 66 39\n",
+ "# 4 10 18 39 98 25 8 32 6 3 29\n",
+ "# 5 29 91 27 86 23 84 28 31 97 10\n",
+ "# 6 37 71 70 65 4 72 82 89 12 97\n",
+ "# 7 65 22 97 75 17 10 43 78 12 77\n",
+ "# 8 47 57 96 55 17 83 61 85 26 86\n",
+ "# 9 76 80 28 45 77 12 67 80 7 63\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "202c483b",
+ "metadata": {},
+ "source": [
+ "67) Using a key in a grouped data frame. From df_grouped, get the group belonging to 'apple' as a dataframe."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c8028ee1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame({'col1': ['apple', 'banana', 'orange'] * 3,\n",
+ " 'col2': np.random.rand(9),\n",
+ " 'col3': np.random.randint(0, 15, 9)})\n",
+ "\n",
+ "df_grouped = df.groupby(['col1'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8d2cb901",
+ "metadata": {},
+ "source": [
+ "68) In df, find the second largest value of 'rating' for 'banana'."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7209bc19",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame({'fruit': ['apple', 'banana', 'orange'] * 3,\n",
+ " 'rating': np.random.rand(9),\n",
+ " 'price': np.random.randint(0, 15, 9)})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3bde89cb",
+ "metadata": {},
+ "source": [
+ "69) In df, Compute the mean price of every fruit, while keeping the fruit as another column instead of an index."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "69a4c8f6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame({'fruit': ['apple', 'banana', 'orange'] * 3,\n",
+ " 'rating': np.random.rand(9),\n",
+ " 'price': np.random.randint(0, 15, 9)})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e5eaa372",
+ "metadata": {},
+ "source": [
+ "70) Join dataframes df1 and df2 on the column pairs ‘fruit’/‘pazham’ and ‘weight’/‘kilo’ (i.e. df1.fruit matches df2.pazham and df1.weight matches df2.kilo)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8a250adf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df1 = pd.DataFrame({'fruit': ['apple', 'banana', 'orange'] * 3,\n",
+ " 'weight': ['high', 'medium', 'low'] * 3,\n",
+ " 'price': np.random.randint(0, 15, 9)})\n",
+ "\n",
+ "df2 = pd.DataFrame({'pazham': ['apple', 'orange', 'pine'] * 2,\n",
+ " 'kilo': ['high', 'low'] * 3,\n",
+ " 'price': np.random.randint(0, 15, 6)})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "57adb8fe",
+ "metadata": {},
+ "source": [
+ "71) Get the positions where the value of two columns match."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b43cc070",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame({'fruit1': np.random.choice(['apple', 'orange', 'banana'], 10),\n",
+ " 'fruit2': np.random.choice(['apple', 'orange', 'banana'], 10)})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b7da5c86",
+ "metadata": {},
+ "source": [
+ "72) Create two new columns in df, one of which is a lag1 (shift column a down by 1 row) of column ‘a’ and the other is a lead1 (shift column b up by 1 row)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e531adc8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.random.randint(1, 100, 20).reshape(-1, 4), columns = list('abcd'))\n",
+ "\n",
+ "#     a   b   c   d\n",
+ "# 0  66  34  76  47\n",
+ "# 1  20  86  10  81\n",
+ "# 2  75  73  51  28\n",
+ "# 3   1   1   9  83\n",
+ "# 4  30  47  67   4"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "501a01f7",
+ "metadata": {},
+ "source": [
+ "73) Get the frequency of unique values in the entire dataframe df."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c22b1df8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.random.randint(1, 10, 20).reshape(-1, 4), columns = list('abcd'))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ff5d182d",
+ "metadata": {},
+ "source": [
+ "74) Split the string column in df to form a dataframe with 3 columns as shown."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4d8a2bcd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame([\"STD, City State\",\n",
+ "\"33, Kolkata West Bengal\",\n",
+ "\"44, Chennai Tamil Nadu\",\n",
+ "\"40, Hyderabad Telengana\",\n",
+ "\"80, Bangalore Karnataka\"], columns=['row'])\n",
+ "\n",
+ "print(df)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "329e74eab13e1efbefcaeabff40e1514f85405b91e84e04e6869e473b0154ff3"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Jupyter-Notebooks/pandas_demos-main/DA0101EN-2-Review-Data-Wrangling.ipynb b/Jupyter-Notebooks/pandas_demos-main/DA0101EN-2-Review-Data-Wrangling.ipynb
new file mode 100644
index 0000000..aefb48e
--- /dev/null
+++ b/Jupyter-Notebooks/pandas_demos-main/DA0101EN-2-Review-Data-Wrangling.ipynb
@@ -0,0 +1 @@
+{"cells":[{"cell_type":"markdown","metadata":{},"source":["
\n","\n","# Data Wrangling\n","\n","Estimated time needed: **30** minutes\n","\n","## Objectives\n","\n","After completing this lab you will be able to:\n","\n","* Handle missing values\n","* Correct data format\n","* Standardize and normalize data\n"]},{"cell_type":"markdown","metadata":{},"source":["
\n"]},{"cell_type":"markdown","metadata":{},"source":["Data wrangling is the process of converting data from the initial format to a format that may be better for analysis.\n"]},{"cell_type":"markdown","metadata":{},"source":["
What is the fuel consumption (L/100km) rate for the diesel car?
\n"]},{"cell_type":"code","execution_count":104,"metadata":{},"outputs":[],"source":["#install specific version of libraries used in lab\n","#! mamba install pandas==1.3.3\n","#! mamba install numpy=1.21.2\n"]},{"cell_type":"code","execution_count":105,"metadata":{},"outputs":[],"source":["import pandas as pd\n","import matplotlib as plt"]},{"cell_type":"markdown","metadata":{},"source":["
Reading the dataset from the URL and adding the related headers
\n"]},{"cell_type":"markdown","metadata":{},"source":["First, we assign the URL of the dataset to \"filename\".\n"]},{"cell_type":"markdown","metadata":{},"source":["This dataset was hosted on IBM Cloud object. Click HERE for free storage.\n"]},{"cell_type":"code","execution_count":106,"metadata":{},"outputs":[],"source":["filename = \"https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DA0101EN-SkillsNetwork/labs/Data%20files/auto.csv\""]},{"cell_type":"markdown","metadata":{},"source":["Then, we create a Python list headers containing name of headers.\n"]},{"cell_type":"code","execution_count":107,"metadata":{},"outputs":[],"source":["headers = [\"symboling\",\"normalized-losses\",\"make\",\"fuel-type\",\"aspiration\", \"num-of-doors\",\"body-style\",\n"," \"drive-wheels\",\"engine-location\",\"wheel-base\", \"length\",\"width\",\"height\",\"curb-weight\",\"engine-type\",\n"," \"num-of-cylinders\", \"engine-size\",\"fuel-system\",\"bore\",\"stroke\",\"compression-ratio\",\"horsepower\",\n"," \"peak-rpm\",\"city-mpg\",\"highway-mpg\",\"price\"]"]},{"cell_type":"markdown","metadata":{},"source":["Use the Pandas method read_csv() to load the data from the web address. Set the parameter \"names\" equal to the Python list \"headers\".\n"]},{"cell_type":"code","execution_count":108,"metadata":{},"outputs":[],"source":["df = pd.read_csv(\"auto.csv\", names = headers)\n"]},{"cell_type":"markdown","metadata":{},"source":["Use the method head() to display the first five rows of the dataframe.\n"]},{"cell_type":"code","execution_count":109,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":[" symboling normalized-losses make fuel-type aspiration num-of-doors \\\n","0 3 ? alfa-romero gas std two \n","1 3 ? alfa-romero gas std two \n","2 1 ? alfa-romero gas std two \n","3 2 164 audi gas std four \n","4 2 164 audi gas std four \n","\n"," body-style drive-wheels engine-location wheel-base ... 
engine-size \\\n","0 convertible rwd front 88.6 ... 130 \n","1 convertible rwd front 88.6 ... 130 \n","2 hatchback rwd front 94.5 ... 152 \n","3 sedan fwd front 99.8 ... 109 \n","4 sedan 4wd front 99.4 ... 136 \n","\n"," fuel-system bore stroke compression-ratio horsepower peak-rpm city-mpg \\\n","0 mpfi 3.47 2.68 9.0 111 5000 21 \n","1 mpfi 3.47 2.68 9.0 111 5000 21 \n","2 mpfi 2.68 3.47 9.0 154 5000 19 \n","3 mpfi 3.19 3.40 10.0 102 5500 24 \n","4 mpfi 3.19 3.40 8.0 115 5500 18 \n","\n"," highway-mpg price \n","0 27 13495 \n","1 27 16500 \n","2 26 16500 \n","3 30 13950 \n","4 22 17450 \n","\n","[5 rows x 26 columns]\n"]}],"source":["# To see what the data set looks like, we'll use the head() method.\n","print(df.head(5))"]},{"cell_type":"markdown","metadata":{},"source":["As we can see, several question marks appeared in the dataframe; those are missing values which may hinder our further analysis.\n","\n","
So, how do we identify all those missing values and deal with them?
\n","\n","How to work with missing data?\n","\n","Steps for working with missing data:\n","\n","\n","
\n","In the car dataset, missing data comes with the question mark \"?\".\n","We replace \"?\" with NaN (Not a Number), Python's default missing value marker for reasons of computational speed and convenience. Here we use the function: \n","
.replace(A, B, inplace = True)
\n","to replace A by B.\n"]},{"cell_type":"code","execution_count":110,"metadata":{},"outputs":[],"source":["import numpy as np\n","\n","# replace \"?\" to NaN\n","df.replace(\"?\", np.nan, inplace=True)"]},{"cell_type":"markdown","metadata":{},"source":["
Evaluating for Missing Data
\n","\n","The missing values are converted by default. We use the following functions to identify these missing values. There are two methods to detect missing data:\n","\n","\n","
.isnull()
\n","
.notnull()
\n","\n","The output is a boolean value indicating whether the value that is passed into the argument is in fact missing data.\n"]},{"cell_type":"code","execution_count":111,"metadata":{},"outputs":[],"source":["df1 = df.isnull()"]},{"cell_type":"markdown","metadata":{},"source":["\"True\" means the value is a missing value while \"False\" means the value is not a missing value.\n"]},{"cell_type":"markdown","metadata":{},"source":["
Count missing values in each column
\n","
\n","Using a for loop in Python, we can quickly figure out the number of missing values in each column. As mentioned above, \"True\" represents a missing value and \"False\" means the value is present in the dataset. In the body of the for loop the method \".value_counts()\" counts the number of \"True\" values. \n","
\n","How to deal with missing data?\n","\n","\n","
Drop data \n"," a. Drop the whole row \n"," b. Drop the whole column\n","
\n","
Replace data \n"," a. Replace it by mean \n"," b. Replace it by frequency \n"," c. Replace it based on other functions\n","
\n","\n"]},{"cell_type":"markdown","metadata":{},"source":["Whole columns should be dropped only if most entries in the column are empty. In our dataset, none of the columns are empty enough to drop entirely.\n","We have some freedom in choosing which method to replace data; however, some methods may seem more reasonable than others. We will apply each method to many different columns:\n","\n","Replace by mean:\n","\n","
\n","
\"normalized-losses\": 41 missing data, replace them with mean
\n","
\"stroke\": 4 missing data, replace them with mean
\n","
\"bore\": 4 missing data, replace them with mean
\n","
\"horsepower\": 2 missing data, replace them with mean
\n","
\"peak-rpm\": 2 missing data, replace them with mean
\n","
\n","\n","Replace by frequency:\n","\n","
\n","
\"num-of-doors\": 2 missing data, replace them with \"four\". \n","
\n","
Reason: 84% of sedans have four doors. Since four doors is the most frequent value, it is the most likely to occur
\n","
\n","
\n","
\n","\n","Drop the whole row:\n","\n","
\n","
\"price\": 4 missing data, simply delete the whole row\n","
\n","
Reason: price is what we want to predict. Any data entry without price data cannot be used for prediction; therefore any row now without price data is not useful to us
\n","\n","Based on the example above, replace NaN in \"stroke\" column with the mean value.\n","\n","
\n"]},{"cell_type":"code","execution_count":117,"metadata":{},"outputs":[],"source":["# Write your code below and press Shift+Enter to execute \n","mn_stroke = df[\"stroke\"].astype(float).mean()\n","df[\"stroke\"].replace(np.nan, mn_stroke, inplace=True)"]},{"cell_type":"markdown","metadata":{},"source":["Click here for the solution\n","\n","```python\n","#Calculate the mean vaule for \"stroke\" column\n","avg_stroke = df[\"stroke\"].astype(\"float\").mean(axis = 0)\n","print(\"Average of stroke:\", avg_stroke)\n","\n","# replace NaN by mean value in \"stroke\" column\n","df[\"stroke\"].replace(np.nan, avg_stroke, inplace = True)\n","```\n","\n","\n"]},{"cell_type":"markdown","metadata":{},"source":["
Calculate the mean value for the \"horsepower\" column
Replace \"NaN\" with the mean value in the \"peak-rpm\" column
\n"]},{"cell_type":"code","execution_count":121,"metadata":{},"outputs":[{"data":{"text/plain":["0 5000\n","1 5000\n","2 5000\n","3 5500\n","4 5500\n"," ... \n","200 5400\n","201 5300\n","202 5500\n","203 4800\n","204 5400\n","Name: peak-rpm, Length: 205, dtype: object"]},"execution_count":121,"metadata":{},"output_type":"execute_result"}],"source":["df[\"peak-rpm\"].replace(np.nan, mn_pr, inplace=True)\n","df[\"peak-rpm\"]"]},{"cell_type":"markdown","metadata":{},"source":["To see which values are present in a particular column, we can use the \".value_counts()\" method:\n"]},{"cell_type":"code","execution_count":122,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["False 205\n","Name: symboling, dtype: int64\n","False 205\n","Name: normalized-losses, dtype: int64\n","False 205\n","Name: make, dtype: int64\n","False 205\n","Name: fuel-type, dtype: int64\n","False 205\n","Name: aspiration, dtype: int64\n","False 203\n","True 2\n","Name: num-of-doors, dtype: int64\n","False 205\n","Name: body-style, dtype: int64\n","False 205\n","Name: drive-wheels, dtype: int64\n","False 205\n","Name: engine-location, dtype: int64\n","False 205\n","Name: wheel-base, dtype: int64\n","False 205\n","Name: length, dtype: int64\n","False 205\n","Name: width, dtype: int64\n","False 205\n","Name: height, dtype: int64\n","False 205\n","Name: curb-weight, dtype: int64\n","False 205\n","Name: engine-type, dtype: int64\n","False 205\n","Name: num-of-cylinders, dtype: int64\n","False 205\n","Name: engine-size, dtype: int64\n","False 205\n","Name: fuel-system, dtype: int64\n","False 205\n","Name: bore, dtype: int64\n","False 205\n","Name: stroke, dtype: int64\n","False 205\n","Name: compression-ratio, dtype: int64\n","False 205\n","Name: horsepower, dtype: int64\n","False 205\n","Name: peak-rpm, dtype: int64\n","False 205\n","Name: city-mpg, dtype: int64\n","False 205\n","Name: highway-mpg, dtype: int64\n","False 201\n","True 4\n","Name: price, dtype: 
int64\n"]}],"source":["for i in df.columns:\n"," print(df[i].isnull().value_counts())"]},{"cell_type":"markdown","metadata":{},"source":["We can see that four doors are the most common type. We can also use the \".idxmax()\" method to calculate the most common type automatically:\n"]},{"cell_type":"code","execution_count":123,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["four 114\n","two 89\n","Name: num-of-doors, dtype: int64\n"]},{"data":{"text/plain":["'four'"]},"execution_count":123,"metadata":{},"output_type":"execute_result"}],"source":["max_doors = df[\"num-of-doors\"].value_counts()\n","print(max_doors)\n","max_doors.idxmax()\n"]},{"cell_type":"markdown","metadata":{},"source":["The replacement procedure is very similar to what we have seen previously:\n"]},{"cell_type":"code","execution_count":124,"metadata":{},"outputs":[],"source":["#replace the missing 'num-of-doors' values by the most frequent \n","df[\"num-of-doors\"].replace(np.nan, max_doors.idxmax(), inplace=True)"]},{"cell_type":"markdown","metadata":{},"source":["Finally, let's drop all rows that do not have price data:\n"]},{"cell_type":"code","execution_count":125,"metadata":{},"outputs":[{"data":{"text/html":["
\n","\n","
\n"," \n","
\n","
\n","
index
\n","
symboling
\n","
normalized-losses
\n","
make
\n","
fuel-type
\n","
aspiration
\n","
num-of-doors
\n","
body-style
\n","
drive-wheels
\n","
engine-location
\n","
...
\n","
engine-size
\n","
fuel-system
\n","
bore
\n","
stroke
\n","
compression-ratio
\n","
horsepower
\n","
peak-rpm
\n","
city-mpg
\n","
highway-mpg
\n","
price
\n","
\n"," \n"," \n","
\n","
0
\n","
0
\n","
3
\n","
122.0
\n","
alfa-romero
\n","
gas
\n","
std
\n","
two
\n","
convertible
\n","
rwd
\n","
front
\n","
...
\n","
130
\n","
mpfi
\n","
3.47
\n","
2.68
\n","
9.0
\n","
111
\n","
5000
\n","
21
\n","
27
\n","
13495
\n","
\n","
\n","
1
\n","
1
\n","
3
\n","
122.0
\n","
alfa-romero
\n","
gas
\n","
std
\n","
two
\n","
convertible
\n","
rwd
\n","
front
\n","
...
\n","
130
\n","
mpfi
\n","
3.47
\n","
2.68
\n","
9.0
\n","
111
\n","
5000
\n","
21
\n","
27
\n","
16500
\n","
\n","
\n","
2
\n","
2
\n","
1
\n","
122.0
\n","
alfa-romero
\n","
gas
\n","
std
\n","
two
\n","
hatchback
\n","
rwd
\n","
front
\n","
...
\n","
152
\n","
mpfi
\n","
2.68
\n","
3.47
\n","
9.0
\n","
154
\n","
5000
\n","
19
\n","
26
\n","
16500
\n","
\n","
\n","
3
\n","
3
\n","
2
\n","
164
\n","
audi
\n","
gas
\n","
std
\n","
four
\n","
sedan
\n","
fwd
\n","
front
\n","
...
\n","
109
\n","
mpfi
\n","
3.19
\n","
3.40
\n","
10.0
\n","
102
\n","
5500
\n","
24
\n","
30
\n","
13950
\n","
\n","
\n","
4
\n","
4
\n","
2
\n","
164
\n","
audi
\n","
gas
\n","
std
\n","
four
\n","
sedan
\n","
4wd
\n","
front
\n","
...
\n","
136
\n","
mpfi
\n","
3.19
\n","
3.40
\n","
8.0
\n","
115
\n","
5500
\n","
18
\n","
22
\n","
17450
\n","
\n","
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
...
\n","
\n","
\n","
196
\n","
200
\n","
-1
\n","
95
\n","
volvo
\n","
gas
\n","
std
\n","
four
\n","
sedan
\n","
rwd
\n","
front
\n","
...
\n","
141
\n","
mpfi
\n","
3.78
\n","
3.15
\n","
9.5
\n","
114
\n","
5400
\n","
23
\n","
28
\n","
16845
\n","
\n","
\n","
197
\n","
201
\n","
-1
\n","
95
\n","
volvo
\n","
gas
\n","
turbo
\n","
four
\n","
sedan
\n","
rwd
\n","
front
\n","
...
\n","
141
\n","
mpfi
\n","
3.78
\n","
3.15
\n","
8.7
\n","
160
\n","
5300
\n","
19
\n","
25
\n","
19045
\n","
\n","
\n","
198
\n","
202
\n","
-1
\n","
95
\n","
volvo
\n","
gas
\n","
std
\n","
four
\n","
sedan
\n","
rwd
\n","
front
\n","
...
\n","
173
\n","
mpfi
\n","
3.58
\n","
2.87
\n","
8.8
\n","
134
\n","
5500
\n","
18
\n","
23
\n","
21485
\n","
\n","
\n","
199
\n","
203
\n","
-1
\n","
95
\n","
volvo
\n","
diesel
\n","
turbo
\n","
four
\n","
sedan
\n","
rwd
\n","
front
\n","
...
\n","
145
\n","
idi
\n","
3.01
\n","
3.40
\n","
23.0
\n","
106
\n","
4800
\n","
26
\n","
27
\n","
22470
\n","
\n","
\n","
200
\n","
204
\n","
-1
\n","
95
\n","
volvo
\n","
gas
\n","
turbo
\n","
four
\n","
sedan
\n","
rwd
\n","
front
\n","
...
\n","
141
\n","
mpfi
\n","
3.78
\n","
3.15
\n","
9.5
\n","
114
\n","
5400
\n","
19
\n","
25
\n","
22625
\n","
\n"," \n","
\n","
201 rows × 27 columns
\n","
"],"text/plain":[" index symboling normalized-losses make fuel-type aspiration \\\n","0 0 3 122.0 alfa-romero gas std \n","1 1 3 122.0 alfa-romero gas std \n","2 2 1 122.0 alfa-romero gas std \n","3 3 2 164 audi gas std \n","4 4 2 164 audi gas std \n",".. ... ... ... ... ... ... \n","196 200 -1 95 volvo gas std \n","197 201 -1 95 volvo gas turbo \n","198 202 -1 95 volvo gas std \n","199 203 -1 95 volvo diesel turbo \n","200 204 -1 95 volvo gas turbo \n","\n"," num-of-doors body-style drive-wheels engine-location ... engine-size \\\n","0 two convertible rwd front ... 130 \n","1 two convertible rwd front ... 130 \n","2 two hatchback rwd front ... 152 \n","3 four sedan fwd front ... 109 \n","4 four sedan 4wd front ... 136 \n",".. ... ... ... ... ... ... \n","196 four sedan rwd front ... 141 \n","197 four sedan rwd front ... 141 \n","198 four sedan rwd front ... 173 \n","199 four sedan rwd front ... 145 \n","200 four sedan rwd front ... 141 \n","\n"," fuel-system bore stroke compression-ratio horsepower peak-rpm \\\n","0 mpfi 3.47 2.68 9.0 111 5000 \n","1 mpfi 3.47 2.68 9.0 111 5000 \n","2 mpfi 2.68 3.47 9.0 154 5000 \n","3 mpfi 3.19 3.40 10.0 102 5500 \n","4 mpfi 3.19 3.40 8.0 115 5500 \n",".. ... ... ... ... ... ... \n","196 mpfi 3.78 3.15 9.5 114 5400 \n","197 mpfi 3.78 3.15 8.7 160 5300 \n","198 mpfi 3.58 2.87 8.8 134 5500 \n","199 idi 3.01 3.40 23.0 106 4800 \n","200 mpfi 3.78 3.15 9.5 114 5400 \n","\n"," city-mpg highway-mpg price \n","0 21 27 13495 \n","1 21 27 16500 \n","2 19 26 16500 \n","3 24 30 13950 \n","4 18 22 17450 \n",".. ... ... ... 
\n","196 23 28 16845 \n","197 19 25 19045 \n","198 18 23 21485 \n","199 26 27 22470 \n","200 19 25 22625 \n","\n","[201 rows x 27 columns]"]},"execution_count":125,"metadata":{},"output_type":"execute_result"}],"source":["# simply drop whole row with NaN in \"price\" column\n","df.dropna(inplace=True)\n","\n","# reset index, because we droped two rows\n","df.reset_index(inplace=True)\n","\n","df\n"]},{"cell_type":"code","execution_count":126,"metadata":{},"outputs":[{"data":{"text/html":["
\n","\n","
\n"," \n","
\n","
\n","
index
\n","
symboling
\n","
normalized-losses
\n","
make
\n","
fuel-type
\n","
aspiration
\n","
num-of-doors
\n","
body-style
\n","
drive-wheels
\n","
engine-location
\n","
...
\n","
engine-size
\n","
fuel-system
\n","
bore
\n","
stroke
\n","
compression-ratio
\n","
horsepower
\n","
peak-rpm
\n","
city-mpg
\n","
highway-mpg
\n","
price
\n","
\n"," \n"," \n","
\n","
0
\n","
0
\n","
3
\n","
122.0
\n","
alfa-romero
\n","
gas
\n","
std
\n","
two
\n","
convertible
\n","
rwd
\n","
front
\n","
...
\n","
130
\n","
mpfi
\n","
3.47
\n","
2.68
\n","
9.0
\n","
111
\n","
5000
\n","
21
\n","
27
\n","
13495
\n","
\n","
\n","
1
\n","
1
\n","
3
\n","
122.0
\n","
alfa-romero
\n","
gas
\n","
std
\n","
two
\n","
convertible
\n","
rwd
\n","
front
\n","
...
\n","
130
\n","
mpfi
\n","
3.47
\n","
2.68
\n","
9.0
\n","
111
\n","
5000
\n","
21
\n","
27
\n","
16500
\n","
\n","
\n","
2
\n","
2
\n","
1
\n","
122.0
\n","
alfa-romero
\n","
gas
\n","
std
\n","
two
\n","
hatchback
\n","
rwd
\n","
front
\n","
...
\n","
152
\n","
mpfi
\n","
2.68
\n","
3.47
\n","
9.0
\n","
154
\n","
5000
\n","
19
\n","
26
\n","
16500
\n","
\n","
\n","
3
\n","
3
\n","
2
\n","
164
\n","
audi
\n","
gas
\n","
std
\n","
four
\n","
sedan
\n","
fwd
\n","
front
\n","
...
\n","
109
\n","
mpfi
\n","
3.19
\n","
3.40
\n","
10.0
\n","
102
\n","
5500
\n","
24
\n","
30
\n","
13950
\n","
\n","
\n","
4
\n","
4
\n","
2
\n","
164
\n","
audi
\n","
gas
\n","
std
\n","
four
\n","
sedan
\n","
4wd
\n","
front
\n","
...
\n","
136
\n","
mpfi
\n","
3.19
\n","
3.40
\n","
8.0
\n","
115
\n","
5500
\n","
18
\n","
22
\n","
17450
\n","
\n"," \n","
\n","
5 rows × 27 columns
\n","
"],"text/plain":[" index symboling normalized-losses make fuel-type aspiration \\\n","0 0 3 122.0 alfa-romero gas std \n","1 1 3 122.0 alfa-romero gas std \n","2 2 1 122.0 alfa-romero gas std \n","3 3 2 164 audi gas std \n","4 4 2 164 audi gas std \n","\n"," num-of-doors body-style drive-wheels engine-location ... engine-size \\\n","0 two convertible rwd front ... 130 \n","1 two convertible rwd front ... 130 \n","2 two hatchback rwd front ... 152 \n","3 four sedan fwd front ... 109 \n","4 four sedan 4wd front ... 136 \n","\n"," fuel-system bore stroke compression-ratio horsepower peak-rpm city-mpg \\\n","0 mpfi 3.47 2.68 9.0 111 5000 21 \n","1 mpfi 3.47 2.68 9.0 111 5000 21 \n","2 mpfi 2.68 3.47 9.0 154 5000 19 \n","3 mpfi 3.19 3.40 10.0 102 5500 24 \n","4 mpfi 3.19 3.40 8.0 115 5500 18 \n","\n"," highway-mpg price \n","0 27 13495 \n","1 27 16500 \n","2 26 16500 \n","3 30 13950 \n","4 22 17450 \n","\n","[5 rows x 27 columns]"]},"execution_count":126,"metadata":{},"output_type":"execute_result"}],"source":["df.head()"]},{"cell_type":"markdown","metadata":{},"source":["Good! Now, we have a dataset with no missing values.\n"]},{"cell_type":"markdown","metadata":{},"source":["
Correct data format
\n","We are almost there!\n","
The last step in data cleaning is checking and making sure that all data is in the correct format (int, float, text or other).
As we can see above, some columns are not of the correct data type. Numerical variables should have type 'float' or 'int', and variables with strings such as categories should have type 'object'. For example, 'bore' and 'stroke' variables are numerical values that describe the engines, so we should expect them to be of the type 'float' or 'int'; however, they are shown as type 'object'. We have to convert data types into a proper format for each column using the \"astype()\" method.
\n"]},{"cell_type":"code","execution_count":155,"metadata":{},"outputs":[{"data":{"text/plain":["index int64\n","symboling int64\n","normalized-losses int64\n","make object\n","fuel-type object\n","aspiration object\n","num-of-doors object\n","body-style object\n","drive-wheels object\n","engine-location object\n","wheel-base float64\n","length float64\n","width float64\n","height float64\n","curb-weight int64\n","engine-type object\n","num-of-cylinders object\n","engine-size int64\n","fuel-system object\n","bore object\n","stroke object\n","compression-ratio float64\n","horsepower int64\n","peak-rpm object\n","city-L/100km float64\n","highway-L/100km float64\n","price object\n","dtype: object"]},"execution_count":155,"metadata":{},"output_type":"execute_result"}],"source":["df.dtypes"]},{"cell_type":"markdown","metadata":{},"source":["Wonderful!\n","\n","Now we have finally obtained the cleaned dataset with no missing values with all data in its proper format.\n"]},{"cell_type":"markdown","metadata":{},"source":["
Data Standardization
\n","
\n","Data is usually collected from different agencies in different formats.\n","(Data standardization is also a term for a particular type of data normalization where we subtract the mean and divide by the standard deviation.)\n","
\n","\n","What is standardization?\n","\n","
Standardization is the process of transforming data into a common format, allowing the researcher to make the meaningful comparison.\n","
\n","\n","Example\n","\n","
Transform mpg to L/100km:
\n","
In our dataset, the fuel consumption columns \"city-mpg\" and \"highway-mpg\" are represented by mpg (miles per gallon) unit. Assume we are developing an application in a country that accepts the fuel consumption with L/100km standard.
\n","
We will need to apply data transformation to transform mpg into L/100km.
"],"text/plain":[" index symboling normalized-losses make fuel-type aspiration \\\n","0 0 3 122.0 alfa-romero gas std \n","1 1 3 122.0 alfa-romero gas std \n","2 2 1 122.0 alfa-romero gas std \n","3 3 2 164 audi gas std \n","4 4 2 164 audi gas std \n","\n"," num-of-doors body-style drive-wheels engine-location ... engine-size \\\n","0 two convertible rwd front ... 130 \n","1 two convertible rwd front ... 130 \n","2 two hatchback rwd front ... 152 \n","3 four sedan fwd front ... 109 \n","4 four sedan 4wd front ... 136 \n","\n"," fuel-system bore stroke compression-ratio horsepower peak-rpm city-mpg \\\n","0 mpfi 3.47 2.68 9.0 111 5000 21 \n","1 mpfi 3.47 2.68 9.0 111 5000 21 \n","2 mpfi 2.68 3.47 9.0 154 5000 19 \n","3 mpfi 3.19 3.40 10.0 102 5500 24 \n","4 mpfi 3.19 3.40 8.0 115 5500 18 \n","\n"," highway-mpg price \n","0 27 13495 \n","1 27 16500 \n","2 26 16500 \n","3 30 13950 \n","4 22 17450 \n","\n","[5 rows x 27 columns]"]},"execution_count":129,"metadata":{},"output_type":"execute_result"}],"source":["df.head()"]},{"cell_type":"code","execution_count":146,"metadata":{},"outputs":[{"data":{"text/html":["
\n","\n","
\n"," \n","
\n","
\n","
index
\n","
symboling
\n","
normalized-losses
\n","
make
\n","
fuel-type
\n","
aspiration
\n","
num-of-doors
\n","
body-style
\n","
drive-wheels
\n","
engine-location
\n","
...
\n","
engine-size
\n","
fuel-system
\n","
bore
\n","
stroke
\n","
compression-ratio
\n","
horsepower
\n","
peak-rpm
\n","
city-L/100km
\n","
highway-L/100km
\n","
price
\n","
\n"," \n"," \n","
\n","
0
\n","
0
\n","
3
\n","
122
\n","
alfa-romero
\n","
gas
\n","
std
\n","
two
\n","
convertible
\n","
rwd
\n","
front
\n","
...
\n","
130
\n","
mpfi
\n","
3.47
\n","
2.68
\n","
9.0
\n","
111
\n","
5000
\n","
11.190476
\n","
8.703704
\n","
13495
\n","
\n","
\n","
1
\n","
1
\n","
3
\n","
122
\n","
alfa-romero
\n","
gas
\n","
std
\n","
two
\n","
convertible
\n","
rwd
\n","
front
\n","
...
\n","
130
\n","
mpfi
\n","
3.47
\n","
2.68
\n","
9.0
\n","
111
\n","
5000
\n","
11.190476
\n","
8.703704
\n","
16500
\n","
\n","
\n","
2
\n","
2
\n","
1
\n","
122
\n","
alfa-romero
\n","
gas
\n","
std
\n","
two
\n","
hatchback
\n","
rwd
\n","
front
\n","
...
\n","
152
\n","
mpfi
\n","
2.68
\n","
3.47
\n","
9.0
\n","
154
\n","
5000
\n","
12.368421
\n","
9.038462
\n","
16500
\n","
\n","
\n","
3
\n","
3
\n","
2
\n","
164
\n","
audi
\n","
gas
\n","
std
\n","
four
\n","
sedan
\n","
fwd
\n","
front
\n","
...
\n","
109
\n","
mpfi
\n","
3.19
\n","
3.40
\n","
10.0
\n","
102
\n","
5500
\n","
9.791667
\n","
7.833333
\n","
13950
\n","
\n","
\n","
4
\n","
4
\n","
2
\n","
164
\n","
audi
\n","
gas
\n","
std
\n","
four
\n","
sedan
\n","
4wd
\n","
front
\n","
...
\n","
136
\n","
mpfi
\n","
3.19
\n","
3.40
\n","
8.0
\n","
115
\n","
5500
\n","
13.055556
\n","
10.681818
\n","
17450
\n","
\n"," \n","
\n","
5 rows × 27 columns
\n","
"],"text/plain":[" index symboling normalized-losses make fuel-type aspiration \\\n","0 0 3 122 alfa-romero gas std \n","1 1 3 122 alfa-romero gas std \n","2 2 1 122 alfa-romero gas std \n","3 3 2 164 audi gas std \n","4 4 2 164 audi gas std \n","\n"," num-of-doors body-style drive-wheels engine-location ... engine-size \\\n","0 two convertible rwd front ... 130 \n","1 two convertible rwd front ... 130 \n","2 two hatchback rwd front ... 152 \n","3 four sedan fwd front ... 109 \n","4 four sedan 4wd front ... 136 \n","\n"," fuel-system bore stroke compression-ratio horsepower peak-rpm \\\n","0 mpfi 3.47 2.68 9.0 111 5000 \n","1 mpfi 3.47 2.68 9.0 111 5000 \n","2 mpfi 2.68 3.47 9.0 154 5000 \n","3 mpfi 3.19 3.40 10.0 102 5500 \n","4 mpfi 3.19 3.40 8.0 115 5500 \n","\n"," city-L/100km highway-L/100km price \n","0 11.190476 8.703704 13495 \n","1 11.190476 8.703704 16500 \n","2 12.368421 9.038462 16500 \n","3 9.791667 7.833333 13950 \n","4 13.055556 10.681818 17450 \n","\n","[5 rows x 27 columns]"]},"execution_count":146,"metadata":{},"output_type":"execute_result"}],"source":["# Convert mpg to L/100km by mathematical operation (235 divided by mpg)\n","df[\"city-mpg\"] = 235/df[\"city-mpg\"]\n","df.rename(columns={\"city-mpg\" : \"city-L/100km\"}, inplace=True)\n","\n","# check your transformed data \n","df.head()"]},{"cell_type":"markdown","metadata":{},"source":["
\n","
Question #2:
\n","\n","According to the example above, transform mpg to L/100km in the column of \"highway-mpg\" and change the name of column to \"highway-L/100km\".\n","\n","
"],"text/plain":[" index symboling normalized-losses make fuel-type aspiration \\\n","0 0 3 122 alfa-romero gas std \n","1 1 3 122 alfa-romero gas std \n","2 2 1 122 alfa-romero gas std \n","3 3 2 164 audi gas std \n","4 4 2 164 audi gas std \n","\n"," num-of-doors body-style drive-wheels engine-location ... engine-size \\\n","0 two convertible rwd front ... 130 \n","1 two convertible rwd front ... 130 \n","2 two hatchback rwd front ... 152 \n","3 four sedan fwd front ... 109 \n","4 four sedan 4wd front ... 136 \n","\n"," fuel-system bore stroke compression-ratio horsepower peak-rpm city-mpg \\\n","0 mpfi 3.47 2.68 9.0 111 5000 21 \n","1 mpfi 3.47 2.68 9.0 111 5000 21 \n","2 mpfi 2.68 3.47 9.0 154 5000 19 \n","3 mpfi 3.19 3.40 10.0 102 5500 24 \n","4 mpfi 3.19 3.40 8.0 115 5500 18 \n","\n"," highway-L/100km price \n","0 8.703704 13495 \n","1 8.703704 16500 \n","2 9.038462 16500 \n","3 7.833333 13950 \n","4 10.681818 17450 \n","\n","[5 rows x 27 columns]"]},"execution_count":145,"metadata":{},"output_type":"execute_result"}],"source":["# Write your code below and press Shift+Enter to execute \n","df[\"highway-mpg\"] = 235/df[\"highway-mpg\"]\n","df.rename(columns={\"highway-mpg\" : \"highway-L/100km\"}, inplace=True)\n","df.head()"]},{"cell_type":"markdown","metadata":{},"source":["Click here for the solution\n","\n","```python\n","# transform mpg to L/100km by mathematical operation (235 divided by mpg)\n","df[\"highway-mpg\"] = 235/df[\"highway-mpg\"]\n","\n","# rename column name from \"highway-mpg\" to \"highway-L/100km\"\n","df.rename(columns={'\"highway-mpg\"':'highway-L/100km'}, inplace=True)\n","\n","# check your transformed data \n","df.head()\n","\n","```\n","\n","\n"]},{"cell_type":"markdown","metadata":{},"source":["
Data Normalization
\n","\n","Why normalization?\n","\n","
Normalization is the process of transforming values of several variables into a similar range. Typical normalizations include scaling the variable so the variable average is 0, scaling the variable so the variance is 1, or scaling the variable so the variable values range from 0 to 1.\n","
\n","\n","Example\n","\n","
To demonstrate normalization, let's say we want to scale the columns \"length\", \"width\" and \"height\".
\n","
Target: we would like to normalize those variables so their values range from 0 to 1
\n","
Approach: replace original value by (original value)/(maximum value)
"],"text/plain":[" length width height\n","0 0.811148 0.890278 0.816054\n","1 0.811148 0.890278 0.816054\n","2 0.822681 0.909722 0.876254\n","3 0.848630 0.919444 0.908027\n","4 0.848630 0.922222 0.908027"]},"execution_count":147,"metadata":{},"output_type":"execute_result"}],"source":["# Write your code below and press Shift+Enter to execute \n","df[\"height\"] = df[\"height\"]/df[\"height\"].max()\n","df[[\"length\", \"width\", \"height\"]].head()"]},{"cell_type":"markdown","metadata":{},"source":["Click here for the solution\n","\n","```python\n","df['height'] = df['height']/df['height'].max() \n","\n","# show the scaled columns\n","df[[\"length\",\"width\",\"height\"]].head()\n","\n","\n","```\n","\n","\n"]},{"cell_type":"markdown","metadata":{},"source":["Here we can see we've normalized \"length\", \"width\" and \"height\" in the range of \\[0,1].\n"]},{"cell_type":"markdown","metadata":{},"source":["
Binning
\n","Why binning?\n","
\n"," Binning is a process of transforming continuous numerical variables into discrete categorical 'bins' for grouped analysis.\n","
\n","\n","Example: \n","\n","
In our dataset, \"horsepower\" is a real-valued variable ranging from 48 to 288 and it has 59 unique values. What if we only care about the price difference between cars with high horsepower, medium horsepower, and low horsepower (3 types)? Can we rearrange them into three 'bins' to simplify analysis?
\n","\n","
We will use the pandas method 'cut' to segment the 'horsepower' column into 3 bins.
\n"]},{"cell_type":"markdown","metadata":{},"source":["Convert data to correct format:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":[]},{"cell_type":"markdown","metadata":{},"source":["Let's plot the histogram of horsepower to see what the distribution of horsepower looks like.\n"]},{"cell_type":"code","execution_count":167,"metadata":{},"outputs":[{"data":{"text/plain":["Text(0.5, 1.0, 'horsepower bins')"]},"execution_count":167,"metadata":{},"output_type":"execute_result"},{"data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAjIAAAHHCAYAAACle7JuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAyqElEQVR4nO3deXxU1f3/8fcMWSFMYiBkkRDCIgQJoUaNUwihkBL40lYKdUG+vyLihixqXBBbBdQ2VFvABdBKBdtKVdov8kUqQhHCYoiC4AIYAUFQSEAkCQRJAjm/P3xwv45sIQRmTng9H495PLjnnnvmM3PMzNu7jcsYYwQAAGAht78LAAAAqCuCDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMYJkJEybI5XLp66+/9ncp+IHZs2fL5XJp7dq1Z+zbs2dP9ezZ8/wXBTRwBBkAAGCtIH8XAAAXo8WLF/u7BKBBYI8MgBMYY/Ttt9/6u4yAVFFRUS/jhISEKCQkpF7GAi5mBBnAUqWlpbr55psVFRWlyMhIDRs2TIcPH/bpc/ToUT3++ONq27atQkND1bp1az388MOqrKz06de6dWv97Gc/09tvv60rr7xS4eHheuGFFyRJS5YsUffu3RUVFaWIiAh16NBBDz/8sM/2lZWVGj9+vNq1a6fQ0FAlJibqwQcfPOF5XC6XRo0apVdeeUUdOnRQWFiY0tPTtWLFihNe3/r169WvXz95PB5FRESod+/eWrNmjc/rb9SokZ555hmn7euvv5bb7VazZs1kjHHaR4wYobi4OJ/xCwsL1bdvX0VGRqpx48bKysrS6tWrffocPx9p06ZNuummm3TJJZeoe/fup5yT4w4fPqw77rhDzZo1k8fj0a9//WsdOHDAp88Pz5FZvny5XC6XXn/9df3ud79Ty5YtFRYWpt69e2vr1q0+227ZskWDBg1SXFycwsLC1LJlS914440qKys7Y21AQ8OhJcBS119/vZKTk5WXl6cPPvhAM2fOVIsWLfSHP/zB6XPrrbfq5Zdf1q9+9Svdd999KiwsVF5enjZv3qx58+b5jFdUVKTBgwfrjjvu0G233aYOHTpo48aN+tnPfqYuXbroscceU2hoqLZu3erzhV9TU6Nf/OIXWrVqlW6//XalpKTo448/1pQpU/TZZ5/pjTfe8Hme/Px8vfbaaxozZoxCQ0M1ffp09e3bV++99546d+4sSdq4caMyMzPl8Xj04IMPKjg4WC+88IJ69uyp/Px8ZWRkKCoqSp07d9aKFSs0ZswYSdKqVavkcrn0zTffaNOmTbr88sslSStXrlRmZqZTwzvvvKN+/fopPT1d48ePl9vt1qxZs9SrVy+tXLlSV199tU/N1113ndq3b6/f//73PgHpVEaNGqWoqChNmDBBRUVFmjFjhr744gsnrJzOpEm
T5Ha7df/996usrExPPvmkhgwZosLCQklSVVWVcnJyVFlZqdGjRysuLk5fffWV3nzzTZWWlioyMvKM9QENigFglfHjxxtJ5pZbbvFp/+Uvf2maNWvmLG/YsMFIMrfeeqtPv/vvv99IMu+8847TlpSUZCSZRYsW+fSdMmWKkWT27dt3ynr+9re/GbfbbVauXOnT/vzzzxtJZvXq1U6bJCPJrF271mn74osvTFhYmPnlL3/ptA0YMMCEhISYbdu2OW27d+82TZs2NT169HDaRo4caWJjY53l3Nxc06NHD9OiRQszY8YMY4wx+/fvNy6Xyzz99NPGGGNqampM+/btTU5OjqmpqXG2PXz4sElOTjY//elPnbbj7/XgwYNP+fq/b9asWUaSSU9PN1VVVU77k08+aSSZ+fPnO21ZWVkmKyvLWV62bJmRZFJSUkxlZaXT/vTTTxtJ5uOPPzbGGLN+/XojycydO7dWNQENHYeWAEvdeeedPsuZmZnav3+/ysvLJUn//ve/JUm5ubk+/e677z5J0sKFC33ak5OTlZOT49MWFRUlSZo/f75qampOWsfcuXOVkpKijh076uuvv3YevXr1kiQtW7bMp7/X61V6erqz3KpVK1177bV6++23dezYMR07dkyLFy/WgAED1KZNG6dffHy8brrpJq1atcp5jZmZmSopKVFRUZGk7/a89OjRQ5mZmVq5cqWk7/bSGGOcPTIbNmzQli1bdNNNN2n//v1OvRUVFerdu7dWrFhxwmv94Xt9JrfffruCg4Od5REjRigoKMiZk9MZNmyYz7kzx+v+/PPPJcnZ4/L222+fcCgRuBgRZABLtWrVymf5kksukSTnXIwvvvhCbrdb7dq18+kXFxenqKgoffHFFz7tycnJJzzHDTfcoG7duunWW29VbGysbrzxRr3++us+X/RbtmzRxo0bFRMT4/O47LLLJEl79+71GbN9+/YnPM9ll12mw4cPa9++fdq3b58OHz6sDh06nNAvJSVFNTU12rVrl6T/+5JfuXKlKioqtH79emVmZqpHjx5OkFm5cqU8Ho/S0tKceiVp6NChJ9Q8c+ZMVVZWnnCuycnem9P54WuMiIhQfHy8duzYccZtzzSvycnJys3N1cyZM9W8eXPl5ORo2rRpnB+DixbnyACWatSo0UnbzQ/O4TjTORnHhYeHn7RtxYoVWrZsmRYuXKhFixbptddeU69evbR48WI1atRINTU1Sk1N1eTJk086bmJiYq2evy4SEhKUnJysFStWqHXr1jLGyOv1KiYmRnfffbe++OILrVy5Uj/+8Y/ldn/3/23HQ9hTTz2lrl27nnTciIgIn+WTvTfnS23m9U9/+pNuvvlmzZ8/X4sXL9aYMWOUl5enNWvWqGXLlheqVCAgEGSABiopKUk1NTXasmWLUlJSnPaSkhKVlpYqKSmpVuO43W717t1bvXv31uTJk/X73/9ev/nNb7Rs2TJlZ2erbdu2+vDDD9W7d+9ahabje0S+77PPPlPjxo0VExMjSWrcuLFzuOj7Pv30U7ndbp9wlJmZqRUrVig5OVldu3ZV06ZNlZaWpsjISC1atEgffPCBJk6c6PRv27atJMnj8Sg7O7tW78HZ2rJli37yk584y4cOHdKePXv0X//1X/X2HKmpqUpNTdVvf/tbvfvuu+rWrZuef/55PfHEE/X2HIANOLQENFDHvzSnTp3q0358z0n//v3POMY333xzQtvxvRjHL62+/vrr9dVXX+nFF188oe+33357wn1XCgoK9MEHHzjLu3bt0vz589WnTx81atRIjRo1Up8+fTR//nyfQzElJSWaM2eOunfvLo/H47RnZmZqx44deu2115xDTW63Wz/+8Y81efJkVVdX+1yxlJ6errZt2+qPf/yjDh06dELN+/btO+P7ciZ//vOfVV1d7SzPmDFDR48eVb9+/c557PLych09etSnLTU1VW63+4TL3YGLAXtkgAYqLS1NQ4cO1Z///GeVlpY
qKytL7733nl5++WUNGDDAZ4/BqTz22GNasWKF+vfvr6SkJO3du1fTp09Xy5Ytnfup/L//9//0+uuv684779SyZcvUrVs3HTt2TJ9++qlef/115940x3Xu3Fk5OTk+l19L8tlr8sQTTzj3r7nrrrsUFBSkF154QZWVlXryySd9ajweUoqKivT73//eae/Ro4feeusthYaG6qqrrnLa3W63Zs6cqX79+unyyy/XsGHDdOmll+qrr77SsmXL5PF4tGDBgjq84/+nqqpKvXv31vXXX6+ioiJNnz5d3bt31y9+8YtzGlf67tLxUaNG6brrrtNll12mo0eP6m9/+5saNWqkQYMGnfP4gG0IMkADNnPmTLVp00azZ8/WvHnzFBcXp3Hjxmn8+PG12v4Xv/iFduzYoZdeeklff/21mjdvrqysLE2cONG5esbtduuNN97QlClT9Ne//lXz5s1T48aN1aZNG919993OSb/HZWVlyev1auLEidq5c6c6deqk2bNnq0uXLk6fyy+/XCtXrtS4ceOUl5enmpoaZWRk6O9//7syMjJ8xuvQoYNatGihvXv3+tys7njAufrqqxUaGuqzTc+ePVVQUKDHH39czz33nA4dOqS4uDhlZGTojjvuqP0bfArPPfecXnnlFT366KOqrq7W4MGD9cwzz9T6fKXTSUtLU05OjhYsWKCvvvpKjRs3Vlpamt566y1dc8015zw+YBuX+eGZgQBwnrhcLo0cOVLPPfecv0sB0EBwjgwAALAWQQYAAFiLIAMAAKzFyb4ALhhOyQNQ39gjAwAArEWQAQAA1vLroaUJEyb43ARL+u6eEJ9++qkk6ciRI7rvvvv06quvqrKyUjk5OZo+fbpiY2Nr/Rw1NTXavXu3mjZtWi/3cAAAAOefMUYHDx5UQkKC81tpJ+P3c2Quv/xy/ec//3GWg4L+r6R7771XCxcu1Ny5cxUZGalRo0Zp4MCBWr16da3H371793n90ToAAHD+7Nq167Q/hur3IBMUFKS4uLgT2svKyvSXv/xFc+bMUa9evSRJs2bNUkpKitasWVPrO1g2bdpU0ndvxPd/nwUAAASu8vJyJSYmOt/jp+L3ILNlyxYlJCQoLCxMXq9XeXl5atWqldatW6fq6mqfX6ft2LGjWrVqpYKCglMGmcrKSp8fTjt48KCk737pliADAIBdznRaiF9P9s3IyNDs2bO1aNEizZgxQ9u3b1dmZqYOHjyo4uJihYSEKCoqymeb2NhYFRcXn3LMvLw8RUZGOg8OKwEA0HD5dY/M93/SvkuXLsrIyFBSUpJef/11hYeH12nMcePGKTc311k+vmsKAAA0PAF1+XVUVJQuu+wybd26VXFxcaqqqlJpaalPn5KSkpOeU3NcaGiocxiJw0kAADRsARVkDh06pG3btik+Pl7p6ekKDg7W0qVLnfVFRUXauXOnvF6vH6sEAACBwq+Hlu6//379/Oc/V1JSknbv3q3x48erUaNGGjx4sCIjIzV8+HDl5uYqOjpaHo9Ho0ePltfrrfUVSwAAoGHza5D58ssvNXjwYO3fv18xMTHq3r271qxZo5iYGEnSlClT5Ha7NWjQIJ8b4gEAAEiSyzTwX3ErLy9XZGSkysrKOF8GAABL1Pb7O6DOkQEAADgbBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLX8emdfoLZaP7TQ3yWctR2T+vu7BABo8NgjAwAArEWQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFpB/i4AF17rhxb6uwQAAOoFe2QAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKzF5dfngMuYAQDwL/bIAAAAaxFkAACAtQg
yAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1gqYIDNp0iS5XC7dc889TtuRI0c0cuRINWvWTBERERo0aJBKSkr8VyQAAAgoARFk3n//fb3wwgvq0qWLT/u9996rBQsWaO7cucrPz9fu3bs1cOBAP1UJAAACjd+DzKFDhzRkyBC9+OKLuuSSS5z2srIy/eUvf9HkyZPVq1cvpaena9asWXr33Xe1Zs0aP1YMAAAChd+DzMiRI9W/f39lZ2f7tK9bt07V1dU+7R07dlSrVq1UUFBwyvEqKytVXl7u8wAAAA1TkD+f/NVXX9UHH3yg999//4R1xcXFCgkJUVRUlE97bGysiouLTzlmXl6eJk6cWN+lAgCAAOS3PTK7du3S3XffrVdeeUVhYWH1Nu64ceNUVlbmPHbt2lVvYwMAgMDityCzbt067d27V1dccYWCgoIUFBSk/Px8PfPMMwoKClJsbKyqqqpUWlrqs11JSYni4uJOOW5oaKg8Ho/PAwAANEx+O7TUu3dvffzxxz5tw4YNU8eOHTV27FglJiYqODhYS5cu1aBBgyRJRUVF2rlzp7xerz9KBgAAAcZvQaZp06bq3LmzT1uTJk3UrFkzp3348OHKzc1VdHS0PB6PRo8eLa/Xq2uuucYfJQMAgADj15N9z2TKlClyu90aNGiQKisrlZOTo+nTp/u7LAAAECBcxhjj7yLOp/LyckVGRqqsrKzez5dp/dDCeh0PDcuOSf39XQIAWKu2399+v48MAABAXRFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFjLr0FmxowZ6tKlizwejzwej7xer9566y1n/ZEjRzRy5Eg1a9ZMERERGjRokEpKSvxYMQAACCR+DTItW7bUpEmTtG7dOq1du1a9evXStddeq40bN0qS7r33Xi1YsEBz585Vfn6+du/erYEDB/qzZAAAEEBcxhjj7yK+Lzo6Wk899ZR+9atfKSYmRnPmzNGvfvUrSdKnn36qlJQUFRQU6JprrqnVeOXl5YqMjFRZWZk8Hk+91tr6oYX1Oh4alh2T+vu7BACwVm2/vwPmHJljx47p1VdfVUVFhbxer9atW6fq6mplZ2c7fTp27KhWrVqpoKDAj5UCAIBAEeTvAj7++GN5vV4dOXJEERERmjdvnjp16qQNGzYoJCREUVFRPv1jY2NVXFx8yvEqKytVWVnpLJeXl5+v0gEAgJ/5fY9Mhw4dtGHDBhUWFmrEiBEaOnSoNm3aVOfx8vLyFBkZ6TwSExPrsVoAABBI/B5kQkJC1K5dO6WnpysvL09paWl6+umnFRcXp6qqKpWWlvr0LykpUVxc3CnHGzdunMrKypzHrl27zvMrAAAA/uL3IPNDNTU1qqysVHp6uoKDg7V06VJnXVFRkXbu3Cmv13vK7UNDQ53LuY8/AAB
Aw+TXc2TGjRunfv36qVWrVjp48KDmzJmj5cuX6+2331ZkZKSGDx+u3NxcRUdHy+PxaPTo0fJ6vbW+YgkAADRsfg0ye/fu1a9//Wvt2bNHkZGR6tKli95++2399Kc/lSRNmTJFbrdbgwYNUmVlpXJycjR9+nR/lgwAAAJIwN1Hpr5xHxn4C/eRAYC6s+4+MgAAAGeLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAa9UpyPTq1UulpaUntJeXl6tXr17nWhMAAECt1CnILF++XFVVVSe0HzlyRCtXrjznogAAAGoj6Gw6f/TRR86/N23apOLiYmf52LFjWrRokS699NL6qw4AAOA0zirIdO3aVS6XSy6X66SHkMLDw/Xss8/WW3EAAACnc1ZBZvv27TLGqE2bNnrvvfcUExPjrAsJCVGLFi3UqFGjei8SAADgZM4qyCQlJUmSampqzksxAAAAZ+Osgsz3bdmyRcuWLdPevXtPCDaPPvroORcGAABwJnUKMi+++KJGjBih5s2bKy4uTi6Xy1nncrkIMgAA4IKoU5B54okn9Lvf/U5jx46t73oAAABqrU73kTlw4ICuu+66+q4FAADgrNQpyFx33XVavHhxfdcCAABwVup0aKldu3Z65JFHtGbNGqWmpio4ONhn/ZgxY+qlOAAAgNNxGWPM2W6UnJx86gFdLn3++efnVFR9Ki8vV2RkpMrKyuTxeOp17NYPLazX8dCw7JjU398lAIC1avv9Xac9Mtu3b69zYQAAAPWlTufIAAAABII67ZG55ZZbTrv+pZdeqlMxAAAAZ6NOQebAgQM+y9XV1frkk09UWlp60h+TBAAAOB/qFGTmzZt3QltNTY1GjBihtm3bnnNRAAAAtVFv58i43W7l5uZqypQp9TUkAADAadXryb7btm3T0aNH63NIAACAU6rToaXc3FyfZWOM9uzZo4ULF2ro0KH1UhgAAMCZ1CnIrF+/3mfZ7XYrJiZGf/rTn854RRMAAEB9qVOQWbZsWX3XAQAAcNbqFGSO27dvn4qKiiRJHTp0UExMTL0UBQAAUBt1Otm3oqJCt9xyi+Lj49WjRw/16NFDCQkJGj58uA4fPlzfNQIAAJxUnYJMbm6u8vPztWDBApWWlqq0tFTz589Xfn6+7rvvvvquEQAA4KTq9OvXzZs31z//+U/17NnTp33ZsmW6/vrrtW/fvvqq75zx69dA7fGL3QACRW2/v+u0R+bw4cOKjY09ob1FixYcWgIAABdMnYKM1+vV+PHjdeTIEaft22+/1cSJE+X1euutOAAAgNOp01VLU6dOVd++fdWyZUulpaVJkj788EOFhoZq8eLF9VogAADAqdQpyKSmpmrLli165ZVX9Omnn0qSBg8erCFDhig8PLxeCwQAADiVOgWZvLw8xcbG6rbbbvNpf+mll7Rv3z6NHTu2XooDAAA4nTqdI/PCCy+oY8eOJ7Rffvnlev7558+5KAAAgNqoU5ApLi5WfHz8Ce0xMTHas2fPORcFAABQG3UKMomJiVq9evUJ7atXr1ZCQsI5FwUAAFAbdTpH5rbbbtM999yj6upq9erVS5K0dOlSPfjgg9zZFwAAXDB1CjIPPPCA9u/fr7vuuktVVVWSpLCwMI0dO1bjxo2r1wIBAABOpU5BxuVy6Q9/+IMeeeQRbd68WeHh4Wrfvr1CQ0Pruz4AAIBTqlOQOS4iIkJXXXVVfdUCAABwVup0si8AAEAgIMgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFp+DTJ5eXm66qqr1LRpU7Vo0UIDBgxQUVGRT58jR45o5MiRatasmSIiIjRo0CCVlJT4qWIAABBI/Bpk8vPzNXLkSK1Zs0ZLlixRdXW1+vTpo4qKCqfPvffeqwULFmju3Ln
Kz8/X7t27NXDgQD9WDQAAAkWQP5980aJFPsuzZ89WixYttG7dOvXo0UNlZWX6y1/+ojlz5qhXr16SpFmzZiklJUVr1qzRNddc44+yAQBAgAioc2TKysokSdHR0ZKkdevWqbq6WtnZ2U6fjh07qlWrViooKDjpGJWVlSovL/d5AACAhilggkxNTY3uuecedevWTZ07d5YkFRcXKyQkRFFRUT59Y2NjVVxcfNJx8vLyFBkZ6TwSExPPd+kAAMBPAibIjBw5Up988oleffXVcxpn3LhxKisrcx67du2qpwoBAECg8es5MseNGjVKb775plasWKGWLVs67XFxcaqqqlJpaanPXpmSkhLFxcWddKzQ0FCFhoae75IBAEAA8OseGWOMRo0apXnz5umdd95RcnKyz/r09HQFBwdr6dKlTltRUZF27twpr9d7ocsFAAABxq97ZEaOHKk5c+Zo/vz5atq0qXPeS2RkpMLDwxUZGanhw4crNzdX0dHR8ng8Gj16tLxeL1csAQAA/waZGTNmSJJ69uzp0z5r1izdfPPNkqQpU6bI7XZr0KBBqqysVE5OjqZPn36BKwUAAIHIr0HGGHPGPmFhYZo2bZqmTZt2ASoCAAA2CZirlgAAAM4WQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1gvxdAACci9YPLfR3CWdtx6T+/i4BaDDYIwMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgrSB/FwAgcLR+aKG/SwCAs8IeGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFp+DTIrVqzQz3/+cyUkJMjlcumNN97wWW+M0aOPPqr4+HiFh4crOztbW7Zs8U+xAAAg4Pg1yFRUVCgtLU3Tpk076fonn3xSzzzzjJ5//nkVFhaqSZMmysnJ0ZEjRy5wpQAAIBAF+fPJ+/Xrp379+p10nTFGU6dO1W9/+1tde+21kqS//vWvio2N1RtvvKEbb7zxQpYKAAACUMCeI7N9+3YVFxcrOzvbaYuMjFRGRoYKCgpOuV1lZaXKy8t9HgAAoGEK2CBTXFwsSYqNjfVpj42NddadTF5eniIjI51HYmLiea0TAAD4T8AGmboaN26cysrKnMeuXbv8XRIAADhPAjbIxMXFSZJKSkp82ktKSpx1JxMaGiqPx+PzAAAADVPABpnk5GTFxcVp6dKlTlt5ebkKCwvl9Xr9WBkAAAgUfr1q6dChQ9q6dauzvH37dm3YsEHR0dFq1aqV7rnnHj3xxBNq3769kpOT9cgjjyghIUEDBgzwX9EAACBg+DXIrF27Vj/5yU+c5dzcXEnS0KFDNXv2bD344IOqqKjQ7bffrtLSUnXv3l2LFi1SWFiYv0oGAAABxGWMMf4u4nwqLy9XZGSkysrK6v18mdYPLazX8QBcHHZM6u/vEoCAV9vv74A9RwYAAOBMCDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggw
AALAWQQYAAFiLIAMAAKwV5O8CAOBi0/qhhf4u4aztmNTf3yUAJ8UeGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAa3H5NQDgjLhkHIGKPTIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsFeTvAgAAwHdaP7TQ3yWctR2T+vv1+dkjAwAArEWQAQAA1rIiyEybNk2tW7dWWFiYMjIy9N577/m7JAAAEAACPsi89tprys3N1fjx4/XBBx8oLS1NOTk52rt3r79LAwAAfhbwQWby5Mm67bbbNGzYMHXq1EnPP/+8GjdurJdeesnfpQEAAD8L6CBTVVWldevWKTs722lzu93Kzs5WQUGBHysDAACBIKAvv/7666917NgxxcbG+rTHxsbq008/Pek2lZWVqqysdJbLysokSeXl5fVeX03l4XofEwBQP87H5/75ZuP3yvl6n4+Pa4w5bb+ADjJ1kZeXp4kTJ57QnpiY6IdqAAD+EjnV3xVcHM73+3zw4EFFRkaecn1AB5nmzZurUaNGKikp8WkvKSlRXFzcSbcZN26ccnNzneWamhp98803atasmVwu13mtt7y8XImJidq1a5c8Hs95fS7UDXMU+JgjOzBPgc/2OTLG6ODBg0pISDhtv4AOMiEhIUpPT9fSpUs1YMAASd8Fk6VLl2rUqFEn3SY0NFShoaE+bVFRUee5Ul8ej8fK/2guJsxR4GOO7MA8BT6b5+h0e2KOC+ggI0m5ubkaOnSorrzySl199dWaOnWqKioqNGzYMH+XBgAA/Czgg8wNN9ygffv26dFHH1VxcbG6du2qRYsWnXACMAAAuPgEfJCRpFGjRp3yUFIgCQ0N1fjx4084tIXAwRwFPubIDsxT4LtY5shlznRdEwAAQIAK6BviAQAAnA5BBgAAWIsgAwAArEWQAQAA1iLInKUJEybI5XL5PDp27OisP3LkiEaOHKlmzZopIiJCgwYNOuHOxKhfK1as0M9//nMlJCTI5XLpjTfe8FlvjNGjjz6q+Ph4hYeHKzs7W1u2bPHp880332jIkCHyeDyKiorS8OHDdejQoQv4Khq+M83TzTfffMLfVt++fX36ME/nV15enq666io1bdpULVq00IABA1RUVOTTpzafcTt37lT//v3VuHFjtWjRQg888ICOHj16IV9Kg1WbOerZs+cJf0t33nmnT5+GNEcEmTq4/PLLtWfPHuexatUqZ929996rBQsWaO7cucrPz9fu3bs1cOBAP1bb8FVUVCgtLU3Tpk076fonn3xSzzzzjJ5//nkVFhaqSZMmysnJ0ZEjR5w+Q4YM0caNG7VkyRK9+eabWrFihW6//fYL9RIuCmeaJ0nq27evz9/WP/7xD5/1zNP5lZ+fr5EjR2rNmjVasmSJqqur1adPH1VUVDh9zvQZd+zYMfXv319VVVV699139fLLL2v27Nl69NFH/fGSGpzazJEk3XbbbT5/S08++aSzrsHNkcFZGT9+vElLSzvputLSUhMcHGzmzp3rtG3evNlIMgUFBReowoubJDNv3jxnuaamxsTFxZmnnnrKaSstLTWhoaHmH//4hzHGmE2bNhlJ5v3333f6vPXWW8blcpmvvvrqgtV+MfnhPBljzNChQ8211157ym2Ypwtv7969RpLJz883xtTuM+7f//63cbvdpri42OkzY8YM4/F4TGVl5YV9AReBH86RMcZkZWWZu++++5TbNLQ5Yo9MHWzZskUJCQlq06aNhgwZop07d0qS1q1bp+rqamVnZzt9O3bsqFatWqmgoMBf5V7Utm/fruLiYp85iYyMVEZGhjMnBQUFioqK0pVXXun0yc7OltvtVmFh4QWv+WK2fPlytWjRQh06dNC
IESO0f/9+Zx3zdOGVlZVJkqKjoyXV7jOuoKBAqampPndfz8nJUXl5uTZu3HgBq784/HCOjnvllVfUvHlzde7cWePGjdPhw4eddQ1tjqy4s28gycjI0OzZs9WhQwft2bNHEydOVGZmpj755BMVFxcrJCTkhB+pjI2NVXFxsX8Kvsgdf99/+JMW35+T4uJitWjRwmd9UFCQoqOjmbcLqG/fvho4cKCSk5O1bds2Pfzww+rXr58KCgrUqFEj5ukCq6mp0T333KNu3bqpc+fOklSrz7ji4uKT/r0dX4f6c7I5kqSbbrpJSUlJSkhI0EcffaSxY8eqqKhI//M//yOp4c0RQeYs9evXz/l3ly5dlJGRoaSkJL3++usKDw/3Y2WA3W688Ubn36mpqerSpYvatm2r5cuXq3fv3n6s7OI0cuRIffLJJz7nACKwnGqOvn/eWGpqquLj49W7d29t27ZNbdu2vdBlnnccWjpHUVFRuuyyy7R161bFxcWpqqpKpaWlPn1KSkoUFxfnnwIvcsff9x9eVfH9OYmLi9PevXt91h89elTffPMN8+ZHbdq0UfPmzbV161ZJzNOFNGrUKL355ptatmyZWrZs6bTX5jMuLi7upH9vx9ehfpxqjk4mIyNDknz+lhrSHBFkztGhQ4e0bds2xcfHKz09XcHBwVq6dKmzvqioSDt37pTX6/VjlRev5ORkxcXF+cxJeXm5CgsLnTnxer0qLS3VunXrnD7vvPOOampqnA8AXHhffvml9u/fr/j4eEnM04VgjNGoUaM0b948vfPOO0pOTvZZX5vPOK/Xq48//tgndC5ZskQej0edOnW6MC+kATvTHJ3Mhg0bJMnnb6lBzZG/zza2zX333WeWL19utm/fblavXm2ys7NN8+bNzd69e40xxtx5552mVatW5p133jFr1641Xq/XeL1eP1fdsB08eNCsX7/erF+/3kgykydPNuvXrzdffPGFMcaYSZMmmaioKDN//nzz0UcfmWuvvdYkJyebb7/91hmjb9++5kc/+pEpLCw0q1atMu3btzeDBw/210tqkE43TwcPHjT333+/KSgoMNu3bzf/+c9/zBVXXGHat29vjhw54ozBPJ1fI0aMMJGRkWb58uVmz549zuPw4cNOnzN9xh09etR07tzZ9OnTx2zYsMEsWrTIxMTEmHHjxvnjJTU4Z5qjrVu3mscee8ysXbvWbN++3cyfP9+0adPG9OjRwxmjoc0RQeYs3XDDDSY+Pt6EhISYSy+91Nxwww1m69atzvpvv/3W3HXXXeaSSy4xjRs3Nr/85S/Nnj17/Fhxw7ds2TIj6YTH0KFDjTHfXYL9yCOPmNjYWBMaGmp69+5tioqKfMbYv3+/GTx4sImIiDAej8cMGzbMHDx40A+vpuE63TwdPnzY9OnTx8TExJjg4GCTlJRkbrvtNp/LQ41hns63k82PJDNr1iynT20+43bs2GH69etnwsPDTfPmzc19991nqqurL/CraZjONEc7d+40PXr0MNHR0SY0NNS0a9fOPPDAA6asrMxnnIY0Ry5jjLlw+38AAADqD+fIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABcFo9e/bUPffc4+8yAOCkCDIAAMBaBBkAF1RVVZW/S7hgLqbXCvgLQQbAGdXU1OjBBx9UdHS04uLiNGHCBGfdzp07de211yoiIkIej0fXX3+9SkpKnPUTJkxQ165dNXPmTCUnJyssLEyS9M9//lOpqakKDw9Xs2bNlJ2drYqKCme7mTNnKiUlRWFhYerYsaOmT5/urNuxY4dcLpdeffVV/fjHP1ZYWJg6d+6s/Px8n7rz8/N19dVXKzQ0VPHx8XrooYd09OhRSdKbb76pqKgoHTt2TNJ3vxDscrn00EMPOdvfeuut+u///m9nedWqVcrMzFR4eLgSExM1ZswYn5pbt26txx9/XL/+9a/l8Xh0++23n8vbDqA2/P1jTwA
CW1ZWlvF4PGbChAnms88+My+//LJxuVxm8eLF5tixY6Zr166me/fuZu3atWbNmjUmPT3dZGVlOduPHz/eNGnSxPTt29d88MEH5sMPPzS7d+82QUFBZvLkyWb79u3mo48+MtOmTXN+APLvf/+7iY+PN//617/M559/bv71r3+Z6OhoM3v2bGOMMdu3bzeSTMuWLc0///lPs2nTJnPrrbeapk2bmq+//toYY8yXX35pGjdubO666y6zefNmM2/ePNO8eXMzfvx4Y4wxpaWlxu12m/fff98YY8zUqVNN8+bNTUZGhlN7u3btzIsvvmiM+e5XhZs0aWKmTJliPvvsM7N69Wrzox/9yNx8881O/6SkJOPxeMwf//hHs3XrVp8flAVwfhBkAJxWVlaW6d69u0/bVVddZcaOHWsWL15sGjVqZHbu3Oms27hxo5Fk3nvvPWPMd0EmODjY7N271+mzbt06I8ns2LHjpM/Ztm1bM2fOHJ+2xx9/3Hi9XmPM/wWZSZMmOeurq6tNy5YtzR/+8AdjjDEPP/yw6dChg6mpqXH6TJs2zURERJhjx44ZY4y54oorzFNPPWWMMWbAgAHmd7/7nQkJCTEHDx40X375pZFkPvvsM2OMMcOHDze33367T00rV640brfbfPvtt8aY74LMgAEDTvt+AqhfHFoCcEZdunTxWY6Pj9fevXu1efNmJSYmKjEx0VnXqVMnRUVFafPmzU5bUlKSYmJinOW0tDT17t1bqampuu666/Tiiy/qwIEDkqSKigpt27ZNw4cPV0REhPN44okntG3bNp86vF6v8++goCBdeeWVzvNu3rxZXq9XLpfL6dOtWzcdOnRIX375pSQpKytLy5cvlzFGK1eu1MCBA5WSkqJVq1YpPz9fCQkJat++vSTpww8/1OzZs31qysnJUU1NjbZv3+48x5VXXlm3NxlAnQT5uwAAgS84ONhn2eVyqaamptbbN2nSxGe5UaNGWrJkid59910tXrxYzz77rH7zm9+osLBQjRs3liS9+OKLysjIOGG7+tSzZ0+99NJL+vDDDxUcHKyOHTuqZ8+eWr58uQ4cOKCsrCyn76FDh3THHXdozJgxJ4zTqlUr598/fK0Azi/2yACos5SUFO3atUu7du1y2jZt2qTS0lJ16tTptNu6XC5169ZNEydO1Pr16xUSEqJ58+YpNjZWCQkJ+vzzz9WuXTufR3Jyss8Ya9ascf599OhRrVu3TikpKU5tBQUFMsY4fVavXq2mTZuqZcuWkqTMzEwdPHhQU6ZMcULL8SCzfPly9ezZ09n2iiuu0KZNm06oqV27dgoJCanbGwjgnLFHBkCdZWdnKzU1VUOGDNHUqVN19OhR3XXXXcrKyjrtIZbCwkItXbpUffr0UYsWLVRYWKh9+/Y5IWTixIkaM2aMIiMj1bdvX1VWVmrt2rU6cOCAcnNznXGmTZum9u3bKyUlRVOmTNGBAwd0yy23SJLuuusuTZ06VaNHj9aoUaNUVFSk8ePHKzc3V273d/8Pd8kll6hLly565ZVX9Nxzz0mSevTooeuvv17V1dU+e2TGjh2ra665RqNGjdKtt96qJk2aaNOmTVqyZImzLYALjyADoM5cLpfmz5+v0aNHq0ePHnK73erbt6+effbZ027n8Xi0YsUKTZ06VeXl5UpKStKf/vQn9evXT9J3lz03btxYTz31lB544AE1adJEqampJ9xheNKkSZo0aZI2bNigdu3a6X//93/VvHlzSdKll16qf//733rggQeUlpam6OhoDR8+XL/97W99xsjKytKGDRucvS/R0dHq1KmTSkpK1KFDB6dfly5dlJ+fr9/85jfKzMyUMUZt27bVDTfccI7vIoBz4TLf3+8KABbYsWOHkpOTtX79enXt2tXf5QDwI86RAQAA1iLIAAAAa3FoCQAAWIs9MgAAwFoEGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAaxFkAACAtQgyAADAWv8
f8JLP2xEOD/oAAAAASUVORK5CYII=","text/plain":["
"]},"metadata":{},"output_type":"display_data"}],"source":["%matplotlib inline\n","import matplotlib as plt\n","from matplotlib import pyplot\n","plt.pyplot.hist(df[\"horsepower\"])\n","\n","# set x/y labels and plot title\n","plt.pyplot.xlabel(\"horsepower\")\n","plt.pyplot.ylabel(\"count\")\n","plt.pyplot.title(\"horsepower bins\")"]},{"cell_type":"markdown","metadata":{},"source":["
We would like 3 bins of equal width, so we use numpy's linspace(start_value, end_value, numbers_generated) function.
\n","
Since we want to include the minimum value of horsepower, we want to set start_value = min(df[\"horsepower\"]).
\n","
Since we want to include the maximum value of horsepower, we want to set end_value = max(df[\"horsepower\"]).
\n","
Since we are building 3 bins of equal length, there should be 4 dividers, so numbers_generated = 4.
\n"]},{"cell_type":"markdown","metadata":{},"source":["We build a bin array with a minimum value to a maximum value by using the bandwidth calculated above. The values will determine when one bin ends and another begins.\n"]},{"cell_type":"code","execution_count":172,"metadata":{},"outputs":[{"data":{"text/plain":["array([ 48. , 119.33333333, 190.66666667, 262. ])"]},"execution_count":172,"metadata":{},"output_type":"execute_result"}],"source":["bin = np.linspace(min(df[\"horsepower\"]), max(df[\"horsepower\"]), 4)\n","bin"]},{"cell_type":"markdown","metadata":{},"source":["We set group names:\n"]},{"cell_type":"code","execution_count":173,"metadata":{},"outputs":[],"source":["group_names = list([\"Low\", \"Medium\", \"High\"])"]},{"cell_type":"markdown","metadata":{},"source":["We apply the function \"cut\" to determine what each value of `df['horsepower']` belongs to.\n"]},{"cell_type":"code","execution_count":178,"metadata":{},"outputs":[],"source":["df[\"horsepower-binned\"] = pd.cut(df[\"horsepower\"], bin, labels=group_names)\n"]},{"cell_type":"markdown","metadata":{},"source":["Let's see the number of vehicles in each bin:\n"]},{"cell_type":"code","execution_count":179,"metadata":{},"outputs":[{"data":{"text/plain":["Low 152\n","Medium 43\n","High 5\n","Name: horsepower-binned, dtype: int64"]},"execution_count":179,"metadata":{},"output_type":"execute_result"}],"source":["df[\"horsepower-binned\"].value_counts()"]},{"cell_type":"markdown","metadata":{},"source":["Let's plot the distribution of each bin:\n"]},{"cell_type":"code","execution_count":180,"metadata":{},"outputs":[{"data":{"text/plain":["Text(0.5, 1.0, 'horsepower 
bins')"]},"execution_count":180,"metadata":{},"output_type":"execute_result"},{"data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAjsAAAHHCAYAAABZbpmkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA4F0lEQVR4nO3deXxNd/7H8fdNQhJLErFkqYQUJZRQKk0tSck0lmmZGqqjrV0XSzWtrUMxpaELhiottc0wXQdTbZWxJKh9mxa1LylNoiWJpSLk+/vDw/n1Fq3GjXsdr+fjcR6PnO/5nu/9nOu0eed7zj3XYYwxAgAAsCkvdxcAAABQlAg7AADA1gg7AADA1gg7AADA1gg7AADA1gg7AADA1gg7AADA1gg7AADA1gg7AADA1gg7gA2NGDFCDodDP/zwg7tLwS/MmjVLDodDmzZt+s2+CQkJSkhIKPqiAJsj7AAAAFvzcXcBAICrW7JkibtLAGyBmR0AhWKM0U8//eTuMjzSmTNnXDJO8eLFVbx4cZeMBdzOCDuAjWVnZ6tLly4KCgpSYGCgunbtqrNnzzr1uXDhgl555RVVqVJFvr6+qly5sl566SXl5eU59atcubL++Mc/6ssvv1SDBg3k7++vd955R5K0dOlSNW7cWEFBQSpVqpSqV6+ul156yWn/vLw8DR8+XFWrVpWvr68iIiI0cODAK17H4XCoT58+mjt3rqpXry4/Pz/Vr19faWlpVxzf1q1b1bJlSwUEBKhUqVJq3ry51q1b53T83t7emjhxotX2ww8/yMvLS2XLlpUxxmp/5plnFBoa6jT++vXr1aJFCwUGBqpEiRKKj4/XmjVrnPpcvj9q586d+stf/qIyZcqocePG1/w3uezs2bN66qmnVLZsWQUEBOjJJ5/UyZMnnfr88p6dlStXyuFw6MMPP9To0aNVsWJF+fn5qXnz5tq3b5/Tvnv37lW7du0UGhoqPz8/VaxYUR07dlROTs5v1gbYDZexABvr0KGDoqKilJKSoi1btmj69OmqUKGCxo4da/Xp0aOHZs+erT//+c964YUXtH79eqWkpGjXrl2aP3++03i7d+/WY489pqeeeko9e/ZU9erVtWPHDv3xj39UnTp19Le//U2+vr7at2+fUygoKCjQww8/rNWrV6tXr16Kjo7W119/rfHjx2vPnj1asGCB0+ukpqbqgw8+UL9+/eTr66u3335bLVq00IYNG3T33XdLknbs2KEmTZooICBAAwcOVLFixfTOO+8oISFBqampio2NVVBQkO6++26lpaWpX79+kqTVq1fL4XDoxIkT2rlzp2rVqiVJWrVqlZo0aWLVsHz5crVs2VL169fX8OHD5eXlpZkzZ6pZs2ZatWqVGjZs6FRz+/btVa1aNb366qtOIepa+vTpo6CgII0YMUK7d+/WlClTdPjwYSvQ/JoxY8bIy8tLL774onJycvTaa6+pU6dOWr9+vSTp/PnzSkpKUl5envr27avQ0FAdPXpUixYtUnZ2tgIDA3+zPsBWDADbGT58uJFkunXr5tT+pz/9yZQtW9Za37Ztm5FkevTo4dTvxRdfNJLM8uXLrbZKlSoZSWbx4sVOfcePH28kmePHj1+znn/84x/Gy8vLrFq1yql96tSpRpJZs2aN1SbJSDKbNm2y2g4fPmz8/PzMn/70J6utbdu2pnjx4mb//v1W27Fjx0zp0qVN06ZNrbbevXubkJAQaz05Odk0bdrUVKhQwUyZMsUYY8yPP/5oHA6H+fvf/26MMaagoMBUq1bNJCUlmYKCAmvfs2fPmqioKPOHP/zBarv8Xj/22GPXPP6fmzlzppFk6tevb86fP2+1v/baa0aSWbhwodUWHx9v4uPjrfUVK1YYSSY6Otrk5eVZ7X//+9+N
JPP1118bY4zZunWrkWQ++uij66oJsDsuYwE29vTTTzutN2nSRD/++KNyc3MlSZ9//rkkKTk52anfCy+8IEn67LPPnNqjoqKUlJTk1BYUFCRJWrhwoQoKCq5ax0cffaTo6GjVqFFDP/zwg7U0a9ZMkrRixQqn/nFxcapfv761HhkZqTZt2ujLL7/UxYsXdfHiRS1ZskRt27bVnXfeafULCwvTX/7yF61evdo6xiZNmigzM1O7d++WdGkGp2nTpmrSpIlWrVol6dJsjzHGmtnZtm2b9u7dq7/85S/68ccfrXrPnDmj5s2bKy0t7Ypj/eV7/Vt69eqlYsWKWevPPPOMfHx8rH+TX9O1a1ene3ku133gwAFJsmZuvvzyyysuWwK3I8IOYGORkZFO62XKlJEk696Qw4cPy8vLS1WrVnXqFxoaqqCgIB0+fNipPSoq6orXePTRR9WoUSP16NFDISEh6tixoz788EOnMLB3717t2LFD5cuXd1ruuusuSVJWVpbTmNWqVbvide666y6dPXtWx48f1/Hjx3X27FlVr179in7R0dEqKChQenq6pP8PAqtWrdKZM2e0detWNWnSRE2bNrXCzqpVqxQQEKCYmBirXknq3LnzFTVPnz5deXl5V9z7crX35tf88hhLlSqlsLAwHTp06Df3/a1/16ioKCUnJ2v69OkqV66ckpKSNHnyZO7XwW2Le3YAG/P29r5qu/nFPSW/dY/IZf7+/ldtS0tL04oVK/TZZ59p8eLF+uCDD9SsWTMtWbJE3t7eKigoUO3atTVu3LirjhsREXFdr18Y4eHhioqKUlpamipXrixjjOLi4lS+fHk999xzOnz4sFatWqX7779fXl6X/v67HNRef/111a1b96rjlipVymn9au9NUbmef9c333xTXbp00cKFC7VkyRL169dPKSkpWrdunSpWrHizSgU8AmEHuI1VqlRJBQUF2rt3r6Kjo632zMxMZWdnq1KlStc1jpeXl5o3b67mzZtr3LhxevXVV/XXv/5VK1asUGJioqpUqaLt27erefPm1xWsLs+s/NyePXtUokQJlS9fXpJUokQJ69LUz3377bfy8vJyClBNmjRRWlqaoqKiVLduXZUuXVoxMTEKDAzU4sWLtWXLFo0cOdLqX6VKFUlSQECAEhMTr+s9+L327t2rBx54wFo/ffq0vv/+e7Vq1cplr1G7dm3Vrl1bQ4cO1VdffaVGjRpp6tSpGjVqlMteA7gVcBkLuI1d/sU6YcIEp/bLMzCtW7f+zTFOnDhxRdvl2ZDLHyvv0KGDjh49qmnTpl3R96effrriuTRr167Vli1brPX09HQtXLhQDz74oLy9veXt7a0HH3xQCxcudLrsk5mZqXnz5qlx48YKCAiw2ps0aaJDhw7pgw8+sC5reXl56f7779e4ceOUn5/v9Ems+vXrq0qVKnrjjTd0+vTpK2o+fvz4b74vv+Xdd99Vfn6+tT5lyhRduHBBLVu2vOGxc3NzdeHCBae22rVry8vL64qP+gO3A2Z2gNtYTEyMOnfurHfffVfZ2dmKj4/Xhg0bNHv2bLVt29Zp5uFa/va3vyktLU2tW7dWpUqVlJWVpbffflsVK1a0njfzxBNP6MMPP9TTTz+tFStWqFGjRrp48aK+/fZbffjhh9azey67++67lZSU5PTRc0lOsy+jRo2ynu/z7LPPysfHR++8847y8vL02muvOdV4Ocjs3r1br776qtXetGlTffHFF/L19dW9995rtXt5eWn69Olq2bKlatWqpa5du+qOO+7Q0aNHtWLFCgUEBOjTTz8txDv+/86fP6/mzZurQ4cO2r17t95++201btxYDz/88A2NK1362HyfPn3Uvn173XXXXbpw4YL+8Y9/yNvbW+3atbvh8YFbDWEHuM1Nnz5dd955p2bNmqX58+crNDRUQ4YM0fDhw69r/4cffliHDh3SjBkz9MMPP6hcuXKKj4/XyJEjrU8FeXl5acGCBRo/frzm
zJmj+fPnq0SJErrzzjv13HPPWTcqXxYfH6+4uDiNHDlSR44cUc2aNTVr1izVqVPH6lOrVi2tWrVKQ4YMUUpKigoKChQbG6t//vOfio2NdRqvevXqqlChgrKyspwe+Hc5BDVs2FC+vr5O+yQkJGjt2rV65ZVX9NZbb+n06dMKDQ1VbGysnnrqqet/g6/hrbfe0ty5c/Xyyy8rPz9fjz32mCZOnHjd90/9mpiYGCUlJenTTz/V0aNHVaJECcXExOiLL77Qfffdd8PjA7cah/nlnYoA4EYOh0O9e/fWW2+95e5SANgE9+wAAABbI+wAAABbI+wAAABb4wZlAB6F2wgBuBozOwAAwNYIOwAAwNa4jKVL34Nz7NgxlS5d2iXPuAAAAEXPGKNTp04pPDzc+m67qyHsSDp27FiRfhEhAAAoOunp6b/6BbeEHUmlS5eWdOnN+vn36QAAAM+Vm5uriIgI6/f4tRB2JOvSVUBAAGEHAIBbzG/dgsINygAAwNYIOwAAwNYIOwAAwNYIOwAAwNYIOwAAwNYIOwAAwNYIOwAAwNYIOwAAwNYIOwAAwNYIOwAAwNYIOwAAwNYIOwAAwNYIOwAAwNYIOwAAwNYIOwAAwNZ83F2A3VUe/Jm7S4CbHRrT2t0lAMBtjZkdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga24NO2lpaXrooYcUHh4uh8OhBQsWXLPv008/LYfDoQkTJji1nzhxQp06dVJAQICCgoLUvXt3nT59umgLBwAAtwy3hp0zZ84oJiZGkydP/tV+8+fP17p16xQeHn7Ftk6dOmnHjh1aunSpFi1apLS0NPXq1auoSgYAALcYH3e+eMuWLdWyZctf7XP06FH17dtXX375pVq3bu20bdeuXVq8eLE2btyoBg0aSJImTZqkVq1a6Y033rhqOAIAALcXj75np6CgQE888YQGDBigWrVqXbF97dq1CgoKsoKOJCUmJsrLy0vr16+/maUCAAAP5daZnd8yduxY+fj4qF+/flfdnpGRoQoVKji1+fj4KDg4WBkZGdccNy8vT3l5edZ6bm6uawoGAAAex2NndjZv3qy///3vmjVrlhwOh0vHTklJUWBgoLVERES4dHwAAOA5PDbsrFq1SllZWYqMjJSPj498fHx0+PBhvfDCC6pcubIkKTQ0VFlZWU77XbhwQSdOnFBoaOg1xx4yZIhycnKsJT09vSgPBQAAuJHHXsZ64oknlJiY6NSWlJSkJ554Ql27dpUkxcXFKTs7W5s3b1b9+vUlScuXL1dBQYFiY2OvObavr698fX2LrngAAOAx3Bp2Tp8+rX379lnrBw8e1LZt2xQcHKzIyEiVLVvWqX+xYsUUGhqq6tWrS5Kio6PVokUL9ezZU1OnTlV+fr769Omjjh078kksAAAgyc2XsTZt2qR69eqpXr16kqTk5GTVq1dPL7/88nWPMXfuXNWoUUPNmzdXq1at1LhxY7377rtFVTIAALjFuHVmJyEhQcaY6+5/6NChK9qCg4M1b948F1YFAADsxGNvUAYAAHAFwg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1wg4AALA1t4adtLQ0PfTQQwoPD5fD4dCCBQusbfn5
+Ro0aJBq166tkiVLKjw8XE8++aSOHTvmNMaJEyfUqVMnBQQEKCgoSN27d9fp06dv8pEAAABP5dawc+bMGcXExGjy5MlXbDt79qy2bNmiYcOGacuWLfr3v/+t3bt36+GHH3bq16lTJ+3YsUNLly7VokWLlJaWpl69et2sQwAAAB7OYYwx7i5CkhwOh+bPn6+2bdtes8/GjRvVsGFDHT58WJGRkdq1a5dq1qypjRs3qkGDBpKkxYsXq1WrVvruu+8UHh5+Xa+dm5urwMBA5eTkKCAgwBWHY6k8+DOXjodbz6Exrd1dAgDY0vX+/r6l7tnJycmRw+FQUFCQJGnt2rUKCgqygo4kJSYmysvLS+vXr7/mOHl5ecrNzXVaAACAPd0yYefcuXMaNGiQHnvsMSu9ZWRkqEKFCk79fHx8FBwcrIyMjGuOlZKSosDAQGuJiIgo0toBAID73BJhJz8/Xx06dJAxRlOmTLnh8YYMGaKcnBxrSU9Pd0GVAADAE/m4u4DfcjnoHD58WMuXL3e6JhcaGqqsrCyn/hcuXNCJEycUGhp6zTF9fX3l6+tbZDUDAADP4dEzO5eDzt69e/Xf//5XZcuWddoeFxen7Oxsbd682Wpbvny5CgoKFBsbe7PLBQAAHsitMzunT5/Wvn37rPWDBw9q27ZtCg4OVlhYmP785z9ry5YtWrRokS5evGjdhxMcHKzixYsrOjpaLVq0UM+ePTV16lTl5+erT58+6tix43V/EgsAANibW8POpk2b9MADD1jrycnJkqTOnTtrxIgR+s9//iNJqlu3rtN+K1asUEJCgiRp7ty56tOnj5o3by4vLy+1a9dOEydOvCn1AwAAz+fWsJOQkKBfe8zP9TwCKDg4WPPmzXNlWQAAwEY8+p4dAACAG0XYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAQAAtubWsJOWlqaHHnpI4eHhcjgcWrBggdN2Y4xefvllhYWFyd/fX4mJidq7d69TnxMnTqhTp04KCAhQUFCQunfvrtOnT9/EowAAAJ7MrWHnzJkziomJ0eTJk6+6/bXXXtPEiRM1depUrV+/XiVLllRSUpLOnTtn9enUqZN27NihpUuXatGiRUpLS1OvXr1u1iEAAAAP5+POF2/ZsqVatmx51W3GGE2YMEFDhw5VmzZtJElz5sxRSEiIFixYoI4dO2rXrl1avHixNm7cqAYNGkiSJk2apFatWumNN95QeHj4TTsWAADgmTz2np2DBw8qIyNDiYmJVltgYKBiY2O1du1aSdLatWsVFBRkBR1JSkxMlJeXl9avX3/NsfPy8pSbm+u0AAAAe/LYsJORkSFJCgkJcWoPCQmxtmVkZKhChQpO2318fBQcHGz1uZqUlBQFBgZaS0REhIurBwAAnsJjw05RGjJkiHJycqwlPT3d3SUBAIAi4rFhJzQ0VJKUmZnp1J6ZmWltCw0NVVZWltP2Cxcu6MSJE1afq/H19VVAQIDTAgAA7Mljw05UVJRCQ0O1bNkyqy03N1fr169XXFycJCkuLk7Z2dnavHmz1Wf58uUqKChQbGzsTa8ZAAB4Hrd+Guv06dPat2+ftX7w4EFt27ZNwcHBioyMVP/+/TVq1ChVq1ZNUVFRGjZsmMLDw9W2bVtJUnR0tFq0aKGePXtq6tSpys/PV58+fdSxY0c+iQUAACS5Oexs2rRJDzzwgLWenJwsSercubNmzZqlgQMH6syZM+rVq5eys7PVuHFjLV68WH5+ftY+c+fOVZ8+fdS8
eXN5eXmpXbt2mjhx4k0/FgAA4Jkcxhjj7iLcLTc3V4GBgcrJyXH5/TuVB3/m0vFw6zk0prW7SwAAW7re398ee88OAACAKxB2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRUq7DRr1kzZ2dlXtOfm5qpZs2Y3WhMAAIDLFCrsrFy5UufPn7+i/dy5c1q1atUNFwUAAOAqPr+n8//+9z/r5507dyojI8Nav3jxohYvXqw77rjDddUBAADcoN8VdurWrSuHwyGHw3HVy1X+/v6aNGmSy4oDAAC4Ub8r7Bw8eFDGGN15553asGGDypcvb20rXry4KlSoIG9vb5cXCQAAUFi/K+xUqlRJklRQUFAkxQAAALja7wo7P7d3716tWLFCWVlZV4Sfl19++YYLAwAAcIVChZ1p06bpmWeeUbly5RQaGiqHw2FtczgchB0AAOAxChV2Ro0apdGjR2vQoEGurgcAAMClCvWcnZMnT6p9+/aurgUAAMDlChV22rdvryVLlri6FgAAAJcr1GWsqlWratiwYVq3bp1q166tYsWKOW3v16+fS4oDAAC4UQ5jjPm9O0VFRV17QIdDBw4cuKGiLrt48aJGjBihf/7zn8rIyFB4eLi6dOmioUOHWjdFG2M0fPhwTZs2TdnZ2WrUqJGmTJmiatWqXffr5ObmKjAwUDk5OQoICHBJ7ZdVHvyZS8fDrefQmNbuLgEAbOl6f38Xambn4MGDhS7s9xg7dqymTJmi2bNnq1atWtq0aZO6du2qwMBAa/botdde08SJEzV79mxFRUVp2LBhSkpK0s6dO+Xn53dT6gQAAJ6r0M/ZuRm++uortWnTRq1bX/rLuHLlyvrXv/6lDRs2SLo0qzNhwgQNHTpUbdq0kSTNmTNHISEhWrBggTp27Oi22gEAgGcoVNjp1q3br26fMWNGoYr5pfvvv1/vvvuu9uzZo7vuukvbt2/X6tWrNW7cOEmXZpgyMjKUmJho7RMYGKjY2FitXbv2mmEnLy9PeXl51npubq5L6gUAAJ6nUGHn5MmTTuv5+fn65ptvlJ2dfdUvCC2swYMHKzc3VzVq1JC3t7cuXryo0aNHq1OnTpJkfet6SEiI034hISFO38j+SykpKRo5cqTL6gQAAJ6rUGFn/vz5V7QVFBTomWeeUZUqVW64qMs+/PBDzZ07V/PmzVOtWrW0bds29e/fX+Hh4ercuXOhxx0yZIiSk5Ot9dzcXEVERLiiZAAA4GFcds+Ol5eXkpOTlZCQoIEDB7pkzAEDBmjw4MHW5ajatWvr8OHDSklJUefOnRUaGipJyszMVFhYmLVfZmam6tate81xfX195evr65IaAQCAZyvUQwWvZf/+/bpw4YLLxjt79qy8vJxL9Pb2tr54NCoqSqGhoVq2bJm1PTc3V+vXr1dcXJzL6gAAALeuQs3s/PwSkHTpU1Hff/+9Pvvssxu6vPRLDz30kEaPHq3IyEjVqlVLW7du1bhx46wbpB0Oh/r3769Ro0apWrVq1kfPw8PD1bZtW5fVAQAAbl2FCjtbt251Wvfy8lL58uX15ptv/uYntX6PSZMmadiwYXr22WeVlZWl8PBwPfXUU07fqj5w4ECdOXNGvXr1UnZ2tho3bqzFixfzjB0AACCpkE9QthueoIyixBOUAaBoFOkTlC87fvy4du/eLUmqXr26ypcvfyPDAQAAuFyhblA+c+aMunXrprCwMDVt2lRNmzZVeHi4unfvrrNnz7q6RgAAgEIrVNhJTk5WamqqPv30U2VnZys7O1sLFy5UamqqXnjhBVfXCAAAUGiFuoz1ySef6OOPP1ZCQoLV1qpVK/n7+6tDhw6aMmWKq+oDAAC4IYWa2Tl7
9uwVX9EgSRUqVOAyFgAA8CiFCjtxcXEaPny4zp07Z7X99NNPGjlyJA/zAwAAHqVQl7EmTJigFi1aqGLFioqJiZEkbd++Xb6+vlqyZIlLCwQAALgRhQo7tWvX1t69ezV37lx9++23kqTHHntMnTp1kr+/v0sLBAAAuBGFCjspKSkKCQlRz549ndpnzJih48ePa9CgQS4pDgAA4EYV6p6dd955RzVq1LiivVatWpo6deoNFwUAAOAqhQo7GRkZCgsLu6K9fPny+v7772+4KAAAAFcpVNiJiIjQmjVrrmhfs2aNwsPDb7goAAAAVynUPTs9e/ZU//79lZ+fr2bNmkmSli1bpoEDB/IEZQAA4FEKFXYGDBigH3/8Uc8++6zOnz8vSfLz89OgQYM0ZMgQlxYIAABwIwoVdhwOh8aOHathw4Zp165d8vf3V7Vq1eTr6+vq+gAAAG5IocLOZaVKldK9997rqloAAABcrlA3KAMAANwqCDsAAMDWCDsAAMDWCDsAAMDWCDsAAMDWCDsAAMDWCDsAAMDWCDsAAMDWCDsAAMDWCDsAAMDWCDsAAMDWCDsAAMDWCDsAAMDWCDsAAMDWCDsAAMDWCDsAAMDWCDsAAMDWCDsAAMDWCDsAAMDWPD7sHD16VI8//rjKli0rf39/1a5dW5s2bbK2G2P08ssvKywsTP7+/kpMTNTevXvdWDEAAPAkHh12Tp48qUaNGqlYsWL64osvtHPnTr355psqU6aM1ee1117TxIkTNXXqVK1fv14lS5ZUUlKSzp0758bKAQCAp/BxdwG/ZuzYsYqIiNDMmTOttqioKOtnY4wmTJigoUOHqk2bNpKkOXPmKCQkRAsWLFDHjh1ves0AAMCzePTMzn/+8x81aNBA7du3V4UKFVSvXj1NmzbN2n7w4EFlZGQoMTHRagsMDFRsbKzWrl3rjpIBAICH8eiwc+DAAU2ZMkXVqlXTl19+qWeeeUb9+vXT7NmzJUkZGRmSpJCQEKf9QkJCrG1Xk5eXp9zcXKcFAADYk0dfxiooKFCDBg306quvSpLq1aunb775RlOnTlXnzp0LPW5KSopGjhzpqjIBAIAH8+iZnbCwMNWsWdOpLTo6WkeOHJEkhYaGSpIyMzOd+mRmZlrbrmbIkCHKycmxlvT0dBdXDgAAPIVHh51GjRpp9+7dTm179uxRpUqVJF26WTk0NFTLli2ztufm5mr9+vWKi4u75ri+vr4KCAhwWgAAgD159GWs559/Xvfff79effVVdejQQRs2bNC7776rd999V5LkcDjUv39/jRo1StWqVVNUVJSGDRum8PBwtW3b1r3FAwAAj+DRYefee+/V/PnzNWTIEP3tb39TVFSUJkyYoE6dOll9Bg4cqDNnzqhXr17Kzs5W48aNtXjxYvn5+bmxcgAA4Ckcxhjj7iLcLTc3V4GBgcrJyXH5Ja3Kgz9z6Xi49Rwa09rdJQCALV3v72+PvmcHAADgRhF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArRF2AACArfm4uwAARavy4M/cXQLc7NCY1u4uAXArZnYAAICtEXYAAICt3VJhZ8yYMXI4HOrfv7/Vdu7cOfXu3Vtly5ZVqVKl1K5dO2VmZrqvSAAA4FFumbCzceNGvfPOO6pTp45T+/PPP69PP/1UH330kVJTU3Xs2DE98sgjbqoSAAB4mlsi7Jw+fVqdOnXStGnTVKZMGas9JydH7733nsaNG6dmzZqpfv36mjlzpr766iutW7fOjRUDAABPcUuEnd69e6t169ZK
TEx0at+8ebPy8/Od2mvUqKHIyEitXbv2muPl5eUpNzfXaQEAAPbk8R89f//997VlyxZt3Ljxim0ZGRkqXry4goKCnNpDQkKUkZFxzTFTUlI0cuRIV5cKAAA8kEfP7KSnp+u5557T3Llz5efn57JxhwwZopycHGtJT0932dgAAMCzeHTY2bx5s7KysnTPPffIx8dHPj4+Sk1N1cSJE+Xj46OQkBCdP39e2dnZTvtlZmYqNDT0muP6+voqICDAaQEAAPbk0Zexmjdvrq+//tqprWvXrqpRo4YGDRqkiIgIFStWTMuWLVO7du0kSbt379aRI0cUFxfnjpIBAICH8eiwU7p0ad19991ObSVLllTZsmWt9u7duys5OVnBwcEKCAhQ3759FRcXp/vuu88dJQMAAA/j0WHneowfP15eXl5q166d8vLylJSUpLffftvdZQEAAA9xy4WdlStXOq37+flp8uTJmjx5snsKAgAAHs2jb1AGAAC4UYQdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABgax4ddlJSUnTvvfeqdOnSqlChgtq2bavdu3c79Tl37px69+6tsmXLqlSpUmrXrp0yMzPdVDEAAPA0Hh12UlNT1bt3b61bt05Lly5Vfn6+HnzwQZ05c8bq8/zzz+vTTz/VRx99pNTUVB07dkyPPPKIG6sGAACexMfdBfyaxYsXO63PmjVLFSpU0ObNm9W0aVPl5OTovffe07x589SsWTNJ0syZMxUdHa1169bpvvvuc0fZAADAg3j0zM4v5eTkSJKCg4MlSZs3b1Z+fr4SExOtPjVq1FBkZKTWrl17zXHy8vKUm5vrtAAAAHu6ZcJOQUGB+vfvr0aNGunuu++WJGVkZKh48eIKCgpy6hsSEqKMjIxrjpWSkqLAwEBriYiIKMrSAQCAG90yYad379765ptv9P7779/wWEOGDFFOTo61pKenu6BCAADgiTz6np3L+vTpo0WLFiktLU0VK1a02kNDQ3X+/HllZ2c7ze5kZmYqNDT0muP5+vrK19e3KEsGAAAewqNndowx6tOnj+bPn6/ly5crKirKaXv9+vVVrFgxLVu2zGrbvXu3jhw5ori4uJtdLgAA8EAePbPTu3dvzZs3TwsXLlTp0qWt+3ACAwPl7++vwMBAde/eXcnJyQoODlZAQID69u2ruLg4PokFAAAkeXjYmTJliiQpISHBqX3mzJnq0qWLJGn8+PHy8vJSu3btlJeXp6SkJL399ts3uVIAAOCpPDrsGGN+s4+fn58mT56syZMn34SKAADArcaj79kBAAC4UYQdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABga4QdAABgaz7uLgAAYG+VB3/m7hLgZofGtHbr6zOzAwAAbI2wAwAAbI2wAwAAbI2wAwAAbI2wAwAAbI2wAwAAbM02YWfy5MmqXLmy/Pz8FBsbqw0bNri7JAAA4AFsEXY++OADJScna/jw4dqyZYtiYmKUlJSkrKwsd5cGAADczBZhZ9y4cerZs6e6du2qmjVraurUqSpRooRmzJjh7tIAAICb3fJh5/z589q8ebMSExOtNi8vLyUmJmrt2rVu
rAwAAHiCW/7rIn744QddvHhRISEhTu0hISH69ttvr7pPXl6e8vLyrPWcnBxJUm5ursvrK8g76/IxcWspivPq9+AcBOcg3K2ozsHL4xpjfrXfLR92CiMlJUUjR468oj0iIsIN1cDuAie4uwLc7jgH4W5FfQ6eOnVKgYGB19x+y4edcuXKydvbW5mZmU7tmZmZCg0Nveo+Q4YMUXJysrVeUFCgEydOqGzZsnI4HEVa7+0mNzdXERERSk9PV0BAgLvLwW2IcxDuxjlYdIwxOnXqlMLDw3+13y0fdooXL6769etr2bJlatu2raRL4WXZsmXq06fPVffx9fWVr6+vU1tQUFARV3p7CwgI4D9yuBXnINyNc7Bo/NqMzmW3fNiRpOTkZHXu3FkNGjRQw4YNNWHCBJ05c0Zdu3Z1d2kAAMDNbBF2Hn30UR0/flwvv/yyMjIyVLduXS1evPiKm5YBAMDtxxZhR5L69OlzzctWcB9fX18NHz78isuGwM3COQh34xx0P4f5rc9rAQAA3MJu+YcKAgAA/BrCDgAAsDXCDgAAsDXCDgBbWrlypRwOh7KzsyVJs2bN4nlaKHKFOc+6dOliPScORYOwg9+F/yjhKl26dJHD4dDTTz99xbbevXvL4XCoS5cuLnu9Rx99VHv27HHZeLj9XOv/fz8P1pxnnomwA8BtIiIi9P777+unn36y2s6dO6d58+YpMjLSpa/l7++vChUquHRM4Jc4zzwTYQcuk5qaqoYNG8rX11dhYWEaPHiwLly4IElatGiRgoKCdPHiRUnStm3b5HA4NHjwYGv/Hj166PHHH3dL7XCPe+65RxEREfr3v/9ttf373/9WZGSk6tWrZ7UVFBQoJSVFUVFR8vf3V0xMjD7++GOnsT7//HPddddd8vf31wMPPKBDhw45bf/l5YWr/ZXev39/JSQkWOsJCQnq27ev+vfvrzJlyigkJETTpk2zntBeunRpVa1aVV988cUNvxewh6tdxho1apQqVKig0qVLq0ePHho8eLDq1q17xb5vvPGGwsLCVLZsWfXu3Vv5+fk3p+jbAGEHLnH06FG1atVK9957r7Zv364pU6bovffe06hRoyRJTZo00alTp7R161ZJl4JRuXLltHLlSmuM1NRUp180uD1069ZNM2fOtNZnzJhxxVe9pKSkaM6cOZo6dap27Nih559/Xo8//rhSU1MlSenp6XrkkUf00EMPadu2bdYvFFeYPXu2ypUrpw0bNqhv37565pln1L59e91///3asmWLHnzwQT3xxBM6e/asS14P9jJ37lyNHj1aY8eO1ebNmxUZGakpU6Zc0W/FihXav3+/VqxYodmzZ2vWrFmaNWvWzS/YrgzwO3Tu3Nm0adPmivaXXnrJVK9e3RQUFFhtkydPNqVKlTIXL140xhhzzz33mNdff90YY0zbtm3N6NGjTfHixc2pU6fMd999ZySZPXv23JTjgPtdPpeysrKMr6+vOXTokDl06JDx8/Mzx48fN23atDGdO3c2586dMyVKlDBfffWV0/7du3c3jz32mDHGmCFDhpiaNWs6bR80aJCRZE6ePGmMMWbmzJkmMDDwitf/ueeee87Ex8db6/Hx8aZx48bW+oULF0zJkiXNE088YbV9//33RpJZu3btDbwbuBV07tzZeHt7m5IlSzotfn5+1rn2y/MsNjbW9O7d22mcRo0amZiYGKdxK1WqZC5cuGC1tW/f3jz66KNFfUi3DWZ24BK7du1SXFycHA6H1daoUSOdPn1a3333nSQpPj5eK1eulDFGq1at0iOPPKLo6GitXr1aqampCg8PV7Vq1dx1CHCT8uXLq3Xr1po1a5Zmzpyp1q1bq1y5ctb2ffv26ezZs/rDH/6gUqVKWcucOXO0f/9+SZfOv9jYWKdx4+LiXFJfnTp1rJ+9vb1VtmxZ1a5d22q7/B18WVlZLnk9eLYHHnhA27Ztc1qmT59+zf67d+9Ww4YNndp+uS5JtWrVkre3t7Ue
FhbGOeVCtvluLHi+hIQEzZgxQ9u3b1exYsVUo0YNJSQkaOXKlTp58qTi4+PdXSLcpFu3btZ3202ePNlp2+nTpyVJn332me644w6nbTfyXUNeXl4yv/i2nKvdI1GsWDGndYfD4dR2OeAXFBQUuhbcOkqWLKmqVas6tV3+g+5GXO0845xyHWZ24BLR0dFau3at0y+PNWvWqHTp0qpYsaKk/79vZ/z48VawuRx2Vq5cyf06t7EWLVro/Pnzys/PV1JSktO2mjVrytfXV0eOHFHVqlWdloiICEmXzr8NGzY47bdu3bpffc3y5cvr+++/d2rbtm3bjR8M8DPVq1fXxo0bndp+uY6iR9jB75aTk3PFNG6vXr2Unp6uvn376ttvv9XChQs1fPhwJScny8vr0mlWpkwZ1alTR3PnzrWCTdOmTbVlyxbt2bOHmZ3bmLe3t3bt2qWdO3c6TeVLUunSpfXiiy/q+eef1+zZs7V//35t2bJFkyZN0uzZsyVJTz/9tPbu3asBAwZo9+7dmjdv3m/e3NmsWTNt2rRJc+bM0d69ezV8+HB98803RXWIuE317dtX7733nmbPnq29e/dq1KhR+t///ud0yR9Fj8tY+N1Wrlzp9LFgSerevbs+//xzDRgwQDExMQoODlb37t01dOhQp37x8fHatm2bFXaCg4NVs2ZNZWZmqnr16jfrEOCBAgICrrntlVdeUfny5ZWSkqIDBw4oKChI99xzj1566SVJUmRkpD755BM9//zzmjRpkho2bKhXX31V3bp1u+aYSUlJGjZsmAYOHKhz586pW7duevLJJ/X111+7/Nhw++rUqZMOHDigF198UefOnVOHDh3UpUuXK2YiUbQc5pcXrQEAQJH5wx/+oNDQUP3jH/9wdym3DWZ2AAAoImfPntXUqVOVlJQkb29v/etf/9J///tfLV261N2l3VaY2QEAoIj89NNPeuihh7R161adO3dO1atX19ChQ/XII4+4u7TbCmEHAADYGp/GAgAAtkbYAQAAtkbYAQAAtkbYAQAAtkbYAXDDEhIS1L9/f3eXAQBXRdgBAAC2RtgB4HHOnz/v7hJumtvpWAF3IewAcImCggINHDhQwcHBCg0N1YgRI6xtR44cUZs2bVSqVCkFBASoQ4cOyszMtLaPGDFCdevW1fTp0xUVFSU/Pz9J0scff6zatWvL399fZcuWVWJios6cOWPtN336dEVHR8vPz081atTQ22+/bW07dOiQHA6H3n//fd1///3y8/PT3XffrdTUVKe6U1NT1bBhQ/n6+iosLEyDBw/WhQsXJEmLFi1SUFCQLl68KOnSt6I7HA4NHjzY2r9Hjx56/PHHrfXVq1erSZMm8vf3V0REhPr16+dUc+XKlfXKK6/oySefVEBAgHr16nUjbzuA62EA4AbFx8ebgIAAM2LECLNnzx4ze/Zs43A4zJIlS8zFixdN3bp1TePGjc2mTZvMunXrTP369U18fLy1//Dhw03JkiVNixYtzJYtW8z27dvNsWPHjI+Pjxk3bpw5ePCg+d///mcmT55sTp06ZYwx5p///KcJCwszn3zyiTlw4ID55JNPTHBwsJk1a5YxxpiDBw8aSaZixYrm448/Njt37jQ9evQwpUuXNj/88IMxxpjvvvvOlChRwjz77LNm165dZv78+aZcuXJm+PDhxhhjsrOzjZeXl9m4caMxxpgJEyaYcuXKmdjYWKv2qlWrmmnTphljjNm3b58pWbKkGT9+vNmzZ49Zs2aNqVevnunSpYvVv1KlSiYgIMC88cYbZt++fWbfvn1F9u8C4BLCDoAbFh8fbxo3buzUdu+995pBgwaZJUuWGG9vb3PkyBFr244dO4wks2HDBmPMpbBTrFgxk5WVZfXZvHmzkWQOHTp01desUqWKmTdvnlPbK6+8YuLi4owx/x92xowZY23Pz883FStWNGPHjjXGGPPSSy+Z6tWrm4KCAqvP5MmTTalSpczFixeNMcbcc8895vXX
XzfGGNO2bVszevRoU7x4cXPq1Cnz3XffGUlmz549xhhjunfvbnr16uVU06pVq4yXl5f56aefjDGXwk7btm1/9f0E4FpcxgLgEnXq1HFaDwsLU1ZWlnbt2qWIiAhFRERY22rWrKmgoCDt2rXLaqtUqZLKly9vrcfExKh58+aqXbu22rdvr2nTpunkyZOSpDNnzmj//v3q3r27SpUqZS2jRo3S/v37neqIi4uzfvbx8VGDBg2s1921a5fi4uLkcDisPo0aNdLp06f13XffSZLi4+O1cuVKGWO0atUqPfLII4qOjtbq1auVmpqq8PBwVatWTZK0fft2zZo1y6mmpKQkFRQU6ODBg9ZrNGjQoHBvMoBC4VvPAbhEsWLFnNYdDocKCgque/+SJUs6rXt7e2vp0qX66quvtGTJEk2aNEl//etftX79epUoUUKSNG3aNMXGxl6xnyslJCRoxowZ2r59u4oVK6YaNWooISFBK1eu1MmTJxUfH2/1PX36tJ566in169fvinEiIyOtn395rACKFjM7AIpUdHS00tPTlZ6ebrXt3LlT2dnZqlmz5q/u63A41KhRI40cOVJbt25V8eLFNX/+fIWEhCg8PFwHDhxQ1apVnZaoqCinMdatW2f9fOHCBW3evFnR0dFWbWvXrpX52fchr1mzRqVLl1bFihUlSU2aNNGpU6c0fvx4K9hcDjsrV65UQkKCte8999yjnTt3XlFT1apVVbx48cK9gQBuGDM7AIpUYmKiateurU6dOmnChAm6cOGCnn32WcXHx//q5Zz169dr2bJlevDBB1WhQgWtX79ex48ft4LKyJEj1a9fPwUGBqpFixbKy8vTpk2bdPLkSSUnJ1vjTJ48WdWqVVN0dLTGjx+vkydPqlu3bpKkZ599VhMmTFDfvn3Vp08f7d69W8OHD1dycrK8vC79LVimTBnVqVNHc+fO1VtvvSVJatq0qTp06KD8/HynmZ1BgwbpvvvuU58+fdSjRw+VLFlSO3fu1NKlS619Adx8hB0ARcrhcGjhwoXq27evmjZtKi8vL7Vo0UKTJk361f0CAgKUlpamCRMmKDc3V5UqVdKbb76pli1bSrr0ke8SJUro9ddf14ABA1SyZEnVrl37iic5jxkzRmPGjNG2bdtUtWpV/ec//1G5cuUkSXfccYc+//xzDRgwQDExMQoODlb37t01dOhQpzHi4+O1bds2axYnODhYNWvWVGZmpqpXr271q1OnjlJTU/XXv/5VTZo0kTFGVapU0aOPPnqD7yKAG+EwP5+/BQCbOHTokKKiorR161bVrVvX3eUAcCPu2QEAALZG2AEAALbGZSwAAGBrzOwAAABbI+wAAABbI+wAAABbI+wAAABbI+wAAABbI+wAAABbI+wAAABbI+wAAABbI+wAAABb+z/vSpmJe6d7NQAAAABJRU5ErkJggg==","text/plain":["
"]},"metadata":{},"output_type":"display_data"}],"source":["%matplotlib inline\n","import matplotlib as plt\n","from matplotlib import pyplot\n","pyplot.bar(group_names, df[\"horsepower-binned\"].value_counts())\n","\n","# set x/y labels and plot title\n","plt.pyplot.xlabel(\"horsepower\")\n","plt.pyplot.ylabel(\"count\")\n","plt.pyplot.title(\"horsepower bins\")"]},{"cell_type":"markdown","metadata":{},"source":["
\n"," Look at the dataframe above carefully. You will find that the last column provides the bins for \"horsepower\" based on 3 categories (\"Low\", \"Medium\" and \"High\"). \n","
\n","
\n"," We successfully narrowed down the intervals from 59 to 3!\n","
\n","Normally, a histogram is used to visualize the distribution of bins we created above. \n"]},{"cell_type":"code","execution_count":184,"metadata":{},"outputs":[{"data":{"text/plain":["Text(0.5, 1.0, 'horsepower bins')"]},"execution_count":184,"metadata":{},"output_type":"execute_result"},{"data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAjIAAAHHCAYAAACle7JuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAxgklEQVR4nO3deXhUVZ7G8bcSIAlLJWzZJEBYZA2ggDECCUMyBJpuQRlwoacREZRFxLgAKpuiAVSgURaFFrAbF9BBWlGEYQmLIQgCKmBYZBNIQCQJi4RAzvzhwx1L9hConPD9PE89T+655576VR1SeTn3VpXLGGMEAABgIR9vFwAAAFBQBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGcAyI0aMkMvl0s8//+ztUvAHM2fOlMvl0rp16y7bt3Xr1mrduvX1Lwoo5ggyAADAWiW8XQAA3IwWLVrk7RKAYoEVGQDnMcbo119/9XYZRdKJEycKZZxSpUqpVKlShTIWcDMjyACWysrK0kMPPaSgoCAFBgaqR48eOnnypEefM2fO6KWXXlLNmjXl5+en6tWr67nnnlNubq5Hv+rVq+vPf/6zvvzySzVr1kwBAQF66623JEmLFy9Wy5YtFRQUpLJly6pOnTp67rnnPI7Pzc3V8OHDVatWLfn5+SkiIkLPPvvseffjcrnUv39/zZ49W3Xq1JG/v7+aNm2qFStWnPf4NmzYoPbt28vtdqts2bKKj4/XmjVrPB6/r6+vJk6c6LT9/PPP8vHxUcWKFWWMcdr79Omj0NBQj/HT0tLUrl07BQYGqnTp0oqLi9Pq1as9+py7HmnLli168MEHVb58ebVs2fKic3LOyZMn9eijj6pixYpyu93629/+pqNHj3r0+eM1MsuXL5fL5dKcOXP08ssvq0qVKvL391d8fLx27Njhcez27dvVuXNnhYaGyt/fX1WqVNH999+v7Ozsy9YGFDecWgIs1bVrV0VGRio5OVnffPONpk+fruDgYI0ZM8bp88gjj2jWrFn6r//6Lz311FNKS0tTcnKytm7dqnnz5nmMl56ergceeECPPvqoevXqpTp16mjz5s3685//rEaNGunFF1+Un5+fduzY4fEHPz8/X3fffbdWrVql3r17q169evruu+80fvx4bdu2TZ988onH/aSkpOjDDz/UgAED5Ofnp8mTJ6tdu3Zau3atGjZsKEnavHmzWrVqJbfbrWeffVYlS5bUW2+9pdatWyslJUXR0dEKCgpSw4YNtWLFCg0YMECStGrVKrlcLv3yyy/asmWLGjRoIElauXKlWrVq5dSwdOlStW/fXk2bNtXw4cPl4+OjGTNmqE2bNlq5cqXuuOMOj5q7dOmi2rVr65VXXvEISBfTv39/BQUFacSIEUpPT9eUKVO0Z88eJ6xcyujRo+Xj46Onn35a2dnZGjt2rLp166a0tDRJ0unTp5WYmKjc3Fw9/vjjCg0N1f79+/XZZ58pKytLgYGBl60PKFYMAKsMHz7cSDIPP/ywR/s999xjKlas6Gxv3LjRSDKPPPKIR7+nn37aSDJLly512qpVq2YkmYULF3r0HT9+vJFkDh8+fNF6/vnPfxofHx+zcuVKj/apU6caSWb16tVOmyQjyaxbt85p27Nnj/H39zf33HOP0
9apUydTqlQps3PnTqftwIEDply5ciY2NtZp69evnwkJCXG2k5KSTGxsrAkODjZTpkwxxhhz5MgR43K5zN///ndjjDH5+fmmdu3aJjEx0eTn5zvHnjx50kRGRpr//M//dNrOPdcPPPDARR//782YMcNIMk2bNjWnT5922seOHWskmfnz5zttcXFxJi4uztletmyZkWTq1atncnNznfa///3vRpL57rvvjDHGbNiwwUgyc+fOvaKagOKOU0uApR577DGP7VatWunIkSPKycmRJH3++eeSpKSkJI9+Tz31lCRpwYIFHu2RkZFKTEz0aAsKCpIkzZ8/X/n5+ResY+7cuapXr57q1q2rn3/+2bm1adNGkrRs2TKP/jExMWratKmzXbVqVXXs2FFffvmlzp49q7Nnz2rRokXq1KmTatSo4fQLCwvTgw8+qFWrVjmPsVWrVsrMzFR6erqk31ZeYmNj1apVK61cuVLSb6s0xhhnRWbjxo3avn27HnzwQR05csSp98SJE4qPj9eKFSvOe6x/fK4vp3fv3ipZsqSz3adPH5UoUcKZk0vp0aOHx7Uz5+r+8ccfJclZcfnyyy/PO5UI3IwIMoClqlat6rFdvnx5SXKuxdizZ498fHxUq1Ytj36hoaEKCgrSnj17PNojIyPPu4/77rtPLVq00COPPKKQkBDdf//9mjNnjscf+u3bt2vz5s2qXLmyx+3WW2+VJB06dMhjzNq1a593P7feeqtOnjypw4cP6/Dhwzp58qTq1KlzXr969eopPz9f+/btk/T/f+RXrlypEydOaMOGDWrVqpViY2OdILNy5Uq53W41btzYqVeSunfvfl7N06dPV25u7nnXmlzoubmUPz7GsmXLKiwsTLt3777ssZeb18jISCUlJWn69OmqVKmSEhMTNWnSJK6PwU2La2QAS/n6+l6w3fzhGo7LXZNxTkBAwAXbVqxYoWXLlmnBggVauHChPvzwQ7Vp00aLFi2Sr6+v8vPzFRUVpXHjxl1w3IiIiCu6/4IIDw9XZGSkVqxYoerVq8sYo5iYGFWuXFlPPPGE9uzZo5UrV+quu+6Sj89v/287F8JeffVVNWnS5ILjli1b1mP7Qs/N9XIl8/r666/roYce0vz587Vo0SINGDBAycnJWrNmjapUqXKjSgWKBIIMUExVq1ZN+fn52r59u+rVq+e0Z2ZmKisrS9WqVbuicXx8fBQfH6/4+HiNGzdOr7zyip5//nktW7ZMCQkJqlmzpjZt2qT4+PgrCk3nVkR+b9u2bSpdurQqV64sSSpdurRzuuj3fvjhB/n4+HiEo1atWmnFihWKjIxUkyZNVK5cOTVu3FiBgYFauHChvvnmG40cOdLpX7NmTUmS2+1WQkLCFT0HV2v79u36j//4D2f7+PHjOnjwoP70pz8V2n1ERUUpKipKL7zwgr766iu1aNFCU6dO1ahRowrtPgAbcGoJKKbO/dGcMGGCR/u5lZMOHTpcdoxffvnlvLZzqxjn3lrdtWtX7d+/X9OmTTuv76+//nre566kpqbqm2++cbb37dun+fPnq23btvL19ZWvr6/atm2r+fPne5yKyczM1HvvvaeWLVvK7XY77a1atdLu3bv14YcfOqeafHx8dNddd2ncuHHKy8vzeMdS06ZNVbNmTb322ms6fvz4eTUfPnz4ss/L5bz99tvKy8tztqdMmaIzZ86offv21zx2Tk6Ozpw549EWFRUlHx+f897uDtwMWJEBiqnGjRure/fuevvtt5WVlaW4uDitXbtWs2bNUqdOnTxWDC7mxRdf1IoVK9ShQwdVq1ZNhw4d0uTJk1WlShXn81T++7//W3PmzNFjjz2mZcuWqUWLFjp79qx++OEHzZkzx/lsmnMaNmyoxMREj7dfS/JYNRk1apTz+TV9+/ZViRIl9NZbbyk3N1djx471qPFcSElPT9crr7zitMfGxuqLL76Qn5+fmjdv7rT7+Pho+vTpat++vRo0aKAePXrolltu0
f79+7Vs2TK53W59+umnBXjG/9/p06cVHx+vrl27Kj09XZMnT1bLli119913X9O40m9vHe/fv7+6dOmiW2+9VWfOnNE///lP+fr6qnPnztc8PmAbggxQjE2fPl01atTQzJkzNW/ePIWGhmrIkCEaPnz4FR1/9913a/fu3XrnnXf0888/q1KlSoqLi9PIkSOdd8/4+Pjok08+0fjx4/Xuu+9q3rx5Kl26tGrUqKEnnnjCuej3nLi4OMXExGjkyJHau3ev6tevr5kzZ6pRo0ZOnwYNGmjlypUaMmSIkpOTlZ+fr+joaP3rX/9SdHS0x3h16tRRcHCwDh065PFhdecCzh133CE/Pz+PY1q3bq3U1FS99NJLevPNN3X8+HGFhoYqOjpajz766JU/wRfx5ptvavbs2Ro2bJjy8vL0wAMPaOLEiVd8vdKlNG7cWImJifr000+1f/9+lS5dWo0bN9YXX3yhO++885rHB2zjMn+8MhAArhOXy6V+/frpzTff9HYpAIoJrpEBAADWIsgAAABrEWQAAIC1uNgXwA3DJXkAChsrMgAAwFoEGQAAYK1if2opPz9fBw4cULly5QrlMxwAAMD1Z4zRsWPHFB4e7nxX2oUU+yBz4MCB6/qldQAA4PrZt2/fJb8MtdgHmXLlykn67Yn4/fezAACAoisnJ0cRERHO3/GLKfZB5tzpJLfbTZABAMAyl7sshIt9AQCAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYq4e0CbFZ98AJvl3DT2D26g7dLAAAUQazIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYy6tB5uzZsxo6dKgiIyMVEBCgmjVr6qWXXpIxxuljjNGwYcMUFhamgIAAJSQkaPv27V6sGgAAFBVeDTJjxozRlClT9Oabb2rr1q0aM2aMxo4dqzfeeMPpM3bsWE2cOFFTp05VWlqaypQpo8TERJ06dcqLlQMAgKKghDfv/KuvvlLHjh3VoUMHSVL16tX1/vvva+3atZJ+W42ZMGGCXnjhBXXs2FGS9O677yokJESffPKJ7r//fq/VDgAAvM+rKzJ33XWXlixZom3btkmSNm3apFWrVql9+/aSpF27dikjI0MJCQnOMYGBgYqOjlZqauoFx8zNzVVOTo7HDQAAFE9eXZEZPHiwcnJyVLduXfn6+urs2bN6+eWX1a1bN0lSRkaGJCkkJMTjuJCQEGffHyUnJ2vkyJHXt3AAAFAkeHVFZs6cOZo9e7bee+89ffPNN5o1a5Zee+01zZo1q8BjDhkyRNnZ2c5t3759hVgxAAAoSry6IvPMM89o8ODBzrUuUVFR2rNnj5KTk9W9e3eFhoZKkjIzMxUWFuYcl5mZqSZNmlxwTD8/P/n5+V332gEAgPd5dUXm5MmT8vHxLMHX11f5+fmSpMjISIWGhmrJkiXO/pycHKWlpSkmJuaG1goAAIoer67I/OUvf9HLL7+sqlWrqkGDBtqwYYPGjRunhx9+WJLkcrk0cOBAjRo1SrVr11ZkZKSGDh2q8PBwderUyZulAwCAIsCrQeaNN97Q0KFD1bdvXx06dEjh4eF69NFHNWzYMKfPs88+qxMnTqh3797KyspSy5YttXDhQvn7+3uxcgAAUBS4zO8/RrcYysnJUWBgoLKzs+V2uwt17OqDFxTqeLi43aM7eLsEAMANdKV/v/muJQAAYC2CDAAAsBZBBgAAWIsgAwAAr
EWQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYK0S3i4AuBLVBy/wdgk3hd2jO3i7BAC4KqzIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLW8HmT279+vv/71r6pYsaICAgIUFRWldevWOfuNMRo2bJjCwsIUEBCghIQEbd++3YsVAwCAosKrQebo0aNq0aKFSpYsqS+++EJbtmzR66+/rvLlyzt9xo4dq4kTJ2rq1KlKS0tTmTJllJiYqFOnTnmxcgAAUBSU8OadjxkzRhEREZoxY4bTFhkZ6fxsjNGECRP0wgsvqGPHjpKkd999VyEhIfrkk090//333/CaAQBA0eHVFZl///vfatasmbp06aLg4GDddtttmjZtmrN/165dysjIUEJCgtMWGBio6OhopaamXnDM3Nxc5eTkeNwAAEDx5NUg8+OPP2rKlCmqXbu2vvzyS/Xp00cDBgzQrFmzJEkZGRmSpJCQEI/jQkJCnH1/lJycrMDAQOcWERFxfR8EAADwGq8Gmfz8fN1+++165ZVXdNttt6l3797q1auXpk6dWuAxhwwZouzsbOe2b9++QqwYAAAUJV4NMmFhYapfv75HW7169bR3715JUmhoqCQpMzPTo09mZqaz74/8/Pzkdrs9bgAAoHjyapBp0aKF0tPTPdq2bdumatWqSfrtwt/Q0FAtWbLE2Z+Tk6O0tDTFxMTc0FoBAEDR49V3LT355JO666679Morr6hr165au3at3n77bb399tuSJJfLpYEDB2rUqFGqXbu2IiMjNXToUIWHh6tTp07eLB0AABQBXg0yzZs317x58zRkyBC9+OKLioyM1IQJE9StWzenz7PPPqsTJ06od+/eysrKUsuWLbVw4UL5+/t7sXIAAFAUuIwxxttFXE85OTkKDAxUdnZ2oV8vU33wgkIdD/C23aM7eLsEAJB05X+/vf4VBQAAAAVFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoFCjJt2rRRVlbWee05OTlq06bNtdYEAABwRQoUZJYvX67Tp0+f137q1CmtXLnymosCAAC4EiWupvO3337r/LxlyxZlZGQ422fPntXChQt1yy23FF51AAAAl3BVQaZJkyZyuVxyuVwXPIUUEBCgN954o9CKAwAAuJSrCjK7du2SMUY1atTQ2rVrVblyZWdfqVKlFBwcLF9f30IvEgAA4EKuKshUq1ZNkpSfn39digEAALgaVxVkfm/79u1atmyZDh06dF6wGTZs2DUXBgAAcDkFCjLTpk1Tnz59VKlSJYWGhsrlcjn7XC4XQQYAANwQBQoyo0aN0ssvv6xBgwYVdj0AAABXrECfI3P06FF16dKlsGsBAAC4KgUKMl26dNGiRYsKuxYAAICrUqBTS7Vq1
dLQoUO1Zs0aRUVFqWTJkh77BwwYUCjFAQAAXIrLGGOu9qDIyMiLD+hy6ccff7ymogpTTk6OAgMDlZ2dLbfbXahjVx+8oFDHA7xt9+gO3i4BACRd+d/vAq3I7Nq1q8CFAQAAFJYCXSMDAABQFBRoRebhhx++5P533nmnQMUAAABcjQIFmaNHj3ps5+Xl6fvvv1dWVtYFv0wSAADgeihQkJk3b955bfn5+erTp49q1qx5zUUBAABciUK7RsbHx0dJSUkaP358YQ0JAABwSYV6se/OnTt15syZwhwSAADgogp0aikpKclj2xijgwcPasGCBerevXuhFAYAAHA5BQoyGzZs8Nj28fFR5cqV9frrr1/2HU0AAACFpUBBZtmyZYVdBwAAwFUrUJA55/Dhw0pPT5ck1alTR5UrVy6UogAAAK5EgS72PXHihB5++GGFhYUpNjZWsbGxCg8PV8+ePXXy5MnCrhEAAOCCChRkkpKSlJKSok8//VRZWVnKysrS/PnzlZKSoqeeeqqwawQAALigAp1a+vjjj/XRRx+pdevWTtuf/vQnBQQEqGvXrpoyZUph1QcAAHBRBVqROXnypEJCQs5rDw4O5tQSAAC4YQoUZGJiYjR8+HCdOnXKafv11181cuRIxcTEFFpxAAAAl1KgU0sTJkxQu3btVKVKFTVu3FiStGnTJvn5+WnRokWFWiAAAMDFFCjIREVFafv27Zo9e7Z++OEHSdIDDzygbt26KSAgoFALBAAAuJgCBZnk5GSFhISoV69eHu3vvPOODh8+rEGDBhVKcQAAAJdSoGtk3nrrLdWtW/e89gYNGmjq1KnXXBQAAMCVKFCQycjIUFhY2HntlStX1sGDB6+5KAAAgCtRoCATERGh1atXn9e+evVqhYeHX3NRAAAAV6JA18j06tVLAwcOVF5entq0aSNJWrJkiZ599lk+2RcAANwwBQoyzzzzjI4cOaK+ffvq9OnTkiR/f38NGjRIQ4YMKdQCAQAALqZAQcblcmnMmDEaOnSotm7dqoCAANWuXVt+fn6FXR8AAMBFFSjInFO2bFk1b968sGoBAAC4KgW62Pd6GD16tFwulwYOHOi0nTp1Sv369VPFihVVtmxZde7cWZmZmd4rEgAAFClFIsh8/fXXeuutt9SoUSOP9ieffFKffvqp5s6dq5SUFB04cED33nuvl6oEAABFjdeDzPHjx9WtWzdNmzZN5cuXd9qzs7P1j3/8Q+PGjVObNm3UtGlTzZgxQ1999ZXWrFnjxYoBAEBR4fUg069fP3Xo0EEJCQke7evXr1deXp5He926dVW1alWlpqbe6DIBAEARdE0X+16rDz74QN98842+/vrr8/ZlZGSoVKlSCgoK8mgPCQlRRkbGRcfMzc1Vbm6us52Tk1No9QIAgKLFaysy+/bt0xNPPKHZs2fL39+/0MZNTk5WYGCgc4uIiCi0sQEAQNHitSCzfv16HTp0SLfffrtKlCihEiVKKCUlRRMnTlSJEiUUEhKi06dPKysry+O4zMxMhYaGXnTcIUOGKDs727nt27fvOj8SAADgLV47tRQfH6/vvvvOo61Hjx6qW7euBg0apIiICJUsWVJLlixR586dJUnp6enau3evYmJiLjqun58fH8wHAMBNwmtBply5cmrYsKFHW5kyZVSxYkWnvWfPnkpKSlKFChXkdrv1+OOPKyYmRnfeeac3SgYAAEWMVy/2vZzx48fLx8dHnTt3Vm5urhITEzV58mRvlwUAAIoIlzHGeLuI6yknJ0eBgYHKzs6W2+0u1LGrD15QqOMB3rZ7dAdvlwAAkq7877fXP0cGAACgoAgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1
iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1Sni7AABFR/XBC7xdwk1h9+gO3i4BKDZYkQEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAa3k1yCQnJ6t58+YqV66cgoOD1alTJ6Wnp3v0OXXqlPr166eKFSuqbNmy6ty5szIzM71UMQAAKEq8GmRSUlLUr18/rVmzRosXL1ZeXp7atm2rEydOOH2efPJJffrpp5o7d65SUlJ04MAB3XvvvV6sGgAAFBUlvHnnCxcu9NieOXOmgoODtX79esXGxio7O1v/+Mc/9N5776lNmzaSpBkzZqhevXpas2aN7rzzTm+UDQAAiogidY1Mdna2JKlChQqSpPXr1ysvL08JCQlOn7p166pq1apKTU31So0AAKDo8OqKzO/l5+dr4MCBatGihRo2bChJysjIUKlSpRQUFOTRNyQkRBkZGRccJzc3V7m5uc52Tk7OdasZAAB4V5FZkenXr5++//57ffDBB9c0TnJysgIDA51bREREIVUIAACKmiIRZPr376/PPvtMy5YtU5UqVZz20NBQnT59WllZWR79MzMzFRoaesGxhgwZouzsbOe2b9++61k6AADwIq8GGWOM+vfvr3nz5mnp0qWKjIz02N+0aVOVLFlSS5YscdrS09O1d+9excTEXHBMPz8/ud1ujxsAACievHqNTL9+/fTee+9p/vz5KleunHPdS2BgoAICAhQYGKiePXsqKSlJFSpUkNvt1uOPP66YmBjesQQAALwbZKZMmSJJat26tUf7jBkz9NBDD0mSxo8fLx8fH3Xu3Fm5ublKTEzU5MmTb3ClAACgKPJqkDHGXLaPv7+/Jk2apEmTJt2AigAAgE2KxMW+AAAABUGQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaJbxdAADcbKoPXuDtEm4Ku0d38HYJuAFYkQEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYC2CDAAAsBZBBgAAWIsgAwAArEWQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyADAACsRZABAADWIsgAAABrEWQAAIC1CDIAAMBaBBkAAGAtggwAALAWQQYAAFiLIAMAAKxFkAEAANYiyAAAAGsRZAAAgLUIMgAAwFoEGQAAYC2CDAAAsJYVQWbSpEmqXr26/P39FR0drbVr13q7JAAAUASU8HYBl/Phhx8qKSlJU6dOVXR0tCZMmKDExESlp6crODjY2+UBAIqo6oMXeLuEm8Lu0R28ev9FfkVm3Lhx6tWrl3r06KH69etr6tSpKl26tN555x1vlwYAALysSAeZ06dPa/369UpISHDafHx8lJCQoNTUVC9WBgAAioIifWrp559/1tmzZxUSEuLRHhISoh9++OGCx+Tm5io3N9fZzs7OliTl5OQUen35uScLfUwAAGxyPf6+/n5cY8wl+xXpIFMQycnJGjly5
HntERERXqgGAIDiLXDC9R3/2LFjCgwMvOj+Ih1kKlWqJF9fX2VmZnq0Z2ZmKjQ09ILHDBkyRElJSc52fn6+fvnlF1WsWFEul+u61puTk6OIiAjt27dPbrf7ut4XCoY5KvqYIzswT0Wf7XNkjNGxY8cUHh5+yX5FOsiUKlVKTZs21ZIlS9SpUydJvwWTJUuWqH///hc8xs/PT35+fh5tQUFB17lST26328p/NDcT5qjoY47swDwVfTbP0aVWYs4p0kFGkpKSktS9e3c1a9ZMd9xxhyZMmKATJ06oR48e3i4NAAB4WZEPMvfdd58OHz6sYcOGKSMjQ02aNNHChQvPuwAYAADcfIp8kJGk/v37X/RUUlHi5+en4cOHn3dqC0UHc1T0MUd2YJ6Kvptljlzmcu9rAgAAKKKK9AfiAQAAXApBBgAAWIsgAwAArEWQAQAA1iLIXKURI0bI5XJ53OrWrevsP3XqlPr166eKFSuqbNmy6ty583mfTIzCtWLFCv3lL39ReHi4XC6XPvnkE4/9xhgNGzZMYWFhCggIUEJCgrZv3+7R55dfflG3bt3kdrsVFBSknj176vjx4zfwURR/l5unhx566LzfrXbt2nn0YZ6ur+TkZDVv3lzlypVTcHCwOnXqpPT0dI8+V/Iat3fvXnXo0EGlS5dWcHCwnnnmGZ05c+ZGPpRi60rmqHXr1uf9Lj322GMefYrTHBFkCqBBgwY6ePCgc1u1apWz78knn9Snn36quXPnKiUlRQcOHNC9997rxWqLvxMnTqhx48aaNGnSBfePHTtWEydO1NSpU5WWlqYyZcooMTFRp06dcvp069ZNmzdv1uLFi/XZZ59pxYoV6t279416CDeFy82TJLVr187jd+v999/32M88XV8pKSnq16+f1qxZo8WLFysvL09t27bViRMnnD6Xe407e/asOnTooNOnT+urr77SrFmzNHPmTA0bNswbD6nYuZI5kqRevXp5/C6NHTvW2Vfs5sjgqgwfPtw0btz4gvuysrJMyZIlzdy5c522rVu3GkkmNTX1BlV4c5Nk5s2b52zn5+eb0NBQ8+qrrzptWVlZxs/Pz7z//vvGGGO2bNliJJmvv/7a6fPFF18Yl8tl9u/ff8Nqv5n8cZ6MMaZ79+6mY8eOFz2GebrxDh06ZCSZlJQUY8yVvcZ9/vnnxsfHx2RkZDh9pkyZYtxut8nNzb2xD+Am8Mc5MsaYuLg488QTT1z0mOI2R6zIFMD27dsVHh6uGjVqqFu3btq7d68kaf369crLy1NCQoLTt27duqpatapSU1O9Ve5NbdeuXcrIyPCYk8DAQEVHRztzkpqaqqCgIDVr1szpk5CQIB8fH6Wlpd3wmm9my5cvV3BwsOrUqaM+ffroyJEjzj7m6cbLzs6WJFWoUEHSlb3GpaamKioqyuPT1xMTE5WTk6PNmzffwOpvDn+co3Nmz56tSpUqqWHDhhoyZIhOnjzp7Ctuc2TFJ/sWJdHR0Zo5c6bq1KmjgwcPauTIkWrVqpW+//57ZWRkqFSpUud9SWVISIgyMjK8U/BN7tzz/sevtPj9nGRkZCg4ONhjf4kSJVShQgXm7QZq166d7r33XkVGRmrnzp167rnn1L59e6WmpsrX15d5usHy8/M1cOBAtWjRQg0bNpSkK3qNy8jIuODv27l9KDwXmiNJevDBB1WtWjWFh4fr22+/1aBBg5Senq7/+Z//kVT85oggc5Xat2/v/NyoUSNFR0erWrVqmjNnjgICArxYGWC3+++/3/k5KipKjRo1Us2aNbV8+XLFx8d7sbKbU79+/fT99997XAOIouVic/T768aioqIUFham+Ph47dy5UzVr1rzRZV53nFq6RkFBQbr11lu1Y8cOhYaG6vTp08rKyvLok5mZqdDQUO8UeJM797z/8V0Vv5+T0NBQHTp0yGP/mTNn9MsvvzBvXlSjRg1VqlRJO3bskMQ83Uj9+/fXZ599pmXLlqlKlSpO+5W8xoWGh
l7w9+3cPhSOi83RhURHR0uSx+9ScZojgsw1On78uHbu3KmwsDA1bdpUJUuW1JIlS5z96enp2rt3r2JiYrxY5c0rMjJSoaGhHnOSk5OjtLQ0Z05iYmKUlZWl9evXO32WLl2q/Px85wUAN95PP/2kI0eOKCwsTBLzdCMYY9S/f3/NmzdPS5cuVWRkpMf+K3mNi4mJ0XfffecROhcvXiy326369evfmAdSjF1uji5k48aNkuTxu1Ss5sjbVxvb5qmnnjLLly83u3btMqtXrzYJCQmmUqVK5tChQ8YYYx577DFTtWpVs3TpUrNu3ToTExNjYmJivFx18Xbs2DGzYcMGs2HDBiPJjBs3zmzYsMHs2bPHGGPM6NGjTVBQkJk/f7759ttvTceOHU1kZKT59ddfnTHatWtnbrvtNpOWlmZWrVplateubR544AFvPaRi6VLzdOzYMfP000+b1NRUs2vXLvO///u/5vbbbze1a9c2p06dcsZgnq6vPn36mMDAQLN8+XJz8OBB53by5Emnz+Ve486cOWMaNmxo2rZtazZu3GgWLlxoKleubIYMGeKNh1TsXG6OduzYYV588UWzbt06s2vXLjN//nxTo0YNExsb64xR3OaIIHOV7rvvPhMWFmZKlSplbrnlFnPfffeZHTt2OPt//fVX07dvX1O+fHlTunRpc88995iDBw96seLib9myZUbSebfu3bsbY357C/bQoUNNSEiI8fPzM/Hx8SY9Pd1jjCNHjpgHHnjAlC1b1rjdbtOjRw9z7NgxLzya4utS83Ty5EnTtm1bU7lyZVOyZElTrVo106tXL4+3hxrDPF1vF5ofSWbGjBlOnyt5jdu9e7dp3769CQgIMJUqVTJPPfWUycvLu8GPpni63Bzt3bvXxMbGmgoVKhg/Pz9Tq1Yt88wzz5js7GyPcYrTHLmMMebGrf8AAAAUHq6RAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABYiyAD4JJat26tgQMHersMALggggwAALAWQQbADXX69Glvl3DD3EyPFfAWggyAy8rPz9ezzz6rChUqKDQ0VCNGjHD27d27Vx07dlTZsmXldrvVtWtXZWZmOvtHjBihJk2aaPr06YqMjJS/v78k6aOPPlJUVJQCAgJUsWJFJSQk6MSJE85x06dPV7169eTv76+6detq8uTJzr7du3fL5XLpgw8+0F133SV/f381bNhQKSkpHnWnpKTojjvukJ+fn8LCwjR48GCdOXNGkvTZZ58pKChIZ8+elfTbNwS7XC4NHjzYOf6RRx7RX//6V2d71apVatWqlQICAhQREaEBAwZ41Fy9enW99NJL+tvf/ia3263evXtfy9MO4Ep4+8ueABRtcXFxxu12mxEjRpht27aZWbNmGZfLZRYtWmTOnj1rmjRpYlq2bGnWrVtn1qxZY5o2bWri4uKc44cPH27KlClj2rVrZ7755huzadMmc+DAAVOiRAkzbtw4s2vXLvPtt9+aSZMmOV8A+a9//cuEhYWZjz/+2Pz444/m448/NhUqVDAzZ840xhiza9cuI8lUqVLFfPTRR2bLli3mkUceMeXKlTM///yzMcaYn376yZQuXdr07dvXbN261cybN89UqlTJDB8+3BhjTFZWlvHx8TFff/21McaYCRMmmEqVKpno6Gin9lq1aplp06YZY377VuEyZcqY8ePHm23btpnVq1eb2267zTz00ENO/2rVqhm3221ee+01s2PHDo8vlAVwfRBkAFxSXFycadmypUdb8+bNzaBBg8yiRYuMr6+v2bt3r7Nv8+bNRpJZu3atMea3IFOyZElz6NAhp8/69euNJLN79+4L3mfNmjXNe++959H20ksvmZiYGGPM/weZ0aNHO/vz8vJMlSpVzJgxY4wxxjz33HOmTp06Jj8/3+kzadIkU7ZsWXP27FljjDG33367efXVV40xxnTq1Mm8/PLLplSpUubYsWPmp59+MpLMt
m3bjDHG9OzZ0/Tu3dujppUrVxofHx/z66+/GmN+CzKdOnW65PMJoHBxagnAZTVq1MhjOywsTIcOHdLWrVsVERGhiIgIZ1/9+vUVFBSkrVu3Om3VqlVT5cqVne3GjRsrPj5eUVFR6tKli6ZNm6ajR49Kkk6cOKGdO3eqZ8+eKlu2rHMbNWqUdu7c6VFHTEyM83OJEiXUrFkz5363bt2qmJgYuVwup0+LFi10/Phx/fTTT5KkuLg4LV++XMYYrVy5Uvfee6/q1aunVatWKSUlReHh4apdu7YkadOmTZo5c6ZHTYmJicrPz9euXbuc+2jWrFnBnmQABVLC2wUAKPpKlizpse1yuZSfn3/Fx5cpU8Zj29fXV4sXL9ZXX32lRYsW6Y033tDzzz+vtLQ0lS5dWpI0bdo0RUdHn3dcYWrdurXeeecdbdq0SSVLllTdunXVunVrLV++XEePHlVcXJzT9/jx43r00Uc1YMCA88apWrWq8/MfHyuA64sVGQAFVq9ePe3bt0/79u1z2rZs2aKsrCzVr1//kse6XC61aNFCI0eO1IYNG1SqVCnNmzdPISEhCg8P148//qhatWp53CIjIz3GWLNmjfPzmTNntH79etWrV8+pLTU1VcYYp8/q1atVrlw5ValSRZLUqlUrHTt2TOPHj3dCy7kgs3z5crVu3do59vbbb9eWLVvOq6lWrVoqVapUwZ5AANeMFRkABZaQkKCoqCh169ZNEyZM0JkzZ9S3b1/FxcVd8hRLWlqalixZorZt2yo4OFhpaWk6fPiwE0JGjhypAQMGKDAwUO3atVNubq7WrVuno0ePKikpyRln0qRJql27turVq6fx48fr6NGjevjhhyVJffv21YQJE/T444+rf//+Sk9P1/Dhw5WUlCQfn9/+D1e+fHk1atRIs2fP1ptvvilJio2NVdeuXZWXl+exIjNo0CDdeeed6t+/vx555BGVKVNGW7Zs0eLFi51jAdx4BBkABeZyuTR//nw9/vjjio2NlY+Pj9q1a6c33njjkse53W6tWLFCEyZMUE5OjqpVq6bXX39d7du3l/Tb255Lly6tV199Vc8884zKlCmjqKio8z5hePTo0Ro9erQ2btyoWrVq6d///rcqVaokSbrlllv0+eef65lnnlHjxo1VoUIF9ezZUy+88ILHGHFxcdq4caOz+lKhQgXVr19fmZmZqlOnjtOvUaNGSklJ0fPPP69WrVrJGKOaNWvqvvvuu8ZnEcC1cJnfr7sCgAV2796tyMhIbdiwQU2aNPF2OQC8iGtkAACAtQgyAADAWpxaAgAA1mJFBgAAWIsgAwAArEWQAQAA1iLIAAAAaxFkAACAtQgyAADAWgQZAABgLYIMAACwFkEGAABY6/8AfxQy9T9ko84AAAAASUVORK5CYII=","text/plain":["
"]},"metadata":{},"output_type":"display_data"}],"source":["\n","import matplotlib as plt\n","from matplotlib import pyplot\n","\n","\n","# draw histogram of attribute \"horsepower\" with bins = 5\n","\n","plt.pyplot.hist(df[\"horsepower\"], bins=5)\n","\n","# set x/y labels and plot title\n","\n","plt.pyplot.xlabel(\"horsepower\")\n","plt.pyplot.ylabel(\"count\")\n","plt.pyplot.title(\"horsepower bins\")"]},{"cell_type":"markdown","metadata":{},"source":["The plot above shows the binning result for the attribute \"horsepower\".\n"]},{"cell_type":"markdown","metadata":{},"source":["
Indicator Variable (or Dummy Variable)
\n","What is an indicator variable?\n","
\n"," An indicator variable (or dummy variable) is a numerical variable used to label categories. They are called 'dummies' because the numbers themselves don't have inherent meaning. \n","
\n","\n","Why do we use indicator variables?\n","\n","
\n"," We use indicator variables so we can use categorical variables for regression analysis in the later modules.\n","
\n","Example\n","
\n"," We see the column \"fuel-type\" has two unique values: \"gas\" or \"diesel\". Regression doesn't understand words, only numbers. To use this attribute in regression analysis, we convert \"fuel-type\" to indicator variables.\n","
\n","\n","
\n"," We will use pandas' method 'get_dummies' to assign numerical values to different categories of fuel type. \n","
\n"]},{"cell_type":"code","execution_count":185,"metadata":{},"outputs":[{"data":{"text/plain":["Index(['index', 'symboling', 'normalized-losses', 'make', 'fuel-type',\n"," 'aspiration', 'num-of-doors', 'body-style', 'drive-wheels',\n"," 'engine-location', 'wheel-base', 'length', 'width', 'height',\n"," 'curb-weight', 'engine-type', 'num-of-cylinders', 'engine-size',\n"," 'fuel-system', 'bore', 'stroke', 'compression-ratio', 'horsepower',\n"," 'peak-rpm', 'city-L/100km', 'highway-L/100km', 'price',\n"," 'horsepower-binned'],\n"," dtype='object')"]},"execution_count":185,"metadata":{},"output_type":"execute_result"}],"source":["df.columns"]},{"cell_type":"markdown","metadata":{},"source":["Get the indicator variables and assign it to data frame \"dummy_variable\\_1\":\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":[]},{"cell_type":"markdown","metadata":{},"source":["Change the column names for clarity:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":[]},{"cell_type":"markdown","metadata":{},"source":["In the dataframe, column 'fuel-type' has values for 'gas' and 'diesel' as 0s and 1s now.\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# merge data frame \"df\" and \"dummy_variable_1\" \n","\n","\n","# drop original column \"fuel-type\" from \"df\"\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["df.head()"]},{"cell_type":"markdown","metadata":{},"source":["The last two columns are now the indicator variable representation of the fuel-type variable. They're all 0s and 1s now.\n"]},{"cell_type":"markdown","metadata":{},"source":["
\n","
Question #4:
\n","\n","Similar to before, create an indicator variable for the column \"aspiration\"\n","\n","
\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# Write your code below and press Shift+Enter to execute \n"]},{"cell_type":"markdown","metadata":{},"source":["Click here for the solution\n","\n","```python\n","# get indicator variables of aspiration and assign them to data frame \"dummy_variable_2\"\n","dummy_variable_2 = pd.get_dummies(df['aspiration'])\n","\n","# change column names for clarity\n","dummy_variable_2.rename(columns={'std':'aspiration-std', 'turbo': 'aspiration-turbo'}, inplace=True)\n","\n","# show first 5 instances of data frame \"dummy_variable_2\"\n","dummy_variable_2.head()\n","\n","\n","```\n","\n","\n"]},{"cell_type":"markdown","metadata":{},"source":["
\n","
Question #5:
\n","\n","Merge the new dataframe to the original dataframe, then drop the column 'aspiration'.\n","\n","
\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# Write your code below and press Shift+Enter to execute \n"]},{"cell_type":"markdown","metadata":{},"source":["Click here for the solution\n","\n","```python\n","# merge the new dataframe to the original dataframe\n","df = pd.concat([df, dummy_variable_2], axis=1)\n","\n","# drop original column \"aspiration\" from \"df\"\n","df.drop('aspiration', axis = 1, inplace=True)\n","\n","\n","```\n","\n","\n"]},{"cell_type":"markdown","metadata":{},"source":["Save the new csv:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["df.to_csv('clean_df.csv')"]},{"cell_type":"markdown","metadata":{},"source":["### Thank you for completing this lab!\n","\n","## Author\n","\n","Joseph Santarcangelo\n","\n","### Other Contributors\n","\n","Mahdi Noorian PhD\n","\n","Bahare Talayian\n","\n","Eric Xiao\n","\n","Steven Dong\n","\n","Parizad\n","\n","Hima Vasudevan\n","\n","Fiorella Wenver\n","\n","Yi Yao.\n","\n","## Change Log\n","\n","| Date (YYYY-MM-DD) | Version | Changed By | Change Description |\n","| ----------------- | ------- | ---------- | ----------------------------------- |\n","| 2020-10-30 | 2.2 | Lakshmi | Changed URL of csv |\n","| 2020-09-09 | 2.1 | Lakshmi | Updated Indicator Variables section |\n","| 2020-08-27 | 2.0 | Lavanya | Moved lab to course repo in GitLab |\n","\n","\n","\n","##