diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index 91557cd..424c2ef 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/__init__.cpython-36.pyc b/q01_load_data/__pycache__/__init__.cpython-36.pyc index 5e9e2e2..542fc16 100644 Binary files a/q01_load_data/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/build.cpython-36.pyc b/q01_load_data/__pycache__/build.cpython-36.pyc index 6ba929f..922d831 100644 Binary files a/q01_load_data/__pycache__/build.cpython-36.pyc and b/q01_load_data/__pycache__/build.cpython-36.pyc differ diff --git a/q01_load_data/build.py b/q01_load_data/build.py index 1a26cc1..9cfaf04 100644 --- a/q01_load_data/build.py +++ b/q01_load_data/build.py @@ -1,10 +1,22 @@ +# %load q01_load_data/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split def q01_load_data(path): - "write your solution here" + 'write your solution here' # use .read_csv function to read the # data and header=0 to skip the first row + df = pd.read_csv(path, header=0) + new_header = df.iloc[0] # grab the first row for the header + new_header[0] = 'country name' + df = df[1:] # take the data less the header row + df.columns = new_header # set the header row as the df header + return df + + +path = 'data/olympics.csv' +q01_load_data(path) + diff --git a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc index 46496ca..8f75b70 100644 Binary files a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/tests/__pycache__/test.cpython-36.pyc b/q01_load_data/tests/__pycache__/test.cpython-36.pyc index 0dc2257..4b80bfa 100644 Binary files a/q01_load_data/tests/__pycache__/test.cpython-36.pyc and b/q01_load_data/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q02_rename_columns/__pycache__/__init__.cpython-36.pyc b/q02_rename_columns/__pycache__/__init__.cpython-36.pyc index 687491c..2ae9ad3 100644 Binary files a/q02_rename_columns/__pycache__/__init__.cpython-36.pyc and b/q02_rename_columns/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_rename_columns/__pycache__/build.cpython-36.pyc b/q02_rename_columns/__pycache__/build.cpython-36.pyc index 28092f5..c828c06 100644 Binary files a/q02_rename_columns/__pycache__/build.cpython-36.pyc and b/q02_rename_columns/__pycache__/build.cpython-36.pyc differ diff --git a/q02_rename_columns/build.py b/q02_rename_columns/build.py index 20dd8e9..e7cd9ec 100644 --- a/q02_rename_columns/build.py +++ b/q02_rename_columns/build.py @@ -1,9 +1,16 @@ +# %load q02_rename_columns/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split from greyatomlib.olympics_project.q01_load_data.build import q01_load_data def q02_rename_columns(path): - "write your solution here" + 'write your solution here' df = q01_load_data(path) - \ No newline at end of file + df.rename(columns={'01 !':'Gold','02 !':'Silver','03 !':'Bronze'},inplace=True) + return df + +path='./data/olympics.csv' +q02_rename_columns(path) + + diff --git a/q02_rename_columns/tests/__pycache__/__init__.cpython-36.pyc b/q02_rename_columns/tests/__pycache__/__init__.cpython-36.pyc index 198a898..ff7168f 100644 Binary files a/q02_rename_columns/tests/__pycache__/__init__.cpython-36.pyc and b/q02_rename_columns/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_rename_columns/tests/__pycache__/test.cpython-36.pyc b/q02_rename_columns/tests/__pycache__/test.cpython-36.pyc index 1c28f5b..8f08af9 100644 Binary files a/q02_rename_columns/tests/__pycache__/test.cpython-36.pyc and b/q02_rename_columns/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q03_split_country/__pycache__/__init__.cpython-36.pyc b/q03_split_country/__pycache__/__init__.cpython-36.pyc index e71d6ad..e729272 100644 Binary files a/q03_split_country/__pycache__/__init__.cpython-36.pyc and b/q03_split_country/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_split_country/__pycache__/build.cpython-36.pyc b/q03_split_country/__pycache__/build.cpython-36.pyc index 5935601..56ba8cd 100644 Binary files a/q03_split_country/__pycache__/build.cpython-36.pyc and b/q03_split_country/__pycache__/build.cpython-36.pyc differ diff --git a/q03_split_country/build.py b/q03_split_country/build.py index 6c075fb..acf3306 100644 --- a/q03_split_country/build.py +++ b/q03_split_country/build.py @@ -2,9 +2,19 @@ import numpy as np from sklearn.model_selection import train_test_split from greyatomlib.olympics_project.q02_rename_columns.build import q02_rename_columns - +import re def q03_summer_gold_medals(path): - "write your solution here" + 'write your solution here' df = q02_rename_columns(path) - \ No newline at end of file + #tmp= df['country name'].apply(lambda x: re.findall('\((.*?)\)',x)) + df['country name']=df['country name'].str.replace(r'\(([A-Za-z0-9 _]+)\)', '') + df['country name']=df['country name'].str.replace(r'\[([A-Za-z0-9_]+)\]', '') + df.index=df['country name'] + df.drop(['country name','Totals'], axis=1,inplace=True) + + return df[:-1] + +path='./data/olympics.csv' +q03_summer_gold_medals(path) + diff --git a/q03_split_country/tests/__pycache__/__init__.cpython-36.pyc b/q03_split_country/tests/__pycache__/__init__.cpython-36.pyc index 6015fed..07806d0 100644 Binary files a/q03_split_country/tests/__pycache__/__init__.cpython-36.pyc and b/q03_split_country/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_split_country/tests/__pycache__/test.cpython-36.pyc b/q03_split_country/tests/__pycache__/test.cpython-36.pyc index 51cbfae..12f6704 100644 Binary files a/q03_split_country/tests/__pycache__/test.cpython-36.pyc and b/q03_split_country/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q04_country_with_most_gold_medals/__pycache__/__init__.cpython-36.pyc b/q04_country_with_most_gold_medals/__pycache__/__init__.cpython-36.pyc index 5be5c53..d02aebf 100644 Binary files a/q04_country_with_most_gold_medals/__pycache__/__init__.cpython-36.pyc and b/q04_country_with_most_gold_medals/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_country_with_most_gold_medals/__pycache__/build.cpython-36.pyc b/q04_country_with_most_gold_medals/__pycache__/build.cpython-36.pyc index edf8f75..fe4b40b 100644 Binary files a/q04_country_with_most_gold_medals/__pycache__/build.cpython-36.pyc and b/q04_country_with_most_gold_medals/__pycache__/build.cpython-36.pyc differ diff --git a/q04_country_with_most_gold_medals/build.py b/q04_country_with_most_gold_medals/build.py index 27251ef..803cf49 100644 --- a/q04_country_with_most_gold_medals/build.py +++ b/q04_country_with_most_gold_medals/build.py @@ -1,3 +1,4 @@ +# %load q04_country_with_most_gold_medals/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split @@ -5,7 +6,12 @@ def q04_country_with_most_gold_medals(path): - "write your solution here" + 'write your solution here' df = q03_summer_gold_medals(path) - + max_gold = df['Gold'] + gold=pd.Series(max_gold.iloc[:,2].astype(np.int16)) + return gold.idxmax().strip() + + +q04_country_with_most_gold_medals('./data/olympics.csv') diff --git a/q04_country_with_most_gold_medals/tests/__pycache__/__init__.cpython-36.pyc b/q04_country_with_most_gold_medals/tests/__pycache__/__init__.cpython-36.pyc index e7d7d49..0bc389c 100644 Binary files a/q04_country_with_most_gold_medals/tests/__pycache__/__init__.cpython-36.pyc and b/q04_country_with_most_gold_medals/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_country_with_most_gold_medals/tests/__pycache__/test.cpython-36.pyc b/q04_country_with_most_gold_medals/tests/__pycache__/test.cpython-36.pyc index b79dc60..1d2c2c7 100644 Binary files a/q04_country_with_most_gold_medals/tests/__pycache__/test.cpython-36.pyc and b/q04_country_with_most_gold_medals/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q05_difference_in_gold_medal/__pycache__/__init__.cpython-36.pyc b/q05_difference_in_gold_medal/__pycache__/__init__.cpython-36.pyc index 2001848..5cc1223 100644 Binary files a/q05_difference_in_gold_medal/__pycache__/__init__.cpython-36.pyc and b/q05_difference_in_gold_medal/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_difference_in_gold_medal/__pycache__/build.cpython-36.pyc b/q05_difference_in_gold_medal/__pycache__/build.cpython-36.pyc index ff681a3..dca3816 100644 Binary files a/q05_difference_in_gold_medal/__pycache__/build.cpython-36.pyc and b/q05_difference_in_gold_medal/__pycache__/build.cpython-36.pyc differ diff --git a/q05_difference_in_gold_medal/build.py b/q05_difference_in_gold_medal/build.py index 9fb11ec..7758397 100644 --- a/q05_difference_in_gold_medal/build.py +++ b/q05_difference_in_gold_medal/build.py @@ -1,9 +1,19 @@ +# %load q05_difference_in_gold_medal/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split from greyatomlib.olympics_project.q02_rename_columns.build import q02_rename_columns +import math + +def q05_difference_in_gold_medal(path): + 'write your solution here' + df= q02_rename_columns(path) + df=df[:-1] + df['bigg']= df['Gold'].iloc[:,0].astype(np.int16)-df['Gold'].iloc[:,1].astype(np.int16) + tmp=df['bigg'] + #return df.loc[tmp.idxmax(),'country name'].strip() + return tmp.max() + +q05_difference_in_gold_medal('./data/olympics.csv') -def q05_difference_in_gold_medal(): - "write your solution here" - diff --git a/q05_difference_in_gold_medal/tests/__pycache__/__init__.cpython-36.pyc b/q05_difference_in_gold_medal/tests/__pycache__/__init__.cpython-36.pyc index 7b04315..4c1ba9b 100644 Binary files a/q05_difference_in_gold_medal/tests/__pycache__/__init__.cpython-36.pyc and b/q05_difference_in_gold_medal/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_difference_in_gold_medal/tests/__pycache__/test.cpython-36.pyc b/q05_difference_in_gold_medal/tests/__pycache__/test.cpython-36.pyc index efd000f..adc789c 100644 Binary files a/q05_difference_in_gold_medal/tests/__pycache__/test.cpython-36.pyc and b/q05_difference_in_gold_medal/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q06_get_points/__pycache__/__init__.cpython-36.pyc b/q06_get_points/__pycache__/__init__.cpython-36.pyc index 7c1cf4d..0758280 100644 Binary files a/q06_get_points/__pycache__/__init__.cpython-36.pyc and b/q06_get_points/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_get_points/__pycache__/build.cpython-36.pyc b/q06_get_points/__pycache__/build.cpython-36.pyc index d45fe38..e65a798 100644 Binary files a/q06_get_points/__pycache__/build.cpython-36.pyc and b/q06_get_points/__pycache__/build.cpython-36.pyc differ diff --git a/q06_get_points/build.py b/q06_get_points/build.py index 4f4afd7..b122420 100644 --- a/q06_get_points/build.py +++ b/q06_get_points/build.py @@ -1,9 +1,23 @@ +# %load q06_get_points/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split from greyatomlib.olympics_project.q02_rename_columns.build import q02_rename_columns -path = "data/olympics.csv" +path = 'data/olympics.csv' + + +def q06_get_points(path): + df= q02_rename_columns(path) + g=df['Gold'].iloc[:,2].astype(np.int16)*3 + b=df['Bronze'].iloc[:,2].astype(np.int16)*1 + s=df['Silver'].iloc[:,2].astype(np.int16)*2 + df['Points']=g+b+s + return df['Points'] + + + +q06_get_points(path) diff --git a/q06_get_points/tests/__pycache__/__init__.cpython-36.pyc b/q06_get_points/tests/__pycache__/__init__.cpython-36.pyc index 7db8f24..5726b91 100644 Binary files a/q06_get_points/tests/__pycache__/__init__.cpython-36.pyc and b/q06_get_points/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_get_points/tests/__pycache__/test.cpython-36.pyc b/q06_get_points/tests/__pycache__/test.cpython-36.pyc index 8cccf4a..f68cd2b 100644 Binary files a/q06_get_points/tests/__pycache__/test.cpython-36.pyc and b/q06_get_points/tests/__pycache__/test.cpython-36.pyc differ