Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
Empty file added .metadata/.lock
Empty file.
Binary file added .metadata/.mylyn/.taskListIndex/segments.gen
Binary file not shown.
Binary file added .metadata/.mylyn/.taskListIndex/segments_1
Binary file not shown.
Binary file added .metadata/.mylyn/.tasks.xml.zip
Binary file not shown.
Binary file added .metadata/.mylyn/repositories.xml.zip
Binary file not shown.
Binary file added .metadata/.mylyn/tasks.xml.zip
Binary file not shown.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import matplotlib.pyplot as plt
import pandas as pd
'''
Created on Nov 30, 2016

@author: peimengsui
@desc: define functions and a class for data analysis tool
'''
def visual_income_dist(year, dataset):
    '''
    Draw a histogram of the values stored under ``year`` in ``dataset``.

    Missing values are dropped before plotting. The histogram, title and
    axis labels are drawn through pyplot; nothing is returned, shown or
    saved by this function.
    '''
    yearly_values = dataset[year].dropna().values
    plt.hist(yearly_values)
    heading = "".join(["distribution of income per person across all countries_", str(year)])
    plt.title(heading)
    plt.xlabel("income_per_person")
    plt.ylabel("count")

def merge_by_year(coutries, income, year):
    '''
    Join the countries table with the income figures of a single year.

    The ``year`` column of ``income`` is wrapped in its own DataFrame and
    merged with ``coutries`` on the index of both tables (pandas' default
    inner join), so only index labels present in both tables are kept.
    Returns the merged DataFrame; the income column keeps ``year`` as its name.
    '''
    return pd.merge(
        coutries,
        pd.DataFrame(income[year]),
        left_index=True,
        right_index=True,
    )
class dataTool:
    '''
    this class includes tools for data analysis on income country datasets

    An instance keeps one year together with the data set for that year.
    '''
    def __init__(self, year, dataset):
        '''
        Constructor

        year: label of the analysed year; only used to build the output file name
        dataset: object whose ``boxplot`` method is called by box_plot; it is
                 asked for an 'Income' column grouped by 'Region'
                 (presumably a pandas DataFrame - confirm against the caller)
        '''
        self.year = year
        self.dataset = dataset

    def box_plot(self):
        '''
        Draw a box plot of 'Income' grouped by 'Region', save it as
        'box_<year>.pdf' in the current working directory and clear the figure.
        '''
        self.dataset.boxplot('Income', by='Region', rot=90)
        plt.xlabel('region')
        plt.ylabel('income per person')
        # str() keeps the file name valid when year is an int; plain
        # concatenation of str and int raises TypeError.
        plt.savefig('box_' + str(self.year) + '.pdf')
        plt.clf()

Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import matplotlib.pyplot as plt
import pandas as pd
'''
Created on Nov 30, 2016

@author: peimengsui
@desc: define functions and a class for data analysis tool
'''
def visual_income_dist(year, dataset):
    '''
    Plot how the values of column ``year`` in ``dataset`` are distributed.

    Entries that are missing for that year are left out. Only pyplot drawing
    calls are made here; the function returns None.
    '''
    column = dataset[year]
    plt.hist(column.dropna().values)
    title_text = "distribution of income per person across all countries_" + str(year)
    plt.title(title_text)
    plt.xlabel("income_per_person")
    plt.ylabel("count")

def merge_by_year(coutries, income, year):
    '''
    Build the per-year table used by the plotting tools.

    ``coutries`` and the ``year`` column of ``income`` are merged on their
    indexes (pandas' default inner join). The result gets a "Country" column
    copied from its index, and the year column is renamed to 'Income'.
    Returns the merged DataFrame.
    '''
    year_frame = pd.DataFrame(income[year])
    combined = pd.merge(coutries, year_frame, left_index=True, right_index=True)
    combined["Country"] = combined.index
    return combined.rename(columns={year: 'Income'})
class dataTool:
    '''
    this class includes tools for data analysis on income country datasets

    An instance keeps one year together with the data set for that year.
    '''
    def __init__(self, year, dataset):
        '''
        Constructor

        year: label of the analysed year; only used to build the output file name
        dataset: object whose ``boxplot`` method is called by box_plot; it is
                 asked for an 'Income' column grouped by 'Region'
                 (presumably the frame returned by merge_by_year - confirm)
        '''
        self.year = year
        self.dataset = dataset

    def box_plot(self):
        '''
        Draw a box plot of 'Income' grouped by 'Region', save it as
        'boxplot<year>.pdf' in the current working directory and clear the figure.
        '''
        self.dataset.boxplot('Income', by='Region')
        plt.xlabel('region')
        plt.ylabel('income_per_person')
        plt.savefig('boxplot' + str(self.year) + '.pdf')
        plt.clf()
    # The unfinished trailing "def" stub was dropped: a bare "def" is a
    # syntax error and prevented the whole module from being imported.

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
'''
Created on Nov 30, 2016

@author: peimengsui
@desc: test the data analysis tool program
'''
import unittest
from dataTool import *

# Fixtures are read once at import time, relative to the current working
# directory. ``pd`` is not imported in this file; it is expected to arrive
# through ``from dataTool import *`` (confirm if dataTool ever defines __all__).
countries = pd.read_csv('countries.csv', index_col=0)
income = pd.read_excel('indicator gapminder gdp_per_capita_ppp.xlsx', index_col=0)


class Test(unittest.TestCase):
    '''
    Unit tests for the helpers exported by dataTool.
    '''
    def testMerge(self):
        '''
        merge_by_year must return the columns Region, Income, Country in that order.
        '''
        merged = merge_by_year(countries, income, 2000)
        # Compare plain lists: ``Index == list`` is evaluated element-wise, so
        # assertEqual on the raw Index raises ValueError (ambiguous truth
        # value) instead of reporting a pass or a failure.
        self.assertEqual(list(merged.columns), ['Region', 'Income', 'Country'])


if __name__ == "__main__":
    unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import matplotlib.pyplot as plt
import pandas as pd
'''
Created on Nov 30, 2016

@author: peimengsui
@desc: define functions and a class for data analysis tool
'''
def visual_income_dist(year, dataset):
    '''
    Histogram of the non-missing values found under ``year`` in ``dataset``.

    The title carries the year; the axes are labelled "income_per_person" and
    "count". Drawing goes through pyplot and the function returns None.
    '''
    present = dataset[year].dropna()
    plt.hist(present.values)
    plt.title("distribution of income per person across all countries_%s" % str(year))
    x_text, y_text = "income_per_person", "count"
    plt.xlabel(x_text)
    plt.ylabel(y_text)

def merge_by_year(coutries, income, year):
    '''
    Combine the countries table with one year of the income table.

    Both tables are matched on their index (pandas' default inner join).
    The merged frame receives a "Country" column holding its index labels and
    the column named ``year`` is renamed to 'Income' before it is returned.
    '''
    single_year = pd.DataFrame(income[year])
    result = pd.merge(
        left=coutries,
        right=single_year,
        left_index=True,
        right_index=True,
    )
    result["Country"] = result.index
    result = result.rename(columns={year: 'Income'})
    return result
class dataTool:
    '''
    this class includes tools for data analysis on income country datasets

    An instance keeps one year together with the data set for that year.
    Every plotting method writes a PDF into the current working directory.
    '''
    def __init__(self, year, dataset):
        '''
        Constructor

        year: label of the analysed year; used in plot titles and file names
        dataset: object providing ``boxplot`` and an "Income" entry; the box
                 plot additionally groups by 'Region'
                 (presumably the frame returned by merge_by_year - confirm)
        '''
        self.year = year
        self.dataset = dataset

    def boxplot(self):
        '''
        Draw a box plot of 'Income' grouped by 'Region', save it as
        'boxplot_<year>.pdf' and clear the figure.
        '''
        self.dataset.boxplot('Income', by='Region')
        plt.title("boxplot of income by region_" + str(self.year))
        plt.xlabel('region')
        plt.ylabel('income_per_person')
        plt.savefig('boxplot_' + str(self.year) + '.pdf')
        plt.clf()

    def histogram(self):
        '''
        Draw a histogram of the non-missing "Income" values, save it as
        'histogram_<year>.pdf' and clear the figure.
        '''
        plt.hist(self.dataset["Income"].dropna().values)
        plt.title("histogram of income_" + str(self.year))
        plt.xlabel("income_per_person")
        plt.ylabel("count")
        # str() as in boxplot: concatenating an int year directly to the
        # file name raises TypeError.
        plt.savefig('histogram_' + str(self.year) + '.pdf')
        plt.clf()


Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import pandas as pd
from dataTool import *
import sys
'''
Created on Nov 30, 2016

@author: peimengsui
@desc: this program runs a user interactive data analysis tool on income country dataset
'''

if __name__ == '__main__':
    # Both input files are looked up relative to the current working directory.
    countries = pd.read_csv('countries.csv', index_col=0)
    income = pd.read_excel('indicator gapminder gdp_per_capita_ppp.xlsx', index_col=0)
    income_transpose = income.T
    # A bare ``head()`` expression has no effect in a script; print the preview.
    print(income_transpose.head())
    while True:
        try:
            year = input("Please enter the year between 1800 and 2012, or 'Finish': ")
            if year == 'Finish':
                # On 'Finish' write the box plot and histogram for 2007..2012.
                for y in range(2007, 2013):
                    # Report the year being plotted (the original printed str(), i.e. nothing).
                    print('Plot Year ' + str(y))
                    merged = merge_by_year(countries, income, y)
                    tools = dataTool(y, merged)
                    tools.boxplot()
                    tools.histogram()
                print('Finish!')
                sys.exit(0)
            elif year.isdigit() and 1800 <= int(year) <= 2012:
                year = int(year)
                visual_income_dist(year, income)
            else:
                print('invalid year')
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C or a closed stdin ends the session with a non-zero status
            # instead of a traceback.
            sys.exit(1)
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import matplotlib.pyplot as plt
import pandas as pd
'''
Created on Nov 30, 2016

@author: peimengsui
@desc: define functions and a class for data analysis tool
'''
def visual_income_dist(year, dataset):
    '''
    Draw the distribution of the ``year`` column of ``dataset`` as a histogram.

    Missing values are skipped. After titling and labelling, the figure is
    redrawn with ``plt.draw()`` and then closed; nothing is returned or saved.
    '''
    values = dataset[year].dropna().values
    plt.hist(values)
    caption = "distribution of income per person across all countries_" + str(year)
    plt.title(caption)
    plt.xlabel("income_per_person")
    plt.ylabel("count")
    plt.draw()
    plt.close()
def merge_by_year(coutries, income, year):
    '''
    Return the countries table joined with the income values of ``year``.

    The join is on the index of both inputs (pandas' default inner join).
    The result carries an extra "Country" column equal to its index, and the
    income column is called 'Income' instead of ``year``.
    '''
    joined = pd.merge(
        coutries,
        pd.DataFrame(income[year]),
        left_index=True,
        right_index=True,
    )
    joined["Country"] = joined.index
    renamed = joined.rename(columns={year: 'Income'})
    return renamed
class dataTool:
    '''
    Plotting tools for the income/country data set of a single year.

    Each public plotting method draws one chart, redraws it, writes it to a
    PDF named after the year in the current working directory and closes it.
    '''
    def __init__(self, year, dataset):
        '''
        Remember the year and the data set the charts are built from.
        '''
        self.dataset = dataset
        self.year = year

    def _finish(self, title_stem, x_text, y_text, file_stem):
        '''
        Title and label the current plot, redraw it, save it as
        '<file_stem><year>.pdf' and close the figure.
        '''
        year_text = str(self.year)
        plt.title(title_stem + year_text)
        plt.xlabel(x_text)
        plt.ylabel(y_text)
        plt.draw()
        plt.savefig(file_stem + year_text + '.pdf')
        plt.close()

    def boxplot(self):
        '''
        Box plot of 'Income' grouped by 'Region', saved as 'boxplot_<year>.pdf'.
        '''
        self.dataset.boxplot('Income', by='Region')
        self._finish("boxplot of income by region_", 'region', 'income_per_person', 'boxplot_')

    def histogram(self):
        '''
        Histogram of the non-missing "Income" values, saved as 'histogram_<year>.pdf'.
        '''
        plt.hist(self.dataset["Income"].dropna().values)
        self._finish("histogram of income_", "income_per_person", "count", 'histogram_')

Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import matplotlib.pyplot as plt
import pandas as pd
'''
Created on Nov 30, 2016

@author: peimengsui
@desc: define functions and a class for data analysis tool
'''
def visual_income_dist(year, dataset):
    '''
    Show, as a histogram, how the ``year`` values of ``dataset`` are spread.

    Missing values are removed first. The function only issues pyplot drawing
    calls (histogram, title, axis labels) and returns None.
    '''
    observed = dataset[year].dropna().values
    plt.hist(observed)
    plt.title("".join(("distribution of income per person across all countries_", str(year))))
    plt.xlabel("income_per_person")
    plt.ylabel("count")

def merge_by_year(coutries, income, year):
    '''
    Merge the countries table with the ``year`` column of the income table.

    Rows are matched on the index of both tables (pandas' default inner
    join). A "Country" column is filled from the merged index and the year
    column is renamed to 'Income'. Returns the resulting DataFrame.
    '''
    income_for_year = pd.DataFrame(income[year])
    table = pd.merge(coutries, income_for_year, left_index=True, right_index=True)
    table["Country"] = table.index
    table = table.rename(columns={year: 'Income'})
    return table
class dataTool:
    '''
    Chart helpers for one year of the merged income/country data.

    Every chart is written as a PDF into the current working directory and
    its figure is closed afterwards.
    '''
    def __init__(self, year, dataset):
        '''
        Keep the year (used in titles and file names) and the data set.
        '''
        self.year, self.dataset = year, dataset

    def boxplot(self):
        '''
        Box plot of 'Income' by 'Region'; output file 'boxplot_<year>.pdf'.
        '''
        self.dataset.boxplot('Income', by='Region')
        heading = "boxplot of income by region_" + str(self.year)
        plt.title(heading)
        plt.xlabel('region')
        plt.ylabel('income_per_person')
        target = 'boxplot_' + str(self.year) + '.pdf'
        plt.savefig(target)
        plt.close()

    def histogram(self):
        '''
        Histogram of the non-missing "Income" values; output file
        'histogram_<year>.pdf'.
        '''
        incomes = self.dataset["Income"].dropna().values
        plt.hist(incomes)
        heading = "histogram of income_" + str(self.year)
        plt.title(heading)
        plt.xlabel("income_per_person")
        plt.ylabel("count")
        target = 'histogram_' + str(self.year) + '.pdf'
        plt.savefig(target)
        plt.close()

Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import pandas as pd
from dataTool import *
import sys
'''
Created on Nov 30, 2016

@author: peimengsui
@desc: this program runs a user interactive data analysis tool on income country dataset
'''

if __name__ == '__main__':
    # Load both inputs from the current working directory and preview the
    # transposed income table.
    countries = pd.read_csv('countries.csv', index_col=0)
    income = pd.read_excel('indicator gapminder gdp_per_capita_ppp.xlsx', index_col=0)
    income_transpose = income.T
    print(income_transpose.head())

    # Keep prompting until the user types 'Finish' or interrupts.
    while True:
        try:
            year = input("Please enter the year between 1800 and 2012, or 'Finish': ")
            if year == 'Finish':
                # Write the box plot and histogram for each year 2007..2012.
                for y in range(2007, 2013):
                    merged = merge_by_year(countries, income, y)
                    tools = dataTool(y, merged)
                    tools.boxplot()
                    tools.histogram()
                print('Finish!')
                sys.exit(0)
            if year.isdigit() and 1800 <= int(year) <= 2012:
                year = int(year)
                visual_income_dist(year, income)
            else:
                print('invalid year')
        except KeyboardInterrupt:
            # Ctrl-C leaves the loop with exit status 1.
            sys.exit(1)
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import matplotlib.pyplot as plt
import pandas as pd
'''
Created on Nov 30, 2016

@author: peimengsui
@desc: define functions and a class for data analysis tool
'''
def visual_income_dist(year, dataset):
    '''
    Plot a histogram of the ``year`` column of ``dataset``.

    Missing values are discarded before plotting. Title and axis labels are
    set through pyplot; the function neither returns nor saves anything.
    '''
    year_label = str(year)
    non_missing = dataset[year].dropna()
    plt.hist(non_missing.values)
    plt.title("distribution of income per person across all countries_" + year_label)
    plt.xlabel("income_per_person")
    plt.ylabel("count")

def merge_by_year(coutries, income, year):
    '''
    Produce the merged countries/income table for one year.

    The ``year`` column of ``income`` is merged with ``coutries`` on the
    index of both (pandas' default inner join). The returned DataFrame has
    the extra column "Country" (a copy of its index), and the year column is
    named 'Income'.
    '''
    one_year = pd.DataFrame(income[year])
    merged_frame = pd.merge(coutries, one_year, left_index=True, right_index=True)
    merged_frame["Country"] = merged_frame.index
    return merged_frame.rename(columns={year: 'Income'})
class dataTool:
    '''
    this class includes tools for data analysis on income country datasets

    An instance keeps one year together with the data set for that year.
    Every plotting method writes a PDF into the current working directory.
    '''
    def __init__(self, year, dataset):
        '''
        Constructor

        year: label of the analysed year; used in plot titles and file names
        dataset: object providing ``boxplot`` and an "Income" entry; the box
                 plot additionally groups by 'Region'
                 (presumably the frame returned by merge_by_year - confirm)
        '''
        self.year = year
        self.dataset = dataset

    def boxplot(self):
        '''
        Draw a box plot of 'Income' grouped by 'Region' and save it as
        'boxplot_<year>.pdf'.
        '''
        self.dataset.boxplot('Income', by='Region')
        plt.title("boxplot of income by region_" + str(self.year))
        plt.xlabel('region')
        plt.ylabel('income_per_person')
        plt.savefig('boxplot_' + str(self.year) + '.pdf')
        # Close the figure once it is on disk; otherwise the next pyplot call
        # (e.g. histogram) draws onto these axes and open figures pile up.
        plt.close()

    def histogram(self):
        '''
        Draw a histogram of the non-missing "Income" values and save it as
        'histogram_<year>.pdf'.
        '''
        plt.hist(self.dataset["Income"].dropna().values)
        plt.title("histogram of income_" + str(self.year))
        plt.xlabel("income_per_person")
        plt.ylabel("count")
        plt.savefig('histogram_' + str(self.year) + '.pdf')
        # Same reason as in boxplot: keep later plots off this figure.
        plt.close()

Loading