Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions assignment9.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""
Created on Mon Dec 5 01:37:07 2016

@author: Shucheng Yan
net ID: sy1253

credit wz1070 for data structure insight and unittest
"""

import pandas as pd
import matplotlib.pyplot as plt
import sys
from dataAnalysis import *
from summaryInfo import *

def main():
    """
    Run the interactive income-distribution explorer.

    Loads 'countries.csv' and the gapminder income spreadsheet (transposed so
    that each row is a year), then repeatedly prompts for a year:

    * an integer between 1800 and 2012 shows the world-wide income histogram
      for that year;
    * 'finish' plots and saves the per-region histogram and boxplot for each
      year from 2007 to 2012 and then ends the loop;
    * anything else prints a hint and prompts again.

    Ctrl-C or end-of-input terminates the program via sys.exit().
    """
    countries = pd.read_csv('countries.csv')
    income = pd.read_excel('indicator gapminder gdp_per_capita_ppp.xlsx', index_col=0).transpose()
    print(income.head())

    invalid_year_message = "please enter a valid integer year between 1800 and 2012"

    while True:
        try:
            input_year = input("please enter a year between 1800 and 2012 to see income distribution: ").strip()
            if input_year == 'finish':
                for year in range(2007, 2013):
                    merged_data = mergeByYear(year, income, countries)
                    # create an object for each year
                    mergedPresent = dataAnalysis(year)
                    # present each year's histogram and boxplot
                    mergedPresent.histogram(merged_data)
                    mergedPresent.boxplot(merged_data)
                break

            # int() raises ValueError for non-numeric text; it is handled below.
            inputYear = int(input_year)
            if 1800 <= inputYear <= 2012:
                displaySummary(inputYear, income)
            else:
                # Out-of-range years used to be ignored without any feedback.
                print(invalid_year_message)

        except ValueError:
            # The previous handler named an undefined exception class, so a
            # bad entry crashed with NameError instead of re-prompting.
            print(invalid_year_message)
        except (KeyboardInterrupt, EOFError):
            sys.exit()


if __name__ == "__main__":
    main()


39 changes: 39 additions & 0 deletions dataAnalysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 5 10:43:13 2016

@author: kevinyan
"""

import matplotlib.pyplot as plt



class dataAnalysis():
    """
    Plot the per-region income distribution for one year.

    The year is only used to build plot titles and output file names; the
    data to plot is passed to each method.
    """

    def __init__(self, year):
        """Remember the year that the plots describe."""
        self.year = year

    def histogram(self, income):
        """
        Plot one histogram of income per person for every region.

        `income` is a DataFrame with a 'Region' column to group by. The
        figure is saved as 'histogram in <year>.pdf' and then shown.
        """
        # DataFrame.hist has no `fontsize` parameter (unknown keywords are
        # forwarded to matplotlib); tick label size is set with
        # xlabelsize / ylabelsize instead.
        income.hist(by='Region', figsize=(12, 12), xlabelsize=12, ylabelsize=12)
        figure = plt.gcf()
        # Grouping by region yields a grid of subplots. plt.title/xlabel/ylabel
        # would only touch the last one, so label every axes and put the
        # title on the figure.
        for axes in figure.get_axes():
            axes.set_xlabel('Income Per Person')
            axes.set_ylabel('Number of Countries')
        figure.suptitle("histogram of the distribution of the income in " + str(self.year))
        # File name now has the same 'in <year>' spacing as the boxplot file.
        plt.savefig('histogram in ' + str(self.year) + '.pdf')
        plt.show()

    def boxplot(self, income):
        """
        Plot a boxplot of income per person grouped by region.

        `income` is a DataFrame with a 'Region' column to group by. The
        figure is saved as 'boxplot in <year>.pdf' and then shown.
        """
        income.boxplot(by='Region', figsize=(12, 12), fontsize=12)
        plt.title("boxplot of the distribution of the income in " + str(self.year))
        plt.xlabel('Region')
        plt.ylabel('Income Per Person')
        plt.savefig('boxplot in ' + str(self.year) + '.pdf')
        plt.show()
7 changes: 7 additions & 0 deletions results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Over the period,

1. Africa has the largest percentage increase in income per person, but most of its countries still remain poor compared to countries in other regions.

2. Europe's and North America's income per person decreased noticeably in 2008-2009, potentially due to the financial crisis.

3. Other regions have relatively stable economic landscapes with small increases in income per person.
37 changes: 37 additions & 0 deletions summaryInfo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 5 01:38:39 2016

@author: kevinyan
"""
import pandas as pd
import matplotlib.pyplot as plt

def mergeByYear(year, income, countries):
    """
    Merge the countries and income data sets for one year.

    `income` holds one row per year and one column per country; `countries`
    has a 'Country' column. The result is `countries` inner-joined on
    'Country' with that year's values, which are placed in an 'Income'
    column. Raises KeyError if `year` is not a row label of `income`.
    """
    incomeRow = income.loc[year]
    # Build the two-column frame explicitly rather than renaming whatever
    # label the spreadsheet's corner cell left on the column axis; the merge
    # then works however (or whether) that axis is named.
    incomeSingle = pd.DataFrame({'Country': incomeRow.index,
                                 'Income': incomeRow.values})
    mergedYear = pd.merge(countries, incomeSingle, on='Country')
    return mergedYear


def displaySummary(year, income):
    """
    Show a histogram of income per person across all countries for one year.

    The row of `income` labelled `year` is plotted after dropping missing
    values; the figure is displayed, not saved.
    """
    yearly_values = income.loc[year].dropna()
    heading = "distribution of income per person across all countries in the world in {}".format(year)

    plt.hist(yearly_values)
    plt.title(heading)
    plt.xlabel("income per person")
    plt.ylabel("number of countries")
    plt.show()







27 changes: 27 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 5 13:21:15 2016

@author: kevinyan
"""

import unittest
import pandas as pd
from summaryInfo import *

countries = pd.read_csv('countries.csv')
income = pd.read_excel('indicator gapminder gdp_per_capita_ppp.xlsx', index_col = 0).transpose()


class Test(unittest.TestCase):
    """Unit tests for mergeByYear using the module-level data frames."""

    def testMerged(self):
        """The 2007 merge has only the expected columns and known sample rows."""
        mergedTest = mergeByYear(2007, income, countries)
        # check if the dataFrame is correctly constructed
        expected_columns = ['Country', 'Region', 'Income']
        self.assertTrue(all(column in expected_columns for column in mergedTest))
        # assertTrue(value, 'Burundi') treated the expected value as the
        # failure message and passed for any truthy cell; compare instead.
        # NOTE(review): expected values are taken from the original test and
        # assume the row order of countries.csv - confirm against the data.
        self.assertEqual(mergedTest['Country'][4], 'Burundi')
        self.assertEqual(mergedTest['Region'][5], 'AFRICA')


if __name__ == "__main__":
    unittest.main()