From 57529636da0aa2079e9265ba5d1d02cc76e15289 Mon Sep 17 00:00:00 2001 From: kevinyan0619 Date: Mon, 5 Dec 2016 14:10:40 -0500 Subject: [PATCH 1/3] Add files via upload --- assignment9.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ dataAnalysis.py | 39 +++++++++++++++++++++++++++++++++++++++ results.rtf | 13 +++++++++++++ summaryInfo.py | 37 +++++++++++++++++++++++++++++++++++++ test.py | 27 +++++++++++++++++++++++++++ 5 files changed, 165 insertions(+) create mode 100644 assignment9.py create mode 100644 dataAnalysis.py create mode 100644 results.rtf create mode 100644 summaryInfo.py create mode 100644 test.py diff --git a/assignment9.py b/assignment9.py new file mode 100644 index 0000000..b7a882d --- /dev/null +++ b/assignment9.py @@ -0,0 +1,49 @@ +""" +Created on Mon Dec 5 01:37:07 2016 + +@author: Shucheng Yan +net ID: sy1253 + +credit wz1070 for data structure insight and unitest +""" + +import pandas as pd +import matplotlib.pyplot as plt +import sys +from dataAnalysis import * +from summaryInfo import * + +def main(): + countries = pd.read_csv('countries.csv') + income = pd.read_excel('indicator gapminder gdp_per_capita_ppp.xlsx', index_col = 0).transpose() + print(income.head()) + + while True: + try: + input_year = input("please enter a year between 1800 and 2012 to see income distribution: ") + if input_year == 'finish': + for year in range(2007, 2013): + merged_data = mergeByYear(year, income, countries) + # create an obeject for each year + mergedPresent = dataAnalysis(year) + #present each year's histogram and boxplot + mergedPresent.histogram(merged_data) + mergedPresent.boxplot(merged_data) + break + + elif int(input_year) >= 1800 and int(input_year) <= 2012: + inputYear = int(input_year) + displaySummary(inputYear, income) + + except InputValueException: + print("please enter a valid integer year between 1800 and 2012") + except KeyboardInterrupt: + sys.exit() + except EOFError: + sys.exit() + + +if __name__ == "__main__": + main() + + \ 
# -*- coding: utf-8 -*-
"""
Per-region income plots for a single year (dataAnalysis.py).

Created on Mon Dec 5 10:43:13 2016

@author: kevinyan
"""


class dataAnalysis():
    """Draw, save and show per-region income plots for one year.

    Both plotting methods take a DataFrame that must contain a ``Region``
    column (it is used as the grouping key) next to the numeric income data.
    """

    def __init__(self, year):
        # Year the plotted data refers to; used in titles and file names.
        self.year = year

    def _output_name(self, kind):
        """Return the PDF file name for a plot of the given *kind*."""
        # The original histogram name lacked the space after "in"
        # ("histogram in2007.pdf"); both plots now share one format.
        return kind + ' in ' + str(self.year) + '.pdf'

    def _save_and_show(self, kind):
        """Save the current figure as a PDF, then display it."""
        # matplotlib is imported lazily so that merely importing this module
        # does not require a plotting backend.
        import matplotlib.pyplot as plt

        # Save before show(): the save is done while the figure is still the
        # current one.
        plt.savefig(self._output_name(kind))
        plt.show()

    def histogram(self, income):
        """
        Plot one histogram of income per person for every region.

        The figure is written to ``histogram in <year>.pdf`` and shown.
        """
        import matplotlib.pyplot as plt

        # DataFrame.hist has no ``fontsize`` parameter (unlike boxplot); tick
        # label sizes are controlled with xlabelsize / ylabelsize instead.
        # NOTE(review): the stray ``fontsize`` was forwarded to matplotlib's
        # hist(), which is expected to reject it - confirm on the target
        # pandas/matplotlib versions.
        income.hist(by='Region', figsize=(12, 12), xlabelsize=12, ylabelsize=12)

        # pandas titles each subplot with its region, so the overall title
        # must go on the figure; plt.title() would overwrite the last
        # subplot's region name.
        plt.suptitle("histogram of the distribution of the income in " + str(self.year))

        # Label every subplot, not just the current one. In a histogram the
        # x axis is the measured value and the y axis is a count.
        for axis in plt.gcf().get_axes():
            axis.set_xlabel('Income Per Person')
            axis.set_ylabel('Number of Countries')

        self._save_and_show('histogram')

    def boxplot(self, income):
        """
        Plot a boxplot of income per person grouped by region.

        The figure is written to ``boxplot in <year>.pdf`` and shown.
        """
        import matplotlib.pyplot as plt

        income.boxplot(by='Region', figsize=(12, 12), fontsize=12)
        plt.title("boxplot of the distribution of the income in " + str(self.year))
        plt.xlabel('Region')
        plt.ylabel('Income Per Person')
        self._save_and_show('boxplot')
# -*- coding: utf-8 -*-
"""
Income-data helpers (summaryInfo.py) and their unit test (test.py).

Created on Mon Dec 5 2016

@author: kevinyan
"""
import os
import unittest

import pandas as pd


# ---------------------------------------------------------------------------
# summaryInfo.py
# ---------------------------------------------------------------------------

def mergeByYear(year, income, countries):
    """
    Merge the countries table with the income figures of one year.

    Parameters:
        year: row label to select from ``income``.
        income: DataFrame indexed by year with one column per country.
        countries: DataFrame with a ``Country`` column.

    Returns:
        The inner merge of ``countries`` with a two-column frame
        (``Country``, ``Income``) built from ``income.loc[year]``.

    Raises:
        KeyError: if ``year`` is not a row label of ``income``.
    """
    # Name the values and the country axis directly rather than renaming
    # the columns produced by reset_index(); the original relied on the
    # spreadsheet header being literally 'gdp pc test' and broke the merge
    # for any other (or missing) header.
    incomeSingle = (
        income.loc[year]
        .rename('Income')
        .rename_axis('Country')
        .reset_index()
    )
    return pd.merge(countries, incomeSingle, on='Country')


def displaySummary(year, income):
    """
    Show a histogram of income per person across all countries for a year.

    Countries without a value for that year are left out.
    """
    # Imported lazily so importing this module needs no plotting backend.
    import matplotlib.pyplot as plt

    values = income.loc[year].dropna()
    plt.hist(values)
    plt.title("distribution of income per person across all countries in the world in " + str(year))
    plt.xlabel("income per person")
    plt.ylabel("number of countries")
    plt.show()


# ---------------------------------------------------------------------------
# test.py
# ---------------------------------------------------------------------------

COUNTRIES_FILE = 'countries.csv'
INCOME_FILE = 'indicator gapminder gdp_per_capita_ppp.xlsx'


class Test(unittest.TestCase):
    """Checks mergeByYear against the real assignment data files."""

    @classmethod
    def setUpClass(cls):
        # Load the data once, and only when the tests actually run, instead
        # of at import time; skip cleanly when the files are not present.
        missing = [
            path for path in (COUNTRIES_FILE, INCOME_FILE)
            if not os.path.exists(path)
        ]
        if missing:
            raise unittest.SkipTest("missing data file(s): " + ", ".join(missing))
        cls.countries = pd.read_csv(COUNTRIES_FILE)
        cls.income = pd.read_excel(INCOME_FILE, index_col=0).transpose()

    def testMerged(self):
        mergedTest = mergeByYear(2007, self.income, self.countries)
        # Check that the DataFrame only has the expected columns.
        self.assertTrue(
            all(name in ['Country', 'Region', 'Income'] for name in mergedTest)
        )
        # assertTrue(value, expected) treated `expected` as the failure
        # message and never compared anything; assertEqual does the check.
        # NOTE(review): expected values are taken from the original test and
        # depend on the row order of the data files - confirm they hold.
        self.assertEqual(mergedTest['Country'][4], 'Burundi')
        self.assertEqual(mergedTest['Region'][5], 'AFRICA')


if __name__ == "__main__":
    unittest.main()
From d91088c5fa3d3b35b11446872a52c8c67b998bbe Mon Sep 17 00:00:00 2001 From: kevinyan0619 Date: Mon, 5 Dec 2016 14:16:42 -0500 Subject: [PATCH 3/3] Update results.txt --- results.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/results.txt b/results.txt index 5bc94bc..2270c40 100644 --- a/results.txt +++ b/results.txt @@ -2,6 +2,6 @@ Over the period, 1. Africa has the largest percentage increase in income per person, but most of countries still remain poor compare to countries in other regions. -2. The European and North America’s income per person decreased in 2008-2009 potentially due to the financial crisis. +2. The European and North America’s income per person decreased noticeably in 2008-2009 potentially due to the financial crisis. -3. Other regions have relatively stable economic landscapes with mall increase in the income per person. +3. Other regions have relatively stable economic landscapes with small increase in the income per person.