diff --git a/newfile.ipynb b/newfile.ipynb new file mode 100644 index 0000000..dc42f01 --- /dev/null +++ b/newfile.ipynb @@ -0,0 +1,910 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Python Library for Data Analysis\n", + "\n", + "- series object -one dimensional array\n", + "- \n", + "\n", + "- Updating Data Frames\n", + "- Writing DataFrame back to CSV File" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GEOIDState200520062007200820092010201120122013
004000US01Alabama371503795242212444763998040933425904346441381
104000US02Alaska558915641862993639896160457848574316364861137
204000US04Arizona4524546657629933469144573946896486214704450602
304000US05Arkansas366583705740795395863653838587413023901839919
404000US06California517555531955734570145613454283533675702057528
\n", + "
" + ], + "text/plain": [ + " GEOID State 2005 2006 2007 2008 2009 2010 2011 \\\n", + "0 04000US01 Alabama 37150 37952 42212 44476 39980 40933 42590 \n", + "1 04000US02 Alaska 55891 56418 62993 63989 61604 57848 57431 \n", + "2 04000US04 Arizona 45245 46657 629933 46914 45739 46896 48621 \n", + "3 04000US05 Arkansas 36658 37057 40795 39586 36538 38587 41302 \n", + "4 04000US06 California 51755 55319 55734 57014 56134 54283 53367 \n", + "\n", + " 2012 2013 \n", + "0 43464 41381 \n", + "1 63648 61137 \n", + "2 47044 50602 \n", + "3 39018 39919 \n", + "4 57020 57528 " + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Function to read csv data into data frame\n", + "#returns the Dataframe object\n", + "filepath='DataFIles/income.csv'\n", + "import pandas as pd\n", + "def readcsvdata(filepath):\n", + " return pd.read_csv(filepath)\n", + "\n", + "readcsvdata(filepath)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GEOID State 2005 2006 2007 2008 2009 2010 2011 2012 2013 " + ] + } + ], + "source": [ + "incomedf=readcsvdata(filepath)\n", + "# Function to print all column names in a single line\n", + "#GEOID State 2005 .....\n", + "\n", + "def printdataframecolumns(df):\n", + " columns=df.columns\n", + " for column in columns:\n", + " print(column,end=' ')\n", + " return\n", + "\n", + "printdataframecolumns(incomedf)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "04000US02 Alaska 55891 56418 62993 63989 61604 57848 57431 63648 61137 " + ] + } + ], + "source": [ + "# Function to access a row based on unique column value\n", + "def accessdataframerow(df,key):\n", + " for row in df.values:\n", + " if key in row:\n", + " for item in row:\n", + " print(item,end=' ')\n", + " return\n", + "accessdataframerow(incomedf,\"Alaska\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GEOIDState200520062007200820092010201120122013
004000US01Alabama371503795242212444763998040933425904346441381
104000US02Alaska558915641862993639896160457848574316364861137
204000US04Arizona4524546657629933469144573946896486214704450602
304000US05Arkansas366583705740795395863653838587413023901839919
404000US06California517555531955734570145613454283533675702057528
\n", + "
" + ], + "text/plain": [ + " GEOID State 2005 2006 2007 2008 2009 2010 2011 \\\n", + "0 04000US01 Alabama 37150 37952 42212 44476 39980 40933 42590 \n", + "1 04000US02 Alaska 55891 56418 62993 63989 61604 57848 57431 \n", + "2 04000US04 Arizona 45245 46657 629933 46914 45739 46896 48621 \n", + "3 04000US05 Arkansas 36658 37057 40795 39586 36538 38587 41302 \n", + "4 04000US06 California 51755 55319 55734 57014 56134 54283 53367 \n", + "\n", + " 2012 2013 \n", + "0 43464 41381 \n", + "1 63648 61137 \n", + "2 47044 50602 \n", + "3 39018 39919 \n", + "4 57020 57528 " + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "incomedf" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "56134" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Accessing a unique value based on row, column information\n", + "\n", + "#Income of a state in a given year\n", + "\n", + "def getrowindex(df,rowkey):\n", + " for i in range(len(df.values)):\n", + " if df.values[i][0]==rowkey or df.values[i][1]==rowkey:\n", + " rowindex=i\n", + " return rowindex\n", + "\n", + "def getcolumnindex(df,columnkey):\n", + " for i in range(len(df.columns)):\n", + " if df.columns[i]==columnkey:\n", + " columnindex=i\n", + " return columnindex\n", + "\n", + "\n", + "def valuefromrowcolumn(df,rowkey,columnkey):\n", + " rowindex=getrowindex(df,rowkey)\n", + " columnindex=getcolumnindex(df,columnkey)\n", + " return df.values[rowindex][columnindex]\n", + "valuefromrowcolumn(incomedf,'California','2009')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GEOIDState200520062007200820092010201120122013
004000US01Alabama371503795242212444763998040933425904346441381
104000US02Alaska558915641862993639896160457848574316364861137
204000US04Arizona4524546657629933469144573946896486214704450602
304000US05Arkansas366583705740795395863653838587413023901839919
404000US06California517555531955734570145613454283533675702057528
\n", + "
" + ], + "text/plain": [ + " GEOID State 2005 2006 2007 2008 2009 2010 2011 \\\n", + "0 04000US01 Alabama 37150 37952 42212 44476 39980 40933 42590 \n", + "1 04000US02 Alaska 55891 56418 62993 63989 61604 57848 57431 \n", + "2 04000US04 Arizona 45245 46657 629933 46914 45739 46896 48621 \n", + "3 04000US05 Arkansas 36658 37057 40795 39586 36538 38587 41302 \n", + "4 04000US06 California 51755 55319 55734 57014 56134 54283 53367 \n", + "\n", + " 2012 2013 \n", + "0 43464 41381 \n", + "1 63648 61137 \n", + "2 47044 50602 \n", + "3 39018 39919 \n", + "4 57020 57528 " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Function to update data based on rowkey and column key\n", + "def updatedatafromrowcolumn(df,rowkey,columnkey,newdata):\n", + " rowindex=getrowindex(df,rowkey)\n", + " columnindex=getcolumnindex(df,columnkey)\n", + " row=df.values[rowindex]\n", + " row[columnindex]=newdata\n", + " df.loc[rowindex]=row\n", + " return\n", + "\n", + "updatedatafromrowcolumn(incomedf,'Arizona','2007',629933)\n", + "incomedf" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "04000US02 Alaska 55891 56418 62993 63989 61604 57848 57431 63648 61137 " + ] + } + ], + "source": [ + "accessdataframerow(incomedf,62993)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "#Function to write dataframe to csv\n", + "\n", + "incomedf.to_csv(filepath,index=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GEOIDState200520062007200820092010201120122013
004000US01Alabama371503795242212444763998040933425904346441381
104000US02Alaska558915641862993639896160457848574316364861137
204000US04Arizona4524546657629933469144573946896486214704450602
304000US05Arkansas366583705740795395863653838587413023901839919
404000US06California517555531955734570145613454283533675702057528
61234567891011
\n", + "
" + ], + "text/plain": [ + " GEOID State 2005 2006 2007 2008 2009 2010 2011 \\\n", + "0 04000US01 Alabama 37150 37952 42212 44476 39980 40933 42590 \n", + "1 04000US02 Alaska 55891 56418 62993 63989 61604 57848 57431 \n", + "2 04000US04 Arizona 45245 46657 629933 46914 45739 46896 48621 \n", + "3 04000US05 Arkansas 36658 37057 40795 39586 36538 38587 41302 \n", + "4 04000US06 California 51755 55319 55734 57014 56134 54283 53367 \n", + "6 1 2 3 4 5 6 7 8 9 \n", + "\n", + " 2012 2013 \n", + "0 43464 41381 \n", + "1 63648 61137 \n", + "2 47044 50602 \n", + "3 39018 39919 \n", + "4 57020 57528 \n", + "6 10 11 " + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Function to add a new row of data to dataframe\n", + "\n", + "def addrowdatadataframe(df, rowdata):\n", + " lastrowindex=len(df.values)-1\n", + " df.loc[lastrowindex+1] = rowdata\n", + " return\n", + "\n", + "\n", + "rowdata=[1,2,3,4,5,6,7,8,9,10,11]\n", + "addrowdatadataframe(incomedf,rowdata)\n", + "incomedf" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GEOIDState200520062007200820092010201120122013
004000US01Alabama371503795242212444763998040933425904346441381
104000US02Alaska558915641862993639896160457848574316364861137
204000US04Arizona4524546657629933469144573946896486214704450602
304000US05Arkansas366583705740795395863653838587413023901839919
404000US06California517555531955734570145613454283533675702057528
\n", + "
" + ], + "text/plain": [ + " GEOID State 2005 2006 2007 2008 2009 2010 2011 \\\n", + "0 04000US01 Alabama 37150 37952 42212 44476 39980 40933 42590 \n", + "1 04000US02 Alaska 55891 56418 62993 63989 61604 57848 57431 \n", + "2 04000US04 Arizona 45245 46657 629933 46914 45739 46896 48621 \n", + "3 04000US05 Arkansas 36658 37057 40795 39586 36538 38587 41302 \n", + "4 04000US06 California 51755 55319 55734 57014 56134 54283 53367 \n", + "\n", + " 2012 2013 \n", + "0 43464 41381 \n", + "1 63648 61137 \n", + "2 47044 50602 \n", + "3 39018 39919 \n", + "4 57020 57528 " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Function to delete a row in a dataframe\n", + "def deleterowdataframe(df,rowkey):\n", + " rowindex=getrowindex(df,rowkey)\n", + " return df.drop([rowindex],axis=0)\n", + " \n", + "incomedf=deleterowdataframe(incomedf,1)\n", + "incomedf\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}