diff --git a/.gitignore b/.gitignore index 2e5be45..1a3a8ce 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ Data/ # or .Rmd files *.pdf *.tex + diff --git a/Data/Crime/Crime.CPG b/Data/Crime/Crime.CPG new file mode 100644 index 0000000..3ad133c --- /dev/null +++ b/Data/Crime/Crime.CPG @@ -0,0 +1 @@ +UTF-8 \ No newline at end of file diff --git a/Data/Crime/Crime.dbf b/Data/Crime/Crime.dbf new file mode 100644 index 0000000..8556a17 Binary files /dev/null and b/Data/Crime/Crime.dbf differ diff --git a/Data/Crime/Crime.docx b/Data/Crime/Crime.docx new file mode 100644 index 0000000..4a4c5b5 Binary files /dev/null and b/Data/Crime/Crime.docx differ diff --git a/Data/Crime/Crime.prj b/Data/Crime/Crime.prj new file mode 100644 index 0000000..909bedd --- /dev/null +++ b/Data/Crime/Crime.prj @@ -0,0 +1 @@ +PROJCS["NAD_1983_HARN_StatePlane_Washington_North_FIPS_4601_Feet",GEOGCS["GCS_North_American_1983_HARN",DATUM["D_North_American_1983_HARN",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Lambert_Conformal_Conic"],PARAMETER["False_Easting",1640416.666666667],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",-120.8333333333333],PARAMETER["Standard_Parallel_1",47.5],PARAMETER["Standard_Parallel_2",48.73333333333333],PARAMETER["Latitude_Of_Origin",47.0],UNIT["Foot_US",0.3048006096012192]],VERTCS["NAVD_1988",VDATUM["North_American_Vertical_Datum_1988"],PARAMETER["Vertical_Shift",0.0],PARAMETER["Direction",1.0],UNIT["Foot_US",0.3048006096012192]] \ No newline at end of file diff --git a/Data/Crime/Crime.sbn b/Data/Crime/Crime.sbn new file mode 100644 index 0000000..29b577d Binary files /dev/null and b/Data/Crime/Crime.sbn differ diff --git a/Data/Crime/Crime.sbx b/Data/Crime/Crime.sbx new file mode 100644 index 0000000..e3a4a8b Binary files /dev/null and b/Data/Crime/Crime.sbx differ diff --git a/Spokane_CrimeData_Neighborhood.zip b/Data/Crime/Crime.shp similarity index 50% rename from 
Spokane_CrimeData_Neighborhood.zip rename to Data/Crime/Crime.shp index feb1000..aa9bff3 100644 Binary files a/Spokane_CrimeData_Neighborhood.zip and b/Data/Crime/Crime.shp differ diff --git a/Data/Crime/Crime.shx b/Data/Crime/Crime.shx new file mode 100644 index 0000000..05122a9 Binary files /dev/null and b/Data/Crime/Crime.shx differ diff --git a/Data/Neighborhood/Neighborhood.cpg b/Data/Neighborhood/Neighborhood.cpg new file mode 100644 index 0000000..3ad133c --- /dev/null +++ b/Data/Neighborhood/Neighborhood.cpg @@ -0,0 +1 @@ +UTF-8 \ No newline at end of file diff --git a/Data/Neighborhood/Neighborhood.dbf b/Data/Neighborhood/Neighborhood.dbf new file mode 100644 index 0000000..345598e Binary files /dev/null and b/Data/Neighborhood/Neighborhood.dbf differ diff --git a/Data/Neighborhood/Neighborhood.docx b/Data/Neighborhood/Neighborhood.docx new file mode 100644 index 0000000..a0562e4 Binary files /dev/null and b/Data/Neighborhood/Neighborhood.docx differ diff --git a/Data/Neighborhood/Neighborhood.prj b/Data/Neighborhood/Neighborhood.prj new file mode 100644 index 0000000..909bedd --- /dev/null +++ b/Data/Neighborhood/Neighborhood.prj @@ -0,0 +1 @@ +PROJCS["NAD_1983_HARN_StatePlane_Washington_North_FIPS_4601_Feet",GEOGCS["GCS_North_American_1983_HARN",DATUM["D_North_American_1983_HARN",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Lambert_Conformal_Conic"],PARAMETER["False_Easting",1640416.666666667],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",-120.8333333333333],PARAMETER["Standard_Parallel_1",47.5],PARAMETER["Standard_Parallel_2",48.73333333333333],PARAMETER["Latitude_Of_Origin",47.0],UNIT["Foot_US",0.3048006096012192]],VERTCS["NAVD_1988",VDATUM["North_American_Vertical_Datum_1988"],PARAMETER["Vertical_Shift",0.0],PARAMETER["Direction",1.0],UNIT["Foot_US",0.3048006096012192]] \ No newline at end of file diff --git a/Data/Neighborhood/Neighborhood.sbn 
b/Data/Neighborhood/Neighborhood.sbn new file mode 100644 index 0000000..e9a3116 Binary files /dev/null and b/Data/Neighborhood/Neighborhood.sbn differ diff --git a/Data/Neighborhood/Neighborhood.sbx b/Data/Neighborhood/Neighborhood.sbx new file mode 100644 index 0000000..b889bf2 Binary files /dev/null and b/Data/Neighborhood/Neighborhood.sbx differ diff --git a/Data/Neighborhood/Neighborhood.shp b/Data/Neighborhood/Neighborhood.shp new file mode 100644 index 0000000..a6aef81 Binary files /dev/null and b/Data/Neighborhood/Neighborhood.shp differ diff --git a/Data/Neighborhood/Neighborhood.shx b/Data/Neighborhood/Neighborhood.shx new file mode 100644 index 0000000..966c1a0 Binary files /dev/null and b/Data/Neighborhood/Neighborhood.shx differ diff --git a/Data/spokanepolicestations.csv b/Data/spokanepolicestations.csv new file mode 100644 index 0000000..610f082 --- /dev/null +++ b/Data/spokanepolicestations.csv @@ -0,0 +1 @@ +Row,Lat,Lng 1,47.655343,-117.420501 2,47.665633,-117.428998 3,47.658928,-117.429513 4,47.665286,-117.428998 5,47.676037,-117.422475 6,47.700997,-117.402734 7,47.713703,-117.405137 8,47.705618,-117.365140 9,47.656268,-117.379045 10,47.667252,-117.439985 11,47.628279,-117.369646 \ No newline at end of file diff --git a/Station_Clustering/crime_location_by_year.Rmd b/Station_Clustering/crime_location_by_year.Rmd new file mode 100644 index 0000000..638675e --- /dev/null +++ b/Station_Clustering/crime_location_by_year.Rmd @@ -0,0 +1,42 @@ +--- +title: "Spokane Crime Location by Year" +output: html_document +--- + +```{r} +library(maps) +library(ggmap) +library(lubridate) +library(dplyr) + +# Produce a scatterplot with crime colored by offense type for each year +# ====================================================================== +Crime_byNeighborhood <- read.csv('Data/Spokane_CrimeData_Neighborhood.csv') # Read in the data + # This will be updated + # when ezknitr is + # included + +#police <- read.csv('../Data/spokanepolicestations.csv') # 
Read in police station locations +police <- data.frame(lat=c(47.665534),lng=c(-117.429014)) + +crime <- tbl_df(data.frame(Crime_byNeighborhood)) %>% # Convert to local dataframe for easy printing + mutate(BEGINDATE = as.Date(BEGINDATE,format="%Y/%m/%d"), # Date conversion + ENDDATE = as.Date(ENDDATE,format="%Y/%m/%d"), + year = year(BEGINDATE)) # Extract year + +lm.lng <- lm(Lng~coords.x1,data=crime %>% # Linear models to convert x/y to lat/long (change) + filter(!is.na(Lat),!is.na(Lng))) +lm.lat <- lm(Lat~coords.x2,data=crime %>% + filter(!is.na(Lat),!is.na(Lng))) + +crime <- crime %>% mutate(Lat=predict(lm.lat,newdata=crime), # Use predict to update lat/long + Lng=predict(lm.lng,newdata=crime)) + + +ggplot(county_map[county_map$subregion=="spokane",],aes(x=long, y=lat)) + geom_polygon(colour="grey",fill="grey") + geom_point(data=crime %>% filter(year>2007) %>% sample_n(20000),aes(Lng,Lat,colour=OFFGEN)) + geom_point(data=police,aes(lng,lat),colour="black",size=2) + facet_wrap(~year) + +ggplot() + geom_point(data=crime %>% filter(year>2007) %>% sample_n(20000),aes(longitude,latitude,colour=OFFGEN)) + geom_point(data=police,aes(lng,lat),colour="black",size=2) + facet_wrap(~year) + + +ggplot() +geom_point(data=crime %>% filter(year>2007) %>% sample_n(200),aes(coords.x1,coords.x2,colour=OFFGEN)) + facet_wrap(~year) +``` diff --git a/annualplot1.png b/annualplot1.png new file mode 100644 index 0000000..9aae319 Binary files /dev/null and b/annualplot1.png differ diff --git a/annualplot10.png b/annualplot10.png new file mode 100644 index 0000000..93defe8 Binary files /dev/null and b/annualplot10.png differ diff --git a/annualplot2.png b/annualplot2.png new file mode 100644 index 0000000..85b09f1 Binary files /dev/null and b/annualplot2.png differ diff --git a/annualplot3.png b/annualplot3.png new file mode 100644 index 0000000..4686471 Binary files /dev/null and b/annualplot3.png differ diff --git a/annualplot4.png b/annualplot4.png new file mode 100644 index 
0000000..8d46638 Binary files /dev/null and b/annualplot4.png differ diff --git a/annualplot5.png b/annualplot5.png new file mode 100644 index 0000000..3c13025 Binary files /dev/null and b/annualplot5.png differ diff --git a/annualplot6.png b/annualplot6.png new file mode 100644 index 0000000..9dc48b1 Binary files /dev/null and b/annualplot6.png differ diff --git a/annualplot7.png b/annualplot7.png new file mode 100644 index 0000000..8cbcdd2 Binary files /dev/null and b/annualplot7.png differ diff --git a/annualplot8.png b/annualplot8.png new file mode 100644 index 0000000..477f81f Binary files /dev/null and b/annualplot8.png differ diff --git a/annualplot9.png b/annualplot9.png new file mode 100644 index 0000000..56a504a Binary files /dev/null and b/annualplot9.png differ diff --git a/assaultoveryears.png b/assaultoveryears.png new file mode 100644 index 0000000..9127e0e Binary files /dev/null and b/assaultoveryears.png differ diff --git a/basecode.R b/basecode.R index 2ffa312..d5deb19 100644 --- a/basecode.R +++ b/basecode.R @@ -1,16 +1,16 @@ -#this is an R script to import Spokane City crime data, merge it with Neighborhoods, and export it as a flat file -rm(list=ls()) -library(rgdal) -library(maptools) - -#reading in Crime shapefile -Crime=readOGR(dsn="Crime",layer="Crime") -#reading in Neighborhood shapefile -Neighborhood=readOGR(dsn="Neighborhood",layer="Neighborhood") - -#Extracting Neighborhood designation for each point by location -Crime_byNeighborhood=over(Crime,Neighborhood) -#appending rest of data to each point -Crime_byNeighborhood=spCbind(Crime,Crime_byNeighborhood) -#saving resulting dataset as csv -write.csv(file="Spokane_CrimeData_Neighborhood.csv",Crime_byNeighborhood) +#this is an R script to import Spokane City crime data, merge it with Neighborhoods, and export it as a flat file +rm(list=ls()) +library(rgdal) +library(maptools) + +#reading in Crime shapefile +Crime=readOGR(dsn="Crime",layer="Crime") +#reading in Neighborhood shapefile 
+Neighborhood=readOGR(dsn="Neighborhood",layer="Neighborhood") + +#Extracting Neighborhood designation for each point by location +Crime_byNeighborhood=over(Crime,Neighborhood) +#appending rest of data to each point +Crime_byNeighborhood=spCbind(Crime,Crime_byNeighborhood) +#saving resulting dataset as csv +write.csv(file="/Data/Spokane_CrimeData_Neighborhood.csv",Crime_byNeighborhood) diff --git a/figure/unnamed-chunk-3-1.png b/figure/unnamed-chunk-3-1.png new file mode 100644 index 0000000..ad3d69c Binary files /dev/null and b/figure/unnamed-chunk-3-1.png differ diff --git a/figure/unnamed-chunk-4-1.png b/figure/unnamed-chunk-4-1.png new file mode 100644 index 0000000..0840dbb Binary files /dev/null and b/figure/unnamed-chunk-4-1.png differ diff --git a/figure/unnamed-chunk-5-1.png b/figure/unnamed-chunk-5-1.png new file mode 100644 index 0000000..06f1b05 Binary files /dev/null and b/figure/unnamed-chunk-5-1.png differ diff --git a/spokanecrimedata.Rmd b/spokanecrimedata.Rmd new file mode 100644 index 0000000..2e94530 --- /dev/null +++ b/spokanecrimedata.Rmd @@ -0,0 +1,130 @@ +--- +title: "Spokane Crime - Exploring Data" +author: "Patil" +date: "September 26, 2016" +output: html_document + +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +# Reading data + +```{r} +# Arrive at Crime_byNeighborhood using earlier code that was originated by Krisztian +# crimedfonly=data.frame(Crime_byNeighborhood) +# save(crimedfonly,file="Data/Crime_byNeighborhood.Rda") + +load("Data/Crime_byNeighborhood.Rda") +str(crimedfonly) +``` + +# Calling on a few packages + +```{r} +suppressPackageStartupMessages(library(ggplot2)) +suppressPackageStartupMessages(library(dplyr)) +suppressPackageStartupMessages(library(lubridate)) +library(leaflet) +library(ggmap) + +``` + +# Total offenses + +```{r} +crimedfonly %>% group_by(OFFGEN)%>%summarise(Counts=length(OFFGEN))%>% 
ggplot(.,aes(reorder(OFFGEN,Counts),Counts))+geom_bar(stat="identity")+geom_text(aes(x=OFFGEN,y=Counts+2000,label=Counts))+coord_flip()+theme_bw()+labs(y="Count",x="") + +``` + +# Number of times offenses occured in different neighborhoods + +```{r} + +crimedfonly %>% group_by(Name)%>%summarise(Counts=length(Name))%>%filter(Name!="")%>% ggplot(.,aes(reorder(Name,Counts),Counts))+geom_bar(stat="identity")+geom_text(aes(x=Name,y=Counts+1000,label=Counts))+coord_flip()+theme_bw()+labs(y="Count",x="") +``` + +## Offenses in different neighborhoods + +```{r fig.width=10, fig.height=6} +# A shiny app will do well here + +crimedfonly %>% group_by(Name,OFFGEN)%>%summarise(Counts=length(OFFGEN))%>%filter(Name!="")%>% ggplot(.,aes(OFFGEN,Counts))+geom_bar(stat="identity")+facet_wrap(~Name,scales="free") + +``` + +## Messing with Dates + +```{r} +# Adds 9 columns to the dataset + +crimedfonly=crimedfonly[,1:11] + +# Beginning dates parsed + +crimedfonly$beginyear=year(ymd(crimedfonly$BEGINDATE)) +crimedfonly$beginmonth=month(ymd(crimedfonly$BEGINDATE),label=TRUE)# label parameter inserts name of month instead of number +crimedfonly$begindate=day(ymd(crimedfonly$BEGINDATE)) +crimedfonly$beginday=wday(ymd(crimedfonly$BEGINDATE)) # label parameter inserts day of week instead of number of the day in a 7-day week + +# Ending dates parsed + +crimedfonly$endyear=year(ymd(crimedfonly$ENDDATE)) +crimedfonly$endmonth=month(ymd(crimedfonly$ENDDATE),label=TRUE) +crimedfonly$enddate=day(ymd(crimedfonly$ENDDATE)) +crimedfonly$endday=wday(ymd(crimedfonly$ENDDATE),label=TRUE) + +# duration, in days, between starting and ending days +crimedfonly$durationdays= (as.duration(ymd(crimedfonly$ENDDATE)-ymd(crimedfonly$BEGINDATE)))/ddays(1) + +head(crimedfonly) + +``` + +```{r eval=FALSE} +crimedfonly=crimedfonly[!is.na(crimedfonly$Lat),] +crimedfonly=crimedfonly[!is.na(crimedfonly$Lng),] +crime2008=crimedfonly[crimedfonly$beginyear==2008,] + +location=c(-117.402209,47.665330) 
+map=get_map(location,source="osm", color="bw",zoom=11) + +for (i in 1:10){ + + +crimedfsub=crimedfonly[crimedfonly$OFFGEN==levels(crimedfonly$OFFGEN)[i],] +ggmap(map)+geom_point(data=crimedfsub,aes(Lng,Lat),color="red")+facet_wrap(~beginyear)+theme(panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + axis.text = element_blank(),axis.title = element_blank(), + axis.ticks = element_blank())+ggtitle(paste(levels(crimedfonly$OFFGEN)[i],"Over the Years")) + +ggsave(file=paste("annualplot",i,".png",sep=""),scale=4) +} + +``` + + + + + +```{r eval=FALSE} +pal <- colorFactor(rainbow(10), domain = levels(crime2008$OFFGEN)) +crimedfonly=crimedfonly[!is.na(crimedfonly$Lat),] +crimedfonly=crimedfonly[!is.na(crimedfonly$Lng),] +crime2008=crimedfonly[crimedfonly$beginyear==2008,] +levels(crime2008$OFFGEN) +leaflet(data=crime2008)%>%addTiles()%>%addCircleMarkers(color =~pal(OFFGEN),popup=~LOCATION) +``` + + + +# Few low hanging fruits to pick next + + + +* plots of trends for years, months, weeks, days, offense types +* mapping stuff by different variables + + diff --git a/spokanecrimedata.html b/spokanecrimedata.html new file mode 100644 index 0000000..633e43d --- /dev/null +++ b/spokanecrimedata.html @@ -0,0 +1,342 @@ + + + + + + + + + + + + + + + +Spokane Crime - Exploring Data + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + +
+

Reading data

+
# Crime_byNeighborhood comes from the earlier import code originally written by Krisztian; the two commented-out lines show the one-time conversion and caching step
+# crimedfonly=data.frame(Crime_byNeighborhood)
+# save(crimedfonly,file="Data/Crime_byNeighborhood.Rda")
+
+load("Data/Crime_byNeighborhood.Rda") # restores the cached crimedfonly data frame; the path is relative to the working directory
+str(crimedfonly)
+
## 'data.frame':    216341 obs. of  11 variables:
+##  $ OFFENSE  : Factor w/ 43 levels "ARSON","ASSAULT-SIMPLE",..: 27 23 23 8 37 37 23 37 37 27 ...
+##  $ OFFGEN   : Factor w/ 10 levels "Arson","Assault",..: 8 5 5 2 9 9 5 9 9 8 ...
+##  $ BEGINDATE: Factor w/ 3341 levels "2000/01/01","2000/07/14",..: 999 1066 196 198 192 199 200 199 201 201 ...
+##  $ ENDDATE  : Factor w/ 3259 levels "2000/12/01","2001/01/01",..: 914 982 112 113 107 115 115 115 116 116 ...
+##  $ LOCATION : Factor w/ 12021 levels "0 Block Of E 13Th Av",..: 10784 2688 10290 720 10696 353 5879 9549 965 11116 ...
+##  $ Name     : Factor w/ 28 levels "Audubon/Downriver",..: NA NA 16 20 18 16 14 16 6 15 ...
+##  $ Lat      : num  NA NA 47.6 47.7 47.7 ...
+##  $ Lng      : num  NA NA -117 -117 -117 ...
+##  $ coords.x1: num  2477294 2480061 2481088 2464135 2485380 ...
+##  $ coords.x2: num  361936 299239 252062 291819 287599 ...
+##  $ optional : logi  TRUE TRUE TRUE TRUE TRUE TRUE ...
+
+
+

Calling on a few packages

+
suppressPackageStartupMessages(library(ggplot2))
+suppressPackageStartupMessages(library(dplyr))
+suppressPackageStartupMessages(library(lubridate))
+
+
+

Total offenses

+
crimedfonly %>% group_by(OFFGEN)%>%summarise(Counts=length(OFFGEN))%>% ggplot(.,aes(reorder(OFFGEN,Counts),Counts))+geom_bar(stat="identity")+geom_text(aes(x=OFFGEN,y=Counts+2000,label=Counts))+coord_flip()+theme_bw()+labs(y="Count",x="") # bars are ordered by count; y=Counts+2000 places each count label just past the end of its bar
+

+
+
+

Number of times offenses occurred in different neighborhoods

+
crimedfonly %>% group_by(Name)%>%summarise(Counts=length(Name))%>%filter(Name!="")%>% ggplot(.,aes(reorder(Name,Counts),Counts))+geom_bar(stat="identity")+geom_text(aes(x=Name,y=Counts+1000,label=Counts))+coord_flip()+theme_bw()+labs(y="Count",x="") # filter(Name!="") drops blank neighborhood names (NA names go too, since filter() keeps only TRUE rows); y=Counts+1000 places each label just past its bar
+

+
+

Offenses in different neighborhoods

+
# TODO: an interactive Shiny app would present this per-neighborhood breakdown better than a static facet grid
+
+crimedfonly %>% group_by(Name,OFFGEN)%>%summarise(Counts=length(OFFGEN))%>%filter(Name!="")%>% ggplot(.,aes(OFFGEN,Counts))+geom_bar(stat="identity")+facet_wrap(~Name,scales="free") # one panel per neighborhood; scales="free" lets each panel use its own axis ranges
+

+
+
+

Messing with Dates

+
# Adds 9 derived date columns to the dataset (4 from BEGINDATE, 4 from ENDDATE, 1 duration)
+
+crimedfonly=crimedfonly[,1:11] # keep only the first 11 columns (presumably so re-running this chunk does not stack duplicate derived columns - confirm)
+
+# Begin-date components, parsed from BEGINDATE with ymd()
+
+crimedfonly$beginyear=year(ymd(crimedfonly$BEGINDATE))
+crimedfonly$beginmonth=month(ymd(crimedfonly$BEGINDATE),label=TRUE)# label=TRUE stores the month name instead of its number
+crimedfonly$begindate=day(ymd(crimedfonly$BEGINDATE)) # day of the month
+crimedfonly$beginday=wday(ymd(crimedfonly$BEGINDATE)) # no label=TRUE here, so this stores the weekday number (1 = Sunday, e.g. 2007/12/30 -> 1); endday below uses label=TRUE and stores the day name instead
+
+# End-date components, parsed from ENDDATE with ymd()
+
+crimedfonly$endyear=year(ymd(crimedfonly$ENDDATE))
+crimedfonly$endmonth=month(ymd(crimedfonly$ENDDATE),label=TRUE)
+crimedfonly$enddate=day(ymd(crimedfonly$ENDDATE))
+crimedfonly$endday=wday(ymd(crimedfonly$ENDDATE),label=TRUE)
+
+# elapsed time from BEGINDATE to ENDDATE, expressed in days (the duration divided by ddays(1))
+crimedfonly$durationdays= (as.duration(ymd(crimedfonly$ENDDATE)-ymd(crimedfonly$BEGINDATE)))/ddays(1)
+
+head(crimedfonly)
+
##              OFFENSE             OFFGEN  BEGINDATE    ENDDATE
+## 1              THEFT              Theft 2010/03/12 2010/03/12
+## 2 MALICIOUS MISCHIEF Malicious Mischief 2010/05/18 2010/05/19
+## 3 MALICIOUS MISCHIEF Malicious Mischief 2007/12/30 2007/12/31
+## 4   ASSAULT 4TH/CITY            Assault 2008/01/01 2008/01/01
+## 5          VEH-PROWL   Vehicle Prowling 2007/12/26 2007/12/26
+## 6          VEH-PROWL   Vehicle Prowling 2008/01/02 2008/01/03
+##                       LOCATION               Name      Lat       Lng
+## 1       800 Block Of S Thor St               <NA>       NA        NA
+## 2   1800 Block Of W Gardner Av               <NA>       NA        NA
+## 3       700 Block Of W 17Th Av Manito/Cannon Hill 47.63393 -117.4179
+## 4 10200 Block Of N Seminole Dr North Indian Trail 47.74563 -117.4911
+## 5  800 Block Of E Magnesium Rd  Nevada/Lidgerwood 47.71833 -117.3989
+## 6       100 Block Of W 17Th Av Manito/Cannon Hill 47.63393 -117.4179
+##   coords.x1 coords.x2 optional beginyear beginmonth begindate beginday
+## 1   2477294    361936     TRUE      2010        Mar        12        6
+## 2   2480061    299239     TRUE      2010        May        18        3
+## 3   2481088    252062     TRUE      2007        Dec        30        1
+## 4   2464135    291819     TRUE      2008        Jan         1        3
+## 5   2485380    287599     TRUE      2007        Dec        26        4
+## 6   2483348    252177     TRUE      2008        Jan         2        4
+##   endyear endmonth enddate endday durationdays
+## 1    2010      Mar      12    Fri            0
+## 2    2010      May      19    Wed            1
+## 3    2007      Dec      31    Mon            1
+## 4    2008      Jan       1   Tues            0
+## 5    2007      Dec      26    Wed            0
+## 6    2008      Jan       3  Thurs            1
+
+
+
+

A few low-hanging fruits to pick next

+ +
+ + + + +
+ + + + + + + + diff --git a/spokanecrimedata.md b/spokanecrimedata.md new file mode 100644 index 0000000..345fc39 --- /dev/null +++ b/spokanecrimedata.md @@ -0,0 +1,144 @@ +--- +title: "Spokane Crime - Exploring Data" +author: "Patil" +date: "September 24, 2016" +output: html_document + +--- + + + +# Reading data + + +```r +# Arrive at Crime_byNeighborhood using earlier code that was originated by Krisztian +# crimedfonly=data.frame(Crime_byNeighborhood) +# save(crimedfonly,file="Data/Crime_byNeighborhood.Rda") + +load("Data/Crime_byNeighborhood.Rda") +str(crimedfonly) +``` + +``` +## 'data.frame': 216341 obs. of 11 variables: +## $ OFFENSE : Factor w/ 43 levels "ARSON","ASSAULT-SIMPLE",..: 27 23 23 8 37 37 23 37 37 27 ... +## $ OFFGEN : Factor w/ 10 levels "Arson","Assault",..: 8 5 5 2 9 9 5 9 9 8 ... +## $ BEGINDATE: Factor w/ 3341 levels "2000/01/01","2000/07/14",..: 999 1066 196 198 192 199 200 199 201 201 ... +## $ ENDDATE : Factor w/ 3259 levels "2000/12/01","2001/01/01",..: 914 982 112 113 107 115 115 115 116 116 ... +## $ LOCATION : Factor w/ 12021 levels "0 Block Of E 13Th Av",..: 10784 2688 10290 720 10696 353 5879 9549 965 11116 ... +## $ Name : Factor w/ 28 levels "Audubon/Downriver",..: NA NA 16 20 18 16 14 16 6 15 ... +## $ Lat : num NA NA 47.6 47.7 47.7 ... +## $ Lng : num NA NA -117 -117 -117 ... +## $ coords.x1: num 2477294 2480061 2481088 2464135 2485380 ... +## $ coords.x2: num 361936 299239 252062 291819 287599 ... +## $ optional : logi TRUE TRUE TRUE TRUE TRUE TRUE ... 
+``` + +# Calling on a few packages + + +```r +suppressPackageStartupMessages(library(ggplot2)) +suppressPackageStartupMessages(library(dplyr)) +suppressPackageStartupMessages(library(lubridate)) +``` + +# Total offenses + + +```r +crimedfonly %>% group_by(OFFGEN)%>%summarise(Counts=length(OFFGEN))%>% ggplot(.,aes(reorder(OFFGEN,Counts),Counts))+geom_bar(stat="identity")+geom_text(aes(x=OFFGEN,y=Counts+2000,label=Counts))+coord_flip()+theme_bw()+labs(y="Count",x="") +``` + +![plot of chunk unnamed-chunk-3](figure/unnamed-chunk-3-1.png) + +# Number of times offenses occured in different neighborhoods + + +```r +crimedfonly %>% group_by(Name)%>%summarise(Counts=length(Name))%>%filter(Name!="")%>% ggplot(.,aes(reorder(Name,Counts),Counts))+geom_bar(stat="identity")+geom_text(aes(x=Name,y=Counts+1000,label=Counts))+coord_flip()+theme_bw()+labs(y="Count",x="") +``` + +![plot of chunk unnamed-chunk-4](figure/unnamed-chunk-4-1.png) + +## Offenses in different neighborhoods + + +```r +# A shiny app will do well here + +crimedfonly %>% group_by(Name,OFFGEN)%>%summarise(Counts=length(OFFGEN))%>%filter(Name!="")%>% ggplot(.,aes(OFFGEN,Counts))+geom_bar(stat="identity")+facet_wrap(~Name,scales="free") +``` + +![plot of chunk unnamed-chunk-5](figure/unnamed-chunk-5-1.png) + +## Messing with Dates + + +```r +# Adds 9 columns to the dataset + +crimedfonly=crimedfonly[,1:11] + +# Beginning dates parsed + +crimedfonly$beginyear=year(ymd(crimedfonly$BEGINDATE)) +crimedfonly$beginmonth=month(ymd(crimedfonly$BEGINDATE),label=TRUE)# label parameter inserts name of month instead of number +crimedfonly$begindate=day(ymd(crimedfonly$BEGINDATE)) +crimedfonly$beginday=wday(ymd(crimedfonly$BEGINDATE)) # label parameter inserts day of week instead of number of the day in a 7-day week + +# Ending dates parsed + +crimedfonly$endyear=year(ymd(crimedfonly$ENDDATE)) +crimedfonly$endmonth=month(ymd(crimedfonly$ENDDATE),label=TRUE) +crimedfonly$enddate=day(ymd(crimedfonly$ENDDATE)) 
+crimedfonly$endday=wday(ymd(crimedfonly$ENDDATE),label=TRUE) + +# duration, in days, between starting and ending days +crimedfonly$durationdays= (as.duration(ymd(crimedfonly$ENDDATE)-ymd(crimedfonly$BEGINDATE)))/ddays(1) + +head(crimedfonly) +``` + +``` +## OFFENSE OFFGEN BEGINDATE ENDDATE +## 1 THEFT Theft 2010/03/12 2010/03/12 +## 2 MALICIOUS MISCHIEF Malicious Mischief 2010/05/18 2010/05/19 +## 3 MALICIOUS MISCHIEF Malicious Mischief 2007/12/30 2007/12/31 +## 4 ASSAULT 4TH/CITY Assault 2008/01/01 2008/01/01 +## 5 VEH-PROWL Vehicle Prowling 2007/12/26 2007/12/26 +## 6 VEH-PROWL Vehicle Prowling 2008/01/02 2008/01/03 +## LOCATION Name Lat Lng +## 1 800 Block Of S Thor St NA NA +## 2 1800 Block Of W Gardner Av NA NA +## 3 700 Block Of W 17Th Av Manito/Cannon Hill 47.63393 -117.4179 +## 4 10200 Block Of N Seminole Dr North Indian Trail 47.74563 -117.4911 +## 5 800 Block Of E Magnesium Rd Nevada/Lidgerwood 47.71833 -117.3989 +## 6 100 Block Of W 17Th Av Manito/Cannon Hill 47.63393 -117.4179 +## coords.x1 coords.x2 optional beginyear beginmonth begindate beginday +## 1 2477294 361936 TRUE 2010 Mar 12 6 +## 2 2480061 299239 TRUE 2010 May 18 3 +## 3 2481088 252062 TRUE 2007 Dec 30 1 +## 4 2464135 291819 TRUE 2008 Jan 1 3 +## 5 2485380 287599 TRUE 2007 Dec 26 4 +## 6 2483348 252177 TRUE 2008 Jan 2 4 +## endyear endmonth enddate endday durationdays +## 1 2010 Mar 12 Fri 0 +## 2 2010 May 19 Wed 1 +## 3 2007 Dec 31 Mon 1 +## 4 2008 Jan 1 Tues 0 +## 5 2007 Dec 26 Wed 0 +## 6 2008 Jan 3 Thurs 1 +``` + + + +# Few low hanging fruits to pick next + + + +* plots of trends for years, months, weeks, days, offense types +* mapping stuff by different variables + + diff --git a/spokanecrimedata_files/figure-html/unnamed-chunk-3-1.png b/spokanecrimedata_files/figure-html/unnamed-chunk-3-1.png new file mode 100644 index 0000000..04165d5 Binary files /dev/null and b/spokanecrimedata_files/figure-html/unnamed-chunk-3-1.png differ diff --git 
a/spokanecrimedata_files/figure-html/unnamed-chunk-4-1.png b/spokanecrimedata_files/figure-html/unnamed-chunk-4-1.png new file mode 100644 index 0000000..bed9132 Binary files /dev/null and b/spokanecrimedata_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/spokanecrimedata_files/figure-html/unnamed-chunk-5-1.png b/spokanecrimedata_files/figure-html/unnamed-chunk-5-1.png new file mode 100644 index 0000000..34cf4e5 Binary files /dev/null and b/spokanecrimedata_files/figure-html/unnamed-chunk-5-1.png differ