-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcorrelations.R
More file actions
59 lines (58 loc) · 2.26 KB
/
correlations.R
File metadata and controls
59 lines (58 loc) · 2.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
library(dplyr)
library(tidyr)
library(magrittr)
library(ggplot2)
library(ggthemr)
library(grid)
# Build `housingUnits` by stacking the yearly "E5<year>.csv" tables for
# 2010-2017. Every year is read first and the pieces are bound once at the
# end, rather than growing the frame with rbind() on each pass. No seed frame
# is needed: rbind() ignores zero-row data frames, so a pre-declared empty
# frame would not influence the columns of the result.
housingUnits <- do.call(rbind, lapply(2010:2017, function(year) {
  # Skip the three lines above the header row, then read 58 data rows
  # (presumably one per county -- confirm against the CSV layout).
  units <- read.csv(paste0("E5", year, ".csv"), skip = 3, nrows = 58)
  colnames(units)[1] <- "County"
  colnames(units)[5] <- "Total Housing Units"
  # Keep the first 13 columns only, then tag every row with its source year.
  units <- units[, 1:13]
  units$Year <- year
  # Strip commas (presumably thousands separators) before converting.
  units$`Total Housing Units` <- as.numeric(
    gsub(",", "", units$`Total Housing Units`)
  )
  units
}))
# Load the saved MedianIncome and MedianHomePrice objects.
medinc <- readRDS("MedianIncome.RDS")
medhome <- readRDS("MedianHomePrice.RDS")
# `hai` is read with the first three lines of the CSV skipped; `haiyear` is
# the per-year table whose value columns are overwritten in the loop below.
hai <- read.csv("HistoricalHAI.csv", skip = 3)
haiyear <- read.csv("HAIYearly.csv")
View(haiyear)
# For each year 1991-2017, overwrite row (year - 1990) of haiyear (every
# column except the first) with the column means of a window of rows in hai.
for (year in 1991:2017) {
  hai_rows <- if (year <= 2005) {
    # Through 2005: twelve consecutive rows per year, starting at row 1.
    first_row <- (year - 1991) * 12 + 1
    seq(first_row, first_row + 11, 1)
  } else {
    # From 2006 on: five rows per year, advancing four rows per year.
    # NOTE(review): the first window starts at row 182 although the 12-row
    # windows end at row 180, and consecutive windows share one row
    # (182-186, 186-190, ...). Confirm against the layout of
    # HistoricalHAI.csv that this is intended.
    first_row <- 182 + (year - 2006) * 4
    seq(first_row, first_row + 4)
  }
  haiyear[year - 1990, -1] <- colMeans(hai[hai_rows, -1], na.rm = TRUE)
}
# Replace every literal dot in the column names with a space.
colnames(haiyear) <- gsub(".", " ", colnames(haiyear), fixed = TRUE)
# Load the saved building-permit object and open it in the data viewer.
buildingpermits <- readRDS("BuildingPermits.RDS")
View(buildingpermits)
# Load the saved rental-price object, keep only the rows whose State is "CA",
# and open the result in the data viewer.
rentalprice <- subset(readRDS("RentalPrice.RDS"), State == "CA")
View(rentalprice)
# Correlate each of 43 haiyear value columns with the building-permit series
# and with 12 housingUnits columns, storing the coefficients in the row of
# `correlations` whose County matches the haiyear column name.
correlations <- data.frame(matrix(ncol = 14, nrow = 58))
colnames(correlations)[1:2] <- c("County", "Building Permits")
colnames(correlations)[3:14] <- colnames(housingUnits)[2:13]
# Fill in the county labels before the loop: rows are located by matching on
# this column, and while it is all NA no row can match, so no coefficient
# would ever be stored.
correlations$County <- buildingpermits$County
for (i in 1:43) {
  # Use a dedicated name for the column so the `hai` table is not overwritten.
  hai_series <- haiyear[, 2 + i]
  countyname <- colnames(haiyear)[2 + i]
  ind <- which(correlations$County == countyname)
  print(ind)
  if (length(ind) == 0L) {
    # This haiyear column has no row in `correlations`; nothing to store.
    next
  }
  # Prepend 10 NAs, presumably so the permit values line up with the rows of
  # haiyear.
  # NOTE(review): row i of buildingpermits is paired with column 2 + i of
  # haiyear purely by position; verify the two are in the same order, or look
  # the row up by county name instead.
  permits <- c(rep(NA, 10), as.numeric(gsub(",", "", buildingpermits[i, -1])))
  correlations[ind, 2] <- cor(permits, hai_series, use = "complete.obs")
  # The rows for this county are the same for every feature, so select once.
  county_units <- subset(housingUnits, County == countyname)
  for (j in 1:12) {
    # Strip commas and percent signs, convert to numeric, and prepend 19 NAs
    # (presumably for the haiyear rows that precede the housing-unit data).
    cleaned <- gsub("%", "", gsub(",", "", county_units[, 1 + j]), fixed = TRUE)
    feature <- c(rep(NA, 19), as.numeric(cleaned))
    correlations[ind, j + 2] <- cor(feature, hai_series, use = "complete.obs")
  }
}
# Allocate an all-NA frame with 58 rows and 14 columns, name its first column
# "County", and fill that column from the building-permit object.
data <- data.frame(matrix(NA, nrow = 58, ncol = 14))
names(data)[1] <- "County"
data[["County"]] <- buildingpermits$County
# Column means of `correlations` without its first column, ignoring NAs, then
# open the full table in the data viewer.
colMeans(correlations[-1], na.rm = TRUE)
View(correlations)