melanieriley · palmorezm · Mar 20, 2022 · Mar 20, 2022 · Mar 22, 2022 · Apr 18, 2022
diff --git a/.gitignore b/.gitignore
diff --git a/Financial/cleaning.R b/Financial/cleaning.R
@@ -0,0 +1,37 @@
+
+# Financial Data Prep 
+
+# Packages
+require(dplyr)
+require(stringr)
+
+# Source: transaction log published from Google Sheets as CSV
+df <- read.csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vSNwyF0GHoS_VUYOzXkw4yWie44Zx_9rBJ5iXZesRgYpRcXnes8TMKWpIXyLs0YPEZcSp0E31BzAP6M/pub?gid=1021108104&single=true&output=csv")
+df$Date <- as.Date(df$Date) # relies on as.Date() default formats - TODO confirm the sheet's date format
+# Strip "$" and "," so amounts such as "$1,234.56" parse instead of becoming NA
+df$Cost <- as.numeric(stringr::str_remove_all(df$Cost, "[$,]"))
+df$Category <- as.factor(df$Category)
+df$Account <- as.factor(df$Account)
+# Keep only the analysis columns, then drop every row that has a missing value
+df <- na.omit(dplyr::select(df, "Location", "Date", "Description",
+                            "Cost", "Category", "Account"))
+
+# Histogram of alcohol spending; ggplot2 is not attached above, so its calls are namespaced
+df %>% dplyr::filter(Category == "alcohol") %>%
+  ggplot2::ggplot(ggplot2::aes(Cost)) +
+  ggplot2::geom_histogram(fill = "white", col = "light blue", alpha = 0.5) +
+  ggplot2::theme_minimal()
+
+hist(df$Cost) # Needs to be fixed, current bin size is 0 - 50 with 800+ transactions
+# as.Date.character(x) - does this work to format the date or will we need to change the source data type? (Kept as a note: calling it with no argument is an error and stops the script.)
+# Location - Should this be a factor data type with lots of levels? We shop at a lot of the same stores
+# Category - needs an amount associated with each
+# Amount - needs an amount associated with each and needs fixing (extra 11 transactions under Zach
+#   would otherwise be unclassified)
+# Big picture - what do we want to gain from this as a shiny?
+#   Are there any particular questions that we ask each time we want to make a purchase that could save us money? 
+
+plot(df)
+summary(df)
+# Rows with any NA (none expected after na.omit above); which(is.na(df)) gave matrix cell positions, not row numbers
+df[!complete.cases(df), ]
diff --git a/Financial/dataprep.txt.txt b/Financial/dataprep.txt.txt
@@ -0,0 +1,37 @@
+
+# Financial Data Prep 
+
+# Packages
+require(dplyr)
+require(stringr)
+
+# Source: transaction log published from Google Sheets as CSV
+df <- read.csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vSNwyF0GHoS_VUYOzXkw4yWie44Zx_9rBJ5iXZesRgYpRcXnes8TMKWpIXyLs0YPEZcSp0E31BzAP6M/pub?gid=1021108104&single=true&output=csv")
+df$Date <- as.Date(df$Date) # relies on as.Date() default formats - TODO confirm the sheet's date format
+# Strip "$" and "," so amounts such as "$1,234.56" parse instead of becoming NA
+df$Cost <- as.numeric(stringr::str_remove_all(df$Cost, "[$,]"))
+df$Category <- as.factor(df$Category)
+df$Account <- as.factor(df$Account)
+# Keep only the analysis columns, then drop every row that has a missing value
+df <- na.omit(dplyr::select(df, "Location", "Date", "Description",
+                            "Cost", "Category", "Account"))
+
+# Histogram of alcohol spending; ggplot2 is not attached above, so its calls are namespaced
+df %>% dplyr::filter(Category == "alcohol") %>%
+  ggplot2::ggplot(ggplot2::aes(Cost)) +
+  ggplot2::geom_histogram(fill = "white", col = "light blue", alpha = 0.5) +
+  ggplot2::theme_minimal()
+
+hist(df$Cost) # Needs to be fixed, current bin size is 0 - 50 with 800+ transactions
+# as.Date.character(x) - does this work to format the date or will we need to change the source data type? (Kept as a note: calling it with no argument is an error and stops the script.)
+# Location - Should this be a factor data type with lots of levels? We shop at a lot of the same stores
+# Category - needs an amount associated with each
+# Amount - needs an amount associated with each and needs fixing (extra 11 transactions under Zach
+#   would otherwise be unclassified)
+# Big picture - what do we want to gain from this as a shiny?
+#   Are there any particular questions that we ask each time we want to make a purchase that could save us money? 
+
+plot(df)
+summary(df)
+# Rows with any NA (none expected after na.omit above); which(is.na(df)) gave matrix cell positions, not row numbers
+df[!complete.cases(df), ]
diff --git a/Outline.Rmd b/Outline.Rmd
diff --git a/Posts/index.qmd b/Posts/index.qmd
diff --git a/README.md b/README.md
@@ -1,2 +1,2 @@
 # Home
- What is the best place to live?
+Contains the basics of the home directory from our shared drive, along with project areas we can focus on, such as our financial projects, the home affordability model, the cost of living estimator, the time project, and more.  
diff --git a/Vacation/LockScreenLocations.R b/Vacation/LockScreenLocations.R
@@ -8,7 +8,7 @@ locations <- read.delim2(file = file, header = F, sep = "\t")
 print(locations)
 
 
-#####
+##### Example text
 
 # Travel � List</span></p><p class=c1 c2><span class=c0></span>
 # </p><p class=c1><span class=c0>Lake Mezzola, Italy</span>
@@ -25,4 +25,65 @@ library(stringr)
 
 # Extract the characters that contains our locations
 
-str1 <- stringr::str_extract_all(locations, "class=c0>*</span>")
+# Attempts to reach 5e
+# str1 <- stringr::str_extract_all(locations, "class=c0>(.*?)</span>")
+# str2 <- stringr::str_extract_all(locations, "c0>(.*?)</")
+# pattern <- "class=c0>(.*?)</span>"
+# str3 <- regmatches(locations, regexec(pattern, locations)) # Nothing good
+# str4 <- stringr::str_extract_all(locations, ">(.*?)<")
+
+# Functioning below: pull every ">...<" span (the text between tags) out of `locations`
+str5 <- data.frame(stringr::str_extract_all(locations, ">(.*?)<"))
+str5a <- data.frame(stringr::str_remove_all(str5[1], ">")) # not referenced again in the visible script
+str5b <- stringr::str_remove_all(str5[,1], ">") # drop the ">" characters from the first column
+str5c <- stringr::str_remove_all(str5b, "<") # drop the "<" characters
+str5d <- data.frame(str5c)
+# Empty matches are set to NA so na.omit() can drop them; inspect first with head(str5d)
+str5d$str5c[which(str5d$str5c == "")] <- NA
+# sum(is.na(str5d$str5c)) gives the number of empty matches about to be removed
+str5e <- na.omit(str5d)
+
+
+
+
+# Image Collection
+require(rvest)
+# require(purrr)
+require(httr)
+
+# To see an image of "london" in the url:
+# https://www.google.com/search?tbm=isch&q=london
+image_query_base <- "https://www.google.com/search?tbm=isch&q="
+str5e$str5c[[9]] # Where locations begin
+# Percent-encode the place name: entries may contain spaces or commas
+url <- paste0(image_query_base, utils::URLencode(str5e$str5c[[9]], reserved = TRUE))
+res1 <- GET(url = url)
+# *Consider using selector gadget to select in CSS first 3 images using london example
+# then read in html nodes and collect image?
+# from selector gadget: .Q4LuWd (gives all images on the page)
+html_res1 <- read_html(res1$content)
+
+# `url` has no sprintf() placeholder, so read it directly (sprintf(url, 1) ignored its argument)
+read_html(url)
+
+images <- str5e(1:10, function(i) {
+  # NOTE(review): str5e is assigned a data frame earlier in this file, so calling it fails - TODO confirm the intended iterator
+  # simple but effective progress indicator
+  cat(".")
+  # NOTE(review): url_base is not defined in the visible part of this script - TODO define it before use
+  pg <- read_html(sprintf(url_base, i))
+  # NOTE(review): these selectors and column names look like a wine-review template - confirm before reuse
+  data.frame(wine=html_text(html_nodes(pg, ".review-listing .title")),
+             excerpt=html_text(html_nodes(pg, "div.excerpt")),
+             rating=gsub(" Points", "", html_text(html_nodes(pg, "span.rating"))),
+             appellation=html_text(html_nodes(pg, "span.appellation")),
+             price=gsub("\\$", "", html_text(html_nodes(pg, "span.price"))),
+             stringsAsFactors=FALSE)
+
+})
+# NOTE(review): `wines` is never assigned in the visible script; the result above is stored in `images`
+dplyr::glimpse(wines)
+
+
+
+
diff --git a/cities.csv b/cities.csv
diff --git a/home_afford_app/.RData b/home_afford_app/.RData