Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .gitignore
Binary file not shown.
37 changes: 37 additions & 0 deletions Financial/cleaning.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@

# Financial Data Prep
#
# Downloads the published transactions sheet as CSV, coerces the column types,
# keeps the six analysis columns, sets incomplete rows aside, and produces a
# few exploratory plots and summaries.

# Packages
# library() stops immediately when a package is missing; require() would only
# return FALSE and let the script fail later with a less obvious error.
library(dplyr)
library(ggplot2) # ggplot(), aes(), geom_histogram(), theme_minimal() below
library(stringr)

# Source
df <- read.csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vSNwyF0GHoS_VUYOzXkw4yWie44Zx_9rBJ5iXZesRgYpRcXnes8TMKWpIXyLs0YPEZcSp0E31BzAP6M/pub?gid=1021108104&single=true&output=csv")

# NOTE(review): without a `format` argument as.Date() only parses strings such
# as "2023-01-31" or "2023/01/31". If the sheet exports another layout, pass
# `format =` here or change the source data type.
df$Date <- as.Date(df$Date)

# Strip the currency symbol and thousands separators before converting, so a
# value such as "$1,234.56" parses instead of becoming NA.
df$Cost <- as.numeric(stringr::str_remove_all(df$Cost, "[$,]"))
df$Category <- as.factor(df$Category)
df$Account <- as.factor(df$Account)
df <- df %>%
  dplyr::select(
    "Location", "Date", "Description",
    "Cost", "Category", "Account"
  )

# Keep the incomplete rows for inspection before they are dropped; after
# na.omit() there is nothing left to look at.
incomplete_rows <- df[!complete.cases(df), ]
df <- na.omit(df)

# Distribution of alcohol purchases
df %>%
  filter(Category == "alcohol") %>%
  ggplot(aes(Cost)) +
  geom_histogram(fill = "white", col = "light blue", alpha = 0.5) +
  theme_minimal()

hist(df$Cost) # Needs to be fixed, current bin size is 0 - 50 with 800+ transactions

# Open questions
# Date - as.Date.character() is the method as.Date() dispatches to for
#   character input; it cannot be called without arguments. Does the default
#   parsing work, or will we need to change the source data type?
# Location - Should this be a factor data type with lots of levels? We shop at
#   a lot of the same stores
# Category - needs an amount associated with each
# Amount - needs an amount associated with each and needs fixing (extra 11
#   transactions under Zach would otherwise be unclassified)
# Big picture - what do we want to gain from this as a shiny?
# Are there any particular questions that we ask each time we want to make a
#   purchase that could save us money?

plot(df)
summary(df)

# Rows that were removed because at least one field was missing
incomplete_rows
37 changes: 37 additions & 0 deletions Financial/dataprep.txt.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@

# Financial Data Prep
#
# Downloads the published transactions sheet as CSV, coerces the column types,
# keeps the six analysis columns, sets incomplete rows aside, and produces a
# few exploratory plots and summaries.

# Packages
# library() stops immediately when a package is missing; require() would only
# return FALSE and let the script fail later with a less obvious error.
library(dplyr)
library(ggplot2) # ggplot(), aes(), geom_histogram(), theme_minimal() below
library(stringr)

# Source
df <- read.csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vSNwyF0GHoS_VUYOzXkw4yWie44Zx_9rBJ5iXZesRgYpRcXnes8TMKWpIXyLs0YPEZcSp0E31BzAP6M/pub?gid=1021108104&single=true&output=csv")

# NOTE(review): without a `format` argument as.Date() only parses strings such
# as "2023-01-31" or "2023/01/31". If the sheet exports another layout, pass
# `format =` here or change the source data type.
df$Date <- as.Date(df$Date)

# Strip the currency symbol and thousands separators before converting, so a
# value such as "$1,234.56" parses instead of becoming NA.
df$Cost <- as.numeric(stringr::str_remove_all(df$Cost, "[$,]"))
df$Category <- as.factor(df$Category)
df$Account <- as.factor(df$Account)
df <- df %>%
  dplyr::select(
    "Location", "Date", "Description",
    "Cost", "Category", "Account"
  )

# Keep the incomplete rows for inspection before they are dropped; after
# na.omit() there is nothing left to look at.
incomplete_rows <- df[!complete.cases(df), ]
df <- na.omit(df)

# Distribution of alcohol purchases
df %>%
  filter(Category == "alcohol") %>%
  ggplot(aes(Cost)) +
  geom_histogram(fill = "white", col = "light blue", alpha = 0.5) +
  theme_minimal()

hist(df$Cost) # Needs to be fixed, current bin size is 0 - 50 with 800+ transactions

# Open questions
# Date - as.Date.character() is the method as.Date() dispatches to for
#   character input; it cannot be called without arguments. Does the default
#   parsing work, or will we need to change the source data type?
# Location - Should this be a factor data type with lots of levels? We shop at
#   a lot of the same stores
# Category - needs an amount associated with each
# Amount - needs an amount associated with each and needs fixing (extra 11
#   transactions under Zach would otherwise be unclassified)
# Big picture - what do we want to gain from this as a shiny?
# Are there any particular questions that we ask each time we want to make a
#   purchase that could save us money?

plot(df)
summary(df)

# Rows that were removed because at least one field was missing
incomplete_rows
22 changes: 0 additions & 22 deletions Outline.Rmd

This file was deleted.

741 changes: 741 additions & 0 deletions Posts/index.qmd

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# Home
What is the best place to live?
Contains the basics of the home directory from our shared drive, along with project areas we can focus on, such as our financial projects, the home affordability model, the cost-of-living estimator, the time project, and more.
65 changes: 63 additions & 2 deletions Vacation/LockScreenLocations.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ locations <- read.delim2(file = file, header = F, sep = "\t")
print(locations)


#####
##### Example text

# Travel � List</span></p><p class=c1 c2><span class=c0></span>
# </p><p class=c1><span class=c0>Lake Mezzola, Italy</span>
Expand All @@ -25,4 +25,65 @@ library(stringr)

# Extract the characters that contain our locations
# NOTE(review): `locations` is presumably the data frame produced by the
# read.delim2() call earlier in this file - confirm before relying on how
# stringr coerces it to character.

# In this pattern `>*` means "zero or more `>` characters", not "any text", so
# it only matches `class=c0` followed directly by optional `>`s and `</span>`.
# `str1` is not referenced again in the code visible here.
str1 <- stringr::str_extract_all(locations, "class=c0>*</span>")
# Attempts to reach 5e
# str1 <- stringr::str_extract_all(locations, "class=c0>(.*?)</span>")
# str2 <- stringr::str_extract_all(locations, "c0>(.*?)</")
# pattern <- "class=c0>(.*?)</span>"
# str3 <- regmatches(locations, regexec(pattern, locations)) # Nothing good
# str4 <- stringr::str_extract_all(locations, ">(.*?)<")

# Functioning Below:
# Pull every shortest `>...<` run (the text sitting between two tags) and wrap
# the list of matches in a data frame.
str5 <- data.frame(stringr::str_extract_all(locations, ">(.*?)<"))
# Passes the one-column data frame `str5[1]` rather than a vector; `str5a` is
# not used again in the code visible here.
str5a <- data.frame(stringr::str_remove_all(str5[1], ">"))
# Strip the leading `>` and then the trailing `<` from the first column's
# matches, leaving only the text between the tags.
str5b <- stringr::str_remove_all(str5[,1], ">")
str5c <- stringr::str_remove_all(str5b, "<")
str5d <- data.frame(str5c)
# head(str5d, na.rm = T)
# Mark empty matches (adjacent tags with no text between them) as NA so that
# na.omit() below drops them.
str5d$str5c[which(str5d$str5c == "")] <- NA
# sum(is.na(str5d$str5c))
# `str5e` holds the non-empty extracted strings in a single column, `str5c`.
str5e <- na.omit(str5d)




# Image Collection
#
# Builds a Google Images search URL for each extracted location and collects
# the `src` attribute of every <img> element on each result page.
#
# library() stops immediately when a package is missing; require() would only
# return FALSE and let the script fail later.
library(rvest)
# library(purrr)
library(httr)

# To see an image of "london" in the url:
# https://www.google.com/search?tbm=isch&q=london
image_query_base <- "https://www.google.com/search?tbm=isch&q="

# Build the search URL for one location. The name is percent-encoded because
# locations contain spaces and commas (e.g. "Lake Mezzola, Italy"), which are
# not valid in a raw query string.
build_image_url <- function(location) {
  paste0(
    image_query_base,
    utils::URLencode(as.character(location), reserved = TRUE)
  )
}

first_location <- 9 # Where locations begin
str5e$str5c[[first_location]]

# Drop the entries before the first location; yields an empty vector instead
# of an error when fewer than `first_location` strings were extracted.
location_names <- utils::tail(str5e$str5c, -(first_location - 1))

# Single-page probe for the first location
url <- build_image_url(str5e$str5c[[first_location]])
res1 <- GET(url = url)
# Report a failed request explicitly rather than silently parsing an error page
httr::warn_for_status(res1)

# *Consider using selector gadget to select in CSS first 3 images using london example
# then read in html nodes and collect image?
# from selector gadget: .Q4LuWd (gives all images on the page)
# NOTE(review): `.Q4LuWd` was picked in a browser; confirm it is present in the
# HTML that read_html() receives before switching to it. Plain `img` is used
# below in the meantime.

html_res1 <- read_html(res1$content)

# Fetch one result page and return a data frame with one row per image found.
# `location` is repeated to match the number of images so that a page with no
# <img> elements yields a zero-row data frame rather than an error.
collect_image_sources <- function(location) {
  # simple but effective progress indicator
  cat(".")

  pg <- read_html(build_image_url(location))
  src <- html_attr(html_nodes(pg, "img"), "src")

  data.frame(
    location = rep(as.character(location), length(src)),
    src = src,
    stringsAsFactors = FALSE
  )
}

# First 10 locations only, to keep the number of requests small
images <- do.call(
  rbind,
  lapply(utils::head(location_names, 10), collect_image_sources)
)

dplyr::glimpse(images)




61 changes: 0 additions & 61 deletions cities.csv

This file was deleted.

Binary file added home_afford_app/.RData
Binary file not shown.
Loading