From 4c555cd95f7dbb4dbb4f355916b492f356c132e8 Mon Sep 17 00:00:00 2001 From: Phil Henrickson Date: Thu, 20 Mar 2025 15:59:58 -0500 Subject: [PATCH 1/7] fixed table for game counts --- predictions.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/predictions.qmd b/predictions.qmd index 212a118..4311d7e 100644 --- a/predictions.qmd +++ b/predictions.qmd @@ -171,7 +171,7 @@ upcoming_games <- prepared_games |> upcoming_games |> group_by(yearpublished) |> count() |> - mutate(yearpublished = as.factor(yearpublished)) |> + ungroup() |> gt::gt() ``` From 7ca636367dbb3871210443f505b241fa08890589 Mon Sep 17 00:00:00 2001 From: Phil Henrickson Date: Thu, 20 Mar 2025 16:00:04 -0500 Subject: [PATCH 2/7] updated renv --- renv.lock | 52 +------------------------- renv/activate.R | 97 ++++++++++++++++++++++++++++++------------------- 2 files changed, 61 insertions(+), 88 deletions(-) diff --git a/renv.lock b/renv.lock index e50313d..a9683c2 100644 --- a/renv.lock +++ b/renv.lock @@ -7305,56 +7305,8 @@ }, "renv": { "Package": "renv", - "Version": "1.1.2", - "Source": "Repository", - "Type": "Package", - "Title": "Project Environments", - "Authors@R": "c( person(\"Kevin\", \"Ushey\", role = c(\"aut\", \"cre\"), email = \"kevin@rstudio.com\", comment = c(ORCID = \"0000-0003-2880-7407\")), person(\"Hadley\", \"Wickham\", role = c(\"aut\"), email = \"hadley@rstudio.com\", comment = c(ORCID = \"0000-0003-4757-117X\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )", - "Description": "A dependency management toolkit for R. Using 'renv', you can create and manage project-local R libraries, save the state of these libraries to a 'lockfile', and later restore your library as required. 
Together, these tools can help make your projects more isolated, portable, and reproducible.", - "License": "MIT + file LICENSE", - "URL": "https://rstudio.github.io/renv/, https://github.com/rstudio/renv", - "BugReports": "https://github.com/rstudio/renv/issues", - "Imports": [ - "utils" - ], - "Suggests": [ - "BiocManager", - "cli", - "compiler", - "covr", - "cpp11", - "devtools", - "gitcreds", - "jsonlite", - "jsonvalidate", - "knitr", - "miniUI", - "modules", - "packrat", - "pak", - "R6", - "remotes", - "reticulate", - "rmarkdown", - "rstudioapi", - "shiny", - "testthat", - "uuid", - "waldo", - "yaml", - "webfakes" - ], - "Encoding": "UTF-8", - "RoxygenNote": "7.3.2", - "VignetteBuilder": "knitr", - "Config/Needs/website": "tidyverse/tidytemplate", - "Config/testthat/edition": "3", - "Config/testthat/parallel": "true", - "Config/testthat/start-first": "bioconductor,python,install,restore,snapshot,retrieve,remotes", - "NeedsCompilation": "no", - "Author": "Kevin Ushey [aut, cre] (), Hadley Wickham [aut] (), Posit Software, PBC [cph, fnd]", - "Maintainer": "Kevin Ushey ", - "Repository": "CRAN" + "Version": "1.1.4", + "Source": "Repository" }, "rlang": { "Package": "rlang", diff --git a/renv/activate.R b/renv/activate.R index c9f5942..90b251c 100644 --- a/renv/activate.R +++ b/renv/activate.R @@ -2,7 +2,7 @@ local({ # the requested version of renv - version <- "1.1.2" + version <- "1.1.4" attr(version, "sha") <- NULL # the project directory @@ -695,11 +695,19 @@ local({ } - renv_bootstrap_platform_prefix <- function() { + renv_bootstrap_platform_prefix_default <- function() { - # construct version prefix - version <- paste(R.version$major, R.version$minor, sep = ".") - prefix <- paste("R", numeric_version(version)[1, 1:2], sep = "-") + # read version component + version <- Sys.getenv("RENV_PATHS_VERSION", unset = "R-%v") + + # expand placeholders + placeholders <- list( + list("%v", format(getRversion()[1, 1:2])), + list("%V", format(getRversion()[1, 1:3])) + ) + 
+ for (placeholder in placeholders) + version <- gsub(placeholder[[1L]], placeholder[[2L]], version, fixed = TRUE) # include SVN revision for development versions of R # (to avoid sharing platform-specific artefacts with released versions of R) @@ -708,10 +716,19 @@ local({ identical(R.version[["nickname"]], "Unsuffered Consequences") if (devel) - prefix <- paste(prefix, R.version[["svn rev"]], sep = "-r") + version <- paste(version, R.version[["svn rev"]], sep = "-r") + + version + + } + + renv_bootstrap_platform_prefix <- function() { + + # construct version prefix + version <- renv_bootstrap_platform_prefix_default() # build list of path components - components <- c(prefix, R.version$platform) + components <- c(version, R.version$platform) # include prefix if provided by user prefix <- renv_bootstrap_platform_prefix_impl() @@ -950,14 +967,14 @@ local({ } renv_bootstrap_validate_version_dev <- function(version, description) { - + expected <- description[["RemoteSha"]] if (!is.character(expected)) return(FALSE) - + pattern <- sprintf("^\\Q%s\\E", version) grepl(pattern, expected, perl = TRUE) - + } renv_bootstrap_validate_version_release <- function(version, description) { @@ -1198,86 +1215,89 @@ local({ } renv_json_read_patterns <- function() { - + list( - + # objects - list("{", "\t\n\tobject(\t\n\t"), - list("}", "\t\n\t)\t\n\t"), - + list("{", "\t\n\tobject(\t\n\t", TRUE), + list("}", "\t\n\t)\t\n\t", TRUE), + # arrays - list("[", "\t\n\tarray(\t\n\t"), - list("]", "\n\t\n)\n\t\n"), - + list("[", "\t\n\tarray(\t\n\t", TRUE), + list("]", "\n\t\n)\n\t\n", TRUE), + # maps - list(":", "\t\n\t=\t\n\t") - + list(":", "\t\n\t=\t\n\t", TRUE), + + # newlines + list("\\u000a", "\n", FALSE) + ) - + } renv_json_read_envir <- function() { envir <- new.env(parent = emptyenv()) - + envir[["+"]] <- `+` envir[["-"]] <- `-` - + envir[["object"]] <- function(...) { result <- list(...) 
names(result) <- as.character(names(result)) result } - + envir[["array"]] <- list - + envir[["true"]] <- TRUE envir[["false"]] <- FALSE envir[["null"]] <- NULL - + envir - + } renv_json_read_remap <- function(object, patterns) { - + # repair names if necessary if (!is.null(names(object))) { - + nms <- names(object) for (pattern in patterns) nms <- gsub(pattern[[2L]], pattern[[1L]], nms, fixed = TRUE) names(object) <- nms - + } - + # repair strings if necessary if (is.character(object)) { for (pattern in patterns) object <- gsub(pattern[[2L]], pattern[[1L]], object, fixed = TRUE) } - + # recurse for other objects if (is.recursive(object)) for (i in seq_along(object)) object[i] <- list(renv_json_read_remap(object[[i]], patterns)) - + # return remapped object object - + } renv_json_read_default <- function(file = NULL, text = NULL) { # read json text text <- paste(text %||% readLines(file, warn = FALSE), collapse = "\n") - + # convert into something the R parser will understand patterns <- renv_json_read_patterns() transformed <- text for (pattern in patterns) transformed <- gsub(pattern[[1L]], pattern[[2L]], transformed, fixed = TRUE) - + # parse it rfile <- tempfile("renv-json-", fileext = ".R") on.exit(unlink(rfile), add = TRUE) @@ -1287,9 +1307,10 @@ local({ # evaluate in safe environment result <- eval(json, envir = renv_json_read_envir()) - # fix up strings if necessary + # fix up strings if necessary -- do so only with reversible patterns + patterns <- Filter(function(pattern) pattern[[3L]], patterns) renv_json_read_remap(result, patterns) - + } From f251f7a4ca691abd1483c045fb63d30e96884158 Mon Sep 17 00:00:00 2001 From: Phil Henrickson Date: Thu, 20 Mar 2025 16:00:20 -0500 Subject: [PATCH 3/7] cleaned up extra files --- README.gcp.md | 104 ----------------- batch-job.json | 39 ------- cloud-run-job.yaml | 25 ---- cloud-scheduler.yaml | 13 --- gcp_setup.md | 265 ------------------------------------------- 5 files changed, 446 deletions(-) delete mode 100644 
README.gcp.md delete mode 100644 batch-job.json delete mode 100644 cloud-run-job.yaml delete mode 100644 cloud-scheduler.yaml delete mode 100644 gcp_setup.md diff --git a/README.gcp.md b/README.gcp.md deleted file mode 100644 index f1acde5..0000000 --- a/README.gcp.md +++ /dev/null @@ -1,104 +0,0 @@ -# Running BGG Models in Google Cloud Platform - -This README provides a quick reference for running the BGG Models targets pipeline in Google Cloud Platform (GCP) using cloud compute resources. - -## Files - -- `gcp_setup.md`: Detailed guide for setting up and running the pipeline in GCP -- `Dockerfile`: Docker configuration for containerizing the application -- `run_pipeline.sh`: Shell script for running the pipeline on a VM -- `batch-job.json`: Configuration for running the pipeline as a Cloud Batch job -- `cloud-run-job.yaml`: Configuration for running the pipeline as a Cloud Run Job -- `cloud-scheduler.yaml`: Configuration for scheduling the Cloud Run Job - -## Quick Start - -### Option 1: Cloud Run Jobs (Recommended) - -1. Build and push the Docker image: - ```bash - docker build -t gcr.io/YOUR_PROJECT_ID/bgg-models:latest . - docker push gcr.io/YOUR_PROJECT_ID/bgg-models:latest - ``` - -2. Create a secret for the service account: - ```bash - gcloud secrets create bgg-service-account --data-file=service-account.json - ``` - -3. Deploy the Cloud Run Job: - ```bash - gcloud run jobs create bgg-models-job --image gcr.io/YOUR_PROJECT_ID/bgg-models:latest --region us-central1 - ``` - -4. Run the job manually: - ```bash - gcloud run jobs execute bgg-models-job --region us-central1 - ``` - -5. (Optional) Set up scheduled execution: - ```bash - gcloud scheduler jobs create http bgg-models-schedule --schedule="0 0 * * 0" --uri="https://us-central1-run.googleapis.com/apis/run.googleapis.com/v1/namespaces/YOUR_PROJECT_ID/jobs/bgg-models-job:run" --http-method=POST - ``` - -### Option 2: Compute Engine VM - -1. 
Create a VM: - ```bash - gcloud compute instances create bgg-models-vm --machine-type=e2-standard-4 - ``` - -2. SSH into the VM and set up the environment: - ```bash - gcloud compute ssh bgg-models-vm - ``` - -3. Clone the repository and install dependencies: - ```bash - git clone https://github.com/YOUR_USERNAME/bgg_models.git - cd bgg_models - ``` - -4. Run the pipeline: - ```bash - ./run_pipeline.sh - ``` - -### Option 3: Cloud Batch - -1. Build and push the Docker image: - ```bash - docker build -t gcr.io/YOUR_PROJECT_ID/bgg-models:latest . - docker push gcr.io/YOUR_PROJECT_ID/bgg-models:latest - ``` - -2. Submit the batch job: - ```bash - gcloud batch jobs submit bgg-models-job --location=us-central1 --config=batch-job.json - ``` - -## Service Account Setup - -Create a service account with the necessary permissions: - -```bash -gcloud iam service-accounts create bgg-models-sa --display-name="BGG Models Service Account" -gcloud projects add-iam-policy-binding YOUR_PROJECT_ID --member="serviceAccount:bgg-models-sa@YOUR_PROJECT_ID.iam.gserviceaccount.com" --role="roles/storage.admin" -gcloud iam service-accounts keys create service-account.json --iam-account=bgg-models-sa@YOUR_PROJECT_ID.iam.gserviceaccount.com -``` - -## Monitoring - -Monitor the execution of your pipeline in GCP: - -- Cloud Run Jobs: `gcloud run jobs executions list --job bgg-models-job --region us-central1` -- Compute Engine: `gcloud compute ssh bgg-models-vm --command "tail -f /path/to/log"` -- Cloud Batch: `gcloud batch jobs describe bgg-models-job --location=us-central1` - -## Cost Optimization - -- Use Cloud Run Jobs for shorter workloads -- Use preemptible VMs for Compute Engine -- Use Spot VMs for Batch jobs - -For more detailed instructions, see `gcp_setup.md`. 
diff --git a/batch-job.json b/batch-job.json deleted file mode 100644 index 8d08cc1..0000000 --- a/batch-job.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "taskGroups": [ - { - "taskSpec": { - "runnables": [ - { - "container": { - "imageUri": "gcr.io/YOUR_PROJECT_ID/bgg-models:latest", - "volumes": [ - "/secrets:/secrets" - ], - "options": "-e GCS_AUTH_FILE=/secrets/service-account.json" - } - } - ], - "volumes": [ - { - "gcs": { - "remotePath": "gs://YOUR_BUCKET/service-account.json" - }, - "mountPath": "/secrets/service-account.json" - } - ] - }, - "taskCount": 1, - "parallelism": 1 - } - ], - "logsPolicy": { - "destination": "CLOUD_LOGGING" - }, - "allocationPolicy": { - "instances": [ - { - "instanceTemplate": "projects/YOUR_PROJECT_ID/global/instanceTemplates/bgg-models-template" - } - ] - } -} diff --git a/cloud-run-job.yaml b/cloud-run-job.yaml deleted file mode 100644 index edd4790..0000000 --- a/cloud-run-job.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: run.googleapis.com/v1 -kind: Job -metadata: - name: bgg-models-job -spec: - template: - spec: - containers: - - image: gcr.io/YOUR_PROJECT_ID/bgg-models:latest - resources: - limits: - cpu: "4" - memory: 8Gi - env: - - name: GCS_AUTH_FILE - value: /secrets/service-account.json - volumeMounts: - - name: service-account - mountPath: /secrets - volumes: - - name: service-account - secret: - secretName: bgg-service-account - timeoutSeconds: 3600 - serviceAccountName: YOUR_SERVICE_ACCOUNT@YOUR_PROJECT_ID.iam.gserviceaccount.com diff --git a/cloud-scheduler.yaml b/cloud-scheduler.yaml deleted file mode 100644 index 84694fa..0000000 --- a/cloud-scheduler.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: bgg-models-schedule -description: "Schedule for running BGG Models pipeline" -schedule: "0 0 * * 0" # Run weekly on Sunday at midnight -timeZone: "America/Chicago" -target: - httpTarget: - uri: "https://us-central1-run.googleapis.com/apis/run.googleapis.com/v1/namespaces/YOUR_PROJECT_ID/jobs/bgg-models-job:run" - 
httpMethod: POST - headers: - Content-Type: application/json - oauthToken: - serviceAccountEmail: YOUR_SERVICE_ACCOUNT@YOUR_PROJECT_ID.iam.gserviceaccount.com - scope: "https://www.googleapis.com/auth/cloud-platform" diff --git a/gcp_setup.md b/gcp_setup.md deleted file mode 100644 index 74ee4e3..0000000 --- a/gcp_setup.md +++ /dev/null @@ -1,265 +0,0 @@ -# Running BGG Models in Google Cloud Platform - -This guide outlines how to run the BGG Models targets pipeline in Google Cloud Platform (GCP) using cloud compute resources instead of running it locally. - -## Prerequisites - -- A Google Cloud Platform account -- The `gcloud` CLI tool installed and configured -- A GCP project with billing enabled -- Service account with appropriate permissions - -## Option 1: Using Google Cloud Run Jobs - -Cloud Run Jobs is a fully managed compute platform that runs containerized applications on demand. It's well-suited for batch processing tasks like this targets pipeline. - -### 1. Create a Dockerfile - -Create a Dockerfile in the project root: - -```dockerfile -FROM rocker/r-ver:4.2.0 - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - libcurl4-openssl-dev \ - libssl-dev \ - libxml2-dev \ - libgit2-dev \ - git \ - && rm -rf /var/lib/apt/lists/* - -# Install R packages -RUN R -e "install.packages(c('remotes', 'renv'))" - -# Copy renv files -COPY renv.lock renv.lock -COPY .Rprofile .Rprofile -COPY renv/ renv/ - -# Restore packages from renv.lock -RUN R -e "renv::restore()" - -# Install additional packages needed for the pipeline -RUN R -e "install.packages(c('targets', 'tarchetypes', 'config', 'googleCloudStorageR', 'googleAuthR'))" - -# Copy project files -COPY . /app -WORKDIR /app - -# Set environment variables -ENV GCS_AUTH_FILE=/secrets/service-account.json - -# Run the targets pipeline -CMD ["R", "-e", "targets::tar_make()"] -``` - -### 2. 
Build and Push the Docker Image - -```bash -# Build the Docker image -docker build -t gcr.io/YOUR_PROJECT_ID/bgg-models:latest . - -# Push the image to Google Container Registry -docker push gcr.io/YOUR_PROJECT_ID/bgg-models:latest -``` - -### 3. Create a Cloud Run Job - -```bash -gcloud run jobs create bgg-models-job \ - --image gcr.io/YOUR_PROJECT_ID/bgg-models:latest \ - --region us-central1 \ - --service-account YOUR_SERVICE_ACCOUNT@YOUR_PROJECT_ID.iam.gserviceaccount.com \ - --set-secrets=GCS_AUTH_FILE=bgg-service-account:latest -``` - -### 4. Run the Job Manually - -```bash -gcloud run jobs execute bgg-models-job --region us-central1 -``` - -### 5. Set Up Scheduled Execution (Optional) - -Use Cloud Scheduler to run the job on a schedule: - -```bash -gcloud scheduler jobs create http bgg-models-schedule \ - --schedule="0 0 * * 0" \ # Run weekly on Sunday at midnight - --uri="https://us-central1-run.googleapis.com/apis/run.googleapis.com/v1/namespaces/YOUR_PROJECT_ID/jobs/bgg-models-job:run" \ - --http-method=POST \ - --oauth-service-account-email=YOUR_SERVICE_ACCOUNT@YOUR_PROJECT_ID.iam.gserviceaccount.com -``` - -## Option 2: Using Google Compute Engine - -For more control over the environment or if you need more compute resources, you can use a Compute Engine VM. - -### 1. Create a Compute Engine VM - -```bash -gcloud compute instances create bgg-models-vm \ - --machine-type=e2-standard-4 \ - --image-family=debian-11 \ - --image-project=debian-cloud \ - --boot-disk-size=50GB \ - --service-account=YOUR_SERVICE_ACCOUNT@YOUR_PROJECT_ID.iam.gserviceaccount.com \ - --scopes=cloud-platform -``` - -### 2. 
Set Up the VM - -SSH into the VM and install R and dependencies: - -```bash -# Update and install dependencies -sudo apt-get update -sudo apt-get install -y \ - r-base \ - r-base-dev \ - libcurl4-openssl-dev \ - libssl-dev \ - libxml2-dev \ - libgit2-dev \ - git - -# Clone the repository -git clone https://github.com/YOUR_USERNAME/bgg_models.git -cd bgg_models - -# Install renv and restore packages -R -e "install.packages('renv')" -R -e "renv::restore()" - -# Install additional packages -R -e "install.packages(c('targets', 'tarchetypes', 'config', 'googleCloudStorageR', 'googleAuthR'))" -``` - -### 3. Set Up Authentication - -Upload your service account JSON file to the VM and set the environment variable: - -```bash -# Set environment variable -echo 'export GCS_AUTH_FILE=/path/to/service-account.json' >> ~/.bashrc -source ~/.bashrc -``` - -### 4. Create a Run Script - -Create a script to run the targets pipeline: - -```bash -# Create run script -cat > run_pipeline.sh << 'EOF' -#!/bin/bash -cd /home/YOUR_USERNAME/bgg_models -Rscript -e "targets::tar_make()" -EOF - -chmod +x run_pipeline.sh -``` - -### 5. Set Up Cron Job (Optional) - -Set up a cron job to run the pipeline on a schedule: - -```bash -# Add cron job to run weekly on Sunday at midnight -(crontab -l 2>/dev/null; echo "0 0 * * 0 /home/YOUR_USERNAME/bgg_models/run_pipeline.sh") | crontab - -``` - -## Option 3: Using Google Cloud Batch - -Google Cloud Batch is a fully managed service that allows you to run batch jobs on Google Cloud. It's designed for workloads that require significant compute resources. - -### 1. 
Create a Batch Job Configuration - -Create a file named `batch-job.json`: - -```json -{ - "taskGroups": [ - { - "taskSpec": { - "runnables": [ - { - "container": { - "imageUri": "gcr.io/YOUR_PROJECT_ID/bgg-models:latest", - "volumes": [ - "/secrets:/secrets" - ] - } - } - ], - "volumes": [ - { - "gcs": { - "remotePath": "gs://YOUR_BUCKET/service-account.json" - }, - "mountPath": "/secrets/service-account.json" - } - ] - } - } - ] -} -``` - -### 2. Submit the Batch Job - -```bash -gcloud batch jobs submit bgg-models-job \ - --location=us-central1 \ - --config=batch-job.json -``` - -## Service Account Setup - -For any of these options, you'll need a service account with the following permissions: - -- Storage Admin (`roles/storage.admin`) for the buckets used in the project -- Logs Writer (`roles/logging.logWriter`) for writing logs - -Create a service account and assign the necessary roles: - -```bash -# Create service account -gcloud iam service-accounts create bgg-models-sa \ - --display-name="BGG Models Service Account" - -# Assign roles -gcloud projects add-iam-policy-binding YOUR_PROJECT_ID \ - --member="serviceAccount:bgg-models-sa@YOUR_PROJECT_ID.iam.gserviceaccount.com" \ - --role="roles/storage.admin" - -gcloud projects add-iam-policy-binding YOUR_PROJECT_ID \ - --member="serviceAccount:bgg-models-sa@YOUR_PROJECT_ID.iam.gserviceaccount.com" \ - --role="roles/logging.logWriter" - -# Create and download the service account key -gcloud iam service-accounts keys create service-account.json \ - --iam-account=bgg-models-sa@YOUR_PROJECT_ID.iam.gserviceaccount.com -``` - -## Monitoring and Logging - -To monitor the execution of your pipeline in GCP: - -1. Use Cloud Logging to view logs from your jobs -2. Set up Cloud Monitoring alerts for job failures -3. Use Cloud Storage notifications to get alerts when new files are created - -## Cost Optimization - -To optimize costs when running in GCP: - -1. 
Use preemptible VMs for Compute Engine if your job can handle interruptions -2. Set appropriate machine types based on your workload requirements -3. Use Cloud Run Jobs for shorter workloads as you only pay for the time your code runs -4. Consider using Spot VMs for Batch jobs to reduce costs - -## Conclusion - -This guide provides multiple options for running your BGG Models targets pipeline in Google Cloud Platform. Choose the option that best fits your requirements for compute resources, management overhead, and cost considerations. From baeb0441d9b62f34cc5f4a7965e98b23c506af6e Mon Sep 17 00:00:00 2001 From: Phil Henrickson Date: Fri, 21 Mar 2025 08:57:59 -0500 Subject: [PATCH 4/7] wip adding predictions by category --- index.qmd | 105 +++++++++++++++++- src/visualizations/tables.R | 215 +++++++++++++++++++++++++++++++++++- 2 files changed, 312 insertions(+), 8 deletions(-) diff --git a/index.qmd b/index.qmd index 8ad1b3b..b720498 100644 --- a/index.qmd +++ b/index.qmd @@ -3,6 +3,9 @@ title: "Predicting Upcoming Board Games" subtitle: "Predictive Models for BoardGameGeek Ratings" editor: source echo: false +format: + html: + html-table-processing: none --- ```{r} @@ -96,12 +99,11 @@ The following table displays predicted BGG outcomes for games that are expected #| warning: false #| message: false #| label: upcoming-games-table -#| column: page-inset-right +#| column: screen-inset-right # table predictions |> filter(.pred_hurdle_class == 'yes') |> - select(-starts_with(".pred_hurdle")) |> # this goddamn bah humbug game filter(game_id != 388225) |> predictions_dt( @@ -113,19 +115,110 @@ predictions |> ``` -## Hurdle +## Top Predictions by Year -This table displays predicted probabilities for whether games will achieve enough ratings (25) to be assigned a Geek Rating +What are currently the top predictions for each year? 
+ +```{r} + +years = predictions |> + group_by(yearpublished) |> + count() |> + head(4) |> + pull(yearpublished) + + +top_preds = + map( + years, + ~ predictions |> + filter(yearpublished == .x) |> + arrange(desc(.pred_bayesaverage)) |> + filter(!is.na(thumbnail)) |> + head(25) |> + predictions_gt(games = games) |> + gtExtras::gt_theme_nytimes() + ) + +``` + +::: {.panel-tabset} + +### `r years[1]` + +```{r} +#| class: scroll +#| column: screen-inset-right +top_preds[[1]] + +``` + +### `r years[2]` + +```{r} +#| class: scroll +#| column: screen-inset-right +top_preds[[2]] + +``` + +### `r years[3]` + +```{r} +#| class: scroll +#| column: screen-inset-right +top_preds[[3]] + +``` + +### `r years[4]` + + +```{r} +#| class: scroll +#| column: screen-inset-right +top_preds[[4]] + +``` + +::: + +## Top Predictions by Category + +What are the top predictions for different genres of upcoming games? + +```{r} + +``` + +::: {.panel-tabset} + +### Economic + + +### War Game + + +### Card Game + + +### Party + + +::: + +## Hurdle Predictions + +Which games are expected to receive enough ratings to be assigned a Geek rating? 
```{r} #| echo: false #| message: false #| warning: false #| label: hurdle-table -#| column: page-inset-right +#| column: screen-inset-right predictions |> - filter(.pred_hurdle_class == 'yes') |> hurdle_dt( lazy_load = TRUE ) diff --git a/src/visualizations/tables.R b/src/visualizations/tables.R index 881e09e..1fe18c5 100644 --- a/src/visualizations/tables.R +++ b/src/visualizations/tables.R @@ -26,8 +26,12 @@ make_image_link = function(link, height = 52) { prep_predictions_dt = function(predictions, games) { predictions |> arrange(desc(.pred_bayesaverage)) |> - filter(!is.na(thumbnail)) |> - mutate(across(starts_with(".pred"), ~ round(.x, 2))) |> + mutate( + across( + c(starts_with(".pred"), -starts_with(".pred_hurdle_class")), + ~ round(.x, 2) + ) + ) |> mutate( name = make_hyperlink( make_bgg_link(game_id), @@ -365,3 +369,210 @@ hurdle_dt = function(data, lazy_load = TRUE) { ) } } + +# Prepare data for gt table +prep_predictions_gt = function(predictions, games, max_rows = 50) { + result <- predictions |> + arrange(desc(.pred_bayesaverage)) + + result |> + head(max_rows) |> # Limit to specified number of rows + mutate( + across( + c(starts_with(".pred"), -starts_with(".pred_hurdle_class")), + ~ round(.x, 2) + ) + ) |> + mutate( + name = make_hyperlink( + make_bgg_link(game_id), + mytext = paste(name, paste0("(", yearpublished, ")")) + ) + ) |> + mutate( + Rank = row_number(), + Image = thumbnail, # Store just the URL, not the HTML + Game = name, + `Average Weight` = .pred_averageweight, + `Average Rating` = .pred_average, + `Users Rated` = .pred_usersrated, + `Geek Rating` = .pred_bayesaverage, + .keep = 'none' + ) +} + +# Create a non-interactive gt table +predictions_gt = function( + predictions, + games, + max_rows = 50 +) { + # Columns to include + cols = c( + "Rank", + "Image", + "Game", + "Average Weight", + "Average Rating", + "Users Rated", + "Geek Rating" + ) + + # Prepare data + prepared_data <- prep_predictions_gt( + predictions, + games = games, + 
max_rows = max_rows + ) + + # Create gt table + gt_table <- prepared_data |> + gt::gt() |> + # Set table options + gt::tab_options( + table.width = gt::pct(100), + column_labels.font.weight = "bold", + data_row.padding = gt::px(2) + ) |> + # Format columns + gt::fmt_markdown(columns = c("Game")) |> # Remove Image from fmt_markdown + gt::cols_align(align = "center", columns = -c("Game")) |> + # Set equal column widths + gt::cols_width( + Rank ~ gt::px(60), + Image ~ gt::px(100), + Game ~ gt::px(150), + `Average Weight` ~ gt::px(75), + `Average Rating` ~ gt::px(75), + `Users Rated` ~ gt::px(75), + `Geek Rating` ~ gt::px(75) + ) + + # Add color scales + gt_table <- gt_table |> + # Geek Rating color scale + gt::data_color( + columns = "Geek Rating", + colors = scales::col_numeric( + palette = c("white", "dodgerblue2"), + domain = c(5, 9) + ) + ) |> + # Average Rating color scale + gt::data_color( + columns = "Average Rating", + colors = scales::col_numeric( + palette = c("white", "dodgerblue2"), + domain = c(6, 10) + ) + ) |> + # Users Rated color scale + gt::data_color( + columns = "Users Rated", + colors = scales::col_numeric( + palette = c("white", "dodgerblue2"), + domain = c(0, 50000) + ) + ) |> + # Average Weight color scale + gt::data_color( + columns = "Average Weight", + colors = scales::col_numeric( + palette = c("deepskyblue1", "white", "orange"), + domain = c(0.8, 3, 5) + ) + ) |> + gt::text_transform( + locations = gt::cells_body(columns = c("Image")), + fn = function(x) { + gt::web_image( + url = x, # Now x is just the URL, not HTML + height = 50 + ) + } + ) + + return(gt_table) +} + +# Function for hurdle model table +hurdle_gt = function(data, max_rows = 50) { + # Check if thumbnail exists in the data + has_thumbnail <- "thumbnail" %in% names(data) + + # Prepare data + result <- data |> + arrange(desc(.pred_hurdle_yes)) + + # Only filter by thumbnail if it exists + if (has_thumbnail) { + result <- result |> + filter(!is.na(thumbnail)) + } + + 
prepared_data <- result |> + head(max_rows) |> # Limit to specified number of rows + mutate( + name = make_hyperlink( + make_bgg_link(game_id), + mytext = paste(name, paste0("(", yearpublished, ")")) + ) + ) |> + mutate( + Image = thumbnail, # Store just the URL, not the HTML + Game = name, + Description = stringr::str_trunc(description, width = 150), + `Pr(Hurdle)` = round(.pred_hurdle_yes, 3), + `Ratings` = usersrated, + .keep = 'none' + ) + + # Create gt table + gt_table <- prepared_data |> + gt::gt() |> + # Set table options + gt::tab_options( + table.width = gt::pct(100), + table.font.size = gt::px(10), + column_labels.font.weight = "bold", + data_row.padding = gt::px(2) + ) |> + # Format columns + gt::fmt_markdown(columns = c("Game", "Description")) |> # Remove Image from fmt_markdown + gt::cols_align( + align = "center", + columns = c("Image", "Pr(Hurdle)", "Ratings") + ) |> + gt::cols_width( + Image ~ gt::px(100), + Game ~ gt::px(200), + Description ~ gt::px(200), + `Pr(Hurdle)` ~ gt::px(100), + `Ratings` ~ gt::px(100) + ) |> + # Add title + gt::tab_header( + title = "Hurdle Model Predictions" + ) + + # Add color scale for Pr(Hurdle) + gt_table <- gt_table |> + gt::data_color( + columns = "Pr(Hurdle)", + colors = scales::col_numeric( + palette = c("white", "dodgerblue2"), + domain = c(0, 1) + ) + ) |> + gt::text_transform( + locations = gt::cells_body(columns = c("Image")), + fn = function(x) { + gt::web_image( + url = x, # Now x is just the URL, not HTML + height = 50 + ) + } + ) + + return(gt_table) +} From 3c999e33a902cbd3d26715b771711a899f5dd8cc Mon Sep 17 00:00:00 2001 From: Phil Henrickson Date: Fri, 21 Mar 2025 09:00:32 -0500 Subject: [PATCH 5/7] adding viz for data --- src/visualizations/data.R | 236 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100644 src/visualizations/data.R diff --git a/src/visualizations/data.R b/src/visualizations/data.R new file mode 100644 index 0000000..1568bf3 --- /dev/null +++ 
b/src/visualizations/data.R
@@ -0,0 +1,251 @@
+# Reshape the four BGG outcome columns into long form (outcome, value).
+longer_outcomes = function(data) {
+  data |>
+    pivot_longer(
+      cols = c("usersrated", "averageweight", "average", "bayesaverage"),
+      names_to = "outcome",
+      values_to = "value"
+    )
+}
+
+# Histogram of every outcome, one free-scaled facet per outcome.
+plot_outcomes_distributions = function(data) {
+  data |>
+    longer_outcomes() |>
+    ggplot(aes(x = value)) +
+    geom_histogram(bins = 80) +
+    facet_wrap(outcome ~ ., ncol = 2, scales = "free") +
+    theme_light()
+}
+
+# Scatter of average rating (x) against users rated (y, log10 axis), colored
+# by bayesaverage. Pass labels = FALSE to drop the game-name text layer.
+plot_geek_vs_average = function(data, labels = TRUE) {
+  p =
+    data |>
+    ggplot(aes(
+      x = average,
+      label = name,
+      color = bayesaverage,
+      y = usersrated
+    )) +
+    geom_point(alpha = 0.3) +
+    scale_y_log10(labels = scales::label_comma()) +
+    scale_color_gradient2(
+      high = "deepskyblue1",
+      low = "red",
+      mid = "grey80",
+      limits = c(4, 8),
+      oob = scales::squish,
+      midpoint = 6
+    ) +
+    theme_light() +
+    theme(
+      legend.position = "top",
+      legend.title = element_text(hjust = 0.5),
+      axis.text = element_text()
+    ) +
+    guides(
+      color = guide_colorbar(
+        barwidth = 12,
+        barheight = 0.35,
+        title = "geek rating",
+        title.position = "top"
+      )
+    ) +
+    xlab("average rating") +
+    ylab("users rated (logged)")
+
+  # isTRUE() instead of `== T`: T is an ordinary variable that can be rebound.
+  if (isTRUE(labels)) {
+    p +
+      geom_text(size = 2, check_overlap = TRUE, vjust = -1)
+  } else {
+    p
+  }
+}
+
+# Keep only rows that have a geek rating (bayesaverage).
+filter_geek = function(data) {
+  data |>
+    filter(!is.na(bayesaverage))
+}
+
+# Keep only rows that have an average weight.
+filter_weight = function(data) {
+  data |>
+    filter(!is.na(averageweight))
+}
+
+# Replace usersrated with its natural log.
+log_ratings = function(data) {
+  data |>
+    mutate(usersrated = log(usersrated))
+}
+
+# Return one row per (game_id, game_type) for the requested "family" ranks.
+# `data` needs a game_id column and a nested `ranks` column whose unnested
+# rows carry `type` and `name`.
+get_game_types = function(
+  data,
+  types = c(
+    "wargames",
+    "abstracts",
+    "childrensgames",
+    "familygames",
+    "thematic",
+    "strategygames",
+    "partygames"
+  )
+) {
+  # one row per game and rank entry
+  unnest_game_types = function(data) {
+    data |>
+      select(game_id, ranks) |>
+      unnest(ranks)
+  }
+
+  # keep rank entries of type "family" whose name is one of `types`
+  filter_game_types = function(data, types) {
+    data |>
+      inner_join(
+        tibble(type = c("family"), name = types),
+        by = c("type", "name")
+      )
+  }
+
+  # strip the "games"/"s" suffixes; "wargames" becomes "wargame", not "war"
+  clean_game_types = function(data) {
+    data |>
+      mutate(
+        name = gsub("games$", "", name),
+        name = gsub("s$", "", name),
+        name = case_when(name == "war" ~ "wargame", TRUE ~ name)
+      )
+  }
+
+  data |>
+    unnest_game_types() |>
+    filter_game_types(types = types) |>
+    clean_game_types() |>
+    select(game_id, game_type = name)
+}
+
+# Attach game_type to `data` by game_id; games without a match get "other".
+# NOTE(review): a game with several matching ranks yields several rows here;
+# confirm that duplication is intended.
+add_game_types = function(data, raw_games) {
+  data |>
+    left_join(
+      raw_games |>
+        get_game_types(),
+      by = c("game_id")
+    ) |>
+    select(game_id, game_type, name, yearpublished, everything()) |>
+    mutate(game_type = replace_na(game_type, "other"))
+}
+
+# Named palette: one color per cleaned game type, plus grey80 for "other".
+color_game_types = function() {
+  c(
+    "wargame" = "#E41A1C",
+    "strategy" = "#377EB8",
+    "thematic" = "#FFFF33",
+    "abstract" = "#984EA3",
+    "family" = "#4DAF4A",
+    "party" = "#FF7F00",
+    "children" = "#F781BF",
+    "other" = "grey80"
+  )
+}
+
+# Manual color scale using the game-type palette.
+scale_color_game_types = function(values = color_game_types()) {
+  scale_color_manual(values = values)
+}
+
+# Manual fill scale using the game-type palette.
+scale_fill_game_types = function(values = color_game_types()) {
+  scale_fill_manual(values = values)
+}
+
+# Scatterplot matrix of the four outcomes with densities on the diagonal.
+# `color` is an optional bare column name mapped to both fill and color.
+plot_outcomes_matrix = function(data, color = NULL) {
+  data |>
+    ggplot(aes(
+      x = .panel_x,
+      y = .panel_y,
+      fill = {{ color }},
+      color = {{ color }}
+    )) +
+    ggforce::geom_autopoint(alpha = 0.5, size = 0.5) +
+    ggforce::geom_autodensity(alpha = 0.5, position = "identity") +
+    ggforce::facet_matrix(
+      vars(c("averageweight", "average", "bayesaverage", "usersrated")),
+      layer.diag = 2,
+      grid.y.diag = FALSE
+    ) +
+    theme_light() +
+    theme(
+      panel.grid.major = element_blank(),
+      strip.text = element_text(size = 8)
+    )
+}
+
+# Outcome matrix on log ratings, restricted to games with a geek rating and
+# an average weight.
+plot_outcomes_relationships = function(data, color = NULL) {
+  # embrace so a bare column name reaches aes(); `color = color` would
+  # hand over the literal symbol `color` instead
+  data |>
+    log_ratings() |>
+    filter_geek() |>
+    filter_weight() |>
+    plot_outcomes_matrix(color = {{ color }})
+}
+
+# Same as plot_outcomes_relationships(), colored with the game-type palette.
+plot_outcomes_by_type = function(data, color) {
+  data |>
+    log_ratings() |>
+    filter_geek() |>
+    filter_weight() |>
+    plot_outcomes_matrix(color = {{ color }}) +
+    scale_color_game_types() +
+    scale_fill_game_types()
+}
+
+# split$data |>
+# add_game_types(games_raw) |>
+# filter(yearpublished >= 1980) |>
+# filter_geek() |>
+# group_by(game_type,
+# yearpublished) |>
+# count() |>
+# filter(game_type != 'other') |>
+# ggplot(aes(x=yearpublished, y=n, fill = game_type))+
+# geom_col()+
+# scale_fill_game_types()+
+# bggUtils::theme_bgg()+
+# facet_wrap(game_type ~.)+
+# guides(fill = F)
+#
+# split$data |>
+# plot_outcomes_relationships()
+#
+# split$data |>
+# add_game_types(games_raw) |>
+# filter(game_type != 'other') |>
+# filter(game_type != 'children') |>
+# plot_outcomes_by_type(color = game_type)
+#
+# split$data |>
+# sample_n(1000) |>
+# plot_outcomes_relationships()
+#
+#
+# split$data |>
+# filter_geek() |>
+# log_ratings() |>
+# plot_geek_vs_average()
From a57de2ef57d50dccf38d18de9e2419d2cb891b79 Mon Sep 17 00:00:00 2001 From: Phil Henrickson Date: Fri, 21 Mar 2025 09:33:03 -0500 Subject: [PATCH 6/7] added additional tables for predictions --- index.qmd | 95 ++++++++++++++++++++++++--- methodology.qmd | 66 +++++++++++++++++++ renv.lock | 166 ++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 304 insertions(+), 23 deletions(-) diff --git a/index.qmd b/index.qmd index b720498..567cb58 100644 --- a/index.qmd +++ b/index.qmd @@ -117,9 +117,11 @@ predictions |> ## Top Predictions by Year -What are currently the top predictions for each year? +What are the model's top predictions for each year? ```{r} +#| warning: false +#| message: false years = predictions |> group_by(yearpublished) |> @@ -127,7 +129,6 @@ years = predictions |> head(4) |> pull(yearpublished) - top_preds = map( years, @@ -185,29 +186,105 @@ top_preds[[4]] ## Top Predictions by Category -What are the top predictions for different genres of upcoming games? +What are the model's top predictions for different genres of upcoming games? 
```{r} +#| include: false +categories = c( + 'Economic', + 'Card Game', + 'Wargame', + 'Abstract Strategy', + 'Dice', + 'Party Game' +) + +games_and_categories = + games_raw |> + bggUtils:::unnest_categories() |> + inner_join( + tibble(value = categories) + ) + +top_categories = map( + categories, + ~ predictions |> + filter(!is.na(thumbnail)) |> + inner_join( + games_and_categories |> + filter(value == .x), + by = join_by(game_id) + ) |> + head(25) |> + predictions_gt(games = games) |> + gtExtras::gt_theme_nytimes() |> + gt::tab_header( + title = paste("Top Upcoming Predictions:", .x) + ) +) ``` ::: {.panel-tabset} -### Economic +### `r categories[1]` + +```{r} +#| class: scroll +#| column: screen-inset-right +top_categories[[1]] + +``` + +### `r categories[2]` + +```{r} +#| class: scroll +#| column: screen-inset-right +top_categories[[2]] + +``` + +### `r categories[3]` +```{r} +#| class: scroll +#| column: screen-inset-right +top_categories[[3]] -### War Game +``` +### `r categories[4]` -### Card Game +```{r} +#| class: scroll +#| column: screen-inset-right +top_categories[[4]] +``` -### Party +### `r categories[5]` +```{r} +#| class: scroll +#| column: screen-inset-right +top_categories[[5]] + +``` + + +### `r categories[6]` + +```{r} +#| class: scroll +#| column: screen-inset-right +top_categories[[6]] + +``` ::: -## Hurdle Predictions + \ No newline at end of file diff --git a/methodology.qmd b/methodology.qmd index 8672b66..175ce28 100644 --- a/methodology.qmd +++ b/methodology.qmd @@ -139,6 +139,72 @@ graph LR ## Data +```{r} + +# data +# raw pull from bgg +tar_load(games_raw) +# split for train/valid/test +tar_load(split) + +# get training set +train_data = + split |> + training() + +# get validation set +valid_data = + split |> + validation() + +# full data +full_data = + bind_rows(train_data, valid_data) +``` + +# Data + +As of my most recent data job, there are **`r nrow(games)`** individual games on BGG. 
For each of these games, I have information about the game (playing time, mechanisms, components, publishers, artists, designers, etc.), as well as information voted on by the BGG community (average rating, average weight).
+
+```{r}
+
+full_data |>
+  sample_n(1000) |>
+  visdat::vis_dat()
+
+```
+
+## Outcomes
+
+The data is at the game level, where I observe the BGG community’s aggregated ratings for individual games. This means I do not have data on the underlying ratings for games, only the average, standard deviation, or sum of the distribution.
+
+I examine four different community outcomes for games: average weight rating (complexity), number of user ratings, average user rating, and geek rating. Only a subset of games have received enough votes by the BGG community to receive a geek rating, which is a Bayesian average based on the number of ratings as well as the average rating.
+
+```{r}
+#| message: false
+# theme_set() returns the previous theme, so call it on its own, not via `+`
+theme_set(theme_light() + theme(legend.position = 'top'))
+full_data |>
+  filter_geek() |>
+  log_ratings() |>
+  plot_outcomes_distributions() +
+  theme(legend.position = 'top')
+```
+
+These BGG outcomes (average weight, average rating, number of user ratings) are related to one another in some way, which is important to keep in mind as we think about modeling these outcomes.
+
+The average weight tends to be highly correlated with the average rating, while not being correlated with the number of user ratings. The geek rating is a function of the average and user ratings, which means it is in turn correlated with the average weight.
+
+```{r}
+
+full_data |>
+  plot_outcomes_relationships() +
+  theme(legend.position = 'top')
+
+```
+
+### Number of Ratings
+
 I train and evaluate the models primarily on games that have achieved at least 25 ratings, as this is the required number of user ratings for a game to receive a Geek rating.
The majority of games on BoardGameGeek do not actually receive this number of ratings, especially in recent years as the number of games published has dramatically increased. My main aim is to predict games that are set to be published and available to purchase, and many of the over 100k games on BGG have not been published or distributed. diff --git a/renv.lock b/renv.lock index a9683c2..d776075 100644 --- a/renv.lock +++ b/renv.lock @@ -1103,7 +1103,7 @@ "NeedsCompilation": "yes", "Author": "Hadley Wickham [aut, cre] (), Jennifer Bryan [aut] (), Posit Software, PBC [cph, fnd]", "Maintainer": "Hadley Wickham ", - "Repository": "CRAN" + "Repository": "https://packagemanager.posit.co/cran/latest" }, "bit": { "Package": "bit", @@ -1608,7 +1608,7 @@ "NeedsCompilation": "no", "Author": "Joyce Cahoon [aut] (), Davis Vaughan [aut], Max Kuhn [aut], Alex Hayes [aut], Julia Silge [cre, aut] (), Posit, PBC [cph, fnd]", "Maintainer": "Julia Silge ", - "Repository": "CRAN" + "Repository": "https://packagemanager.posit.co/cran/latest" }, "cachem": { "Package": "cachem", @@ -1792,7 +1792,7 @@ "NeedsCompilation": "no", "Author": "Julia Silge [cre, aut] (), Davis Vaughan [aut], Posit Software, PBC [cph, fnd]", "Maintainer": "Julia Silge ", - "Repository": "CRAN" + "Repository": "https://packagemanager.posit.co/cran/latest" }, "class": { "Package": "class", @@ -3054,6 +3054,48 @@ "Maintainer": "Richard Iannone ", "Repository": "CRAN" }, + "forcats": { + "Package": "forcats", + "Version": "1.0.0", + "Source": "Repository", + "Title": "Tools for Working with Categorical Variables (Factors)", + "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@rstudio.com\", role = c(\"aut\", \"cre\")), person(\"RStudio\", role = c(\"cph\", \"fnd\")) )", + "Description": "Helpers for reordering factor levels (including moving specified levels to front, ordering by first appearance, reversing, and randomly shuffling), and tools for modifying factor levels (including collapsing rare 
levels into other, 'anonymising', and manually 'recoding').", + "License": "MIT + file LICENSE", + "URL": "https://forcats.tidyverse.org/, https://github.com/tidyverse/forcats", + "BugReports": "https://github.com/tidyverse/forcats/issues", + "Depends": [ + "R (>= 3.4)" + ], + "Imports": [ + "cli (>= 3.4.0)", + "glue", + "lifecycle", + "magrittr", + "rlang (>= 1.0.0)", + "tibble" + ], + "Suggests": [ + "covr", + "dplyr", + "ggplot2", + "knitr", + "readr", + "rmarkdown", + "testthat (>= 3.0.0)", + "withr" + ], + "VignetteBuilder": "knitr", + "Config/Needs/website": "tidyverse/tidytemplate", + "Config/testthat/edition": "3", + "Encoding": "UTF-8", + "LazyData": "true", + "RoxygenNote": "7.2.3", + "NeedsCompilation": "no", + "Author": "Hadley Wickham [aut, cre], RStudio [cph, fnd]", + "Maintainer": "Hadley Wickham ", + "Repository": "CRAN" + }, "foreach": { "Package": "foreach", "Version": "1.5.2", @@ -3206,7 +3248,7 @@ "NeedsCompilation": "no", "Author": "Henrik Bengtsson [aut, cre, cph] ()", "Maintainer": "Henrik Bengtsson ", - "Repository": "CRAN" + "Repository": "https://packagemanager.posit.co/cran/latest" }, "future.apply": { "Package": "future.apply", @@ -3719,7 +3761,7 @@ "NeedsCompilation": "no", "Author": "Jeffrey B. Arnold [aut, cre] (), Gergely Daroczi [ctb], Bo Werth [ctb], Brian Weitzner [ctb], Joshua Kunst [ctb], Baptise Auguie [ctb], Bob Rudis [ctb], Hadley Wickham [ctb] (Code from the ggplot2 package.), Justin Talbot [ctb] (Code from the labeling package), Joshua London [ctb]", "Maintainer": "Jeffrey B. Arnold ", - "Repository": "CRAN" + "Repository": "https://packagemanager.posit.co/cran/latest" }, "gh": { "Package": "gh", @@ -4746,7 +4788,7 @@ "NeedsCompilation": "yes", "Author": "Jim Baglama [aut, cph], Lothar Reichel [aut, cph], B. W. Lewis [aut, cre, cph]", "Maintainer": "B. W. 
Lewis ", - "Repository": "CRAN", + "Repository": "https://packagemanager.posit.co/cran/latest", "Encoding": "UTF-8" }, "isoband": { @@ -5398,7 +5440,7 @@ "NeedsCompilation": "yes", "Author": "Hadley Wickham [aut, cre], RStudio [cph]", "Maintainer": "Hadley Wickham ", - "Repository": "CRAN" + "Repository": "https://packagemanager.posit.co/cran/latest" }, "lubridate": { "Package": "lubridate", @@ -6170,7 +6212,7 @@ "Config/Needs/website": "gifski", "NeedsCompilation": "no", "Author": "Thomas Lin Pedersen [cre, aut] ()", - "Repository": "CRAN" + "Repository": "https://packagemanager.posit.co/cran/latest" }, "pbkrtest": { "Package": "pbkrtest", @@ -6966,7 +7008,7 @@ "NeedsCompilation": "yes", "Author": "David Cooley [aut, cre], Milo Yip [ctb] (Author of c++ rapidjson library, provided through THL A29 Limited, a Tencent company), Alexander Chemeris [ctb] (Author of c++ msinttypes library)", "Maintainer": "David Cooley ", - "Repository": "CRAN" + "Repository": "https://packagemanager.posit.co/cran/latest" }, "rapidoc": { "Package": "rapidoc", @@ -6989,7 +7031,7 @@ "Config/testthat/edition": "3", "NeedsCompilation": "no", "Author": "Bruno Tremblay [aut, cre], Barret Schloerke [ctb] (), Mrinmoy Majumdar [cph]", - "Repository": "CRAN" + "Repository": "https://packagemanager.posit.co/cran/latest" }, "rappdirs": { "Package": "rappdirs", @@ -7306,7 +7348,55 @@ "renv": { "Package": "renv", "Version": "1.1.4", - "Source": "Repository" + "Source": "Repository", + "Type": "Package", + "Title": "Project Environments", + "Authors@R": "c( person(\"Kevin\", \"Ushey\", role = c(\"aut\", \"cre\"), email = \"kevin@rstudio.com\", comment = c(ORCID = \"0000-0003-2880-7407\")), person(\"Hadley\", \"Wickham\", role = c(\"aut\"), email = \"hadley@rstudio.com\", comment = c(ORCID = \"0000-0003-4757-117X\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )", + "Description": "A dependency management toolkit for R. 
Using 'renv', you can create and manage project-local R libraries, save the state of these libraries to a 'lockfile', and later restore your library as required. Together, these tools can help make your projects more isolated, portable, and reproducible.", + "License": "MIT + file LICENSE", + "URL": "https://rstudio.github.io/renv/, https://github.com/rstudio/renv", + "BugReports": "https://github.com/rstudio/renv/issues", + "Imports": [ + "utils" + ], + "Suggests": [ + "BiocManager", + "cli", + "compiler", + "covr", + "cpp11", + "devtools", + "gitcreds", + "jsonlite", + "jsonvalidate", + "knitr", + "miniUI", + "modules", + "packrat", + "pak", + "R6", + "remotes", + "reticulate", + "rmarkdown", + "rstudioapi", + "shiny", + "testthat", + "uuid", + "waldo", + "yaml", + "webfakes" + ], + "Encoding": "UTF-8", + "RoxygenNote": "7.3.2", + "VignetteBuilder": "knitr", + "Config/Needs/website": "tidyverse/tidytemplate", + "Config/testthat/edition": "3", + "Config/testthat/parallel": "true", + "Config/testthat/start-first": "bioconductor,python,install,restore,snapshot,retrieve,remotes", + "NeedsCompilation": "no", + "Author": "Kevin Ushey [aut, cre] (), Hadley Wickham [aut] (), Posit Software, PBC [cph, fnd]", + "Maintainer": "Kevin Ushey ", + "Repository": "CRAN" }, "rlang": { "Package": "rlang", @@ -7811,7 +7901,7 @@ "NeedsCompilation": "no", "Author": "Max Kuhn [aut, cre] ()", "Maintainer": "Max Kuhn ", - "Repository": "CRAN" + "Repository": "https://packagemanager.posit.co/cran/latest" }, "shape": { "Package": "shape", @@ -7986,7 +8076,7 @@ "BugReports": "https://github.com/traversc/stringfish/issues", "NeedsCompilation": "yes", "Author": "Travers Ching [aut, cre, cph], Phillip Hazel [ctb] (Bundled PCRE2 code), Zoltan Herczeg [ctb, cph] (Bundled PCRE2 code), University of Cambridge [cph] (Bundled PCRE2 code), Tilera Corporation [cph] (Stack-less Just-In-Time compiler bundled with PCRE2), Yann Collet [ctb, cph] (Yann Collet is the author of the bundled xxHash code)", - 
"Repository": "CRAN" + "Repository": "https://packagemanager.posit.co/cran/latest" }, "stringi": { "Package": "stringi", @@ -9081,7 +9171,7 @@ "NeedsCompilation": "no", "Author": "Julia Silge [cre, aut] (), Posit Software, PBC [cph, fnd]", "Maintainer": "Julia Silge ", - "Repository": "CRAN" + "Repository": "https://packagemanager.posit.co/cran/latest" }, "vip": { "Package": "vip", @@ -9230,6 +9320,54 @@ "Author": "Almende B.V. and Contributors [aut, cph] (vis.js library in htmlwidgets/lib, https://visjs.org/, https://github.com/visjs/vis-network), Benoit Thieurmel [aut, cre] (R interface)", "Repository": "CRAN" }, + "visdat": { + "Package": "visdat", + "Version": "0.6.0", + "Source": "Repository", + "Title": "Preliminary Visualisation of Data", + "Authors@R": "c( person(\"Nicholas\", \"Tierney\", role = c(\"aut\", \"cre\"), email = \"nicholas.tierney@gmail.com\", comment = c(ORCID = \"https://orcid.org/0000-0003-1460-8722\")), person(\"Sean\", \"Hughes\", role = \"rev\", comment =c(ORCID = \"https://orcid.org/0000-0002-9409-9405\", \"Sean Hughes reviewed the package for rOpenSci, see https://github.com/ropensci/onboarding/issues/87\")), person(\"Mara\", \"Averick\", role = \"rev\", comment = \"Mara Averick reviewed the package for rOpenSci, see https://github.com/ropensci/onboarding/issues/87\"), person(\"Stuart\", \"Lee\", role = c(\"ctb\")), person(\"Earo\", \"Wang\", role = c(\"ctb\")), person(\"Nic\", \"Crane\", role = c(\"ctb\")), person(\"Christophe\", \"Regouby\", role=c(\"ctb\")) )", + "Description": "Create preliminary exploratory data visualisations of an entire dataset to identify problems or unexpected features using 'ggplot2'.", + "Depends": [ + "R (>= 3.2.2)" + ], + "License": "MIT + file LICENSE", + "LazyData": "true", + "RoxygenNote": "7.2.3", + "Imports": [ + "ggplot2", + "tidyr", + "dplyr", + "purrr", + "readr", + "magrittr", + "stats", + "tibble", + "glue", + "forcats", + "cli", + "scales" + ], + "URL": "https://docs.ropensci.org/visdat/, 
https://github.com/ropensci/visdat", + "BugReports": "https://github.com/ropensci/visdat/issues", + "Suggests": [ + "testthat (>= 3.0.0)", + "plotly (>= 4.5.6)", + "knitr", + "rmarkdown", + "vdiffr", + "spelling", + "covr", + "stringr" + ], + "VignetteBuilder": "knitr", + "Encoding": "UTF-8", + "Language": "en-US", + "Config/testthat/edition": "3", + "NeedsCompilation": "no", + "Author": "Nicholas Tierney [aut, cre] (), Sean Hughes [rev] (, Sean Hughes reviewed the package for rOpenSci, see https://github.com/ropensci/onboarding/issues/87), Mara Averick [rev] (Mara Averick reviewed the package for rOpenSci, see https://github.com/ropensci/onboarding/issues/87), Stuart Lee [ctb], Earo Wang [ctb], Nic Crane [ctb], Christophe Regouby [ctb]", + "Maintainer": "Nicholas Tierney ", + "Repository": "https://packagemanager.posit.co/cran/latest" + }, "vroom": { "Package": "vroom", "Version": "1.6.5", From 17342fceb7e73bd3ad6699c4638207815013d677 Mon Sep 17 00:00:00 2001 From: Phil Henrickson Date: Fri, 21 Mar 2025 09:35:47 -0500 Subject: [PATCH 7/7] updates and fixes for quarto reports --- index.qmd | 5 ++++- methodology.qmd | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/index.qmd b/index.qmd index 567cb58..2605449 100644 --- a/index.qmd +++ b/index.qmd @@ -138,7 +138,10 @@ top_preds = filter(!is.na(thumbnail)) |> head(25) |> predictions_gt(games = games) |> - gtExtras::gt_theme_nytimes() + gtExtras::gt_theme_nytimes() |> + gt::tab_header( + title = paste("Top Upcoming Predictions:", .x) + ) ) ``` diff --git a/methodology.qmd b/methodology.qmd index 175ce28..6e88214 100644 --- a/methodology.qmd +++ b/methodology.qmd @@ -468,7 +468,7 @@ What were the model's top predictions in the validation set? ```{r} #| label: validation-predictions-table -#| column: page-inset-right +#| column: screen-inset-right valid_predictions |> filter(.pred_hurdle_class == 'yes') |>