From d38497016906eab8b203d45c4bb16a34281922f6 Mon Sep 17 00:00:00 2001 From: Trevor Riley <89118428+TNRiley@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:17:21 -0500 Subject: [PATCH 01/14] change read citation argument to include all fields only_key_fields changed to FALSE as shiny users don't have ability to toggle this currently, TY field and others are helpful in organizing/evaluating. --- inst/shiny-app/CiteSource/app.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/inst/shiny-app/CiteSource/app.R b/inst/shiny-app/CiteSource/app.R index e7452ce2..3751e519 100644 --- a/inst/shiny-app/CiteSource/app.R +++ b/inst/shiny-app/CiteSource/app.R @@ -387,7 +387,9 @@ server <- function(input, output, session) { files = path_list, cite_sources = suggested_source, cite_labels = empty_strings, - cite_strings = empty_strings + cite_strings = empty_strings, + only_key_fields = FALSE + ) # Summarize the number of records by citation source From d74de4d62d423c163b847a5b3bb6650622493548 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Thu, 19 Dec 2024 18:19:09 +0000 Subject: [PATCH 02/14] Documentation --- NAMESPACE | 1 + man/{CiteSource.Rd => CiteSource-package.Rd} | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) rename man/{CiteSource.Rd => CiteSource-package.Rd} (98%) diff --git a/NAMESPACE b/NAMESPACE index 80fd572b..d376cdec 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +S3method(as.data.frame,bibliography) export("%>%") export(calculate_detailed_records) export(calculate_initial_records) diff --git a/man/CiteSource.Rd b/man/CiteSource-package.Rd similarity index 98% rename from man/CiteSource.Rd rename to man/CiteSource-package.Rd index 0c2363b9..19a730d8 100644 --- a/man/CiteSource.Rd +++ b/man/CiteSource-package.Rd @@ -1,9 +1,9 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/CiteSource.R \docType{package} -\name{CiteSource} 
-\alias{CiteSource-package} +\name{CiteSource-package} \alias{CiteSource} +\alias{CiteSource-package} \title{CiteSource: A package to compare sources of citation records} \description{ The CiteSource package supports evidence aggregation by helping with the From 612bdd4e3146f5583668f64237e8127acd7aa8bc Mon Sep 17 00:00:00 2001 From: Trevor Riley Date: Mon, 12 May 2025 09:43:59 -0400 Subject: [PATCH 03/14] add google analytics to dev app --- inst/shiny-app/CiteSource/app.R | 5 +++-- inst/shiny-app/CiteSource/google-analytics.html | 9 +++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 inst/shiny-app/CiteSource/google-analytics.html diff --git a/inst/shiny-app/CiteSource/app.R b/inst/shiny-app/CiteSource/app.R index 3fed1bf6..d600a6a2 100644 --- a/inst/shiny-app/CiteSource/app.R +++ b/inst/shiny-app/CiteSource/app.R @@ -3,8 +3,9 @@ library(CiteSource) library(dplyr) # Set background color -shiny::tags$head(shiny::tags$style( - shiny::HTML(' +shiny::tags$head( + includeHTML("google-analytics.html"), + shiny::tags$style(shiny::HTML(' #sidebar { background-color: #ffffff; } diff --git a/inst/shiny-app/CiteSource/google-analytics.html b/inst/shiny-app/CiteSource/google-analytics.html new file mode 100644 index 00000000..f31c3d31 --- /dev/null +++ b/inst/shiny-app/CiteSource/google-analytics.html @@ -0,0 +1,9 @@ + + + \ No newline at end of file From d07913707cffce56ff449b4bde8dc5c9daffd9f9 Mon Sep 17 00:00:00 2001 From: Trevor Riley Date: Mon, 12 May 2025 10:02:37 -0400 Subject: [PATCH 04/14] add google analytics to main --- .../{google-analytics.html => google_analytics_dev.html} | 0 inst/shiny-app/CiteSource/google_analytics_main.html | 9 +++++++++ 2 files changed, 9 insertions(+) rename inst/shiny-app/CiteSource/{google-analytics.html => google_analytics_dev.html} (100%) create mode 100644 inst/shiny-app/CiteSource/google_analytics_main.html diff --git a/inst/shiny-app/CiteSource/google-analytics.html 
b/inst/shiny-app/CiteSource/google_analytics_dev.html similarity index 100% rename from inst/shiny-app/CiteSource/google-analytics.html rename to inst/shiny-app/CiteSource/google_analytics_dev.html diff --git a/inst/shiny-app/CiteSource/google_analytics_main.html b/inst/shiny-app/CiteSource/google_analytics_main.html new file mode 100644 index 00000000..eb4691bd --- /dev/null +++ b/inst/shiny-app/CiteSource/google_analytics_main.html @@ -0,0 +1,9 @@ + + + \ No newline at end of file From a097301b38b49964e0356d2702de2dc590f4d2a5 Mon Sep 17 00:00:00 2001 From: Trevor Riley Date: Mon, 12 May 2025 10:03:44 -0400 Subject: [PATCH 05/14] add google analytics to app and deploy --- .github/workflows/document-and-deploy.yml | 12 ++++++++++-- inst/shiny-app/CiteSource/app.R | 16 ++++++++++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/.github/workflows/document-and-deploy.yml b/.github/workflows/document-and-deploy.yml index 03e25e5f..f1643c74 100644 --- a/.github/workflows/document-and-deploy.yml +++ b/.github/workflows/document-and-deploy.yml @@ -53,7 +53,11 @@ jobs: R -e " remotes::install_github('ESHackathon/CiteSource', force = TRUE); rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}}); - rsconnect::deployApp(appName = 'CiteSource_latest', appDir = './inst/shiny-app/CiteSource', forceUpdate = TRUE)" + rsconnect::deployApp( + appName = 'CiteSource_latest', + appDir = './inst/shiny-app/CiteSource', + envVars = c(APP_ENV = "development"), + forceUpdate = TRUE)" - name: Deploy stable version from main if: github.ref == 'refs/heads/main' @@ -63,7 +67,11 @@ jobs: R -e " remotes::install_github('ESHackathon/CiteSource', force = TRUE); rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}}); - rsconnect::deployApp(appName = 'CiteSource', appDir = './inst/shiny-app/CiteSource', 
forceUpdate = TRUE)" + rsconnect::deployApp( + appName = 'CiteSource', + appDir = './inst/shiny-app/CiteSource', + envVars = c(APP_ENV = "production"), + forceUpdate = TRUE)" - name: Create pkgdown env: diff --git a/inst/shiny-app/CiteSource/app.R b/inst/shiny-app/CiteSource/app.R index d600a6a2..a095e841 100644 --- a/inst/shiny-app/CiteSource/app.R +++ b/inst/shiny-app/CiteSource/app.R @@ -2,9 +2,21 @@ library(DT) library(CiteSource) library(dplyr) -# Set background color +app_environment <- Sys.getenv("APP_ENV") +# Determine which Google Analytics file to include based on the environment +ga_include_file <- NULL +if (app_environment == "production") { + ga_include_file <- "google_analytics_main.html" +} else if (app_environment == "development") { + ga_include_file <- "google_analytics_dev.html" +} + shiny::tags$head( - includeHTML("google-analytics.html"), + # Include the appropriate Google Analytics file if determined + if (!is.null(ga_include_file) && file.exists(ga_include_file)) { + includeHTML(ga_include_file) + }, + # style shiny::tags$style(shiny::HTML(' #sidebar { background-color: #ffffff; From 1e2335d11f376aeb11ebb23f1ac353fc6f80cb04 Mon Sep 17 00:00:00 2001 From: Trevor Riley Date: Mon, 12 May 2025 10:33:15 -0400 Subject: [PATCH 06/14] fix: Correct env var quoting in shinyapps deploy steps --- .github/workflows/document-and-deploy.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/document-and-deploy.yml b/.github/workflows/document-and-deploy.yml index f1643c74..dd81669e 100644 --- a/.github/workflows/document-and-deploy.yml +++ b/.github/workflows/document-and-deploy.yml @@ -56,7 +56,7 @@ jobs: rsconnect::deployApp( appName = 'CiteSource_latest', appDir = './inst/shiny-app/CiteSource', - envVars = c(APP_ENV = "development"), + envVars = c(APP_ENV = 'development'), forceUpdate = TRUE)" - name: Deploy stable version from main @@ -70,7 +70,7 @@ jobs: rsconnect::deployApp( appName = 'CiteSource', appDir = 
'./inst/shiny-app/CiteSource', - envVars = c(APP_ENV = "production"), + envVars = c(APP_ENV = 'production'), forceUpdate = TRUE)" - name: Create pkgdown From c7c783720792bed766ff8347ae3659d0fb54dcea Mon Sep 17 00:00:00 2001 From: Trevor Riley Date: Mon, 12 May 2025 10:44:54 -0400 Subject: [PATCH 07/14] Attempt to fix deploy error by restructuring envVars --- .github/workflows/document-and-deploy.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/document-and-deploy.yml b/.github/workflows/document-and-deploy.yml index dd81669e..1406da35 100644 --- a/.github/workflows/document-and-deploy.yml +++ b/.github/workflows/document-and-deploy.yml @@ -53,10 +53,12 @@ jobs: R -e " remotes::install_github('ESHackathon/CiteSource', force = TRUE); rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}}); + deploy_env_vars <- setNames('development', 'APP_ENV') + # Deploy the Shiny app rsconnect::deployApp( - appName = 'CiteSource_latest', + appName = 'CiteSource_latest', appDir = './inst/shiny-app/CiteSource', - envVars = c(APP_ENV = 'development'), + envVars = deploy_env_vars, forceUpdate = TRUE)" - name: Deploy stable version from main @@ -67,10 +69,12 @@ jobs: R -e " remotes::install_github('ESHackathon/CiteSource', force = TRUE); rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}}); + deploy_env_vars <- setNames('production', 'APP_ENV') + # Deploy the Shiny app rsconnect::deployApp( - appName = 'CiteSource', + appName = 'CiteSource', appDir = './inst/shiny-app/CiteSource', - envVars = c(APP_ENV = 'production'), + envVars = deploy_env_vars, forceUpdate = TRUE)" - name: Create pkgdown From 2143779dbd6c218614c9bd7bf5dcb4519b1681cc Mon Sep 17 00:00:00 2001 From: Trevor Riley Date: Mon, 12 May 2025 11:03:04 -0400 Subject: [PATCH 08/14] attempt new GA 
integration --- .github/workflows/document-and-deploy.yml | 6 -- inst/shiny-app/CiteSource/app.R | 79 +++++++++++++++++++---- 2 files changed, 66 insertions(+), 19 deletions(-) diff --git a/.github/workflows/document-and-deploy.yml b/.github/workflows/document-and-deploy.yml index 1406da35..402c7b9e 100644 --- a/.github/workflows/document-and-deploy.yml +++ b/.github/workflows/document-and-deploy.yml @@ -53,12 +53,9 @@ jobs: R -e " remotes::install_github('ESHackathon/CiteSource', force = TRUE); rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}}); - deploy_env_vars <- setNames('development', 'APP_ENV') - # Deploy the Shiny app rsconnect::deployApp( appName = 'CiteSource_latest', appDir = './inst/shiny-app/CiteSource', - envVars = deploy_env_vars, forceUpdate = TRUE)" - name: Deploy stable version from main @@ -69,12 +66,9 @@ jobs: R -e " remotes::install_github('ESHackathon/CiteSource', force = TRUE); rsconnect::setAccountInfo(name=${{secrets.SHINY_LUKAS_ACCOUNT}}, token=${{secrets.SHINY_LUKAS_TOKEN}}, secret=${{secrets.SHINY_LUKAS_SECRET}}); - deploy_env_vars <- setNames('production', 'APP_ENV') - # Deploy the Shiny app rsconnect::deployApp( appName = 'CiteSource', appDir = './inst/shiny-app/CiteSource', - envVars = deploy_env_vars, forceUpdate = TRUE)" - name: Create pkgdown diff --git a/inst/shiny-app/CiteSource/app.R b/inst/shiny-app/CiteSource/app.R index a095e841..2aad160e 100644 --- a/inst/shiny-app/CiteSource/app.R +++ b/inst/shiny-app/CiteSource/app.R @@ -2,20 +2,7 @@ library(DT) library(CiteSource) library(dplyr) -app_environment <- Sys.getenv("APP_ENV") -# Determine which Google Analytics file to include based on the environment -ga_include_file <- NULL -if (app_environment == "production") { - ga_include_file <- "google_analytics_main.html" -} else if (app_environment == "development") { - ga_include_file <- "google_analytics_dev.html" -} - shiny::tags$head( - # 
Include the appropriate Google Analytics file if determined - if (!is.null(ga_include_file) && file.exists(ga_include_file)) { - includeHTML(ga_include_file) - }, # style shiny::tags$style(shiny::HTML(' #sidebar { @@ -396,6 +383,9 @@ ui <- shiny::navbarPage("CiteSource", # Define server logic to read selected file ---- server <- function(input, output, session) { + + # --- Reactive Values --- + # Used to store data that changes during the session rv <- shiny::reactiveValues() rv$df <- data.frame() rv$upload_df <- data.frame()#for original uploads @@ -403,6 +393,69 @@ server <- function(input, output, session) { rv$pairs_to_check <- data.frame()#for potential duplicates/manual dedup rv$pairs_removed <- data.frame()#for removed records + # --- Google Analytics Integration --- + # Flag to ensure GA script is inserted only once per session + ga_script_inserted <- reactiveVal(FALSE) + + # Use observeEvent on session$clientData which becomes available early + observeEvent(session$clientData, { + # Only proceed if the script hasn't been inserted yet for this session + if (!ga_script_inserted()) { + # Get the application's path from the URL (e.g., /CiteSource_latest/) + app_path <- session$clientData$url_pathname + ga_include_file <- NULL # Variable to hold the GA HTML filename + + # --- Determine GA HTML filename based on the application path --- + # Check if the path ends with '_latest' or '_latest/' (case-insensitive) + if (grepl("_latest/?$", app_path, ignore.case = TRUE)) { + # Development version + message("GA: Detected DEV environment based on URL path: ", app_path) # Logging + # *** SET the DEV Google Analytics HTML filename *** + ga_include_file <- "google_analytics_dev.html" # file is in same directory as app.R + + } + # Check if the path corresponds to the production app name (e.g., /CiteSource/ or /CiteSource) + # Adjust '/CiteSource/?$' if your production app name is different + else if (grepl("/CiteSource/?$", app_path, ignore.case = TRUE)) { + # Production 
version + message("GA: Detected PROD environment based on URL path: ", app_path) # Logging + # *** SET the PROD Google Analytics HTML filename *** + ga_include_file <- "google_analytics_main.html" # file is in same directory as app.R + + } else { + # Path didn't match known patterns + message("GA: Could not determine environment from URL path: ", app_path) # Logging + } + + # --- Insert the GA HTML file content if a filename was determined and file exists --- + if (!is.null(ga_include_file) && nzchar(ga_include_file)) { + # Check if the determined file actually exists in the app directory + if (file.exists(ga_include_file)) { + # Insert the content of the HTML file into the document's <head> + insertUI( + selector = "head", # Target the <head> tag + where = "beforeEnd", # Add the script at the end of the head's content + # Use includeHTML to read and insert the file content + ui = includeHTML(ga_include_file), + immediate = TRUE # Attempt to insert as soon as possible + ) + # Set the flag to TRUE to prevent this code running again for this session + ga_script_inserted(TRUE) + message("GA: Inserted script from file: ", ga_include_file) # Logging + } else { + # Log an error if the file is missing + message("GA Error: HTML file not found: ", ga_include_file) + # Optionally set the flag anyway to prevent repeated checks for missing file + ga_script_inserted(TRUE) + } + } else { + # If no file was determined (e.g., path didn't match), set flag to prevent re-check + ga_script_inserted(TRUE) + } + } + }, ignoreNULL = TRUE, once = FALSE) # Trigger when clientData is available, but flag prevents re-run + # --- End Google Analytics Integration --- + #### Upload files tab section ------ # upload on click shiny::observeEvent(input$file, { From 397a7ef58080bf4fba25fa1fe1030233754cc344 Mon Sep 17 00:00:00 2001 From: Matthew Grainger Date: Wed, 14 May 2025 13:27:24 +0200 Subject: [PATCH 09/14] fix typo in README - closes #234 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/README.md b/README.md index 931d34f0..b90eb9a0 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) -## About the Pacakge +## About the Package CiteSource was developed to provide researchers the ability to examine the utility and efficacy of literature resources and search methodologies. The idea behind CiteSource is simply allowing users to deduplicate citation records, while maintaining customizable metadata about the citation. From f9aebfe215f8185a92addf3203a7ba96b9341d18 Mon Sep 17 00:00:00 2001 From: Trevor Riley Date: Wed, 14 May 2025 14:39:41 -0400 Subject: [PATCH 10/14] add 100MB limit size for testing server disconnect issue with large files --- inst/shiny-app/CiteSource/app.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inst/shiny-app/CiteSource/app.R b/inst/shiny-app/CiteSource/app.R index 2aad160e..ba96df7a 100644 --- a/inst/shiny-app/CiteSource/app.R +++ b/inst/shiny-app/CiteSource/app.R @@ -1,3 +1,5 @@ +options(shiny.maxRequestSize = 100*1024^2) #set limit to 100MB + library(DT) library(CiteSource) library(dplyr) From e6f9cf046b51f41c777ece432af96e503b72e430 Mon Sep 17 00:00:00 2001 From: Trevor Riley Date: Wed, 14 May 2025 15:12:39 -0400 Subject: [PATCH 11/14] increase to 500MB for testing --- inst/shiny-app/CiteSource/app.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/shiny-app/CiteSource/app.R b/inst/shiny-app/CiteSource/app.R index ba96df7a..f7497a54 100644 --- a/inst/shiny-app/CiteSource/app.R +++ b/inst/shiny-app/CiteSource/app.R @@ -1,4 +1,4 @@ -options(shiny.maxRequestSize = 100*1024^2) #set limit to 100MB +options(shiny.maxRequestSize = 500*1024^2) #set limit to 100MB library(DT) library(CiteSource) From cc0fdf10b64e22c64c482f119b8719c6f06b1021 Mon Sep 17 00:00:00 2001 From: Trevor Riley Date: Sat, 17 May 2025 15:18:03 -0400 Subject: [PATCH 12/14] add timeout and increase size limit 
further --- inst/shiny-app/CiteSource/app.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/shiny-app/CiteSource/app.R b/inst/shiny-app/CiteSource/app.R index f7497a54..942a9852 100644 --- a/inst/shiny-app/CiteSource/app.R +++ b/inst/shiny-app/CiteSource/app.R @@ -1,4 +1,4 @@ -options(shiny.maxRequestSize = 500*1024^2) #set limit to 100MB +options(shiny.maxRequestSize=1000*1024^2, timeout = 40000000) library(DT) library(CiteSource) From e1a42580c183c6721b110d5e5cf967b06f9954b9 Mon Sep 17 00:00:00 2001 From: Matthew Grainger Date: Mon, 22 Sep 2025 11:13:00 +0200 Subject: [PATCH 13/14] Update README.md Response to review comment - remove circular link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b90eb9a0..227c24d2 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Install CiteSource in R with remotes::install_github("ESHackathon/CiteSource") **Vignettes** -Vignettes covering various use cases can be found on the [CiteSource web page](https://www.eshackathon.org/CiteSource/). +Vignettes covering various use cases can be found on the [CiteSource web page](https://www.eshackathon.org/CiteSource/articles). 
## Feedback From 1d95b0ac27522e133a9c064f2f889d3efd26d10b Mon Sep 17 00:00:00 2001 From: Trevor Riley Date: Tue, 18 Nov 2025 09:58:23 -0500 Subject: [PATCH 14/14] added tagging instructions and deduplication information for clarity- increased max request size --- inst/shiny-app/CiteSource/app.R | 237 +++++++++++++++++++++++++++++++- 1 file changed, 230 insertions(+), 7 deletions(-) diff --git a/inst/shiny-app/CiteSource/app.R b/inst/shiny-app/CiteSource/app.R index 942a9852..de8ff54e 100644 --- a/inst/shiny-app/CiteSource/app.R +++ b/inst/shiny-app/CiteSource/app.R @@ -1,4 +1,4 @@ -options(shiny.maxRequestSize=1000*1024^2, timeout = 40000000) +options(shiny.maxRequestSize=10000*1024^2, timeout = 40000000) library(DT) library(CiteSource) @@ -14,7 +14,7 @@ shiny::tags$head( body, label, input, button, select { font-family: "Arial"; }') -)) + )) columns2hide <- c("title", "author", "doi", "volume", "pages", "number", "year", "abstract", "journal", "isbn") @@ -98,7 +98,8 @@ ui <- shiny::navbarPage("CiteSource", shiny::mainPanel( shiny::h5("Step 2: Double click on a column to edit sources, labels, and strings. Use *Ctrl+Enter* to save edits, one column at a time"), # Output: Data file ---- - DT::dataTableOutput("tbl_out") + DT::dataTableOutput("tbl_out"), + shiny::uiOutput("post_upload_guide") ) ) ) @@ -124,7 +125,7 @@ ui <- shiny::navbarPage("CiteSource", shiny::tabPanel( "Manual deduplication", br(), - shiny::h5("Step 4: Review potential duplicates manually"), + shiny::h5("Step 4 (optional): Manually select further duplicates"), shiny::p("The following records were identified as potential duplicates. Potential duplicates are combined into a single row with metadata fields for each record represented (ex. Title 1 & Title 2). Click any row to indicate that the records in that row ARE duplicates. 
Once all duplicates are identified you can click the button 'Remove additional duplicates' and then proceed to the visualizations."), shiny::textOutput("Manual_pretext"), shiny::br(), @@ -147,7 +148,7 @@ ui <- shiny::navbarPage("CiteSource", ) %>% htmltools::tagAppendAttributes(style = "background-color: #23395B"), shinyWidgets::dropdown( - + tags$h3("Select columns to display"), shinyWidgets::pickerInput( @@ -170,6 +171,15 @@ ui <- shiny::navbarPage("CiteSource", tags$style(HTML(".table.dataTable tbody td.active, .table.dataTable tbody tr.active td { background-color: #CBF7ED!important; color: black!important}")), + ), + shiny::tabPanel( + "How deduplication works", + shiny::fluidRow( + shiny::column( + 12, + shiny::uiOutput("dedup_logic_guide") + ) + ) ) ) ), @@ -395,6 +405,43 @@ server <- function(input, output, session) { rv$pairs_to_check <- data.frame()#for potential duplicates/manual dedup rv$pairs_removed <- data.frame()#for removed records + # 1. The Container (Decides WHEN to show it) + output$post_upload_guide <- shiny::renderUI({ + # Only show this if the upload dataframe exists and has rows + shiny::req(is.data.frame(rv$df) && nrow(rv$df) > 0) + + shiny::tagList( + shiny::br(), + shiny::hr(), + shiny::h5("Tagging Overview"), + # We refer to the table output created below + shiny::tableOutput("guide_table_content") + ) + }) + + # 2. The Table Content (Generates the table itself) + output$guide_table_content <- shiny::renderTable({ + # We don't need the req() here because the UI above handles the hiding + + data.frame( + "Column Name" = c("Source", "Label", "String"), + "Description" = c( + "'Source' is used to citations in files according to where the came from. This can include database names (e.g. Web of Science, Scopus) or a method used to find the citations (e.g. citation searching, numbered search string).", + + "'Label' is used to tag citations in files with information related to their associated screening phase. 
The label field requires one of three terms: ‘search’, ‘screened’, or ‘final’. All plots/tables require at least one file to be labeled as ‘search’, no other terms in the label field are permitted. NOTE: files that are tagged as 'screened' or 'final' should not have a 'source' tag.", + + "'String' is used to further differentiate sets of records. While the source/label fields alone can handle most use cases, the string field can be used to record other supplementary information a user may want to retain for analysis, or to further differentiate string variations (e.g. String1-narrow, String1-broad, String2-narrow, etc.)" + ), + check.names = FALSE + ) + }, + striped = TRUE, + hover = TRUE, + width = "100%", + align = "l", + sanitize.text.function = function(x) x # Allows the tags to work + ) + # --- Google Analytics Integration --- # Flag to ensure GA script is inserted only once per session ga_script_inserted <- reactiveVal(FALSE) @@ -479,7 +526,7 @@ server <- function(input, output, session) { cite_labels = suggested_label, cite_strings = empty_strings, only_key_fields = FALSE - + ) # Summarize the number of records by citation source @@ -941,6 +988,182 @@ server <- function(input, output, session) { }) + ## How Deduplication works tab + + # 1. The Container (With Padding added) + output$dedup_logic_guide <- shiny::renderUI({ + + shiny::tagList( + shiny::br(), + # WRAPPER DIV: Adds 15px vertical and 30px horizontal padding + shiny::div(style = "padding: 15px 30px;", + + shiny::h4("Deduplication Logic: ASySD Criteria"), + shiny::p("ASySD identifies duplicates in two phases. First, it blocks records into potential groups. Second, it scores text similarity. Finally, 'close calls' are flagged for manual review."), + + shiny::hr(), + + # The table output + shiny::tableOutput("dedup_guide_table_content") + ) + ) + + # 1. 
The Container (UI with Accordions) + output$dedup_logic_guide <- shiny::renderUI({ + + shiny::tagList( + shiny::br(), + # Wrapper div with padding + shiny::div(style = "padding: 0px 15px; max-width: 1050px;", + + shiny::h4("Deduplication Criteria"), + shiny::p("ASySD identifies duplicates in two automated phases, followed by a manual review safety net. Click a phase below to view the full logic."), + + shiny::hr(), + + # --- ACCORDION 1: BLOCKING --- + shiny::tags$details( + style = "border: 1px solid #ddd; border-radius: 5px; padding: 10px; margin-bottom: 10px;", + shiny::tags$summary(style = "cursor: pointer; font-weight: bold; font-size: 15px;", + "Phase 1: Blocking (The Wide Net)"), + shiny::br(), + shiny::p(style = "font-style: italic; font-size: 13px;", + "Records are grouped into potential duplicate sets if they match EXACTLY on any of these combinations."), + shiny::tableOutput("tbl_phase1") + ), + + # --- ACCORDION 2: VALIDATION --- + shiny::tags$details( + style = "border: 1px solid #ddd; border-radius: 5px; padding: 10px; margin-bottom: 10px;", + shiny::tags$summary(style = "cursor: pointer; font-weight: bold; font-size: 15px;", + "Phase 2: Validation (The Strict Check)"), + shiny::br(), + shiny::p(style = "font-style: italic; font-size: 13px;", + "Candidate pairs are text-scored (0-100%). 
A pair is confirmed as a duplicate ONLY if it meets one of these threshold sets."), + shiny::tableOutput("tbl_phase2") + ), + + # --- ACCORDION 3: MANUAL REVIEW --- + shiny::tags$details( + style = "border: 1px solid #ddd; border-radius: 5px; padding: 10px; margin-bottom: 10px;", + shiny::tags$summary(style = "cursor: pointer; font-weight: bold; font-size: 15px;", + "Phase 3: Manual Review (The Safety Net)"), + shiny::br(), + shiny::p(style = "font-style: italic; font-size: 13px;", + "Pairs that fall into the 'Grey Area' or have conflicting metadata are flagged for human review."), + shiny::tableOutput("tbl_phase3") + ) + ) # End div + ) # End tagList + }) # End renderUI + + + # 2. Table Content - Phase 1 (Blocking) + output$tbl_phase1 <- shiny::renderTable({ + data.frame( + "Category" = c("Round 1 (Broad)", "Round 2 (Bibliographic)", "Round 3 (Numeric)", "Round 4 (Loose)"), + "Criteria" = c( + "
  • Title & Pages
  • Title & Author
  • Title & Abstract
  • DOI (Exact)
", + "
  • Author & Year & Pages
  • Journal & Volume & Pages
  • ISBN & Volume & Pages
  • Title & ISBN
", + "
  • Year & Pages & Volume
  • Year & Number & Volume
  • Year & Pages & Number
", + "
  • Author & Year
  • Year & Title
  • Title & Volume
  • Title & Journal
" + ), + check.names = FALSE + ) + }, striped = TRUE, hover = TRUE, width = "100%", sanitize.text.function = function(x) x) + }) + + + # 3. Table Content - Phase 2 (Validation - FULL DETAIL) + output$tbl_phase2 <- shiny::renderTable({ + data.frame( + "Category" = c( + "Strict Bibliographic", + "Abstract Heavy", + "DOI Specific", + "Complex Metadata", + "Strict Journal + Abstract", + "High Confidence Metadata", + "High Numeric Confidence", + "Title & Journal/ISBN" + ), + "Criteria" = c( + # Strict Bibliographic + "
    +
  • Pages(>80%) + Vol(>80%) + Title(>90%) + Abstract(>90%) + Author(>50%) + ISBN(>99%)
  • +
  • Pages(>80%) + Vol(>80%) + Title(>90%) + Abstract(>90%) + Author(>50%) + Journal(>60%)
  • +
  • Pages(>80%) + No.(>80%) + Title(>90%) + Abstract(>90%) + Author(>50%) + Journal(>60%)
  • +
  • Vol(>80%) + No.(>80%) + Title(>90%) + Abstract(>90%) + Author(>50%) + Journal(>60%)
  • +
", + + # Abstract Heavy + "
    +
  • Vol(>80%) + No.(>80%) + Title(>90%) + Abstract(>90%) + Author(>80%)
  • +
  • Vol(>80%) + Pages(>80%) + Title(>90%) + Abstract(>90%) + Author(>80%)
  • +
  • Pages(>80%) + No.(>80%) + Title(>90%) + Abstract(>90%) + Author(>80%)
  • +
", + + # DOI Specific + "
  • DOI(>95%) + Author(>75%) + Title(>90%)
", + + # Complex Metadata + "
    +
  • Title(>80%) + Abstract(>90%) + Vol(>85%) + Journal(>65%) + Author(>90%)
  • +
  • Title(>90%) + Abstract(>80%) + Vol(>85%) + Journal(>65%) + Author(>90%)
  • +
", + + # Strict Journal & Abstract + "
    +
  • Pages(>80%) + Vol(>80%) + Title(>90%) + Abstract(>80%) + Author(>90%) + Journal(>75%)
  • +
  • Pages(>80%) + No.(>80%) + Title(>90%) + Abstract(>80%) + Author(>90%) + Journal(>75%)
  • +
  • Vol(>80%) + No.(>80%) + Title(>90%) + Abstract(>80%) + Author(>90%) + Journal(>75%)
  • +
", + + # High Confidence Metadata + "
    +
  • Title(>90%) + Author(>90%) + Abstract(>90%) + Journal(>70%)
  • +
  • Title(>90%) + Author(>90%) + Abstract(>90%) + ISBN(>99%)
  • +
", + + # High Numeric Confidence + "
    +
  • Pages(>90%) + No.(>90%) + Title(>90%) + Author(>80%) + Journal(>60%)
  • +
  • No.(>90%) + Vol(>90%) + Title(>90%) + Author(>90%) + ISBN(>99%)
  • +
  • Pages(>90%) + Vol(>90%) + Title(>90%) + Author(>80%) + Journal(>60%)
  • +
  • Pages(>90%) + No.(>90%) + Title(>90%) + Author(>80%) + ISBN(>99%)
  • +
", + + # Title & Journal/ISBN Specific + "
    +
  • Pages(>80%) + Vol(>80%) + Title(>95%) + Author(>80%) + Journal(>90%)
  • +
  • No.(>80%) + Vol(>80%) + Title(>95%) + Author(>80%) + Journal(>90%)
  • +
  • No.(>80%) + Pages(>80%) + Title(>95%) + Author(>80%) + Journal(>90%)
  • +
  • Pages(>80%) + Vol(>80%) + Title(>95%) + Author(>80%) + ISBN(>99%)
  • +
" + ), + check.names = FALSE + ) + }, striped = TRUE, hover = TRUE, width = "100%", sanitize.text.function = function(x) x) + + + # 4. Table Content - Phase 3 (Manual) + output$tbl_phase3 <- shiny::renderTable({ + data.frame( + "Category" = c("The 'Grey Area'", "Conflicting DOI", "Year Mismatch"), + "Criteria" = c( + "
    +
  • Title(>85%) + Author(>75%)
  • +
  • Title(>80%) + Abstract(>80%)
  • +
  • Title(>80%) + ISBN(>99%)
  • +
  • Title(>80%) + Journal(>80%)
  • +
", + "Pairs that match perfectly but have different DOIs.", + "Pairs that match perfectly but are published >1 year apart." + ), + check.names = FALSE + ) + }, striped = TRUE, hover = TRUE, width = "100%", sanitize.text.function = function(x) x) + #### Visualise tab #### # Reactive expression to filter the data for visualization (used for Heatmap and Upset) @@ -1565,7 +1788,7 @@ server <- function(input, output, session) { return(detailed_counts_final) }) - + # Rendering the detailed record table output$detailedRecordTab <- gt::render_gt({ # Check if base data is loaded