diff --git a/.travis.yml b/.travis.yml index 6e9e761..f8a09b0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,6 +40,9 @@ before_script: - sudo apt-get install -y --force-yes r-base r-cran-rserve - sudo apt-get install -y --force-yes libcurl4-gnutls-dev - sudo apt-get install -y --force-yes libxml2-dev + - sudo apt-get install -y --force-yes texlive-full + - wget https://github.com/jgm/pandoc/releases/download/1.17.2/pandoc-1.17.2-1-amd64.deb + - sudo dpkg -i pandoc-1.17.2-1-amd64.deb - sudo mkdir -p ~/R/Library - sudo echo 'R_LIBS=~/R/Library' > ~/.Renviron - sudo echo 'options(repos = "http://cran.rstudio.com")' > ~/.Rprofile @@ -48,6 +51,7 @@ before_script: - sudo Rscript -e 'if(!"gplots" %in% rownames(installed.packages())) { install.packages("gplots", dependencies = TRUE) }' - sudo Rscript -e 'if(!"ggplot2" %in% rownames(installed.packages())) { install.packages("ggplot2", dependencies = TRUE) }' - sudo Rscript -e 'if(!"gridExtra" %in% rownames(installed.packages())) { install.packages("gridExtra", dependencies = TRUE) }' + - sudo Rscript -e 'if(!"pander" %in% rownames(installed.packages())) { install.packages("pander", dependencies = TRUE) }' - sudo Rscript -e 'if(!"psy" %in% rownames(installed.packages())) { install.packages("psy", dependencies = TRUE) }' - sudo Rscript -e 'update.packages(ask = FALSE, instlib = "~/R/Library")' - sudo R CMD Rserve diff --git a/app/Lib/Rscripts/report.R b/app/Lib/Rscripts/report.R index 851546d..3cc88ca 100644 --- a/app/Lib/Rscripts/report.R +++ b/app/Lib/Rscripts/report.R @@ -1,728 +1,17 @@ -# Packages -library(gplots) -library(ggplot2) -library(plyr) -library(grid) -# nolint start -library(gridExtra) -# nolint end - -report <- function(filename, - number_students, +report <- function(template, + filename, number_answeroptions, - number_questions, cronbach, frequency_answer_options, percentage_answer_options, - input_correct, key, correct_frequency, correct_percentage, corrected_item_tot_cor, corrected_item_tot_cor_answ_option, title, - item_names) { - # Creating results for each item - item_list <- list() # Creates list to put item output in - colnames1 <- c("Answer Option", "Frequency", "Percentage", "IRC") - colnames2 <- c("Answer Option", "Frequency", "Percentage", "IRC", "Correct") - - # Filling a list with frequency, percentage and IRC for total and each answer options. - # every item gets a seperate data frame - # If no answeroptions are present, only the correct statistics are displayed. - # If there are more than 14 answer options, only the correct statistics are displayed to conserve space. - # This list is used for the first part of the output. This list is modified to a data frame to make the plots with - # the answer options. - for (i in 1:number_questions) { - if (number_answeroptions[i] > 0 & number_answeroptions[i] < 15) { - Correct <- rep("Incorrect", number_answeroptions[i] + 1) - Correct[which(key[, i] == 1)] <- "Correct" - - # Frequency is also stored at this point, but not used. - # in case someone wants to alter the script to display the frequency instead of the percentage - item_list[[i]] <- data.frame( - c(LETTERS[1:number_answeroptions[i]], "Missing"), - c(frequency_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]), - c(percentage_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]), - c(corrected_item_tot_cor_answ_option[c(2:(number_answeroptions[i] + 1), 1), i]), - Correct, - row.names = NULL - ) - colnames(item_list[[i]]) <- colnames2 - } else { - item_list[[i]] <- data.frame("Total", correct_frequency[i], correct_percentage[i], corrected_item_tot_cor[i]) - # Frequency is also stored at this point, but not used, - # in case someone wants to alter the script to display the frequency instead of the percentage - colnames(item_list[[i]]) <- colnames1 - } - } - - #Creating item names and putting them in the list as names - items <- numeric() - for (i in 1:number_questions) { - items <- c(items, paste("Item", as.character(item_names[i]), sep = " ")) - } - - names(item_list) <- items - - # Create extra variables to make the bar plots - item_list1 <- item_list - for (i in 1:number_questions) { - #Create right order on the x-axis (Missingness last) - if (any(key[, i] != 0) & number_answeroptions[i] < 15) { - item_list1[[i]]$Ans_Factor <- factor( - item_list1[[i]]$"Answer Option", - levels = c(LETTERS[1 : max(number_answeroptions)], "Missing") - ) - # nolint start - item_list1[[i]]$"Col_scale" <- as.numeric(item_list1[[i]]$Correct) * 2 - 3 - item_list1[[i]]$"IRC_col_scale" <- with(item_list1[[i]], IRC * Col_scale) - item_list1[[i]]$"Perc_col_scale" <- with(item_list1[[i]], Percentage) - # nolint end - } - } - - #Create data frame of all the items which have answer options. This is used to make barplots per answer option - if (any(key != 0)) { - ans_opt_datafrm <- plyr::ldply(item_list1[number_answeroptions != 0 & number_answeroptions < 15], data.frame) - names(ans_opt_datafrm)[1] <- "id" - ans_opt_datafrm[, 2] <- gsub("Missing", "Mis", ans_opt_datafrm[, 2]) - ans_opt_datafrm$Ans_Factor <- gsub("Missing", "Mis", ans_opt_datafrm$Ans_Factor) - ans_opt_datafrm$Ans_Factor <- factor( - ans_opt_datafrm$Ans_Factor, - levels = c(LETTERS[1 : max(number_answeroptions)], "Mis") - ) - ans_opt_datafrm$id <- factor(ans_opt_datafrm$id, levels = items[number_answeroptions != 0]) - ans_opt_datafrm$Perc_col_scale[ans_opt_datafrm$Correct == "Correct"] <- - 100 - ans_opt_datafrm$Perc_col_scale[ans_opt_datafrm$Correct == "Correct"] - - id2 <- as.numeric(ans_opt_datafrm$id) - ans_opt_datafrm <- cbind(ans_opt_datafrm, id2) - } - - # Create a dataframe with only the correct statistics in it. This is used in the general item plots - dataframe_correct <- data.frame( - factor(1:number_questions), - correct_frequency, - correct_percentage, - corrected_item_tot_cor - ) - names(dataframe_correct)[1] <- "item" - - # Starting explanation - start_text <- paste( - "Number of students : ", number_students, "\n", - "Number of questions : ", number_questions, "\n", - "Average score : ", round(mean(rowSums(input_correct)), digits = 3), "\n", - "Standard deviation : ", round(sd(rowSums(input_correct)), digits = 3), "\n", - "Cronbach's alpha : ", cronbach, "\n", - "Standard error : ", round(sd(rowSums(input_correct) * sqrt(1 - cronbach)), digits = 3), "\n", - sep = "" - ) - - explanation_items <- paste( - "Explanation Table", "\n", "\n", - "For each question the frequency, percentage and item rest correlations (IRC)", "\n", - "from every answer option are diplayed. The IRC should be (highly) positive", "\n", - "for the right answer option and low for the wrong answer option(s).", "\n", - sep = "" - ) - - pdf(file = filename, h = 8, w = 10) - - # Textplot plots outside the normal plot window, therefore xpd = NA - par(xpd = NA, mar = rep(2, 4)) - - # If maximum 7 answer options 8 items per page - if (max(number_answeroptions) < 8) { - # Creating the first page - layout(matrix(c(1, 1, 1, 1, 1, 1, - 2, 2, 3, 3, 3, 3, - 2, 2, 3, 3, 3, 3, - 2, 2, 3, 3, 3, 3, - 4, 4, 4, 5, 5, 5, - 4, 4, 4, 5, 5, 5, - 4, 4, 4, 5, 5, 5, - 6, 6, 6, 7, 7, 7, - 6, 6, 6, 7, 7, 7, - 6, 6, 6, 7, 7, 7, - 8, 8, 8, 9, 9, 9, - 8, 8, 8, 9, 9, 9, - 8, 8, 8, 9, 9, 9), ncol = 6, byrow = TRUE)) - # matrix(c(1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6), 3, 6, byrow = TRUE)) - - # Title - gplots::textplot(title, valign = "top", cex = 2) - - # Add overall test info and explanation - gplots::textplot(start_text, halign = "left", valign = "top", cex = 1, mar = c(1, 5, 5, 1)) - gplots::textplot(explanation_items, halign = "left", valign = "top", mar = c(1, 1, 5, 5)) - - # Display the first 6 items - if (number_questions < 7) { - for (i in 1:number_questions) { - gplots::textplot(item_list[[i]][, 1:4], show.rownames = FALSE, cex = 1, valign = "top", mar = c(5, 1, 0, 1)) - title(items[i], line = 2) - # Adding highlighting box for the right answeroption - if (any(key[, i] == 1)) { - for (j in 1 : sum(key[, i] == 1)) { - rect( - .19, - .92 - which(key[, i] == 1)[j] * .11, - .85, - 1.02 - which(key[, i] == 1)[j] * .11, - col = rgb(0, .9, 0, .5), - density = NA - ) - } - } - } - } else { - for (i in 1:6) { - gplots::textplot(item_list[[i]][, 1:4], show.rownames = FALSE, cex = 1, valign = "top", mar = c(5, 1, 0, 1)) - title(items[i], line = 2) - # Adding highlighting box for the right answeroption - if (any(key[, i] == 1)) { - for (j in 1 : sum(key[, i] == 1)) { - rect( - .19, - .92 - which(key[, i] == 1)[j] * .11, - .85, - 1.02 - which(key[, i] == 1)[j] * .11, - col = rgb(0, .9, 0, .5), - density = NA - ) - } - } - } - - # Creating the item output on second page forward, only if more than 6 questions - layout(matrix(c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8), 4, 6, byrow = TRUE)) - for (i in 7:number_questions) { - gplots::textplot(item_list[[i]][, 1:4], show.rownames = FALSE, cex = 1, valign = "top", mar = c(2, 1, 3, 1)) - title(items[i], line = -0.85) - - if (any(key[, i] == 1)) { - for (j in 1 : sum(key[, i] == 1)) { - rect( - .19, - .92 - which(key[, i] == 1)[j] * .096, - .85, - 1.01 - which(key[, i] == 1)[j] * .096, - col = rgb(0, .9, 0, .5), - density = NA - ) - } - } - } - } - } else { - # Creates PDF for more than 7 answeroptions, only 4 items per page are displayed - layout(matrix(c(1, 1, 1, 1, 1, 1, - 2, 2, 3, 3, 3, 3, - 2, 2, 3, 3, 3, 3, - 2, 2, 3, 3, 3, 3, - 4, 4, 4, 5, 5, 5, - 4, 4, 4, 5, 5, 5, - 4, 4, 4, 5, 5, 5, - 4, 4, 4, 5, 5, 5, - 6, 6, 6, 7, 7, 7, - 6, 6, 6, 7, 7, 7, - 6, 6, 6, 7, 7, 7, - 6, 6, 6, 7, 7, 7), ncol = 6, byrow = TRUE)) - - # Title - gplots::textplot(title, valign = "top", cex = 2) - - # Introduction text - gplots::textplot(start_text, halign = "left", valign = "top", cex = 1, mar = c(1, 5, 5, 1)) - gplots::textplot(explanation_items, halign = "left", valign = "top", mar = c(1, 1, 5, 5)) - - # Creating item output - for (i in 1:4) { - gplots::textplot(item_list[[i]][, 1:4], show.rownames = FALSE, cex = 1, valign = "top", mar = c(5, 1, 0, 1)) - title(items[i], line = 1.8) - - # Adding highlighting box for the right answeroption - if (any(key[, i] == 1) & number_answeroptions[i] < 15) { - for (j in 1 : sum(key[, i] == 1)) { - rect( - .19, - .95 - .065 * which(key[, i] == 1)[j], - .85, - 1.01 - .065 * which(key[, i] == 1)[j], - col = rgb(0, .9, 0, .5), - density = NA - ) - } - } - } - - # Second page and further - layout(matrix(c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6), 3, 6, byrow = TRUE)) - - # Displaying the items - for (i in 4:number_questions) { - gplots::textplot(item_list[[i]][, 1:4], show.rownames = FALSE, cex = 1, valign = "top", mar = c(2, 1, 3, 1)) - title(items[i], line = -.9) - - # Adding highlighting box for the right answeroption - if (any(key[, i] == 1) & number_answeroptions[i] < 15) { - for (j in 1 : sum(key[, i] == 1)) { - rect( - .19, - .95 - .065 * which(key[, i] == 1)[j], - .85, - 1.01 - .065 * which(key[, i] == 1)[j], - col = rgb(0, .9, 0, .5), - density = NA - ) - } - } - } - } - - ### Frequency Plot for total items - - # Create chart with Answer Option on x-axis and IRC on y-asix - bar_plot_freq <- ggplot2::ggplot( - dataframe_correct, - ggplot2::aes(item, correct_percentage, fill = correct_percentage) - ) - - bar_freq <- - bar_plot_freq + - # Create Bar chart - geom_bar(stat = "identity") + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_fill_gradient2(low = "red", mid = "green", high = "red", midpoint = 50, limits = c(0, 100), name = "") + - # Change y-axis limit to constant - coord_cartesian(ylim = c(0, 100)) + - # Change titles and x axis name - labs(x = "Item", title = "Correct Percentage", y = "Percentage") + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 8), - axis.ticks.x = element_line(size = .4) - ) - - ### IRC Bar Plot for total items - - # Create chart with Answer_Option on x-axis and IRC on y-asix - bar_plot_IRC <- ggplot2::ggplot( - dataframe_correct, - ggplot2::aes(item, corrected_item_tot_cor, fill = corrected_item_tot_cor) - ) - bar_IRC <- - bar_plot_IRC + - # Create Bar chart - geom_bar(stat = "identity") + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_fill_gradient(low = "red", high = "green", limits = c(-.1, .4), name = "") + - # coord_cartesian(ylim = c(, 1)) + # Change y-axis limit to constant - # Change titles and x axis name - labs(x = "Item", title = "Item Rest Correlations", y = "Item Rest Correlation") + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 8), - axis.ticks.x = element_line(size = .4) - ) - - # Calculating which questions are displayed on which plot. - # Determination rule: max 80 answer options per plot - if (any(key != 0)) { - tot_answer_options <- 0 - Questions_p1 <- 0 - Questions_p2 <- numeric() - Questions_p3 <- numeric() - Questions_p4 <- numeric() - questions_with_ans_opts <- nlevels(ans_opt_datafrm$id) - - # Calculating which questions are on the first plot. Total answeroptions should be less than 100 - while (tot_answer_options < 100 & Questions_p1 < questions_with_ans_opts) { - Questions_p1 <- Questions_p1 + 1 - tot_answer_options <- tot_answer_options + sum(ans_opt_datafrm$id2 == Questions_p1) - } - - # Calculating which questions are on the second plot. Total answeroptions should be less than 100 - Questions_p2 <- Questions_p1 - while (tot_answer_options < 200 & Questions_p2 < questions_with_ans_opts) { - Questions_p2 <- Questions_p2 + 1 - tot_answer_options <- tot_answer_options + sum(ans_opt_datafrm$id2 == Questions_p2) - } - - # Calculating which questions are on the third plot. Total answeroptions should be less than 100 - Questions_p3 <- Questions_p2 - while (tot_answer_options < 300 & Questions_p3 < questions_with_ans_opts) { - Questions_p3 <- Questions_p3 + 1 - tot_answer_options <- tot_answer_options + sum(ans_opt_datafrm$id2 == Questions_p3) - } - - # Emptying the plots if no questions are present for that plot - Questions_p4 <- ifelse(Questions_p3 < questions_with_ans_opts, questions_with_ans_opts, 0) - Questions_p3 <- ifelse(Questions_p3 != Questions_p2, Questions_p3, 0) - Questions_p2 <- ifelse(Questions_p2 != Questions_p1, Questions_p2, 0) - - # Creating IRC plots 16 items per plot. - # The only difference between these for codes is in the item selection on the first row - # ([1:Questions_p1] in the first case - - bar_plot_freq1 <- ggplot2::ggplot( - # Create subset of first 16 questions - ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[1:Questions_p1]), ], - # Create chart with Answer Option on x-axis and IRC on y-asix - ggplot2::aes("Answer Option", Percentage, fill = Correct, colour = Perc_col_scale) - ) - bar_freq1 <- - bar_plot_freq1 + - # Create Bar chart - geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") + - # Display the different items - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - # Fill in the bars: Green right answer options, Red wrong answer options - scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_colour_gradient(low = "green", high = "red", guide = FALSE) + - # Change y-axis limit to constant - coord_cartesian(ylim = c(0, 100)) + - # Change titles and x axis name - labs( - x = "Answer Options", - title = paste( - "Percentage chart(s) per question and per answer options. The green bars represent the right answer ", - "options.", - "\n", - "The color of the border represents the desirability (50% for the right answer options, low for the wrong ", - "answer options)", - sep = "" - ) - ) + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 4.8), - axis.ticks.x = element_line(size = .1), - title = element_text(size = 8) - ) - - if (Questions_p2 != 0) { - bar_plot_freq2 <- ggplot2::ggplot( - # Create subset of the other questions - ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[(Questions_p1 + 1) : Questions_p2]), ], - # Create chart with Answer Option on x-axis and IRC on y-asix - ggplot2::aes("Answer Option", Percentage, fill = Correct, colour = Perc_col_scale) - ) - bar_freq2 <- - bar_plot_freq2 + - # Create Bar chart - geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") + - # Display the different items - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - # Fill in the bars: Green right answer options, Red wrong answer options - scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_colour_gradient(low = "green", high = "red", guide = FALSE) + - # Change y-axis limit to constant - coord_cartesian(ylim = c(0, 100)) + - # Change titles and x axis name - labs(x = "Answer Options", title = "Item") + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 4.8), - axis.ticks.x = element_line(size = .1) - ) - } - - if (Questions_p3 != 0) { - bar_plot_freq3 <- ggplot2::ggplot( - # Create subset of the other questions - ans_opt_datafrm[ans_opt_datafrm[, 1] %in% c(levels(ans_opt_datafrm$id)[(Questions_p2 + 1) : Questions_p3]), ], - # Create chart with Answer Option on x-axis and IRC on y-asix - ggplot2::aes("Answer Option", Percentage, fill = Correct, colour = Perc_col_scale) - ) - bar_freq3 <- - bar_plot_freq3 + - # Create Bar chart - geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") + - # Display the different items - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - # Fill in the bars: Green right answer options, Red wrong answer options - scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_colour_gradient(low = "green", high = "red", guide = FALSE) + - # Change y-axis limit to constant - coord_cartesian(ylim = c(0, 100)) + - # Change titles and x axis name - labs(x = "Answer Options", title = "Item") + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 4.8), - axis.ticks.x = element_line(size = .1) - ) - } - - if (Questions_p4 != 0) { - bar_plot_freq4 <- ggplot2::ggplot( - # Create subset of the other questions - ans_opt_datafrm[ans_opt_datafrm[, 1] %in% c(levels(ans_opt_datafrm$id)[(Questions_p3 + 1) : Questions_p4]), ], - # Create chart with Answer Option on x-axis and IRC on y-asix - ggplot2::aes("Answer Option", Percentage, fill = Correct, colour = Perc_col_scale) - ) - bar_freq4 <- - bar_plot_freq4 + - # Create Bar chart - geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") + - # Display the different items - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - # Fill in the bars: Green right answer options, Red wrong answer options - scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_colour_gradient(low = "green", high = "red", guide = FALSE) + - # Change y-axis limit to constant - coord_cartesian(ylim = c(0, 100)) + - # Change titles and x axis name - labs(x = "Answer Options", title = "Item") + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 4.8), - axis.ticks.x = element_line(size = .1) - ) - } - - # Creating IRC plots 16 items per plot. - # The only difference between these for codes is in the item selection on the first row - # ([1:Questions_p1] in the first case) - - bar_plot_IRC1 <- ggplot2::ggplot( - # Create subset of first 16 questions - ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[1:Questions_p1]), ], - # Create chart with Answer Option on x-axis and IRC on y-asix - ggplot2::aes("Answer Option", IRC, fill = Correct, colour = IRC_col_scale) - ) - bar_IRC1 <- - bar_plot_IRC1 + - # Create Bar chart - geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") + - # Display the different items - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - # Fill in the bars: Green right answer options, Red wrong answer options - scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_colour_gradient(low = "green", high = "red", guide = FALSE) + - # Change y-axis limit to constant - coord_cartesian(ylim = c(-.3, .4)) + - # Change titles and x axis name - labs( - x = "Answer Options", - title = paste( - "IRC chart(s) per question and per answer options. The green bars represent the right answer options.", - "\n", - "The color of the border represents the desirability (high for the right answer options, low for the ", - "wrong answer options)", - sep = "" - ) - ) + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 4.8), - axis.ticks.x = element_line(size = .1), - title = element_text(size = 8) - ) - - if (Questions_p2 != 0) { - bar_plot_IRC2 <- ggplot2::ggplot( - # Create subset of first 16 questions - ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[(Questions_p1 + 1) : Questions_p2]), ], - # Create chart with Answer Option on x-axis and IRC on y-asix - ggplot2::aes("Answer Option", IRC, fill = Correct, colour = IRC_col_scale) - ) - bar_IRC2 <- - bar_plot_IRC2 + - # Create Bar chart - geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") + - # Display the different items - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - # Fill in the bars: Green right answer options, Red wrong answer options - scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_colour_gradient(low = "green", high = "red", guide = FALSE) + - # Change y-axis limit to constant - coord_cartesian(ylim = c(-.3, .4)) + - # Change titles and x axis name - labs(x = "Answer Options", title = "Item") + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 4.8), - axis.ticks.x = element_line(size = .1) - ) - } - - if (Questions_p3 != 0) { - bar_plot_IRC3 <- ggplot2::ggplot( - # Create subset of first 16 questions - ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[(Questions_p2 + 1) : Questions_p3]), ], - # Create chart with Answer Option on x-axis and IRC on y-asix - ggplot2::aes("Answer Option", IRC, fill = Correct, colour = IRC_col_scale) - ) - bar_IRC3 <- - bar_plot_IRC3 + - # Create Bar chart - geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") + - # Display the different items - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - # Fill in the bars: Green right answer options, Red wrong answer options - scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_colour_gradient(low = "green", high = "red", guide = FALSE) + - # Change y-axis limit to constant - coord_cartesian(ylim = c(-.3, .4)) + - # Change titles and x axis name - labs(x = "Answer Options", title = "Item") + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 4.8), - axis.ticks.x = element_line(size = .1) - ) - } - - if (Questions_p4 != 0) { - bar_plot_IRC4 <- ggplot2::ggplot( - # Create subset of first 16 questions - ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[(Questions_p3 + 1) : Questions_p4]), ], - # Create chart with Answer Option on x-axis and IRC on y-asix - ggplot2::aes("Answer Option", IRC, fill = Correct, colour = IRC_col_scale) - ) - bar_IRC4 <- - bar_plot_IRC4 + - # Create Bar chart - geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") + - # Display the different items - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - # Fill in the bars: Green right answer options, Red wrong answer options - scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_colour_gradient(low = "green", high = "red", guide = FALSE) + - # Change y-axis limit to constant - coord_cartesian(ylim = c(-.3, .4)) + - # Change titles and x axis name - labs(x = "Answer Options", title = "Item") + - theme( - strip.text.x = element_text(size = 7), - axis.text.x = element_text(size = 4.8), # Change font size of item names and Answer options - axis.ticks.x = element_line(size = .1) - ) - } - } - - # Creating the bar plots. Depending on the amount of plots, different arranges are made. - suppressWarnings( - if (any(key != 0)) { - if (Questions_p2 == 0) { - # Only 1 Answer Option plot --> all plots on 1 page - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(4, 1))) - vplayout <- function(x, y) { - grid::viewport(layout.pos.row = x, layout.pos.col = y) - } - print(bar_freq, vp = vplayout(1, 1)) - print(bar_IRC, vp = vplayout(2, 1)) - print(bar_freq1, vp = vplayout(3, 1)) - print(bar_IRC1, vp = vplayout(4, 1)) - } - - if (Questions_p3 == 0 & Questions_p2 != 0) { - # 2 Answer Option plots --> 2 pages of plot output - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(3, 1))) - vplayout <- function(x, y) { - grid::viewport(layout.pos.row = x, layout.pos.col = y) - } - print(bar_freq, vp = vplayout(1, 1)) - print(bar_IRC, vp = vplayout(2, 1)) - print(bar_freq1, vp = vplayout(3, 1)) - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(3, 1))) - - print(bar_freq2, vp = vplayout(1, 1)) - print(bar_IRC1, vp = vplayout(2, 1)) - print(bar_IRC2, vp = vplayout(3, 1)) - } - - if (Questions_p4 == 0 & Questions_p3 != 0) { - # 3 Answer Option plots --> 2 pages of plot output - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(2, 1))) - vplayout <- function(x, y) { - grid::viewport(layout.pos.row = x, layout.pos.col = y) - } - print(bar_freq, vp = vplayout(1, 1)) - print(bar_IRC, vp = vplayout(2, 1)) - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(3, 1))) - - print(bar_freq1, vp = vplayout(1, 1)) - print(bar_freq2, vp = vplayout(2, 1)) - print(bar_freq3, vp = vplayout(3, 1)) - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(3, 1))) - - print(bar_IRC1, vp = vplayout(1, 1)) - print(bar_IRC2, vp = vplayout(2, 1)) - print(bar_IRC3, vp = vplayout(3, 1)) - } - - if (Questions_p4 != 0) { - # 4 Answer options plots --> 3 pages of plot output - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(2, 1))) - vplayout <- function(x, y) { - grid::viewport(layout.pos.row = x, layout.pos.col = y) - } - print(bar_freq, vp = vplayout(1, 1)) - print(bar_IRC, vp = vplayout(2, 1)) - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(4, 1))) - - print(bar_freq1, vp = vplayout(1, 1)) - print(bar_freq2, vp = vplayout(2, 1)) - print(bar_freq3, vp = vplayout(3, 1)) - print(bar_freq4, vp = vplayout(4, 1)) - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(4, 1))) - - print(bar_IRC1, vp = vplayout(1, 1)) - print(bar_IRC2, vp = vplayout(2, 1)) - print(bar_IRC3, vp = vplayout(3, 1)) - print(bar_IRC4, vp = vplayout(4, 1)) - } - } else { - # Plot if no answer options are present - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(2, 1))) - vplayout <- function(x, y) { - grid::viewport(layout.pos.row = x, layout.pos.col = y) - } - print(bar_freq, vp = vplayout(1, 1)) - print(bar_IRC, vp = vplayout(2, 1)) - } - ) - dev.off() + item_names, + student_scores, + categories) { + rmarkdown::render(template, output_format = "pdf_document", output_file = filename) } diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd new file mode 100644 index 0000000..89c2ead --- /dev/null +++ b/app/Lib/Rscripts/report.Rmd @@ -0,0 +1,780 @@ +--- +html_document: null +fig_width: 11 +classoption: landscape +keep_md: yes +output: html_document +pdf_document: default +--- + +```{r, echo = FALSE, warning = FALSE, message = FALSE} +library(ggplot2) +library(pander) +library(plyr) +``` + + +```{r, echo = FALSE, eval = TRUE, results = "asis"} + +create_item_list <- function(number_answeroptions, + correct_frequency, + correct_percentage, + frequency_answer_options, + percentage_answer_options, + corrected_item_tot_cor, + corrected_item_tot_cor_answ_option, + item_names, + key) { + # Creates a list with frequency, percentage correct, and IRC for every item. + # + # Args: + # number_answeroptions: Vector with number of answer options per item. + # Length should be at least 3 and the same as + # correct_frequency, correct_percentage, item_names, + # and corrected_item_tot_cor. + # Length should also be the same as the number of + # colums of: frequency_answer_options, + # percentage_answer_options, corrected_item_tot_cor, + # corrected_item_tot_cor_answ_option, and key + 1. + # correct_frequency: Vector with total number correct per item. + # Length requirements: see number_answeroptions. + # correct_percentage: Vector with percentage correct per item. + # Length requirements: see number_answeroptions. + # frequency_answer_options: Matrix with [i,j] the frequency of answer option + # i of item j. Dimension requirements: see key + # + one extra column (for missing). + # percentage_answer_options: Matrix with [i,j] the percentage of + # answer option i of item j. + # Dimension requirements: see key + 1 extra + # column (for missing) + # corrected_item_tot_cor: Vector with item rest correlation per item. + # Length requirements: see number_answeroptions + # corrected_item_tot_cor_answ_option: Matrix with [i,j] the item total + # correlation for answer option i of + # item j. Dimension requirements: + # see key + one extra column. + # key: Matrix of 0's and 1's. key[i,j] indicates whether answer option i to + # item j is right (1) or wrong (0). If a column (item) consists of only + # 0s, the item is interpreted as graded manually. + # Number of columns should be equal to the length of: + # number_answeroptions, correct_frequency, correct_percentage, and + # corrected_item_tot_cor. + # The following objects have the same dimensions as key + 1 extra row: + # frequency_answer_options, percentage_answer_options, and + # corrected_item_tot_cor_answ_option + # + # Returns: + # List with one entry per item. Each entry consists of a data frame with + # frequency, percentage and IRC for the total item and the answer options + # (if the item has answer options) + + item_list <- list() # Creates list to put item output in + colnames1 <- c(" ", "Frequency", "Percentage", "IRC") + colnames2 <- c("Answer Option", "Frequency", "Percentage", "IRC", "Correct") + + # For every item, create entry in list with frequencies, percentages, and IRCs + for (i in 1:length(item_names)) { + if (number_answeroptions[i] > 0) { + correct <- c( + ifelse( + key[1:number_answeroptions[i], i] == 1, + "Correct", + "Incorrect" + ), + "Incorrect" + ) + + item_list[[i]] <- data.frame( + c(LETTERS[1:number_answeroptions[i]], "Missing"), + c(frequency_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]), + c(percentage_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]), + c(corrected_item_tot_cor_answ_option[c(2:(number_answeroptions[i] + 1), 1), i]), + correct, + row.names = NULL + ) + colnames(item_list[[i]]) <- colnames2 + + } else { + item_list[[i]] <- data.frame("Correct", + correct_frequency[i], + correct_percentage[i], + corrected_item_tot_cor[i] + ) + colnames(item_list[[i]]) <- colnames1 + } + } + + names(item_list) <- item_names + return(item_list) +} + +# Use the function defined above +item_list <- create_item_list( + number_answeroptions, + correct_frequency, + correct_percentage, + frequency_answer_options, + percentage_answer_options, + corrected_item_tot_cor, + corrected_item_tot_cor_answ_option, + item_names, + key +) + +cat(paste("#", title)) +``` + +# General + +What | Result | +:---------------------- | :------------- | +Number of students | `r length(student_scores)`| +Number of items | `r length(item_names)`| +Average score | `r round(mean(student_scores), digits = 3)`| +Standard deviation | `r round(sd(student_scores), digits = 3)`| +Cronbach's alpha | `r cronbach`| +Standard error | `r round(sd(student_scores * sqrt(1 - cronbach)), digits = 3)`| + + +# Item statistics + +For each item the frequency, percentage and item rest correlations (IRC) of +every answer option are shown The IRC should be (highly) positive for the +right answer option and low for the wrong answer option(s). + +```{r pander, results = "asis", eval = TRUE, echo = FALSE, warning = FALSE, message = FALSE} + +print_item_tables <- function(item_list, key) { + # Print table for each item with frequency, percentage and the + # item-rest correlation. Correct answers are highlighted in bold. + # + # Args: + # item_list: list with one entry per item. Each entry has a data frame + # Data frame has a row for every answer option + one for missing. + # Columns are: Answer option, frequency, percentage, IRC, + # and Correct (answer option is Correct/Incorrect) + # Number of rows of each data frame has a maximum as the number of + # columns of key + 1. + # key: Matrix of 0's and 1's. key[i,j] implies wether answer option i + # to item j is right (1) or wrong (0). If a column (item) consists of + # only 0s, the item is interpreted as graded manually. + # Number of columns should be equal to length of item_list + + pander::panderOptions("knitr.auto.asis", FALSE) + for (i in 1:length(item_list)) { + if (any(key[, i] == 1)) { + item_list[[i]]$Correct <- revalue( + item_list[[i]]$Correct, + c("Correct" = "X", "Incorrect" = "") + ) + pander::emphasize.strong.rows(which(key[, i] == 1, arr.ind = TRUE)) + pander::pander(item_list[i]) + } else { + t <- item_list[i] + pander::pander(t) + } + } +} + +print_item_tables(item_list, key) +``` + +\pagebreak + +```{r, echo = FALSE, message = FALSE, warning = FALSE, fig.width = 11, results = 'asis'} + +create_df <- function(item_names, correct_percentage, corrected_item_tot_cor) { + # Creates a data frame with the overall item statistics including a colour + # indicator for the percentage correct and IRC. Colours is defined below. + # + # Args: + # These are the same as defined in create_item_list. + # + # Returns: + # Data frame with a row for every item. Columns consist of the item name, + # percentage correct, colour indicator for percentage, IRC and colour + # indicator for the IRC. + + correct_df <- data.frame( + item = factor(item_names, item_names), + correct_percentage, + perc_col = ifelse( + correct_percentage < 40, 0, + ifelse(correct_percentage > 70, 1, 1 / 75 * correct_percentage - 7 / 30) + ), + corrected_item_tot_cor, + irc_col = ifelse( + corrected_item_tot_cor < 0, 0, + ifelse( + corrected_item_tot_cor > .05, + 1, + 8 * corrected_item_tot_cor + 3 / 10) + ) + ) + + return(correct_df) +} + +plot_overall_perc <- function(correct_df) { + # Plots bar graph of percentage correct of every item. Bars are coloured + # depending on the percentage correct: + # < 40: Red + # 40 - 70: Orange till green-ish + # > 70: Green + # + # Args: + # correct_df: data frame with item name, percentage correct and colour code + # for the percentages. + # + # Returns: + # Explanation of the plot and the plot itself + + cat(" + +## Percentage Correct + +Plot of percentage correct per item. If *more than 70%* of the students answer +an item correctly, the students have mastered the material well enough, thus it +is coloured *green*. If *less than 40%* of the students answer an item +correctly, the students have not mastered the material, and is coloured *red*. +*Orange* indicates the percentage correct lies *between 40% and 70%*. The +colours are based on multiple choice items with 3 or 4 answer options. + +") + + bar_freq <- ggplot2::ggplot( + correct_df, + ggplot2::aes(item, correct_percentage, fill = perc_col) + ) + bar_freq <- + bar_freq + + # Create bar chart + geom_bar(stat = "identity") + + # Creat colour boundary + scale_fill_gradient( + low = "red", + high = "green", + limits = c(0, 1), + guide = FALSE + ) + + # Change y-axis limit to constant + coord_cartesian(ylim = c(0, 100)) + + # Axes names + labs(x = "Item", y = "Percentage Correct") + + # Black en white + theme_bw() + + # Change various sizes + theme( + axis.text.x = element_text(size = 8, angle = 90), + axis.ticks.x = element_line(size = .4) + ) + + suppressWarnings(print(bar_freq)) +} + +plot_overall_irc <- function(correct_df) { + # Plots bar graph of IRC of every item. Bars are coloured by IRC: + # < 0: Red + # 0 - .10: Orange till green-ish + # > .10: green + # + # Args: + # correct_df: data frame with item name, percentage correct and colour code + # for the percentages. + # + # Returns: + # Explanation of the plot and the plot itself + + cat("\\pagebreak + +## Item rest correlations + +Plot of Item Rest Correlations per item: The correlation between an item (0 for +wrong answers and 1 for right answers) and the sum of all other items. Items +with a correlation *higher than .05* indicate that students who answered this +item correctly answered the other items more often correctly than students who +answered this item incorrectly. Thus it is coloured *green*. Items with a +correlation *lower than 0* indicate no relationship between this item and the +other items, or that students who answered this item correctly answered the +other items more often incorrectly than students who answered this incorrectly, +thus it is coloured *red*. *Between 0 and .05*, an item is coloured *orange*. + +") + + # Create bar chart + bar_irc <- ggplot2::ggplot( + correct_df, + ggplot2::aes(item, corrected_item_tot_cor, fill = irc_col) + ) + bar_irc <- + bar_irc + + geom_bar(stat = "identity") + + scale_fill_gradient( + low = "red", + high = "green", + limits = c(0, 1), + guide = FALSE + ) + + theme_bw() + + labs(x = "Item", y = "Item Rest Correlation") + + theme( + strip.text.x = element_text(size = 7), + axis.text.x = element_text(size = 8, angle = 90), + axis.ticks.x = element_line(size = .4) + ) + + suppressWarnings(print(bar_irc)) +} + +correct_df <- create_df(item_names, correct_percentage, corrected_item_tot_cor) +plot_overall_perc(correct_df) +plot_overall_irc(correct_df) +``` + +```{r, echo = FALSE, eval = TRUE, warning= FALSE, fig.width = 11} + +make_perc_col <- function(percentage, correct) { + # Calculate colours for the percentage answer options. + # + # Args: + # percentage: Percentage answered per answer options + # correct: Vector of "Correct" and "Incorrect" of same length of percentage + # + # Returns: + # Value between 0 and 1. 0 corresponds to red and 1 to green in the plots. + # For the correct options: + # * < 40: red + # * 40 - 70: orange - green-ish + # * > 70: green + # For the incorrect options: difference between correct answer option and + # incorrect answer options are calculated. Colours are based on difference: + # * < -10: green + # * -10 - 0: orange - green-ish + # * > 0: green + + perc_col <- vector("numeric", length = length(correct)) + perc_col[correct == "Correct"] <- ifelse( + percentage[correct == "Correct"] < 40, + 0, + ifelse( + percentage[correct == "Correct"] > 70, + 1, + 1 / 75 * percentage[correct == "Correct"] - 7 / 30 + ) + ) + dif <- percentage[correct == "Incorrect"] - min(percentage[correct == "Correct"]) + + perc_col[correct == "Incorrect"] <- ifelse( + dif >= 0, + 0, + ifelse(dif < -10, 1, -1 / 25 * dif + 3 / 10) + ) + perc_col +} + +make_irc_col <- function(irc, correct) { + # Calculate colours for the percentage answer options. + # + # Args: + # IRC: Vector of IRC per answer options. Same length as correct + # correct: Vector of "Correct" and "Incorrect" of same length of IRC + # + # Returns: + # Value between 0 and 1. 0 corresponds to red and 1 to green in the plots. + # For the correct answer options: + # * < 0: red + # * 0 - 0.05: orange - green-ish + # * > 0.05: green + # For the incorrect answer options: difference between correct answer option + # and incorrect answer option are calculated. Colours are based on this + # difference: + # * > 0: red + # * -0.05 - 0: orange - green-ish + # * < -0.05: red + + irc_col <- numeric() + irc_col[correct == "Correct"] <- ifelse( + irc[correct == "Correct"] < 0, + 0, + ifelse(irc[correct == "Correct"] > .05, + 1, + 8 * irc[correct == "Correct"] + 3 / 10 + ) + ) + dif <- irc[correct == "Incorrect"] - min(irc[correct == "Correct"]) + irc_col[correct == "Incorrect"] <- ifelse( + dif >= 0, + 0, + ifelse(dif < -.05, 1, -8 * dif + 3 / 10) + ) + return(irc_col) +} + +make_answ_df <- function(item_list, key, number_answeroptions, item_names) { + # Creates a data frame with the statistics per answer option including a + # colour indicator for the percentage correct and IRC. + # + # Args: + # These are the same as defined earlier + # + # Returns: + # Data frame with a row for every answer option. Columns consist of the + # item name, answer options, frequency, percentage, IRC, Correct (whether + # an option is correct/incorrect), colour for percentage and colour for IRC + + for (i in 1:length(item_list)) { + if (any(key[, i] != 0)) { + item_list[[i]]$perc_col <- with(item_list[[i]], make_perc_col(Percentage, Correct)) + item_list[[i]]$irc_col <- with(item_list[[i]], make_irc_col(IRC, Correct)) + } + } + ans_opt_df <- plyr::ldply(item_list[number_answeroptions != 0], data.frame) + names(ans_opt_df)[1] <- "id" + ans_opt_df$Answer.Option <- gsub("Missing", "Mi", ans_opt_df$Answer.Option) + ans_opt_df$Answer.Option <- factor( + ans_opt_df$Answer.Option, + levels = c(LETTERS[1:max(number_answeroptions)], "Mi") + ) + ans_opt_df$id <- factor( + ans_opt_df$id, + levels = item_names[number_answeroptions != 0] + ) + if (all(key == 0)) { + ans_opt_df <- NULL + } + + ans_opt_df +} + +calc_n_plot <- function(ans_opt_df, key) { + # Calculates which item is displayed in which plot. The total number of + # answer options is too large (usually) to display them all in one plot. + # + # Args: + # ans_opt_df: As defined in function above + # + # Returns: + # n_plots: the number of plots + # items_in_plot: list entry per plot. Every entry consists of item names + # in that plot + + if (any(key != 0)) { + max_answ_opts <- 80 # Maximum number of bars per plot + tot_answ_opts <- nrow(ans_opt_df) + n_plots <- ceiling(tot_answ_opts / max_answ_opts) + answ_opts_per_plot <- floor(tot_answ_opts / n_plots) + + items_in_plot <- list(unique(ans_opt_df[1:answ_opts_per_plot, 1])) + if (n_plots > 1) { + for (i in 2:n_plots) { + items_in_plot[[i]] <- unique( + ans_opt_df[( ( (i - 1) * answ_opts_per_plot) + 1) : (i * answ_opts_per_plot), 1] + ) + if (items_in_plot[[i]][1] %in% items_in_plot[[i - 1]]) { + items_in_plot[[i]] <- items_in_plot[[i]][-1] + } + } + } + assign("items_in_plot", items_in_plot, envir = globalenv()) + } +} + +ans_opt_df <- make_answ_df(item_list, key, number_answeroptions, item_names) +calc_n_plot(ans_opt_df, key) + +``` + +```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11, results= 'asis'} + +plot_perc_answ <- function(ans_opt_df, items_in_plot, key) { + # Plots bar graph(s) of the percentages of every answer option + # + # Args: + # All defined as before + # + # Returns: + # Description of how to read the plots + # One or multiple plots + + if (any(key != 0)) { + + cat("\\pagebreak + +## Percentage chosen per answer options + +Percentage plot of all answer options per item. **White** bars represent the +**right** answer option(s) and **black** the **wrong** options. + +The border represents the performance of an item. The **right** answer options +are coloured *green* if *more than 70%* of the students choose this option. In +that case, the students have mastered the material. They are coloured *red* if +*less than 40%* choose this option. *Between 40% and 70%*, it is coloured +*orange*. + +The **wrong** answer options are coloured *green* if an option is chosen *10 +percentage point less than the right option*. In that case, more students chose +the right answer option than the wrong option indicating that students are not +confused by the wrong option. They are coloured *red* if an option is chosen +*equally or more often than the right option*. In this case, students pick the +wrong answer option often, indicating that this option may be right as well. It +is coloured *orange* in between these two. + +") + + for (i in 1:length(items_in_plot)) { + bar_freq1 <- ggplot2::ggplot( + ans_opt_df[ans_opt_df$id %in% items_in_plot[[i]], ], + ggplot2::aes( + "Answer Option", + Percentage, + fill = Correct, + colour = perc_col) + ) + bar_freq1 <- + bar_freq1 + + geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) + + # Plot items + facet_grid(. ~ id, scales = "free_x", space = "free_x") + + # Fill in the bars + scale_fill_manual( + values = c("Incorrect" = "Black", "Correct" = "White"), + guide = FALSE + ) + + # Colour the bandaries + scale_colour_gradient( + low = "red", + high = "green", + limits = c(0, 1), + guide = FALSE + ) + + # Change y-axis limits to constants + coord_cartesian(ylim = c(0, 100)) + + # Names x-axis + labs(x = "Answer Options") + + theme_bw() + + # Change various font sizes + theme( + axis.text.x = element_text(size = 8), + axis.text.y = element_text(size = 12), + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15) + ) + + suppressWarnings(print(bar_freq1)) + } + } +} + +plot_irc_answ <- function(ans_opt_df, items_in_plot, key) { + # Plots bar graph(s) of the IRC of every answer option + # + # Args: + # All defined as before + # + # Returns: + # Description of how to read the plots + # One or multiple plots + + if (any(key != 0)) { + + cat('\\pagebreak + +## Item rest correlations per answer option + +Item rest correlation (IRC) plot of all answer options per item. The IRC is the +correlation between an answer option (1 for students who chose this options and +0 for student who did not) and the sumscore on all other items. **White** bars +represent the **right** answer option(s) and **black** the **wrong** options. + +The border represents the performance of an item. The **right** answer options +are coloured *green* if the IRC is *higher than .05*. In that case, students who +picked the right answer option scored higher on all other items than student who +did not pick the right option. They are coloured *red* if the IRC is *less than +0*. In that case, students who picked the right answer option scored equally or +worse on the other items than students who picked the other options. Between *0 +and .05*, the option is coloured *orange*. + +The **wrong** answer options are coloured *green* if the IRC is *more than .1 +lower than the right option*. In that case, students who answered the other +items incorrectly, usually answered this item incorrectly as well. They are +coloured *red* if the IRC is *equal or higher than the right answer option*. +In that case, students who answered the other items correctly may have answered +this item incorrectly. In between these two, the option is coloured *orange*. + +') + + # Creating IRC plots + for (i in 1:length(items_in_plot)) { + # Select items and variables to plot + bar_irc1 <- ggplot2::ggplot( + ans_opt_df[ans_opt_df$id %in% items_in_plot[[i]], ], + ggplot2::aes("Answer Option", IRC, fill = Correct, colour = irc_col) + ) + bar_irc1 <- + bar_irc1 + + geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) + + facet_grid(. ~ id, scales = "free_x", space = "free_x") + + scale_fill_manual(values = c("Incorrect" = "Black", "Correct" = "White"), guide = FALSE) + + scale_colour_gradient( + low = "red", + high = "green", + limits = c(0, 1), + guide = FALSE + ) + + # Change y-axis limit to either the maximum and minimum IRC + coord_cartesian(ylim = c(min(ans_opt_df$IRC) - .01, max(ans_opt_df$IRC) + .01)) + + labs(x = "Answer Options") + + theme_bw() + + theme( + axis.text.x = element_text(size = 8), + axis.text.y = element_text(size = 12), + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15) + ) + suppressWarnings(print(bar_irc1)) + } + } +} + +plot_irc_answ(ans_opt_df, items_in_plot, key) +plot_perc_answ(ans_opt_df, items_in_plot, key) +``` + +\pagebreak + +## Cumulative Distribution + +Cumulative Distribution showing all possible scores and the percentage of +students that scored that number or higher + +```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11} +cum_hist <- function(student_scores, n_item) { + # Create a reversed cumulative histogram, i.e. the height of score x is + # the percentage of students who scores x or higher. + # + # Args: + # student_scores: numeric vector of test score per student. + # n_item: number of items + # + # Returns: + # Reversed cumulative histogram plot + + h <- heights <- length(student_scores) + n <- sort(unique(student_scores)) + + for (i in 1:length(n)) { + h <- h - length(student_scores[student_scores == n[i]]) + heights <- c(heights, h) + } + + cesuurlijnen <- seq(0.2, 0.8, 0.2) * length(student_scores) + gridlijnen <- round(seq(0, length(student_scores), length(student_scores) / 25), 0) + + gridlijnen <- gridlijnen[grep(paste(round(cesuurlijnen, 0), collapse = "|"), gridlijnen)] + + heights <- heights[1:length(unique(student_scores))] + percent <- heights / length(student_scores) * 100 + df <- data.frame(score = sort(unique(student_scores)), total = heights, percent = round(percent, 2)) + + vec <- df[1, 1]:df[nrow(df), 1] # save all possible total scores + + # Find if a total score is missing and add it + mis <- NA + if (sum(! (vec %in% df[, 1]) != 0)) { + mis <- vec[! (vec %in% df[, 1])] + } + if (! is.na(mis[1])) { + for (i in 1:length(mis)) { + df[ (nrow(df) + 1), ] <- c(mis[i], 0, 0) + } + df <- df[order(df$score), ] + } + + + # if no one gets a certain total score, + # replace the 0 with the next total score + suppressWarnings( + df[which(df[, 2] == 0, arr.ind = TRUE), 2] <- df[(which(df[, 2] == 0, arr.ind = TRUE) + 1), 2] + ) + suppressWarnings( + df[which(df[, 3] == 0, arr.ind = TRUE), 3] <- df[(which(df[, 3] == 0, arr.ind = TRUE) + 1), 3] + ) + + g <- + ggplot2::ggplot(data = df, ggplot2::aes(x = score, y = total)) + + geom_hline( + yintercept = c(gridlijnen), + linetype = "solid", + colour = "gray80" + ) + + geom_hline( + yintercept = cesuurlijnen, + linetype = "dashed", + size = 0.75, + colour = "black" + ) + + geom_bar(stat = "identity") + + xlab("Total score") + + ylab("% of students with score or higher") + + scale_y_continuous(breaks = c(0, cesuurlijnen, length(student_scores)), labels = c(seq(0, 100, 20))) + + scale_x_continuous(limits = c(df[1, 1] - 1, n_item + 1), breaks = 0:n_item, expand = c(0, 0)) + + theme_bw() + + theme(panel.grid = element_blank()) + + print(g) +} + +cum_hist(student_scores, n_item = ncol(key)) +``` + +```{r, echo = FALSE, eval = TRUE, warnings = FALSE, message = FALSE, results = 'asis', fig.width = 11} +## Do the whole script above for every sub category +if (!length(categories) == 0) { + cat("\n\n") + cat("\\pagebreak") + cat("\n\n") + cat("# Analysis for subcategories") + cat("\n\n") + + for (i in 1:length(categories)) { + items <- categories[[i]]$items + if (i != 1) { + cat("\\pagebreak") + cat("\n\n") + } + cat("\n\n") + cat(paste("## Category:", categories[[i]]$name )) + cat("\n\n") + + item_list <- create_item_list( + number_answeroptions[items], + correct_frequency[items], + correct_percentage[items], + frequency_answer_options[, items], + percentage_answer_options[, items], + categories[[i]]$corrected_item_tot_cor, + categories[[i]]$corrected_item_tot_cor_answ_option, + item_names[items], + key[, items] + ) + + correct_df <- create_df(item_names[items], correct_percentage[items], categories[[i]]$corrected_item_tot_cor) + + plot_overall_perc(correct_df) + plot_overall_irc(correct_df) + + if (any(key[, items] != 0)) { + ans_opt_df <- make_answ_df(item_list, key[, items], number_answeroptions[items], item_names[items]) + calc_n_plot(ans_opt_df, key[, items]) + plot_perc_answ(ans_opt_df, items_in_plot, key[, items]) + plot_irc_answ(ans_opt_df, items_in_plot, key[, items]) + } + + cat("\\pagebreak") + cat("\n\n") + cat("## Cumulative Distribution") + cat("\n\n") + cum_hist(categories[[i]]$student_scores, n_item = length(items)) + } +} +``` diff --git a/app/Model/Exam.php b/app/Model/Exam.php index 5596132..7a09672 100644 --- a/app/Model/Exam.php +++ b/app/Model/Exam.php @@ -589,12 +589,6 @@ protected function _report($exam) { } } - // Create the input_correct matrix (with given dimensions) by filling it with a vector (by row) - $script[] = sprintf( - 'input_correct = matrix( c( %s ), number_students, number_questions, byrow = TRUE );', - implode(',', $inputCorrectMatrix) - ); - $keyMatrix = array(); foreach ($exam['Item'] as $i => $item) { @@ -613,13 +607,18 @@ protected function _report($exam) { implode(',', $keyMatrix) ); + $script[] = 'student_scores = c( );'; + + $script[] = 'categories = c( );'; + + $template = APP . 'Lib' . DS . 'Rscripts' . DS . 'report.Rmd'; $script[] = sprintf( 'report( ' . - '"%s", number_students, number_answeroptions, number_questions, Cronbach, frequency_answer_options, ' . - 'percentage_answer_options, input_correct, key, correct_frequency, correct_percentage, ' . - 'corrected_item_tot_cor, corrected_item_tot_cor_answ_option, "%s", item_names' . + '"%s", "%s", number_answeroptions, Cronbach, frequency_answer_options, percentage_answer_options, key, ' . + 'correct_frequency, correct_percentage, corrected_item_tot_cor, corrected_item_tot_cor_answ_option, "%s", ' . + 'item_names, student_scores, categories' . ' );', - $tempFile, $exam['Exam']['name'] + $template, $tempFile, $exam['Exam']['name'] ); $script = implode("\n", $script); diff --git a/app/Test/Case/Lib/RserveTest.php b/app/Test/Case/Lib/RserveTest.php index 0197488..2606173 100644 --- a/app/Test/Case/Lib/RserveTest.php +++ b/app/Test/Case/Lib/RserveTest.php @@ -200,19 +200,20 @@ public function testExecuteRscriptsAnalyseTooLittleStudents() { */ public function testExecuteRscriptsReport() { $filename = $this->__tmpPath . DS . 'testExecuteRscriptsReport.pdf'; + $template = APP . 'Lib' . DS . 'Rscripts' . DS . 'report.Rmd'; $this->assertFileNotExists($filename); $script = array(); $script[] = file_get_contents(APP . 'Lib' . DS . 'Rscripts' . DS . 'report.R'); - $script[] = sprintf('file_name = "%s";', $filename); + $script[] = sprintf('filename = "%s";', $filename); + $script[] = sprintf('template = "%s";', $template); $script[] = 'number_students = 2;'; $script[] = 'number_answeroptions = c( 3, 3, 3 );'; $script[] = 'number_questions = 3;'; $script[] = 'cronbach = 0.5;'; $script[] = 'frequency_answer_options = matrix( c( 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 ), nrow = 4, ncol = number_questions, byrow = FALSE );'; $script[] = 'percentage_answer_options = matrix( c( 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 ), nrow = 4, ncol = number_questions, byrow = FALSE );'; - $script[] = 'input_correct = matrix( c( 0, 0, 0, 0, 0, 0 ), nrow = number_students, ncol = number_questions, byrow = TRUE );'; $script[] = 'key = matrix( c( 1, 0, 0, 1, 0, 0, 1, 0, 0 ), nrow = 3, ncol = number_questions, byrow = FALSE );'; $script[] = 'correct_frequency = c( 0, 0, 0 );'; $script[] = 'correct_percentage = c( 0, 0, 0 );'; @@ -220,12 +221,10 @@ public function testExecuteRscriptsReport() { $script[] = 'corrected_item_tot_cor_answ_option = matrix( c( 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 ), nrow = 4, ncol = number_questions, byrow = FALSE );'; $script[] = 'title = "Test Title";'; $script[] = 'item_names = c( "Item 1", "Item 2", "Item 3" );'; - $script[] = 'report( file_name, number_students, number_answeroptions, number_questions, cronbach, - frequency_answer_options, percentage_answer_options, - input_correct, key, correct_frequency, - correct_percentage, corrected_item_tot_cor, - corrected_item_tot_cor_answ_option, title, - item_names);'; + $script[] = 'student_scores = c( 7, 9 );'; + $script[] = 'categories = c( );'; + $script[] = 'report(template, filename, number_answeroptions, cronbach, frequency_answer_options, percentage_answer_options, key, correct_frequency, + correct_percentage, corrected_item_tot_cor, corrected_item_tot_cor_answ_option, title, item_names, student_scores, categories );'; $script = implode("\n", $script); $result = $this->Rserve->execute($script); diff --git a/bin/travis-linter.sh b/bin/travis-linter.sh index bcb7082..749821f 100755 --- a/bin/travis-linter.sh +++ b/bin/travis-linter.sh @@ -3,7 +3,7 @@ set -e exitstatus=0 -for file in $(pwd)/app/Lib/Rscripts/*.R +for file in $(pwd)/app/Lib/Rscripts/*.R* do Rscript -e "lintr::lint(\"$file\")" outputbytes=`Rscript -e "lintr::lint(\"$file\")" | grep ^ | wc -c`