From 46d342c3a33305805c93e1f8599241219649fcfd Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Wed, 3 Aug 2016 14:46:52 +0200 Subject: [PATCH 01/21] Added new R scripts --- app/Lib/Rscripts/report.R | 740 +----------------------------------- app/Lib/Rscripts/report.Rmd | 649 +++++++++++++++++++++++++++++++ 2 files changed, 664 insertions(+), 725 deletions(-) create mode 100644 app/Lib/Rscripts/report.Rmd diff --git a/app/Lib/Rscripts/report.R b/app/Lib/Rscripts/report.R index 617e326..0c10111 100644 --- a/app/Lib/Rscripts/report.R +++ b/app/Lib/Rscripts/report.R @@ -1,727 +1,17 @@ -# Packages -library(gplots) -library(ggplot2) -library(plyr) -library(grid) -# nolint start -library(gridExtra) -# nolint end - -report <- function(filename, - number_students, - number_answeroptions, - number_questions, - cronbach, - frequency_answer_options, - percentage_answer_options, - input_correct, +report <- function(file.name, + title, + number.answeroptions, key, - correct_frequency, - correct_percentage, - corrected_item_tot_cor, - corrected_item_tot_cor_answ_option, - title, item_names) { - # Creating results for each item - item_list <- list() # Creates list to put item output in - colnames1 <- c("Answer Option", "Frequency", "Percentage", "IRC") - colnames2 <- c("Answer Option", "Frequency", "Percentage", "IRC", "Correct") - - # Filling a list with frequency, percentage and IRC for total and each answer options. - # every item gets a seperate data frame - # If no answeroptions are present, only the correct statistics are displayed. - # If there are more than 14 answer options, only the correct statistics are displayed to conserve space. - # This list is used for the first part of the output. This list is modified to a data frame to make the plots with - # the answer options. - for (i in 1:number_questions) { - if (number_answeroptions[i] > 0 & number_answeroptions[i] < 15) { - Correct <- rep("Incorrect", number_answeroptions[i] + 1) - Correct[which(key[, i] == 1)] <- "Correct" - - # Frequency is also stored at this point, but not used. - # in case someone wants to alter the script to display the frequency instead of the percentage - item_list[[i]] <- data.frame( - c(LETTERS[1:number_answeroptions[i]], "Missing"), - c(frequency_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]), - c(percentage_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]), - c(corrected_item_tot_cor_answ_option[c(2:(number_answeroptions[i] + 1), 1), i]), - Correct, - row.names = NULL - ) - colnames(item_list[[i]]) <- colnames2 - } else { - item_list[[i]] <- data.frame("Total", correct_frequency[i], correct_percentage[i], corrected_item_tot_cor[i]) - # Frequency is also stored at this point, but not used, - # in case someone wants to alter the script to display the frequency instead of the percentage - colnames(item_list[[i]]) <- colnames1 - } - } - - #Creating item names and putting them in the list as names - items <- numeric() - for (i in 1:number_questions) { - items <- c(items, paste("Item", as.character(item_names[i]), sep = " ")) - } - - names(item_list) <- items - - # Create extra variables to make the bar plots - item_list1 <- item_list - for (i in 1:number_questions) { - #Create right order on the x-axis (Missingness last) - if (any(key[, i] != 0) & number_answeroptions[i] < 15) { - item_list1[[i]]$Ans_Factor <- factor( - item_list1[[i]]$"Answer Option", - levels = c(LETTERS[1 : max(number_answeroptions)], "Missing") - ) - # nolint start - item_list1[[i]]$"Col_scale" <- as.numeric(item_list1[[i]]$Correct) * 2 - 3 - item_list1[[i]]$"IRC_col_scale" <- with(item_list1[[i]], IRC * Col_scale) - item_list1[[i]]$"Perc_col_scale" <- with(item_list1[[i]], Percentage) - # nolint end - } - } - - #Create data frame of all the items which have answer options. This is used to make barplots per answer option - if (any(key != 0)) { - ans_opt_datafrm <- plyr::ldply(item_list1[number_answeroptions != 0 & number_answeroptions < 15], data.frame) - names(ans_opt_datafrm)[1] <- "id" - ans_opt_datafrm[, 2] <- gsub("Missing", "Mis", ans_opt_datafrm[, 2]) - ans_opt_datafrm$Ans_Factor <- gsub("Missing", "Mis", ans_opt_datafrm$Ans_Factor) - ans_opt_datafrm$Ans_Factor <- factor( - ans_opt_datafrm$Ans_Factor, - levels = c(LETTERS[1 : max(number_answeroptions)], "Mis") - ) - ans_opt_datafrm$id <- factor(ans_opt_datafrm$id, levels = items[number_answeroptions != 0]) - ans_opt_datafrm$Perc_col_scale[ans_opt_datafrm$Correct == "Correct"] <- - 100 - ans_opt_datafrm$Perc_col_scale[ans_opt_datafrm$Correct == "Correct"] - - id2 <- as.numeric(ans_opt_datafrm$id) - ans_opt_datafrm <- cbind(ans_opt_datafrm, id2) - } - - # Create a dataframe with only the correct statistics in it. This is used in the general item plots - dataframe_correct <- data.frame( - factor(1:number_questions), - correct_frequency, - correct_percentage, - corrected_item_tot_cor - ) - names(dataframe_correct)[1] <- "item" - - # Starting explanation - start_text <- paste( - "Number of students : ", number_students, "\n", - "Number of questions : ", number_questions, "\n", - "Average score : ", round(mean(rowSums(input_correct)), digits = 3), "\n", - "Standard deviation : ", round(sd(rowSums(input_correct)), digits = 3), "\n", - "Cronbach's alpha : ", cronbach, "\n", - "Standard error : ", round(sd(rowSums(input_correct) * sqrt(1 - cronbach)), digits = 3), "\n", - sep = "" - ) - - explanation_items <- paste( - "Explanation Table", "\n", "\n", - "For each question the frequency, percentage and item rest correlations (IRC)", "\n", - "from every answer option are diplayed. The IRC should be (highly) positive", "\n", - "for the right answer option and low for the wrong answer option(s).", "\n", - sep = "" - ) - - pdf(file = filename, h = 8, w = 10) - - # Textplot plots outside the normal plot window, therefore xpd = NA - par(xpd = NA, mar = rep(2, 4)) - - # If maximum 7 answer options 8 items per page - if (max(number_answeroptions) < 8) { - # Creating the first page - layout(matrix(c(1, 1, 1, 1, 1, 1, - 2, 2, 3, 3, 3, 3, - 2, 2, 3, 3, 3, 3, - 2, 2, 3, 3, 3, 3, - 4, 4, 4, 5, 5, 5, - 4, 4, 4, 5, 5, 5, - 4, 4, 4, 5, 5, 5, - 6, 6, 6, 7, 7, 7, - 6, 6, 6, 7, 7, 7, - 6, 6, 6, 7, 7, 7, - 8, 8, 8, 9, 9, 9, - 8, 8, 8, 9, 9, 9, - 8, 8, 8, 9, 9, 9), ncol = 6, byrow = TRUE)) - # matrix(c(1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6), 3, 6, byrow = TRUE)) - - # Title - gplots::textplot(title, valign = "top", cex = 2) - - # Add overall test info and explanation - gplots::textplot(start_text, halign = "left", valign = "top", cex = 1, mar = c(1, 5, 5, 1)) - gplots::textplot(explanation_items, halign = "left", valign = "top", mar = c(1, 1, 5, 5)) - - # Display the first 6 items - if (number_questions < 7) { - for (i in 1:number_questions) { - gplots::textplot(item_list[[i]][, 1:4], show.rownames = FALSE, cex = 1, valign = "top", mar = c(5, 1, 0, 1)) - title(items[i], line = 2) - # Adding highlighting box for the right answeroption - if (any(key[, i] == 1)) { - for (j in 1 : sum(key[, i] == 1)) { - rect( - .19, - .92 - which(key[, i] == 1)[j] * .11, - .85, - 1.02 - which(key[, i] == 1)[j] * .11, - col = rgb(0, .9, 0, .5), - density = NA - ) - } - } - } - } else { - for (i in 1:6) { - gplots::textplot(item_list[[i]][, 1:4], show.rownames = FALSE, cex = 1, valign = "top", mar = c(5, 1, 0, 1)) - title(items[i], line = 2) - # Adding highlighting box for the right answeroption - if (any(key[, i] == 1)) { - for (j in 1 : sum(key[, i] == 1)) { - rect( - .19, - .92 - which(key[, i] == 1)[j] * .11, - .85, - 1.02 - which(key[, i] == 1)[j] * .11, - col = rgb(0, .9, 0, .5), - density = NA - ) - } - } - } - - # Creating the item output on second page forward, only if more than 6 questions - layout(matrix(c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8), 4, 6, byrow = TRUE)) - for (i in 7:number_questions) { - gplots::textplot(item_list[[i]][, 1:4], show.rownames = FALSE, cex = 1, valign = "top", mar = c(2, 1, 3, 1)) - title(items[i], line = -0.85) - - if (any(key[, i] == 1)) { - for (j in 1 : sum(key[, i] == 1)) { - rect( - .19, - .92 - which(key[, i] == 1)[j] * .096, - .85, - 1.01 - which(key[, i] == 1)[j] * .096, - col = rgb(0, .9, 0, .5), - density = NA - ) - } - } - } - } - } else { - # Creates PDF for more than 7 answeroptions, only 4 items per page are displayed - layout(matrix(c(1, 1, 1, 1, 1, 1, - 2, 2, 3, 3, 3, 3, - 2, 2, 3, 3, 3, 3, - 2, 2, 3, 3, 3, 3, - 4, 4, 4, 5, 5, 5, - 4, 4, 4, 5, 5, 5, - 4, 4, 4, 5, 5, 5, - 4, 4, 4, 5, 5, 5, - 6, 6, 6, 7, 7, 7, - 6, 6, 6, 7, 7, 7, - 6, 6, 6, 7, 7, 7, - 6, 6, 6, 7, 7, 7), ncol = 6, byrow = TRUE)) - - # Title - gplots::textplot(title, valign = "top", cex = 2) - - # Introduction text - gplots::textplot(start_text, halign = "left", valign = "top", cex = 1, mar = c(1, 5, 5, 1)) - gplots::textplot(explanation_items, halign = "left", valign = "top", mar = c(1, 1, 5, 5)) - - # Creating item output - for (i in 1:4) { - gplots::textplot(item_list[[i]][, 1:4], show.rownames = FALSE, cex = 1, valign = "top", mar = c(5, 1, 0, 1)) - title(items[i], line = 1.8) - - # Adding highlighting box for the right answeroption - if (any(key[, i] == 1) & number_answeroptions[i] < 15) { - for (j in 1 : sum(key[, i] == 1)) { - rect( - .19, - .95 - .065 * which(key[, i] == 1)[j], - .85, - 1.01 - .065 * which(key[, i] == 1)[j], - col = rgb(0, .9, 0, .5), - density = NA - ) - } - } - } - - # Second page and further - layout(matrix(c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6), 3, 6, byrow = TRUE)) - - # Displaying the items - for (i in 4:number_questions) { - gplots::textplot(item_list[[i]][, 1:4], show.rownames = FALSE, cex = 1, valign = "top", mar = c(2, 1, 3, 1)) - title(items[i], line = -.9) - - # Adding highlighting box for the right answeroption - if (any(key[, i] == 1) & number_answeroptions[i] < 15) { - for (j in 1 : sum(key[, i] == 1)) { - rect( - .19, - .95 - .065 * which(key[, i] == 1)[j], - .85, - 1.01 - .065 * which(key[, i] == 1)[j], - col = rgb(0, .9, 0, .5), - density = NA - ) - } - } - } - } - - ### Frequency Plot for total items - - # Create chart with Answer Option on x-axis and IRC on y-asix - bar_plot_freq <- ggplot2::ggplot( - dataframe_correct, - ggplot2::aes(item, correct_percentage, fill = correct_percentage) - ) - - bar_freq <- - bar_plot_freq + - # Create Bar chart - geom_bar(stat = "identity") + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_fill_gradient2(low = "red", mid = "green", high = "red", midpoint = 50, limits = c(0, 100), name = "") + - # Change y-axis limit to constant - coord_cartesian(ylim = c(0, 100)) + - # Change titles and x axis name - labs(x = "Item", title = "Correct Percentage", y = "Percentage") + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 8), - axis.ticks.x = element_line(size = .4) - ) - - ### IRC Bar Plot for total items - - # Create chart with Answer_Option on x-axis and IRC on y-asix - bar_plot_IRC <- ggplot2::ggplot( - dataframe_correct, - ggplot2::aes(item, corrected_item_tot_cor, fill = corrected_item_tot_cor) - ) - bar_IRC <- - bar_plot_IRC + - # Create Bar chart - geom_bar(stat = "identity") + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_fill_gradient(low = "red", high = "green", limits = c(-.1, .4), name = "") + - # coord_cartesian(ylim = c(, 1)) + # Change y-axis limit to constant - # Change titles and x axis name - labs(x = "Item", title = "Item Rest Correlations", y = "Item Rest Correlation") + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 8), - axis.ticks.x = element_line(size = .4) - ) - - # Calculating which questions are displayed on which plot. - # Determination rule: max 80 answer options per plot - if (any(key != 0)) { - tot_answer_options <- 0 - Questions_p1 <- 0 - Questions_p2 <- numeric() - Questions_p3 <- numeric() - Questions_p4 <- numeric() - questions_with_ans_opts <- nlevels(ans_opt_datafrm$id) - - # Calculating which questions are on the first plot. Total answeroptions should be less than 100 - while (tot_answer_options < 100 & Questions_p1 < questions_with_ans_opts) { - Questions_p1 <- Questions_p1 + 1 - tot_answer_options <- tot_answer_options + sum(ans_opt_datafrm$id2 == Questions_p1) - } - - # Calculating which questions are on the second plot. Total answeroptions should be less than 100 - Questions_p2 <- Questions_p1 - while (tot_answer_options < 200 & Questions_p2 < questions_with_ans_opts) { - Questions_p2 <- Questions_p2 + 1 - tot_answer_options <- tot_answer_options + sum(ans_opt_datafrm$id2 == Questions_p2) - } - - # Calculating which questions are on the third plot. Total answeroptions should be less than 100 - Questions_p3 <- Questions_p2 - while (tot_answer_options < 300 & Questions_p3 < questions_with_ans_opts) { - Questions_p3 <- Questions_p3 + 1 - tot_answer_options <- tot_answer_options + sum(ans_opt_datafrm$id2 == Questions_p3) - } - - # Emptying the plots if no questions are present for that plot - Questions_p4 <- ifelse(Questions_p3 < questions_with_ans_opts, questions_with_ans_opts, 0) - Questions_p3 <- ifelse(Questions_p3 != Questions_p2, Questions_p3, 0) - Questions_p2 <- ifelse(Questions_p2 != Questions_p1, Questions_p2, 0) - - # Creating IRC plots 16 items per plot. - # The only difference between these for codes is in the item selection on the first row - # ([1:Questions_p1] in the first case - - bar_plot_freq1 <- ggplot2::ggplot( - # Create subset of first 16 questions - ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[1:Questions_p1]), ], - # Create chart with Answer Option on x-axis and IRC on y-asix - ggplot2::aes("Answer Option", Percentage, fill = Correct, colour = Perc_col_scale) - ) - bar_freq1 <- - bar_plot_freq1 + - # Create Bar chart - geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") + - # Display the different items - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - # Fill in the bars: Green right answer options, Red wrong answer options - scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_colour_gradient(low = "green", high = "red", guide = FALSE) + - # Change y-axis limit to constant - coord_cartesian(ylim = c(0, 100)) + - # Change titles and x axis name - labs( - x = "Answer Options", - title = paste( - "Percentage chart(s) per question and per answer options. The green bars represent the right answer ", - "options.", - "\n", - "The color of the border represents the desirability (50% for the right answer options, low for the wrong ", - "answer options)", - sep = "" - ) - ) + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 4.8), - axis.ticks.x = element_line(size = .1), - title = element_text(size = 8) - ) - - if (Questions_p2 != 0) { - bar_plot_freq2 <- ggplot2::ggplot( - # Create subset of the other questions - ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[(Questions_p1 + 1) : Questions_p2]), ], - # Create chart with Answer Option on x-axis and IRC on y-asix - ggplot2::aes("Answer Option", Percentage, fill = Correct, colour = Perc_col_scale) - ) - bar_freq2 <- - bar_plot_freq2 + - # Create Bar chart - geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") + - # Display the different items - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - # Fill in the bars: Green right answer options, Red wrong answer options - scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_colour_gradient(low = "green", high = "red", guide = FALSE) + - # Change y-axis limit to constant - coord_cartesian(ylim = c(0, 100)) + - # Change titles and x axis name - labs(x = "Answer Options", title = "Item") + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 4.8), - axis.ticks.x = element_line(size = .1) - ) - } - - if (Questions_p3 != 0) { - bar_plot_freq3 <- ggplot2::ggplot( - # Create subset of the other questions - ans_opt_datafrm[ans_opt_datafrm[, 1] %in% c(levels(ans_opt_datafrm$id)[(Questions_p2 + 1) : Questions_p3]), ], - # Create chart with Answer Option on x-axis and IRC on y-asix - ggplot2::aes("Answer Option", Percentage, fill = Correct, colour = Perc_col_scale) - ) - bar_freq3 <- - bar_plot_freq3 + - # Create Bar chart - geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") + - # Display the different items - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - # Fill in the bars: Green right answer options, Red wrong answer options - scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_colour_gradient(low = "green", high = "red", guide = FALSE) + - # Change y-axis limit to constant - coord_cartesian(ylim = c(0, 100)) + - # Change titles and x axis name - labs(x = "Answer Options", title = "Item") + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 4.8), - axis.ticks.x = element_line(size = .1) - ) - } - - if (Questions_p4 != 0) { - bar_plot_freq4 <- ggplot2::ggplot( - # Create subset of the other questions - ans_opt_datafrm[ans_opt_datafrm[, 1] %in% c(levels(ans_opt_datafrm$id)[(Questions_p3 + 1) : Questions_p4]), ], - # Create chart with Answer Option on x-axis and IRC on y-asix - ggplot2::aes("Answer Option", Percentage, fill = Correct, colour = Perc_col_scale) - ) - bar_freq4 <- - bar_plot_freq4 + - # Create Bar chart - geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") + - # Display the different items - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - # Fill in the bars: Green right answer options, Red wrong answer options - scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_colour_gradient(low = "green", high = "red", guide = FALSE) + - # Change y-axis limit to constant - coord_cartesian(ylim = c(0, 100)) + - # Change titles and x axis name - labs(x = "Answer Options", title = "Item") + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 4.8), - axis.ticks.x = element_line(size = .1) - ) - } - - # Creating IRC plots 16 items per plot. - # The only difference between these for codes is in the item selection on the first row - # ([1:Questions_p1] in the first case) - - bar_plot_IRC1 <- ggplot2::ggplot( - # Create subset of first 16 questions - ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[1:Questions_p1]), ], - # Create chart with Answer Option on x-axis and IRC on y-asix - ggplot2::aes("Answer Option", IRC, fill = Correct, colour = IRC_col_scale) - ) - bar_IRC1 <- - bar_plot_IRC1 + - # Create Bar chart - geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") + - # Display the different items - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - # Fill in the bars: Green right answer options, Red wrong answer options - scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_colour_gradient(low = "green", high = "red", guide = FALSE) + - # Change y-axis limit to constant - coord_cartesian(ylim = c(-.3, .4)) + - # Change titles and x axis name - labs( - x = "Answer Options", - title = paste( - "IRC chart(s) per question and per answer options. The green bars represent the right answer options.", - "\n", - "The color of the border represents the desirability (high for the right answer options, low for the ", - "wrong answer options)", - sep = "" - ) - ) + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 4.8), - axis.ticks.x = element_line(size = .1), - title = element_text(size = 8) - ) - - if (Questions_p2 != 0) { - bar_plot_IRC2 <- ggplot2::ggplot( - # Create subset of first 16 questions - ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[(Questions_p1 + 1) : Questions_p2]), ], - # Create chart with Answer Option on x-axis and IRC on y-asix - ggplot2::aes("Answer Option", IRC, fill = Correct, colour = IRC_col_scale) - ) - bar_IRC2 <- - bar_plot_IRC2 + - # Create Bar chart - geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") + - # Display the different items - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - # Fill in the bars: Green right answer options, Red wrong answer options - scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_colour_gradient(low = "green", high = "red", guide = FALSE) + - # Change y-axis limit to constant - coord_cartesian(ylim = c(-.3, .4)) + - # Change titles and x axis name - labs(x = "Answer Options", title = "Item") + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 4.8), - axis.ticks.x = element_line(size = .1) - ) - } - - if (Questions_p3 != 0) { - bar_plot_IRC3 <- ggplot2::ggplot( - # Create subset of first 16 questions - ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[(Questions_p2 + 1) : Questions_p3]), ], - # Create chart with Answer Option on x-axis and IRC on y-asix - ggplot2::aes("Answer Option", IRC, fill = Correct, colour = IRC_col_scale) - ) - bar_IRC3 <- - bar_plot_IRC3 + - # Create Bar chart - geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") + - # Display the different items - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - # Fill in the bars: Green right answer options, Red wrong answer options - scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_colour_gradient(low = "green", high = "red", guide = FALSE) + - # Change y-axis limit to constant - coord_cartesian(ylim = c(-.3, .4)) + - # Change titles and x axis name - labs(x = "Answer Options", title = "Item") + - theme( - strip.text.x = element_text(size = 7), - # Change font size of item names and Answer options - axis.text.x = element_text(size = 4.8), - axis.ticks.x = element_line(size = .1) - ) - } - - if (Questions_p4 != 0) { - bar_plot_IRC4 <- ggplot2::ggplot( - # Create subset of first 16 questions - ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[(Questions_p3 + 1) : Questions_p4]), ], - # Create chart with Answer Option on x-axis and IRC on y-asix - ggplot2::aes("Answer Option", IRC, fill = Correct, colour = IRC_col_scale) - ) - bar_IRC4 <- - bar_plot_IRC4 + - # Create Bar chart - geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") + - # Display the different items - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - # Fill in the bars: Green right answer options, Red wrong answer options - scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) + - # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options) - scale_colour_gradient(low = "green", high = "red", guide = FALSE) + - # Change y-axis limit to constant - coord_cartesian(ylim = c(-.3, .4)) + - # Change titles and x axis name - labs(x = "Answer Options", title = "Item") + - theme( - strip.text.x = element_text(size = 7), - axis.text.x = element_text(size = 4.8), # Change font size of item names and Answer options - axis.ticks.x = element_line(size = .1) - ) - } - } - - # Creating the bar plots. Depending on the amount of plots, different arranges are made. - suppressWarnings( - if (any(key != 0)) { - if (Questions_p2 == 0) { - # Only 1 Answer Option plot --> all plots on 1 page - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(4, 1))) - vplayout <- function(x, y) { - grid::viewport(layout.pos.row = x, layout.pos.col = y) - } - print(bar_freq, vp = vplayout(1, 1)) - print(bar_IRC, vp = vplayout(2, 1)) - print(bar_freq1, vp = vplayout(3, 1)) - print(bar_IRC1, vp = vplayout(4, 1)) - } - - if (Questions_p3 == 0 & Questions_p2 != 0) { - # 2 Answer Option plots --> 2 pages of plot output - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(3, 1))) - vplayout <- function(x, y) { - grid::viewport(layout.pos.row = x, layout.pos.col = y) - } - print(bar_freq, vp = vplayout(1, 1)) - print(bar_IRC, vp = vplayout(2, 1)) - print(bar_freq1, vp = vplayout(3, 1)) - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(3, 1))) - - print(bar_freq2, vp = vplayout(1, 1)) - print(bar_IRC1, vp = vplayout(2, 1)) - print(bar_IRC2, vp = vplayout(3, 1)) - } - - if (Questions_p4 == 0 & Questions_p3 != 0) { - # 3 Answer Option plots --> 2 pages of plot output - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(2, 1))) - vplayout <- function(x, y) { - grid::viewport(layout.pos.row = x, layout.pos.col = y) - } - print(bar_freq, vp = vplayout(1, 1)) - print(bar_IRC, vp = vplayout(2, 1)) - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(3, 1))) - - print(bar_freq1, vp = vplayout(1, 1)) - print(bar_freq2, vp = vplayout(2, 1)) - print(bar_freq3, vp = vplayout(3, 1)) - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(3, 1))) - - print(bar_IRC1, vp = vplayout(1, 1)) - print(bar_IRC2, vp = vplayout(2, 1)) - print(bar_IRC3, vp = vplayout(3, 1)) - } - - if (Questions_p4 != 0) { - # 4 Answer options plots --> 3 pages of plot output - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(2, 1))) - vplayout <- function(x, y) { - grid::viewport(layout.pos.row = x, layout.pos.col = y) - } - print(bar_freq, vp = vplayout(1, 1)) - print(bar_IRC, vp = vplayout(2, 1)) - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(4, 1))) - - print(bar_freq1, vp = vplayout(1, 1)) - print(bar_freq2, vp = vplayout(2, 1)) - print(bar_freq3, vp = vplayout(3, 1)) - print(bar_freq4, vp = vplayout(4, 1)) - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(4, 1))) - - print(bar_IRC1, vp = vplayout(1, 1)) - print(bar_IRC2, vp = vplayout(2, 1)) - print(bar_IRC3, vp = vplayout(3, 1)) - print(bar_IRC4, vp = vplayout(4, 1)) - } - } else { - # Plot if no answer options are present - - grid::grid.newpage() - grid::pushViewport(grid::viewport(layout = grid::grid.layout(2, 1))) - vplayout <- function(x, y) { - grid::viewport(layout.pos.row = x, layout.pos.col = y) - } - print(bar_freq, vp = vplayout(1, 1)) - print(bar_IRC, vp = vplayout(2, 1)) - } - ) - dev.off() + item.names, + correct.frequency, + correct.percentage, + frequency.answer.options, + percentage.answer.options, + corrected.item.tot.cor, + corrected.item.tot.cor.answ.option, + cronbach, + student.scores, + categories) { + rmarkdown::render("report.Rmd", output_format = "html_document", output_file = paste0(file.name,".html")) + rmarkdown::render("report.Rmd", output_format = "pdf_document", output_file = paste0(file.name,".pdf")) } diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd new file mode 100644 index 0000000..986124b --- /dev/null +++ b/app/Lib/Rscripts/report.Rmd @@ -0,0 +1,649 @@ +--- +html_document: null +fig_width: 11 +classoption: landscape +keep_md: yes +output: html_document +pdf_document: default +--- + +```{r, echo = FALSE, warning = FALSE, message = FALSE} +library(ggplot2) +library(pander) +library(plyr) +``` + + +```{r, echo = FALSE, eval = TRUE, results = "asis"} + +CreateItemList <- function(number.answeroptions, correct.frequency, + correct.percentage, frequency.answer.options, percentage.answer.options, + corrected.item.tot.cor, corrected.item.tot.cor.answ.option, item.names, key) { + # Creates a list with frequency, percentage correct, and IRC for every item. + # + # Args: + # number.answeroptions: Vector with number of answer options per item + # frequency.answer.options: Matrix with [i,j] the frequency of + # answer option i of item j + # percentage.answer.options: Matrix with [i,j] the percentage of + # answer option i of item j + # corrected.item.tot.cor.answ.option: Matrix with [i,j] the item total + # correlation for answer option i of item j + # correct.frequency: Vector with total number correct per item + # correct.percentage: Vector with percentage correct per item + # corrected.item.tot.cor: Vector with item rest correlation per item + # + # Returns: + # List with one entry per item. Each entry consists of a data frame with + # frequency, percentage and IRC for the total item and the answer options + # (if the item has answer options) + + item.list <- list() # Creates list to put item output in + colnames1 <- c(" ", "Frequency", "Percentage", "IRC") + colnames2 <- c("Answer Option", "Frequency", "Percentage", "IRC", "Correct") + + # For every item, create entry in list with frequencies, percentages, and IRCs + for (i in 1:length(item.names)) { + if (number.answeroptions[i] > 0) { + correct <- c(ifelse(key[1:number.answeroptions[i], i] == 1, + "Correct", "Incorrect"), "Incorrect") + + item.list[[i]] <- data.frame( + c(LETTERS[1:number.answeroptions[i]], "Missing"), + c(frequency.answer.options[c(2: (number.answeroptions[i] + 1), 1), i]), + c(percentage.answer.options[c(2: (number.answeroptions[i] + 1), 1), i]), + c(corrected.item.tot.cor.answ.option[c(2: (number.answeroptions[i] + 1), + 1), i]), + correct, + row.names = NULL) + colnames(item.list[[i]]) <- colnames2 + + } else { + item.list[[i]] <- data.frame("Correct", correct.frequency[i], + correct.percentage[i], + corrected.item.tot.cor[i]) + colnames(item.list[[i]]) <- colnames1 + } + } + + names(item.list) <- item.names # Attach item names + return(item.list) +} + +# Use the function defined above +item.list <- CreateItemList(number.answeroptions, correct.frequency, + correct.percentage, frequency.answer.options, percentage.answer.options, + corrected.item.tot.cor, corrected.item.tot.cor.answ.option, item.names, key) + +cat(paste("#", title)) +``` + +# General + +What | Result | +:---------------------- | :------------- | +Number of students | `r length(student.scores)`| +Number of items | `r length(item.names)`| +Average score | `r round(mean(student.scores), digits = 3)`| +Standard deviation | `r round(sd(student.scores), digits = 3)`| +Cronbach's alpha | `r cronbach`| +Standard error | `r round(sd(student.scores * sqrt(1 - cronbach)), digits = 3)`| + + +# Item statistics + +For each item the frequency, percentage and item rest correlations (IRC) of +every answer option are shown The IRC should be (highly) positive for the +right answer option and low for the wrong answer option(s). + +```{r pander, results = "asis", eval = TRUE, echo = FALSE, warning = FALSE, message = FALSE} + +pander.table <- function(item.list, key) { + # Print table for each item with frequency, percentage and the + # item-rest correlation. Correct answers are highlighted in bold + # + # Args: + # item.list: list with one entry per item. Each entry consists of a data frame + # Data frame has a row for every answer option + one for missing. + # Columns are: Answer option, frequency, percentage, IRC, + # and Correct (answer option is Correct/Incorrect) + # Length should be the same as number of columns of key + # key: Matrix of 0's and 1's. key[i,j] implies wether answer option i + # to item j is right (1) or wrong (0). If a column (item) consists of + # only 0s, the item is interpreted as graded manually. + # Number of columns should be equal to length of item.list + + panderOptions("knitr.auto.asis", FALSE) + for (i in 1:length(item.list)) { + if (any(key[, i] == 1)) { + item.list[[i]]$Correct <- revalue(item.list[[i]]$Correct, + c("Correct" = "X", "Incorrect" = "")) + emphasize.strong.rows(which(key[, i] == 1, arr.ind = TRUE)) + pander(item.list[i]) + } else { + t <- item.list[i] + pander(t) + } + } +} + +pander.table(item.list, key) +``` + +\pagebreak + +```{r, echo = FALSE, message = FALSE, warning = FALSE, fig.width = 11, results = 'asis'} + +CreateDf <- function(item.names, correct.percentage, corrected.item.tot.cor) { + # Creates a data frame with the overall item statistics including a colour + # indicator for the percentage correct and IRC. Colours is defined below. + # + # Args: + # These are the same as defined in CreateItemList + # + # Returns: + # Data frame with a row for every item. Columns consist of the item name, + # percentage correct, colour indicator for percentage, IRC and colour + # indicator for the IRC. + + correct.df <- data.frame(item = factor(item.names, item.names), + correct.percentage, + perc.col = ifelse(correct.percentage < 40, 0, + ifelse(correct.percentage > 70, 1, + 1 / 75 * correct.percentage - 7 / 30)), + + corrected.item.tot.cor, + irc.col = ifelse(corrected.item.tot.cor < 0, 0, + ifelse(corrected.item.tot.cor > .05, 1, + 8 * corrected.item.tot.cor + 3 / 10)) + ) + + return(correct.df) +} + +PercPlotOverall <- function(correct.df) { + # Plots bar graph of percentage correct of every item. Bars are coloured + # depending on the percentage correct: + # < 40: Red + # 40 - 70: Orange till green-ish + # > 70: Green + # + # Args: + # correct.df: data frame with item name, percentage correct and colour code + # for the percentages. + # + # Returns: + # Explanation of the plot and the plot itself + + cat(" + +## Percentage Correct + +Plot of percentage correct per item. If *more than 70%* of the students answer +an item correctly, the students have mastered the material well enough, thus it +is coloured *green*. If *less than 40%* of the students answer an item +correctly, the students have not mastered the material, and is coloured *red*. +*Orange* indicates the percentage correct lies *between 40% and 70%*. The +colours are based on multiple choice items with 3 or 4 answer options. + +") + + bar.freq <- ggplot(correct.df, aes(item, correct.percentage, fill = perc.col)) + bar.freq <- bar.freq + geom_bar(stat = "identity") + # Create Bar chart + scale_fill_gradient(low = "red", high = "green", limits = c(0, 1), + guide = FALSE) + # Create colour boundray + coord_cartesian(ylim = c(0, 100)) + # Change y-axis limit to constant + labs(x = "Item", y = "Percentage Correct") + # x-axis name + theme_bw() + # Black and white + theme(axis.text.x = element_text(size = 8, angle = 90), # Change font size + axis.ticks.x = element_line(size = .4)) + + suppressWarnings(print(bar.freq)) +} + +IRCPlotOverall <- function(correct.df) { + # Plots bar graph of IRC of every item. Bars are coloured by IRC: + # < 0: Red + # 0 - .10: Orange till green-ish + # > .10: green + # + # Args: + # correct.df: data frame with item name, percentage correct and colour code + # for the percentages. + # + # Returns: + # Explanation of the plot and the plot itself + + cat("\\pagebreak + +## Item rest correlations + +Plot of Item Rest Correlations per item: The correlation between an item (0 for +wrong answers and 1 for right answers) and the sum of all other items. Items +with a correlation *higher than .05* indicate that students who answered this +item correctly answered the other items more often correctly than students who +answered this item incorrectly. Thus it is coloured *green*. Items with a +correlation *lower than 0* indicate no relationship between this item and the +other items, or that students who answered this item correctly answered the +other items more often incorrectly than students who answered this incorrectly, +thus it is coloured *red*. *Between 0 and .05*, an item is coloured *orange*. + +") + + # Create bar chart + bar.IRC <- ggplot(correct.df, aes(item, corrected.item.tot.cor, + fill = irc.col)) + bar.IRC <- bar.IRC + geom_bar(stat = "identity") + # Create Bar chart + scale_fill_gradient(low = "red", high = "green", limits = c(0, 1), + guide = FALSE) + + theme_bw() + + labs(x = "Item", y = "Item Rest Correlation") + # axes names + theme(strip.text.x = element_text(size = 7), + axis.text.x = element_text(size = 8, angle = 90), + axis.ticks.x = element_line(size = .4)) + + suppressWarnings(print(bar.IRC)) +} + +correct.df <- CreateDf(item.names, correct.percentage, corrected.item.tot.cor) +PercPlotOverall(correct.df) +IRCPlotOverall(correct.df) +``` + +```{r, echo = FALSE, eval = TRUE, warning= FALSE, fig.width = 11} + +# Create extra variables for the colours in the bar plots +PercCol <- function(percentage, correct) { + # Calculate colours for the percentage answer options. + # Args: + # percentage: Percentage answered per answer options + # correct: Vector of "Correct" and "Incorrect" of same length of percentage + # + # Returns: + # Value between 0 and 1. 0 corresponds to red and 1 to green in the plots. + # For the correct options: + # < 40: red + # 40 - 70: organge - green-ish + # > 70: green + # For the incorrect options: difference between correct answer option and + # incorrect answer options are calculated. Colours are based on difference: + # < -10: Green + # -10 - 0: orange - green-ish + # > 0: green + + perc.col <- vector("numeric", length = length(correct)) + perc.col[correct == "Correct"] <- ifelse(percentage[correct == "Correct"] + < 40, 0, ifelse(percentage[correct == "Correct"] > 70, 1, + 1 / 75 * percentage[correct == "Correct"] - 7 / 30)) + dif <- percentage[correct == "Incorrect"] - + min(percentage[correct == "Correct"]) + perc.col[correct == "Incorrect"] <- ifelse(dif >= 0, 0, + ifelse(dif < -10, 1, + -1 / 25 * dif + 3 / 10)) + perc.col +} + +IrcCol <- function(IRC, correct){ + # Calculate colours for the percentage answer options. + # + # Args: + # IRC: IRC per answer options. Same length as correct + # correct: Vector of "Correct" and "Incorrect" of same length of IRC + # + # Returns: + # Value between 0 and 1. 0 corresponds to red and 1 to green in the plots. + # For the correct answer options: + # < 0: red + # 0 - 0.05: orange - green-ish + # > 0.05: green + # For the incorrect answer options: difference between correct answer option + # and incorrect answer option are calculated. Colours are based on this + # difference: + # > 0: red + # -0.05 - 0: orange - green-ish + # < -0.05: red + + IRC.col <- numeric() + IRC.col[correct == "Correct"] <- ifelse(IRC[correct == "Correct"] < 0, 0, + ifelse(IRC[correct == "Correct"] > .05, 1, + 8 * IRC[correct == "Correct"] + 3 / 10)) + dif <- IRC[correct == "Incorrect"] - min(IRC[correct == "Correct"]) + IRC.col[correct == "Incorrect"] <- ifelse(dif >= 0, 0, + ifelse(dif < -.05, 1, -8 * dif + 3 / 10)) + return(IRC.col) +} + +CreateAnsDf <- function(item.list, key, number.answeroptions, item.names) { + # Creates a data frame with the statistics per answer option including a + # colour indicator for the percentage correct and IRC. + # + # Args: + # These are the same as defined earlier + # + # Returns: + # Data frame with a row for every answer option. Columns consist of the + # item name, answer options, frequency, percentage, IRC, Correct (whether + # an option is correct/incorrect), colour for percentage and colour for IRC + + for (i in 1:length(item.list)) { + if (any(key[, i] != 0)) { + item.list[[i]]$perc.col <- with(item.list[[i]], + PercCol(Percentage, Correct)) + item.list[[i]]$IRC.col <- with(item.list[[i]], + IrcCol(IRC, Correct)) + } + } + ans.opt.df <- ldply(item.list[number.answeroptions != 0], data.frame) + names(ans.opt.df)[1] <- "id" + ans.opt.df$Answer.Option <- gsub("Missing", "Mi", ans.opt.df$Answer.Option) + ans.opt.df$Answer.Option <- factor(ans.opt.df$Answer.Option, + levels = c(LETTERS[1:max(number.answeroptions)], "Mi")) + ans.opt.df$id <- factor(ans.opt.df$id, + levels = item.names[number.answeroptions != 0]) + if (all(key == 0)) { + ans.opt.df <- NULL + } + + ans.opt.df +} + +NumberOfPlots <- function(ans.opt.df, key) { + # Calculates which item is displayed in which plot. The total number of + # answer options is too large (usually) to display in one plot. + # + # Args: + # ans.opt.df: As defined in function above + # + # Returns: + # n.plots: the number of plots + # items.in.plot: list entry per plot. Every entry consists of item names + # in that plot + + if (any(key != 0)) { + max.answ.opts <- 80 # Maximum number of bars per plot + tot.answ.opts <- nrow(ans.opt.df) + n.plots <- ceiling(tot.answ.opts / max.answ.opts) + answ.opts.per.plot <- floor(tot.answ.opts / n.plots) + + items.in.plot <- list(unique(ans.opt.df[1:answ.opts.per.plot, 1])) + if (n.plots > 1) { + for (i in 2:n.plots) { + items.in.plot[[i]] <- unique(ans.opt.df[ (((i - 1) * answ.opts.per.plot) + + 1): (i * answ.opts.per.plot), 1]) + if (items.in.plot[[i]][1] %in% items.in.plot[[i - 1]]) { + items.in.plot[[i]] <- items.in.plot[[i]][-1] + } + } + } + assign("n.plots", n.plots, envir = globalenv()) + assign("items.in.plot", items.in.plot, envir = globalenv()) + + } +} + +ans.opt.df <- CreateAnsDf(item.list, key, number.answeroptions, item.names) +NumberOfPlots(ans.opt.df, key) + +``` + +```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11, results= 'asis'} + +PercPlotAnswOpt <- function(ans.opt.df, items.in.plot, key) { + # Plots bar graph(s) of the percentages of every answer option + # + # Args: + # All defined as before + # + # Returns: + # Description of how to read the plots + # One or multiple plots + + if (any(key != 0)) { + + cat("\\pagebreak + +## Percentage chosen per answer options + +Percentage plot of all answer options per item. **White** bars represent the +**right** answer option(s) and **black** the **wrong** options. + +The border represents the performance of an item. The **right** answer options +are coloured *green* if *more than 70%* of the students choose this option. In +that case, the students have mastered the material. They are coloured *red* if +*less than 40%* choose this option. *Between 40% and 70%*, it is coloured +*orange*. + +The **wrong** answer options are coloured *green* if an option is chosen *10 +percentage point less than the right option*. In that case, more students chose +the right answer option than the wrong option indicating that students are not +confused by the wrong option. They are coloured *red* if an option is chosen +*equally or more often than the right option*. In this case, students pick the +wrong answer option often, indicating that this option may be right as well. It +is coloured *orange* in between these two. + +") + + for (i in 1:length(items.in.plot)) { + bar.freq1 <- ggplot(ans.opt.df[ans.opt.df$id %in% items.in.plot[[i]], ], + aes("Answer Option", Percentage, fill = Correct, colour = perc.col)) + bar.freq1 <- bar.freq1 + geom_bar(aes(x = Answer.Option), + stat = "identity", size = 1.5) + + facet_grid(. ~ id, scales = "free_x", space = "free_x") + # Plot items + scale_fill_manual(values = c("Incorrect" = "Black", # Fill in the bars + "Correct" = "White"), guide = FALSE) + + scale_colour_gradient(low = "red", high = "green", limits = c(0, 1), + guide = FALSE) + # Create colour boundary + coord_cartesian(ylim = c(0, 100)) + # Change y-axis limit to constant + labs(x = "Answer Options") + # x axis name + theme_bw() + + theme(axis.text.x = element_text(size = 8), # Font sizes + axis.text.y = element_text(size = 12), + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15)) + + suppressWarnings(print(bar.freq1)) + } + } +} + +IRCPlotAnswOpt <- function(ans.opt.df, items.in.plot, key) { + # Plots bar graph(s) of the IRC of every answer option + # + # Args: + # All defined as before + # + # Returns: + # Description of how to read the plots + # One or multiple plots + + if (any(key != 0)) { + + cat('\\pagebreak + +## Item rest correlations per answer option + +Item rest correlation (IRC) plot of all answer options per item The IRC is the +correlation between an answer option (1 for students who chose this options and +0 for student who did not) and the sumscore on all other items. **White** bars +represent the **right** answer option(s) and **black** the **wrong** options. + +The border represents the performance of an item. The **right** answer options +are coloured *green* if the IRC is *higher than .05*. In that case, students who +picked the right answer option scored higher on all other items than student who +did not pick the right option. They are coloured *red* if the IRC is *less than +0*. In that case, students who picked the right answer option scored equally or +worse on the other items than students who picked the other options. Between *0 +and .05*, the option is coloured *orange*. + +The **wrong** answer options are coloured *green* if the IRC is *more than .1 +lower than the right option*. In that case, students who answered the other +items incorrectly, usually answered this item incorrectly as well. They are +coloured *red* if the IRC is *equal or higher than the right answer option*. +In that case, students who answered the other items correctly may have answered +this item incorrectly. In between these two, the option is coloured *orange*. + +') + + # Creating IRC plots + for (i in 1:length(items.in.plot)) { + # Select items and variables to plot + bar.IRC1 <- ggplot(ans.opt.df[ans.opt.df$id %in% items.in.plot[[i]], ], + aes("Answer Option", IRC, fill = Correct, colour = IRC.col)) + bar.IRC1 <- bar.IRC1 + geom_bar(aes(x = Answer.Option), stat = "identity", + size = 1.5) + # Create Bar chart + facet_grid(. ~ id, scales = "free_x", space = "free_x") + + scale_fill_manual(values = c("Incorrect" = "Black", "Correct" = "White"), + guide = FALSE) + # Fill in the bars + scale_colour_gradient(low = "red", high = "green", limits = c(0, 1), + guide = FALSE) + # Create colour boundray + # Change y-axis limit to either the maximum and minimum IRC + coord_cartesian(ylim = c(min(ans.opt.df$IRC) - .01, + max(ans.opt.df$IRC) + .01)) + + labs(x = "Answer Options") + + theme_bw() + + theme(axis.text.x = element_text(size = 8), # Font sizes + axis.text.y = element_text(size = 12), + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15) + ) + suppressWarnings(print(bar.IRC1)) + } + } +} + +IRCPlotAnswOpt(ans.opt.df, items.in.plot, key) +PercPlotAnswOpt(ans.opt.df, items.in.plot, key) +``` + +\pagebreak + +## Cumulative Distribution + +Cumulative Distribution showing all possible scores and the percentage of +students that scored that number or higher + +```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11} +CumHist <- function(student.scores, n.item) { + # Create a reversed cumulative histogram, i.e. the height of score x is + # the percentage of students who scores x or higher. + # + # Args: + # student.scores: numeric vector of test score per student. + # n.item: number of items + # + # Returns: + # Reversed cumulative histogram plot + + h <- heights <- length(student.scores) + n <- sort(unique(student.scores)) + + for (i in 1:length(n)) { + h <- h - length(student.scores[student.scores == n[i]]) + heights <- c(heights, h) + } + + cesuurlijnen <- seq(0.2, 0.8, 0.2) * length(student.scores) + gridlijnen <- round(seq(0, length(student.scores), + length(student.scores) / 25), 0) + + gridlijnen <- gridlijnen[-grep(paste(round(cesuurlijnen, 0), + collapse = "|"), gridlijnen)] + + heights <- heights[1:length(unique(student.scores))] + percent <- heights / length(student.scores) * 100 + df <- data.frame(score = sort(unique(student.scores)), + total = heights, percent = round(percent, 2)) + + vec <- df[1, 1]:df[nrow(df), 1] # save all possible total scores + + # Find if a total score is missing and add it + mis <- NA + if (sum(! (vec %in% df[, 1]) != 0)) { + mis <- vec[! (vec %in% df[, 1])] + } + if (!is.na(mis[1])) { + for (i in 1:length(mis)) { + df[ (nrow(df) + 1), ] <- c(mis[i], 0, 0) + } + df <- df[order(df$score), ] + } + + + # if no one gets a certain total score, replace the 0 with the next total score + suppressWarnings( + df[which(df[, 2] == 0, arr.ind = TRUE), 2] <- df[ (which(df[, 2] == 0, + arr.ind = TRUE) + 1), 2] + ) + suppressWarnings( + df[which(df[, 3] == 0, arr.ind = TRUE), 3] <- df[ (which(df[, 3] == 0, + arr.ind = TRUE) + 1), 3] + ) + + g <- ggplot(data = df, aes(x = score, y = total)) + + geom_hline(yintercept = c(gridlijnen), linetype = "solid", + colour = "gray80") + + geom_hline(yintercept = cesuurlijnen, linetype = "dashed", size = 0.75, + colour = "black") + + geom_histogram(stat = "identity", binwidth = 1) + + xlab("Total score") + ylab("% of students with score or higher") + + scale_y_continuous(breaks = c(0, cesuurlijnen, length(student.scores)), + labels = c(seq(0, 100, 20))) + + scale_x_continuous(limits = c(df[1, 1] - 1, n.item + 1), breaks = 0:n.item, + expand = c(0, 0)) + + theme_bw() + + theme(panel.grid = element_blank()) + + print(g) +} + +CumHist(student.scores, n.item = ncol(key)) +``` + +```{r, echo = FALSE, eval = TRUE, warnings = FALSE, message = FALSE, results = 'asis', fig.width = 11} +## Do the whole script above for every sub category +if (!length(categories) == 0) { + cat("\n\n") + cat("\\pagebreak") + cat("\n\n") + cat("# Analysis for subcategories") + cat("\n\n") + + for (i in 1:length(categories)) { + items <- categories[[i]]$items + if (i != 1) { + cat("\\pagebreak") + cat("\n\n") + } + cat("\n\n") + cat(paste("## Category:", categories[[i]]$name )) + cat("\n\n") + + item.list <- CreateItemList(number.answeroptions[items], + correct.frequency[items], correct.percentage[items], + frequency.answer.options[, items], percentage.answer.options[, items], + categories[[i]]$corrected.item.tot.cor, + categories[[i]]$corrected.item.tot.cor.answ.option, + item.names[items], key[, items]) + + correct.df <- CreateDf(item.names[items], correct.percentage[items], + categories[[i]]$corrected.item.tot.cor) + + PercPlotOverall(correct.df) + IRCPlotOverall(correct.df) + + if (any(key[, items] != 0)) { + ans.opt.df <- CreateAnsDf(item.list, key[, items], + number.answeroptions[items], item.names[items]) + NumberOfPlots(ans.opt.df, key[, items]) + PercPlotAnswOpt(ans.opt.df, items.in.plot, key[, items]) + IRCPlotAnswOpt(ans.opt.df, items.in.plot, key[, items]) + } + + cat("\\pagebreak") + cat("\n\n") + cat("## Cumulative Distribution") + cat("\n\n") + CumHist(categories[[i]]$student.scores, n.item = length(items)) + } +} +``` \ No newline at end of file From 1377dfc9e7ffebbeaf15e134a59769860a97023a Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Wed, 3 Aug 2016 15:17:32 +0200 Subject: [PATCH 02/21] Added *.Rmd files to R lintr --- bin/travis-linter.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/travis-linter.sh b/bin/travis-linter.sh index bcb7082..749821f 100755 --- a/bin/travis-linter.sh +++ b/bin/travis-linter.sh @@ -3,7 +3,7 @@ set -e exitstatus=0 -for file in $(pwd)/app/Lib/Rscripts/*.R +for file in $(pwd)/app/Lib/Rscripts/*.R* do Rscript -e "lintr::lint(\"$file\")" outputbytes=`Rscript -e "lintr::lint(\"$file\")" | grep ^ | wc -c` From b7b6fa13a1242264302aa29a4fbf3d79d16d7221 Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Wed, 10 Aug 2016 10:30:03 +0200 Subject: [PATCH 03/21] Updated R scripts --- app/Lib/Rscripts/report.R | 27 +- app/Lib/Rscripts/report.Rmd | 1471 +++++++++++++++++++---------------- 2 files changed, 832 insertions(+), 666 deletions(-) diff --git a/app/Lib/Rscripts/report.R b/app/Lib/Rscripts/report.R index 0c10111..02fc318 100644 --- a/app/Lib/Rscripts/report.R +++ b/app/Lib/Rscripts/report.R @@ -1,17 +1,10 @@ -report <- function(file.name, - title, - number.answeroptions, - key, - item.names, - correct.frequency, - correct.percentage, - frequency.answer.options, - percentage.answer.options, - corrected.item.tot.cor, - corrected.item.tot.cor.answ.option, - cronbach, - student.scores, - categories) { - rmarkdown::render("report.Rmd", output_format = "html_document", output_file = paste0(file.name,".html")) - rmarkdown::render("report.Rmd", output_format = "pdf_document", output_file = paste0(file.name,".pdf")) -} +report <- function(filename, title, number_answeroptions, key, item_names, + correct_frequency, correct_percentage, + frequency_answer_options, percentage_answer_options, + corrected_item_tot_cor, corrected_item_tot_cor_answ_option, + cronbach, student_scores, categories) { + rmarkdown::render("report_subcategories.Rmd", output_format = "html_document", + output_file = paste0(filename, ".html")) + rmarkdown::render("report_subcategories.Rmd", output_format = "pdf_document", + output_file = paste0(filename, ".pdf")) +} \ No newline at end of file diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd index 986124b..90d8192 100644 --- a/app/Lib/Rscripts/report.Rmd +++ b/app/Lib/Rscripts/report.Rmd @@ -1,649 +1,822 @@ ---- -html_document: null -fig_width: 11 -classoption: landscape -keep_md: yes -output: html_document -pdf_document: default ---- - -```{r, echo = FALSE, warning = FALSE, message = FALSE} -library(ggplot2) -library(pander) -library(plyr) -``` - - -```{r, echo = FALSE, eval = TRUE, results = "asis"} - -CreateItemList <- function(number.answeroptions, correct.frequency, - correct.percentage, frequency.answer.options, percentage.answer.options, - corrected.item.tot.cor, corrected.item.tot.cor.answ.option, item.names, key) { - # Creates a list with frequency, percentage correct, and IRC for every item. - # - # Args: - # number.answeroptions: Vector with number of answer options per item - # frequency.answer.options: Matrix with [i,j] the frequency of - # answer option i of item j - # percentage.answer.options: Matrix with [i,j] the percentage of - # answer option i of item j - # corrected.item.tot.cor.answ.option: Matrix with [i,j] the item total - # correlation for answer option i of item j - # correct.frequency: Vector with total number correct per item - # correct.percentage: Vector with percentage correct per item - # corrected.item.tot.cor: Vector with item rest correlation per item - # - # Returns: - # List with one entry per item. Each entry consists of a data frame with - # frequency, percentage and IRC for the total item and the answer options - # (if the item has answer options) - - item.list <- list() # Creates list to put item output in - colnames1 <- c(" ", "Frequency", "Percentage", "IRC") - colnames2 <- c("Answer Option", "Frequency", "Percentage", "IRC", "Correct") - - # For every item, create entry in list with frequencies, percentages, and IRCs - for (i in 1:length(item.names)) { - if (number.answeroptions[i] > 0) { - correct <- c(ifelse(key[1:number.answeroptions[i], i] == 1, - "Correct", "Incorrect"), "Incorrect") - - item.list[[i]] <- data.frame( - c(LETTERS[1:number.answeroptions[i]], "Missing"), - c(frequency.answer.options[c(2: (number.answeroptions[i] + 1), 1), i]), - c(percentage.answer.options[c(2: (number.answeroptions[i] + 1), 1), i]), - c(corrected.item.tot.cor.answ.option[c(2: (number.answeroptions[i] + 1), - 1), i]), - correct, - row.names = NULL) - colnames(item.list[[i]]) <- colnames2 - - } else { - item.list[[i]] <- data.frame("Correct", correct.frequency[i], - correct.percentage[i], - corrected.item.tot.cor[i]) - colnames(item.list[[i]]) <- colnames1 - } - } - - names(item.list) <- item.names # Attach item names - return(item.list) -} - -# Use the function defined above -item.list <- CreateItemList(number.answeroptions, correct.frequency, - correct.percentage, frequency.answer.options, percentage.answer.options, - corrected.item.tot.cor, corrected.item.tot.cor.answ.option, item.names, key) - -cat(paste("#", title)) -``` - -# General - -What | Result | -:---------------------- | :------------- | -Number of students | `r length(student.scores)`| -Number of items | `r length(item.names)`| -Average score | `r round(mean(student.scores), digits = 3)`| -Standard deviation | `r round(sd(student.scores), digits = 3)`| -Cronbach's alpha | `r cronbach`| -Standard error | `r round(sd(student.scores * sqrt(1 - cronbach)), digits = 3)`| - - -# Item statistics - -For each item the frequency, percentage and item rest correlations (IRC) of -every answer option are shown The IRC should be (highly) positive for the -right answer option and low for the wrong answer option(s). - -```{r pander, results = "asis", eval = TRUE, echo = FALSE, warning = FALSE, message = FALSE} - -pander.table <- function(item.list, key) { - # Print table for each item with frequency, percentage and the - # item-rest correlation. Correct answers are highlighted in bold - # - # Args: - # item.list: list with one entry per item. Each entry consists of a data frame - # Data frame has a row for every answer option + one for missing. - # Columns are: Answer option, frequency, percentage, IRC, - # and Correct (answer option is Correct/Incorrect) - # Length should be the same as number of columns of key - # key: Matrix of 0's and 1's. key[i,j] implies wether answer option i - # to item j is right (1) or wrong (0). If a column (item) consists of - # only 0s, the item is interpreted as graded manually. - # Number of columns should be equal to length of item.list - - panderOptions("knitr.auto.asis", FALSE) - for (i in 1:length(item.list)) { - if (any(key[, i] == 1)) { - item.list[[i]]$Correct <- revalue(item.list[[i]]$Correct, - c("Correct" = "X", "Incorrect" = "")) - emphasize.strong.rows(which(key[, i] == 1, arr.ind = TRUE)) - pander(item.list[i]) - } else { - t <- item.list[i] - pander(t) - } - } -} - -pander.table(item.list, key) -``` - -\pagebreak - -```{r, echo = FALSE, message = FALSE, warning = FALSE, fig.width = 11, results = 'asis'} - -CreateDf <- function(item.names, correct.percentage, corrected.item.tot.cor) { - # Creates a data frame with the overall item statistics including a colour - # indicator for the percentage correct and IRC. Colours is defined below. - # - # Args: - # These are the same as defined in CreateItemList - # - # Returns: - # Data frame with a row for every item. Columns consist of the item name, - # percentage correct, colour indicator for percentage, IRC and colour - # indicator for the IRC. - - correct.df <- data.frame(item = factor(item.names, item.names), - correct.percentage, - perc.col = ifelse(correct.percentage < 40, 0, - ifelse(correct.percentage > 70, 1, - 1 / 75 * correct.percentage - 7 / 30)), - - corrected.item.tot.cor, - irc.col = ifelse(corrected.item.tot.cor < 0, 0, - ifelse(corrected.item.tot.cor > .05, 1, - 8 * corrected.item.tot.cor + 3 / 10)) - ) - - return(correct.df) -} - -PercPlotOverall <- function(correct.df) { - # Plots bar graph of percentage correct of every item. Bars are coloured - # depending on the percentage correct: - # < 40: Red - # 40 - 70: Orange till green-ish - # > 70: Green - # - # Args: - # correct.df: data frame with item name, percentage correct and colour code - # for the percentages. - # - # Returns: - # Explanation of the plot and the plot itself - - cat(" - -## Percentage Correct - -Plot of percentage correct per item. If *more than 70%* of the students answer -an item correctly, the students have mastered the material well enough, thus it -is coloured *green*. If *less than 40%* of the students answer an item -correctly, the students have not mastered the material, and is coloured *red*. -*Orange* indicates the percentage correct lies *between 40% and 70%*. The -colours are based on multiple choice items with 3 or 4 answer options. - -") - - bar.freq <- ggplot(correct.df, aes(item, correct.percentage, fill = perc.col)) - bar.freq <- bar.freq + geom_bar(stat = "identity") + # Create Bar chart - scale_fill_gradient(low = "red", high = "green", limits = c(0, 1), - guide = FALSE) + # Create colour boundray - coord_cartesian(ylim = c(0, 100)) + # Change y-axis limit to constant - labs(x = "Item", y = "Percentage Correct") + # x-axis name - theme_bw() + # Black and white - theme(axis.text.x = element_text(size = 8, angle = 90), # Change font size - axis.ticks.x = element_line(size = .4)) - - suppressWarnings(print(bar.freq)) -} - -IRCPlotOverall <- function(correct.df) { - # Plots bar graph of IRC of every item. Bars are coloured by IRC: - # < 0: Red - # 0 - .10: Orange till green-ish - # > .10: green - # - # Args: - # correct.df: data frame with item name, percentage correct and colour code - # for the percentages. - # - # Returns: - # Explanation of the plot and the plot itself - - cat("\\pagebreak - -## Item rest correlations - -Plot of Item Rest Correlations per item: The correlation between an item (0 for -wrong answers and 1 for right answers) and the sum of all other items. Items -with a correlation *higher than .05* indicate that students who answered this -item correctly answered the other items more often correctly than students who -answered this item incorrectly. Thus it is coloured *green*. Items with a -correlation *lower than 0* indicate no relationship between this item and the -other items, or that students who answered this item correctly answered the -other items more often incorrectly than students who answered this incorrectly, -thus it is coloured *red*. *Between 0 and .05*, an item is coloured *orange*. - -") - - # Create bar chart - bar.IRC <- ggplot(correct.df, aes(item, corrected.item.tot.cor, - fill = irc.col)) - bar.IRC <- bar.IRC + geom_bar(stat = "identity") + # Create Bar chart - scale_fill_gradient(low = "red", high = "green", limits = c(0, 1), - guide = FALSE) + - theme_bw() + - labs(x = "Item", y = "Item Rest Correlation") + # axes names - theme(strip.text.x = element_text(size = 7), - axis.text.x = element_text(size = 8, angle = 90), - axis.ticks.x = element_line(size = .4)) - - suppressWarnings(print(bar.IRC)) -} - -correct.df <- CreateDf(item.names, correct.percentage, corrected.item.tot.cor) -PercPlotOverall(correct.df) -IRCPlotOverall(correct.df) -``` - -```{r, echo = FALSE, eval = TRUE, warning= FALSE, fig.width = 11} - -# Create extra variables for the colours in the bar plots -PercCol <- function(percentage, correct) { - # Calculate colours for the percentage answer options. - # Args: - # percentage: Percentage answered per answer options - # correct: Vector of "Correct" and "Incorrect" of same length of percentage - # - # Returns: - # Value between 0 and 1. 0 corresponds to red and 1 to green in the plots. - # For the correct options: - # < 40: red - # 40 - 70: organge - green-ish - # > 70: green - # For the incorrect options: difference between correct answer option and - # incorrect answer options are calculated. Colours are based on difference: - # < -10: Green - # -10 - 0: orange - green-ish - # > 0: green - - perc.col <- vector("numeric", length = length(correct)) - perc.col[correct == "Correct"] <- ifelse(percentage[correct == "Correct"] - < 40, 0, ifelse(percentage[correct == "Correct"] > 70, 1, - 1 / 75 * percentage[correct == "Correct"] - 7 / 30)) - dif <- percentage[correct == "Incorrect"] - - min(percentage[correct == "Correct"]) - perc.col[correct == "Incorrect"] <- ifelse(dif >= 0, 0, - ifelse(dif < -10, 1, - -1 / 25 * dif + 3 / 10)) - perc.col -} - -IrcCol <- function(IRC, correct){ - # Calculate colours for the percentage answer options. - # - # Args: - # IRC: IRC per answer options. Same length as correct - # correct: Vector of "Correct" and "Incorrect" of same length of IRC - # - # Returns: - # Value between 0 and 1. 0 corresponds to red and 1 to green in the plots. - # For the correct answer options: - # < 0: red - # 0 - 0.05: orange - green-ish - # > 0.05: green - # For the incorrect answer options: difference between correct answer option - # and incorrect answer option are calculated. Colours are based on this - # difference: - # > 0: red - # -0.05 - 0: orange - green-ish - # < -0.05: red - - IRC.col <- numeric() - IRC.col[correct == "Correct"] <- ifelse(IRC[correct == "Correct"] < 0, 0, - ifelse(IRC[correct == "Correct"] > .05, 1, - 8 * IRC[correct == "Correct"] + 3 / 10)) - dif <- IRC[correct == "Incorrect"] - min(IRC[correct == "Correct"]) - IRC.col[correct == "Incorrect"] <- ifelse(dif >= 0, 0, - ifelse(dif < -.05, 1, -8 * dif + 3 / 10)) - return(IRC.col) -} - -CreateAnsDf <- function(item.list, key, number.answeroptions, item.names) { - # Creates a data frame with the statistics per answer option including a - # colour indicator for the percentage correct and IRC. - # - # Args: - # These are the same as defined earlier - # - # Returns: - # Data frame with a row for every answer option. Columns consist of the - # item name, answer options, frequency, percentage, IRC, Correct (whether - # an option is correct/incorrect), colour for percentage and colour for IRC - - for (i in 1:length(item.list)) { - if (any(key[, i] != 0)) { - item.list[[i]]$perc.col <- with(item.list[[i]], - PercCol(Percentage, Correct)) - item.list[[i]]$IRC.col <- with(item.list[[i]], - IrcCol(IRC, Correct)) - } - } - ans.opt.df <- ldply(item.list[number.answeroptions != 0], data.frame) - names(ans.opt.df)[1] <- "id" - ans.opt.df$Answer.Option <- gsub("Missing", "Mi", ans.opt.df$Answer.Option) - ans.opt.df$Answer.Option <- factor(ans.opt.df$Answer.Option, - levels = c(LETTERS[1:max(number.answeroptions)], "Mi")) - ans.opt.df$id <- factor(ans.opt.df$id, - levels = item.names[number.answeroptions != 0]) - if (all(key == 0)) { - ans.opt.df <- NULL - } - - ans.opt.df -} - -NumberOfPlots <- function(ans.opt.df, key) { - # Calculates which item is displayed in which plot. The total number of - # answer options is too large (usually) to display in one plot. - # - # Args: - # ans.opt.df: As defined in function above - # - # Returns: - # n.plots: the number of plots - # items.in.plot: list entry per plot. Every entry consists of item names - # in that plot - - if (any(key != 0)) { - max.answ.opts <- 80 # Maximum number of bars per plot - tot.answ.opts <- nrow(ans.opt.df) - n.plots <- ceiling(tot.answ.opts / max.answ.opts) - answ.opts.per.plot <- floor(tot.answ.opts / n.plots) - - items.in.plot <- list(unique(ans.opt.df[1:answ.opts.per.plot, 1])) - if (n.plots > 1) { - for (i in 2:n.plots) { - items.in.plot[[i]] <- unique(ans.opt.df[ (((i - 1) * answ.opts.per.plot) - + 1): (i * answ.opts.per.plot), 1]) - if (items.in.plot[[i]][1] %in% items.in.plot[[i - 1]]) { - items.in.plot[[i]] <- items.in.plot[[i]][-1] - } - } - } - assign("n.plots", n.plots, envir = globalenv()) - assign("items.in.plot", items.in.plot, envir = globalenv()) - - } -} - -ans.opt.df <- CreateAnsDf(item.list, key, number.answeroptions, item.names) -NumberOfPlots(ans.opt.df, key) - -``` - -```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11, results= 'asis'} - -PercPlotAnswOpt <- function(ans.opt.df, items.in.plot, key) { - # Plots bar graph(s) of the percentages of every answer option - # - # Args: - # All defined as before - # - # Returns: - # Description of how to read the plots - # One or multiple plots - - if (any(key != 0)) { - - cat("\\pagebreak - -## Percentage chosen per answer options - -Percentage plot of all answer options per item. **White** bars represent the -**right** answer option(s) and **black** the **wrong** options. - -The border represents the performance of an item. The **right** answer options -are coloured *green* if *more than 70%* of the students choose this option. In -that case, the students have mastered the material. They are coloured *red* if -*less than 40%* choose this option. *Between 40% and 70%*, it is coloured -*orange*. - -The **wrong** answer options are coloured *green* if an option is chosen *10 -percentage point less than the right option*. In that case, more students chose -the right answer option than the wrong option indicating that students are not -confused by the wrong option. They are coloured *red* if an option is chosen -*equally or more often than the right option*. In this case, students pick the -wrong answer option often, indicating that this option may be right as well. It -is coloured *orange* in between these two. - -") - - for (i in 1:length(items.in.plot)) { - bar.freq1 <- ggplot(ans.opt.df[ans.opt.df$id %in% items.in.plot[[i]], ], - aes("Answer Option", Percentage, fill = Correct, colour = perc.col)) - bar.freq1 <- bar.freq1 + geom_bar(aes(x = Answer.Option), - stat = "identity", size = 1.5) + - facet_grid(. ~ id, scales = "free_x", space = "free_x") + # Plot items - scale_fill_manual(values = c("Incorrect" = "Black", # Fill in the bars - "Correct" = "White"), guide = FALSE) + - scale_colour_gradient(low = "red", high = "green", limits = c(0, 1), - guide = FALSE) + # Create colour boundary - coord_cartesian(ylim = c(0, 100)) + # Change y-axis limit to constant - labs(x = "Answer Options") + # x axis name - theme_bw() + - theme(axis.text.x = element_text(size = 8), # Font sizes - axis.text.y = element_text(size = 12), - axis.title.x = element_text(size = 15), - axis.title.y = element_text(size = 15)) - - suppressWarnings(print(bar.freq1)) - } - } -} - -IRCPlotAnswOpt <- function(ans.opt.df, items.in.plot, key) { - # Plots bar graph(s) of the IRC of every answer option - # - # Args: - # All defined as before - # - # Returns: - # Description of how to read the plots - # One or multiple plots - - if (any(key != 0)) { - - cat('\\pagebreak - -## Item rest correlations per answer option - -Item rest correlation (IRC) plot of all answer options per item The IRC is the -correlation between an answer option (1 for students who chose this options and -0 for student who did not) and the sumscore on all other items. **White** bars -represent the **right** answer option(s) and **black** the **wrong** options. - -The border represents the performance of an item. The **right** answer options -are coloured *green* if the IRC is *higher than .05*. In that case, students who -picked the right answer option scored higher on all other items than student who -did not pick the right option. They are coloured *red* if the IRC is *less than -0*. In that case, students who picked the right answer option scored equally or -worse on the other items than students who picked the other options. Between *0 -and .05*, the option is coloured *orange*. - -The **wrong** answer options are coloured *green* if the IRC is *more than .1 -lower than the right option*. In that case, students who answered the other -items incorrectly, usually answered this item incorrectly as well. They are -coloured *red* if the IRC is *equal or higher than the right answer option*. -In that case, students who answered the other items correctly may have answered -this item incorrectly. In between these two, the option is coloured *orange*. - -') - - # Creating IRC plots - for (i in 1:length(items.in.plot)) { - # Select items and variables to plot - bar.IRC1 <- ggplot(ans.opt.df[ans.opt.df$id %in% items.in.plot[[i]], ], - aes("Answer Option", IRC, fill = Correct, colour = IRC.col)) - bar.IRC1 <- bar.IRC1 + geom_bar(aes(x = Answer.Option), stat = "identity", - size = 1.5) + # Create Bar chart - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - scale_fill_manual(values = c("Incorrect" = "Black", "Correct" = "White"), - guide = FALSE) + # Fill in the bars - scale_colour_gradient(low = "red", high = "green", limits = c(0, 1), - guide = FALSE) + # Create colour boundray - # Change y-axis limit to either the maximum and minimum IRC - coord_cartesian(ylim = c(min(ans.opt.df$IRC) - .01, - max(ans.opt.df$IRC) + .01)) + - labs(x = "Answer Options") + - theme_bw() + - theme(axis.text.x = element_text(size = 8), # Font sizes - axis.text.y = element_text(size = 12), - axis.title.x = element_text(size = 15), - axis.title.y = element_text(size = 15) - ) - suppressWarnings(print(bar.IRC1)) - } - } -} - -IRCPlotAnswOpt(ans.opt.df, items.in.plot, key) -PercPlotAnswOpt(ans.opt.df, items.in.plot, key) -``` - -\pagebreak - -## Cumulative Distribution - -Cumulative Distribution showing all possible scores and the percentage of -students that scored that number or higher - -```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11} -CumHist <- function(student.scores, n.item) { - # Create a reversed cumulative histogram, i.e. the height of score x is - # the percentage of students who scores x or higher. - # - # Args: - # student.scores: numeric vector of test score per student. - # n.item: number of items - # - # Returns: - # Reversed cumulative histogram plot - - h <- heights <- length(student.scores) - n <- sort(unique(student.scores)) - - for (i in 1:length(n)) { - h <- h - length(student.scores[student.scores == n[i]]) - heights <- c(heights, h) - } - - cesuurlijnen <- seq(0.2, 0.8, 0.2) * length(student.scores) - gridlijnen <- round(seq(0, length(student.scores), - length(student.scores) / 25), 0) - - gridlijnen <- gridlijnen[-grep(paste(round(cesuurlijnen, 0), - collapse = "|"), gridlijnen)] - - heights <- heights[1:length(unique(student.scores))] - percent <- heights / length(student.scores) * 100 - df <- data.frame(score = sort(unique(student.scores)), - total = heights, percent = round(percent, 2)) - - vec <- df[1, 1]:df[nrow(df), 1] # save all possible total scores - - # Find if a total score is missing and add it - mis <- NA - if (sum(! (vec %in% df[, 1]) != 0)) { - mis <- vec[! (vec %in% df[, 1])] - } - if (!is.na(mis[1])) { - for (i in 1:length(mis)) { - df[ (nrow(df) + 1), ] <- c(mis[i], 0, 0) - } - df <- df[order(df$score), ] - } - - - # if no one gets a certain total score, replace the 0 with the next total score - suppressWarnings( - df[which(df[, 2] == 0, arr.ind = TRUE), 2] <- df[ (which(df[, 2] == 0, - arr.ind = TRUE) + 1), 2] - ) - suppressWarnings( - df[which(df[, 3] == 0, arr.ind = TRUE), 3] <- df[ (which(df[, 3] == 0, - arr.ind = TRUE) + 1), 3] - ) - - g <- ggplot(data = df, aes(x = score, y = total)) + - geom_hline(yintercept = c(gridlijnen), linetype = "solid", - colour = "gray80") + - geom_hline(yintercept = cesuurlijnen, linetype = "dashed", size = 0.75, - colour = "black") + - geom_histogram(stat = "identity", binwidth = 1) + - xlab("Total score") + ylab("% of students with score or higher") + - scale_y_continuous(breaks = c(0, cesuurlijnen, length(student.scores)), - labels = c(seq(0, 100, 20))) + - scale_x_continuous(limits = c(df[1, 1] - 1, n.item + 1), breaks = 0:n.item, - expand = c(0, 0)) + - theme_bw() + - theme(panel.grid = element_blank()) - - print(g) -} - -CumHist(student.scores, n.item = ncol(key)) -``` - -```{r, echo = FALSE, eval = TRUE, warnings = FALSE, message = FALSE, results = 'asis', fig.width = 11} -## Do the whole script above for every sub category -if (!length(categories) == 0) { - cat("\n\n") - cat("\\pagebreak") - cat("\n\n") - cat("# Analysis for subcategories") - cat("\n\n") - - for (i in 1:length(categories)) { - items <- categories[[i]]$items - if (i != 1) { - cat("\\pagebreak") - cat("\n\n") - } - cat("\n\n") - cat(paste("## Category:", categories[[i]]$name )) - cat("\n\n") - - item.list <- CreateItemList(number.answeroptions[items], - correct.frequency[items], correct.percentage[items], - frequency.answer.options[, items], percentage.answer.options[, items], - categories[[i]]$corrected.item.tot.cor, - categories[[i]]$corrected.item.tot.cor.answ.option, - item.names[items], key[, items]) - - correct.df <- CreateDf(item.names[items], correct.percentage[items], - categories[[i]]$corrected.item.tot.cor) - - PercPlotOverall(correct.df) - IRCPlotOverall(correct.df) - - if (any(key[, items] != 0)) { - ans.opt.df <- CreateAnsDf(item.list, key[, items], - number.answeroptions[items], item.names[items]) - NumberOfPlots(ans.opt.df, key[, items]) - PercPlotAnswOpt(ans.opt.df, items.in.plot, key[, items]) - IRCPlotAnswOpt(ans.opt.df, items.in.plot, key[, items]) - } - - cat("\\pagebreak") - cat("\n\n") - cat("## Cumulative Distribution") - cat("\n\n") - CumHist(categories[[i]]$student.scores, n.item = length(items)) - } -} -``` \ No newline at end of file +--- +html_document: null +fig_width: 11 +classoption: landscape +keep_md: yes +output: html_document +pdf_document: default +--- + +```{r, echo = FALSE, warning = FALSE, message = FALSE} +library(ggplot2) +library(pander) +library(plyr) +``` + + +```{r, echo = FALSE, eval = TRUE, results = "asis"} + +create_item_list <- function(number_answeroptions, + correct_frequency, + correct_percentage, + frequency_answer_options, + percentage_answer_options, + corrected_item_tot_cor, + corrected_item_tot_cor_answ_option, + item_names, + key) { + # Creates a list with frequency, percentage correct, and IRC for every item. + # + # Args: + # number_answeroptions: Vector with number of answer options per item. + # Length should be at least 3 and the same as + # correct_frequency, correct_percentage, item_names, + # and corrected_item_tot_cor. + # Length should also be the same as the number of + # colums of: frequency_answer_options, + # percentage_answer_options, corrected_item_tot_cor, + # corrected_item_tot_cor_answ_option, and key + 1. + # correct_frequency: Vector with total number correct per item. + # Length requirements: see number_answeroptions. + # correct_percentage: Vector with percentage correct per item. + # Length requirements: see number_answeroptions. + # frequency_answer_options: Matrix with [i,j] the frequency of answer option + # i of item j. Dimension requirements: see key + # + one extra column (for missing). + # percentage_answer_options: Matrix with [i,j] the percentage of + # answer option i of item j. + # Dimension requirements: see key + 1 extra + # column (for missing) + # corrected_item_tot_cor: Vector with item rest correlation per item. + # Length requirements: see number_answeroptions + # corrected_item_tot_cor_answ_option: Matrix with [i,j] the item total + # correlation for answer option i of + # item j. Dimension requirements: + # see key + one extra column. + # key: Matrix of 0's and 1's. key[i,j] indicates whether answer option i to + # item j is right (1) or wrong (0). If a column (item) consists of only + # 0s, the item is interpreted as graded manually. + # Number of columns should be equal to the length of: + # number_answeroptions, correct_frequency, correct_percentage, and + # corrected_item_tot_cor. + # The following objects have the same dimensions as key + 1 extra row: + # frequency_answer_options, percentage_answer_options, and + # corrected_item_tot_cor_answ_option + # + # Returns: + # List with one entry per item. Each entry consists of a data frame with + # frequency, percentage and IRC for the total item and the answer options + # (if the item has answer options) + + item_list <- list() # Creates list to put item output in + colnames1 <- c(" ", "Frequency", "Percentage", "IRC") + colnames2 <- c("Answer Option", "Frequency", "Percentage", "IRC", "Correct") + + # For every item, create entry in list with frequencies, percentages, and IRCs + for (i in 1:length(item_names)) { + if (number_answeroptions[i] > 0) { + correct <- c( + ifelse( + key[1:number_answeroptions[i], i] == 1, + "Correct", + "Incorrect" + ), + "Incorrect" + ) + + item_list[[i]] <- data.frame( + c(LETTERS[1:number_answeroptions[i]], "Missing"), + c(frequency_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]), + c(percentage_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]), + c(corrected_item_tot_cor_answ_option[ + c(2:(number_answeroptions[i] + 1), 1), + i + ]), + correct, + row.names = NULL) + colnames(item_list[[i]]) <- colnames2 + + } else { + item_list[[i]] <- data.frame("Correct", + correct_frequency[i], + correct_percentage[i], + corrected_item_tot_cor[i] + ) + colnames(item_list[[i]]) <- colnames1 + } + } + + names(item_list) <- item_names + return(item_list) +} + +# Use the function defined above +item_list <- create_item_list( + number_answeroptions, + correct_frequency, + correct_percentage, + frequency_answer_options, + percentage_answer_options, + corrected_item_tot_cor, + corrected_item_tot_cor_answ_option, + item_names, + key +) + +cat(paste("#", title)) +``` + +# General + +What | Result | +:---------------------- | :------------- | +Number of students | `r length(student_scores)`| +Number of items | `r length(item_names)`| +Average score | `r round(mean(student_scores), digits = 3)`| +Standard deviation | `r round(sd(student_scores), digits = 3)`| +Cronbach's alpha | `r cronbach`| +Standard error | `r round(sd(student_scores * sqrt(1 - cronbach)), digits = 3)`| + + +# Item statistics + +For each item the frequency, percentage and item rest correlations (IRC) of +every answer option are shown The IRC should be (highly) positive for the +right answer option and low for the wrong answer option(s). + +```{r pander, results = "asis", eval = TRUE, echo = FALSE, warning = FALSE, message = FALSE} + +print_item_tables <- function(item_list, key) { + # Print table for each item with frequency, percentage and the + # item-rest correlation. Correct answers are highlighted in bold. + # + # Args: + # item_list: list with one entry per item. Each entry has a data frame + # Data frame has a row for every answer option + one for missing. + # Columns are: Answer option, frequency, percentage, IRC, + # and Correct (answer option is Correct/Incorrect) + # Number of rows of each data frame has a maximum as the number of + # columns of key + 1. + # key: Matrix of 0's and 1's. key[i,j] implies wether answer option i + # to item j is right (1) or wrong (0). If a column (item) consists of + # only 0s, the item is interpreted as graded manually. + # Number of columns should be equal to length of item_list + + pander::panderOptions("knitr.auto.asis", FALSE) + for (i in 1:length(item_list)) { + if (any(key[, i] == 1)) { + item_list[[i]]$Correct <- revalue( + item_list[[i]]$Correct, + c("Correct" = "X", "Incorrect" = "") + ) + pander::emphasize.strong.rows(which(key[, i] == 1, arr.ind = TRUE)) + pander::pander(item_list[i]) + } else { + t <- item_list[i] + pander::pander(t) + } + } +} + +print_item_tables(item_list, key) +``` + +\pagebreak + +```{r, echo = FALSE, message = FALSE, warning = FALSE, fig.width = 11, results = 'asis'} + +create_df <- function(item_names, correct_percentage, corrected_item_tot_cor) { + # Creates a data frame with the overall item statistics including a colour + # indicator for the percentage correct and IRC. Colours is defined below. + # + # Args: + # These are the same as defined in create_item_list. + # + # Returns: + # Data frame with a row for every item. Columns consist of the item name, + # percentage correct, colour indicator for percentage, IRC and colour + # indicator for the IRC. + + correct_df <- data.frame( + item = factor(item_names, item_names), + correct_percentage, + perc_col = ifelse( + correct_percentage < 40, 0, + ifelse(correct_percentage > 70, 1, 1 / 75 * correct_percentage - 7 / 30) + ), + corrected_item_tot_cor, + irc_col = ifelse( + corrected_item_tot_cor < 0, 0, + ifelse( + corrected_item_tot_cor > .05, + 1, + 8 * corrected_item_tot_cor + 3 / 10) + ) + ) + + return(correct_df) +} + +plot_overall_perc <- function(correct_df) { + # Plots bar graph of percentage correct of every item. Bars are coloured + # depending on the percentage correct: + # < 40: Red + # 40 - 70: Orange till green-ish + # > 70: Green + # + # Args: + # correct_df: data frame with item name, percentage correct and colour code + # for the percentages. + # + # Returns: + # Explanation of the plot and the plot itself + + cat(" + +## Percentage Correct + +Plot of percentage correct per item. If *more than 70%* of the students answer +an item correctly, the students have mastered the material well enough, thus it +is coloured *green*. If *less than 40%* of the students answer an item +correctly, the students have not mastered the material, and is coloured *red*. +*Orange* indicates the percentage correct lies *between 40% and 70%*. The +colours are based on multiple choice items with 3 or 4 answer options. + +") + + bar_freq <- ggplot2::ggplot( + correct_df, + ggplot2::aes(item, correct_percentage, fill = perc_col)) + bar_freq <- + bar_freq + + # Create bar chart + geom_bar(stat = "identity") + + # Creat colour boundary + scale_fill_gradient( + low = "red", + high = "green", + limits = c(0, 1), + guide = FALSE + ) + + # Change y-axis limit to constant + coord_cartesian(ylim = c(0, 100)) + + # Axes names + labs(x = "Item", y = "Percentage Correct") + + # Black en white + theme_bw() + + # Change various sizes + theme( + axis.text.x = element_text(size = 8, angle = 90), + axis.ticks.x = element_line(size = .4) + ) + + suppressWarnings(print(bar_freq)) +} + +plot_overall_irc <- function(correct_df) { + # Plots bar graph of IRC of every item. Bars are coloured by IRC: + # < 0: Red + # 0 - .10: Orange till green-ish + # > .10: green + # + # Args: + # correct_df: data frame with item name, percentage correct and colour code + # for the percentages. + # + # Returns: + # Explanation of the plot and the plot itself + + cat("\\pagebreak + +## Item rest correlations + +Plot of Item Rest Correlations per item: The correlation between an item (0 for +wrong answers and 1 for right answers) and the sum of all other items. Items +with a correlation *higher than .05* indicate that students who answered this +item correctly answered the other items more often correctly than students who +answered this item incorrectly. Thus it is coloured *green*. Items with a +correlation *lower than 0* indicate no relationship between this item and the +other items, or that students who answered this item correctly answered the +other items more often incorrectly than students who answered this incorrectly, +thus it is coloured *red*. *Between 0 and .05*, an item is coloured *orange*. + +") + + # Create bar chart + bar_irc <- ggplot2::ggplot( + correct_df, + ggplot2::aes(item, corrected_item_tot_cor, fill = irc_col)) + bar_irc <- + bar_irc + + geom_bar(stat = "identity") + + scale_fill_gradient( + low = "red", + high = "green", + limits = c(0, 1), + guide = FALSE + ) + + theme_bw() + + labs(x = "Item", y = "Item Rest Correlation") + + theme( + strip.text.x = element_text(size = 7), + axis.text.x = element_text(size = 8, angle = 90), + axis.ticks.x = element_line(size = .4) + ) + + suppressWarnings(print(bar_irc)) +} + +correct_df <- create_df(item_names, correct_percentage, corrected_item_tot_cor) +plot_overall_perc(correct_df) +plot_overall_irc(correct_df) +``` + +```{r, echo = FALSE, eval = TRUE, warning= FALSE, fig.width = 11} + +# Create extra variables for the colours in the bar plots +make_perc_col <- function(percentage, correct) { + # Calculate colours for the percentage answer options. + # + # Args: + # percentage: Percentage answered per answer options + # correct: Vector of "Correct" and "Incorrect" of same length of percentage + # + # Returns: + # Value between 0 and 1. 0 corresponds to red and 1 to green in the plots. + # For the correct options: + # < 40: red + # 40 - 70: orange - green-ish + # > 70: green + # For the incorrect options: difference between correct answer option and + # incorrect answer options are calculated. Colours are based on difference: + # < -10: Green + # -10 - 0: orange - green-ish + # > 0: green + + perc_col <- vector("numeric", length = length(correct)) + perc_col[correct == "Correct"] <- ifelse( + percentage[correct == "Correct"] < 40, + 0, + ifelse( + percentage[correct == "Correct"] > 70, + 1, + 1 / 75 * percentage[correct == "Correct"] - 7 / 30 + ) + ) + dif <- + percentage[correct == "Incorrect"] - min(percentage[correct == "Correct"]) + perc_col[correct == "Incorrect"] <- ifelse( + dif >= 0, + 0, + ifelse(dif < -10, 1, -1 / 25 * dif + 3 / 10) + ) + perc_col +} + +make_irc_col <- function(IRC, correct){ + # Calculate colours for the percentage answer options. + # + # Args: + # IRC: Vector of IRC per answer options. Same length as correct + # correct: Vector of "Correct" and "Incorrect" of same length of IRC + # + # Returns: + # Value between 0 and 1. 0 corresponds to red and 1 to green in the plots. + # For the correct answer options: + # < 0: red + # 0 - 0.05: orange - green-ish + # > 0.05: green + # For the incorrect answer options: difference between correct answer option + # and incorrect answer option are calculated. Colours are based on this + # difference: + # > 0: red + # -0.05 - 0: orange - green-ish + # < -0.05: red + + irc_col <- numeric() + irc_col[correct == "Correct"] <- ifelse( + IRC[correct == "Correct"] < 0, + 0, + ifelse(IRC[correct == "Correct"] > .05, + 1, + 8 * IRC[correct == "Correct"] + 3 / 10 + ) + ) + dif <- IRC[correct == "Incorrect"] - min(IRC[correct == "Correct"]) + irc_col[correct == "Incorrect"] <- ifelse( + dif >= 0, + 0, + ifelse(dif < -.05, 1, -8 * dif + 3 / 10) + ) + return(irc_col) +} + +make_answ_df <- function(item_list, key, number_answeroptions, item_names) { + # Creates a data frame with the statistics per answer option including a + # colour indicator for the percentage correct and IRC. + # + # Args: + # These are the same as defined earlier + # + # Returns: + # Data frame with a row for every answer option. Columns consist of the + # item name, answer options, frequency, percentage, IRC, Correct (whether + # an option is correct/incorrect), colour for percentage and colour for IRC + + for (i in 1:length(item_list)) { + if (any(key[, i] != 0)) { + item_list[[i]]$perc_col <- with( + item_list[[i]], + make_perc_col(Percentage, Correct) + ) + item_list[[i]]$irc_col <- with(item_list[[i]], make_irc_col(IRC, Correct)) + } + } + ans_opt_df <- plyr::ldply(item_list[number_answeroptions != 0], data.frame) + names(ans_opt_df)[1] <- "id" + ans_opt_df$Answer.Option <- gsub("Missing", "Mi", ans_opt_df$Answer.Option) + ans_opt_df$Answer.Option <- factor( + ans_opt_df$Answer.Option, + levels = c(LETTERS[1:max(number_answeroptions)], "Mi") + ) + ans_opt_df$id <- factor( + ans_opt_df$id, + levels = item_names[number_answeroptions != 0] + ) + if (all(key == 0)) { + ans_opt_df <- NULL + } + + ans_opt_df +} + +calc_n_plot <- function(ans_opt_df, key) { + # Calculates which item is displayed in which plot. The total number of + # answer options is too large (usually) to display them all in one plot. + # + # Args: + # ans_opt_df: As defined in function above + # + # Returns: + # n_plots: the number of plots + # items_in_plot: list entry per plot. Every entry consists of item names + # in that plot + + if (any(key != 0)) { + max_answ_opts <- 80 # Maximum number of bars per plot + tot_answ_opts <- nrow(ans_opt_df) + n_plots <- ceiling(tot_answ_opts / max_answ_opts) + answ_opts_per_plot <- floor(tot_answ_opts / n_plots) + + items_in_plot <- list(unique(ans_opt_df[1:answ_opts_per_plot, 1])) + if (n_plots > 1) { + for (i in 2:n_plots) { + items_in_plot[[i]] <- unique( + ans_opt_df[ + (((i - 1) * answ_opts_per_plot) + 1):(i * answ_opts_per_plot), + 1 + ] + ) + if (items_in_plot[[i]][1] %in% items_in_plot[[i - 1]]) { + items_in_plot[[i]] <- items_in_plot[[i]][-1] + } + } + } + assign("items_in_plot", items_in_plot, envir = globalenv()) + } +} + +ans_opt_df <- make_answ_df(item_list, key, number_answeroptions, item_names) +calc_n_plot(ans_opt_df, key) + +``` + +```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11, results= 'asis'} + +plot_perc_answ <- function(ans_opt_df, items_in_plot, key) { + # Plots bar graph(s) of the percentages of every answer option + # + # Args: + # All defined as before + # + # Returns: + # Description of how to read the plots + # One or multiple plots + + if (any(key != 0)) { + + cat("\\pagebreak + +## Percentage chosen per answer options + +Percentage plot of all answer options per item. **White** bars represent the +**right** answer option(s) and **black** the **wrong** options. + +The border represents the performance of an item. The **right** answer options +are coloured *green* if *more than 70%* of the students choose this option. In +that case, the students have mastered the material. They are coloured *red* if +*less than 40%* choose this option. *Between 40% and 70%*, it is coloured +*orange*. + +The **wrong** answer options are coloured *green* if an option is chosen *10 +percentage point less than the right option*. In that case, more students chose +the right answer option than the wrong option indicating that students are not +confused by the wrong option. They are coloured *red* if an option is chosen +*equally or more often than the right option*. In this case, students pick the +wrong answer option often, indicating that this option may be right as well. It +is coloured *orange* in between these two. + +") + + for (i in 1:length(items_in_plot)) { + bar_freq1 <- ggplot2::ggplot( + ans_opt_df[ans_opt_df$id %in% items_in_plot[[i]], ], + ggplot2::aes( + "Answer Option", + Percentage, + fill = Correct, + colour = perc_col) + ) + bar_freq1 <- + bar_freq1 + + geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) + + # Plot items + facet_grid(. ~ id, scales = "free_x", space = "free_x") + + # Fill in the bars + scale_fill_manual( + values = c("Incorrect" = "Black", "Correct" = "White"), + guide = FALSE + ) + + # Colour the bandaries + scale_colour_gradient( + low = "red", + high = "green", + limits = c(0, 1), + guide = FALSE + ) + + # Change y-axis limits to constants + coord_cartesian(ylim = c(0, 100)) + + # Names x-axis + labs(x = "Answer Options") + + theme_bw() + + # Change various font sizes + theme( + axis.text.x = element_text(size = 8), + axis.text.y = element_text(size = 12), + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15) + ) + + suppressWarnings(print(bar_freq1)) + } + } +} + +plot_irc_answ <- function(ans_opt_df, items_in_plot, key) { + # Plots bar graph(s) of the IRC of every answer option + # + # Args: + # All defined as before + # + # Returns: + # Description of how to read the plots + # One or multiple plots + + if (any(key != 0)) { + + cat('\\pagebreak + +## Item rest correlations per answer option + +Item rest correlation (IRC) plot of all answer options per item. The IRC is the +correlation between an answer option (1 for students who chose this options and +0 for student who did not) and the sumscore on all other items. **White** bars +represent the **right** answer option(s) and **black** the **wrong** options. + +The border represents the performance of an item. The **right** answer options +are coloured *green* if the IRC is *higher than .05*. In that case, students who +picked the right answer option scored higher on all other items than student who +did not pick the right option. They are coloured *red* if the IRC is *less than +0*. In that case, students who picked the right answer option scored equally or +worse on the other items than students who picked the other options. Between *0 +and .05*, the option is coloured *orange*. + +The **wrong** answer options are coloured *green* if the IRC is *more than .1 +lower than the right option*. In that case, students who answered the other +items incorrectly, usually answered this item incorrectly as well. They are +coloured *red* if the IRC is *equal or higher than the right answer option*. +In that case, students who answered the other items correctly may have answered +this item incorrectly. In between these two, the option is coloured *orange*. + +') + + # Creating IRC plots + for (i in 1:length(items_in_plot)) { + # Select items and variables to plot + bar_irc1 <- ggplot2::ggplot( + ans_opt_df[ans_opt_df$id %in% items_in_plot[[i]], ], + ggplot2::aes("Answer Option", IRC, fill = Correct, colour = irc_col) + ) + bar_irc1 <- + bar_irc1 + + geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) + + facet_grid(. ~ id, scales = "free_x", space = "free_x") + + scale_fill_manual( + values = c("Incorrect" = "Black", "Correct" = "White"), + guide = FALSE + ) + + scale_colour_gradient( + low = "red", + high = "green", + limits = c(0, 1), + guide = FALSE + ) + + # Change y-axis limit to either the maximum and minimum IRC + coord_cartesian( + ylim = c(min(ans_opt_df$IRC) - .01, max(ans_opt_df$IRC) + .01) + ) + + labs(x = "Answer Options") + + theme_bw() + + theme( + axis.text.x = element_text(size = 8), + axis.text.y = element_text(size = 12), + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15) + ) + suppressWarnings(print(bar_irc1)) + } + } +} + +plot_irc_answ(ans_opt_df, items_in_plot, key) +plot_perc_answ(ans_opt_df, items_in_plot, key) +``` + +\pagebreak + +## Cumulative Distribution + +Cumulative Distribution showing all possible scores and the percentage of +students that scored that number or higher + +```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11} +CumHist <- function(student_scores, n_item) { + # Create a reversed cumulative histogram, i.e. the height of score x is + # the percentage of students who scores x or higher. + # + # Args: + # student_scores: numeric vector of test score per student. + # n_item: number of items + # + # Returns: + # Reversed cumulative histogram plot + + h <- heights <- length(student_scores) + n <- sort(unique(student_scores)) + + for (i in 1:length(n)) { + h <- h - length(student_scores[student_scores == n[i]]) + heights <- c(heights, h) + } + + cesuurlijnen <- seq(0.2, 0.8, 0.2) * length(student_scores) + gridlijnen <- round( + seq(0, length(student_scores), length(student_scores) / 25), + 0 + ) + + gridlijnen <- gridlijnen[ + -grep(paste(round(cesuurlijnen, 0), collapse = "|"), gridlijnen) + ] + + heights <- heights[1:length(unique(student_scores))] + percent <- heights / length(student_scores) * 100 + df <- data.frame( + score = sort(unique(student_scores)), + total = heights, + percent = round(percent, 2) + ) + + vec <- df[1, 1]:df[nrow(df), 1] # save all possible total scores + + # Find if a total score is missing and add it + mis <- NA + if (sum(! (vec %in% df[, 1]) != 0)) { + mis <- vec[! (vec %in% df[, 1])] + } + if (! is.na(mis[1])) { + for (i in 1:length(mis)) { + df[ (nrow(df) + 1), ] <- c(mis[i], 0, 0) + } + df <- df[order(df$score), ] + } + + + # if no one gets a certain total score, + # replace the 0 with the next total score + suppressWarnings( + df[which(df[, 2] == 0, arr.ind = TRUE), 2] <- + df[(which(df[, 2] == 0, arr.ind = TRUE) + 1), 2] + ) + suppressWarnings( + df[which(df[, 3] == 0, arr.ind = TRUE), 3] <- + df[(which(df[, 3] == 0, arr.ind = TRUE) + 1), 3] + ) + + g <- + ggplot2::ggplot( + data = df, + ggplot2::aes(x = score, y = total) + ) + + geom_hline( + yintercept = c(gridlijnen), + linetype = "solid", + colour = "gray80" + ) + + geom_hline( + yintercept = cesuurlijnen, + linetype = "dashed", + size = 0.75, + colour = "black" + ) + + geom_histogram(stat = "identity", binwidth = 1) + + xlab("Total score") + + ylab("% of students with score or higher") + + scale_y_continuous( + breaks = c(0, cesuurlijnen, length(student_scores)), + labels = c(seq(0, 100, 20)) + ) + + scale_x_continuous( + limits = c(df[1, 1] - 1, n_item + 1), + breaks = 0:n_item, + expand = c(0, 0) + ) + + theme_bw() + + theme(panel.grid = element_blank()) + + print(g) +} + +plot_cum_hist(student_scores, n_item = ncol(key)) +``` + +```{r, echo = FALSE, eval = TRUE, warnings = FALSE, message = FALSE, results = 'asis', fig.width = 11} +## Do the whole script above for every sub category +if (!length(categories) == 0) { + cat("\n\n") + cat("\\pagebreak") + cat("\n\n") + cat("# Analysis for subcategories") + cat("\n\n") + + for (i in 1:length(categories)) { + items <- categories[[i]]$items + if (i != 1) { + cat("\\pagebreak") + cat("\n\n") + } + cat("\n\n") + cat(paste("## Category:", categories[[i]]$name )) + cat("\n\n") + + item_list <- create_item_list( + number_answeroptions[items], + correct_frequency[items], + correct_percentage[items], + frequency_answer_options[, items], + percentage_answer_options[, items], + categories[[i]]$corrected_item_tot_cor, + categories[[i]]$corrected_item_tot_cor_answ_option, + item_names[items], + key[, items] + ) + + correct_df <- create_df( + item_names[items], + correct_percentage[items], + categories[[i]]$corrected_item_tot_cor + ) + + plot_overall_perc(correct_df) + plot_overall_irc(correct_df) + + if (any(key[, items] != 0)) { + ans_opt_df <- make_answ_df( + item_list, + key[, items], + number_answeroptions[items], + item_names[items] + ) + calc_n_plot(ans_opt_df, key[, items]) + plot_perc_answ(ans_opt_df, items_in_plot, key[, items]) + plot_irc_answ(ans_opt_df, items_in_plot, key[, items]) + } + + cat("\\pagebreak") + cat("\n\n") + cat("## Cumulative Distribution") + cat("\n\n") + plot_cum_hist(categories[[i]]$student_scores, n_item = length(items)) + } +} +``` From 71aa1846ca842e3e3e49989bfd67b75439b19f55 Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Thu, 11 Aug 2016 14:04:18 +0200 Subject: [PATCH 04/21] Changed order of parameters to be consistent with previous version of the report script --- app/Lib/Rscripts/report.R | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/app/Lib/Rscripts/report.R b/app/Lib/Rscripts/report.R index 02fc318..119ac64 100644 --- a/app/Lib/Rscripts/report.R +++ b/app/Lib/Rscripts/report.R @@ -1,10 +1,16 @@ -report <- function(filename, title, number_answeroptions, key, item_names, - correct_frequency, correct_percentage, - frequency_answer_options, percentage_answer_options, - corrected_item_tot_cor, corrected_item_tot_cor_answ_option, - cronbach, student_scores, categories) { - rmarkdown::render("report_subcategories.Rmd", output_format = "html_document", - output_file = paste0(filename, ".html")) - rmarkdown::render("report_subcategories.Rmd", output_format = "pdf_document", - output_file = paste0(filename, ".pdf")) -} \ No newline at end of file +report <- function(filename, + number_answeroptions, + cronbach, + frequency_answer_options, + percentage_answer_options, + key, + correct_frequency, + correct_percentage, + corrected_item_tot_cor, + corrected_item_tot_cor_answ_option, + title, + item_names, + student_scores, + categories) { + rmarkdown::render("report.Rmd", output_format = "pdf_document", output_file = filename) +} From cfb48e6e4dadd1635183471f0db573c8594651b6 Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Thu, 11 Aug 2016 14:24:19 +0200 Subject: [PATCH 05/21] Updated call to new report script --- app/Model/Exam.php | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/app/Model/Exam.php b/app/Model/Exam.php index 5596132..40993f8 100644 --- a/app/Model/Exam.php +++ b/app/Model/Exam.php @@ -589,12 +589,6 @@ protected function _report($exam) { } } - // Create the input_correct matrix (with given dimensions) by filling it with a vector (by row) - $script[] = sprintf( - 'input_correct = matrix( c( %s ), number_students, number_questions, byrow = TRUE );', - implode(',', $inputCorrectMatrix) - ); - $keyMatrix = array(); foreach ($exam['Item'] as $i => $item) { @@ -613,11 +607,15 @@ protected function _report($exam) { implode(',', $keyMatrix) ); + $script[] = 'student_scores = c( );'; + + $script[] = 'categories = c( );'; + $script[] = sprintf( 'report( ' . - '"%s", number_students, number_answeroptions, number_questions, Cronbach, frequency_answer_options, ' . - 'percentage_answer_options, input_correct, key, correct_frequency, correct_percentage, ' . - 'corrected_item_tot_cor, corrected_item_tot_cor_answ_option, "%s", item_names' . + '"%s", number_answeroptions, Cronbach, frequency_answer_options, percentage_answer_options, key, ' . + 'correct_frequency, correct_percentage, corrected_item_tot_cor, corrected_item_tot_cor_answ_option, "%s", ' . + 'item_names, student_scores, categories' . ' );', $tempFile, $exam['Exam']['name'] ); From afcb095c72b0927988857b78be91b02263ee2cf1 Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Thu, 11 Aug 2016 14:58:28 +0200 Subject: [PATCH 06/21] Updated call to new report script --- app/Test/Case/Lib/RserveTest.php | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/app/Test/Case/Lib/RserveTest.php b/app/Test/Case/Lib/RserveTest.php index 0197488..1d148ce 100644 --- a/app/Test/Case/Lib/RserveTest.php +++ b/app/Test/Case/Lib/RserveTest.php @@ -205,14 +205,13 @@ public function testExecuteRscriptsReport() { $script = array(); $script[] = file_get_contents(APP . 'Lib' . DS . 'Rscripts' . DS . 'report.R'); - $script[] = sprintf('file_name = "%s";', $filename); + $script[] = sprintf('filename = "%s";', $filename); $script[] = 'number_students = 2;'; $script[] = 'number_answeroptions = c( 3, 3, 3 );'; $script[] = 'number_questions = 3;'; $script[] = 'cronbach = 0.5;'; $script[] = 'frequency_answer_options = matrix( c( 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 ), nrow = 4, ncol = number_questions, byrow = FALSE );'; $script[] = 'percentage_answer_options = matrix( c( 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 ), nrow = 4, ncol = number_questions, byrow = FALSE );'; - $script[] = 'input_correct = matrix( c( 0, 0, 0, 0, 0, 0 ), nrow = number_students, ncol = number_questions, byrow = TRUE );'; $script[] = 'key = matrix( c( 1, 0, 0, 1, 0, 0, 1, 0, 0 ), nrow = 3, ncol = number_questions, byrow = FALSE );'; $script[] = 'correct_frequency = c( 0, 0, 0 );'; $script[] = 'correct_percentage = c( 0, 0, 0 );'; @@ -220,12 +219,10 @@ public function testExecuteRscriptsReport() { $script[] = 'corrected_item_tot_cor_answ_option = matrix( c( 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 ), nrow = 4, ncol = number_questions, byrow = FALSE );'; $script[] = 'title = "Test Title";'; $script[] = 'item_names = c( "Item 1", "Item 2", "Item 3" );'; - $script[] = 'report( file_name, number_students, number_answeroptions, number_questions, cronbach, - frequency_answer_options, percentage_answer_options, - input_correct, key, correct_frequency, - correct_percentage, corrected_item_tot_cor, - corrected_item_tot_cor_answ_option, title, - item_names);'; + $script[] = 'student_scores = c( );'; + $script[] = 'categories = c( );'; + $script[] = 'report( filename, number_answeroptions, cronbach, frequency_answer_options, percentage_answer_options, key, correct_frequency, + correct_percentage, corrected_item_tot_cor, corrected_item_tot_cor_answ_option, title, item_names, student_scores, categories );'; $script = implode("\n", $script); $result = $this->Rserve->execute($script); From d7d2a1674dafc86eb7b630abebaa20d513d578d4 Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Thu, 11 Aug 2016 15:07:59 +0200 Subject: [PATCH 07/21] Added installtion of Pandoc and pander package to Travis configuration --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 6e9e761..313a56f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,6 +40,7 @@ before_script: - sudo apt-get install -y --force-yes r-base r-cran-rserve - sudo apt-get install -y --force-yes libcurl4-gnutls-dev - sudo apt-get install -y --force-yes libxml2-dev + - sudo apt-get install -y --force-yes pandoc - sudo mkdir -p ~/R/Library - sudo echo 'R_LIBS=~/R/Library' > ~/.Renviron - sudo echo 'options(repos = "http://cran.rstudio.com")' > ~/.Rprofile @@ -48,6 +49,7 @@ before_script: - sudo Rscript -e 'if(!"gplots" %in% rownames(installed.packages())) { install.packages("gplots", dependencies = TRUE) }' - sudo Rscript -e 'if(!"ggplot2" %in% rownames(installed.packages())) { install.packages("ggplot2", dependencies = TRUE) }' - sudo Rscript -e 'if(!"gridExtra" %in% rownames(installed.packages())) { install.packages("gridExtra", dependencies = TRUE) }' + - sudo Rscript -e 'if(!"pander" %in% rownames(installed.packages())) { install.packages("pander", dependencies = TRUE) }' - sudo Rscript -e 'if(!"psy" %in% rownames(installed.packages())) { install.packages("psy", dependencies = TRUE) }' - sudo Rscript -e 'update.packages(ask = FALSE, instlib = "~/R/Library")' - sudo R CMD Rserve From 60f1325a44bcf76641255fecc797ce30cea0e785 Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Thu, 11 Aug 2016 15:25:06 +0200 Subject: [PATCH 08/21] Updated Travis configuration to install Pandoc version 1.12.3 or higher --- .travis.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 313a56f..b808069 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,7 +40,10 @@ before_script: - sudo apt-get install -y --force-yes r-base r-cran-rserve - sudo apt-get install -y --force-yes libcurl4-gnutls-dev - sudo apt-get install -y --force-yes libxml2-dev - - sudo apt-get install -y --force-yes pandoc + - sudo apt-get install -y --force-yes ghc + - sudo apt-get install -y --force-yes cabal-install + - sudo cabal update + - sudo cabal install pandoc - sudo mkdir -p ~/R/Library - sudo echo 'R_LIBS=~/R/Library' > ~/.Renviron - sudo echo 'options(repos = "http://cran.rstudio.com")' > ~/.Rprofile From c4ffad4502c78427a04ba6663bbadd9557e68b38 Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Thu, 11 Aug 2016 15:37:27 +0200 Subject: [PATCH 09/21] Fixed installation of Pandoc --- .travis.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index b808069..080d989 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,10 +40,8 @@ before_script: - sudo apt-get install -y --force-yes r-base r-cran-rserve - sudo apt-get install -y --force-yes libcurl4-gnutls-dev - sudo apt-get install -y --force-yes libxml2-dev - - sudo apt-get install -y --force-yes ghc - - sudo apt-get install -y --force-yes cabal-install - - sudo cabal update - - sudo cabal install pandoc + wget https://github.com/jgm/pandoc/releases/download/1.17.2/pandoc-1.17.2-1-amd64.deb + - sudo dpkg -i pandoc-1.17.2-1-amd64.deb - sudo mkdir -p ~/R/Library - sudo echo 'R_LIBS=~/R/Library' > ~/.Renviron - sudo echo 'options(repos = "http://cran.rstudio.com")' > ~/.Rprofile From 8a38448d2b380fd5c2267891dbacfe777f018e4a Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Thu, 11 Aug 2016 15:44:34 +0200 Subject: [PATCH 10/21] Fixed typo --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 080d989..b5bd556 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,7 +40,7 @@ before_script: - sudo apt-get install -y --force-yes r-base r-cran-rserve - sudo apt-get install -y --force-yes libcurl4-gnutls-dev - sudo apt-get install -y --force-yes libxml2-dev - wget https://github.com/jgm/pandoc/releases/download/1.17.2/pandoc-1.17.2-1-amd64.deb + - wget https://github.com/jgm/pandoc/releases/download/1.17.2/pandoc-1.17.2-1-amd64.deb - sudo dpkg -i pandoc-1.17.2-1-amd64.deb - sudo mkdir -p ~/R/Library - sudo echo 'R_LIBS=~/R/Library' > ~/.Renviron From 1c0c30c8d9ef854e4d79ac4d36c722b3b1f9314d Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Thu, 11 Aug 2016 16:56:54 +0200 Subject: [PATCH 11/21] Added student scores to test --- app/Test/Case/Lib/RserveTest.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/Test/Case/Lib/RserveTest.php b/app/Test/Case/Lib/RserveTest.php index 1d148ce..c9f98be 100644 --- a/app/Test/Case/Lib/RserveTest.php +++ b/app/Test/Case/Lib/RserveTest.php @@ -219,7 +219,7 @@ public function testExecuteRscriptsReport() { $script[] = 'corrected_item_tot_cor_answ_option = matrix( c( 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 ), nrow = 4, ncol = number_questions, byrow = FALSE );'; $script[] = 'title = "Test Title";'; $script[] = 'item_names = c( "Item 1", "Item 2", "Item 3" );'; - $script[] = 'student_scores = c( );'; + $script[] = 'student_scores = c( 7, 9 );'; $script[] = 'categories = c( );'; $script[] = 'report( filename, number_answeroptions, cronbach, frequency_answer_options, percentage_answer_options, key, correct_frequency, correct_percentage, corrected_item_tot_cor, corrected_item_tot_cor_answ_option, title, item_names, student_scores, categories );'; From 4bda000c8b61a5cd09d76ff38296b95cb7d7c843 Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Thu, 11 Aug 2016 16:57:32 +0200 Subject: [PATCH 12/21] Fixed path to Rmd script (workaround) --- app/Lib/Rscripts/report.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/Lib/Rscripts/report.R b/app/Lib/Rscripts/report.R index 119ac64..477040f 100644 --- a/app/Lib/Rscripts/report.R +++ b/app/Lib/Rscripts/report.R @@ -12,5 +12,5 @@ report <- function(filename, item_names, student_scores, categories) { - rmarkdown::render("report.Rmd", output_format = "pdf_document", output_file = filename) + rmarkdown::render("/home/travis/build/nlware/qdnatool/app/Lib/Rscripts/report.Rmd", output_format = "pdf_document", output_file = filename) } From e6ff1a6537a3181a72c3264b654fc4b20d1d8326 Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Thu, 11 Aug 2016 17:13:19 +0200 Subject: [PATCH 13/21] Fixed encoding issue --- app/Lib/Rscripts/report.Rmd | 1644 +++++++++++++++++------------------ 1 file changed, 822 insertions(+), 822 deletions(-) diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd index 90d8192..ad69e1f 100644 --- a/app/Lib/Rscripts/report.Rmd +++ b/app/Lib/Rscripts/report.Rmd @@ -1,822 +1,822 @@ ---- -html_document: null -fig_width: 11 -classoption: landscape -keep_md: yes -output: html_document -pdf_document: default ---- - -```{r, echo = FALSE, warning = FALSE, message = FALSE} -library(ggplot2) -library(pander) -library(plyr) -``` - - -```{r, echo = FALSE, eval = TRUE, results = "asis"} - -create_item_list <- function(number_answeroptions, - correct_frequency, - correct_percentage, - frequency_answer_options, - percentage_answer_options, - corrected_item_tot_cor, - corrected_item_tot_cor_answ_option, - item_names, - key) { - # Creates a list with frequency, percentage correct, and IRC for every item. - # - # Args: - # number_answeroptions: Vector with number of answer options per item. - # Length should be at least 3 and the same as - # correct_frequency, correct_percentage, item_names, - # and corrected_item_tot_cor. - # Length should also be the same as the number of - # colums of: frequency_answer_options, - # percentage_answer_options, corrected_item_tot_cor, - # corrected_item_tot_cor_answ_option, and key + 1. - # correct_frequency: Vector with total number correct per item. - # Length requirements: see number_answeroptions. - # correct_percentage: Vector with percentage correct per item. - # Length requirements: see number_answeroptions. - # frequency_answer_options: Matrix with [i,j] the frequency of answer option - # i of item j. Dimension requirements: see key - # + one extra column (for missing). - # percentage_answer_options: Matrix with [i,j] the percentage of - # answer option i of item j. - # Dimension requirements: see key + 1 extra - # column (for missing) - # corrected_item_tot_cor: Vector with item rest correlation per item. - # Length requirements: see number_answeroptions - # corrected_item_tot_cor_answ_option: Matrix with [i,j] the item total - # correlation for answer option i of - # item j. Dimension requirements: - # see key + one extra column. - # key: Matrix of 0's and 1's. key[i,j] indicates whether answer option i to - # item j is right (1) or wrong (0). If a column (item) consists of only - # 0s, the item is interpreted as graded manually. - # Number of columns should be equal to the length of: - # number_answeroptions, correct_frequency, correct_percentage, and - # corrected_item_tot_cor. - # The following objects have the same dimensions as key + 1 extra row: - # frequency_answer_options, percentage_answer_options, and - # corrected_item_tot_cor_answ_option - # - # Returns: - # List with one entry per item. Each entry consists of a data frame with - # frequency, percentage and IRC for the total item and the answer options - # (if the item has answer options) - - item_list <- list() # Creates list to put item output in - colnames1 <- c(" ", "Frequency", "Percentage", "IRC") - colnames2 <- c("Answer Option", "Frequency", "Percentage", "IRC", "Correct") - - # For every item, create entry in list with frequencies, percentages, and IRCs - for (i in 1:length(item_names)) { - if (number_answeroptions[i] > 0) { - correct <- c( - ifelse( - key[1:number_answeroptions[i], i] == 1, - "Correct", - "Incorrect" - ), - "Incorrect" - ) - - item_list[[i]] <- data.frame( - c(LETTERS[1:number_answeroptions[i]], "Missing"), - c(frequency_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]), - c(percentage_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]), - c(corrected_item_tot_cor_answ_option[ - c(2:(number_answeroptions[i] + 1), 1), - i - ]), - correct, - row.names = NULL) - colnames(item_list[[i]]) <- colnames2 - - } else { - item_list[[i]] <- data.frame("Correct", - correct_frequency[i], - correct_percentage[i], - corrected_item_tot_cor[i] - ) - colnames(item_list[[i]]) <- colnames1 - } - } - - names(item_list) <- item_names - return(item_list) -} - -# Use the function defined above -item_list <- create_item_list( - number_answeroptions, - correct_frequency, - correct_percentage, - frequency_answer_options, - percentage_answer_options, - corrected_item_tot_cor, - corrected_item_tot_cor_answ_option, - item_names, - key -) - -cat(paste("#", title)) -``` - -# General - -What | Result | -:---------------------- | :------------- | -Number of students | `r length(student_scores)`| -Number of items | `r length(item_names)`| -Average score | `r round(mean(student_scores), digits = 3)`| -Standard deviation | `r round(sd(student_scores), digits = 3)`| -Cronbach's alpha | `r cronbach`| -Standard error | `r round(sd(student_scores * sqrt(1 - cronbach)), digits = 3)`| - - -# Item statistics - -For each item the frequency, percentage and item rest correlations (IRC) of -every answer option are shown The IRC should be (highly) positive for the -right answer option and low for the wrong answer option(s). - -```{r pander, results = "asis", eval = TRUE, echo = FALSE, warning = FALSE, message = FALSE} - -print_item_tables <- function(item_list, key) { - # Print table for each item with frequency, percentage and the - # item-rest correlation. Correct answers are highlighted in bold. - # - # Args: - # item_list: list with one entry per item. Each entry has a data frame - # Data frame has a row for every answer option + one for missing. - # Columns are: Answer option, frequency, percentage, IRC, - # and Correct (answer option is Correct/Incorrect) - # Number of rows of each data frame has a maximum as the number of - # columns of key + 1. - # key: Matrix of 0's and 1's. key[i,j] implies wether answer option i - # to item j is right (1) or wrong (0). If a column (item) consists of - # only 0s, the item is interpreted as graded manually. - # Number of columns should be equal to length of item_list - - pander::panderOptions("knitr.auto.asis", FALSE) - for (i in 1:length(item_list)) { - if (any(key[, i] == 1)) { - item_list[[i]]$Correct <- revalue( - item_list[[i]]$Correct, - c("Correct" = "X", "Incorrect" = "") - ) - pander::emphasize.strong.rows(which(key[, i] == 1, arr.ind = TRUE)) - pander::pander(item_list[i]) - } else { - t <- item_list[i] - pander::pander(t) - } - } -} - -print_item_tables(item_list, key) -``` - -\pagebreak - -```{r, echo = FALSE, message = FALSE, warning = FALSE, fig.width = 11, results = 'asis'} - -create_df <- function(item_names, correct_percentage, corrected_item_tot_cor) { - # Creates a data frame with the overall item statistics including a colour - # indicator for the percentage correct and IRC. Colours is defined below. - # - # Args: - # These are the same as defined in create_item_list. - # - # Returns: - # Data frame with a row for every item. Columns consist of the item name, - # percentage correct, colour indicator for percentage, IRC and colour - # indicator for the IRC. - - correct_df <- data.frame( - item = factor(item_names, item_names), - correct_percentage, - perc_col = ifelse( - correct_percentage < 40, 0, - ifelse(correct_percentage > 70, 1, 1 / 75 * correct_percentage - 7 / 30) - ), - corrected_item_tot_cor, - irc_col = ifelse( - corrected_item_tot_cor < 0, 0, - ifelse( - corrected_item_tot_cor > .05, - 1, - 8 * corrected_item_tot_cor + 3 / 10) - ) - ) - - return(correct_df) -} - -plot_overall_perc <- function(correct_df) { - # Plots bar graph of percentage correct of every item. Bars are coloured - # depending on the percentage correct: - # < 40: Red - # 40 - 70: Orange till green-ish - # > 70: Green - # - # Args: - # correct_df: data frame with item name, percentage correct and colour code - # for the percentages. - # - # Returns: - # Explanation of the plot and the plot itself - - cat(" - -## Percentage Correct - -Plot of percentage correct per item. If *more than 70%* of the students answer -an item correctly, the students have mastered the material well enough, thus it -is coloured *green*. If *less than 40%* of the students answer an item -correctly, the students have not mastered the material, and is coloured *red*. -*Orange* indicates the percentage correct lies *between 40% and 70%*. The -colours are based on multiple choice items with 3 or 4 answer options. - -") - - bar_freq <- ggplot2::ggplot( - correct_df, - ggplot2::aes(item, correct_percentage, fill = perc_col)) - bar_freq <- - bar_freq + - # Create bar chart - geom_bar(stat = "identity") + - # Creat colour boundary - scale_fill_gradient( - low = "red", - high = "green", - limits = c(0, 1), - guide = FALSE - ) + - # Change y-axis limit to constant - coord_cartesian(ylim = c(0, 100)) + - # Axes names - labs(x = "Item", y = "Percentage Correct") + - # Black en white - theme_bw() + - # Change various sizes - theme( - axis.text.x = element_text(size = 8, angle = 90), - axis.ticks.x = element_line(size = .4) - ) - - suppressWarnings(print(bar_freq)) -} - -plot_overall_irc <- function(correct_df) { - # Plots bar graph of IRC of every item. Bars are coloured by IRC: - # < 0: Red - # 0 - .10: Orange till green-ish - # > .10: green - # - # Args: - # correct_df: data frame with item name, percentage correct and colour code - # for the percentages. - # - # Returns: - # Explanation of the plot and the plot itself - - cat("\\pagebreak - -## Item rest correlations - -Plot of Item Rest Correlations per item: The correlation between an item (0 for -wrong answers and 1 for right answers) and the sum of all other items. Items -with a correlation *higher than .05* indicate that students who answered this -item correctly answered the other items more often correctly than students who -answered this item incorrectly. Thus it is coloured *green*. Items with a -correlation *lower than 0* indicate no relationship between this item and the -other items, or that students who answered this item correctly answered the -other items more often incorrectly than students who answered this incorrectly, -thus it is coloured *red*. *Between 0 and .05*, an item is coloured *orange*. - -") - - # Create bar chart - bar_irc <- ggplot2::ggplot( - correct_df, - ggplot2::aes(item, corrected_item_tot_cor, fill = irc_col)) - bar_irc <- - bar_irc + - geom_bar(stat = "identity") + - scale_fill_gradient( - low = "red", - high = "green", - limits = c(0, 1), - guide = FALSE - ) + - theme_bw() + - labs(x = "Item", y = "Item Rest Correlation") + - theme( - strip.text.x = element_text(size = 7), - axis.text.x = element_text(size = 8, angle = 90), - axis.ticks.x = element_line(size = .4) - ) - - suppressWarnings(print(bar_irc)) -} - -correct_df <- create_df(item_names, correct_percentage, corrected_item_tot_cor) -plot_overall_perc(correct_df) -plot_overall_irc(correct_df) -``` - -```{r, echo = FALSE, eval = TRUE, warning= FALSE, fig.width = 11} - -# Create extra variables for the colours in the bar plots -make_perc_col <- function(percentage, correct) { - # Calculate colours for the percentage answer options. - # - # Args: - # percentage: Percentage answered per answer options - # correct: Vector of "Correct" and "Incorrect" of same length of percentage - # - # Returns: - # Value between 0 and 1. 0 corresponds to red and 1 to green in the plots. - # For the correct options: - # < 40: red - # 40 - 70: orange - green-ish - # > 70: green - # For the incorrect options: difference between correct answer option and - # incorrect answer options are calculated. Colours are based on difference: - # < -10: Green - # -10 - 0: orange - green-ish - # > 0: green - - perc_col <- vector("numeric", length = length(correct)) - perc_col[correct == "Correct"] <- ifelse( - percentage[correct == "Correct"] < 40, - 0, - ifelse( - percentage[correct == "Correct"] > 70, - 1, - 1 / 75 * percentage[correct == "Correct"] - 7 / 30 - ) - ) - dif <- - percentage[correct == "Incorrect"] - min(percentage[correct == "Correct"]) - perc_col[correct == "Incorrect"] <- ifelse( - dif >= 0, - 0, - ifelse(dif < -10, 1, -1 / 25 * dif + 3 / 10) - ) - perc_col -} - -make_irc_col <- function(IRC, correct){ - # Calculate colours for the percentage answer options. - # - # Args: - # IRC: Vector of IRC per answer options. Same length as correct - # correct: Vector of "Correct" and "Incorrect" of same length of IRC - # - # Returns: - # Value between 0 and 1. 0 corresponds to red and 1 to green in the plots. - # For the correct answer options: - # < 0: red - # 0 - 0.05: orange - green-ish - # > 0.05: green - # For the incorrect answer options: difference between correct answer option - # and incorrect answer option are calculated. Colours are based on this - # difference: - # > 0: red - # -0.05 - 0: orange - green-ish - # < -0.05: red - - irc_col <- numeric() - irc_col[correct == "Correct"] <- ifelse( - IRC[correct == "Correct"] < 0, - 0, - ifelse(IRC[correct == "Correct"] > .05, - 1, - 8 * IRC[correct == "Correct"] + 3 / 10 - ) - ) - dif <- IRC[correct == "Incorrect"] - min(IRC[correct == "Correct"]) - irc_col[correct == "Incorrect"] <- ifelse( - dif >= 0, - 0, - ifelse(dif < -.05, 1, -8 * dif + 3 / 10) - ) - return(irc_col) -} - -make_answ_df <- function(item_list, key, number_answeroptions, item_names) { - # Creates a data frame with the statistics per answer option including a - # colour indicator for the percentage correct and IRC. - # - # Args: - # These are the same as defined earlier - # - # Returns: - # Data frame with a row for every answer option. Columns consist of the - # item name, answer options, frequency, percentage, IRC, Correct (whether - # an option is correct/incorrect), colour for percentage and colour for IRC - - for (i in 1:length(item_list)) { - if (any(key[, i] != 0)) { - item_list[[i]]$perc_col <- with( - item_list[[i]], - make_perc_col(Percentage, Correct) - ) - item_list[[i]]$irc_col <- with(item_list[[i]], make_irc_col(IRC, Correct)) - } - } - ans_opt_df <- plyr::ldply(item_list[number_answeroptions != 0], data.frame) - names(ans_opt_df)[1] <- "id" - ans_opt_df$Answer.Option <- gsub("Missing", "Mi", ans_opt_df$Answer.Option) - ans_opt_df$Answer.Option <- factor( - ans_opt_df$Answer.Option, - levels = c(LETTERS[1:max(number_answeroptions)], "Mi") - ) - ans_opt_df$id <- factor( - ans_opt_df$id, - levels = item_names[number_answeroptions != 0] - ) - if (all(key == 0)) { - ans_opt_df <- NULL - } - - ans_opt_df -} - -calc_n_plot <- function(ans_opt_df, key) { - # Calculates which item is displayed in which plot. The total number of - # answer options is too large (usually) to display them all in one plot. - # - # Args: - # ans_opt_df: As defined in function above - # - # Returns: - # n_plots: the number of plots - # items_in_plot: list entry per plot. Every entry consists of item names - # in that plot - - if (any(key != 0)) { - max_answ_opts <- 80 # Maximum number of bars per plot - tot_answ_opts <- nrow(ans_opt_df) - n_plots <- ceiling(tot_answ_opts / max_answ_opts) - answ_opts_per_plot <- floor(tot_answ_opts / n_plots) - - items_in_plot <- list(unique(ans_opt_df[1:answ_opts_per_plot, 1])) - if (n_plots > 1) { - for (i in 2:n_plots) { - items_in_plot[[i]] <- unique( - ans_opt_df[ - (((i - 1) * answ_opts_per_plot) + 1):(i * answ_opts_per_plot), - 1 - ] - ) - if (items_in_plot[[i]][1] %in% items_in_plot[[i - 1]]) { - items_in_plot[[i]] <- items_in_plot[[i]][-1] - } - } - } - assign("items_in_plot", items_in_plot, envir = globalenv()) - } -} - -ans_opt_df <- make_answ_df(item_list, key, number_answeroptions, item_names) -calc_n_plot(ans_opt_df, key) - -``` - -```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11, results= 'asis'} - -plot_perc_answ <- function(ans_opt_df, items_in_plot, key) { - # Plots bar graph(s) of the percentages of every answer option - # - # Args: - # All defined as before - # - # Returns: - # Description of how to read the plots - # One or multiple plots - - if (any(key != 0)) { - - cat("\\pagebreak - -## Percentage chosen per answer options - -Percentage plot of all answer options per item. **White** bars represent the -**right** answer option(s) and **black** the **wrong** options. - -The border represents the performance of an item. The **right** answer options -are coloured *green* if *more than 70%* of the students choose this option. In -that case, the students have mastered the material. They are coloured *red* if -*less than 40%* choose this option. *Between 40% and 70%*, it is coloured -*orange*. - -The **wrong** answer options are coloured *green* if an option is chosen *10 -percentage point less than the right option*. In that case, more students chose -the right answer option than the wrong option indicating that students are not -confused by the wrong option. They are coloured *red* if an option is chosen -*equally or more often than the right option*. In this case, students pick the -wrong answer option often, indicating that this option may be right as well. It -is coloured *orange* in between these two. - -") - - for (i in 1:length(items_in_plot)) { - bar_freq1 <- ggplot2::ggplot( - ans_opt_df[ans_opt_df$id %in% items_in_plot[[i]], ], - ggplot2::aes( - "Answer Option", - Percentage, - fill = Correct, - colour = perc_col) - ) - bar_freq1 <- - bar_freq1 + - geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) + - # Plot items - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - # Fill in the bars - scale_fill_manual( - values = c("Incorrect" = "Black", "Correct" = "White"), - guide = FALSE - ) + - # Colour the bandaries - scale_colour_gradient( - low = "red", - high = "green", - limits = c(0, 1), - guide = FALSE - ) + - # Change y-axis limits to constants - coord_cartesian(ylim = c(0, 100)) + - # Names x-axis - labs(x = "Answer Options") + - theme_bw() + - # Change various font sizes - theme( - axis.text.x = element_text(size = 8), - axis.text.y = element_text(size = 12), - axis.title.x = element_text(size = 15), - axis.title.y = element_text(size = 15) - ) - - suppressWarnings(print(bar_freq1)) - } - } -} - -plot_irc_answ <- function(ans_opt_df, items_in_plot, key) { - # Plots bar graph(s) of the IRC of every answer option - # - # Args: - # All defined as before - # - # Returns: - # Description of how to read the plots - # One or multiple plots - - if (any(key != 0)) { - - cat('\\pagebreak - -## Item rest correlations per answer option - -Item rest correlation (IRC) plot of all answer options per item. The IRC is the -correlation between an answer option (1 for students who chose this options and -0 for student who did not) and the sumscore on all other items. **White** bars -represent the **right** answer option(s) and **black** the **wrong** options. - -The border represents the performance of an item. The **right** answer options -are coloured *green* if the IRC is *higher than .05*. In that case, students who -picked the right answer option scored higher on all other items than student who -did not pick the right option. They are coloured *red* if the IRC is *less than -0*. In that case, students who picked the right answer option scored equally or -worse on the other items than students who picked the other options. Between *0 -and .05*, the option is coloured *orange*. - -The **wrong** answer options are coloured *green* if the IRC is *more than .1 -lower than the right option*. In that case, students who answered the other -items incorrectly, usually answered this item incorrectly as well. They are -coloured *red* if the IRC is *equal or higher than the right answer option*. -In that case, students who answered the other items correctly may have answered -this item incorrectly. In between these two, the option is coloured *orange*. - -') - - # Creating IRC plots - for (i in 1:length(items_in_plot)) { - # Select items and variables to plot - bar_irc1 <- ggplot2::ggplot( - ans_opt_df[ans_opt_df$id %in% items_in_plot[[i]], ], - ggplot2::aes("Answer Option", IRC, fill = Correct, colour = irc_col) - ) - bar_irc1 <- - bar_irc1 + - geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) + - facet_grid(. ~ id, scales = "free_x", space = "free_x") + - scale_fill_manual( - values = c("Incorrect" = "Black", "Correct" = "White"), - guide = FALSE - ) + - scale_colour_gradient( - low = "red", - high = "green", - limits = c(0, 1), - guide = FALSE - ) + - # Change y-axis limit to either the maximum and minimum IRC - coord_cartesian( - ylim = c(min(ans_opt_df$IRC) - .01, max(ans_opt_df$IRC) + .01) - ) + - labs(x = "Answer Options") + - theme_bw() + - theme( - axis.text.x = element_text(size = 8), - axis.text.y = element_text(size = 12), - axis.title.x = element_text(size = 15), - axis.title.y = element_text(size = 15) - ) - suppressWarnings(print(bar_irc1)) - } - } -} - -plot_irc_answ(ans_opt_df, items_in_plot, key) -plot_perc_answ(ans_opt_df, items_in_plot, key) -``` - -\pagebreak - -## Cumulative Distribution - -Cumulative Distribution showing all possible scores and the percentage of -students that scored that number or higher - -```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11} -CumHist <- function(student_scores, n_item) { - # Create a reversed cumulative histogram, i.e. the height of score x is - # the percentage of students who scores x or higher. - # - # Args: - # student_scores: numeric vector of test score per student. - # n_item: number of items - # - # Returns: - # Reversed cumulative histogram plot - - h <- heights <- length(student_scores) - n <- sort(unique(student_scores)) - - for (i in 1:length(n)) { - h <- h - length(student_scores[student_scores == n[i]]) - heights <- c(heights, h) - } - - cesuurlijnen <- seq(0.2, 0.8, 0.2) * length(student_scores) - gridlijnen <- round( - seq(0, length(student_scores), length(student_scores) / 25), - 0 - ) - - gridlijnen <- gridlijnen[ - -grep(paste(round(cesuurlijnen, 0), collapse = "|"), gridlijnen) - ] - - heights <- heights[1:length(unique(student_scores))] - percent <- heights / length(student_scores) * 100 - df <- data.frame( - score = sort(unique(student_scores)), - total = heights, - percent = round(percent, 2) - ) - - vec <- df[1, 1]:df[nrow(df), 1] # save all possible total scores - - # Find if a total score is missing and add it - mis <- NA - if (sum(! (vec %in% df[, 1]) != 0)) { - mis <- vec[! (vec %in% df[, 1])] - } - if (! is.na(mis[1])) { - for (i in 1:length(mis)) { - df[ (nrow(df) + 1), ] <- c(mis[i], 0, 0) - } - df <- df[order(df$score), ] - } - - - # if no one gets a certain total score, - # replace the 0 with the next total score - suppressWarnings( - df[which(df[, 2] == 0, arr.ind = TRUE), 2] <- - df[(which(df[, 2] == 0, arr.ind = TRUE) + 1), 2] - ) - suppressWarnings( - df[which(df[, 3] == 0, arr.ind = TRUE), 3] <- - df[(which(df[, 3] == 0, arr.ind = TRUE) + 1), 3] - ) - - g <- - ggplot2::ggplot( - data = df, - ggplot2::aes(x = score, y = total) - ) + - geom_hline( - yintercept = c(gridlijnen), - linetype = "solid", - colour = "gray80" - ) + - geom_hline( - yintercept = cesuurlijnen, - linetype = "dashed", - size = 0.75, - colour = "black" - ) + - geom_histogram(stat = "identity", binwidth = 1) + - xlab("Total score") + - ylab("% of students with score or higher") + - scale_y_continuous( - breaks = c(0, cesuurlijnen, length(student_scores)), - labels = c(seq(0, 100, 20)) - ) + - scale_x_continuous( - limits = c(df[1, 1] - 1, n_item + 1), - breaks = 0:n_item, - expand = c(0, 0) - ) + - theme_bw() + - theme(panel.grid = element_blank()) - - print(g) -} - -plot_cum_hist(student_scores, n_item = ncol(key)) -``` - -```{r, echo = FALSE, eval = TRUE, warnings = FALSE, message = FALSE, results = 'asis', fig.width = 11} -## Do the whole script above for every sub category -if (!length(categories) == 0) { - cat("\n\n") - cat("\\pagebreak") - cat("\n\n") - cat("# Analysis for subcategories") - cat("\n\n") - - for (i in 1:length(categories)) { - items <- categories[[i]]$items - if (i != 1) { - cat("\\pagebreak") - cat("\n\n") - } - cat("\n\n") - cat(paste("## Category:", categories[[i]]$name )) - cat("\n\n") - - item_list <- create_item_list( - number_answeroptions[items], - correct_frequency[items], - correct_percentage[items], - frequency_answer_options[, items], - percentage_answer_options[, items], - categories[[i]]$corrected_item_tot_cor, - categories[[i]]$corrected_item_tot_cor_answ_option, - item_names[items], - key[, items] - ) - - correct_df <- create_df( - item_names[items], - correct_percentage[items], - categories[[i]]$corrected_item_tot_cor - ) - - plot_overall_perc(correct_df) - plot_overall_irc(correct_df) - - if (any(key[, items] != 0)) { - ans_opt_df <- make_answ_df( - item_list, - key[, items], - number_answeroptions[items], - item_names[items] - ) - calc_n_plot(ans_opt_df, key[, items]) - plot_perc_answ(ans_opt_df, items_in_plot, key[, items]) - plot_irc_answ(ans_opt_df, items_in_plot, key[, items]) - } - - cat("\\pagebreak") - cat("\n\n") - cat("## Cumulative Distribution") - cat("\n\n") - plot_cum_hist(categories[[i]]$student_scores, n_item = length(items)) - } -} -``` +--- +html_document: null +fig_width: 11 +classoption: landscape +keep_md: yes +output: html_document +pdf_document: default +--- + +```{r, echo = FALSE, warning = FALSE, message = FALSE} +library(ggplot2) +library(pander) +library(plyr) +``` + + +```{r, echo = FALSE, eval = TRUE, results = "asis"} + +create_item_list <- function(number_answeroptions, + correct_frequency, + correct_percentage, + frequency_answer_options, + percentage_answer_options, + corrected_item_tot_cor, + corrected_item_tot_cor_answ_option, + item_names, + key) { + # Creates a list with frequency, percentage correct, and IRC for every item. + # + # Args: + # number_answeroptions: Vector with number of answer options per item. + # Length should be at least 3 and the same as + # correct_frequency, correct_percentage, item_names, + # and corrected_item_tot_cor. + # Length should also be the same as the number of + # colums of: frequency_answer_options, + # percentage_answer_options, corrected_item_tot_cor, + # corrected_item_tot_cor_answ_option, and key + 1. + # correct_frequency: Vector with total number correct per item. + # Length requirements: see number_answeroptions. + # correct_percentage: Vector with percentage correct per item. + # Length requirements: see number_answeroptions. + # frequency_answer_options: Matrix with [i,j] the frequency of answer option + # i of item j. Dimension requirements: see key + # + one extra column (for missing). + # percentage_answer_options: Matrix with [i,j] the percentage of + # answer option i of item j. + # Dimension requirements: see key + 1 extra + # column (for missing) + # corrected_item_tot_cor: Vector with item rest correlation per item. + # Length requirements: see number_answeroptions + # corrected_item_tot_cor_answ_option: Matrix with [i,j] the item total + # correlation for answer option i of + # item j. Dimension requirements: + # see key + one extra column. + # key: Matrix of 0's and 1's. key[i,j] indicates whether answer option i to + # item j is right (1) or wrong (0). If a column (item) consists of only + # 0s, the item is interpreted as graded manually. + # Number of columns should be equal to the length of: + # number_answeroptions, correct_frequency, correct_percentage, and + # corrected_item_tot_cor. + # The following objects have the same dimensions as key + 1 extra row: + # frequency_answer_options, percentage_answer_options, and + # corrected_item_tot_cor_answ_option + # + # Returns: + # List with one entry per item. Each entry consists of a data frame with + # frequency, percentage and IRC for the total item and the answer options + # (if the item has answer options) + + item_list <- list() # Creates list to put item output in + colnames1 <- c(" ", "Frequency", "Percentage", "IRC") + colnames2 <- c("Answer Option", "Frequency", "Percentage", "IRC", "Correct") + + # For every item, create entry in list with frequencies, percentages, and IRCs + for (i in 1:length(item_names)) { + if (number_answeroptions[i] > 0) { + correct <- c( + ifelse( + key[1:number_answeroptions[i], i] == 1, + "Correct", + "Incorrect" + ), + "Incorrect" + ) + + item_list[[i]] <- data.frame( + c(LETTERS[1:number_answeroptions[i]], "Missing"), + c(frequency_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]), + c(percentage_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]), + c(corrected_item_tot_cor_answ_option[ + c(2:(number_answeroptions[i] + 1), 1), + i + ]), + correct, + row.names = NULL) + colnames(item_list[[i]]) <- colnames2 + + } else { + item_list[[i]] <- data.frame("Correct", + correct_frequency[i], + correct_percentage[i], + corrected_item_tot_cor[i] + ) + colnames(item_list[[i]]) <- colnames1 + } + } + + names(item_list) <- item_names + return(item_list) +} + +# Use the function defined above +item_list <- create_item_list( + number_answeroptions, + correct_frequency, + correct_percentage, + frequency_answer_options, + percentage_answer_options, + corrected_item_tot_cor, + corrected_item_tot_cor_answ_option, + item_names, + key +) + +cat(paste("#", title)) +``` + +# General + +What | Result | +:---------------------- | :------------- | +Number of students | `r length(student_scores)`| +Number of items | `r length(item_names)`| +Average score | `r round(mean(student_scores), digits = 3)`| +Standard deviation | `r round(sd(student_scores), digits = 3)`| +Cronbach's alpha | `r cronbach`| +Standard error | `r round(sd(student_scores * sqrt(1 - cronbach)), digits = 3)`| + + +# Item statistics + +For each item the frequency, percentage and item rest correlations (IRC) of +every answer option are shown The IRC should be (highly) positive for the +right answer option and low for the wrong answer option(s). + +```{r pander, results = "asis", eval = TRUE, echo = FALSE, warning = FALSE, message = FALSE} + +print_item_tables <- function(item_list, key) { + # Print table for each item with frequency, percentage and the + # item-rest correlation. Correct answers are highlighted in bold. + # + # Args: + # item_list: list with one entry per item. Each entry has a data frame + # Data frame has a row for every answer option + one for missing. + # Columns are: Answer option, frequency, percentage, IRC, + # and Correct (answer option is Correct/Incorrect) + # Number of rows of each data frame has a maximum as the number of + # columns of key + 1. + # key: Matrix of 0's and 1's. key[i,j] implies wether answer option i + # to item j is right (1) or wrong (0). If a column (item) consists of + # only 0s, the item is interpreted as graded manually. + # Number of columns should be equal to length of item_list + + pander::panderOptions("knitr.auto.asis", FALSE) + for (i in 1:length(item_list)) { + if (any(key[, i] == 1)) { + item_list[[i]]$Correct <- revalue( + item_list[[i]]$Correct, + c("Correct" = "X", "Incorrect" = "") + ) + pander::emphasize.strong.rows(which(key[, i] == 1, arr.ind = TRUE)) + pander::pander(item_list[i]) + } else { + t <- item_list[i] + pander::pander(t) + } + } +} + +print_item_tables(item_list, key) +``` + +\pagebreak + +```{r, echo = FALSE, message = FALSE, warning = FALSE, fig.width = 11, results = 'asis'} + +create_df <- function(item_names, correct_percentage, corrected_item_tot_cor) { + # Creates a data frame with the overall item statistics including a colour + # indicator for the percentage correct and IRC. Colours is defined below. + # + # Args: + # These are the same as defined in create_item_list. + # + # Returns: + # Data frame with a row for every item. Columns consist of the item name, + # percentage correct, colour indicator for percentage, IRC and colour + # indicator for the IRC. + + correct_df <- data.frame( + item = factor(item_names, item_names), + correct_percentage, + perc_col = ifelse( + correct_percentage < 40, 0, + ifelse(correct_percentage > 70, 1, 1 / 75 * correct_percentage - 7 / 30) + ), + corrected_item_tot_cor, + irc_col = ifelse( + corrected_item_tot_cor < 0, 0, + ifelse( + corrected_item_tot_cor > .05, + 1, + 8 * corrected_item_tot_cor + 3 / 10) + ) + ) + + return(correct_df) +} + +plot_overall_perc <- function(correct_df) { + # Plots bar graph of percentage correct of every item. Bars are coloured + # depending on the percentage correct: + # < 40: Red + # 40 - 70: Orange till green-ish + # > 70: Green + # + # Args: + # correct_df: data frame with item name, percentage correct and colour code + # for the percentages. + # + # Returns: + # Explanation of the plot and the plot itself + + cat(" + +## Percentage Correct + +Plot of percentage correct per item. If *more than 70%* of the students answer +an item correctly, the students have mastered the material well enough, thus it +is coloured *green*. If *less than 40%* of the students answer an item +correctly, the students have not mastered the material, and is coloured *red*. +*Orange* indicates the percentage correct lies *between 40% and 70%*. The +colours are based on multiple choice items with 3 or 4 answer options. + +") + + bar_freq <- ggplot2::ggplot( + correct_df, + ggplot2::aes(item, correct_percentage, fill = perc_col)) + bar_freq <- + bar_freq + + # Create bar chart + geom_bar(stat = "identity") + + # Creat colour boundary + scale_fill_gradient( + low = "red", + high = "green", + limits = c(0, 1), + guide = FALSE + ) + + # Change y-axis limit to constant + coord_cartesian(ylim = c(0, 100)) + + # Axes names + labs(x = "Item", y = "Percentage Correct") + + # Black en white + theme_bw() + + # Change various sizes + theme( + axis.text.x = element_text(size = 8, angle = 90), + axis.ticks.x = element_line(size = .4) + ) + + suppressWarnings(print(bar_freq)) +} + +plot_overall_irc <- function(correct_df) { + # Plots bar graph of IRC of every item. Bars are coloured by IRC: + # < 0: Red + # 0 - .10: Orange till green-ish + # > .10: green + # + # Args: + # correct_df: data frame with item name, percentage correct and colour code + # for the percentages. + # + # Returns: + # Explanation of the plot and the plot itself + + cat("\\pagebreak + +## Item rest correlations + +Plot of Item Rest Correlations per item: The correlation between an item (0 for +wrong answers and 1 for right answers) and the sum of all other items. Items +with a correlation *higher than .05* indicate that students who answered this +item correctly answered the other items more often correctly than students who +answered this item incorrectly. Thus it is coloured *green*. Items with a +correlation *lower than 0* indicate no relationship between this item and the +other items, or that students who answered this item correctly answered the +other items more often incorrectly than students who answered this incorrectly, +thus it is coloured *red*. *Between 0 and .05*, an item is coloured *orange*. + +") + + # Create bar chart + bar_irc <- ggplot2::ggplot( + correct_df, + ggplot2::aes(item, corrected_item_tot_cor, fill = irc_col)) + bar_irc <- + bar_irc + + geom_bar(stat = "identity") + + scale_fill_gradient( + low = "red", + high = "green", + limits = c(0, 1), + guide = FALSE + ) + + theme_bw() + + labs(x = "Item", y = "Item Rest Correlation") + + theme( + strip.text.x = element_text(size = 7), + axis.text.x = element_text(size = 8, angle = 90), + axis.ticks.x = element_line(size = .4) + ) + + suppressWarnings(print(bar_irc)) +} + +correct_df <- create_df(item_names, correct_percentage, corrected_item_tot_cor) +plot_overall_perc(correct_df) +plot_overall_irc(correct_df) +``` + +```{r, echo = FALSE, eval = TRUE, warning= FALSE, fig.width = 11} + +# Create extra variables for the colours in the bar plots +make_perc_col <- function(percentage, correct) { + # Calculate colours for the percentage answer options. + # + # Args: + # percentage: Percentage answered per answer options + # correct: Vector of "Correct" and "Incorrect" of same length of percentage + # + # Returns: + # Value between 0 and 1. 0 corresponds to red and 1 to green in the plots. + # For the correct options: + # < 40: red + # 40 - 70: orange - green-ish + # > 70: green + # For the incorrect options: difference between correct answer option and + # incorrect answer options are calculated. Colours are based on difference: + # < -10: Green + # -10 - 0: orange - green-ish + # > 0: green + + perc_col <- vector("numeric", length = length(correct)) + perc_col[correct == "Correct"] <- ifelse( + percentage[correct == "Correct"] < 40, + 0, + ifelse( + percentage[correct == "Correct"] > 70, + 1, + 1 / 75 * percentage[correct == "Correct"] - 7 / 30 + ) + ) + dif <- + percentage[correct == "Incorrect"] - min(percentage[correct == "Correct"]) + perc_col[correct == "Incorrect"] <- ifelse( + dif >= 0, + 0, + ifelse(dif < -10, 1, -1 / 25 * dif + 3 / 10) + ) + perc_col +} + +make_irc_col <- function(IRC, correct){ + # Calculate colours for the percentage answer options. + # + # Args: + # IRC: Vector of IRC per answer options. Same length as correct + # correct: Vector of "Correct" and "Incorrect" of same length of IRC + # + # Returns: + # Value between 0 and 1. 0 corresponds to red and 1 to green in the plots. + # For the correct answer options: + # < 0: red + # 0 - 0.05: orange - green-ish + # > 0.05: green + # For the incorrect answer options: difference between correct answer option + # and incorrect answer option are calculated. Colours are based on this + # difference: + # > 0: red + # -0.05 - 0: orange - green-ish + # < -0.05: red + + irc_col <- numeric() + irc_col[correct == "Correct"] <- ifelse( + IRC[correct == "Correct"] < 0, + 0, + ifelse(IRC[correct == "Correct"] > .05, + 1, + 8 * IRC[correct == "Correct"] + 3 / 10 + ) + ) + dif <- IRC[correct == "Incorrect"] - min(IRC[correct == "Correct"]) + irc_col[correct == "Incorrect"] <- ifelse( + dif >= 0, + 0, + ifelse(dif < -.05, 1, -8 * dif + 3 / 10) + ) + return(irc_col) +} + +make_answ_df <- function(item_list, key, number_answeroptions, item_names) { + # Creates a data frame with the statistics per answer option including a + # colour indicator for the percentage correct and IRC. + # + # Args: + # These are the same as defined earlier + # + # Returns: + # Data frame with a row for every answer option. Columns consist of the + # item name, answer options, frequency, percentage, IRC, Correct (whether + # an option is correct/incorrect), colour for percentage and colour for IRC + + for (i in 1:length(item_list)) { + if (any(key[, i] != 0)) { + item_list[[i]]$perc_col <- with( + item_list[[i]], + make_perc_col(Percentage, Correct) + ) + item_list[[i]]$irc_col <- with(item_list[[i]], make_irc_col(IRC, Correct)) + } + } + ans_opt_df <- plyr::ldply(item_list[number_answeroptions != 0], data.frame) + names(ans_opt_df)[1] <- "id" + ans_opt_df$Answer.Option <- gsub("Missing", "Mi", ans_opt_df$Answer.Option) + ans_opt_df$Answer.Option <- factor( + ans_opt_df$Answer.Option, + levels = c(LETTERS[1:max(number_answeroptions)], "Mi") + ) + ans_opt_df$id <- factor( + ans_opt_df$id, + levels = item_names[number_answeroptions != 0] + ) + if (all(key == 0)) { + ans_opt_df <- NULL + } + + ans_opt_df +} + +calc_n_plot <- function(ans_opt_df, key) { + # Calculates which item is displayed in which plot. The total number of + # answer options is too large (usually) to display them all in one plot. + # + # Args: + # ans_opt_df: As defined in function above + # + # Returns: + # n_plots: the number of plots + # items_in_plot: list entry per plot. Every entry consists of item names + # in that plot + + if (any(key != 0)) { + max_answ_opts <- 80 # Maximum number of bars per plot + tot_answ_opts <- nrow(ans_opt_df) + n_plots <- ceiling(tot_answ_opts / max_answ_opts) + answ_opts_per_plot <- floor(tot_answ_opts / n_plots) + + items_in_plot <- list(unique(ans_opt_df[1:answ_opts_per_plot, 1])) + if (n_plots > 1) { + for (i in 2:n_plots) { + items_in_plot[[i]] <- unique( + ans_opt_df[ + (((i - 1) * answ_opts_per_plot) + 1):(i * answ_opts_per_plot), + 1 + ] + ) + if (items_in_plot[[i]][1] %in% items_in_plot[[i - 1]]) { + items_in_plot[[i]] <- items_in_plot[[i]][-1] + } + } + } + assign("items_in_plot", items_in_plot, envir = globalenv()) + } +} + +ans_opt_df <- make_answ_df(item_list, key, number_answeroptions, item_names) +calc_n_plot(ans_opt_df, key) + +``` + +```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11, results= 'asis'} + +plot_perc_answ <- function(ans_opt_df, items_in_plot, key) { + # Plots bar graph(s) of the percentages of every answer option + # + # Args: + # All defined as before + # + # Returns: + # Description of how to read the plots + # One or multiple plots + + if (any(key != 0)) { + + cat("\\pagebreak + +## Percentage chosen per answer options + +Percentage plot of all answer options per item. **White** bars represent the +**right** answer option(s) and **black** the **wrong** options. + +The border represents the performance of an item. The **right** answer options +are coloured *green* if *more than 70%* of the students choose this option. In +that case, the students have mastered the material. They are coloured *red* if +*less than 40%* choose this option. *Between 40% and 70%*, it is coloured +*orange*. + +The **wrong** answer options are coloured *green* if an option is chosen *10 +percentage point less than the right option*. In that case, more students chose +the right answer option than the wrong option indicating that students are not +confused by the wrong option. They are coloured *red* if an option is chosen +*equally or more often than the right option*. In this case, students pick the +wrong answer option often, indicating that this option may be right as well. It +is coloured *orange* in between these two. + +") + + for (i in 1:length(items_in_plot)) { + bar_freq1 <- ggplot2::ggplot( + ans_opt_df[ans_opt_df$id %in% items_in_plot[[i]], ], + ggplot2::aes( + "Answer Option", + Percentage, + fill = Correct, + colour = perc_col) + ) + bar_freq1 <- + bar_freq1 + + geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) + + # Plot items + facet_grid(. ~ id, scales = "free_x", space = "free_x") + + # Fill in the bars + scale_fill_manual( + values = c("Incorrect" = "Black", "Correct" = "White"), + guide = FALSE + ) + + # Colour the bandaries + scale_colour_gradient( + low = "red", + high = "green", + limits = c(0, 1), + guide = FALSE + ) + + # Change y-axis limits to constants + coord_cartesian(ylim = c(0, 100)) + + # Names x-axis + labs(x = "Answer Options") + + theme_bw() + + # Change various font sizes + theme( + axis.text.x = element_text(size = 8), + axis.text.y = element_text(size = 12), + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15) + ) + + suppressWarnings(print(bar_freq1)) + } + } +} + +plot_irc_answ <- function(ans_opt_df, items_in_plot, key) { + # Plots bar graph(s) of the IRC of every answer option + # + # Args: + # All defined as before + # + # Returns: + # Description of how to read the plots + # One or multiple plots + + if (any(key != 0)) { + + cat('\\pagebreak + +## Item rest correlations per answer option + +Item rest correlation (IRC) plot of all answer options per item. The IRC is the +correlation between an answer option (1 for students who chose this options and +0 for student who did not) and the sumscore on all other items. **White** bars +represent the **right** answer option(s) and **black** the **wrong** options. + +The border represents the performance of an item. The **right** answer options +are coloured *green* if the IRC is *higher than .05*. In that case, students who +picked the right answer option scored higher on all other items than student who +did not pick the right option. They are coloured *red* if the IRC is *less than +0*. In that case, students who picked the right answer option scored equally or +worse on the other items than students who picked the other options. Between *0 +and .05*, the option is coloured *orange*. + +The **wrong** answer options are coloured *green* if the IRC is *more than .1 +lower than the right option*. In that case, students who answered the other +items incorrectly, usually answered this item incorrectly as well. They are +coloured *red* if the IRC is *equal or higher than the right answer option*. +In that case, students who answered the other items correctly may have answered +this item incorrectly. In between these two, the option is coloured *orange*. + +') + + # Creating IRC plots + for (i in 1:length(items_in_plot)) { + # Select items and variables to plot + bar_irc1 <- ggplot2::ggplot( + ans_opt_df[ans_opt_df$id %in% items_in_plot[[i]], ], + ggplot2::aes("Answer Option", IRC, fill = Correct, colour = irc_col) + ) + bar_irc1 <- + bar_irc1 + + geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) + + facet_grid(. ~ id, scales = "free_x", space = "free_x") + + scale_fill_manual( + values = c("Incorrect" = "Black", "Correct" = "White"), + guide = FALSE + ) + + scale_colour_gradient( + low = "red", + high = "green", + limits = c(0, 1), + guide = FALSE + ) + + # Change y-axis limit to either the maximum and minimum IRC + coord_cartesian( + ylim = c(min(ans_opt_df$IRC) - .01, max(ans_opt_df$IRC) + .01) + ) + + labs(x = "Answer Options") + + theme_bw() + + theme( + axis.text.x = element_text(size = 8), + axis.text.y = element_text(size = 12), + axis.title.x = element_text(size = 15), + axis.title.y = element_text(size = 15) + ) + suppressWarnings(print(bar_irc1)) + } + } +} + +plot_irc_answ(ans_opt_df, items_in_plot, key) +plot_perc_answ(ans_opt_df, items_in_plot, key) +``` + +\pagebreak + +## Cumulative Distribution + +Cumulative Distribution showing all possible scores and the percentage of +students that scored that number or higher + +```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11} +CumHist <- function(student_scores, n_item) { + # Create a reversed cumulative histogram, i.e. the height of score x is + # the percentage of students who scores x or higher. + # + # Args: + # student_scores: numeric vector of test score per student. + # n_item: number of items + # + # Returns: + # Reversed cumulative histogram plot + + h <- heights <- length(student_scores) + n <- sort(unique(student_scores)) + + for (i in 1:length(n)) { + h <- h - length(student_scores[student_scores == n[i]]) + heights <- c(heights, h) + } + + cesuurlijnen <- seq(0.2, 0.8, 0.2) * length(student_scores) + gridlijnen <- round( + seq(0, length(student_scores), length(student_scores) / 25), + 0 + ) + + gridlijnen <- gridlijnen[ + -grep(paste(round(cesuurlijnen, 0), collapse = "|"), gridlijnen) + ] + + heights <- heights[1:length(unique(student_scores))] + percent <- heights / length(student_scores) * 100 + df <- data.frame( + score = sort(unique(student_scores)), + total = heights, + percent = round(percent, 2) + ) + + vec <- df[1, 1]:df[nrow(df), 1] # save all possible total scores + + # Find if a total score is missing and add it + mis <- NA + if (sum(! (vec %in% df[, 1]) != 0)) { + mis <- vec[! (vec %in% df[, 1])] + } + if (! is.na(mis[1])) { + for (i in 1:length(mis)) { + df[ (nrow(df) + 1), ] <- c(mis[i], 0, 0) + } + df <- df[order(df$score), ] + } + + + # if no one gets a certain total score, + # replace the 0 with the next total score + suppressWarnings( + df[which(df[, 2] == 0, arr.ind = TRUE), 2] <- + df[(which(df[, 2] == 0, arr.ind = TRUE) + 1), 2] + ) + suppressWarnings( + df[which(df[, 3] == 0, arr.ind = TRUE), 3] <- + df[(which(df[, 3] == 0, arr.ind = TRUE) + 1), 3] + ) + + g <- + ggplot2::ggplot( + data = df, + ggplot2::aes(x = score, y = total) + ) + + geom_hline( + yintercept = c(gridlijnen), + linetype = "solid", + colour = "gray80" + ) + + geom_hline( + yintercept = cesuurlijnen, + linetype = "dashed", + size = 0.75, + colour = "black" + ) + + geom_histogram(stat = "identity", binwidth = 1) + + xlab("Total score") + + ylab("% of students with score or higher") + + scale_y_continuous( + breaks = c(0, cesuurlijnen, length(student_scores)), + labels = c(seq(0, 100, 20)) + ) + + scale_x_continuous( + limits = c(df[1, 1] - 1, n_item + 1), + breaks = 0:n_item, + expand = c(0, 0) + ) + + theme_bw() + + theme(panel.grid = element_blank()) + + print(g) +} + +plot_cum_hist(student_scores, n_item = ncol(key)) +``` + +```{r, echo = FALSE, eval = TRUE, warnings = FALSE, message = FALSE, results = 'asis', fig.width = 11} +## Do the whole script above for every sub category +if (!length(categories) == 0) { + cat("\n\n") + cat("\\pagebreak") + cat("\n\n") + cat("# Analysis for subcategories") + cat("\n\n") + + for (i in 1:length(categories)) { + items <- categories[[i]]$items + if (i != 1) { + cat("\\pagebreak") + cat("\n\n") + } + cat("\n\n") + cat(paste("## Category:", categories[[i]]$name )) + cat("\n\n") + + item_list <- create_item_list( + number_answeroptions[items], + correct_frequency[items], + correct_percentage[items], + frequency_answer_options[, items], + percentage_answer_options[, items], + categories[[i]]$corrected_item_tot_cor, + categories[[i]]$corrected_item_tot_cor_answ_option, + item_names[items], + key[, items] + ) + + correct_df <- create_df( + item_names[items], + correct_percentage[items], + categories[[i]]$corrected_item_tot_cor + ) + + plot_overall_perc(correct_df) + plot_overall_irc(correct_df) + + if (any(key[, items] != 0)) { + ans_opt_df <- make_answ_df( + item_list, + key[, items], + number_answeroptions[items], + item_names[items] + ) + calc_n_plot(ans_opt_df, key[, items]) + plot_perc_answ(ans_opt_df, items_in_plot, key[, items]) + plot_irc_answ(ans_opt_df, items_in_plot, key[, items]) + } + + cat("\\pagebreak") + cat("\n\n") + cat("## Cumulative Distribution") + cat("\n\n") + plot_cum_hist(categories[[i]]$student_scores, n_item = length(items)) + } +} +``` From 596dde254d94a32edaae4e235160372d75a8968e Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Wed, 17 Aug 2016 09:41:04 +0200 Subject: [PATCH 14/21] Fixed bug in refactoring --- app/Lib/Rscripts/report.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd index ad69e1f..80f6f5e 100644 --- a/app/Lib/Rscripts/report.Rmd +++ b/app/Lib/Rscripts/report.Rmd @@ -660,7 +660,7 @@ Cumulative Distribution showing all possible scores and the percentage of students that scored that number or higher ```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11} -CumHist <- function(student_scores, n_item) { +cum_hist <- function(student_scores, n_item) { # Create a reversed cumulative histogram, i.e. the height of score x is # the percentage of students who scores x or higher. # @@ -757,7 +757,7 @@ CumHist <- function(student_scores, n_item) { print(g) } -plot_cum_hist(student_scores, n_item = ncol(key)) +cum_hist(student_scores, n_item = ncol(key)) ``` ```{r, echo = FALSE, eval = TRUE, warnings = FALSE, message = FALSE, results = 'asis', fig.width = 11} From 4e210b68afd014bf0e4d63e9d2d9615e4bac94af Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Wed, 17 Aug 2016 09:48:19 +0200 Subject: [PATCH 15/21] Fixed bug in refactoring --- app/Lib/Rscripts/report.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd index 80f6f5e..43cc890 100644 --- a/app/Lib/Rscripts/report.Rmd +++ b/app/Lib/Rscripts/report.Rmd @@ -816,7 +816,7 @@ if (!length(categories) == 0) { cat("\n\n") cat("## Cumulative Distribution") cat("\n\n") - plot_cum_hist(categories[[i]]$student_scores, n_item = length(items)) + cum_hist(categories[[i]]$student_scores, n_item = length(items)) } } ``` From 4551aebb76da9d3c7892d6b8af01f96aae57cd66 Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Thu, 18 Aug 2016 11:07:09 +0200 Subject: [PATCH 16/21] Fixed: Error: Unknown parameters: binwidth, bins, pad --- app/Lib/Rscripts/report.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd index 43cc890..ff55c0f 100644 --- a/app/Lib/Rscripts/report.Rmd +++ b/app/Lib/Rscripts/report.Rmd @@ -739,7 +739,7 @@ cum_hist <- function(student_scores, n_item) { size = 0.75, colour = "black" ) + - geom_histogram(stat = "identity", binwidth = 1) + + geom_bar(stat = "identity") + xlab("Total score") + ylab("% of students with score or higher") + scale_y_continuous( From 86edb9fb996227c089d340f07e5f5d6ae2ec5eb5 Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Thu, 18 Aug 2016 14:54:14 +0200 Subject: [PATCH 17/21] Added required packages to Travis configuration --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index b5bd556..f8a09b0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,6 +40,7 @@ before_script: - sudo apt-get install -y --force-yes r-base r-cran-rserve - sudo apt-get install -y --force-yes libcurl4-gnutls-dev - sudo apt-get install -y --force-yes libxml2-dev + - sudo apt-get install -y --force-yes texlive-full - wget https://github.com/jgm/pandoc/releases/download/1.17.2/pandoc-1.17.2-1-amd64.deb - sudo dpkg -i pandoc-1.17.2-1-amd64.deb - sudo mkdir -p ~/R/Library From c19164e3824aec708d159f1cea4cf173d505b92c Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Wed, 12 Oct 2016 16:51:31 +0200 Subject: [PATCH 18/21] Removed trailing whitespace --- app/Lib/Rscripts/report.Rmd | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd index ff55c0f..5f8ab02 100644 --- a/app/Lib/Rscripts/report.Rmd +++ b/app/Lib/Rscripts/report.Rmd @@ -247,10 +247,10 @@ colours are based on multiple choice items with 3 or 4 answer options. bar_freq <- ggplot2::ggplot( correct_df, ggplot2::aes(item, correct_percentage, fill = perc_col)) - bar_freq <- - bar_freq + + bar_freq <- + bar_freq + # Create bar chart - geom_bar(stat = "identity") + + geom_bar(stat = "identity") + # Creat colour boundary scale_fill_gradient( low = "red", @@ -306,7 +306,7 @@ thus it is coloured *red*. *Between 0 and .05*, an item is coloured *orange*. bar_irc <- ggplot2::ggplot( correct_df, ggplot2::aes(item, corrected_item_tot_cor, fill = irc_col)) - bar_irc <- + bar_irc <- bar_irc + geom_bar(stat = "identity") + scale_fill_gradient( @@ -472,10 +472,7 @@ calc_n_plot <- function(ans_opt_df, key) { if (n_plots > 1) { for (i in 2:n_plots) { items_in_plot[[i]] <- unique( - ans_opt_df[ - (((i - 1) * answ_opts_per_plot) + 1):(i * answ_opts_per_plot), - 1 - ] + ans_opt_df[(((i - 1) * answ_opts_per_plot) + 1) : (i * answ_opts_per_plot), 1] ) if (items_in_plot[[i]][1] %in% items_in_plot[[i - 1]]) { items_in_plot[[i]] <- items_in_plot[[i]][-1] @@ -537,7 +534,7 @@ is coloured *orange* in between these two. fill = Correct, colour = perc_col) ) - bar_freq1 <- + bar_freq1 <- bar_freq1 + geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) + # Plot items @@ -723,7 +720,7 @@ cum_hist <- function(student_scores, n_item) { df[(which(df[, 3] == 0, arr.ind = TRUE) + 1), 3] ) - g <- + g <- ggplot2::ggplot( data = df, ggplot2::aes(x = score, y = total) From 34b2f984eb166cffa367746949ec32c0773d4353 Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Wed, 12 Oct 2016 17:21:16 +0200 Subject: [PATCH 19/21] Pass template filename as variable to R-script --- app/Lib/Rscripts/report.R | 5 +++-- app/Model/Exam.php | 5 +++-- app/Test/Case/Lib/RserveTest.php | 4 +++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/app/Lib/Rscripts/report.R b/app/Lib/Rscripts/report.R index 477040f..3cc88ca 100644 --- a/app/Lib/Rscripts/report.R +++ b/app/Lib/Rscripts/report.R @@ -1,4 +1,5 @@ -report <- function(filename, +report <- function(template, + filename, number_answeroptions, cronbach, frequency_answer_options, @@ -12,5 +13,5 @@ report <- function(filename, item_names, student_scores, categories) { - rmarkdown::render("/home/travis/build/nlware/qdnatool/app/Lib/Rscripts/report.Rmd", output_format = "pdf_document", output_file = filename) + rmarkdown::render(template, output_format = "pdf_document", output_file = filename) } diff --git a/app/Model/Exam.php b/app/Model/Exam.php index 40993f8..7a09672 100644 --- a/app/Model/Exam.php +++ b/app/Model/Exam.php @@ -611,13 +611,14 @@ protected function _report($exam) { $script[] = 'categories = c( );'; + $template = APP . 'Lib' . DS . 'Rscripts' . DS . 'report.Rmd'; $script[] = sprintf( 'report( ' . - '"%s", number_answeroptions, Cronbach, frequency_answer_options, percentage_answer_options, key, ' . + '"%s", "%s", number_answeroptions, Cronbach, frequency_answer_options, percentage_answer_options, key, ' . 'correct_frequency, correct_percentage, corrected_item_tot_cor, corrected_item_tot_cor_answ_option, "%s", ' . 'item_names, student_scores, categories' . ' );', - $tempFile, $exam['Exam']['name'] + $template, $tempFile, $exam['Exam']['name'] ); $script = implode("\n", $script); diff --git a/app/Test/Case/Lib/RserveTest.php b/app/Test/Case/Lib/RserveTest.php index c9f98be..2606173 100644 --- a/app/Test/Case/Lib/RserveTest.php +++ b/app/Test/Case/Lib/RserveTest.php @@ -200,12 +200,14 @@ public function testExecuteRscriptsAnalyseTooLittleStudents() { */ public function testExecuteRscriptsReport() { $filename = $this->__tmpPath . DS . 'testExecuteRscriptsReport.pdf'; + $template = APP . 'Lib' . DS . 'Rscripts' . DS . 'report.Rmd'; $this->assertFileNotExists($filename); $script = array(); $script[] = file_get_contents(APP . 'Lib' . DS . 'Rscripts' . DS . 'report.R'); $script[] = sprintf('filename = "%s";', $filename); + $script[] = sprintf('template = "%s";', $template); $script[] = 'number_students = 2;'; $script[] = 'number_answeroptions = c( 3, 3, 3 );'; $script[] = 'number_questions = 3;'; @@ -221,7 +223,7 @@ public function testExecuteRscriptsReport() { $script[] = 'item_names = c( "Item 1", "Item 2", "Item 3" );'; $script[] = 'student_scores = c( 7, 9 );'; $script[] = 'categories = c( );'; - $script[] = 'report( filename, number_answeroptions, cronbach, frequency_answer_options, percentage_answer_options, key, correct_frequency, + $script[] = 'report(template, filename, number_answeroptions, cronbach, frequency_answer_options, percentage_answer_options, key, correct_frequency, correct_percentage, corrected_item_tot_cor, corrected_item_tot_cor_answ_option, title, item_names, student_scores, categories );'; $script = implode("\n", $script); From 2f5d47985fc082a90af6f13089ae64f83f694ce6 Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Wed, 19 Oct 2016 12:19:03 +0200 Subject: [PATCH 20/21] Fixed Cs issues --- app/Lib/Rscripts/report.Rmd | 39 +++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd index 5f8ab02..a12ed13 100644 --- a/app/Lib/Rscripts/report.Rmd +++ b/app/Lib/Rscripts/report.Rmd @@ -88,12 +88,10 @@ create_item_list <- function(number_answeroptions, c(LETTERS[1:number_answeroptions[i]], "Missing"), c(frequency_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]), c(percentage_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]), - c(corrected_item_tot_cor_answ_option[ - c(2:(number_answeroptions[i] + 1), 1), - i - ]), + c(corrected_item_tot_cor_answ_option[c(2:(number_answeroptions[i] + 1), 1), i]), correct, - row.names = NULL) + row.names = NULL + ) colnames(item_list[[i]]) <- colnames2 } else { @@ -246,7 +244,8 @@ colours are based on multiple choice items with 3 or 4 answer options. bar_freq <- ggplot2::ggplot( correct_df, - ggplot2::aes(item, correct_percentage, fill = perc_col)) + ggplot2::aes(item, correct_percentage, fill = perc_col) + ) bar_freq <- bar_freq + # Create bar chart @@ -305,7 +304,8 @@ thus it is coloured *red*. *Between 0 and .05*, an item is coloured *orange*. # Create bar chart bar_irc <- ggplot2::ggplot( correct_df, - ggplot2::aes(item, corrected_item_tot_cor, fill = irc_col)) + ggplot2::aes(item, corrected_item_tot_cor, fill = irc_col) + ) bar_irc <- bar_irc + geom_bar(stat = "identity") + @@ -363,8 +363,8 @@ make_perc_col <- function(percentage, correct) { 1 / 75 * percentage[correct == "Correct"] - 7 / 30 ) ) - dif <- - percentage[correct == "Incorrect"] - min(percentage[correct == "Correct"]) + dif <- percentage[correct == "Incorrect"] - min(percentage[correct == "Correct"]) + perc_col[correct == "Incorrect"] <- ifelse( dif >= 0, 0, @@ -373,7 +373,7 @@ make_perc_col <- function(percentage, correct) { perc_col } -make_irc_col <- function(IRC, correct){ +make_irc_col <- function(IRC, correct) { # Calculate colours for the percentage answer options. # # Args: @@ -383,15 +383,15 @@ make_irc_col <- function(IRC, correct){ # Returns: # Value between 0 and 1. 0 corresponds to red and 1 to green in the plots. # For the correct answer options: - # < 0: red - # 0 - 0.05: orange - green-ish - # > 0.05: green + # * < 0: red + # * 0 - 0.05: orange - green-ish + # * > 0.05: green # For the incorrect answer options: difference between correct answer option # and incorrect answer option are calculated. Colours are based on this # difference: - # > 0: red - # -0.05 - 0: orange - green-ish - # < -0.05: red + # * > 0: red + # * -0.05 - 0: orange - green-ish + # * < -0.05: red irc_col <- numeric() irc_col[correct == "Correct"] <- ifelse( @@ -425,10 +425,7 @@ make_answ_df <- function(item_list, key, number_answeroptions, item_names) { for (i in 1:length(item_list)) { if (any(key[, i] != 0)) { - item_list[[i]]$perc_col <- with( - item_list[[i]], - make_perc_col(Percentage, Correct) - ) + item_list[[i]]$perc_col <- with(item_list[[i]], make_perc_col(Percentage, Correct)) item_list[[i]]$irc_col <- with(item_list[[i]], make_irc_col(IRC, Correct)) } } @@ -472,7 +469,7 @@ calc_n_plot <- function(ans_opt_df, key) { if (n_plots > 1) { for (i in 2:n_plots) { items_in_plot[[i]] <- unique( - ans_opt_df[(((i - 1) * answ_opts_per_plot) + 1) : (i * answ_opts_per_plot), 1] + ans_opt_df[(( ( i - 1) * answ_opts_per_plot) + 1) : (i * answ_opts_per_plot), 1] ) if (items_in_plot[[i]][1] %in% items_in_plot[[i - 1]]) { items_in_plot[[i]] <- items_in_plot[[i]][-1] From 727a9e3576d0e8e7ff147ed8b82ada8d9130dc61 Mon Sep 17 00:00:00 2001 From: Mark van Driel Date: Wed, 19 Oct 2016 13:20:38 +0200 Subject: [PATCH 21/21] Fixed Cs issues --- app/Lib/Rscripts/report.Rmd | 84 +++++++++++-------------------------- 1 file changed, 24 insertions(+), 60 deletions(-) diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd index a12ed13..89c2ead 100644 --- a/app/Lib/Rscripts/report.Rmd +++ b/app/Lib/Rscripts/report.Rmd @@ -333,7 +333,6 @@ plot_overall_irc(correct_df) ```{r, echo = FALSE, eval = TRUE, warning= FALSE, fig.width = 11} -# Create extra variables for the colours in the bar plots make_perc_col <- function(percentage, correct) { # Calculate colours for the percentage answer options. # @@ -344,14 +343,14 @@ make_perc_col <- function(percentage, correct) { # Returns: # Value between 0 and 1. 0 corresponds to red and 1 to green in the plots. # For the correct options: - # < 40: red - # 40 - 70: orange - green-ish - # > 70: green + # * < 40: red + # * 40 - 70: orange - green-ish + # * > 70: green # For the incorrect options: difference between correct answer option and # incorrect answer options are calculated. Colours are based on difference: - # < -10: Green - # -10 - 0: orange - green-ish - # > 0: green + # * < -10: green + # * -10 - 0: orange - green-ish + # * > 0: green perc_col <- vector("numeric", length = length(correct)) perc_col[correct == "Correct"] <- ifelse( @@ -373,7 +372,7 @@ make_perc_col <- function(percentage, correct) { perc_col } -make_irc_col <- function(IRC, correct) { +make_irc_col <- function(irc, correct) { # Calculate colours for the percentage answer options. # # Args: @@ -395,14 +394,14 @@ make_irc_col <- function(IRC, correct) { irc_col <- numeric() irc_col[correct == "Correct"] <- ifelse( - IRC[correct == "Correct"] < 0, + irc[correct == "Correct"] < 0, 0, - ifelse(IRC[correct == "Correct"] > .05, + ifelse(irc[correct == "Correct"] > .05, 1, - 8 * IRC[correct == "Correct"] + 3 / 10 + 8 * irc[correct == "Correct"] + 3 / 10 ) ) - dif <- IRC[correct == "Incorrect"] - min(IRC[correct == "Correct"]) + dif <- irc[correct == "Incorrect"] - min(irc[correct == "Correct"]) irc_col[correct == "Incorrect"] <- ifelse( dif >= 0, 0, @@ -469,7 +468,7 @@ calc_n_plot <- function(ans_opt_df, key) { if (n_plots > 1) { for (i in 2:n_plots) { items_in_plot[[i]] <- unique( - ans_opt_df[(( ( i - 1) * answ_opts_per_plot) + 1) : (i * answ_opts_per_plot), 1] + ans_opt_df[( ( (i - 1) * answ_opts_per_plot) + 1) : (i * answ_opts_per_plot), 1] ) if (items_in_plot[[i]][1] %in% items_in_plot[[i - 1]]) { items_in_plot[[i]] <- items_in_plot[[i]][-1] @@ -615,10 +614,7 @@ this item incorrectly. In between these two, the option is coloured *orange*. bar_irc1 + geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) + facet_grid(. ~ id, scales = "free_x", space = "free_x") + - scale_fill_manual( - values = c("Incorrect" = "Black", "Correct" = "White"), - guide = FALSE - ) + + scale_fill_manual(values = c("Incorrect" = "Black", "Correct" = "White"), guide = FALSE) + scale_colour_gradient( low = "red", high = "green", @@ -626,9 +622,7 @@ this item incorrectly. In between these two, the option is coloured *orange*. guide = FALSE ) + # Change y-axis limit to either the maximum and minimum IRC - coord_cartesian( - ylim = c(min(ans_opt_df$IRC) - .01, max(ans_opt_df$IRC) + .01) - ) + + coord_cartesian(ylim = c(min(ans_opt_df$IRC) - .01, max(ans_opt_df$IRC) + .01)) + labs(x = "Answer Options") + theme_bw() + theme( @@ -674,22 +668,13 @@ cum_hist <- function(student_scores, n_item) { } cesuurlijnen <- seq(0.2, 0.8, 0.2) * length(student_scores) - gridlijnen <- round( - seq(0, length(student_scores), length(student_scores) / 25), - 0 - ) + gridlijnen <- round(seq(0, length(student_scores), length(student_scores) / 25), 0) - gridlijnen <- gridlijnen[ - -grep(paste(round(cesuurlijnen, 0), collapse = "|"), gridlijnen) - ] + gridlijnen <- gridlijnen[grep(paste(round(cesuurlijnen, 0), collapse = "|"), gridlijnen)] heights <- heights[1:length(unique(student_scores))] percent <- heights / length(student_scores) * 100 - df <- data.frame( - score = sort(unique(student_scores)), - total = heights, - percent = round(percent, 2) - ) + df <- data.frame(score = sort(unique(student_scores)), total = heights, percent = round(percent, 2)) vec <- df[1, 1]:df[nrow(df), 1] # save all possible total scores @@ -709,19 +694,14 @@ cum_hist <- function(student_scores, n_item) { # if no one gets a certain total score, # replace the 0 with the next total score suppressWarnings( - df[which(df[, 2] == 0, arr.ind = TRUE), 2] <- - df[(which(df[, 2] == 0, arr.ind = TRUE) + 1), 2] + df[which(df[, 2] == 0, arr.ind = TRUE), 2] <- df[(which(df[, 2] == 0, arr.ind = TRUE) + 1), 2] ) suppressWarnings( - df[which(df[, 3] == 0, arr.ind = TRUE), 3] <- - df[(which(df[, 3] == 0, arr.ind = TRUE) + 1), 3] + df[which(df[, 3] == 0, arr.ind = TRUE), 3] <- df[(which(df[, 3] == 0, arr.ind = TRUE) + 1), 3] ) g <- - ggplot2::ggplot( - data = df, - ggplot2::aes(x = score, y = total) - ) + + ggplot2::ggplot(data = df, ggplot2::aes(x = score, y = total)) + geom_hline( yintercept = c(gridlijnen), linetype = "solid", @@ -736,15 +716,8 @@ cum_hist <- function(student_scores, n_item) { geom_bar(stat = "identity") + xlab("Total score") + ylab("% of students with score or higher") + - scale_y_continuous( - breaks = c(0, cesuurlijnen, length(student_scores)), - labels = c(seq(0, 100, 20)) - ) + - scale_x_continuous( - limits = c(df[1, 1] - 1, n_item + 1), - breaks = 0:n_item, - expand = c(0, 0) - ) + + scale_y_continuous(breaks = c(0, cesuurlijnen, length(student_scores)), labels = c(seq(0, 100, 20))) + + scale_x_continuous(limits = c(df[1, 1] - 1, n_item + 1), breaks = 0:n_item, expand = c(0, 0)) + theme_bw() + theme(panel.grid = element_blank()) @@ -785,22 +758,13 @@ if (!length(categories) == 0) { key[, items] ) - correct_df <- create_df( - item_names[items], - correct_percentage[items], - categories[[i]]$corrected_item_tot_cor - ) + correct_df <- create_df(item_names[items], correct_percentage[items], categories[[i]]$corrected_item_tot_cor) plot_overall_perc(correct_df) plot_overall_irc(correct_df) if (any(key[, items] != 0)) { - ans_opt_df <- make_answ_df( - item_list, - key[, items], - number_answeroptions[items], - item_names[items] - ) + ans_opt_df <- make_answ_df(item_list, key[, items], number_answeroptions[items], item_names[items]) calc_n_plot(ans_opt_df, key[, items]) plot_perc_answ(ans_opt_df, items_in_plot, key[, items]) plot_irc_answ(ans_opt_df, items_in_plot, key[, items])