From 46d342c3a33305805c93e1f8599241219649fcfd Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Wed, 3 Aug 2016 14:46:52 +0200
Subject: [PATCH 01/21] Added new R scripts

---
 app/Lib/Rscripts/report.R   | 740 +-----------------------------------
 app/Lib/Rscripts/report.Rmd | 649 +++++++++++++++++++++++++++++++
 2 files changed, 664 insertions(+), 725 deletions(-)
 create mode 100644 app/Lib/Rscripts/report.Rmd

diff --git a/app/Lib/Rscripts/report.R b/app/Lib/Rscripts/report.R
index 617e326..0c10111 100644
--- a/app/Lib/Rscripts/report.R
+++ b/app/Lib/Rscripts/report.R
@@ -1,727 +1,17 @@
-# Packages
-library(gplots)
-library(ggplot2)
-library(plyr)
-library(grid)
-# nolint start
-library(gridExtra)
-# nolint end
-
-report <- function(filename,
-                   number_students,
-                   number_answeroptions,
-                   number_questions,
-                   cronbach,
-                   frequency_answer_options,
-                   percentage_answer_options,
-                   input_correct,
+report <- function(file.name,
+                   title,
+                   number.answeroptions,
                    key,
-                   correct_frequency,
-                   correct_percentage,
-                   corrected_item_tot_cor,
-                   corrected_item_tot_cor_answ_option,
-                   title, item_names) {
-  # Creating results for each item
-  item_list <- list() # Creates list to put item output in
-  colnames1 <- c("Answer Option", "Frequency", "Percentage", "IRC")
-  colnames2 <- c("Answer Option", "Frequency", "Percentage", "IRC", "Correct")
-
-  # Filling a list with frequency, percentage and IRC for total and each answer options.
-  # every item gets a seperate data frame
-  # If no answeroptions are present, only the correct statistics are displayed.
-  # If there are more than 14 answer options, only the correct statistics are displayed to conserve space.
-  # This list is used for the first part of the output. This list is modified to a data frame to make the plots with
-  # the answer options.
-  for (i in 1:number_questions) {
-    if (number_answeroptions[i] > 0 & number_answeroptions[i] < 15) {
-      Correct <- rep("Incorrect", number_answeroptions[i] + 1)
-      Correct[which(key[, i] == 1)] <- "Correct"
-
-      # Frequency is also stored at this point, but not used.
-      # in case someone wants to alter the script to display the frequency instead of the percentage
-      item_list[[i]] <- data.frame(
-        c(LETTERS[1:number_answeroptions[i]], "Missing"),
-        c(frequency_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]),
-        c(percentage_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]),
-        c(corrected_item_tot_cor_answ_option[c(2:(number_answeroptions[i] + 1), 1), i]),
-        Correct,
-        row.names = NULL
-      )
-      colnames(item_list[[i]]) <- colnames2
-    } else {
-      item_list[[i]] <- data.frame("Total", correct_frequency[i], correct_percentage[i], corrected_item_tot_cor[i])
-      # Frequency is also stored at this point, but not used,
-      # in case someone wants to alter the script to display the frequency instead of the percentage
-      colnames(item_list[[i]]) <- colnames1
-    }
-  }
-
-  #Creating  item names and putting them in the list as names
-  items <- numeric()
-  for (i in 1:number_questions) {
-    items <- c(items, paste("Item", as.character(item_names[i]), sep = " "))
-  }
-
-  names(item_list) <- items
-
-  # Create extra variables to make the bar plots
-  item_list1 <- item_list
-  for (i in 1:number_questions) {
-    #Create right order on the x-axis (Missingness last)
-    if (any(key[, i] != 0) & number_answeroptions[i] < 15) {
-      item_list1[[i]]$Ans_Factor <- factor(
-        item_list1[[i]]$"Answer Option",
-        levels = c(LETTERS[1 : max(number_answeroptions)], "Missing")
-      )
-      # nolint start
-      item_list1[[i]]$"Col_scale" <- as.numeric(item_list1[[i]]$Correct) * 2 - 3
-      item_list1[[i]]$"IRC_col_scale" <- with(item_list1[[i]], IRC * Col_scale)
-      item_list1[[i]]$"Perc_col_scale" <- with(item_list1[[i]], Percentage)
-      # nolint end
-    }
-  }
-
-  #Create data frame of all the items which have answer options. This is used to make barplots per answer option
-  if (any(key != 0)) {
-    ans_opt_datafrm <- plyr::ldply(item_list1[number_answeroptions != 0 & number_answeroptions < 15], data.frame)
-    names(ans_opt_datafrm)[1] <- "id"
-    ans_opt_datafrm[, 2] <- gsub("Missing", "Mis", ans_opt_datafrm[, 2])
-    ans_opt_datafrm$Ans_Factor <- gsub("Missing", "Mis", ans_opt_datafrm$Ans_Factor)
-    ans_opt_datafrm$Ans_Factor <- factor(
-      ans_opt_datafrm$Ans_Factor,
-      levels = c(LETTERS[1 : max(number_answeroptions)], "Mis")
-    )
-    ans_opt_datafrm$id <- factor(ans_opt_datafrm$id, levels = items[number_answeroptions != 0])
-    ans_opt_datafrm$Perc_col_scale[ans_opt_datafrm$Correct == "Correct"] <-
-      100 - ans_opt_datafrm$Perc_col_scale[ans_opt_datafrm$Correct == "Correct"]
-
-    id2 <- as.numeric(ans_opt_datafrm$id)
-    ans_opt_datafrm <- cbind(ans_opt_datafrm, id2)
-  }
-
-  # Create a dataframe with only the correct statistics in it. This is used in the general item plots
-  dataframe_correct <- data.frame(
-    factor(1:number_questions),
-    correct_frequency,
-    correct_percentage,
-    corrected_item_tot_cor
-  )
-  names(dataframe_correct)[1] <- "item"
-
-  # Starting explanation
-  start_text <- paste(
-    "Number of students  : ", number_students, "\n",
-    "Number of questions : ", number_questions, "\n",
-    "Average score       : ", round(mean(rowSums(input_correct)), digits = 3), "\n",
-    "Standard deviation  : ", round(sd(rowSums(input_correct)), digits = 3), "\n",
-    "Cronbach's alpha    : ", cronbach, "\n",
-    "Standard error      : ", round(sd(rowSums(input_correct) * sqrt(1 - cronbach)), digits = 3), "\n",
-    sep = ""
-  )
-
-  explanation_items <- paste(
-    "Explanation Table", "\n", "\n",
-    "For each question the frequency, percentage and item rest correlations (IRC)", "\n",
-    "from every answer option are diplayed. The IRC should be (highly) positive", "\n",
-    "for the right answer option and low for the wrong answer option(s).", "\n",
-    sep = ""
-  )
-
-  pdf(file = filename, h = 8, w = 10)
-
-  # Textplot plots outside the normal plot window, therefore xpd = NA
-  par(xpd = NA, mar = rep(2, 4))
-
-  # If maximum 7 answer options 8 items per page
-  if (max(number_answeroptions) < 8) {
-    # Creating the first page
-    layout(matrix(c(1, 1, 1, 1, 1, 1,
-                    2, 2, 3, 3, 3, 3,
-                    2, 2, 3, 3, 3, 3,
-                    2, 2, 3, 3, 3, 3,
-                    4, 4, 4, 5, 5, 5,
-                    4, 4, 4, 5, 5, 5,
-                    4, 4, 4, 5, 5, 5,
-                    6, 6, 6, 7, 7, 7,
-                    6, 6, 6, 7, 7, 7,
-                    6, 6, 6, 7, 7, 7,
-                    8, 8, 8, 9, 9, 9,
-                    8, 8, 8, 9, 9, 9,
-                    8, 8, 8, 9, 9, 9), ncol = 6, byrow = TRUE))
-    # matrix(c(1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6), 3, 6, byrow = TRUE))
-
-    # Title
-    gplots::textplot(title, valign = "top", cex = 2)
-
-    # Add overall test info and explanation
-    gplots::textplot(start_text, halign = "left", valign = "top", cex = 1, mar = c(1, 5, 5, 1))
-    gplots::textplot(explanation_items, halign = "left", valign = "top", mar = c(1, 1, 5, 5))
-
-    # Display the first 6 items
-    if (number_questions < 7) {
-      for (i in 1:number_questions) {
-        gplots::textplot(item_list[[i]][, 1:4], show.rownames = FALSE, cex = 1, valign = "top", mar = c(5, 1, 0, 1))
-        title(items[i], line = 2)
-        # Adding highlighting box for the right answeroption
-        if (any(key[, i] == 1)) {
-          for (j in 1 : sum(key[, i] == 1)) {
-            rect(
-              .19,
-              .92 - which(key[, i] == 1)[j] * .11,
-              .85,
-              1.02 - which(key[, i] == 1)[j] * .11,
-              col = rgb(0, .9, 0, .5),
-              density = NA
-            )
-          }
-        }
-      }
-    } else {
-      for (i in 1:6) {
-        gplots::textplot(item_list[[i]][, 1:4], show.rownames = FALSE, cex = 1, valign = "top", mar = c(5, 1, 0, 1))
-        title(items[i], line = 2)
-        # Adding highlighting box for the right answeroption
-        if (any(key[, i] == 1)) {
-          for (j in 1 : sum(key[, i] == 1)) {
-            rect(
-              .19,
-              .92 - which(key[, i] == 1)[j] * .11,
-              .85,
-              1.02 - which(key[, i] == 1)[j] * .11,
-              col = rgb(0, .9, 0, .5),
-              density = NA
-            )
-          }
-        }
-      }
-
-      # Creating the item output on second page forward, only if more than 6 questions
-      layout(matrix(c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8), 4, 6, byrow = TRUE))
-      for (i in 7:number_questions) {
-        gplots::textplot(item_list[[i]][, 1:4], show.rownames = FALSE, cex = 1, valign = "top", mar = c(2, 1, 3, 1))
-        title(items[i], line = -0.85)
-
-        if (any(key[, i] == 1)) {
-          for (j in 1 : sum(key[, i] == 1)) {
-            rect(
-              .19,
-              .92 - which(key[, i] == 1)[j] * .096,
-              .85,
-              1.01 - which(key[, i] == 1)[j] * .096,
-              col = rgb(0, .9, 0, .5),
-              density = NA
-            )
-          }
-        }
-      }
-    }
-  } else {
-    # Creates PDF for more than 7 answeroptions, only 4 items per page are displayed
-    layout(matrix(c(1, 1, 1, 1, 1, 1,
-                    2, 2, 3, 3, 3, 3,
-                    2, 2, 3, 3, 3, 3,
-                    2, 2, 3, 3, 3, 3,
-                    4, 4, 4, 5, 5, 5,
-                    4, 4, 4, 5, 5, 5,
-                    4, 4, 4, 5, 5, 5,
-                    4, 4, 4, 5, 5, 5,
-                    6, 6, 6, 7, 7, 7,
-                    6, 6, 6, 7, 7, 7,
-                    6, 6, 6, 7, 7, 7,
-                    6, 6, 6, 7, 7, 7), ncol = 6, byrow = TRUE))
-
-    # Title
-    gplots::textplot(title, valign = "top", cex = 2)
-
-    # Introduction text
-    gplots::textplot(start_text, halign = "left", valign = "top", cex = 1, mar = c(1, 5, 5, 1))
-    gplots::textplot(explanation_items, halign = "left", valign = "top", mar = c(1, 1, 5, 5))
-
-    # Creating item output
-    for (i in 1:4) {
-      gplots::textplot(item_list[[i]][, 1:4], show.rownames = FALSE, cex = 1, valign = "top", mar = c(5, 1, 0, 1))
-      title(items[i], line = 1.8)
-
-      # Adding highlighting box for the right answeroption
-      if (any(key[, i] == 1) & number_answeroptions[i] < 15) {
-        for (j in 1 : sum(key[, i] == 1)) {
-          rect(
-            .19,
-            .95 - .065 * which(key[, i] == 1)[j],
-            .85,
-            1.01 - .065 * which(key[, i] == 1)[j],
-            col = rgb(0, .9, 0, .5),
-            density = NA
-          )
-        }
-      }
-    }
-
-    # Second page and further
-    layout(matrix(c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6), 3, 6, byrow = TRUE))
-
-    # Displaying the items
-    for (i in 4:number_questions) {
-      gplots::textplot(item_list[[i]][, 1:4], show.rownames = FALSE, cex = 1, valign = "top", mar = c(2, 1, 3, 1))
-      title(items[i], line = -.9)
-
-      # Adding highlighting box for the right answeroption
-      if (any(key[, i] == 1) &  number_answeroptions[i] < 15) {
-        for (j in 1 : sum(key[, i] == 1)) {
-          rect(
-            .19,
-            .95 - .065 * which(key[, i] == 1)[j],
-            .85,
-            1.01 - .065 * which(key[, i] == 1)[j],
-            col = rgb(0, .9, 0, .5),
-            density = NA
-          )
-        }
-      }
-    }
-  }
-
-  ### Frequency Plot for total items
-
-  # Create chart with Answer Option on x-axis and IRC on y-asix
-  bar_plot_freq <- ggplot2::ggplot(
-    dataframe_correct,
-    ggplot2::aes(item, correct_percentage, fill = correct_percentage)
-  )
-
-  bar_freq <-
-    bar_plot_freq +
-    # Create Bar chart
-    geom_bar(stat = "identity") +
-    # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options)
-    scale_fill_gradient2(low = "red", mid = "green", high = "red", midpoint = 50, limits = c(0, 100), name = "") +
-    # Change y-axis limit to constant
-    coord_cartesian(ylim = c(0, 100)) +
-    # Change titles and x axis name
-    labs(x = "Item", title = "Correct Percentage", y = "Percentage") +
-    theme(
-      strip.text.x = element_text(size = 7),
-      # Change font size of item names and Answer options
-      axis.text.x = element_text(size = 8),
-      axis.ticks.x = element_line(size = .4)
-    )
-
-  ### IRC Bar Plot for total items
-
-  # Create chart with Answer_Option on x-axis and IRC on y-asix
-  bar_plot_IRC <- ggplot2::ggplot(
-    dataframe_correct,
-    ggplot2::aes(item, corrected_item_tot_cor, fill = corrected_item_tot_cor)
-  )
-  bar_IRC <-
-    bar_plot_IRC +
-    # Create Bar chart
-    geom_bar(stat = "identity") +
-    # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options)
-    scale_fill_gradient(low = "red", high = "green", limits = c(-.1, .4), name = "") +
-    # coord_cartesian(ylim = c(, 1)) + # Change y-axis limit to constant
-    # Change titles and x axis name
-    labs(x = "Item", title = "Item Rest Correlations", y = "Item Rest Correlation") +
-    theme(
-      strip.text.x = element_text(size = 7),
-      # Change font size of item names and Answer options
-      axis.text.x = element_text(size = 8),
-      axis.ticks.x = element_line(size = .4)
-    )
-
-  # Calculating which questions are displayed on which plot.
-  # Determination rule: max 80 answer options per plot
-  if (any(key != 0)) {
-    tot_answer_options <- 0
-    Questions_p1 <- 0
-    Questions_p2 <- numeric()
-    Questions_p3 <- numeric()
-    Questions_p4 <- numeric()
-    questions_with_ans_opts <- nlevels(ans_opt_datafrm$id)
-
-    # Calculating which questions are on the first plot. Total answeroptions should be less than 100
-    while (tot_answer_options < 100 & Questions_p1 < questions_with_ans_opts) {
-      Questions_p1 <- Questions_p1 + 1
-      tot_answer_options <- tot_answer_options + sum(ans_opt_datafrm$id2 == Questions_p1)
-    }
-
-    # Calculating which questions are on the second plot. Total answeroptions should be less than 100
-    Questions_p2 <- Questions_p1
-    while (tot_answer_options < 200 & Questions_p2 < questions_with_ans_opts) {
-      Questions_p2 <- Questions_p2 + 1
-      tot_answer_options <- tot_answer_options + sum(ans_opt_datafrm$id2 == Questions_p2)
-    }
-
-    # Calculating which questions are on the third plot. Total answeroptions should be less than 100
-    Questions_p3 <- Questions_p2
-    while (tot_answer_options < 300 & Questions_p3 < questions_with_ans_opts) {
-      Questions_p3 <- Questions_p3 + 1
-      tot_answer_options <- tot_answer_options + sum(ans_opt_datafrm$id2 == Questions_p3)
-    }
-
-    # Emptying the plots if no questions are present for that plot
-    Questions_p4 <- ifelse(Questions_p3 < questions_with_ans_opts, questions_with_ans_opts, 0)
-    Questions_p3 <- ifelse(Questions_p3 != Questions_p2, Questions_p3, 0)
-    Questions_p2 <- ifelse(Questions_p2 != Questions_p1, Questions_p2, 0)
-
-    # Creating IRC plots 16 items per plot.
-    # The only difference between these for codes is in the item selection on the first row
-    # ([1:Questions_p1] in the first case
-
-    bar_plot_freq1 <- ggplot2::ggplot(
-      # Create subset of first 16 questions
-      ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[1:Questions_p1]), ],
-      # Create chart with Answer Option on x-axis and IRC on y-asix
-      ggplot2::aes("Answer Option", Percentage, fill = Correct, colour = Perc_col_scale)
-    )
-    bar_freq1 <-
-      bar_plot_freq1 +
-      # Create Bar chart
-      geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") +
-      # Display the different items
-      facet_grid(. ~ id, scales = "free_x", space = "free_x") +
-      # Fill in the bars: Green right answer options, Red wrong answer options
-      scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) +
-      # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options)
-      scale_colour_gradient(low = "green", high = "red", guide = FALSE) +
-      # Change y-axis limit to constant
-      coord_cartesian(ylim = c(0, 100)) +
-      # Change titles and x axis name
-      labs(
-        x = "Answer Options",
-        title = paste(
-          "Percentage chart(s) per question and per answer options. The green bars represent the right answer ",
-          "options.",
-          "\n",
-          "The color of the border represents the desirability (50% for the right answer options, low for the wrong ",
-          "answer options)",
-          sep = ""
-        )
-      ) +
-      theme(
-        strip.text.x = element_text(size = 7),
-        # Change font size of item names and Answer options
-        axis.text.x = element_text(size = 4.8),
-        axis.ticks.x = element_line(size = .1),
-        title = element_text(size = 8)
-      )
-
-    if (Questions_p2 != 0) {
-      bar_plot_freq2 <- ggplot2::ggplot(
-        # Create subset of the other questions
-        ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[(Questions_p1 + 1) : Questions_p2]), ],
-        # Create chart with Answer Option on x-axis and IRC on y-asix
-        ggplot2::aes("Answer Option", Percentage, fill = Correct, colour = Perc_col_scale)
-      )
-      bar_freq2 <-
-        bar_plot_freq2 +
-        # Create Bar chart
-        geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") +
-        # Display the different items
-        facet_grid(. ~ id, scales = "free_x", space = "free_x") +
-        # Fill in the bars: Green right answer options, Red wrong answer options
-        scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) +
-        # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options)
-        scale_colour_gradient(low = "green", high = "red", guide = FALSE) +
-        # Change y-axis limit to constant
-        coord_cartesian(ylim = c(0, 100)) +
-        # Change titles and x axis name
-        labs(x = "Answer Options", title = "Item") +
-        theme(
-          strip.text.x = element_text(size = 7),
-          # Change font size of item names and Answer options
-          axis.text.x = element_text(size = 4.8),
-          axis.ticks.x = element_line(size = .1)
-        )
-    }
-
-    if (Questions_p3 != 0) {
-      bar_plot_freq3 <- ggplot2::ggplot(
-        # Create subset of the other questions
-        ans_opt_datafrm[ans_opt_datafrm[, 1] %in% c(levels(ans_opt_datafrm$id)[(Questions_p2 + 1) : Questions_p3]), ],
-        # Create chart with Answer Option on x-axis and IRC on y-asix
-        ggplot2::aes("Answer Option", Percentage, fill = Correct, colour = Perc_col_scale)
-      )
-      bar_freq3 <-
-        bar_plot_freq3 +
-        # Create Bar chart
-        geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") +
-        # Display the different items
-        facet_grid(. ~ id, scales = "free_x", space = "free_x") +
-        # Fill in the bars: Green right answer options, Red wrong answer options
-        scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) +
-        # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options)
-        scale_colour_gradient(low = "green", high = "red", guide = FALSE) +
-        # Change y-axis limit to constant
-        coord_cartesian(ylim = c(0, 100)) +
-        # Change titles and x axis name
-        labs(x = "Answer Options", title = "Item") +
-        theme(
-          strip.text.x = element_text(size = 7),
-          # Change font size of item names and Answer options
-          axis.text.x = element_text(size = 4.8),
-          axis.ticks.x = element_line(size = .1)
-        )
-    }
-
-    if (Questions_p4 != 0) {
-      bar_plot_freq4 <- ggplot2::ggplot(
-        # Create subset of the other questions
-        ans_opt_datafrm[ans_opt_datafrm[, 1] %in% c(levels(ans_opt_datafrm$id)[(Questions_p3 + 1) : Questions_p4]), ],
-        # Create chart with Answer Option on x-axis and IRC on y-asix
-        ggplot2::aes("Answer Option", Percentage, fill = Correct, colour = Perc_col_scale)
-      )
-      bar_freq4 <-
-        bar_plot_freq4 +
-        # Create Bar chart
-        geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") +
-        # Display the different items
-        facet_grid(. ~ id, scales = "free_x", space = "free_x") +
-        # Fill in the bars: Green right answer options, Red wrong answer options
-        scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) +
-        # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options)
-        scale_colour_gradient(low = "green", high = "red", guide = FALSE) +
-        # Change y-axis limit to constant
-        coord_cartesian(ylim = c(0, 100)) +
-        # Change titles and x axis name
-        labs(x = "Answer Options", title = "Item") +
-        theme(
-          strip.text.x = element_text(size = 7),
-          # Change font size of item names and Answer options
-          axis.text.x = element_text(size = 4.8),
-          axis.ticks.x = element_line(size = .1)
-        )
-    }
-
-    # Creating IRC plots 16 items per plot.
-    # The only difference between these for codes is in the item selection on the first row
-    # ([1:Questions_p1] in the first case)
-
-    bar_plot_IRC1 <- ggplot2::ggplot(
-      # Create subset of first 16 questions
-      ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[1:Questions_p1]), ],
-      # Create chart with Answer Option on x-axis and IRC on y-asix
-      ggplot2::aes("Answer Option", IRC, fill = Correct, colour = IRC_col_scale)
-    )
-    bar_IRC1 <-
-      bar_plot_IRC1 +
-      # Create Bar chart
-      geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") +
-      # Display the different items
-      facet_grid(. ~ id, scales = "free_x", space = "free_x") +
-      # Fill in the bars: Green right answer options, Red wrong answer options
-      scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) +
-      # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options)
-      scale_colour_gradient(low = "green", high = "red", guide = FALSE) +
-      # Change y-axis limit to constant
-      coord_cartesian(ylim = c(-.3, .4)) +
-      # Change titles and x axis name
-      labs(
-        x = "Answer Options",
-        title = paste(
-          "IRC chart(s) per question and per answer options. The green bars represent the right answer options.",
-          "\n",
-          "The color of the border represents the desirability (high for the right answer options, low for the ",
-          "wrong answer options)",
-          sep = ""
-        )
-      ) +
-      theme(
-        strip.text.x = element_text(size = 7),
-        # Change font size of item names and Answer options
-        axis.text.x = element_text(size = 4.8),
-        axis.ticks.x = element_line(size = .1),
-        title = element_text(size = 8)
-      )
-
-    if (Questions_p2 != 0) {
-      bar_plot_IRC2 <- ggplot2::ggplot(
-        # Create subset of first 16 questions
-        ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[(Questions_p1 + 1) : Questions_p2]), ],
-        # Create chart with Answer Option on x-axis and IRC on y-asix
-        ggplot2::aes("Answer Option", IRC, fill = Correct, colour = IRC_col_scale)
-      )
-      bar_IRC2 <-
-        bar_plot_IRC2 +
-        # Create Bar chart
-        geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") +
-        # Display the different items
-        facet_grid(. ~ id, scales = "free_x", space = "free_x") +
-        # Fill in the bars: Green right answer options, Red wrong answer options
-        scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) +
-        # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options)
-        scale_colour_gradient(low = "green", high = "red", guide = FALSE) +
-        # Change y-axis limit to constant
-        coord_cartesian(ylim = c(-.3, .4)) +
-        # Change titles and x axis name
-        labs(x = "Answer Options", title = "Item") +
-        theme(
-          strip.text.x = element_text(size = 7),
-          # Change font size of item names and Answer options
-          axis.text.x = element_text(size = 4.8),
-          axis.ticks.x = element_line(size = .1)
-        )
-    }
-
-    if (Questions_p3 != 0) {
-      bar_plot_IRC3 <- ggplot2::ggplot(
-        # Create subset of first 16 questions
-        ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[(Questions_p2 + 1) : Questions_p3]), ],
-        # Create chart with Answer Option on x-axis and IRC on y-asix
-        ggplot2::aes("Answer Option", IRC, fill = Correct, colour = IRC_col_scale)
-      )
-      bar_IRC3 <-
-        bar_plot_IRC3 +
-        # Create Bar chart
-        geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") +
-        # Display the different items
-        facet_grid(. ~ id, scales = "free_x", space = "free_x") +
-        # Fill in the bars: Green right answer options, Red wrong answer options
-        scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) +
-        # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options)
-        scale_colour_gradient(low = "green", high = "red", guide = FALSE) +
-        # Change y-axis limit to constant
-        coord_cartesian(ylim = c(-.3, .4)) +
-        # Change titles and x axis name
-        labs(x = "Answer Options", title = "Item") +
-        theme(
-          strip.text.x = element_text(size = 7),
-          # Change font size of item names and Answer options
-          axis.text.x = element_text(size = 4.8),
-          axis.ticks.x = element_line(size = .1)
-        )
-    }
-
-    if (Questions_p4 != 0) {
-      bar_plot_IRC4 <- ggplot2::ggplot(
-        # Create subset of first 16 questions
-        ans_opt_datafrm[ans_opt_datafrm$id %in% c(levels(ans_opt_datafrm$id)[(Questions_p3 + 1) : Questions_p4]), ],
-        # Create chart with Answer Option on x-axis and IRC on y-asix
-        ggplot2::aes("Answer Option", IRC, fill = Correct, colour = IRC_col_scale)
-      )
-      bar_IRC4 <-
-        bar_plot_IRC4 +
-        # Create Bar chart
-        geom_bar(ggplot2::aes(x = Ans_Factor), stat = "identity") +
-        # Display the different items
-        facet_grid(. ~ id, scales = "free_x", space = "free_x") +
-        # Fill in the bars: Green right answer options, Red wrong answer options
-        scale_fill_manual(values = c("Incorrect" = "Red", "Correct" = "Green"), guide = FALSE) +
-        # Create colour boundray: Green = "right" (low for wrong answer options, high for right answer options)
-        scale_colour_gradient(low = "green", high = "red", guide = FALSE) +
-        # Change y-axis limit to constant
-        coord_cartesian(ylim = c(-.3, .4)) +
-        # Change titles and x axis name
-        labs(x = "Answer Options", title = "Item") +
-        theme(
-          strip.text.x = element_text(size = 7),
-          axis.text.x = element_text(size = 4.8), # Change font size of item names and Answer options
-          axis.ticks.x = element_line(size = .1)
-        )
-    }
-  }
-
-  # Creating the bar plots. Depending on the amount of plots, different arranges are made.
-  suppressWarnings(
-    if (any(key != 0)) {
-      if (Questions_p2 == 0) {
-        # Only 1 Answer Option plot --> all plots on 1 page
-
-        grid::grid.newpage()
-        grid::pushViewport(grid::viewport(layout = grid::grid.layout(4, 1)))
-        vplayout <- function(x, y) {
-          grid::viewport(layout.pos.row = x, layout.pos.col = y)
-        }
-        print(bar_freq, vp = vplayout(1, 1))
-        print(bar_IRC, vp = vplayout(2, 1))
-        print(bar_freq1, vp = vplayout(3, 1))
-        print(bar_IRC1, vp = vplayout(4, 1))
-      }
-
-      if (Questions_p3 == 0 & Questions_p2 != 0) {
-        # 2 Answer Option plots --> 2 pages of plot output
-
-        grid::grid.newpage()
-        grid::pushViewport(grid::viewport(layout = grid::grid.layout(3, 1)))
-        vplayout <- function(x, y) {
-          grid::viewport(layout.pos.row = x, layout.pos.col = y)
-        }
-        print(bar_freq, vp = vplayout(1, 1))
-        print(bar_IRC, vp = vplayout(2, 1))
-        print(bar_freq1, vp = vplayout(3, 1))
-
-        grid::grid.newpage()
-        grid::pushViewport(grid::viewport(layout = grid::grid.layout(3, 1)))
-
-        print(bar_freq2, vp = vplayout(1, 1))
-        print(bar_IRC1, vp = vplayout(2, 1))
-        print(bar_IRC2, vp = vplayout(3, 1))
-      }
-
-      if (Questions_p4 == 0 & Questions_p3 != 0) {
-        # 3 Answer Option plots --> 2 pages of plot output
-
-        grid::grid.newpage()
-        grid::pushViewport(grid::viewport(layout = grid::grid.layout(2, 1)))
-        vplayout <- function(x, y) {
-          grid::viewport(layout.pos.row = x, layout.pos.col = y)
-        }
-        print(bar_freq, vp = vplayout(1, 1))
-        print(bar_IRC, vp = vplayout(2, 1))
-
-        grid::grid.newpage()
-        grid::pushViewport(grid::viewport(layout = grid::grid.layout(3, 1)))
-
-        print(bar_freq1, vp = vplayout(1, 1))
-        print(bar_freq2, vp = vplayout(2, 1))
-        print(bar_freq3, vp = vplayout(3, 1))
-
-        grid::grid.newpage()
-        grid::pushViewport(grid::viewport(layout = grid::grid.layout(3, 1)))
-
-        print(bar_IRC1, vp = vplayout(1, 1))
-        print(bar_IRC2, vp = vplayout(2, 1))
-        print(bar_IRC3, vp = vplayout(3, 1))
-      }
-
-      if (Questions_p4 != 0) {
-        # 4 Answer options plots --> 3 pages of plot output
-
-        grid::grid.newpage()
-        grid::pushViewport(grid::viewport(layout = grid::grid.layout(2, 1)))
-        vplayout <- function(x, y) {
-          grid::viewport(layout.pos.row = x, layout.pos.col = y)
-        }
-        print(bar_freq, vp = vplayout(1, 1))
-        print(bar_IRC, vp = vplayout(2, 1))
-
-        grid::grid.newpage()
-        grid::pushViewport(grid::viewport(layout = grid::grid.layout(4, 1)))
-
-        print(bar_freq1, vp = vplayout(1, 1))
-        print(bar_freq2, vp = vplayout(2, 1))
-        print(bar_freq3, vp = vplayout(3, 1))
-        print(bar_freq4, vp = vplayout(4, 1))
-
-        grid::grid.newpage()
-        grid::pushViewport(grid::viewport(layout = grid::grid.layout(4, 1)))
-
-        print(bar_IRC1, vp = vplayout(1, 1))
-        print(bar_IRC2, vp = vplayout(2, 1))
-        print(bar_IRC3, vp = vplayout(3, 1))
-        print(bar_IRC4, vp = vplayout(4, 1))
-      }
-    } else {
-      # Plot if no answer options are present
-
-      grid::grid.newpage()
-      grid::pushViewport(grid::viewport(layout = grid::grid.layout(2, 1)))
-      vplayout <- function(x, y) {
-        grid::viewport(layout.pos.row = x, layout.pos.col = y)
-      }
-      print(bar_freq, vp = vplayout(1, 1))
-      print(bar_IRC, vp = vplayout(2, 1))
-    }
-  )
-  dev.off()
+                   item.names,
+                   correct.frequency,
+                   correct.percentage,
+                   frequency.answer.options,
+                   percentage.answer.options,
+                   corrected.item.tot.cor,
+                   corrected.item.tot.cor.answ.option,
+                   cronbach,
+                   student.scores,
+                   categories) {
+  rmarkdown::render("report.Rmd", output_format = "html_document", output_file = paste0(file.name,".html"))
+  rmarkdown::render("report.Rmd", output_format = "pdf_document", output_file = paste0(file.name,".pdf"))
 }
diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd
new file mode 100644
index 0000000..986124b
--- /dev/null
+++ b/app/Lib/Rscripts/report.Rmd
@@ -0,0 +1,649 @@
+---
+html_document: null
+fig_width: 11
+classoption: landscape
+keep_md: yes
+output: html_document
+pdf_document: default
+---
+
+```{r, echo = FALSE, warning = FALSE, message = FALSE}
+library(ggplot2)
+library(pander)
+library(plyr)
+```
+
+
+```{r, echo = FALSE, eval = TRUE, results = "asis"}
+
+CreateItemList <- function(number.answeroptions, correct.frequency,
+  correct.percentage, frequency.answer.options, percentage.answer.options,
+  corrected.item.tot.cor, corrected.item.tot.cor.answ.option, item.names, key) {
+  # Creates a list with frequency, percentage correct, and IRC for every item.
+  #
+  # Args:
+  #  number.answeroptions: Vector with number of answer options per item
+  #  frequency.answer.options: Matrix with [i,j] the frequency of
+  #    answer option i of item j
+  #  percentage.answer.options: Matrix with [i,j] the percentage of
+  #    answer option i of item j
+  #  corrected.item.tot.cor.answ.option: Matrix with [i,j] the item total
+  #    correlation for answer option i of item j
+  #  correct.frequency: Vector with total number correct per item
+  #  correct.percentage: Vector with percentage correct per item
+  #  corrected.item.tot.cor: Vector with item rest correlation per item
+  #
+  # Returns:
+  #  List with one entry per item. Each entry consists of a data frame with
+  #  frequency, percentage and IRC for the total item and the answer options
+  #  (if the item has answer options)
+
+  item.list <- list() #  Creates list to put item output in
+  colnames1 <- c(" ", "Frequency", "Percentage", "IRC")
+  colnames2 <- c("Answer Option", "Frequency", "Percentage", "IRC", "Correct")
+
+  # For every item, create entry in list with frequencies, percentages, and IRCs
+  for (i in 1:length(item.names)) {
+    if (number.answeroptions[i] > 0) {
+      correct <- c(ifelse(key[1:number.answeroptions[i], i] == 1,
+                          "Correct", "Incorrect"), "Incorrect")
+
+      item.list[[i]] <- data.frame(
+        c(LETTERS[1:number.answeroptions[i]], "Missing"),
+        c(frequency.answer.options[c(2: (number.answeroptions[i] + 1), 1), i]),
+        c(percentage.answer.options[c(2: (number.answeroptions[i] + 1), 1), i]),
+        c(corrected.item.tot.cor.answ.option[c(2: (number.answeroptions[i] + 1),
+                                               1), i]),
+        correct,
+        row.names = NULL)
+      colnames(item.list[[i]]) <- colnames2
+
+    } else {
+      item.list[[i]] <- data.frame("Correct", correct.frequency[i],
+                                    correct.percentage[i],
+                                    corrected.item.tot.cor[i])
+      colnames(item.list[[i]]) <- colnames1
+     }
+  }
+
+  names(item.list) <- item.names #  Attach item names
+  return(item.list)
+}
+
+# Use the function defined above
+item.list <- CreateItemList(number.answeroptions, correct.frequency,
+  correct.percentage, frequency.answer.options, percentage.answer.options,
+  corrected.item.tot.cor, corrected.item.tot.cor.answ.option, item.names, key)
+
+cat(paste("#", title))
+```
+
+# General
+
+What | Result |
+:---------------------- | :------------- |
+Number of students | `r length(student.scores)`|
+Number of items | `r length(item.names)`|
+Average score | `r round(mean(student.scores), digits = 3)`|
+Standard deviation | `r round(sd(student.scores), digits = 3)`| 
+Cronbach's alpha | `r cronbach`| 
+Standard error | `r round(sd(student.scores * sqrt(1 - cronbach)), digits = 3)`| 
+
+
+# Item statistics
+
+For each item the frequency, percentage and item rest correlations (IRC) of 
+every answer option are shown The IRC should be (highly) positive for the 
+right answer option and low for the wrong answer option(s).
+
+```{r pander, results = "asis", eval = TRUE, echo = FALSE, warning = FALSE, message = FALSE}
+
+pander.table <- function(item.list, key) {
+  # Print table for each item with frequency, percentage and the
+  # item-rest correlation. Correct answers are highlighted in bold
+  #
+  # Args:
+  #  item.list: list with one entry per item. Each entry consists of a data frame
+  #             Data frame has a row for every answer option + one for missing.
+  #             Columns are: Answer option, frequency, percentage, IRC,
+  #                          and Correct (answer option is Correct/Incorrect)
+  #            Length should be the same as number of columns of key
+  #  key: Matrix of 0's and 1's. key[i,j] implies wether answer option i
+  #       to item j is right (1) or wrong (0). If a column (item) consists of
+  #       only 0s, the item is interpreted as graded manually.
+  #       Number of columns should be equal to length of item.list
+
+  panderOptions("knitr.auto.asis", FALSE)
+  for (i in 1:length(item.list)) {
+    if (any(key[, i] == 1)) {
+      item.list[[i]]$Correct <- revalue(item.list[[i]]$Correct,
+                                        c("Correct" = "X", "Incorrect" = ""))
+      emphasize.strong.rows(which(key[, i] == 1, arr.ind = TRUE))
+      pander(item.list[i])
+    } else {
+      t <- item.list[i]
+      pander(t)
+    }
+  }
+}
+
+pander.table(item.list, key)
+```
+
+\pagebreak
+
+```{r, echo = FALSE, message = FALSE, warning = FALSE, fig.width = 11, results = 'asis'}
+
+CreateDf <- function(item.names, correct.percentage, corrected.item.tot.cor) {
+  # Creates a data frame with the overall item statistics including a colour
+  # indicator for the percentage correct and IRC. Colours is defined below.
+  #
+  # Args:
+  #  These are the same as defined in CreateItemList
+  #
+  # Returns:
+  #  Data frame with a row for every item. Columns consist of the item name,
+  #  percentage correct, colour indicator for percentage, IRC and colour
+  #  indicator for the IRC.
+
+  correct.df <- data.frame(item = factor(item.names, item.names),
+    correct.percentage,
+    perc.col = ifelse(correct.percentage < 40, 0,
+                      ifelse(correct.percentage > 70, 1,
+                             1 / 75 * correct.percentage - 7 / 30)),
+
+    corrected.item.tot.cor,
+    irc.col = ifelse(corrected.item.tot.cor < 0, 0,
+                     ifelse(corrected.item.tot.cor > .05, 1,
+                            8 * corrected.item.tot.cor + 3 / 10))
+  )
+
+  return(correct.df)
+}
+
+PercPlotOverall <- function(correct.df) {
+  # Plots bar graph of percentage correct of every item. Bars are coloured
+  # depending on the percentage correct:
+  # < 40: Red
+  # 40 - 70: Orange till green-ish
+  # > 70: Green
+  #
+  # Args:
+  #  correct.df: data frame with item name, percentage correct and colour code
+  #  for the percentages.
+  #
+  # Returns:
+  #  Explanation of the plot and the plot itself
+
+  cat("
+
+## Percentage Correct
+
+Plot of percentage correct per item. If *more than 70%* of the students answer
+an item correctly, the students have mastered the material well enough, thus it
+is coloured *green*. If *less than 40%* of the students answer an item
+correctly, the students have not mastered the material, and is coloured *red*.
+*Orange* indicates the percentage correct lies *between 40% and 70%*. The
+colours are based on multiple choice items with 3 or 4 answer options.
+
+")
+
+  bar.freq <- ggplot(correct.df, aes(item, correct.percentage, fill = perc.col))
+  bar.freq <- bar.freq + geom_bar(stat = "identity") + #  Create Bar chart
+    scale_fill_gradient(low = "red", high = "green", limits = c(0, 1),
+                        guide = FALSE)  + #  Create colour boundray
+    coord_cartesian(ylim = c(0, 100)) + #  Change y-axis limit to constant
+    labs(x = "Item", y = "Percentage Correct") + #  x-axis name
+    theme_bw() + #  Black and white
+    theme(axis.text.x = element_text(size = 8, angle = 90), #  Change font size
+          axis.ticks.x = element_line(size = .4))
+
+  suppressWarnings(print(bar.freq))
+}
+
+IRCPlotOverall <- function(correct.df) {
+  # Plots bar graph of IRC of every item. Bars are coloured by IRC:
+  # < 0: Red
+  # 0 - .10: Orange till green-ish
+  # > .10: green
+  #
+  # Args:
+  #  correct.df: data frame with item name, percentage correct and colour code
+  #  for the percentages.
+  #
+  # Returns:
+  #  Explanation of the plot and the plot itself
+
+  cat("\\pagebreak
+
+## Item rest correlations
+
+Plot of Item Rest Correlations per item: The correlation between an item (0 for
+wrong answers and 1 for right answers) and the sum of all other items. Items
+with a correlation *higher than .05* indicate that students who answered this
+item correctly answered the other items more often correctly than students who
+answered this item incorrectly. Thus it is coloured *green*. Items with a
+correlation *lower than 0* indicate no relationship between this item and the
+other items, or that students who answered this item correctly answered the
+other items more often incorrectly than students who answered this incorrectly,
+thus it is coloured *red*. *Between 0 and .05*, an item is coloured *orange*.
+
+")
+
+  # Create bar chart
+  bar.IRC <- ggplot(correct.df, aes(item, corrected.item.tot.cor,
+                                    fill = irc.col))
+  bar.IRC <-  bar.IRC + geom_bar(stat = "identity") + #  Create Bar chart
+    scale_fill_gradient(low = "red", high = "green", limits = c(0, 1),
+                        guide = FALSE) +
+    theme_bw() +
+    labs(x = "Item", y = "Item Rest Correlation") + #  axes names
+    theme(strip.text.x = element_text(size = 7),
+          axis.text.x = element_text(size = 8, angle = 90),
+          axis.ticks.x = element_line(size = .4))
+
+  suppressWarnings(print(bar.IRC))
+}
+
+correct.df <- CreateDf(item.names, correct.percentage, corrected.item.tot.cor)
+PercPlotOverall(correct.df)
+IRCPlotOverall(correct.df)
+```
+
+```{r, echo = FALSE, eval = TRUE, warning= FALSE, fig.width = 11}
+
+# Create extra variables for the colours in the bar plots
+PercCol <- function(percentage, correct) {
+  # Calculate colours for the percentage answer options.
+  # Args:
+  #  percentage: Percentage answered per answer options
+  #  correct: Vector of "Correct" and "Incorrect" of same length of percentage
+  #
+  # Returns:
+  #  Value between 0 and 1. 0 corresponds to red and 1 to green in the plots.
+  #  For the correct options:
+  #    < 40: red
+  #    40 - 70: organge - green-ish
+  #    > 70: green
+  #  For the incorrect options: difference between correct answer option and
+  #  incorrect answer options are calculated. Colours are based on difference:
+  #    < -10: Green
+  #    -10 - 0: orange - green-ish
+  #    > 0: green
+
+  perc.col <- vector("numeric", length = length(correct))
+  perc.col[correct == "Correct"] <- ifelse(percentage[correct == "Correct"]
+    < 40, 0, ifelse(percentage[correct == "Correct"] > 70, 1,
+                    1 / 75 * percentage[correct == "Correct"] - 7 / 30))
+  dif <- percentage[correct == "Incorrect"] -
+                    min(percentage[correct == "Correct"])
+  perc.col[correct == "Incorrect"] <- ifelse(dif >= 0, 0,
+                                      ifelse(dif < -10, 1,
+                                             -1 / 25 * dif + 3 / 10))
+  perc.col
+}
+
+IrcCol <- function(IRC, correct){
+  # Calculate colours for the percentage answer options.
+  #
+  # Args:
+  #  IRC: IRC per answer options. Same length as correct
+  #  correct: Vector of "Correct" and "Incorrect" of same length of IRC
+  #
+  # Returns:
+  #  Value between 0 and 1. 0 corresponds to red and 1 to green in the plots.
+  #  For the correct answer options:
+  #    < 0: red
+  #    0 - 0.05: orange - green-ish
+  #    > 0.05: green
+  # For the incorrect answer options: difference between correct answer option
+  # and incorrect answer option are calculated. Colours are based on this
+  # difference:
+  #    > 0: red
+  #    -0.05 - 0: orange - green-ish
+  #    < -0.05: red
+
+  IRC.col <- numeric()
+  IRC.col[correct == "Correct"] <- ifelse(IRC[correct == "Correct"] < 0, 0,
+    ifelse(IRC[correct == "Correct"] > .05, 1,
+      8 * IRC[correct == "Correct"] + 3 / 10))
+  dif <- IRC[correct == "Incorrect"] - min(IRC[correct == "Correct"])
+  IRC.col[correct == "Incorrect"] <- ifelse(dif >= 0, 0,
+    ifelse(dif < -.05, 1, -8 * dif + 3 / 10))
+  return(IRC.col)
+}
+
+CreateAnsDf <- function(item.list, key, number.answeroptions, item.names) {
+  # Creates a data frame with the statistics per answer option including a
+  # colour indicator for the percentage correct and IRC.
+  #
+  # Args:
+  #  These are the same as defined earlier
+  #
+  # Returns:
+  #  Data frame with a row for every answer option. Columns consist of the
+  #  item name, answer options, frequency, percentage, IRC, Correct (whether
+  #  an option is correct/incorrect), colour for percentage and colour for IRC
+
+  for (i in 1:length(item.list)) {
+    if (any(key[, i] != 0)) {
+      item.list[[i]]$perc.col <- with(item.list[[i]],
+                                      PercCol(Percentage, Correct))
+      item.list[[i]]$IRC.col <- with(item.list[[i]],
+                                     IrcCol(IRC, Correct))
+    }
+  }
+  ans.opt.df <- ldply(item.list[number.answeroptions != 0], data.frame)
+  names(ans.opt.df)[1] <- "id"
+  ans.opt.df$Answer.Option <- gsub("Missing", "Mi", ans.opt.df$Answer.Option)
+  ans.opt.df$Answer.Option <- factor(ans.opt.df$Answer.Option,
+    levels = c(LETTERS[1:max(number.answeroptions)], "Mi"))
+  ans.opt.df$id <- factor(ans.opt.df$id,
+                          levels = item.names[number.answeroptions != 0])
+  if (all(key == 0)) {
+    ans.opt.df <- NULL
+  }
+
+  ans.opt.df
+}
+
+NumberOfPlots <- function(ans.opt.df, key) {
+  # Calculates which item is displayed in which plot. The total number of
+  # answer options is too large (usually) to display in one plot.
+  #
+  # Args:
+  #  ans.opt.df: As defined in function above
+  #
+  # Returns:
+  #  n.plots: the number of plots
+  #  items.in.plot: list entry per plot. Every entry consists of item names
+  #  in that plot
+
+  if (any(key != 0)) {
+    max.answ.opts <- 80 #  Maximum number of bars per plot
+    tot.answ.opts <- nrow(ans.opt.df)
+    n.plots <- ceiling(tot.answ.opts / max.answ.opts)
+    answ.opts.per.plot <- floor(tot.answ.opts / n.plots)
+
+    items.in.plot <- list(unique(ans.opt.df[1:answ.opts.per.plot, 1]))
+    if (n.plots > 1) {
+      for (i in 2:n.plots) {
+        items.in.plot[[i]] <- unique(ans.opt.df[ (((i - 1) * answ.opts.per.plot)
+                                     + 1): (i * answ.opts.per.plot), 1])
+        if (items.in.plot[[i]][1] %in% items.in.plot[[i - 1]]) {
+          items.in.plot[[i]] <- items.in.plot[[i]][-1]
+        }
+      }
+    }
+    assign("n.plots", n.plots, envir = globalenv())
+    assign("items.in.plot", items.in.plot, envir = globalenv())
+
+  }
+}
+
+ans.opt.df <- CreateAnsDf(item.list, key, number.answeroptions, item.names)
+NumberOfPlots(ans.opt.df, key)
+
+```
+
+```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11, results= 'asis'}    
+
+PercPlotAnswOpt <- function(ans.opt.df, items.in.plot, key) {
+  # Plots bar graph(s) of the percentages of every answer option
+  #
+  # Args:
+  #  All defined as before
+  #
+  # Returns:
+  #  Description of how to read the plots
+  #  One or multiple plots
+
+  if (any(key != 0)) {
+
+    cat("\\pagebreak
+
+## Percentage chosen per answer options
+
+Percentage plot of all answer options per item. **White** bars represent the
+**right** answer option(s) and **black** the **wrong** options.
+
+The border represents the performance of an item. The **right** answer options
+are coloured *green* if *more than 70%* of the students choose this option. In
+that case, the students have mastered the material. They are coloured *red* if
+*less than 40%* choose this option. *Between 40% and 70%*, it is coloured
+*orange*.
+
+The **wrong** answer options are coloured *green* if an option is chosen *10
+percentage point less than the right option*. In that case, more students chose
+the right answer option than the wrong option indicating that students are not
+confused by the wrong option. They are coloured *red* if an option is chosen
+*equally or more often than the right option*. In this case, students pick the
+wrong answer option often, indicating that this option may be right as well. It
+is coloured *orange* in between these two.
+
+")
+
+    for (i in 1:length(items.in.plot)) {
+      bar.freq1 <- ggplot(ans.opt.df[ans.opt.df$id %in% items.in.plot[[i]], ],
+        aes("Answer Option", Percentage, fill = Correct, colour = perc.col))
+      bar.freq1 <- bar.freq1 + geom_bar(aes(x = Answer.Option),
+                                        stat = "identity", size = 1.5) +
+        facet_grid(. ~ id, scales = "free_x", space = "free_x") + #  Plot items
+        scale_fill_manual(values = c("Incorrect" = "Black", #  Fill in the bars
+                          "Correct" = "White"), guide = FALSE) +
+        scale_colour_gradient(low = "red", high = "green", limits = c(0, 1),
+                              guide = FALSE)  + #  Create colour boundary
+        coord_cartesian(ylim = c(0, 100)) + #  Change y-axis limit to constant
+        labs(x = "Answer Options") + # x  axis name
+        theme_bw() +
+        theme(axis.text.x = element_text(size = 8), # Font sizes
+              axis.text.y = element_text(size = 12),
+              axis.title.x = element_text(size = 15),
+              axis.title.y = element_text(size = 15))
+
+        suppressWarnings(print(bar.freq1))
+    }
+  }
+}
+
+IRCPlotAnswOpt <- function(ans.opt.df, items.in.plot, key) {
+  # Plots bar graph(s) of the IRC of every answer option
+  #
+  # Args:
+  #  All defined as before
+  #
+  # Returns:
+  #  Description of how to read the plots
+  #  One or multiple plots
+
+  if (any(key != 0)) {
+
+    cat('\\pagebreak
+
+## Item rest correlations per answer option
+
+Item rest correlation (IRC) plot of all answer options per item The IRC is the
+correlation between an answer option (1 for students who chose this options and
+0 for student who did not) and the sumscore on all other items. **White** bars
+represent the **right** answer option(s) and **black** the **wrong** options.
+
+The border represents the performance of an item. The **right** answer options
+are coloured *green* if the IRC is *higher than .05*. In that case, students who
+picked the right answer option scored higher on all other items than student who
+did not pick the right option. They are coloured *red* if the IRC is *less than
+0*. In that case, students who picked the right answer option scored equally or
+worse on the other items than students who picked the other options. Between *0
+and .05*, the option is coloured *orange*.
+
+The **wrong** answer options are coloured *green* if the IRC is *more than .1
+lower than the right option*. In that case, students who answered the other
+items incorrectly, usually answered this item incorrectly as well. They are
+coloured *red* if the IRC is *equal or higher than the right answer option*.
+In that case, students who answered the other items correctly may have answered
+this item incorrectly. In between these two, the option is coloured *orange*.
+
+')
+
+    # Creating IRC plots
+    for (i in 1:length(items.in.plot)) {
+      # Select items and variables to plot
+      bar.IRC1 <- ggplot(ans.opt.df[ans.opt.df$id %in% items.in.plot[[i]], ],
+        aes("Answer Option", IRC, fill = Correct, colour = IRC.col))
+      bar.IRC1 <- bar.IRC1 + geom_bar(aes(x = Answer.Option), stat = "identity",
+                                      size = 1.5) + #  Create Bar chart
+        facet_grid(. ~ id, scales = "free_x", space = "free_x") +
+        scale_fill_manual(values = c("Incorrect" = "Black", "Correct" = "White"),
+                          guide = FALSE) + #  Fill in the bars
+        scale_colour_gradient(low = "red", high = "green", limits = c(0, 1),
+                              guide = FALSE)  + #  Create colour boundray
+        # Change y-axis limit to either the maximum and minimum IRC
+        coord_cartesian(ylim = c(min(ans.opt.df$IRC) - .01,
+                                 max(ans.opt.df$IRC)  + .01)) +
+        labs(x = "Answer Options") +
+        theme_bw() +
+        theme(axis.text.x = element_text(size = 8), #  Font sizes
+              axis.text.y = element_text(size = 12),
+              axis.title.x = element_text(size = 15),
+              axis.title.y = element_text(size = 15)
+        )
+        suppressWarnings(print(bar.IRC1))
+    }
+  }
+}
+
+IRCPlotAnswOpt(ans.opt.df, items.in.plot, key)
+PercPlotAnswOpt(ans.opt.df, items.in.plot, key)
+```
+
+\pagebreak
+
+## Cumulative Distribution
+
+Cumulative Distribution showing all possible scores and the percentage of 
+students that scored that number or higher
+
+```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11}
+CumHist <- function(student.scores, n.item) {
+  # Create a reversed cumulative histogram, i.e. the height of score x is
+  # the percentage of students who scores x or higher.
+  #
+  # Args:
+  #  student.scores: numeric vector of test score per student.
+  #  n.item: number of items
+  #
+  # Returns:
+  #  Reversed cumulative histogram plot
+
+  h <- heights <- length(student.scores)
+  n <- sort(unique(student.scores))
+
+  for (i in 1:length(n)) {
+    h <- h - length(student.scores[student.scores == n[i]])
+    heights <- c(heights, h)
+  }
+
+  cesuurlijnen <- seq(0.2, 0.8, 0.2) * length(student.scores)
+  gridlijnen <- round(seq(0, length(student.scores),
+                      length(student.scores) / 25), 0)
+
+  gridlijnen <- gridlijnen[-grep(paste(round(cesuurlijnen, 0),
+                           collapse = "|"), gridlijnen)]
+
+  heights <- heights[1:length(unique(student.scores))]
+  percent <- heights / length(student.scores) * 100
+  df <- data.frame(score = sort(unique(student.scores)),
+                   total = heights, percent = round(percent, 2))
+
+  vec <- df[1, 1]:df[nrow(df), 1] #  save all possible total scores
+
+  # Find if a total score is missing and add it
+  mis <- NA
+  if (sum(! (vec %in% df[, 1]) != 0)) {
+    mis <- vec[! (vec %in% df[, 1])]
+  }
+  if (!is.na(mis[1])) {
+    for (i in 1:length(mis)) {
+      df[ (nrow(df) + 1), ] <- c(mis[i], 0, 0)
+    }
+    df <- df[order(df$score), ]
+  }
+
+
+  # if no one gets a certain total score, replace the 0 with the next total score
+  suppressWarnings(
+    df[which(df[, 2] == 0, arr.ind = TRUE), 2] <- df[ (which(df[, 2] == 0,
+      arr.ind = TRUE) + 1), 2]
+  )
+  suppressWarnings(
+    df[which(df[, 3] == 0, arr.ind = TRUE), 3] <- df[ (which(df[, 3] == 0,
+      arr.ind = TRUE) + 1), 3]
+  )
+
+  g <- ggplot(data = df, aes(x = score, y = total)) +
+    geom_hline(yintercept = c(gridlijnen), linetype = "solid",
+               colour = "gray80") +
+    geom_hline(yintercept = cesuurlijnen, linetype = "dashed", size = 0.75,
+               colour = "black") +
+    geom_histogram(stat = "identity", binwidth = 1) +
+    xlab("Total score") + ylab("% of students with score or higher") +
+    scale_y_continuous(breaks = c(0, cesuurlijnen, length(student.scores)),
+                       labels = c(seq(0, 100, 20))) +
+    scale_x_continuous(limits = c(df[1, 1] - 1, n.item + 1), breaks = 0:n.item,
+                       expand = c(0, 0)) +
+    theme_bw() +
+    theme(panel.grid = element_blank())
+
+  print(g)
+}
+
+CumHist(student.scores, n.item = ncol(key))
+```
+
+```{r, echo = FALSE, eval = TRUE, warnings = FALSE, message = FALSE, results = 'asis', fig.width = 11}
+## Do the whole script above for every sub category
+if (!length(categories) == 0) {
+  cat("\n\n")
+  cat("\\pagebreak")
+  cat("\n\n")
+  cat("# Analysis for subcategories")
+  cat("\n\n")
+
+  for (i in 1:length(categories)) {
+    items <- categories[[i]]$items
+    if (i != 1) {
+      cat("\\pagebreak")
+      cat("\n\n")
+    }
+    cat("\n\n")
+    cat(paste("## Category:", categories[[i]]$name ))
+    cat("\n\n")
+
+    item.list <- CreateItemList(number.answeroptions[items],
+      correct.frequency[items], correct.percentage[items],
+      frequency.answer.options[, items], percentage.answer.options[, items],
+      categories[[i]]$corrected.item.tot.cor,
+      categories[[i]]$corrected.item.tot.cor.answ.option,
+      item.names[items], key[, items])
+
+    correct.df <- CreateDf(item.names[items], correct.percentage[items],
+                           categories[[i]]$corrected.item.tot.cor)
+
+    PercPlotOverall(correct.df)
+    IRCPlotOverall(correct.df)
+
+    if (any(key[, items] != 0)) {
+      ans.opt.df <- CreateAnsDf(item.list, key[, items],
+                                number.answeroptions[items], item.names[items])
+      NumberOfPlots(ans.opt.df, key[, items])
+      PercPlotAnswOpt(ans.opt.df, items.in.plot, key[, items])
+      IRCPlotAnswOpt(ans.opt.df, items.in.plot, key[, items])
+    }
+
+    cat("\\pagebreak")
+    cat("\n\n")
+    cat("## Cumulative Distribution")
+    cat("\n\n")
+    CumHist(categories[[i]]$student.scores, n.item = length(items))
+  }
+}
+```
\ No newline at end of file

From 1377dfc9e7ffebbeaf15e134a59769860a97023a Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Wed, 3 Aug 2016 15:17:32 +0200
Subject: [PATCH 02/21] Added *.Rmd files to R lintr

---
 bin/travis-linter.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/travis-linter.sh b/bin/travis-linter.sh
index bcb7082..749821f 100755
--- a/bin/travis-linter.sh
+++ b/bin/travis-linter.sh
@@ -3,7 +3,7 @@ set -e
 
 exitstatus=0
 
-for file in $(pwd)/app/Lib/Rscripts/*.R
+for file in $(pwd)/app/Lib/Rscripts/*.R*
 do
     Rscript -e "lintr::lint(\"$file\")"
     outputbytes=`Rscript -e "lintr::lint(\"$file\")" | grep ^ | wc -c`

From b7b6fa13a1242264302aa29a4fbf3d79d16d7221 Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Wed, 10 Aug 2016 10:30:03 +0200
Subject: [PATCH 03/21] Updated R scripts

---
 app/Lib/Rscripts/report.R   |   27 +-
 app/Lib/Rscripts/report.Rmd | 1471 +++++++++++++++++++----------------
 2 files changed, 832 insertions(+), 666 deletions(-)

diff --git a/app/Lib/Rscripts/report.R b/app/Lib/Rscripts/report.R
index 0c10111..02fc318 100644
--- a/app/Lib/Rscripts/report.R
+++ b/app/Lib/Rscripts/report.R
@@ -1,17 +1,10 @@
-report <- function(file.name,
-                   title,
-                   number.answeroptions,
-                   key,
-                   item.names,
-                   correct.frequency,
-                   correct.percentage,
-                   frequency.answer.options,
-                   percentage.answer.options,
-                   corrected.item.tot.cor,
-                   corrected.item.tot.cor.answ.option,
-                   cronbach,
-                   student.scores,
-                   categories) {
-  rmarkdown::render("report.Rmd", output_format = "html_document", output_file = paste0(file.name,".html"))
-  rmarkdown::render("report.Rmd", output_format = "pdf_document", output_file = paste0(file.name,".pdf"))
-}
+report <- function(filename, title, number_answeroptions, key, item_names,
+                   correct_frequency, correct_percentage,
+                   frequency_answer_options, percentage_answer_options,
+                   corrected_item_tot_cor, corrected_item_tot_cor_answ_option,
+                   cronbach, student_scores, categories) {
+  rmarkdown::render("report_subcategories.Rmd", output_format = "html_document",
+                    output_file = paste0(filename, ".html"))
+  rmarkdown::render("report_subcategories.Rmd", output_format = "pdf_document",
+                    output_file = paste0(filename, ".pdf"))
+}
\ No newline at end of file
diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd
index 986124b..90d8192 100644
--- a/app/Lib/Rscripts/report.Rmd
+++ b/app/Lib/Rscripts/report.Rmd
@@ -1,649 +1,822 @@
----
-html_document: null
-fig_width: 11
-classoption: landscape
-keep_md: yes
-output: html_document
-pdf_document: default
----
-
-```{r, echo = FALSE, warning = FALSE, message = FALSE}
-library(ggplot2)
-library(pander)
-library(plyr)
-```
-
-
-```{r, echo = FALSE, eval = TRUE, results = "asis"}
-
-CreateItemList <- function(number.answeroptions, correct.frequency,
-  correct.percentage, frequency.answer.options, percentage.answer.options,
-  corrected.item.tot.cor, corrected.item.tot.cor.answ.option, item.names, key) {
-  # Creates a list with frequency, percentage correct, and IRC for every item.
-  #
-  # Args:
-  #  number.answeroptions: Vector with number of answer options per item
-  #  frequency.answer.options: Matrix with [i,j] the frequency of
-  #    answer option i of item j
-  #  percentage.answer.options: Matrix with [i,j] the percentage of
-  #    answer option i of item j
-  #  corrected.item.tot.cor.answ.option: Matrix with [i,j] the item total
-  #    correlation for answer option i of item j
-  #  correct.frequency: Vector with total number correct per item
-  #  correct.percentage: Vector with percentage correct per item
-  #  corrected.item.tot.cor: Vector with item rest correlation per item
-  #
-  # Returns:
-  #  List with one entry per item. Each entry consists of a data frame with
-  #  frequency, percentage and IRC for the total item and the answer options
-  #  (if the item has answer options)
-
-  item.list <- list() #  Creates list to put item output in
-  colnames1 <- c(" ", "Frequency", "Percentage", "IRC")
-  colnames2 <- c("Answer Option", "Frequency", "Percentage", "IRC", "Correct")
-
-  # For every item, create entry in list with frequencies, percentages, and IRCs
-  for (i in 1:length(item.names)) {
-    if (number.answeroptions[i] > 0) {
-      correct <- c(ifelse(key[1:number.answeroptions[i], i] == 1,
-                          "Correct", "Incorrect"), "Incorrect")
-
-      item.list[[i]] <- data.frame(
-        c(LETTERS[1:number.answeroptions[i]], "Missing"),
-        c(frequency.answer.options[c(2: (number.answeroptions[i] + 1), 1), i]),
-        c(percentage.answer.options[c(2: (number.answeroptions[i] + 1), 1), i]),
-        c(corrected.item.tot.cor.answ.option[c(2: (number.answeroptions[i] + 1),
-                                               1), i]),
-        correct,
-        row.names = NULL)
-      colnames(item.list[[i]]) <- colnames2
-
-    } else {
-      item.list[[i]] <- data.frame("Correct", correct.frequency[i],
-                                    correct.percentage[i],
-                                    corrected.item.tot.cor[i])
-      colnames(item.list[[i]]) <- colnames1
-     }
-  }
-
-  names(item.list) <- item.names #  Attach item names
-  return(item.list)
-}
-
-# Use the function defined above
-item.list <- CreateItemList(number.answeroptions, correct.frequency,
-  correct.percentage, frequency.answer.options, percentage.answer.options,
-  corrected.item.tot.cor, corrected.item.tot.cor.answ.option, item.names, key)
-
-cat(paste("#", title))
-```
-
-# General
-
-What | Result |
-:---------------------- | :------------- |
-Number of students | `r length(student.scores)`|
-Number of items | `r length(item.names)`|
-Average score | `r round(mean(student.scores), digits = 3)`|
-Standard deviation | `r round(sd(student.scores), digits = 3)`| 
-Cronbach's alpha | `r cronbach`| 
-Standard error | `r round(sd(student.scores * sqrt(1 - cronbach)), digits = 3)`| 
-
-
-# Item statistics
-
-For each item the frequency, percentage and item rest correlations (IRC) of 
-every answer option are shown The IRC should be (highly) positive for the 
-right answer option and low for the wrong answer option(s).
-
-```{r pander, results = "asis", eval = TRUE, echo = FALSE, warning = FALSE, message = FALSE}
-
-pander.table <- function(item.list, key) {
-  # Print table for each item with frequency, percentage and the
-  # item-rest correlation. Correct answers are highlighted in bold
-  #
-  # Args:
-  #  item.list: list with one entry per item. Each entry consists of a data frame
-  #             Data frame has a row for every answer option + one for missing.
-  #             Columns are: Answer option, frequency, percentage, IRC,
-  #                          and Correct (answer option is Correct/Incorrect)
-  #            Length should be the same as number of columns of key
-  #  key: Matrix of 0's and 1's. key[i,j] implies wether answer option i
-  #       to item j is right (1) or wrong (0). If a column (item) consists of
-  #       only 0s, the item is interpreted as graded manually.
-  #       Number of columns should be equal to length of item.list
-
-  panderOptions("knitr.auto.asis", FALSE)
-  for (i in 1:length(item.list)) {
-    if (any(key[, i] == 1)) {
-      item.list[[i]]$Correct <- revalue(item.list[[i]]$Correct,
-                                        c("Correct" = "X", "Incorrect" = ""))
-      emphasize.strong.rows(which(key[, i] == 1, arr.ind = TRUE))
-      pander(item.list[i])
-    } else {
-      t <- item.list[i]
-      pander(t)
-    }
-  }
-}
-
-pander.table(item.list, key)
-```
-
-\pagebreak
-
-```{r, echo = FALSE, message = FALSE, warning = FALSE, fig.width = 11, results = 'asis'}
-
-CreateDf <- function(item.names, correct.percentage, corrected.item.tot.cor) {
-  # Creates a data frame with the overall item statistics including a colour
-  # indicator for the percentage correct and IRC. Colours is defined below.
-  #
-  # Args:
-  #  These are the same as defined in CreateItemList
-  #
-  # Returns:
-  #  Data frame with a row for every item. Columns consist of the item name,
-  #  percentage correct, colour indicator for percentage, IRC and colour
-  #  indicator for the IRC.
-
-  correct.df <- data.frame(item = factor(item.names, item.names),
-    correct.percentage,
-    perc.col = ifelse(correct.percentage < 40, 0,
-                      ifelse(correct.percentage > 70, 1,
-                             1 / 75 * correct.percentage - 7 / 30)),
-
-    corrected.item.tot.cor,
-    irc.col = ifelse(corrected.item.tot.cor < 0, 0,
-                     ifelse(corrected.item.tot.cor > .05, 1,
-                            8 * corrected.item.tot.cor + 3 / 10))
-  )
-
-  return(correct.df)
-}
-
-PercPlotOverall <- function(correct.df) {
-  # Plots bar graph of percentage correct of every item. Bars are coloured
-  # depending on the percentage correct:
-  # < 40: Red
-  # 40 - 70: Orange till green-ish
-  # > 70: Green
-  #
-  # Args:
-  #  correct.df: data frame with item name, percentage correct and colour code
-  #  for the percentages.
-  #
-  # Returns:
-  #  Explanation of the plot and the plot itself
-
-  cat("
-
-## Percentage Correct
-
-Plot of percentage correct per item. If *more than 70%* of the students answer
-an item correctly, the students have mastered the material well enough, thus it
-is coloured *green*. If *less than 40%* of the students answer an item
-correctly, the students have not mastered the material, and is coloured *red*.
-*Orange* indicates the percentage correct lies *between 40% and 70%*. The
-colours are based on multiple choice items with 3 or 4 answer options.
-
-")
-
-  bar.freq <- ggplot(correct.df, aes(item, correct.percentage, fill = perc.col))
-  bar.freq <- bar.freq + geom_bar(stat = "identity") + #  Create Bar chart
-    scale_fill_gradient(low = "red", high = "green", limits = c(0, 1),
-                        guide = FALSE)  + #  Create colour boundray
-    coord_cartesian(ylim = c(0, 100)) + #  Change y-axis limit to constant
-    labs(x = "Item", y = "Percentage Correct") + #  x-axis name
-    theme_bw() + #  Black and white
-    theme(axis.text.x = element_text(size = 8, angle = 90), #  Change font size
-          axis.ticks.x = element_line(size = .4))
-
-  suppressWarnings(print(bar.freq))
-}
-
-IRCPlotOverall <- function(correct.df) {
-  # Plots bar graph of IRC of every item. Bars are coloured by IRC:
-  # < 0: Red
-  # 0 - .10: Orange till green-ish
-  # > .10: green
-  #
-  # Args:
-  #  correct.df: data frame with item name, percentage correct and colour code
-  #  for the percentages.
-  #
-  # Returns:
-  #  Explanation of the plot and the plot itself
-
-  cat("\\pagebreak
-
-## Item rest correlations
-
-Plot of Item Rest Correlations per item: The correlation between an item (0 for
-wrong answers and 1 for right answers) and the sum of all other items. Items
-with a correlation *higher than .05* indicate that students who answered this
-item correctly answered the other items more often correctly than students who
-answered this item incorrectly. Thus it is coloured *green*. Items with a
-correlation *lower than 0* indicate no relationship between this item and the
-other items, or that students who answered this item correctly answered the
-other items more often incorrectly than students who answered this incorrectly,
-thus it is coloured *red*. *Between 0 and .05*, an item is coloured *orange*.
-
-")
-
-  # Create bar chart
-  bar.IRC <- ggplot(correct.df, aes(item, corrected.item.tot.cor,
-                                    fill = irc.col))
-  bar.IRC <-  bar.IRC + geom_bar(stat = "identity") + #  Create Bar chart
-    scale_fill_gradient(low = "red", high = "green", limits = c(0, 1),
-                        guide = FALSE) +
-    theme_bw() +
-    labs(x = "Item", y = "Item Rest Correlation") + #  axes names
-    theme(strip.text.x = element_text(size = 7),
-          axis.text.x = element_text(size = 8, angle = 90),
-          axis.ticks.x = element_line(size = .4))
-
-  suppressWarnings(print(bar.IRC))
-}
-
-correct.df <- CreateDf(item.names, correct.percentage, corrected.item.tot.cor)
-PercPlotOverall(correct.df)
-IRCPlotOverall(correct.df)
-```
-
-```{r, echo = FALSE, eval = TRUE, warning= FALSE, fig.width = 11}
-
-# Create extra variables for the colours in the bar plots
-PercCol <- function(percentage, correct) {
-  # Calculate colours for the percentage answer options.
-  # Args:
-  #  percentage: Percentage answered per answer options
-  #  correct: Vector of "Correct" and "Incorrect" of same length of percentage
-  #
-  # Returns:
-  #  Value between 0 and 1. 0 corresponds to red and 1 to green in the plots.
-  #  For the correct options:
-  #    < 40: red
-  #    40 - 70: organge - green-ish
-  #    > 70: green
-  #  For the incorrect options: difference between correct answer option and
-  #  incorrect answer options are calculated. Colours are based on difference:
-  #    < -10: Green
-  #    -10 - 0: orange - green-ish
-  #    > 0: green
-
-  perc.col <- vector("numeric", length = length(correct))
-  perc.col[correct == "Correct"] <- ifelse(percentage[correct == "Correct"]
-    < 40, 0, ifelse(percentage[correct == "Correct"] > 70, 1,
-                    1 / 75 * percentage[correct == "Correct"] - 7 / 30))
-  dif <- percentage[correct == "Incorrect"] -
-                    min(percentage[correct == "Correct"])
-  perc.col[correct == "Incorrect"] <- ifelse(dif >= 0, 0,
-                                      ifelse(dif < -10, 1,
-                                             -1 / 25 * dif + 3 / 10))
-  perc.col
-}
-
-IrcCol <- function(IRC, correct){
-  # Calculate colours for the percentage answer options.
-  #
-  # Args:
-  #  IRC: IRC per answer options. Same length as correct
-  #  correct: Vector of "Correct" and "Incorrect" of same length of IRC
-  #
-  # Returns:
-  #  Value between 0 and 1. 0 corresponds to red and 1 to green in the plots.
-  #  For the correct answer options:
-  #    < 0: red
-  #    0 - 0.05: orange - green-ish
-  #    > 0.05: green
-  # For the incorrect answer options: difference between correct answer option
-  # and incorrect answer option are calculated. Colours are based on this
-  # difference:
-  #    > 0: red
-  #    -0.05 - 0: orange - green-ish
-  #    < -0.05: red
-
-  IRC.col <- numeric()
-  IRC.col[correct == "Correct"] <- ifelse(IRC[correct == "Correct"] < 0, 0,
-    ifelse(IRC[correct == "Correct"] > .05, 1,
-      8 * IRC[correct == "Correct"] + 3 / 10))
-  dif <- IRC[correct == "Incorrect"] - min(IRC[correct == "Correct"])
-  IRC.col[correct == "Incorrect"] <- ifelse(dif >= 0, 0,
-    ifelse(dif < -.05, 1, -8 * dif + 3 / 10))
-  return(IRC.col)
-}
-
-CreateAnsDf <- function(item.list, key, number.answeroptions, item.names) {
-  # Creates a data frame with the statistics per answer option including a
-  # colour indicator for the percentage correct and IRC.
-  #
-  # Args:
-  #  These are the same as defined earlier
-  #
-  # Returns:
-  #  Data frame with a row for every answer option. Columns consist of the
-  #  item name, answer options, frequency, percentage, IRC, Correct (whether
-  #  an option is correct/incorrect), colour for percentage and colour for IRC
-
-  for (i in 1:length(item.list)) {
-    if (any(key[, i] != 0)) {
-      item.list[[i]]$perc.col <- with(item.list[[i]],
-                                      PercCol(Percentage, Correct))
-      item.list[[i]]$IRC.col <- with(item.list[[i]],
-                                     IrcCol(IRC, Correct))
-    }
-  }
-  ans.opt.df <- ldply(item.list[number.answeroptions != 0], data.frame)
-  names(ans.opt.df)[1] <- "id"
-  ans.opt.df$Answer.Option <- gsub("Missing", "Mi", ans.opt.df$Answer.Option)
-  ans.opt.df$Answer.Option <- factor(ans.opt.df$Answer.Option,
-    levels = c(LETTERS[1:max(number.answeroptions)], "Mi"))
-  ans.opt.df$id <- factor(ans.opt.df$id,
-                          levels = item.names[number.answeroptions != 0])
-  if (all(key == 0)) {
-    ans.opt.df <- NULL
-  }
-
-  ans.opt.df
-}
-
-NumberOfPlots <- function(ans.opt.df, key) {
-  # Calculates which item is displayed in which plot. The total number of
-  # answer options is too large (usually) to display in one plot.
-  #
-  # Args:
-  #  ans.opt.df: As defined in function above
-  #
-  # Returns:
-  #  n.plots: the number of plots
-  #  items.in.plot: list entry per plot. Every entry consists of item names
-  #  in that plot
-
-  if (any(key != 0)) {
-    max.answ.opts <- 80 #  Maximum number of bars per plot
-    tot.answ.opts <- nrow(ans.opt.df)
-    n.plots <- ceiling(tot.answ.opts / max.answ.opts)
-    answ.opts.per.plot <- floor(tot.answ.opts / n.plots)
-
-    items.in.plot <- list(unique(ans.opt.df[1:answ.opts.per.plot, 1]))
-    if (n.plots > 1) {
-      for (i in 2:n.plots) {
-        items.in.plot[[i]] <- unique(ans.opt.df[ (((i - 1) * answ.opts.per.plot)
-                                     + 1): (i * answ.opts.per.plot), 1])
-        if (items.in.plot[[i]][1] %in% items.in.plot[[i - 1]]) {
-          items.in.plot[[i]] <- items.in.plot[[i]][-1]
-        }
-      }
-    }
-    assign("n.plots", n.plots, envir = globalenv())
-    assign("items.in.plot", items.in.plot, envir = globalenv())
-
-  }
-}
-
-ans.opt.df <- CreateAnsDf(item.list, key, number.answeroptions, item.names)
-NumberOfPlots(ans.opt.df, key)
-
-```
-
-```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11, results= 'asis'}    
-
-PercPlotAnswOpt <- function(ans.opt.df, items.in.plot, key) {
-  # Plots bar graph(s) of the percentages of every answer option
-  #
-  # Args:
-  #  All defined as before
-  #
-  # Returns:
-  #  Description of how to read the plots
-  #  One or multiple plots
-
-  if (any(key != 0)) {
-
-    cat("\\pagebreak
-
-## Percentage chosen per answer options
-
-Percentage plot of all answer options per item. **White** bars represent the
-**right** answer option(s) and **black** the **wrong** options.
-
-The border represents the performance of an item. The **right** answer options
-are coloured *green* if *more than 70%* of the students choose this option. In
-that case, the students have mastered the material. They are coloured *red* if
-*less than 40%* choose this option. *Between 40% and 70%*, it is coloured
-*orange*.
-
-The **wrong** answer options are coloured *green* if an option is chosen *10
-percentage point less than the right option*. In that case, more students chose
-the right answer option than the wrong option indicating that students are not
-confused by the wrong option. They are coloured *red* if an option is chosen
-*equally or more often than the right option*. In this case, students pick the
-wrong answer option often, indicating that this option may be right as well. It
-is coloured *orange* in between these two.
-
-")
-
-    for (i in 1:length(items.in.plot)) {
-      bar.freq1 <- ggplot(ans.opt.df[ans.opt.df$id %in% items.in.plot[[i]], ],
-        aes("Answer Option", Percentage, fill = Correct, colour = perc.col))
-      bar.freq1 <- bar.freq1 + geom_bar(aes(x = Answer.Option),
-                                        stat = "identity", size = 1.5) +
-        facet_grid(. ~ id, scales = "free_x", space = "free_x") + #  Plot items
-        scale_fill_manual(values = c("Incorrect" = "Black", #  Fill in the bars
-                          "Correct" = "White"), guide = FALSE) +
-        scale_colour_gradient(low = "red", high = "green", limits = c(0, 1),
-                              guide = FALSE)  + #  Create colour boundary
-        coord_cartesian(ylim = c(0, 100)) + #  Change y-axis limit to constant
-        labs(x = "Answer Options") + # x  axis name
-        theme_bw() +
-        theme(axis.text.x = element_text(size = 8), # Font sizes
-              axis.text.y = element_text(size = 12),
-              axis.title.x = element_text(size = 15),
-              axis.title.y = element_text(size = 15))
-
-        suppressWarnings(print(bar.freq1))
-    }
-  }
-}
-
-IRCPlotAnswOpt <- function(ans.opt.df, items.in.plot, key) {
-  # Plots bar graph(s) of the IRC of every answer option
-  #
-  # Args:
-  #  All defined as before
-  #
-  # Returns:
-  #  Description of how to read the plots
-  #  One or multiple plots
-
-  if (any(key != 0)) {
-
-    cat('\\pagebreak
-
-## Item rest correlations per answer option
-
-Item rest correlation (IRC) plot of all answer options per item The IRC is the
-correlation between an answer option (1 for students who chose this options and
-0 for student who did not) and the sumscore on all other items. **White** bars
-represent the **right** answer option(s) and **black** the **wrong** options.
-
-The border represents the performance of an item. The **right** answer options
-are coloured *green* if the IRC is *higher than .05*. In that case, students who
-picked the right answer option scored higher on all other items than student who
-did not pick the right option. They are coloured *red* if the IRC is *less than
-0*. In that case, students who picked the right answer option scored equally or
-worse on the other items than students who picked the other options. Between *0
-and .05*, the option is coloured *orange*.
-
-The **wrong** answer options are coloured *green* if the IRC is *more than .1
-lower than the right option*. In that case, students who answered the other
-items incorrectly, usually answered this item incorrectly as well. They are
-coloured *red* if the IRC is *equal or higher than the right answer option*.
-In that case, students who answered the other items correctly may have answered
-this item incorrectly. In between these two, the option is coloured *orange*.
-
-')
-
-    # Creating IRC plots
-    for (i in 1:length(items.in.plot)) {
-      # Select items and variables to plot
-      bar.IRC1 <- ggplot(ans.opt.df[ans.opt.df$id %in% items.in.plot[[i]], ],
-        aes("Answer Option", IRC, fill = Correct, colour = IRC.col))
-      bar.IRC1 <- bar.IRC1 + geom_bar(aes(x = Answer.Option), stat = "identity",
-                                      size = 1.5) + #  Create Bar chart
-        facet_grid(. ~ id, scales = "free_x", space = "free_x") +
-        scale_fill_manual(values = c("Incorrect" = "Black", "Correct" = "White"),
-                          guide = FALSE) + #  Fill in the bars
-        scale_colour_gradient(low = "red", high = "green", limits = c(0, 1),
-                              guide = FALSE)  + #  Create colour boundray
-        # Change y-axis limit to either the maximum and minimum IRC
-        coord_cartesian(ylim = c(min(ans.opt.df$IRC) - .01,
-                                 max(ans.opt.df$IRC)  + .01)) +
-        labs(x = "Answer Options") +
-        theme_bw() +
-        theme(axis.text.x = element_text(size = 8), #  Font sizes
-              axis.text.y = element_text(size = 12),
-              axis.title.x = element_text(size = 15),
-              axis.title.y = element_text(size = 15)
-        )
-        suppressWarnings(print(bar.IRC1))
-    }
-  }
-}
-
-IRCPlotAnswOpt(ans.opt.df, items.in.plot, key)
-PercPlotAnswOpt(ans.opt.df, items.in.plot, key)
-```
-
-\pagebreak
-
-## Cumulative Distribution
-
-Cumulative Distribution showing all possible scores and the percentage of 
-students that scored that number or higher
-
-```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11}
-CumHist <- function(student.scores, n.item) {
-  # Create a reversed cumulative histogram, i.e. the height of score x is
-  # the percentage of students who scores x or higher.
-  #
-  # Args:
-  #  student.scores: numeric vector of test score per student.
-  #  n.item: number of items
-  #
-  # Returns:
-  #  Reversed cumulative histogram plot
-
-  h <- heights <- length(student.scores)
-  n <- sort(unique(student.scores))
-
-  for (i in 1:length(n)) {
-    h <- h - length(student.scores[student.scores == n[i]])
-    heights <- c(heights, h)
-  }
-
-  cesuurlijnen <- seq(0.2, 0.8, 0.2) * length(student.scores)
-  gridlijnen <- round(seq(0, length(student.scores),
-                      length(student.scores) / 25), 0)
-
-  gridlijnen <- gridlijnen[-grep(paste(round(cesuurlijnen, 0),
-                           collapse = "|"), gridlijnen)]
-
-  heights <- heights[1:length(unique(student.scores))]
-  percent <- heights / length(student.scores) * 100
-  df <- data.frame(score = sort(unique(student.scores)),
-                   total = heights, percent = round(percent, 2))
-
-  vec <- df[1, 1]:df[nrow(df), 1] #  save all possible total scores
-
-  # Find if a total score is missing and add it
-  mis <- NA
-  if (sum(! (vec %in% df[, 1]) != 0)) {
-    mis <- vec[! (vec %in% df[, 1])]
-  }
-  if (!is.na(mis[1])) {
-    for (i in 1:length(mis)) {
-      df[ (nrow(df) + 1), ] <- c(mis[i], 0, 0)
-    }
-    df <- df[order(df$score), ]
-  }
-
-
-  # if no one gets a certain total score, replace the 0 with the next total score
-  suppressWarnings(
-    df[which(df[, 2] == 0, arr.ind = TRUE), 2] <- df[ (which(df[, 2] == 0,
-      arr.ind = TRUE) + 1), 2]
-  )
-  suppressWarnings(
-    df[which(df[, 3] == 0, arr.ind = TRUE), 3] <- df[ (which(df[, 3] == 0,
-      arr.ind = TRUE) + 1), 3]
-  )
-
-  g <- ggplot(data = df, aes(x = score, y = total)) +
-    geom_hline(yintercept = c(gridlijnen), linetype = "solid",
-               colour = "gray80") +
-    geom_hline(yintercept = cesuurlijnen, linetype = "dashed", size = 0.75,
-               colour = "black") +
-    geom_histogram(stat = "identity", binwidth = 1) +
-    xlab("Total score") + ylab("% of students with score or higher") +
-    scale_y_continuous(breaks = c(0, cesuurlijnen, length(student.scores)),
-                       labels = c(seq(0, 100, 20))) +
-    scale_x_continuous(limits = c(df[1, 1] - 1, n.item + 1), breaks = 0:n.item,
-                       expand = c(0, 0)) +
-    theme_bw() +
-    theme(panel.grid = element_blank())
-
-  print(g)
-}
-
-CumHist(student.scores, n.item = ncol(key))
-```
-
-```{r, echo = FALSE, eval = TRUE, warnings = FALSE, message = FALSE, results = 'asis', fig.width = 11}
-## Do the whole script above for every sub category
-if (!length(categories) == 0) {
-  cat("\n\n")
-  cat("\\pagebreak")
-  cat("\n\n")
-  cat("# Analysis for subcategories")
-  cat("\n\n")
-
-  for (i in 1:length(categories)) {
-    items <- categories[[i]]$items
-    if (i != 1) {
-      cat("\\pagebreak")
-      cat("\n\n")
-    }
-    cat("\n\n")
-    cat(paste("## Category:", categories[[i]]$name ))
-    cat("\n\n")
-
-    item.list <- CreateItemList(number.answeroptions[items],
-      correct.frequency[items], correct.percentage[items],
-      frequency.answer.options[, items], percentage.answer.options[, items],
-      categories[[i]]$corrected.item.tot.cor,
-      categories[[i]]$corrected.item.tot.cor.answ.option,
-      item.names[items], key[, items])
-
-    correct.df <- CreateDf(item.names[items], correct.percentage[items],
-                           categories[[i]]$corrected.item.tot.cor)
-
-    PercPlotOverall(correct.df)
-    IRCPlotOverall(correct.df)
-
-    if (any(key[, items] != 0)) {
-      ans.opt.df <- CreateAnsDf(item.list, key[, items],
-                                number.answeroptions[items], item.names[items])
-      NumberOfPlots(ans.opt.df, key[, items])
-      PercPlotAnswOpt(ans.opt.df, items.in.plot, key[, items])
-      IRCPlotAnswOpt(ans.opt.df, items.in.plot, key[, items])
-    }
-
-    cat("\\pagebreak")
-    cat("\n\n")
-    cat("## Cumulative Distribution")
-    cat("\n\n")
-    CumHist(categories[[i]]$student.scores, n.item = length(items))
-  }
-}
-```
\ No newline at end of file
+---
+html_document: null
+fig_width: 11
+classoption: landscape
+keep_md: yes
+output: html_document
+pdf_document: default
+---
+
+```{r, echo = FALSE, warning = FALSE, message = FALSE}
+library(ggplot2)
+library(pander)
+library(plyr)
+```
+
+
+```{r, echo = FALSE, eval = TRUE, results = "asis"}
+
+create_item_list <- function(number_answeroptions,
+                             correct_frequency,
+                             correct_percentage,
+                             frequency_answer_options,
+                             percentage_answer_options,
+                             corrected_item_tot_cor,
+                             corrected_item_tot_cor_answ_option,
+                             item_names,
+                             key) {
+  # Creates a list with frequency, percentage correct, and IRC for every item.
+  #
+  # Args:
+  #  number_answeroptions: Vector with number of answer options per item.
+  #                        Length should be at least 3 and the same as
+  #                        correct_frequency, correct_percentage, item_names,
+  #                        and corrected_item_tot_cor.
+  #                        Length should also be the same as the number of
+  #                        colums of: frequency_answer_options,
+  #                        percentage_answer_options, corrected_item_tot_cor,
+  #                        corrected_item_tot_cor_answ_option, and key + 1.
+  #  correct_frequency: Vector with total number correct per item.
+  #                     Length requirements: see number_answeroptions.
+  #  correct_percentage: Vector with percentage correct per item.
+  #                      Length requirements: see number_answeroptions.
+  #  frequency_answer_options: Matrix with [i,j] the frequency of answer option
+  #                            i of item j. Dimension requirements: see key
+  #                            + one extra column (for missing).
+  #  percentage_answer_options: Matrix with [i,j] the percentage of
+  #                             answer option i of item j.
+  #                             Dimension requirements: see key + 1 extra
+  #                             column (for missing)
+  #  corrected_item_tot_cor: Vector with item rest correlation per item.
+  #                          Length requirements: see number_answeroptions
+  #  corrected_item_tot_cor_answ_option: Matrix with [i,j] the item total
+  #                                      correlation for answer option i of
+  #                                      item j. Dimension requirements:
+  #                                      see key + one extra column.
+  #   key: Matrix of 0's and 1's. key[i,j] indicates whether answer option i to
+  #        item j is right (1) or wrong (0). If a column (item) consists of only
+  #        0s, the item is interpreted as graded manually.
+  #        Number of columns should be equal to the length of:
+  #        number_answeroptions, correct_frequency, correct_percentage, and
+  #        corrected_item_tot_cor.
+  #        The following objects have the same dimensions as key + 1 extra row:
+  #        frequency_answer_options, percentage_answer_options, and
+  #        corrected_item_tot_cor_answ_option
+  #
+  # Returns:
+  #  List with one entry per item. Each entry consists of a data frame with
+  #  frequency, percentage and IRC for the total item and the answer options
+  #  (if the item has answer options)
+
+  item_list <- list() #  Creates list to put item output in
+  colnames1 <- c(" ", "Frequency", "Percentage", "IRC")
+  colnames2 <- c("Answer Option", "Frequency", "Percentage", "IRC", "Correct")
+
+  # For every item, create entry in list with frequencies, percentages, and IRCs
+  for (i in 1:length(item_names)) {
+    if (number_answeroptions[i] > 0) {
+      correct <- c(
+        ifelse(
+          key[1:number_answeroptions[i], i] == 1,
+          "Correct",
+          "Incorrect"
+        ),
+        "Incorrect"
+      )
+
+      item_list[[i]] <- data.frame(
+        c(LETTERS[1:number_answeroptions[i]], "Missing"),
+        c(frequency_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]),
+        c(percentage_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]),
+        c(corrected_item_tot_cor_answ_option[
+          c(2:(number_answeroptions[i] + 1), 1),
+          i
+        ]),
+        correct,
+        row.names = NULL)
+      colnames(item_list[[i]]) <- colnames2
+
+    } else {
+      item_list[[i]] <- data.frame("Correct",
+        correct_frequency[i],
+        correct_percentage[i],
+        corrected_item_tot_cor[i]
+      )
+      colnames(item_list[[i]]) <- colnames1
+     }
+  }
+
+  names(item_list) <- item_names
+  return(item_list)
+}
+
+# Use the function defined above
+item_list <- create_item_list(
+  number_answeroptions,
+  correct_frequency,
+  correct_percentage,
+  frequency_answer_options,
+  percentage_answer_options,
+  corrected_item_tot_cor,
+  corrected_item_tot_cor_answ_option,
+  item_names,
+  key
+)
+
+cat(paste("#", title))
+```
+
+# General
+
+What | Result |
+:---------------------- | :------------- |
+Number of students | `r length(student_scores)`|
+Number of items | `r length(item_names)`|
+Average score | `r round(mean(student_scores), digits = 3)`|
+Standard deviation | `r round(sd(student_scores), digits = 3)`| 
+Cronbach's alpha | `r cronbach`| 
+Standard error | `r round(sd(student_scores * sqrt(1 - cronbach)), digits = 3)`| 
+
+
+# Item statistics
+
+For each item the frequency, percentage and item rest correlations (IRC) of 
+every answer option are shown The IRC should be (highly) positive for the 
+right answer option and low for the wrong answer option(s).
+
+```{r pander, results = "asis", eval = TRUE, echo = FALSE, warning = FALSE, message = FALSE}
+
+print_item_tables <- function(item_list, key) {
+  # Print table for each item with frequency, percentage and the
+  # item-rest correlation. Correct answers are highlighted in bold.
+  #
+  # Args:
+  #  item_list: list with one entry per item. Each entry has a data frame
+  #             Data frame has a row for every answer option + one for missing.
+  #             Columns are: Answer option, frequency, percentage, IRC,
+  #                          and Correct (answer option is Correct/Incorrect)
+  #            Number of rows of each data frame has a maximum as the number of
+  #            columns of key + 1.
+  #  key: Matrix of 0's and 1's. key[i,j] implies wether answer option i
+  #       to item j is right (1) or wrong (0). If a column (item) consists of
+  #       only 0s, the item is interpreted as graded manually.
+  #       Number of columns should be equal to length of item_list
+
+  pander::panderOptions("knitr.auto.asis", FALSE)
+  for (i in 1:length(item_list)) {
+    if (any(key[, i] == 1)) {
+      item_list[[i]]$Correct <- revalue(
+        item_list[[i]]$Correct,
+        c("Correct" = "X", "Incorrect" = "")
+      )
+      pander::emphasize.strong.rows(which(key[, i] == 1, arr.ind = TRUE))
+      pander::pander(item_list[i])
+    } else {
+      t <- item_list[i]
+      pander::pander(t)
+    }
+  }
+}
+
+print_item_tables(item_list, key)
+```
+
+\pagebreak
+
+```{r, echo = FALSE, message = FALSE, warning = FALSE, fig.width = 11, results = 'asis'}
+
+create_df <- function(item_names, correct_percentage, corrected_item_tot_cor) {
+  # Creates a data frame with the overall item statistics including a colour
+  # indicator for the percentage correct and IRC. Colours is defined below.
+  #
+  # Args:
+  #  These are the same as defined in create_item_list.
+  #
+  # Returns:
+  #  Data frame with a row for every item. Columns consist of the item name,
+  #  percentage correct, colour indicator for percentage, IRC and colour
+  #  indicator for the IRC.
+
+  correct_df <- data.frame(
+    item = factor(item_names, item_names),
+    correct_percentage,
+    perc_col = ifelse(
+      correct_percentage < 40, 0,
+      ifelse(correct_percentage > 70, 1, 1 / 75 * correct_percentage - 7 / 30)
+    ),
+    corrected_item_tot_cor,
+    irc_col = ifelse(
+      corrected_item_tot_cor < 0, 0,
+      ifelse(
+        corrected_item_tot_cor > .05,
+        1,
+        8 * corrected_item_tot_cor + 3 / 10)
+    )
+  )
+
+  return(correct_df)
+}
+
+plot_overall_perc <- function(correct_df) {
+  # Plots bar graph of percentage correct of every item. Bars are coloured
+  # depending on the percentage correct:
+  # < 40: Red
+  # 40 - 70: Orange till green-ish
+  # > 70: Green
+  #
+  # Args:
+  #  correct_df: data frame with item name, percentage correct and colour code
+  #  for the percentages.
+  #
+  # Returns:
+  #  Explanation of the plot and the plot itself
+
+  cat("
+
+## Percentage Correct
+
+Plot of percentage correct per item. If *more than 70%* of the students answer
+an item correctly, the students have mastered the material well enough, thus it
+is coloured *green*. If *less than 40%* of the students answer an item
+correctly, the students have not mastered the material, and is coloured *red*.
+*Orange* indicates the percentage correct lies *between 40% and 70%*. The
+colours are based on multiple choice items with 3 or 4 answer options.
+
+")
+
+  bar_freq <- ggplot2::ggplot(
+    correct_df,
+    ggplot2::aes(item, correct_percentage, fill = perc_col))
+  bar_freq <- 
+    bar_freq + 
+    # Create bar chart
+    geom_bar(stat = "identity") + 
+    # Creat colour boundary
+    scale_fill_gradient(
+      low    = "red",
+      high   = "green",
+      limits = c(0, 1),
+      guide  = FALSE
+    ) +
+    # Change y-axis limit to constant
+    coord_cartesian(ylim = c(0, 100)) +
+    # Axes names
+    labs(x = "Item", y = "Percentage Correct") +
+    # Black en white
+    theme_bw() +
+    # Change various sizes
+    theme(
+      axis.text.x  = element_text(size = 8, angle = 90),
+      axis.ticks.x = element_line(size = .4)
+    )
+
+  suppressWarnings(print(bar_freq))
+}
+
+plot_overall_irc <- function(correct_df) {
+  # Plots bar graph of IRC of every item. Bars are coloured by IRC:
+  # < 0: Red
+  # 0 - .10: Orange till green-ish
+  # > .10: green
+  #
+  # Args:
+  #  correct_df: data frame with item name, percentage correct and colour code
+  #  for the percentages.
+  #
+  # Returns:
+  #  Explanation of the plot and the plot itself
+
+  cat("\\pagebreak
+
+## Item rest correlations
+
+Plot of Item Rest Correlations per item: The correlation between an item (0 for
+wrong answers and 1 for right answers) and the sum of all other items. Items
+with a correlation *higher than .05* indicate that students who answered this
+item correctly answered the other items more often correctly than students who
+answered this item incorrectly. Thus it is coloured *green*. Items with a
+correlation *lower than 0* indicate no relationship between this item and the
+other items, or that students who answered this item correctly answered the
+other items more often incorrectly than students who answered this incorrectly,
+thus it is coloured *red*. *Between 0 and .05*, an item is coloured *orange*.
+
+")
+
+  # Create bar chart
+  bar_irc <- ggplot2::ggplot(
+    correct_df,
+    ggplot2::aes(item, corrected_item_tot_cor, fill = irc_col))
+  bar_irc <-  
+    bar_irc +
+    geom_bar(stat = "identity") +
+    scale_fill_gradient(
+      low    = "red",
+      high   = "green",
+      limits = c(0, 1),
+      guide  = FALSE
+    ) +
+    theme_bw() +
+    labs(x = "Item", y = "Item Rest Correlation") +
+    theme(
+      strip.text.x = element_text(size = 7),
+      axis.text.x  = element_text(size = 8, angle = 90),
+      axis.ticks.x = element_line(size = .4)
+    )
+
+  suppressWarnings(print(bar_irc))
+}
+
+correct_df <- create_df(item_names, correct_percentage, corrected_item_tot_cor)
+plot_overall_perc(correct_df)
+plot_overall_irc(correct_df)
+```
+
+```{r, echo = FALSE, eval = TRUE, warning= FALSE, fig.width = 11}
+
+# Create extra variables for the colours in the bar plots
+make_perc_col <- function(percentage, correct) {
+  # Calculate colours for the percentage answer options.
+  #
+  # Args:
+  #  percentage: Percentage answered per answer options
+  #  correct: Vector of "Correct" and "Incorrect" of same length of percentage
+  #
+  # Returns:
+  #  Value between 0 and 1. 0 corresponds to red and 1 to green in the plots.
+  #  For the correct options:
+  #    < 40: red
+  #    40 - 70: orange - green-ish
+  #    > 70: green
+  #  For the incorrect options: difference between correct answer option and
+  #  incorrect answer options are calculated. Colours are based on difference:
+  #    < -10: Green
+  #    -10 - 0: orange - green-ish
+  #    > 0: green
+
+  perc_col <- vector("numeric", length = length(correct))
+  perc_col[correct == "Correct"] <- ifelse(
+    percentage[correct == "Correct"] < 40,
+    0,
+    ifelse(
+      percentage[correct == "Correct"] > 70,
+      1,
+      1 / 75 * percentage[correct == "Correct"] - 7 / 30
+    )
+  )
+  dif <-
+    percentage[correct == "Incorrect"] - min(percentage[correct == "Correct"])
+  perc_col[correct == "Incorrect"] <- ifelse(
+    dif >= 0,
+    0,
+    ifelse(dif < -10, 1, -1 / 25 * dif + 3 / 10)
+  )
+  perc_col
+}
+
+make_irc_col <- function(IRC, correct){
+  # Calculate colours for the percentage answer options.
+  #
+  # Args:
+  #  IRC: Vector of IRC per answer options. Same length as correct
+  #  correct: Vector of "Correct" and "Incorrect" of same length of IRC
+  #
+  # Returns:
+  #  Value between 0 and 1. 0 corresponds to red and 1 to green in the plots.
+  #  For the correct answer options:
+  #    < 0: red
+  #    0 - 0.05: orange - green-ish
+  #    > 0.05: green
+  # For the incorrect answer options: difference between correct answer option
+  # and incorrect answer option are calculated. Colours are based on this
+  # difference:
+  #    > 0: red
+  #    -0.05 - 0: orange - green-ish
+  #    < -0.05: red
+
+  irc_col <- numeric()
+  irc_col[correct == "Correct"] <- ifelse(
+    IRC[correct == "Correct"] < 0,
+    0,
+    ifelse(IRC[correct == "Correct"] > .05,
+      1,
+      8 * IRC[correct == "Correct"] + 3 / 10
+    )
+  )
+  dif <- IRC[correct == "Incorrect"] - min(IRC[correct == "Correct"])
+  irc_col[correct == "Incorrect"] <- ifelse(
+    dif >= 0,
+    0,
+    ifelse(dif < -.05, 1, -8 * dif + 3 / 10)
+  )
+  return(irc_col)
+}
+
+make_answ_df <- function(item_list, key, number_answeroptions, item_names) {
+  # Creates a data frame with the statistics per answer option including a
+  # colour indicator for the percentage correct and IRC.
+  #
+  # Args:
+  #  These are the same as defined earlier
+  #
+  # Returns:
+  #  Data frame with a row for every answer option. Columns consist of the
+  #  item name, answer options, frequency, percentage, IRC, Correct (whether
+  #  an option is correct/incorrect), colour for percentage and colour for IRC
+
+  for (i in 1:length(item_list)) {
+    if (any(key[, i] != 0)) {
+      item_list[[i]]$perc_col <- with(
+        item_list[[i]],
+        make_perc_col(Percentage, Correct)
+      )
+      item_list[[i]]$irc_col <- with(item_list[[i]], make_irc_col(IRC, Correct))
+    }
+  }
+  ans_opt_df <- plyr::ldply(item_list[number_answeroptions != 0], data.frame)
+  names(ans_opt_df)[1] <- "id"
+  ans_opt_df$Answer.Option <- gsub("Missing", "Mi", ans_opt_df$Answer.Option)
+  ans_opt_df$Answer.Option <- factor(
+    ans_opt_df$Answer.Option,
+    levels = c(LETTERS[1:max(number_answeroptions)], "Mi")
+  )
+  ans_opt_df$id <- factor(
+    ans_opt_df$id,
+    levels = item_names[number_answeroptions != 0]
+  )
+  if (all(key == 0)) {
+    ans_opt_df <- NULL
+  }
+
+  ans_opt_df
+}
+
+calc_n_plot <- function(ans_opt_df, key) {
+  # Calculates which item is displayed in which plot. The total number of
+  # answer options is too large (usually) to display them all in one plot.
+  #
+  # Args:
+  #  ans_opt_df: As defined in function above
+  #
+  # Returns:
+  #  n_plots: the number of plots
+  #  items_in_plot: list entry per plot. Every entry consists of item names
+  #  in that plot
+
+  if (any(key != 0)) {
+    max_answ_opts <- 80 #  Maximum number of bars per plot
+    tot_answ_opts <- nrow(ans_opt_df)
+    n_plots <- ceiling(tot_answ_opts / max_answ_opts)
+    answ_opts_per_plot <- floor(tot_answ_opts / n_plots)
+
+    items_in_plot <- list(unique(ans_opt_df[1:answ_opts_per_plot, 1]))
+    if (n_plots > 1) {
+      for (i in 2:n_plots) {
+        items_in_plot[[i]] <- unique(
+          ans_opt_df[
+            (((i - 1) * answ_opts_per_plot) + 1):(i * answ_opts_per_plot),
+            1
+          ]
+        )
+        if (items_in_plot[[i]][1] %in% items_in_plot[[i - 1]]) {
+          items_in_plot[[i]] <- items_in_plot[[i]][-1]
+        }
+      }
+    }
+    assign("items_in_plot", items_in_plot, envir = globalenv())
+  }
+}
+
+ans_opt_df <- make_answ_df(item_list, key, number_answeroptions, item_names)
+calc_n_plot(ans_opt_df, key)
+
+```
+
+```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11, results= 'asis'}    
+
+plot_perc_answ <- function(ans_opt_df, items_in_plot, key) {
+  # Plots bar graph(s) of the percentages of every answer option
+  #
+  # Args:
+  #  All defined as before
+  #
+  # Returns:
+  #  Description of how to read the plots
+  #  One or multiple plots
+
+  if (any(key != 0)) {
+
+    cat("\\pagebreak
+
+## Percentage chosen per answer options
+
+Percentage plot of all answer options per item. **White** bars represent the
+**right** answer option(s) and **black** the **wrong** options.
+
+The border represents the performance of an item. The **right** answer options
+are coloured *green* if *more than 70%* of the students choose this option. In
+that case, the students have mastered the material. They are coloured *red* if
+*less than 40%* choose this option. *Between 40% and 70%*, it is coloured
+*orange*.
+
+The **wrong** answer options are coloured *green* if an option is chosen *10
+percentage point less than the right option*. In that case, more students chose
+the right answer option than the wrong option indicating that students are not
+confused by the wrong option. They are coloured *red* if an option is chosen
+*equally or more often than the right option*. In this case, students pick the
+wrong answer option often, indicating that this option may be right as well. It
+is coloured *orange* in between these two.
+
+")
+
+    for (i in 1:length(items_in_plot)) {
+      bar_freq1 <- ggplot2::ggplot(
+        ans_opt_df[ans_opt_df$id %in% items_in_plot[[i]], ],
+        ggplot2::aes(
+          "Answer Option",
+          Percentage,
+          fill   = Correct,
+          colour = perc_col)
+        )
+      bar_freq1 <- 
+        bar_freq1 +
+        geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) +
+        # Plot items
+        facet_grid(. ~ id, scales = "free_x", space = "free_x") +
+        # Fill in the bars
+        scale_fill_manual(
+          values = c("Incorrect" = "Black", "Correct" = "White"),
+          guide  = FALSE
+        ) +
+        # Colour the bandaries
+        scale_colour_gradient(
+          low    = "red",
+          high   = "green",
+          limits = c(0, 1),
+          guide  = FALSE
+        )  +
+        # Change y-axis limits to constants
+        coord_cartesian(ylim = c(0, 100)) +
+        # Names x-axis
+        labs(x = "Answer Options") +
+        theme_bw() +
+        # Change various font sizes
+        theme(
+          axis.text.x  = element_text(size = 8),
+          axis.text.y  = element_text(size = 12),
+          axis.title.x = element_text(size = 15),
+          axis.title.y = element_text(size = 15)
+        )
+
+        suppressWarnings(print(bar_freq1))
+    }
+  }
+}
+
+plot_irc_answ <- function(ans_opt_df, items_in_plot, key) {
+  # Plots bar graph(s) of the IRC of every answer option
+  #
+  # Args:
+  #  All defined as before
+  #
+  # Returns:
+  #  Description of how to read the plots
+  #  One or multiple plots
+
+  if (any(key != 0)) {
+
+    cat('\\pagebreak
+
+## Item rest correlations per answer option
+
+Item rest correlation (IRC) plot of all answer options per item. The IRC is the
+correlation between an answer option (1 for students who chose this options and
+0 for student who did not) and the sumscore on all other items. **White** bars
+represent the **right** answer option(s) and **black** the **wrong** options.
+
+The border represents the performance of an item. The **right** answer options
+are coloured *green* if the IRC is *higher than .05*. In that case, students who
+picked the right answer option scored higher on all other items than student who
+did not pick the right option. They are coloured *red* if the IRC is *less than
+0*. In that case, students who picked the right answer option scored equally or
+worse on the other items than students who picked the other options. Between *0
+and .05*, the option is coloured *orange*.
+
+The **wrong** answer options are coloured *green* if the IRC is *more than .1
+lower than the right option*. In that case, students who answered the other
+items incorrectly, usually answered this item incorrectly as well. They are
+coloured *red* if the IRC is *equal or higher than the right answer option*.
+In that case, students who answered the other items correctly may have answered
+this item incorrectly. In between these two, the option is coloured *orange*.
+
+')
+
+    # Creating IRC plots
+    for (i in 1:length(items_in_plot)) {
+      # Select items and variables to plot
+      bar_irc1 <- ggplot2::ggplot(
+        ans_opt_df[ans_opt_df$id %in% items_in_plot[[i]], ],
+        ggplot2::aes("Answer Option", IRC, fill = Correct, colour = irc_col)
+      )
+      bar_irc1 <-
+        bar_irc1 +
+        geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) +
+        facet_grid(. ~ id, scales = "free_x", space = "free_x") +
+        scale_fill_manual(
+          values = c("Incorrect" = "Black", "Correct" = "White"),
+          guide  = FALSE
+        ) +
+        scale_colour_gradient(
+          low    = "red",
+          high   = "green",
+          limits = c(0, 1),
+          guide  = FALSE
+        ) +
+        # Change y-axis limit to either the maximum and minimum IRC
+        coord_cartesian(
+          ylim = c(min(ans_opt_df$IRC) - .01, max(ans_opt_df$IRC)  + .01)
+        ) +
+        labs(x = "Answer Options") +
+        theme_bw() +
+        theme(
+          axis.text.x  = element_text(size = 8),
+          axis.text.y  = element_text(size = 12),
+          axis.title.x = element_text(size = 15),
+          axis.title.y = element_text(size = 15)
+        )
+        suppressWarnings(print(bar_irc1))
+    }
+  }
+}
+
+plot_irc_answ(ans_opt_df, items_in_plot, key)
+plot_perc_answ(ans_opt_df, items_in_plot, key)
+```
+
+\pagebreak
+
+## Cumulative Distribution
+
+Cumulative Distribution showing all possible scores and the percentage of 
+students that scored that number or higher
+
+```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11}
+CumHist <- function(student_scores, n_item) {
+  # Create a reversed cumulative histogram, i.e. the height of score x is
+  # the percentage of students who scores x or higher.
+  #
+  # Args:
+  #  student_scores: numeric vector of test score per student.
+  #  n_item: number of items
+  #
+  # Returns:
+  #  Reversed cumulative histogram plot
+
+  h <- heights <- length(student_scores)
+  n <- sort(unique(student_scores))
+
+  for (i in 1:length(n)) {
+    h <- h - length(student_scores[student_scores == n[i]])
+    heights <- c(heights, h)
+  }
+
+  cesuurlijnen <- seq(0.2, 0.8, 0.2) * length(student_scores)
+  gridlijnen <- round(
+    seq(0, length(student_scores), length(student_scores) / 25),
+    0
+  )
+
+  gridlijnen <- gridlijnen[
+    -grep(paste(round(cesuurlijnen, 0), collapse = "|"), gridlijnen)
+  ]
+
+  heights <- heights[1:length(unique(student_scores))]
+  percent <- heights / length(student_scores) * 100
+  df <- data.frame(
+    score   = sort(unique(student_scores)),
+    total   = heights,
+    percent = round(percent, 2)
+  )
+
+  vec <- df[1, 1]:df[nrow(df), 1] #  save all possible total scores
+
+  # Find if a total score is missing and add it
+  mis <- NA
+  if (sum(! (vec %in% df[, 1]) != 0)) {
+    mis <- vec[! (vec %in% df[, 1])]
+  }
+  if (! is.na(mis[1])) {
+    for (i in 1:length(mis)) {
+      df[ (nrow(df) + 1), ] <- c(mis[i], 0, 0)
+    }
+    df <- df[order(df$score), ]
+  }
+
+
+  # if no one gets a certain total score,
+  # replace the 0 with the next total score
+  suppressWarnings(
+    df[which(df[, 2] == 0, arr.ind = TRUE), 2] <-
+      df[(which(df[, 2] == 0, arr.ind = TRUE) + 1), 2]
+  )
+  suppressWarnings(
+    df[which(df[, 3] == 0, arr.ind = TRUE), 3] <-
+      df[(which(df[, 3] == 0, arr.ind = TRUE) + 1), 3]
+  )
+
+  g <- 
+    ggplot2::ggplot(
+      data = df,
+      ggplot2::aes(x = score, y = total)
+    ) +
+    geom_hline(
+      yintercept = c(gridlijnen),
+      linetype   = "solid",
+      colour     = "gray80"
+    ) +
+    geom_hline(
+      yintercept = cesuurlijnen,
+      linetype   = "dashed",
+      size       = 0.75,
+      colour     = "black"
+    ) +
+    geom_histogram(stat = "identity", binwidth = 1) +
+    xlab("Total score") +
+    ylab("% of students with score or higher") +
+    scale_y_continuous(
+      breaks = c(0, cesuurlijnen, length(student_scores)),
+      labels = c(seq(0, 100, 20))
+    ) +
+    scale_x_continuous(
+      limits = c(df[1, 1] - 1, n_item + 1),
+      breaks = 0:n_item,
+      expand = c(0, 0)
+    ) +
+    theme_bw() +
+    theme(panel.grid = element_blank())
+
+  print(g)
+}
+
+plot_cum_hist(student_scores, n_item = ncol(key))
+```
+
+```{r, echo = FALSE, eval = TRUE, warnings = FALSE, message = FALSE, results = 'asis', fig.width = 11}
+## Do the whole script above for every sub category
+if (!length(categories) == 0) {
+  cat("\n\n")
+  cat("\\pagebreak")
+  cat("\n\n")
+  cat("# Analysis for subcategories")
+  cat("\n\n")
+
+  for (i in 1:length(categories)) {
+    items <- categories[[i]]$items
+    if (i != 1) {
+      cat("\\pagebreak")
+      cat("\n\n")
+    }
+    cat("\n\n")
+    cat(paste("## Category:", categories[[i]]$name ))
+    cat("\n\n")
+
+    item_list <- create_item_list(
+      number_answeroptions[items],
+      correct_frequency[items],
+      correct_percentage[items],
+      frequency_answer_options[, items],
+      percentage_answer_options[, items],
+      categories[[i]]$corrected_item_tot_cor,
+      categories[[i]]$corrected_item_tot_cor_answ_option,
+      item_names[items],
+      key[, items]
+    )
+
+    correct_df <- create_df(
+      item_names[items],
+      correct_percentage[items],
+      categories[[i]]$corrected_item_tot_cor
+    )
+
+    plot_overall_perc(correct_df)
+    plot_overall_irc(correct_df)
+
+    if (any(key[, items] != 0)) {
+      ans_opt_df <- make_answ_df(
+        item_list,
+        key[, items],
+        number_answeroptions[items],
+        item_names[items]
+      )
+      calc_n_plot(ans_opt_df, key[, items])
+      plot_perc_answ(ans_opt_df, items_in_plot, key[, items])
+      plot_irc_answ(ans_opt_df, items_in_plot, key[, items])
+    }
+
+    cat("\\pagebreak")
+    cat("\n\n")
+    cat("## Cumulative Distribution")
+    cat("\n\n")
+    plot_cum_hist(categories[[i]]$student_scores, n_item = length(items))
+  }
+}
+```

From 71aa1846ca842e3e3e49989bfd67b75439b19f55 Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Thu, 11 Aug 2016 14:04:18 +0200
Subject: [PATCH 04/21] Changed order of parameters to be consistent with
 previous version of the report script

---
 app/Lib/Rscripts/report.R | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/app/Lib/Rscripts/report.R b/app/Lib/Rscripts/report.R
index 02fc318..119ac64 100644
--- a/app/Lib/Rscripts/report.R
+++ b/app/Lib/Rscripts/report.R
@@ -1,10 +1,16 @@
-report <- function(filename, title, number_answeroptions, key, item_names,
-                   correct_frequency, correct_percentage,
-                   frequency_answer_options, percentage_answer_options,
-                   corrected_item_tot_cor, corrected_item_tot_cor_answ_option,
-                   cronbach, student_scores, categories) {
-  rmarkdown::render("report_subcategories.Rmd", output_format = "html_document",
-                    output_file = paste0(filename, ".html"))
-  rmarkdown::render("report_subcategories.Rmd", output_format = "pdf_document",
-                    output_file = paste0(filename, ".pdf"))
-}
\ No newline at end of file
+report <- function(filename,
+                   number_answeroptions,
+                   cronbach,
+                   frequency_answer_options,
+                   percentage_answer_options,
+                   key,
+                   correct_frequency,
+                   correct_percentage,
+                   corrected_item_tot_cor,
+                   corrected_item_tot_cor_answ_option,
+                   title,
+                   item_names,
+                   student_scores,
+                   categories) {
+  rmarkdown::render("report.Rmd", output_format = "pdf_document", output_file = filename)
+}

From cfb48e6e4dadd1635183471f0db573c8594651b6 Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Thu, 11 Aug 2016 14:24:19 +0200
Subject: [PATCH 05/21] Updated call to new report script

---
 app/Model/Exam.php | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/app/Model/Exam.php b/app/Model/Exam.php
index 5596132..40993f8 100644
--- a/app/Model/Exam.php
+++ b/app/Model/Exam.php
@@ -589,12 +589,6 @@ protected function _report($exam) {
 				}
 			}
 
-			// Create the input_correct matrix (with given dimensions) by filling it with a vector (by row)
-			$script[] = sprintf(
-				'input_correct = matrix( c( %s ), number_students, number_questions, byrow = TRUE );',
-				implode(',', $inputCorrectMatrix)
-			);
-
 			$keyMatrix = array();
 
 			foreach ($exam['Item'] as $i => $item) {
@@ -613,11 +607,15 @@ protected function _report($exam) {
 				implode(',', $keyMatrix)
 			);
 
+			$script[] = 'student_scores = c(  );';
+
+			$script[] = 'categories = c(  );';
+
 			$script[] = sprintf(
 				'report( ' .
-				'"%s", number_students, number_answeroptions, number_questions, Cronbach, frequency_answer_options, ' .
-				'percentage_answer_options, input_correct, key, correct_frequency, correct_percentage, ' .
-				'corrected_item_tot_cor, corrected_item_tot_cor_answ_option, "%s", item_names' .
+				'"%s", number_answeroptions, Cronbach, frequency_answer_options, percentage_answer_options, key, ' .
+				'correct_frequency, correct_percentage, corrected_item_tot_cor, corrected_item_tot_cor_answ_option, "%s", ' .
+				'item_names, student_scores, categories' .
 				' );',
 				$tempFile, $exam['Exam']['name']
 			);

From afcb095c72b0927988857b78be91b02263ee2cf1 Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Thu, 11 Aug 2016 14:58:28 +0200
Subject: [PATCH 06/21] Updated call to new report script

---
 app/Test/Case/Lib/RserveTest.php | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/app/Test/Case/Lib/RserveTest.php b/app/Test/Case/Lib/RserveTest.php
index 0197488..1d148ce 100644
--- a/app/Test/Case/Lib/RserveTest.php
+++ b/app/Test/Case/Lib/RserveTest.php
@@ -205,14 +205,13 @@ public function testExecuteRscriptsReport() {
 
 		$script = array();
 		$script[] = file_get_contents(APP . 'Lib' . DS . 'Rscripts' . DS . 'report.R');
-		$script[] = sprintf('file_name = "%s";', $filename);
+		$script[] = sprintf('filename = "%s";', $filename);
 		$script[] = 'number_students = 2;';
 		$script[] = 'number_answeroptions = c( 3, 3, 3 );';
 		$script[] = 'number_questions = 3;';
 		$script[] = 'cronbach = 0.5;';
 		$script[] = 'frequency_answer_options = matrix( c( 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 ), nrow = 4, ncol = number_questions, byrow = FALSE );';
 		$script[] = 'percentage_answer_options = matrix( c( 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 ), nrow = 4, ncol = number_questions, byrow = FALSE );';
-		$script[] = 'input_correct = matrix( c( 0, 0, 0, 0, 0, 0 ), nrow = number_students, ncol = number_questions, byrow = TRUE );';
 		$script[] = 'key = matrix( c( 1, 0, 0, 1, 0, 0, 1, 0, 0 ), nrow = 3, ncol = number_questions, byrow = FALSE );';
 		$script[] = 'correct_frequency = c( 0, 0, 0 );';
 		$script[] = 'correct_percentage = c( 0, 0, 0 );';
@@ -220,12 +219,10 @@ public function testExecuteRscriptsReport() {
 		$script[] = 'corrected_item_tot_cor_answ_option = matrix( c( 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 ), nrow = 4, ncol = number_questions, byrow = FALSE );';
 		$script[] = 'title = "Test Title";';
 		$script[] = 'item_names = c( "Item 1", "Item 2", "Item 3" );';
-		$script[] = 'report( file_name, number_students, number_answeroptions, number_questions, cronbach,
-			frequency_answer_options, percentage_answer_options,
-			input_correct, key, correct_frequency,
-			correct_percentage, corrected_item_tot_cor,
-			corrected_item_tot_cor_answ_option, title,
-			item_names);';
+		$script[] = 'student_scores = c(  );';
+		$script[] = 'categories = c(  );';
+		$script[] = 'report( filename, number_answeroptions, cronbach, frequency_answer_options, percentage_answer_options, key, correct_frequency,
+			correct_percentage, corrected_item_tot_cor, corrected_item_tot_cor_answ_option, title, item_names, student_scores, categories );';
 
 		$script = implode("\n", $script);
 		$result = $this->Rserve->execute($script);

From d7d2a1674dafc86eb7b630abebaa20d513d578d4 Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Thu, 11 Aug 2016 15:07:59 +0200
Subject: [PATCH 07/21] Added installtion of Pandoc and pander package to
 Travis configuration

---
 .travis.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 6e9e761..313a56f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -40,6 +40,7 @@ before_script:
   - sudo apt-get install -y --force-yes r-base r-cran-rserve
   - sudo apt-get install -y --force-yes libcurl4-gnutls-dev
   - sudo apt-get install -y --force-yes libxml2-dev
+  - sudo apt-get install -y --force-yes pandoc
   - sudo mkdir -p ~/R/Library
   - sudo echo 'R_LIBS=~/R/Library' > ~/.Renviron
   - sudo echo 'options(repos = "http://cran.rstudio.com")' > ~/.Rprofile
@@ -48,6 +49,7 @@ before_script:
   - sudo Rscript -e 'if(!"gplots" %in% rownames(installed.packages())) { install.packages("gplots", dependencies = TRUE) }'
   - sudo Rscript -e 'if(!"ggplot2" %in% rownames(installed.packages())) { install.packages("ggplot2", dependencies = TRUE) }'
   - sudo Rscript -e 'if(!"gridExtra" %in% rownames(installed.packages())) { install.packages("gridExtra", dependencies = TRUE) }'
+  - sudo Rscript -e 'if(!"pander" %in% rownames(installed.packages())) { install.packages("pander", dependencies = TRUE) }'
   - sudo Rscript -e 'if(!"psy" %in% rownames(installed.packages())) { install.packages("psy", dependencies = TRUE) }'
   - sudo Rscript -e 'update.packages(ask = FALSE, instlib = "~/R/Library")'
   - sudo R CMD Rserve

From 60f1325a44bcf76641255fecc797ce30cea0e785 Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Thu, 11 Aug 2016 15:25:06 +0200
Subject: [PATCH 08/21] Updated Travis configuration to install Pandoc version
 1.12.3 or higher

---
 .travis.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 313a56f..b808069 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -40,7 +40,10 @@ before_script:
   - sudo apt-get install -y --force-yes r-base r-cran-rserve
   - sudo apt-get install -y --force-yes libcurl4-gnutls-dev
   - sudo apt-get install -y --force-yes libxml2-dev
-  - sudo apt-get install -y --force-yes pandoc
+  - sudo apt-get install -y --force-yes ghc
+  - sudo apt-get install -y --force-yes cabal-install
+  - sudo cabal update
+  - sudo cabal install pandoc
   - sudo mkdir -p ~/R/Library
   - sudo echo 'R_LIBS=~/R/Library' > ~/.Renviron
   - sudo echo 'options(repos = "http://cran.rstudio.com")' > ~/.Rprofile

From c4ffad4502c78427a04ba6663bbadd9557e68b38 Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Thu, 11 Aug 2016 15:37:27 +0200
Subject: [PATCH 09/21] Fixed installation of Pandoc

---
 .travis.yml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index b808069..080d989 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -40,10 +40,8 @@ before_script:
   - sudo apt-get install -y --force-yes r-base r-cran-rserve
   - sudo apt-get install -y --force-yes libcurl4-gnutls-dev
   - sudo apt-get install -y --force-yes libxml2-dev
-  - sudo apt-get install -y --force-yes ghc
-  - sudo apt-get install -y --force-yes cabal-install
-  - sudo cabal update
-  - sudo cabal install pandoc
+    wget https://github.com/jgm/pandoc/releases/download/1.17.2/pandoc-1.17.2-1-amd64.deb
+  - sudo dpkg -i pandoc-1.17.2-1-amd64.deb
   - sudo mkdir -p ~/R/Library
   - sudo echo 'R_LIBS=~/R/Library' > ~/.Renviron
   - sudo echo 'options(repos = "http://cran.rstudio.com")' > ~/.Rprofile

From 8a38448d2b380fd5c2267891dbacfe777f018e4a Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Thu, 11 Aug 2016 15:44:34 +0200
Subject: [PATCH 10/21] Fixed typo

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 080d989..b5bd556 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -40,7 +40,7 @@ before_script:
   - sudo apt-get install -y --force-yes r-base r-cran-rserve
   - sudo apt-get install -y --force-yes libcurl4-gnutls-dev
   - sudo apt-get install -y --force-yes libxml2-dev
-    wget https://github.com/jgm/pandoc/releases/download/1.17.2/pandoc-1.17.2-1-amd64.deb
+  - wget https://github.com/jgm/pandoc/releases/download/1.17.2/pandoc-1.17.2-1-amd64.deb
   - sudo dpkg -i pandoc-1.17.2-1-amd64.deb
   - sudo mkdir -p ~/R/Library
   - sudo echo 'R_LIBS=~/R/Library' > ~/.Renviron

From 1c0c30c8d9ef854e4d79ac4d36c722b3b1f9314d Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Thu, 11 Aug 2016 16:56:54 +0200
Subject: [PATCH 11/21] Added student scores to test

---
 app/Test/Case/Lib/RserveTest.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/Test/Case/Lib/RserveTest.php b/app/Test/Case/Lib/RserveTest.php
index 1d148ce..c9f98be 100644
--- a/app/Test/Case/Lib/RserveTest.php
+++ b/app/Test/Case/Lib/RserveTest.php
@@ -219,7 +219,7 @@ public function testExecuteRscriptsReport() {
 		$script[] = 'corrected_item_tot_cor_answ_option = matrix( c( 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 ), nrow = 4, ncol = number_questions, byrow = FALSE );';
 		$script[] = 'title = "Test Title";';
 		$script[] = 'item_names = c( "Item 1", "Item 2", "Item 3" );';
-		$script[] = 'student_scores = c(  );';
+		$script[] = 'student_scores = c( 7, 9 );';
 		$script[] = 'categories = c(  );';
 		$script[] = 'report( filename, number_answeroptions, cronbach, frequency_answer_options, percentage_answer_options, key, correct_frequency,
 			correct_percentage, corrected_item_tot_cor, corrected_item_tot_cor_answ_option, title, item_names, student_scores, categories );';

From 4bda000c8b61a5cd09d76ff38296b95cb7d7c843 Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Thu, 11 Aug 2016 16:57:32 +0200
Subject: [PATCH 12/21] Fixed path to Rmd script (workaround)

---
 app/Lib/Rscripts/report.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/Lib/Rscripts/report.R b/app/Lib/Rscripts/report.R
index 119ac64..477040f 100644
--- a/app/Lib/Rscripts/report.R
+++ b/app/Lib/Rscripts/report.R
@@ -12,5 +12,5 @@ report <- function(filename,
                    item_names,
                    student_scores,
                    categories) {
-  rmarkdown::render("report.Rmd", output_format = "pdf_document", output_file = filename)
+  rmarkdown::render("/home/travis/build/nlware/qdnatool/app/Lib/Rscripts/report.Rmd", output_format = "pdf_document", output_file = filename)
 }

From e6ff1a6537a3181a72c3264b654fc4b20d1d8326 Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Thu, 11 Aug 2016 17:13:19 +0200
Subject: [PATCH 13/21] Fixed encoding issue

---
 app/Lib/Rscripts/report.Rmd | 1644 +++++++++++++++++------------------
 1 file changed, 822 insertions(+), 822 deletions(-)

diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd
index 90d8192..ad69e1f 100644
--- a/app/Lib/Rscripts/report.Rmd
+++ b/app/Lib/Rscripts/report.Rmd
@@ -1,822 +1,822 @@
----
-html_document: null
-fig_width: 11
-classoption: landscape
-keep_md: yes
-output: html_document
-pdf_document: default
----
-
-```{r, echo = FALSE, warning = FALSE, message = FALSE}
-library(ggplot2)
-library(pander)
-library(plyr)
-```
-
-
-```{r, echo = FALSE, eval = TRUE, results = "asis"}
-
-create_item_list <- function(number_answeroptions,
-                             correct_frequency,
-                             correct_percentage,
-                             frequency_answer_options,
-                             percentage_answer_options,
-                             corrected_item_tot_cor,
-                             corrected_item_tot_cor_answ_option,
-                             item_names,
-                             key) {
-  # Creates a list with frequency, percentage correct, and IRC for every item.
-  #
-  # Args:
-  #  number_answeroptions: Vector with number of answer options per item.
-  #                        Length should be at least 3 and the same as
-  #                        correct_frequency, correct_percentage, item_names,
-  #                        and corrected_item_tot_cor.
-  #                        Length should also be the same as the number of
-  #                        colums of: frequency_answer_options,
-  #                        percentage_answer_options, corrected_item_tot_cor,
-  #                        corrected_item_tot_cor_answ_option, and key + 1.
-  #  correct_frequency: Vector with total number correct per item.
-  #                     Length requirements: see number_answeroptions.
-  #  correct_percentage: Vector with percentage correct per item.
-  #                      Length requirements: see number_answeroptions.
-  #  frequency_answer_options: Matrix with [i,j] the frequency of answer option
-  #                            i of item j. Dimension requirements: see key
-  #                            + one extra column (for missing).
-  #  percentage_answer_options: Matrix with [i,j] the percentage of
-  #                             answer option i of item j.
-  #                             Dimension requirements: see key + 1 extra
-  #                             column (for missing)
-  #  corrected_item_tot_cor: Vector with item rest correlation per item.
-  #                          Length requirements: see number_answeroptions
-  #  corrected_item_tot_cor_answ_option: Matrix with [i,j] the item total
-  #                                      correlation for answer option i of
-  #                                      item j. Dimension requirements:
-  #                                      see key + one extra column.
-  #   key: Matrix of 0's and 1's. key[i,j] indicates whether answer option i to
-  #        item j is right (1) or wrong (0). If a column (item) consists of only
-  #        0s, the item is interpreted as graded manually.
-  #        Number of columns should be equal to the length of:
-  #        number_answeroptions, correct_frequency, correct_percentage, and
-  #        corrected_item_tot_cor.
-  #        The following objects have the same dimensions as key + 1 extra row:
-  #        frequency_answer_options, percentage_answer_options, and
-  #        corrected_item_tot_cor_answ_option
-  #
-  # Returns:
-  #  List with one entry per item. Each entry consists of a data frame with
-  #  frequency, percentage and IRC for the total item and the answer options
-  #  (if the item has answer options)
-
-  item_list <- list() #  Creates list to put item output in
-  colnames1 <- c(" ", "Frequency", "Percentage", "IRC")
-  colnames2 <- c("Answer Option", "Frequency", "Percentage", "IRC", "Correct")
-
-  # For every item, create entry in list with frequencies, percentages, and IRCs
-  for (i in 1:length(item_names)) {
-    if (number_answeroptions[i] > 0) {
-      correct <- c(
-        ifelse(
-          key[1:number_answeroptions[i], i] == 1,
-          "Correct",
-          "Incorrect"
-        ),
-        "Incorrect"
-      )
-
-      item_list[[i]] <- data.frame(
-        c(LETTERS[1:number_answeroptions[i]], "Missing"),
-        c(frequency_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]),
-        c(percentage_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]),
-        c(corrected_item_tot_cor_answ_option[
-          c(2:(number_answeroptions[i] + 1), 1),
-          i
-        ]),
-        correct,
-        row.names = NULL)
-      colnames(item_list[[i]]) <- colnames2
-
-    } else {
-      item_list[[i]] <- data.frame("Correct",
-        correct_frequency[i],
-        correct_percentage[i],
-        corrected_item_tot_cor[i]
-      )
-      colnames(item_list[[i]]) <- colnames1
-     }
-  }
-
-  names(item_list) <- item_names
-  return(item_list)
-}
-
-# Use the function defined above
-item_list <- create_item_list(
-  number_answeroptions,
-  correct_frequency,
-  correct_percentage,
-  frequency_answer_options,
-  percentage_answer_options,
-  corrected_item_tot_cor,
-  corrected_item_tot_cor_answ_option,
-  item_names,
-  key
-)
-
-cat(paste("#", title))
-```
-
-# General
-
-What | Result |
-:---------------------- | :------------- |
-Number of students | `r length(student_scores)`|
-Number of items | `r length(item_names)`|
-Average score | `r round(mean(student_scores), digits = 3)`|
-Standard deviation | `r round(sd(student_scores), digits = 3)`| 
-Cronbach's alpha | `r cronbach`| 
-Standard error | `r round(sd(student_scores * sqrt(1 - cronbach)), digits = 3)`| 
-
-
-# Item statistics
-
-For each item the frequency, percentage and item rest correlations (IRC) of 
-every answer option are shown The IRC should be (highly) positive for the 
-right answer option and low for the wrong answer option(s).
-
-```{r pander, results = "asis", eval = TRUE, echo = FALSE, warning = FALSE, message = FALSE}
-
-print_item_tables <- function(item_list, key) {
-  # Print table for each item with frequency, percentage and the
-  # item-rest correlation. Correct answers are highlighted in bold.
-  #
-  # Args:
-  #  item_list: list with one entry per item. Each entry has a data frame
-  #             Data frame has a row for every answer option + one for missing.
-  #             Columns are: Answer option, frequency, percentage, IRC,
-  #                          and Correct (answer option is Correct/Incorrect)
-  #            Number of rows of each data frame has a maximum as the number of
-  #            columns of key + 1.
-  #  key: Matrix of 0's and 1's. key[i,j] implies wether answer option i
-  #       to item j is right (1) or wrong (0). If a column (item) consists of
-  #       only 0s, the item is interpreted as graded manually.
-  #       Number of columns should be equal to length of item_list
-
-  pander::panderOptions("knitr.auto.asis", FALSE)
-  for (i in 1:length(item_list)) {
-    if (any(key[, i] == 1)) {
-      item_list[[i]]$Correct <- revalue(
-        item_list[[i]]$Correct,
-        c("Correct" = "X", "Incorrect" = "")
-      )
-      pander::emphasize.strong.rows(which(key[, i] == 1, arr.ind = TRUE))
-      pander::pander(item_list[i])
-    } else {
-      t <- item_list[i]
-      pander::pander(t)
-    }
-  }
-}
-
-print_item_tables(item_list, key)
-```
-
-\pagebreak
-
-```{r, echo = FALSE, message = FALSE, warning = FALSE, fig.width = 11, results = 'asis'}
-
-create_df <- function(item_names, correct_percentage, corrected_item_tot_cor) {
-  # Creates a data frame with the overall item statistics including a colour
-  # indicator for the percentage correct and IRC. Colours is defined below.
-  #
-  # Args:
-  #  These are the same as defined in create_item_list.
-  #
-  # Returns:
-  #  Data frame with a row for every item. Columns consist of the item name,
-  #  percentage correct, colour indicator for percentage, IRC and colour
-  #  indicator for the IRC.
-
-  correct_df <- data.frame(
-    item = factor(item_names, item_names),
-    correct_percentage,
-    perc_col = ifelse(
-      correct_percentage < 40, 0,
-      ifelse(correct_percentage > 70, 1, 1 / 75 * correct_percentage - 7 / 30)
-    ),
-    corrected_item_tot_cor,
-    irc_col = ifelse(
-      corrected_item_tot_cor < 0, 0,
-      ifelse(
-        corrected_item_tot_cor > .05,
-        1,
-        8 * corrected_item_tot_cor + 3 / 10)
-    )
-  )
-
-  return(correct_df)
-}
-
-plot_overall_perc <- function(correct_df) {
-  # Plots bar graph of percentage correct of every item. Bars are coloured
-  # depending on the percentage correct:
-  # < 40: Red
-  # 40 - 70: Orange till green-ish
-  # > 70: Green
-  #
-  # Args:
-  #  correct_df: data frame with item name, percentage correct and colour code
-  #  for the percentages.
-  #
-  # Returns:
-  #  Explanation of the plot and the plot itself
-
-  cat("
-
-## Percentage Correct
-
-Plot of percentage correct per item. If *more than 70%* of the students answer
-an item correctly, the students have mastered the material well enough, thus it
-is coloured *green*. If *less than 40%* of the students answer an item
-correctly, the students have not mastered the material, and is coloured *red*.
-*Orange* indicates the percentage correct lies *between 40% and 70%*. The
-colours are based on multiple choice items with 3 or 4 answer options.
-
-")
-
-  bar_freq <- ggplot2::ggplot(
-    correct_df,
-    ggplot2::aes(item, correct_percentage, fill = perc_col))
-  bar_freq <- 
-    bar_freq + 
-    # Create bar chart
-    geom_bar(stat = "identity") + 
-    # Creat colour boundary
-    scale_fill_gradient(
-      low    = "red",
-      high   = "green",
-      limits = c(0, 1),
-      guide  = FALSE
-    ) +
-    # Change y-axis limit to constant
-    coord_cartesian(ylim = c(0, 100)) +
-    # Axes names
-    labs(x = "Item", y = "Percentage Correct") +
-    # Black en white
-    theme_bw() +
-    # Change various sizes
-    theme(
-      axis.text.x  = element_text(size = 8, angle = 90),
-      axis.ticks.x = element_line(size = .4)
-    )
-
-  suppressWarnings(print(bar_freq))
-}
-
-plot_overall_irc <- function(correct_df) {
-  # Plots bar graph of IRC of every item. Bars are coloured by IRC:
-  # < 0: Red
-  # 0 - .10: Orange till green-ish
-  # > .10: green
-  #
-  # Args:
-  #  correct_df: data frame with item name, percentage correct and colour code
-  #  for the percentages.
-  #
-  # Returns:
-  #  Explanation of the plot and the plot itself
-
-  cat("\\pagebreak
-
-## Item rest correlations
-
-Plot of Item Rest Correlations per item: The correlation between an item (0 for
-wrong answers and 1 for right answers) and the sum of all other items. Items
-with a correlation *higher than .05* indicate that students who answered this
-item correctly answered the other items more often correctly than students who
-answered this item incorrectly. Thus it is coloured *green*. Items with a
-correlation *lower than 0* indicate no relationship between this item and the
-other items, or that students who answered this item correctly answered the
-other items more often incorrectly than students who answered this incorrectly,
-thus it is coloured *red*. *Between 0 and .05*, an item is coloured *orange*.
-
-")
-
-  # Create bar chart
-  bar_irc <- ggplot2::ggplot(
-    correct_df,
-    ggplot2::aes(item, corrected_item_tot_cor, fill = irc_col))
-  bar_irc <-  
-    bar_irc +
-    geom_bar(stat = "identity") +
-    scale_fill_gradient(
-      low    = "red",
-      high   = "green",
-      limits = c(0, 1),
-      guide  = FALSE
-    ) +
-    theme_bw() +
-    labs(x = "Item", y = "Item Rest Correlation") +
-    theme(
-      strip.text.x = element_text(size = 7),
-      axis.text.x  = element_text(size = 8, angle = 90),
-      axis.ticks.x = element_line(size = .4)
-    )
-
-  suppressWarnings(print(bar_irc))
-}
-
-correct_df <- create_df(item_names, correct_percentage, corrected_item_tot_cor)
-plot_overall_perc(correct_df)
-plot_overall_irc(correct_df)
-```
-
-```{r, echo = FALSE, eval = TRUE, warning= FALSE, fig.width = 11}
-
-# Create extra variables for the colours in the bar plots
-make_perc_col <- function(percentage, correct) {
-  # Calculate colours for the percentage answer options.
-  #
-  # Args:
-  #  percentage: Percentage answered per answer options
-  #  correct: Vector of "Correct" and "Incorrect" of same length of percentage
-  #
-  # Returns:
-  #  Value between 0 and 1. 0 corresponds to red and 1 to green in the plots.
-  #  For the correct options:
-  #    < 40: red
-  #    40 - 70: orange - green-ish
-  #    > 70: green
-  #  For the incorrect options: difference between correct answer option and
-  #  incorrect answer options are calculated. Colours are based on difference:
-  #    < -10: Green
-  #    -10 - 0: orange - green-ish
-  #    > 0: green
-
-  perc_col <- vector("numeric", length = length(correct))
-  perc_col[correct == "Correct"] <- ifelse(
-    percentage[correct == "Correct"] < 40,
-    0,
-    ifelse(
-      percentage[correct == "Correct"] > 70,
-      1,
-      1 / 75 * percentage[correct == "Correct"] - 7 / 30
-    )
-  )
-  dif <-
-    percentage[correct == "Incorrect"] - min(percentage[correct == "Correct"])
-  perc_col[correct == "Incorrect"] <- ifelse(
-    dif >= 0,
-    0,
-    ifelse(dif < -10, 1, -1 / 25 * dif + 3 / 10)
-  )
-  perc_col
-}
-
-make_irc_col <- function(IRC, correct){
-  # Calculate colours for the percentage answer options.
-  #
-  # Args:
-  #  IRC: Vector of IRC per answer options. Same length as correct
-  #  correct: Vector of "Correct" and "Incorrect" of same length of IRC
-  #
-  # Returns:
-  #  Value between 0 and 1. 0 corresponds to red and 1 to green in the plots.
-  #  For the correct answer options:
-  #    < 0: red
-  #    0 - 0.05: orange - green-ish
-  #    > 0.05: green
-  # For the incorrect answer options: difference between correct answer option
-  # and incorrect answer option are calculated. Colours are based on this
-  # difference:
-  #    > 0: red
-  #    -0.05 - 0: orange - green-ish
-  #    < -0.05: red
-
-  irc_col <- numeric()
-  irc_col[correct == "Correct"] <- ifelse(
-    IRC[correct == "Correct"] < 0,
-    0,
-    ifelse(IRC[correct == "Correct"] > .05,
-      1,
-      8 * IRC[correct == "Correct"] + 3 / 10
-    )
-  )
-  dif <- IRC[correct == "Incorrect"] - min(IRC[correct == "Correct"])
-  irc_col[correct == "Incorrect"] <- ifelse(
-    dif >= 0,
-    0,
-    ifelse(dif < -.05, 1, -8 * dif + 3 / 10)
-  )
-  return(irc_col)
-}
-
-make_answ_df <- function(item_list, key, number_answeroptions, item_names) {
-  # Creates a data frame with the statistics per answer option including a
-  # colour indicator for the percentage correct and IRC.
-  #
-  # Args:
-  #  These are the same as defined earlier
-  #
-  # Returns:
-  #  Data frame with a row for every answer option. Columns consist of the
-  #  item name, answer options, frequency, percentage, IRC, Correct (whether
-  #  an option is correct/incorrect), colour for percentage and colour for IRC
-
-  for (i in 1:length(item_list)) {
-    if (any(key[, i] != 0)) {
-      item_list[[i]]$perc_col <- with(
-        item_list[[i]],
-        make_perc_col(Percentage, Correct)
-      )
-      item_list[[i]]$irc_col <- with(item_list[[i]], make_irc_col(IRC, Correct))
-    }
-  }
-  ans_opt_df <- plyr::ldply(item_list[number_answeroptions != 0], data.frame)
-  names(ans_opt_df)[1] <- "id"
-  ans_opt_df$Answer.Option <- gsub("Missing", "Mi", ans_opt_df$Answer.Option)
-  ans_opt_df$Answer.Option <- factor(
-    ans_opt_df$Answer.Option,
-    levels = c(LETTERS[1:max(number_answeroptions)], "Mi")
-  )
-  ans_opt_df$id <- factor(
-    ans_opt_df$id,
-    levels = item_names[number_answeroptions != 0]
-  )
-  if (all(key == 0)) {
-    ans_opt_df <- NULL
-  }
-
-  ans_opt_df
-}
-
-calc_n_plot <- function(ans_opt_df, key) {
-  # Calculates which item is displayed in which plot. The total number of
-  # answer options is too large (usually) to display them all in one plot.
-  #
-  # Args:
-  #  ans_opt_df: As defined in function above
-  #
-  # Returns:
-  #  n_plots: the number of plots
-  #  items_in_plot: list entry per plot. Every entry consists of item names
-  #  in that plot
-
-  if (any(key != 0)) {
-    max_answ_opts <- 80 #  Maximum number of bars per plot
-    tot_answ_opts <- nrow(ans_opt_df)
-    n_plots <- ceiling(tot_answ_opts / max_answ_opts)
-    answ_opts_per_plot <- floor(tot_answ_opts / n_plots)
-
-    items_in_plot <- list(unique(ans_opt_df[1:answ_opts_per_plot, 1]))
-    if (n_plots > 1) {
-      for (i in 2:n_plots) {
-        items_in_plot[[i]] <- unique(
-          ans_opt_df[
-            (((i - 1) * answ_opts_per_plot) + 1):(i * answ_opts_per_plot),
-            1
-          ]
-        )
-        if (items_in_plot[[i]][1] %in% items_in_plot[[i - 1]]) {
-          items_in_plot[[i]] <- items_in_plot[[i]][-1]
-        }
-      }
-    }
-    assign("items_in_plot", items_in_plot, envir = globalenv())
-  }
-}
-
-ans_opt_df <- make_answ_df(item_list, key, number_answeroptions, item_names)
-calc_n_plot(ans_opt_df, key)
-
-```
-
-```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11, results= 'asis'}    
-
-plot_perc_answ <- function(ans_opt_df, items_in_plot, key) {
-  # Plots bar graph(s) of the percentages of every answer option
-  #
-  # Args:
-  #  All defined as before
-  #
-  # Returns:
-  #  Description of how to read the plots
-  #  One or multiple plots
-
-  if (any(key != 0)) {
-
-    cat("\\pagebreak
-
-## Percentage chosen per answer options
-
-Percentage plot of all answer options per item. **White** bars represent the
-**right** answer option(s) and **black** the **wrong** options.
-
-The border represents the performance of an item. The **right** answer options
-are coloured *green* if *more than 70%* of the students choose this option. In
-that case, the students have mastered the material. They are coloured *red* if
-*less than 40%* choose this option. *Between 40% and 70%*, it is coloured
-*orange*.
-
-The **wrong** answer options are coloured *green* if an option is chosen *10
-percentage point less than the right option*. In that case, more students chose
-the right answer option than the wrong option indicating that students are not
-confused by the wrong option. They are coloured *red* if an option is chosen
-*equally or more often than the right option*. In this case, students pick the
-wrong answer option often, indicating that this option may be right as well. It
-is coloured *orange* in between these two.
-
-")
-
-    for (i in 1:length(items_in_plot)) {
-      bar_freq1 <- ggplot2::ggplot(
-        ans_opt_df[ans_opt_df$id %in% items_in_plot[[i]], ],
-        ggplot2::aes(
-          "Answer Option",
-          Percentage,
-          fill   = Correct,
-          colour = perc_col)
-        )
-      bar_freq1 <- 
-        bar_freq1 +
-        geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) +
-        # Plot items
-        facet_grid(. ~ id, scales = "free_x", space = "free_x") +
-        # Fill in the bars
-        scale_fill_manual(
-          values = c("Incorrect" = "Black", "Correct" = "White"),
-          guide  = FALSE
-        ) +
-        # Colour the bandaries
-        scale_colour_gradient(
-          low    = "red",
-          high   = "green",
-          limits = c(0, 1),
-          guide  = FALSE
-        )  +
-        # Change y-axis limits to constants
-        coord_cartesian(ylim = c(0, 100)) +
-        # Names x-axis
-        labs(x = "Answer Options") +
-        theme_bw() +
-        # Change various font sizes
-        theme(
-          axis.text.x  = element_text(size = 8),
-          axis.text.y  = element_text(size = 12),
-          axis.title.x = element_text(size = 15),
-          axis.title.y = element_text(size = 15)
-        )
-
-        suppressWarnings(print(bar_freq1))
-    }
-  }
-}
-
-plot_irc_answ <- function(ans_opt_df, items_in_plot, key) {
-  # Plots bar graph(s) of the IRC of every answer option
-  #
-  # Args:
-  #  All defined as before
-  #
-  # Returns:
-  #  Description of how to read the plots
-  #  One or multiple plots
-
-  if (any(key != 0)) {
-
-    cat('\\pagebreak
-
-## Item rest correlations per answer option
-
-Item rest correlation (IRC) plot of all answer options per item. The IRC is the
-correlation between an answer option (1 for students who chose this options and
-0 for student who did not) and the sumscore on all other items. **White** bars
-represent the **right** answer option(s) and **black** the **wrong** options.
-
-The border represents the performance of an item. The **right** answer options
-are coloured *green* if the IRC is *higher than .05*. In that case, students who
-picked the right answer option scored higher on all other items than student who
-did not pick the right option. They are coloured *red* if the IRC is *less than
-0*. In that case, students who picked the right answer option scored equally or
-worse on the other items than students who picked the other options. Between *0
-and .05*, the option is coloured *orange*.
-
-The **wrong** answer options are coloured *green* if the IRC is *more than .1
-lower than the right option*. In that case, students who answered the other
-items incorrectly, usually answered this item incorrectly as well. They are
-coloured *red* if the IRC is *equal or higher than the right answer option*.
-In that case, students who answered the other items correctly may have answered
-this item incorrectly. In between these two, the option is coloured *orange*.
-
-')
-
-    # Creating IRC plots
-    for (i in 1:length(items_in_plot)) {
-      # Select items and variables to plot
-      bar_irc1 <- ggplot2::ggplot(
-        ans_opt_df[ans_opt_df$id %in% items_in_plot[[i]], ],
-        ggplot2::aes("Answer Option", IRC, fill = Correct, colour = irc_col)
-      )
-      bar_irc1 <-
-        bar_irc1 +
-        geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) +
-        facet_grid(. ~ id, scales = "free_x", space = "free_x") +
-        scale_fill_manual(
-          values = c("Incorrect" = "Black", "Correct" = "White"),
-          guide  = FALSE
-        ) +
-        scale_colour_gradient(
-          low    = "red",
-          high   = "green",
-          limits = c(0, 1),
-          guide  = FALSE
-        ) +
-        # Change y-axis limit to either the maximum and minimum IRC
-        coord_cartesian(
-          ylim = c(min(ans_opt_df$IRC) - .01, max(ans_opt_df$IRC)  + .01)
-        ) +
-        labs(x = "Answer Options") +
-        theme_bw() +
-        theme(
-          axis.text.x  = element_text(size = 8),
-          axis.text.y  = element_text(size = 12),
-          axis.title.x = element_text(size = 15),
-          axis.title.y = element_text(size = 15)
-        )
-        suppressWarnings(print(bar_irc1))
-    }
-  }
-}
-
-plot_irc_answ(ans_opt_df, items_in_plot, key)
-plot_perc_answ(ans_opt_df, items_in_plot, key)
-```
-
-\pagebreak
-
-## Cumulative Distribution
-
-Cumulative Distribution showing all possible scores and the percentage of 
-students that scored that number or higher
-
-```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11}
-CumHist <- function(student_scores, n_item) {
-  # Create a reversed cumulative histogram, i.e. the height of score x is
-  # the percentage of students who scores x or higher.
-  #
-  # Args:
-  #  student_scores: numeric vector of test score per student.
-  #  n_item: number of items
-  #
-  # Returns:
-  #  Reversed cumulative histogram plot
-
-  h <- heights <- length(student_scores)
-  n <- sort(unique(student_scores))
-
-  for (i in 1:length(n)) {
-    h <- h - length(student_scores[student_scores == n[i]])
-    heights <- c(heights, h)
-  }
-
-  cesuurlijnen <- seq(0.2, 0.8, 0.2) * length(student_scores)
-  gridlijnen <- round(
-    seq(0, length(student_scores), length(student_scores) / 25),
-    0
-  )
-
-  gridlijnen <- gridlijnen[
-    -grep(paste(round(cesuurlijnen, 0), collapse = "|"), gridlijnen)
-  ]
-
-  heights <- heights[1:length(unique(student_scores))]
-  percent <- heights / length(student_scores) * 100
-  df <- data.frame(
-    score   = sort(unique(student_scores)),
-    total   = heights,
-    percent = round(percent, 2)
-  )
-
-  vec <- df[1, 1]:df[nrow(df), 1] #  save all possible total scores
-
-  # Find if a total score is missing and add it
-  mis <- NA
-  if (sum(! (vec %in% df[, 1]) != 0)) {
-    mis <- vec[! (vec %in% df[, 1])]
-  }
-  if (! is.na(mis[1])) {
-    for (i in 1:length(mis)) {
-      df[ (nrow(df) + 1), ] <- c(mis[i], 0, 0)
-    }
-    df <- df[order(df$score), ]
-  }
-
-
-  # if no one gets a certain total score,
-  # replace the 0 with the next total score
-  suppressWarnings(
-    df[which(df[, 2] == 0, arr.ind = TRUE), 2] <-
-      df[(which(df[, 2] == 0, arr.ind = TRUE) + 1), 2]
-  )
-  suppressWarnings(
-    df[which(df[, 3] == 0, arr.ind = TRUE), 3] <-
-      df[(which(df[, 3] == 0, arr.ind = TRUE) + 1), 3]
-  )
-
-  g <- 
-    ggplot2::ggplot(
-      data = df,
-      ggplot2::aes(x = score, y = total)
-    ) +
-    geom_hline(
-      yintercept = c(gridlijnen),
-      linetype   = "solid",
-      colour     = "gray80"
-    ) +
-    geom_hline(
-      yintercept = cesuurlijnen,
-      linetype   = "dashed",
-      size       = 0.75,
-      colour     = "black"
-    ) +
-    geom_histogram(stat = "identity", binwidth = 1) +
-    xlab("Total score") +
-    ylab("% of students with score or higher") +
-    scale_y_continuous(
-      breaks = c(0, cesuurlijnen, length(student_scores)),
-      labels = c(seq(0, 100, 20))
-    ) +
-    scale_x_continuous(
-      limits = c(df[1, 1] - 1, n_item + 1),
-      breaks = 0:n_item,
-      expand = c(0, 0)
-    ) +
-    theme_bw() +
-    theme(panel.grid = element_blank())
-
-  print(g)
-}
-
-plot_cum_hist(student_scores, n_item = ncol(key))
-```
-
-```{r, echo = FALSE, eval = TRUE, warnings = FALSE, message = FALSE, results = 'asis', fig.width = 11}
-## Do the whole script above for every sub category
-if (!length(categories) == 0) {
-  cat("\n\n")
-  cat("\\pagebreak")
-  cat("\n\n")
-  cat("# Analysis for subcategories")
-  cat("\n\n")
-
-  for (i in 1:length(categories)) {
-    items <- categories[[i]]$items
-    if (i != 1) {
-      cat("\\pagebreak")
-      cat("\n\n")
-    }
-    cat("\n\n")
-    cat(paste("## Category:", categories[[i]]$name ))
-    cat("\n\n")
-
-    item_list <- create_item_list(
-      number_answeroptions[items],
-      correct_frequency[items],
-      correct_percentage[items],
-      frequency_answer_options[, items],
-      percentage_answer_options[, items],
-      categories[[i]]$corrected_item_tot_cor,
-      categories[[i]]$corrected_item_tot_cor_answ_option,
-      item_names[items],
-      key[, items]
-    )
-
-    correct_df <- create_df(
-      item_names[items],
-      correct_percentage[items],
-      categories[[i]]$corrected_item_tot_cor
-    )
-
-    plot_overall_perc(correct_df)
-    plot_overall_irc(correct_df)
-
-    if (any(key[, items] != 0)) {
-      ans_opt_df <- make_answ_df(
-        item_list,
-        key[, items],
-        number_answeroptions[items],
-        item_names[items]
-      )
-      calc_n_plot(ans_opt_df, key[, items])
-      plot_perc_answ(ans_opt_df, items_in_plot, key[, items])
-      plot_irc_answ(ans_opt_df, items_in_plot, key[, items])
-    }
-
-    cat("\\pagebreak")
-    cat("\n\n")
-    cat("## Cumulative Distribution")
-    cat("\n\n")
-    plot_cum_hist(categories[[i]]$student_scores, n_item = length(items))
-  }
-}
-```
+---
+html_document: null
+fig_width: 11
+classoption: landscape
+keep_md: yes
+output: html_document
+pdf_document: default
+---
+
+```{r, echo = FALSE, warning = FALSE, message = FALSE}
+library(ggplot2)
+library(pander)
+library(plyr)
+```
+
+
+```{r, echo = FALSE, eval = TRUE, results = "asis"}
+
+create_item_list <- function(number_answeroptions,
+                             correct_frequency,
+                             correct_percentage,
+                             frequency_answer_options,
+                             percentage_answer_options,
+                             corrected_item_tot_cor,
+                             corrected_item_tot_cor_answ_option,
+                             item_names,
+                             key) {
+  # Creates a list with frequency, percentage correct, and IRC for every item.
+  #
+  # Args:
+  #  number_answeroptions: Vector with number of answer options per item.
+  #                        Length should be at least 3 and the same as
+  #                        correct_frequency, correct_percentage, item_names,
+  #                        and corrected_item_tot_cor.
+  #                        Length should also be the same as the number of
+  #                        colums of: frequency_answer_options,
+  #                        percentage_answer_options, corrected_item_tot_cor,
+  #                        corrected_item_tot_cor_answ_option, and key + 1.
+  #  correct_frequency: Vector with total number correct per item.
+  #                     Length requirements: see number_answeroptions.
+  #  correct_percentage: Vector with percentage correct per item.
+  #                      Length requirements: see number_answeroptions.
+  #  frequency_answer_options: Matrix with [i,j] the frequency of answer option
+  #                            i of item j. Dimension requirements: see key
+  #                            + one extra column (for missing).
+  #  percentage_answer_options: Matrix with [i,j] the percentage of
+  #                             answer option i of item j.
+  #                             Dimension requirements: see key + 1 extra
+  #                             column (for missing)
+  #  corrected_item_tot_cor: Vector with item rest correlation per item.
+  #                          Length requirements: see number_answeroptions
+  #  corrected_item_tot_cor_answ_option: Matrix with [i,j] the item total
+  #                                      correlation for answer option i of
+  #                                      item j. Dimension requirements:
+  #                                      see key + one extra column.
+  #   key: Matrix of 0's and 1's. key[i,j] indicates whether answer option i to
+  #        item j is right (1) or wrong (0). If a column (item) consists of only
+  #        0s, the item is interpreted as graded manually.
+  #        Number of columns should be equal to the length of:
+  #        number_answeroptions, correct_frequency, correct_percentage, and
+  #        corrected_item_tot_cor.
+  #        The following objects have the same dimensions as key + 1 extra row:
+  #        frequency_answer_options, percentage_answer_options, and
+  #        corrected_item_tot_cor_answ_option
+  #
+  # Returns:
+  #  List with one entry per item. Each entry consists of a data frame with
+  #  frequency, percentage and IRC for the total item and the answer options
+  #  (if the item has answer options)
+
+  item_list <- list() #  Creates list to put item output in
+  colnames1 <- c(" ", "Frequency", "Percentage", "IRC")
+  colnames2 <- c("Answer Option", "Frequency", "Percentage", "IRC", "Correct")
+
+  # For every item, create entry in list with frequencies, percentages, and IRCs
+  for (i in 1:length(item_names)) {
+    if (number_answeroptions[i] > 0) {
+      correct <- c(
+        ifelse(
+          key[1:number_answeroptions[i], i] == 1,
+          "Correct",
+          "Incorrect"
+        ),
+        "Incorrect"
+      )
+
+      item_list[[i]] <- data.frame(
+        c(LETTERS[1:number_answeroptions[i]], "Missing"),
+        c(frequency_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]),
+        c(percentage_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]),
+        c(corrected_item_tot_cor_answ_option[
+          c(2:(number_answeroptions[i] + 1), 1),
+          i
+        ]),
+        correct,
+        row.names = NULL)
+      colnames(item_list[[i]]) <- colnames2
+
+    } else {
+      item_list[[i]] <- data.frame("Correct",
+        correct_frequency[i],
+        correct_percentage[i],
+        corrected_item_tot_cor[i]
+      )
+      colnames(item_list[[i]]) <- colnames1
+     }
+  }
+
+  names(item_list) <- item_names
+  return(item_list)
+}
+
+# Use the function defined above
+item_list <- create_item_list(
+  number_answeroptions,
+  correct_frequency,
+  correct_percentage,
+  frequency_answer_options,
+  percentage_answer_options,
+  corrected_item_tot_cor,
+  corrected_item_tot_cor_answ_option,
+  item_names,
+  key
+)
+
+cat(paste("#", title))
+```
+
+# General
+
+What | Result |
+:---------------------- | :------------- |
+Number of students | `r length(student_scores)`|
+Number of items | `r length(item_names)`|
+Average score | `r round(mean(student_scores), digits = 3)`|
+Standard deviation | `r round(sd(student_scores), digits = 3)`| 
+Cronbach's alpha | `r cronbach`| 
+Standard error | `r round(sd(student_scores * sqrt(1 - cronbach)), digits = 3)`| 
+
+
+# Item statistics
+
+For each item the frequency, percentage and item rest correlations (IRC) of 
+every answer option are shown The IRC should be (highly) positive for the 
+right answer option and low for the wrong answer option(s).
+
+```{r pander, results = "asis", eval = TRUE, echo = FALSE, warning = FALSE, message = FALSE}
+
+print_item_tables <- function(item_list, key) {
+  # Print table for each item with frequency, percentage and the
+  # item-rest correlation. Correct answers are highlighted in bold.
+  #
+  # Args:
+  #  item_list: list with one entry per item. Each entry has a data frame
+  #             Data frame has a row for every answer option + one for missing.
+  #             Columns are: Answer option, frequency, percentage, IRC,
+  #                          and Correct (answer option is Correct/Incorrect)
+  #            Number of rows of each data frame has a maximum as the number of
+  #            columns of key + 1.
+  #  key: Matrix of 0's and 1's. key[i,j] implies wether answer option i
+  #       to item j is right (1) or wrong (0). If a column (item) consists of
+  #       only 0s, the item is interpreted as graded manually.
+  #       Number of columns should be equal to length of item_list
+
+  pander::panderOptions("knitr.auto.asis", FALSE)
+  for (i in 1:length(item_list)) {
+    if (any(key[, i] == 1)) {
+      item_list[[i]]$Correct <- revalue(
+        item_list[[i]]$Correct,
+        c("Correct" = "X", "Incorrect" = "")
+      )
+      pander::emphasize.strong.rows(which(key[, i] == 1, arr.ind = TRUE))
+      pander::pander(item_list[i])
+    } else {
+      t <- item_list[i]
+      pander::pander(t)
+    }
+  }
+}
+
+print_item_tables(item_list, key)
+```
+
+\pagebreak
+
+```{r, echo = FALSE, message = FALSE, warning = FALSE, fig.width = 11, results = 'asis'}
+
+create_df <- function(item_names, correct_percentage, corrected_item_tot_cor) {
+  # Creates a data frame with the overall item statistics including a colour
+  # indicator for the percentage correct and IRC. Colours is defined below.
+  #
+  # Args:
+  #  These are the same as defined in create_item_list.
+  #
+  # Returns:
+  #  Data frame with a row for every item. Columns consist of the item name,
+  #  percentage correct, colour indicator for percentage, IRC and colour
+  #  indicator for the IRC.
+
+  correct_df <- data.frame(
+    item = factor(item_names, item_names),
+    correct_percentage,
+    perc_col = ifelse(
+      correct_percentage < 40, 0,
+      ifelse(correct_percentage > 70, 1, 1 / 75 * correct_percentage - 7 / 30)
+    ),
+    corrected_item_tot_cor,
+    irc_col = ifelse(
+      corrected_item_tot_cor < 0, 0,
+      ifelse(
+        corrected_item_tot_cor > .05,
+        1,
+        8 * corrected_item_tot_cor + 3 / 10)
+    )
+  )
+
+  return(correct_df)
+}
+
+plot_overall_perc <- function(correct_df) {
+  # Plots bar graph of percentage correct of every item. Bars are coloured
+  # depending on the percentage correct:
+  # < 40: Red
+  # 40 - 70: Orange till green-ish
+  # > 70: Green
+  #
+  # Args:
+  #  correct_df: data frame with item name, percentage correct and colour code
+  #  for the percentages.
+  #
+  # Returns:
+  #  Explanation of the plot and the plot itself
+
+  cat("
+
+## Percentage Correct
+
+Plot of percentage correct per item. If *more than 70%* of the students answer
+an item correctly, the students have mastered the material well enough, thus it
+is coloured *green*. If *less than 40%* of the students answer an item
+correctly, the students have not mastered the material, and is coloured *red*.
+*Orange* indicates the percentage correct lies *between 40% and 70%*. The
+colours are based on multiple choice items with 3 or 4 answer options.
+
+")
+
+  bar_freq <- ggplot2::ggplot(
+    correct_df,
+    ggplot2::aes(item, correct_percentage, fill = perc_col))
+  bar_freq <- 
+    bar_freq + 
+    # Create bar chart
+    geom_bar(stat = "identity") + 
+    # Creat colour boundary
+    scale_fill_gradient(
+      low    = "red",
+      high   = "green",
+      limits = c(0, 1),
+      guide  = FALSE
+    ) +
+    # Change y-axis limit to constant
+    coord_cartesian(ylim = c(0, 100)) +
+    # Axes names
+    labs(x = "Item", y = "Percentage Correct") +
+    # Black en white
+    theme_bw() +
+    # Change various sizes
+    theme(
+      axis.text.x  = element_text(size = 8, angle = 90),
+      axis.ticks.x = element_line(size = .4)
+    )
+
+  suppressWarnings(print(bar_freq))
+}
+
+plot_overall_irc <- function(correct_df) {
+  # Plots bar graph of IRC of every item. Bars are coloured by IRC:
+  # < 0: Red
+  # 0 - .10: Orange till green-ish
+  # > .10: green
+  #
+  # Args:
+  #  correct_df: data frame with item name, percentage correct and colour code
+  #  for the percentages.
+  #
+  # Returns:
+  #  Explanation of the plot and the plot itself
+
+  cat("\\pagebreak
+
+## Item rest correlations
+
+Plot of Item Rest Correlations per item: The correlation between an item (0 for
+wrong answers and 1 for right answers) and the sum of all other items. Items
+with a correlation *higher than .05* indicate that students who answered this
+item correctly answered the other items more often correctly than students who
+answered this item incorrectly. Thus it is coloured *green*. Items with a
+correlation *lower than 0* indicate no relationship between this item and the
+other items, or that students who answered this item correctly answered the
+other items more often incorrectly than students who answered this incorrectly,
+thus it is coloured *red*. *Between 0 and .05*, an item is coloured *orange*.
+
+")
+
+  # Create bar chart
+  bar_irc <- ggplot2::ggplot(
+    correct_df,
+    ggplot2::aes(item, corrected_item_tot_cor, fill = irc_col))
+  bar_irc <-  
+    bar_irc +
+    geom_bar(stat = "identity") +
+    scale_fill_gradient(
+      low    = "red",
+      high   = "green",
+      limits = c(0, 1),
+      guide  = FALSE
+    ) +
+    theme_bw() +
+    labs(x = "Item", y = "Item Rest Correlation") +
+    theme(
+      strip.text.x = element_text(size = 7),
+      axis.text.x  = element_text(size = 8, angle = 90),
+      axis.ticks.x = element_line(size = .4)
+    )
+
+  suppressWarnings(print(bar_irc))
+}
+
+correct_df <- create_df(item_names, correct_percentage, corrected_item_tot_cor)
+plot_overall_perc(correct_df)
+plot_overall_irc(correct_df)
+```
+
+```{r, echo = FALSE, eval = TRUE, warning= FALSE, fig.width = 11}
+
+# Create extra variables for the colours in the bar plots
+make_perc_col <- function(percentage, correct) {
+  # Calculate colours for the percentage answer options.
+  #
+  # Args:
+  #  percentage: Percentage answered per answer options
+  #  correct: Vector of "Correct" and "Incorrect" of same length of percentage
+  #
+  # Returns:
+  #  Value between 0 and 1. 0 corresponds to red and 1 to green in the plots.
+  #  For the correct options:
+  #    < 40: red
+  #    40 - 70: orange - green-ish
+  #    > 70: green
+  #  For the incorrect options: difference between correct answer option and
+  #  incorrect answer options are calculated. Colours are based on difference:
+  #    < -10: Green
+  #    -10 - 0: orange - green-ish
+  #    > 0: green
+
+  perc_col <- vector("numeric", length = length(correct))
+  perc_col[correct == "Correct"] <- ifelse(
+    percentage[correct == "Correct"] < 40,
+    0,
+    ifelse(
+      percentage[correct == "Correct"] > 70,
+      1,
+      1 / 75 * percentage[correct == "Correct"] - 7 / 30
+    )
+  )
+  dif <-
+    percentage[correct == "Incorrect"] - min(percentage[correct == "Correct"])
+  perc_col[correct == "Incorrect"] <- ifelse(
+    dif >= 0,
+    0,
+    ifelse(dif < -10, 1, -1 / 25 * dif + 3 / 10)
+  )
+  perc_col
+}
+
+make_irc_col <- function(IRC, correct){
+  # Calculate colours for the percentage answer options.
+  #
+  # Args:
+  #  IRC: Vector of IRC per answer options. Same length as correct
+  #  correct: Vector of "Correct" and "Incorrect" of same length of IRC
+  #
+  # Returns:
+  #  Value between 0 and 1. 0 corresponds to red and 1 to green in the plots.
+  #  For the correct answer options:
+  #    < 0: red
+  #    0 - 0.05: orange - green-ish
+  #    > 0.05: green
+  # For the incorrect answer options: difference between correct answer option
+  # and incorrect answer option are calculated. Colours are based on this
+  # difference:
+  #    > 0: red
+  #    -0.05 - 0: orange - green-ish
+  #    < -0.05: red
+
+  irc_col <- numeric()
+  irc_col[correct == "Correct"] <- ifelse(
+    IRC[correct == "Correct"] < 0,
+    0,
+    ifelse(IRC[correct == "Correct"] > .05,
+      1,
+      8 * IRC[correct == "Correct"] + 3 / 10
+    )
+  )
+  dif <- IRC[correct == "Incorrect"] - min(IRC[correct == "Correct"])
+  irc_col[correct == "Incorrect"] <- ifelse(
+    dif >= 0,
+    0,
+    ifelse(dif < -.05, 1, -8 * dif + 3 / 10)
+  )
+  return(irc_col)
+}
+
+make_answ_df <- function(item_list, key, number_answeroptions, item_names) {
+  # Creates a data frame with the statistics per answer option including a
+  # colour indicator for the percentage correct and IRC.
+  #
+  # Args:
+  #  These are the same as defined earlier
+  #
+  # Returns:
+  #  Data frame with a row for every answer option. Columns consist of the
+  #  item name, answer options, frequency, percentage, IRC, Correct (whether
+  #  an option is correct/incorrect), colour for percentage and colour for IRC
+
+  for (i in 1:length(item_list)) {
+    if (any(key[, i] != 0)) {
+      item_list[[i]]$perc_col <- with(
+        item_list[[i]],
+        make_perc_col(Percentage, Correct)
+      )
+      item_list[[i]]$irc_col <- with(item_list[[i]], make_irc_col(IRC, Correct))
+    }
+  }
+  ans_opt_df <- plyr::ldply(item_list[number_answeroptions != 0], data.frame)
+  names(ans_opt_df)[1] <- "id"
+  ans_opt_df$Answer.Option <- gsub("Missing", "Mi", ans_opt_df$Answer.Option)
+  ans_opt_df$Answer.Option <- factor(
+    ans_opt_df$Answer.Option,
+    levels = c(LETTERS[1:max(number_answeroptions)], "Mi")
+  )
+  ans_opt_df$id <- factor(
+    ans_opt_df$id,
+    levels = item_names[number_answeroptions != 0]
+  )
+  if (all(key == 0)) {
+    ans_opt_df <- NULL
+  }
+
+  ans_opt_df
+}
+
+calc_n_plot <- function(ans_opt_df, key) {
+  # Calculates which item is displayed in which plot. The total number of
+  # answer options is too large (usually) to display them all in one plot.
+  #
+  # Args:
+  #  ans_opt_df: As defined in function above
+  #
+  # Returns:
+  #  n_plots: the number of plots
+  #  items_in_plot: list entry per plot. Every entry consists of item names
+  #  in that plot
+
+  if (any(key != 0)) {
+    max_answ_opts <- 80 #  Maximum number of bars per plot
+    tot_answ_opts <- nrow(ans_opt_df)
+    n_plots <- ceiling(tot_answ_opts / max_answ_opts)
+    answ_opts_per_plot <- floor(tot_answ_opts / n_plots)
+
+    items_in_plot <- list(unique(ans_opt_df[1:answ_opts_per_plot, 1]))
+    if (n_plots > 1) {
+      for (i in 2:n_plots) {
+        items_in_plot[[i]] <- unique(
+          ans_opt_df[
+            (((i - 1) * answ_opts_per_plot) + 1):(i * answ_opts_per_plot),
+            1
+          ]
+        )
+        if (items_in_plot[[i]][1] %in% items_in_plot[[i - 1]]) {
+          items_in_plot[[i]] <- items_in_plot[[i]][-1]
+        }
+      }
+    }
+    assign("items_in_plot", items_in_plot, envir = globalenv())
+  }
+}
+
+ans_opt_df <- make_answ_df(item_list, key, number_answeroptions, item_names)
+calc_n_plot(ans_opt_df, key)
+
+```
+
+```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11, results= 'asis'}    
+
+plot_perc_answ <- function(ans_opt_df, items_in_plot, key) {
+  # Plots bar graph(s) of the percentages of every answer option
+  #
+  # Args:
+  #  All defined as before
+  #
+  # Returns:
+  #  Description of how to read the plots
+  #  One or multiple plots
+
+  if (any(key != 0)) {
+
+    cat("\\pagebreak
+
+## Percentage chosen per answer options
+
+Percentage plot of all answer options per item. **White** bars represent the
+**right** answer option(s) and **black** the **wrong** options.
+
+The border represents the performance of an item. The **right** answer options
+are coloured *green* if *more than 70%* of the students choose this option. In
+that case, the students have mastered the material. They are coloured *red* if
+*less than 40%* choose this option. *Between 40% and 70%*, it is coloured
+*orange*.
+
+The **wrong** answer options are coloured *green* if an option is chosen *10
+percentage point less than the right option*. In that case, more students chose
+the right answer option than the wrong option indicating that students are not
+confused by the wrong option. They are coloured *red* if an option is chosen
+*equally or more often than the right option*. In this case, students pick the
+wrong answer option often, indicating that this option may be right as well. It
+is coloured *orange* in between these two.
+
+")
+
+    for (i in 1:length(items_in_plot)) {
+      bar_freq1 <- ggplot2::ggplot(
+        ans_opt_df[ans_opt_df$id %in% items_in_plot[[i]], ],
+        ggplot2::aes(
+          "Answer Option",
+          Percentage,
+          fill   = Correct,
+          colour = perc_col)
+        )
+      bar_freq1 <- 
+        bar_freq1 +
+        geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) +
+        # Plot items
+        facet_grid(. ~ id, scales = "free_x", space = "free_x") +
+        # Fill in the bars
+        scale_fill_manual(
+          values = c("Incorrect" = "Black", "Correct" = "White"),
+          guide  = FALSE
+        ) +
+        # Colour the bandaries
+        scale_colour_gradient(
+          low    = "red",
+          high   = "green",
+          limits = c(0, 1),
+          guide  = FALSE
+        )  +
+        # Change y-axis limits to constants
+        coord_cartesian(ylim = c(0, 100)) +
+        # Names x-axis
+        labs(x = "Answer Options") +
+        theme_bw() +
+        # Change various font sizes
+        theme(
+          axis.text.x  = element_text(size = 8),
+          axis.text.y  = element_text(size = 12),
+          axis.title.x = element_text(size = 15),
+          axis.title.y = element_text(size = 15)
+        )
+
+        suppressWarnings(print(bar_freq1))
+    }
+  }
+}
+
+plot_irc_answ <- function(ans_opt_df, items_in_plot, key) {
+  # Plots bar graph(s) of the IRC of every answer option
+  #
+  # Args:
+  #  All defined as before
+  #
+  # Returns:
+  #  Description of how to read the plots
+  #  One or multiple plots
+
+  if (any(key != 0)) {
+
+    cat('\\pagebreak
+
+## Item rest correlations per answer option
+
+Item rest correlation (IRC) plot of all answer options per item. The IRC is the
+correlation between an answer option (1 for students who chose this options and
+0 for student who did not) and the sumscore on all other items. **White** bars
+represent the **right** answer option(s) and **black** the **wrong** options.
+
+The border represents the performance of an item. The **right** answer options
+are coloured *green* if the IRC is *higher than .05*. In that case, students who
+picked the right answer option scored higher on all other items than student who
+did not pick the right option. They are coloured *red* if the IRC is *less than
+0*. In that case, students who picked the right answer option scored equally or
+worse on the other items than students who picked the other options. Between *0
+and .05*, the option is coloured *orange*.
+
+The **wrong** answer options are coloured *green* if the IRC is *more than .1
+lower than the right option*. In that case, students who answered the other
+items incorrectly, usually answered this item incorrectly as well. They are
+coloured *red* if the IRC is *equal or higher than the right answer option*.
+In that case, students who answered the other items correctly may have answered
+this item incorrectly. In between these two, the option is coloured *orange*.
+
+')
+
+    # Creating IRC plots
+    for (i in 1:length(items_in_plot)) {
+      # Select items and variables to plot
+      bar_irc1 <- ggplot2::ggplot(
+        ans_opt_df[ans_opt_df$id %in% items_in_plot[[i]], ],
+        ggplot2::aes("Answer Option", IRC, fill = Correct, colour = irc_col)
+      )
+      bar_irc1 <-
+        bar_irc1 +
+        geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) +
+        facet_grid(. ~ id, scales = "free_x", space = "free_x") +
+        scale_fill_manual(
+          values = c("Incorrect" = "Black", "Correct" = "White"),
+          guide  = FALSE
+        ) +
+        scale_colour_gradient(
+          low    = "red",
+          high   = "green",
+          limits = c(0, 1),
+          guide  = FALSE
+        ) +
+        # Change y-axis limit to either the maximum and minimum IRC
+        coord_cartesian(
+          ylim = c(min(ans_opt_df$IRC) - .01, max(ans_opt_df$IRC)  + .01)
+        ) +
+        labs(x = "Answer Options") +
+        theme_bw() +
+        theme(
+          axis.text.x  = element_text(size = 8),
+          axis.text.y  = element_text(size = 12),
+          axis.title.x = element_text(size = 15),
+          axis.title.y = element_text(size = 15)
+        )
+        suppressWarnings(print(bar_irc1))
+    }
+  }
+}
+
+plot_irc_answ(ans_opt_df, items_in_plot, key)
+plot_perc_answ(ans_opt_df, items_in_plot, key)
+```
+
+\pagebreak
+
+## Cumulative Distribution
+
+Cumulative Distribution showing all possible scores and the percentage of 
+students that scored that number or higher
+
+```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11}
+CumHist <- function(student_scores, n_item) {
+  # Create a reversed cumulative histogram, i.e. the height of score x is
+  # the percentage of students who scores x or higher.
+  #
+  # Args:
+  #  student_scores: numeric vector of test score per student.
+  #  n_item: number of items
+  #
+  # Returns:
+  #  Reversed cumulative histogram plot
+
+  h <- heights <- length(student_scores)
+  n <- sort(unique(student_scores))
+
+  for (i in 1:length(n)) {
+    h <- h - length(student_scores[student_scores == n[i]])
+    heights <- c(heights, h)
+  }
+
+  cesuurlijnen <- seq(0.2, 0.8, 0.2) * length(student_scores)
+  gridlijnen <- round(
+    seq(0, length(student_scores), length(student_scores) / 25),
+    0
+  )
+
+  gridlijnen <- gridlijnen[
+    -grep(paste(round(cesuurlijnen, 0), collapse = "|"), gridlijnen)
+  ]
+
+  heights <- heights[1:length(unique(student_scores))]
+  percent <- heights / length(student_scores) * 100
+  df <- data.frame(
+    score   = sort(unique(student_scores)),
+    total   = heights,
+    percent = round(percent, 2)
+  )
+
+  vec <- df[1, 1]:df[nrow(df), 1] #  save all possible total scores
+
+  # Find if a total score is missing and add it
+  mis <- NA
+  if (sum(! (vec %in% df[, 1]) != 0)) {
+    mis <- vec[! (vec %in% df[, 1])]
+  }
+  if (! is.na(mis[1])) {
+    for (i in 1:length(mis)) {
+      df[ (nrow(df) + 1), ] <- c(mis[i], 0, 0)
+    }
+    df <- df[order(df$score), ]
+  }
+
+
+  # if no one gets a certain total score,
+  # replace the 0 with the next total score
+  suppressWarnings(
+    df[which(df[, 2] == 0, arr.ind = TRUE), 2] <-
+      df[(which(df[, 2] == 0, arr.ind = TRUE) + 1), 2]
+  )
+  suppressWarnings(
+    df[which(df[, 3] == 0, arr.ind = TRUE), 3] <-
+      df[(which(df[, 3] == 0, arr.ind = TRUE) + 1), 3]
+  )
+
+  g <- 
+    ggplot2::ggplot(
+      data = df,
+      ggplot2::aes(x = score, y = total)
+    ) +
+    geom_hline(
+      yintercept = c(gridlijnen),
+      linetype   = "solid",
+      colour     = "gray80"
+    ) +
+    geom_hline(
+      yintercept = cesuurlijnen,
+      linetype   = "dashed",
+      size       = 0.75,
+      colour     = "black"
+    ) +
+    geom_histogram(stat = "identity", binwidth = 1) +
+    xlab("Total score") +
+    ylab("% of students with score or higher") +
+    scale_y_continuous(
+      breaks = c(0, cesuurlijnen, length(student_scores)),
+      labels = c(seq(0, 100, 20))
+    ) +
+    scale_x_continuous(
+      limits = c(df[1, 1] - 1, n_item + 1),
+      breaks = 0:n_item,
+      expand = c(0, 0)
+    ) +
+    theme_bw() +
+    theme(panel.grid = element_blank())
+
+  print(g)
+}
+
+plot_cum_hist(student_scores, n_item = ncol(key))
+```
+
+```{r, echo = FALSE, eval = TRUE, warnings = FALSE, message = FALSE, results = 'asis', fig.width = 11}
+## Do the whole script above for every sub category
+if (!length(categories) == 0) {
+  cat("\n\n")
+  cat("\\pagebreak")
+  cat("\n\n")
+  cat("# Analysis for subcategories")
+  cat("\n\n")
+
+  for (i in 1:length(categories)) {
+    items <- categories[[i]]$items
+    if (i != 1) {
+      cat("\\pagebreak")
+      cat("\n\n")
+    }
+    cat("\n\n")
+    cat(paste("## Category:", categories[[i]]$name ))
+    cat("\n\n")
+
+    item_list <- create_item_list(
+      number_answeroptions[items],
+      correct_frequency[items],
+      correct_percentage[items],
+      frequency_answer_options[, items],
+      percentage_answer_options[, items],
+      categories[[i]]$corrected_item_tot_cor,
+      categories[[i]]$corrected_item_tot_cor_answ_option,
+      item_names[items],
+      key[, items]
+    )
+
+    correct_df <- create_df(
+      item_names[items],
+      correct_percentage[items],
+      categories[[i]]$corrected_item_tot_cor
+    )
+
+    plot_overall_perc(correct_df)
+    plot_overall_irc(correct_df)
+
+    if (any(key[, items] != 0)) {
+      ans_opt_df <- make_answ_df(
+        item_list,
+        key[, items],
+        number_answeroptions[items],
+        item_names[items]
+      )
+      calc_n_plot(ans_opt_df, key[, items])
+      plot_perc_answ(ans_opt_df, items_in_plot, key[, items])
+      plot_irc_answ(ans_opt_df, items_in_plot, key[, items])
+    }
+
+    cat("\\pagebreak")
+    cat("\n\n")
+    cat("## Cumulative Distribution")
+    cat("\n\n")
+    plot_cum_hist(categories[[i]]$student_scores, n_item = length(items))
+  }
+}
+```

From 596dde254d94a32edaae4e235160372d75a8968e Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Wed, 17 Aug 2016 09:41:04 +0200
Subject: [PATCH 14/21] Fixed bug in refactoring

---
 app/Lib/Rscripts/report.Rmd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd
index ad69e1f..80f6f5e 100644
--- a/app/Lib/Rscripts/report.Rmd
+++ b/app/Lib/Rscripts/report.Rmd
@@ -660,7 +660,7 @@ Cumulative Distribution showing all possible scores and the percentage of
 students that scored that number or higher
 
 ```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, fig.width = 11}
-CumHist <- function(student_scores, n_item) {
+cum_hist <- function(student_scores, n_item) {
   # Create a reversed cumulative histogram, i.e. the height of score x is
   # the percentage of students who scores x or higher.
   #
@@ -757,7 +757,7 @@ CumHist <- function(student_scores, n_item) {
   print(g)
 }
 
-plot_cum_hist(student_scores, n_item = ncol(key))
+cum_hist(student_scores, n_item = ncol(key))
 ```
 
 ```{r, echo = FALSE, eval = TRUE, warnings = FALSE, message = FALSE, results = 'asis', fig.width = 11}

From 4e210b68afd014bf0e4d63e9d2d9615e4bac94af Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Wed, 17 Aug 2016 09:48:19 +0200
Subject: [PATCH 15/21] Fixed bug in refactoring

---
 app/Lib/Rscripts/report.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd
index 80f6f5e..43cc890 100644
--- a/app/Lib/Rscripts/report.Rmd
+++ b/app/Lib/Rscripts/report.Rmd
@@ -816,7 +816,7 @@ if (!length(categories) == 0) {
     cat("\n\n")
     cat("## Cumulative Distribution")
     cat("\n\n")
-    plot_cum_hist(categories[[i]]$student_scores, n_item = length(items))
+    cum_hist(categories[[i]]$student_scores, n_item = length(items))
   }
 }
 ```

From 4551aebb76da9d3c7892d6b8af01f96aae57cd66 Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Thu, 18 Aug 2016 11:07:09 +0200
Subject: [PATCH 16/21] Fixed: Error: Unknown parameters: binwidth, bins, pad

---
 app/Lib/Rscripts/report.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd
index 43cc890..ff55c0f 100644
--- a/app/Lib/Rscripts/report.Rmd
+++ b/app/Lib/Rscripts/report.Rmd
@@ -739,7 +739,7 @@ cum_hist <- function(student_scores, n_item) {
       size       = 0.75,
       colour     = "black"
     ) +
-    geom_histogram(stat = "identity", binwidth = 1) +
+    geom_bar(stat = "identity") +
     xlab("Total score") +
     ylab("% of students with score or higher") +
     scale_y_continuous(

From 86edb9fb996227c089d340f07e5f5d6ae2ec5eb5 Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Thu, 18 Aug 2016 14:54:14 +0200
Subject: [PATCH 17/21] Added required packages to Travis configuration

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index b5bd556..f8a09b0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -40,6 +40,7 @@ before_script:
   - sudo apt-get install -y --force-yes r-base r-cran-rserve
   - sudo apt-get install -y --force-yes libcurl4-gnutls-dev
   - sudo apt-get install -y --force-yes libxml2-dev
+  - sudo apt-get install -y --force-yes texlive-full
   - wget https://github.com/jgm/pandoc/releases/download/1.17.2/pandoc-1.17.2-1-amd64.deb
   - sudo dpkg -i pandoc-1.17.2-1-amd64.deb
   - sudo mkdir -p ~/R/Library

From c19164e3824aec708d159f1cea4cf173d505b92c Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Wed, 12 Oct 2016 16:51:31 +0200
Subject: [PATCH 18/21] Removed trailing whitespace

---
 app/Lib/Rscripts/report.Rmd | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd
index ff55c0f..5f8ab02 100644
--- a/app/Lib/Rscripts/report.Rmd
+++ b/app/Lib/Rscripts/report.Rmd
@@ -247,10 +247,10 @@ colours are based on multiple choice items with 3 or 4 answer options.
   bar_freq <- ggplot2::ggplot(
     correct_df,
     ggplot2::aes(item, correct_percentage, fill = perc_col))
-  bar_freq <- 
-    bar_freq + 
+  bar_freq <-
+    bar_freq +
     # Create bar chart
-    geom_bar(stat = "identity") + 
+    geom_bar(stat = "identity") +
     # Creat colour boundary
     scale_fill_gradient(
       low    = "red",
@@ -306,7 +306,7 @@ thus it is coloured *red*. *Between 0 and .05*, an item is coloured *orange*.
   bar_irc <- ggplot2::ggplot(
     correct_df,
     ggplot2::aes(item, corrected_item_tot_cor, fill = irc_col))
-  bar_irc <-  
+  bar_irc <-
     bar_irc +
     geom_bar(stat = "identity") +
     scale_fill_gradient(
@@ -472,10 +472,7 @@ calc_n_plot <- function(ans_opt_df, key) {
     if (n_plots > 1) {
       for (i in 2:n_plots) {
         items_in_plot[[i]] <- unique(
-          ans_opt_df[
-            (((i - 1) * answ_opts_per_plot) + 1):(i * answ_opts_per_plot),
-            1
-          ]
+          ans_opt_df[(((i - 1) * answ_opts_per_plot) + 1) : (i * answ_opts_per_plot), 1]
         )
         if (items_in_plot[[i]][1] %in% items_in_plot[[i - 1]]) {
           items_in_plot[[i]] <- items_in_plot[[i]][-1]
@@ -537,7 +534,7 @@ is coloured *orange* in between these two.
           fill   = Correct,
           colour = perc_col)
         )
-      bar_freq1 <- 
+      bar_freq1 <-
         bar_freq1 +
         geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) +
         # Plot items
@@ -723,7 +720,7 @@ cum_hist <- function(student_scores, n_item) {
       df[(which(df[, 3] == 0, arr.ind = TRUE) + 1), 3]
   )
 
-  g <- 
+  g <-
     ggplot2::ggplot(
       data = df,
       ggplot2::aes(x = score, y = total)

From 34b2f984eb166cffa367746949ec32c0773d4353 Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Wed, 12 Oct 2016 17:21:16 +0200
Subject: [PATCH 19/21] Pass template filename as variable to R-script

---
 app/Lib/Rscripts/report.R        | 5 +++--
 app/Model/Exam.php               | 5 +++--
 app/Test/Case/Lib/RserveTest.php | 4 +++-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/app/Lib/Rscripts/report.R b/app/Lib/Rscripts/report.R
index 477040f..3cc88ca 100644
--- a/app/Lib/Rscripts/report.R
+++ b/app/Lib/Rscripts/report.R
@@ -1,4 +1,5 @@
-report <- function(filename,
+report <- function(template,
+                   filename,
                    number_answeroptions,
                    cronbach,
                    frequency_answer_options,
@@ -12,5 +13,5 @@ report <- function(filename,
                    item_names,
                    student_scores,
                    categories) {
-  rmarkdown::render("/home/travis/build/nlware/qdnatool/app/Lib/Rscripts/report.Rmd", output_format = "pdf_document", output_file = filename)
+  rmarkdown::render(template, output_format = "pdf_document", output_file = filename)
 }
diff --git a/app/Model/Exam.php b/app/Model/Exam.php
index 40993f8..7a09672 100644
--- a/app/Model/Exam.php
+++ b/app/Model/Exam.php
@@ -611,13 +611,14 @@ protected function _report($exam) {
 
 			$script[] = 'categories = c(  );';
 
+			$template = APP . 'Lib' . DS . 'Rscripts' . DS . 'report.Rmd';
 			$script[] = sprintf(
 				'report( ' .
-				'"%s", number_answeroptions, Cronbach, frequency_answer_options, percentage_answer_options, key, ' .
+				'"%s", "%s", number_answeroptions, Cronbach, frequency_answer_options, percentage_answer_options, key, ' .
 				'correct_frequency, correct_percentage, corrected_item_tot_cor, corrected_item_tot_cor_answ_option, "%s", ' .
 				'item_names, student_scores, categories' .
 				' );',
-				$tempFile, $exam['Exam']['name']
+				$template, $tempFile, $exam['Exam']['name']
 			);
 
 			$script = implode("\n", $script);
diff --git a/app/Test/Case/Lib/RserveTest.php b/app/Test/Case/Lib/RserveTest.php
index c9f98be..2606173 100644
--- a/app/Test/Case/Lib/RserveTest.php
+++ b/app/Test/Case/Lib/RserveTest.php
@@ -200,12 +200,14 @@ public function testExecuteRscriptsAnalyseTooLittleStudents() {
  */
 	public function testExecuteRscriptsReport() {
 		$filename = $this->__tmpPath . DS . 'testExecuteRscriptsReport.pdf';
+		$template = APP . 'Lib' . DS . 'Rscripts' . DS . 'report.Rmd';
 
 		$this->assertFileNotExists($filename);
 
 		$script = array();
 		$script[] = file_get_contents(APP . 'Lib' . DS . 'Rscripts' . DS . 'report.R');
 		$script[] = sprintf('filename = "%s";', $filename);
+		$script[] = sprintf('template = "%s";', $template);
 		$script[] = 'number_students = 2;';
 		$script[] = 'number_answeroptions = c( 3, 3, 3 );';
 		$script[] = 'number_questions = 3;';
@@ -221,7 +223,7 @@ public function testExecuteRscriptsReport() {
 		$script[] = 'item_names = c( "Item 1", "Item 2", "Item 3" );';
 		$script[] = 'student_scores = c( 7, 9 );';
 		$script[] = 'categories = c(  );';
-		$script[] = 'report( filename, number_answeroptions, cronbach, frequency_answer_options, percentage_answer_options, key, correct_frequency,
+		$script[] = 'report(template, filename, number_answeroptions, cronbach, frequency_answer_options, percentage_answer_options, key, correct_frequency,
 			correct_percentage, corrected_item_tot_cor, corrected_item_tot_cor_answ_option, title, item_names, student_scores, categories );';
 
 		$script = implode("\n", $script);

From 2f5d47985fc082a90af6f13089ae64f83f694ce6 Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Wed, 19 Oct 2016 12:19:03 +0200
Subject: [PATCH 20/21] Fixed Cs issues

---
 app/Lib/Rscripts/report.Rmd | 39 +++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd
index 5f8ab02..a12ed13 100644
--- a/app/Lib/Rscripts/report.Rmd
+++ b/app/Lib/Rscripts/report.Rmd
@@ -88,12 +88,10 @@ create_item_list <- function(number_answeroptions,
         c(LETTERS[1:number_answeroptions[i]], "Missing"),
         c(frequency_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]),
         c(percentage_answer_options[c(2:(number_answeroptions[i] + 1), 1), i]),
-        c(corrected_item_tot_cor_answ_option[
-          c(2:(number_answeroptions[i] + 1), 1),
-          i
-        ]),
+        c(corrected_item_tot_cor_answ_option[c(2:(number_answeroptions[i] + 1), 1), i]),
         correct,
-        row.names = NULL)
+        row.names = NULL
+      )
       colnames(item_list[[i]]) <- colnames2
 
     } else {
@@ -246,7 +244,8 @@ colours are based on multiple choice items with 3 or 4 answer options.
 
   bar_freq <- ggplot2::ggplot(
     correct_df,
-    ggplot2::aes(item, correct_percentage, fill = perc_col))
+    ggplot2::aes(item, correct_percentage, fill = perc_col)
+  )
   bar_freq <-
     bar_freq +
     # Create bar chart
@@ -305,7 +304,8 @@ thus it is coloured *red*. *Between 0 and .05*, an item is coloured *orange*.
   # Create bar chart
   bar_irc <- ggplot2::ggplot(
     correct_df,
-    ggplot2::aes(item, corrected_item_tot_cor, fill = irc_col))
+    ggplot2::aes(item, corrected_item_tot_cor, fill = irc_col)
+  )
   bar_irc <-
     bar_irc +
     geom_bar(stat = "identity") +
@@ -363,8 +363,8 @@ make_perc_col <- function(percentage, correct) {
       1 / 75 * percentage[correct == "Correct"] - 7 / 30
     )
   )
-  dif <-
-    percentage[correct == "Incorrect"] - min(percentage[correct == "Correct"])
+  dif <- percentage[correct == "Incorrect"] - min(percentage[correct == "Correct"])
+
   perc_col[correct == "Incorrect"] <- ifelse(
     dif >= 0,
     0,
@@ -373,7 +373,7 @@ make_perc_col <- function(percentage, correct) {
   perc_col
 }
 
-make_irc_col <- function(IRC, correct){
+make_irc_col <- function(IRC, correct) {
   # Calculate colours for the percentage answer options.
   #
   # Args:
@@ -383,15 +383,15 @@ make_irc_col <- function(IRC, correct){
   # Returns:
   #  Value between 0 and 1. 0 corresponds to red and 1 to green in the plots.
   #  For the correct answer options:
-  #    < 0: red
-  #    0 - 0.05: orange - green-ish
-  #    > 0.05: green
+  #   * < 0: red
+  #   * 0 - 0.05: orange - green-ish
+  #   * > 0.05: green
   # For the incorrect answer options: difference between correct answer option
   # and incorrect answer option are calculated. Colours are based on this
   # difference:
-  #    > 0: red
-  #    -0.05 - 0: orange - green-ish
-  #    < -0.05: red
+  #   * > 0: red
+  #   * -0.05 - 0: orange - green-ish
+  #   * < -0.05: red
 
   irc_col <- numeric()
   irc_col[correct == "Correct"] <- ifelse(
@@ -425,10 +425,7 @@ make_answ_df <- function(item_list, key, number_answeroptions, item_names) {
 
   for (i in 1:length(item_list)) {
     if (any(key[, i] != 0)) {
-      item_list[[i]]$perc_col <- with(
-        item_list[[i]],
-        make_perc_col(Percentage, Correct)
-      )
+      item_list[[i]]$perc_col <- with(item_list[[i]], make_perc_col(Percentage, Correct))
       item_list[[i]]$irc_col <- with(item_list[[i]], make_irc_col(IRC, Correct))
     }
   }
@@ -472,7 +469,7 @@ calc_n_plot <- function(ans_opt_df, key) {
     if (n_plots > 1) {
       for (i in 2:n_plots) {
         items_in_plot[[i]] <- unique(
-          ans_opt_df[(((i - 1) * answ_opts_per_plot) + 1) : (i * answ_opts_per_plot), 1]
+          ans_opt_df[(( ( i - 1) * answ_opts_per_plot) + 1) : (i * answ_opts_per_plot), 1]
         )
         if (items_in_plot[[i]][1] %in% items_in_plot[[i - 1]]) {
           items_in_plot[[i]] <- items_in_plot[[i]][-1]

From 727a9e3576d0e8e7ff147ed8b82ada8d9130dc61 Mon Sep 17 00:00:00 2001
From: Mark van Driel <mvdriel@nlware.com>
Date: Wed, 19 Oct 2016 13:20:38 +0200
Subject: [PATCH 21/21] Fixed Cs issues

---
 app/Lib/Rscripts/report.Rmd | 84 +++++++++++--------------------------
 1 file changed, 24 insertions(+), 60 deletions(-)

diff --git a/app/Lib/Rscripts/report.Rmd b/app/Lib/Rscripts/report.Rmd
index a12ed13..89c2ead 100644
--- a/app/Lib/Rscripts/report.Rmd
+++ b/app/Lib/Rscripts/report.Rmd
@@ -333,7 +333,6 @@ plot_overall_irc(correct_df)
 
 ```{r, echo = FALSE, eval = TRUE, warning= FALSE, fig.width = 11}
 
-# Create extra variables for the colours in the bar plots
 make_perc_col <- function(percentage, correct) {
   # Calculate colours for the percentage answer options.
   #
@@ -344,14 +343,14 @@ make_perc_col <- function(percentage, correct) {
   # Returns:
   #  Value between 0 and 1. 0 corresponds to red and 1 to green in the plots.
   #  For the correct options:
-  #    < 40: red
-  #    40 - 70: orange - green-ish
-  #    > 70: green
+  #   * < 40: red
+  #   * 40 - 70: orange - green-ish
+  #   * > 70: green
   #  For the incorrect options: difference between correct answer option and
   #  incorrect answer options are calculated. Colours are based on difference:
-  #    < -10: Green
-  #    -10 - 0: orange - green-ish
-  #    > 0: green
+  #   * < -10: green
+  #   * -10 - 0: orange - green-ish
+  #   * > 0: green
 
   perc_col <- vector("numeric", length = length(correct))
   perc_col[correct == "Correct"] <- ifelse(
@@ -373,7 +372,7 @@ make_perc_col <- function(percentage, correct) {
   perc_col
 }
 
-make_irc_col <- function(IRC, correct) {
+make_irc_col <- function(irc, correct) {
   # Calculate colours for the percentage answer options.
   #
   # Args:
@@ -395,14 +394,14 @@ make_irc_col <- function(IRC, correct) {
 
   irc_col <- numeric()
   irc_col[correct == "Correct"] <- ifelse(
-    IRC[correct == "Correct"] < 0,
+      irc[correct == "Correct"] < 0,
     0,
-    ifelse(IRC[correct == "Correct"] > .05,
+    ifelse(irc[correct == "Correct"] > .05,
       1,
-      8 * IRC[correct == "Correct"] + 3 / 10
+      8 * irc[correct == "Correct"] + 3 / 10
     )
   )
-  dif <- IRC[correct == "Incorrect"] - min(IRC[correct == "Correct"])
+  dif <- irc[correct == "Incorrect"] - min(irc[correct == "Correct"])
   irc_col[correct == "Incorrect"] <- ifelse(
     dif >= 0,
     0,
@@ -469,7 +468,7 @@ calc_n_plot <- function(ans_opt_df, key) {
     if (n_plots > 1) {
       for (i in 2:n_plots) {
         items_in_plot[[i]] <- unique(
-          ans_opt_df[(( ( i - 1) * answ_opts_per_plot) + 1) : (i * answ_opts_per_plot), 1]
+          ans_opt_df[( ( (i - 1) * answ_opts_per_plot) + 1) : (i * answ_opts_per_plot), 1]
         )
         if (items_in_plot[[i]][1] %in% items_in_plot[[i - 1]]) {
           items_in_plot[[i]] <- items_in_plot[[i]][-1]
@@ -615,10 +614,7 @@ this item incorrectly. In between these two, the option is coloured *orange*.
         bar_irc1 +
         geom_bar(aes(x = Answer.Option), stat = "identity", size = 1.5) +
         facet_grid(. ~ id, scales = "free_x", space = "free_x") +
-        scale_fill_manual(
-          values = c("Incorrect" = "Black", "Correct" = "White"),
-          guide  = FALSE
-        ) +
+        scale_fill_manual(values = c("Incorrect" = "Black", "Correct" = "White"), guide  = FALSE) +
         scale_colour_gradient(
           low    = "red",
           high   = "green",
@@ -626,9 +622,7 @@ this item incorrectly. In between these two, the option is coloured *orange*.
           guide  = FALSE
         ) +
         # Change y-axis limit to either the maximum and minimum IRC
-        coord_cartesian(
-          ylim = c(min(ans_opt_df$IRC) - .01, max(ans_opt_df$IRC)  + .01)
-        ) +
+        coord_cartesian(ylim = c(min(ans_opt_df$IRC) - .01, max(ans_opt_df$IRC)  + .01)) +
         labs(x = "Answer Options") +
         theme_bw() +
         theme(
@@ -674,22 +668,13 @@ cum_hist <- function(student_scores, n_item) {
   }
 
   cesuurlijnen <- seq(0.2, 0.8, 0.2) * length(student_scores)
-  gridlijnen <- round(
-    seq(0, length(student_scores), length(student_scores) / 25),
-    0
-  )
+  gridlijnen <- round(seq(0, length(student_scores), length(student_scores) / 25), 0)
 
-  gridlijnen <- gridlijnen[
-    -grep(paste(round(cesuurlijnen, 0), collapse = "|"), gridlijnen)
-  ]
+  gridlijnen <- gridlijnen[grep(paste(round(cesuurlijnen, 0), collapse = "|"), gridlijnen)]
 
   heights <- heights[1:length(unique(student_scores))]
   percent <- heights / length(student_scores) * 100
-  df <- data.frame(
-    score   = sort(unique(student_scores)),
-    total   = heights,
-    percent = round(percent, 2)
-  )
+  df <- data.frame(score   = sort(unique(student_scores)), total   = heights, percent = round(percent, 2))
 
   vec <- df[1, 1]:df[nrow(df), 1] #  save all possible total scores
 
@@ -709,19 +694,14 @@ cum_hist <- function(student_scores, n_item) {
   # if no one gets a certain total score,
   # replace the 0 with the next total score
   suppressWarnings(
-    df[which(df[, 2] == 0, arr.ind = TRUE), 2] <-
-      df[(which(df[, 2] == 0, arr.ind = TRUE) + 1), 2]
+    df[which(df[, 2] == 0, arr.ind = TRUE), 2] <- df[(which(df[, 2] == 0, arr.ind = TRUE) + 1), 2]
   )
   suppressWarnings(
-    df[which(df[, 3] == 0, arr.ind = TRUE), 3] <-
-      df[(which(df[, 3] == 0, arr.ind = TRUE) + 1), 3]
+    df[which(df[, 3] == 0, arr.ind = TRUE), 3] <- df[(which(df[, 3] == 0, arr.ind = TRUE) + 1), 3]
   )
 
   g <-
-    ggplot2::ggplot(
-      data = df,
-      ggplot2::aes(x = score, y = total)
-    ) +
+    ggplot2::ggplot(data = df, ggplot2::aes(x = score, y = total)) +
     geom_hline(
       yintercept = c(gridlijnen),
       linetype   = "solid",
@@ -736,15 +716,8 @@ cum_hist <- function(student_scores, n_item) {
     geom_bar(stat = "identity") +
     xlab("Total score") +
     ylab("% of students with score or higher") +
-    scale_y_continuous(
-      breaks = c(0, cesuurlijnen, length(student_scores)),
-      labels = c(seq(0, 100, 20))
-    ) +
-    scale_x_continuous(
-      limits = c(df[1, 1] - 1, n_item + 1),
-      breaks = 0:n_item,
-      expand = c(0, 0)
-    ) +
+    scale_y_continuous(breaks = c(0, cesuurlijnen, length(student_scores)), labels = c(seq(0, 100, 20))) +
+    scale_x_continuous(limits = c(df[1, 1] - 1, n_item + 1), breaks = 0:n_item, expand = c(0, 0)) +
     theme_bw() +
     theme(panel.grid = element_blank())
 
@@ -785,22 +758,13 @@ if (!length(categories) == 0) {
       key[, items]
     )
 
-    correct_df <- create_df(
-      item_names[items],
-      correct_percentage[items],
-      categories[[i]]$corrected_item_tot_cor
-    )
+    correct_df <- create_df(item_names[items], correct_percentage[items], categories[[i]]$corrected_item_tot_cor)
 
     plot_overall_perc(correct_df)
     plot_overall_irc(correct_df)
 
     if (any(key[, items] != 0)) {
-      ans_opt_df <- make_answ_df(
-        item_list,
-        key[, items],
-        number_answeroptions[items],
-        item_names[items]
-      )
+      ans_opt_df <- make_answ_df(item_list, key[, items], number_answeroptions[items], item_names[items])
       calc_n_plot(ans_opt_df, key[, items])
       plot_perc_answ(ans_opt_df, items_in_plot, key[, items])
       plot_irc_answ(ans_opt_df, items_in_plot, key[, items])