From e4a91fbc2d307c11dc5b31e30e6e0293ec3dce08 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz Date: Mon, 2 Feb 2026 17:04:56 -0700 Subject: [PATCH 01/30] Add ExamSoft integration and refactor question handling - Introduced ExamSoft converter for processing ExamSoft files. - Registered new converters for different file types. - Updated the convert method to accept an import source. - Refactored question handling to support multiple choice questions. - Added pandoc-ruby gem for document conversion. --- .DS_Store | Bin 0 -> 6148 bytes Gemfile | 1 + Gemfile.lock | 2 + bin/convert | 3 +- lib/atomic_assessments_import.rb | 29 ++- .../csv/converter.rb | 6 +- lib/atomic_assessments_import/exam_soft.rb | 8 + .../exam_soft/converter.rb | 208 ++++++++++++++++++ .../{csv => }/questions/multiple_choice.rb | 4 +- .../{csv => }/questions/question.rb | 6 +- .../{csv => }/utils.rb | 5 +- 11 files changed, 249 insertions(+), 23 deletions(-) create mode 100644 .DS_Store create mode 100644 lib/atomic_assessments_import/exam_soft.rb create mode 100644 lib/atomic_assessments_import/exam_soft/converter.rb rename lib/atomic_assessments_import/{csv => }/questions/multiple_choice.rb (99%) rename lib/atomic_assessments_import/{csv => }/questions/question.rb (95%) rename lib/atomic_assessments_import/{csv => }/utils.rb (93%) diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..53c90f0dc88c9b0cc805d91cf942741a91b46fb0 GIT binary patch literal 6148 zcmeHKOHRWu5PfbVwLlkLvJ4!c5;q8?5me$M(E|WU1FD*S)WSYz-~b$g3&I=QLP;97 z2*FHcKgWJ!dtPNv0>BJkrYArjK$k@@*k{#XGA?DuTE0j8jGbeK6nA*QmOnLBKo$7y z3dq?#;)#963Ja9aZ)Q*N0vVPZ-SS+|$XJuN;qw4b99gh`O-#r6DoyfL!rA018L|vM ztg!{OkO2 zD8{zD00@R2Baekcj?j#AB|2B*KQWARXMgB-Lc;3U_EEcYy8!YdqF7gjzU g+lHj#Ll$cs3#CDfJQfbwL(>-lD}z?5z>g~M37dA5C;$Ke literal 0 HcmV?d00001 diff --git a/Gemfile b/Gemfile index cc51bb7..f3001ab 100644 --- a/Gemfile +++ b/Gemfile @@ -10,6 +10,7 @@ group :development do gem "rubocop" gem "rubocop-performance" 
gem "rubocop-rspec" + gem 'pandoc-ruby' end group :test do diff --git a/Gemfile.lock b/Gemfile.lock index 44ed9ee..4c3575a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -49,6 +49,7 @@ GEM racc (~> 1.4) nokogiri (1.18.3-arm64-darwin) racc (~> 1.4) + pandoc-ruby (2.1.10) parallel (1.26.3) parser (3.3.7.1) ast (~> 2.4.1) @@ -107,6 +108,7 @@ PLATFORMS DEPENDENCIES atomic_assessments_import! byebug + pandoc-ruby rspec rubocop rubocop-performance diff --git a/bin/convert b/bin/convert index e31fce8..f8da161 100755 --- a/bin/convert +++ b/bin/convert @@ -6,11 +6,12 @@ require "atomic_assessments_import" file = ARGV[0] export_path = ARGV[1] +import_from = ARGV[2] if file.nil? || export_path.nil? puts "Usage: convert.rb " exit 1 end -res = AtomicAssessmentsImport.convert(file) +res = AtomicAssessmentsImport.convert(file, import_from) AtomicAssessmentsImport::Export.create(export_path, res) diff --git a/lib/atomic_assessments_import.rb b/lib/atomic_assessments_import.rb index 1c276b6..f5db5b8 100644 --- a/lib/atomic_assessments_import.rb +++ b/lib/atomic_assessments_import.rb @@ -7,24 +7,31 @@ require_relative "atomic_assessments_import/csv" require_relative "atomic_assessments_import/writer" require_relative "atomic_assessments_import/export" +require_relative "atomic_assessments_import/exam_soft" module AtomicAssessmentsImport class Error < StandardError; end - def self.convert(path) - type = MimeMagic.by_path(path)&.type - - converter = - case type - when "text/csv" - CSV::Converter.new(path) - else - raise "Unsupported file type" - end + def self.register_converter(mime_type, source, klass) + @converters ||= {} + @converters[[mime_type, source]] = klass + end - converter.convert + def self.convert(path, import_from) + type = MimeMagic.by_path(path)&.type + converter_class = @converters[[type, import_from]] + + raise "Unsupported file type: #{type} from #{import_from == nil ? "Unspecified Source" : import_from}. 
Make sure the file type conversion is available for the specified source." unless converter_class + + converter_class.new(path).convert end + # Register converters + register_converter("text/csv", nil, CSV::Converter) + register_converter("application/rtf", "examsoft", ExamSoft::Converter) + register_converter("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "examsoft", ExamSoft::Converter) + register_converter("text/html", "examsoft", ExamSoft::Converter) # Todo: check if this works correctly + def self.convert_to_aa_format(input_path, output_path) result = convert(input_path) AtomicAssessmentsImport::Export.create(output_path, result) diff --git a/lib/atomic_assessments_import/csv/converter.rb b/lib/atomic_assessments_import/csv/converter.rb index 4aabf64..0e442c2 100644 --- a/lib/atomic_assessments_import/csv/converter.rb +++ b/lib/atomic_assessments_import/csv/converter.rb @@ -3,9 +3,9 @@ require "csv" require "active_support/core_ext/digest/uuid" -require_relative "questions/question" -require_relative "questions/multiple_choice" -require_relative "utils" +require_relative "../questions/question" +require_relative "../questions/multiple_choice" +require_relative "../utils" module AtomicAssessmentsImport module CSV diff --git a/lib/atomic_assessments_import/exam_soft.rb b/lib/atomic_assessments_import/exam_soft.rb new file mode 100644 index 0000000..218413d --- /dev/null +++ b/lib/atomic_assessments_import/exam_soft.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +require_relative "exam_soft/converter" + +module AtomicAssessmentsImport + module ExamSoft + end +end diff --git a/lib/atomic_assessments_import/exam_soft/converter.rb b/lib/atomic_assessments_import/exam_soft/converter.rb new file mode 100644 index 0000000..1b1b3c5 --- /dev/null +++ b/lib/atomic_assessments_import/exam_soft/converter.rb @@ -0,0 +1,208 @@ +# frozen_string_literal: true + +require "pandoc-ruby" +require "active_support/core_ext/digest/uuid" + 
+require_relative "../questions/question" +require_relative "../questions/multiple_choice" +require_relative "../utils" + +module AtomicAssessmentsImport + module ExamSoft + class Converter + + def initialize(file) + @file = file + end + + + def convert + # Step 1: Parse the ExamSoft file to HTML using Pandoc to formalize the structure + html = PandocRuby.new([@file], from: @file.split('.').last).to_html + + # Step 2: Extract questions and convert them into AA format + + items = [] + questions = [] + + + # 1. Chunking Regex (The "Slicer") + chunk_pattern = /

(?:Type:.*?)?Folder:.*?(?=

(?:Type:.*?)?Folder:|\z)/m + + # 2. Field Extraction Regexes + meta_regex = /(?:Type:\s*(?[^<]+?)\s*)?Folder:\s*(?[^<]+?)\s*Title:\s*(?[^<]+?)\s*Category:\s*(?<category>.+?)\s*(?=\d+\))/m + question_regex = /\d+\)\s*(?<question>.+?)\s*~/m + explanation_regex = /~\s*(?<explanation>.+?)(?=<\/p>)/m + options_regex = /<p>(?<marker>\*?)(?<letter>[a-o])\)\s*(?<text>.+?)<\/p>/ + + parsed_questions = [] + + # Logic: Chunk the text first + chunks = html.scan(chunk_pattern) + + chunks.each do |chunk| + # Clean up internal whitespace for metadata/text matching + clean_chunk = chunk.gsub(/\n/, " ").gsub(/\s+/, " ") + + meta = clean_chunk.match(meta_regex) + q_text = clean_chunk.match(question_regex) + expl = clean_chunk.match(explanation_regex) + + # Scrape options from the original chunk to preserve HTML structure + raw_options = chunk.scan(options_regex) + + # Identify ALL indices where the marker is '*' + # We use .map { |i| i + 1 } to convert 0-index to 1-index numbers + correct_indices = raw_options.each_index.select { |i| raw_options[i][0] == "*" }.map { |i| i + 1 } + + + type = meta && meta[:type] ? meta[:type].strip.downcase : "mcq" + folder = meta ? meta[:folder].strip : nil + title = meta ? meta[:title].strip : nil + categories = meta ? meta[:category].split(",").map(&:strip) : [] + question = q_text ? q_text[:question].strip : nil + explanation = expl ? expl[:explanation].strip : nil + answer_options = raw_options.map { |opt| opt[2].strip } + correct_answer_indices = correct_indices + + + # Note: a lot of these are nil because ExamSoft RTF doesn't have all the same fields as CSV. + # They're listed here to show what is being mapped where possible. 
+ row_mock = { + "question id" => nil, + "folder" => folder, + "title" => title, + "category" => categories, + "import type" => nil, + "description" => nil, + "question text" => question, + "question type" => type, + "stimulus review" => nil, + "instructor stimulus" => nil, + "correct answer" => correct_answer_indices.map { |i| ("a".ord + i - 1).chr }.join("; "), + "scoring type" => nil, + "points" => nil, + "distractor rationale" => nil, + "sample answer" => nil, + "acknowledgements" => nil, + "general feedback" => nil, + "correct feedback" => explanation, + "incorrect feedback" => nil, + "shuffle options" => nil, + "template" => nil, + } + + # Add option keys for the MultipleChoice class + answer_options.each_with_index do |option_text, index| + option_letter = ("a".ord + index).chr + row_mock["option #{option_letter}"] = option_text + end + + + + + item, question_widgets = convert_row(row_mock) + + items << item + questions += question_widgets + rescue StandardError => e + raise e, "Error processing title \"#{title}\": #{e.message}" + end + + + + # items << item + + # question_widgets = { + # type: question.question_type, + # widget_type: "response", + # reference: question.reference, + # data: { + # stimulus: question, + # type: question.question_type, + # metadata: metadata, + # **{ # TODO finish this part + # stimulus_review: nil,#@chunk["stimulus review"], + # instructor_stimulus: nil#@chunk["instructor stimulus"], + # }.compact, + # }, #question_data, + # } + # questions << question_widgets + + + # We should match what we return from the CSV converter + { + activities: [], + items:, + questions:, + features: [], + errors: [], + } + end + + private + + def categories_to_tags(categories) + tags = {} + categories.each do |cat| + if cat.include?("/") + key, value = cat.split("/", 2).map(&:strip) # TODO: deal with multiple slashes? 
- It could be Tag name/Tag Value/Tag Value2 + tags[key.to_sym] ||= [] + tags[key.to_sym] << value + else + tags[cat.to_sym] ||= [] + end + end + tags + end + + def convert_row(row) + # The csv files had a column for question id, but ExamSoft rtf files does not seem to have that. I'll include Folder instead + source = "<p>ExamSoft Import on #{Time.now.strftime('%Y-%m-%d')}</p>\n" + if row["question id"].present? + source += "<p>External id: #{row['question id']}</p>\n" + elsif row["folder"].present? + source += "<p>From Folder: #{row['folder']}</p>\n" # Is folder a good substitute? + end + + + question = Questions::Question.load(row) + item = { + reference: SecureRandom.uuid, + title: row["title"] || "", + status: "published", + tags: categories_to_tags(row["category"] || []), + metadata: { + import_date: Time.now.iso8601, + import_type: row["import_type"] || "examsoft", + **{ + external_id: row["question id"], + external_id_domain: row["question id"].present? ? "examsoft" : nil, # IDK about this one + alignment: nil # alignment_urls(row), # No alignment URLs in ExamSoft RTF? 
+ }.compact, + }, + source: source, + description: row["description"] || "", + questions: [ + { + reference: question.reference, + type: question.question_type, + }, + ], + features: [], + definition: { + widgets: [ + { + reference: question.reference, + widget_type: "response", + }, + ] + }, + } + [item, [question.to_learnosity]] + end + + + end + end +end diff --git a/lib/atomic_assessments_import/csv/questions/multiple_choice.rb b/lib/atomic_assessments_import/questions/multiple_choice.rb similarity index 99% rename from lib/atomic_assessments_import/csv/questions/multiple_choice.rb rename to lib/atomic_assessments_import/questions/multiple_choice.rb index 716d418..043fb9f 100644 --- a/lib/atomic_assessments_import/csv/questions/multiple_choice.rb +++ b/lib/atomic_assessments_import/questions/multiple_choice.rb @@ -3,7 +3,7 @@ require_relative "question" module AtomicAssessmentsImport - module CSV + # module CSV module Questions class MultipleChoice < Question QUESTION_INDEXES = ("a".."o").to_a.freeze @@ -100,5 +100,5 @@ def ui_style end end end - end + # end end diff --git a/lib/atomic_assessments_import/csv/questions/question.rb b/lib/atomic_assessments_import/questions/question.rb similarity index 95% rename from lib/atomic_assessments_import/csv/questions/question.rb rename to lib/atomic_assessments_import/questions/question.rb index 5f9bbc0..6a8fbe1 100644 --- a/lib/atomic_assessments_import/csv/questions/question.rb +++ b/lib/atomic_assessments_import/questions/question.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module AtomicAssessmentsImport - module CSV + # module CSV module Questions class Question def initialize(row) @@ -12,7 +12,7 @@ def initialize(row) def self.load(row) case row["question type"] - when nil, "", /multiple choice/i, /mcq/i + when nil, "", /multiple choice/i, /mcq/i, /ma/i # TODO - verify ma is fine here MultipleChoice.new(row) else raise "Unknown question type #{row['question type']}" @@ -82,5 +82,5 @@ def to_learnosity end end 
end - end + # end end diff --git a/lib/atomic_assessments_import/csv/utils.rb b/lib/atomic_assessments_import/utils.rb similarity index 93% rename from lib/atomic_assessments_import/csv/utils.rb rename to lib/atomic_assessments_import/utils.rb index bba5778..ea2a12c 100644 --- a/lib/atomic_assessments_import/csv/utils.rb +++ b/lib/atomic_assessments_import/utils.rb @@ -1,13 +1,12 @@ # frozen_string_literal: true -require "csv" require "active_support/core_ext/digest/uuid" require_relative "questions/question" require_relative "questions/multiple_choice" module AtomicAssessmentsImport - module CSV + # module CSV module Utils def self.parse_boolean(value, default:) case value&.downcase @@ -20,5 +19,5 @@ def self.parse_boolean(value, default:) end end end - end + # end end From 35baba2f90c83c42ace99975c430d5e744c05150 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz <jaschwartz27@gmail.com> Date: Tue, 3 Feb 2026 13:10:25 -0700 Subject: [PATCH 02/30] Cleaned up a few comments and code sections. 
--- lib/atomic_assessments_import.rb | 9 +- .../exam_soft/converter.rb | 48 +----- .../questions/multiple_choice.rb | 156 +++++++++--------- .../questions/question.rb | 134 ++++++++------- lib/atomic_assessments_import/utils.rb | 22 ++- 5 files changed, 168 insertions(+), 201 deletions(-) diff --git a/lib/atomic_assessments_import.rb b/lib/atomic_assessments_import.rb index f5db5b8..d9e2dd6 100644 --- a/lib/atomic_assessments_import.rb +++ b/lib/atomic_assessments_import.rb @@ -26,11 +26,16 @@ def self.convert(path, import_from) converter_class.new(path).convert end - # Register converters + # Register converters: format is register_converter(mime_type, source, class) + # csv - source nil because it was the original/default register_converter("text/csv", nil, CSV::Converter) + # rtf register_converter("application/rtf", "examsoft", ExamSoft::Converter) + # docx register_converter("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "examsoft", ExamSoft::Converter) - register_converter("text/html", "examsoft", ExamSoft::Converter) # Todo: check if this works correctly + # html + register_converter("text/html", "examsoft", ExamSoft::Converter) + register_converter("application/xhtml+xml", "examsoft", ExamSoft::Converter) def self.convert_to_aa_format(input_path, output_path) result = convert(input_path) diff --git a/lib/atomic_assessments_import/exam_soft/converter.rb b/lib/atomic_assessments_import/exam_soft/converter.rb index 1b1b3c5..23c2a7b 100644 --- a/lib/atomic_assessments_import/exam_soft/converter.rb +++ b/lib/atomic_assessments_import/exam_soft/converter.rb @@ -26,7 +26,7 @@ def convert questions = [] - # 1. Chunking Regex (The "Slicer") + # Chunking Regex (The "Slicer") for ExamSoft format - splits at each question block chunk_pattern = /<p>(?:Type:.*?)?Folder:.*?(?=<p>(?:Type:.*?)?Folder:|\z)/m # 2. 
Field Extraction Regexes @@ -37,25 +37,19 @@ def convert parsed_questions = [] - # Logic: Chunk the text first chunks = html.scan(chunk_pattern) - chunks.each do |chunk| - # Clean up internal whitespace for metadata/text matching clean_chunk = chunk.gsub(/\n/, " ").gsub(/\s+/, " ") meta = clean_chunk.match(meta_regex) q_text = clean_chunk.match(question_regex) expl = clean_chunk.match(explanation_regex) - - # Scrape options from the original chunk to preserve HTML structure raw_options = chunk.scan(options_regex) - # Identify ALL indices where the marker is '*' + # Identify ALL indices where the marker is '*' to denote correct answers # We use .map { |i| i + 1 } to convert 0-index to 1-index numbers correct_indices = raw_options.each_index.select { |i| raw_options[i][0] == "*" }.map { |i| i + 1 } - type = meta && meta[:type] ? meta[:type].strip.downcase : "mcq" folder = meta ? meta[:folder].strip : nil title = meta ? meta[:title].strip : nil @@ -64,7 +58,6 @@ def convert explanation = expl ? expl[:explanation].strip : nil answer_options = raw_options.map { |opt| opt[2].strip } correct_answer_indices = correct_indices - # Note: a lot of these are nil because ExamSoft RTF doesn't have all the same fields as CSV. # They're listed here to show what is being mapped where possible. 
@@ -96,10 +89,7 @@ def convert answer_options.each_with_index do |option_text, index| option_letter = ("a".ord + index).chr row_mock["option #{option_letter}"] = option_text - end - - - + end item, question_widgets = convert_row(row_mock) @@ -109,28 +99,6 @@ def convert raise e, "Error processing title \"#{title}\": #{e.message}" end - - - # items << item - - # question_widgets = { - # type: question.question_type, - # widget_type: "response", - # reference: question.reference, - # data: { - # stimulus: question, - # type: question.question_type, - # metadata: metadata, - # **{ # TODO finish this part - # stimulus_review: nil,#@chunk["stimulus review"], - # instructor_stimulus: nil#@chunk["instructor stimulus"], - # }.compact, - # }, #question_data, - # } - # questions << question_widgets - - - # We should match what we return from the CSV converter { activities: [], items:, @@ -146,7 +114,7 @@ def categories_to_tags(categories) tags = {} categories.each do |cat| if cat.include?("/") - key, value = cat.split("/", 2).map(&:strip) # TODO: deal with multiple slashes? - It could be Tag name/Tag Value/Tag Value2 + key, value = cat.split("/", 2).map(&:strip) # TODO: deal with multiple slashes? - It could be Tag name/Value/Value2/... Right now it just splits at the first slash and treats the rest as the value. tags[key.to_sym] ||= [] tags[key.to_sym] << value else @@ -162,7 +130,7 @@ def convert_row(row) if row["question id"].present? source += "<p>External id: #{row['question id']}</p>\n" elsif row["folder"].present? - source += "<p>From Folder: #{row['folder']}</p>\n" # Is folder a good substitute? + source += "<p>From Folder: #{row['folder']}</p>\n" # Is folder a good substitute if there's no question id? end @@ -175,10 +143,10 @@ def convert_row(row) metadata: { import_date: Time.now.iso8601, import_type: row["import_type"] || "examsoft", - **{ + **{ # TODO: decide about this section - what is the external id domain? Do we need alignment URLs from ExamSoft RTF? 
external_id: row["question id"], - external_id_domain: row["question id"].present? ? "examsoft" : nil, # IDK about this one - alignment: nil # alignment_urls(row), # No alignment URLs in ExamSoft RTF? + external_id_domain: row["question id"].present? ? "examsoft" : nil, + alignment: nil # alignment_urls(row) }.compact, }, source: source, diff --git a/lib/atomic_assessments_import/questions/multiple_choice.rb b/lib/atomic_assessments_import/questions/multiple_choice.rb index 043fb9f..ac1b7b4 100644 --- a/lib/atomic_assessments_import/questions/multiple_choice.rb +++ b/lib/atomic_assessments_import/questions/multiple_choice.rb @@ -3,102 +3,100 @@ require_relative "question" module AtomicAssessmentsImport - # module CSV - module Questions - class MultipleChoice < Question - QUESTION_INDEXES = ("a".."o").to_a.freeze + module Questions + class MultipleChoice < Question + QUESTION_INDEXES = ("a".."o").to_a.freeze - def question_type - "mcq" - end + def question_type + "mcq" + end - def question_data - raise "Missing correct answer" if correct_responses.empty? - raise "Missing options" if options.empty? + def question_data + raise "Missing correct answer" if correct_responses.empty? + raise "Missing options" if options.empty? 
- super.deep_merge( - { - multiple_responses: multiple_responses, - options: options, - validation: { - scoring_type: scoring_type, - valid_response: { - score: points, - value: correct_responses, - }, - rounding: "none", - penalty: 1, + super.deep_merge( + { + multiple_responses: multiple_responses, + options: options, + validation: { + scoring_type: scoring_type, + valid_response: { + score: points, + value: correct_responses, }, - shuffle_options: Utils.parse_boolean(@row["shuffle options"], default: false), - ui_style: ui_style, - } - ) - end + rounding: "none", + penalty: 1, + }, + shuffle_options: Utils.parse_boolean(@row["shuffle options"], default: false), + ui_style: ui_style, + } + ) + end + + def metadata + super.merge( + { + distractor_rationale_response_level: distractor_rationale_response_level, + } + ) + end - def metadata - super.merge( + def options + QUESTION_INDEXES.filter_map.with_index do |value, cnt| + key = "option #{value}" + if @row[key].present? { - distractor_rationale_response_level: distractor_rationale_response_level, + label: @row[key], + value: cnt.to_s, } - ) - end - - def options - QUESTION_INDEXES.filter_map.with_index do |value, cnt| - key = "option #{value}" - if @row[key].present? 
- { - label: @row[key], - value: cnt.to_s, - } - end end end + end - def correct_responses - correct = @row["correct answer"]&.split(";")&.map(&:strip)&.map(&:downcase) || [] + def correct_responses + correct = @row["correct answer"]&.split(";")&.map(&:strip)&.map(&:downcase) || [] - correct.filter_map do |value| - QUESTION_INDEXES.find_index(value).to_s - end + correct.filter_map do |value| + QUESTION_INDEXES.find_index(value).to_s end + end - def distractor_rationale_response_level - QUESTION_INDEXES.map do |value| - key = "option #{value} feedback" - @row[key].presence || "" - end.reverse.drop_while(&:blank?).reverse - end + def distractor_rationale_response_level + QUESTION_INDEXES.map do |value| + key = "option #{value} feedback" + @row[key].presence || "" + end.reverse.drop_while(&:blank?).reverse + end - def multiple_responses - case @row["template"]&.downcase - when "multiple response", "block layout multiple response", "choice matrix", - "choice matrix inline", "choice matrix labels" - true - else - false - end + def multiple_responses + case @row["template"]&.downcase + when "multiple response", "block layout multiple response", "choice matrix", + "choice matrix inline", "choice matrix labels" + true + else + false end + end - def ui_style - case @row["template"]&.downcase - when "multiple response" - { type: "horizontal" } - when "block layout", "block layout multiple response" - { choice_label: "upper-alpha", type: "block" } - when "choice matrix" - { horizontal_lines: false, type: "table" } - when "choice matrix inline" - { horizontal_lines: false, type: "inline" } - when "choice matrix labels" - { stem_numeration: "upper-alpha", horizontal_lines: false, type: "table" } - when nil, "", "multiple choice", "standard" - { type: "horizontal" } - else - raise "Unknown template: #{@row["template"]}" - end + def ui_style + case @row["template"]&.downcase + when "multiple response" + { type: "horizontal" } + when "block layout", "block layout multiple 
response" + { choice_label: "upper-alpha", type: "block" } + when "choice matrix" + { horizontal_lines: false, type: "table" } + when "choice matrix inline" + { horizontal_lines: false, type: "inline" } + when "choice matrix labels" + { stem_numeration: "upper-alpha", horizontal_lines: false, type: "table" } + when nil, "", "multiple choice", "standard" + { type: "horizontal" } + else + raise "Unknown template: #{@row["template"]}" end end end - # end + end end diff --git a/lib/atomic_assessments_import/questions/question.rb b/lib/atomic_assessments_import/questions/question.rb index 6a8fbe1..d99c7ab 100644 --- a/lib/atomic_assessments_import/questions/question.rb +++ b/lib/atomic_assessments_import/questions/question.rb @@ -1,86 +1,84 @@ # frozen_string_literal: true module AtomicAssessmentsImport - # module CSV - module Questions - class Question - def initialize(row) - @row = row - # @question_reference = Digest::UUID.uuid_v5(Digest::UUID::URL_NAMESPACE, "#{@item_url}/question") - @reference = SecureRandom.uuid - end + module Questions + class Question + def initialize(row) + @row = row + # @question_reference = Digest::UUID.uuid_v5(Digest::UUID::URL_NAMESPACE, "#{@item_url}/question") + @reference = SecureRandom.uuid + end - def self.load(row) - case row["question type"] - when nil, "", /multiple choice/i, /mcq/i, /ma/i # TODO - verify ma is fine here - MultipleChoice.new(row) - else - raise "Unknown question type #{row['question type']}" - end + def self.load(row) + case row["question type"] + when nil, "", /multiple choice/i, /mcq/i, /ma/i # TODO - verify ma is fine here + MultipleChoice.new(row) + else + raise "Unknown question type #{row['question type']}" end + end - attr_reader :reference + attr_reader :reference - def question_type - raise NotImplementedError - end + def question_type + raise NotImplementedError + end - def question_data - { - stimulus: @row["question text"], - type: question_type, - metadata: metadata, - **{ - stimulus_review: 
@row["stimulus review"], - instructor_stimulus: @row["instructor stimulus"], - }.compact, - } - end + def question_data + { + stimulus: @row["question text"], + type: question_type, + metadata: metadata, + **{ + stimulus_review: @row["stimulus review"], + instructor_stimulus: @row["instructor stimulus"], + }.compact, + } + end - def metadata - { - distractor_rationale: @row["distractor rationale"], - sample_answer: @row["sample answer"], - acknowledgements: @row["acknowledgements"], - general_feedback: @row["general feedback"], - correct_feedback: @row["correct feedback"], - partially_correct_feedback: @row["partially correct feedback"], - incorrect_feedback: @row["incorrect feedback"], - }.compact - end + def metadata + { + distractor_rationale: @row["distractor rationale"], + sample_answer: @row["sample answer"], + acknowledgements: @row["acknowledgements"], + general_feedback: @row["general feedback"], + correct_feedback: @row["correct feedback"], + partially_correct_feedback: @row["partially correct feedback"], + incorrect_feedback: @row["incorrect feedback"], + }.compact + end - def scoring_type - case @row["scoring type"] - when nil, "", /Partial Match Per Response/i - "partialMatchV2" - when /Partial Match/i - "partialMatch" - when /Exact Match/i - "exactMatch" - else - raise "Unknown scoring type #{@row['scoring type']}" - end + def scoring_type + case @row["scoring type"] + when nil, "", /Partial Match Per Response/i + "partialMatchV2" + when /Partial Match/i + "partialMatch" + when /Exact Match/i + "exactMatch" + else + raise "Unknown scoring type #{@row['scoring type']}" end + end - def points - if @row["points"].blank? - 1 - else - Float(@row["points"]) - end - rescue ArgumentError + def points + if @row["points"].blank? 
1 + else + Float(@row["points"]) end + rescue ArgumentError + 1 + end - def to_learnosity - { - type: question_type, - widget_type: "response", - reference: @reference, - data: question_data, - } - end + def to_learnosity + { + type: question_type, + widget_type: "response", + reference: @reference, + data: question_data, + } end end - # end + end end diff --git a/lib/atomic_assessments_import/utils.rb b/lib/atomic_assessments_import/utils.rb index ea2a12c..5db66a4 100644 --- a/lib/atomic_assessments_import/utils.rb +++ b/lib/atomic_assessments_import/utils.rb @@ -6,18 +6,16 @@ require_relative "questions/multiple_choice" module AtomicAssessmentsImport - # module CSV - module Utils - def self.parse_boolean(value, default:) - case value&.downcase - when "true", "yes", "y", "1" - true - when "false", "no", "n", "0" - false - else - default - end + module Utils + def self.parse_boolean(value, default:) + case value&.downcase + when "true", "yes", "y", "1" + true + when "false", "no", "n", "0" + false + else + default end end - # end + end end From 7d46e2e0567e10ab6a20759403d254a5c360ac0e Mon Sep 17 00:00:00 2001 From: Jacob Schwartz <jaschwartz27@gmail.com> Date: Thu, 5 Feb 2026 16:54:23 -0700 Subject: [PATCH 03/30] Update .gitignore, refine Gemfile dependencies, enhance convert script usage instructions, and improve converter registration and specs --- .gitignore | 3 +++ Gemfile | 2 +- Gemfile.lock | 2 +- bin/convert | 11 ++++++++--- lib/atomic_assessments_import.rb | 15 ++++++++++----- .../exam_soft/converter.rb | 15 +++++++-------- .../csv/questions/multiple_choice_spec.rb | 2 +- .../csv/questions/question_spec.rb | 10 +++++----- spec/atomic_assessments_import/csv/utils_spec.rb | 2 +- spec/atomic_assessments_import_spec.rb | 2 +- 10 files changed, 38 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index bf4e0dc..a00b8d9 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,6 @@ # rspec failure tracking .rspec_status + +# MacOS system files 
+.DS_Store diff --git a/Gemfile b/Gemfile index f3001ab..9f4b35b 100644 --- a/Gemfile +++ b/Gemfile @@ -10,7 +10,7 @@ group :development do gem "rubocop" gem "rubocop-performance" gem "rubocop-rspec" - gem 'pandoc-ruby' + gem 'pandoc-ruby', '~> 2.1' end group :test do diff --git a/Gemfile.lock b/Gemfile.lock index 4c3575a..453b60b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -108,7 +108,7 @@ PLATFORMS DEPENDENCIES atomic_assessments_import! byebug - pandoc-ruby + pandoc-ruby (~> 2.1) rspec rubocop rubocop-performance diff --git a/bin/convert b/bin/convert index f8da161..37dc8c8 100755 --- a/bin/convert +++ b/bin/convert @@ -6,12 +6,17 @@ require "atomic_assessments_import" file = ARGV[0] export_path = ARGV[1] -import_from = ARGV[2] +converter = ARGV[2] if file.nil? || export_path.nil? - puts "Usage: convert.rb <file> <export_path>" + puts "Usage: bin/convert <file> <export_path> [converter]" + puts " <file> Path to CSV or RTF file to convert" + puts " <export_path> Path for output ZIP file" + puts " [converter] Which converter to use- 'examsoft' for files coming from ExamSoft, 'csv' for standard CSV files. Defaults to csv if not specified." 
exit 1 end -res = AtomicAssessmentsImport.convert(file, import_from) +converter ||= "csv" + +res = AtomicAssessmentsImport.convert(file, converter) AtomicAssessmentsImport::Export.create(export_path, res) diff --git a/lib/atomic_assessments_import.rb b/lib/atomic_assessments_import.rb index d9e2dd6..2d22292 100644 --- a/lib/atomic_assessments_import.rb +++ b/lib/atomic_assessments_import.rb @@ -26,19 +26,24 @@ def self.convert(path, import_from) converter_class.new(path).convert end + ###################### # Register converters: format is register_converter(mime_type, source, class) - # csv - source nil because it was the original/default + ###################### + # CSV converter - csv is the original/default importer so it can be used with either source specified as "csv" or with no source specified + register_converter("text/csv", "csv", CSV::Converter) register_converter("text/csv", nil, CSV::Converter) - # rtf + + # ExamSoft converters + ## rtf register_converter("application/rtf", "examsoft", ExamSoft::Converter) - # docx + ## docx register_converter("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "examsoft", ExamSoft::Converter) - # html + ## html register_converter("text/html", "examsoft", ExamSoft::Converter) register_converter("application/xhtml+xml", "examsoft", ExamSoft::Converter) def self.convert_to_aa_format(input_path, output_path) - result = convert(input_path) + result = convert(input_path, "csv") AtomicAssessmentsImport::Export.create(output_path, result) { errors: result[:errors], diff --git a/lib/atomic_assessments_import/exam_soft/converter.rb b/lib/atomic_assessments_import/exam_soft/converter.rb index 23c2a7b..1b3f058 100644 --- a/lib/atomic_assessments_import/exam_soft/converter.rb +++ b/lib/atomic_assessments_import/exam_soft/converter.rb @@ -125,12 +125,10 @@ def categories_to_tags(categories) end def convert_row(row) - # The csv files had a column for question id, but ExamSoft rtf files does not seem to have 
that. I'll include Folder instead + # The csv files had a column for question id, but ExamSoft rtf files does not seem to have that. source = "<p>ExamSoft Import on #{Time.now.strftime('%Y-%m-%d')}</p>\n" if row["question id"].present? source += "<p>External id: #{row['question id']}</p>\n" - elsif row["folder"].present? - source += "<p>From Folder: #{row['folder']}</p>\n" # Is folder a good substitute if there's no question id? end @@ -143,11 +141,12 @@ def convert_row(row) metadata: { import_date: Time.now.iso8601, import_type: row["import_type"] || "examsoft", - **{ # TODO: decide about this section - what is the external id domain? Do we need alignment URLs from ExamSoft RTF? - external_id: row["question id"], - external_id_domain: row["question id"].present? ? "examsoft" : nil, - alignment: nil # alignment_urls(row) - }.compact, + + # **{ # TODO: decide about this section - what is the external id domain? Do we need alignment URLs from ExamSoft RTF? + # external_id: row["question id"], + # external_id_domain: row["question id"].present? ? 
"examsoft" : nil, + # alignment: nil # alignment_urls(row) + # }.compact, }, source: source, description: row["description"] || "", diff --git a/spec/atomic_assessments_import/csv/questions/multiple_choice_spec.rb b/spec/atomic_assessments_import/csv/questions/multiple_choice_spec.rb index 6f6966e..95856b4 100644 --- a/spec/atomic_assessments_import/csv/questions/multiple_choice_spec.rb +++ b/spec/atomic_assessments_import/csv/questions/multiple_choice_spec.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -RSpec.describe AtomicAssessmentsImport::CSV::Questions::MultipleChoice do +RSpec.describe AtomicAssessmentsImport::Questions::MultipleChoice do describe "#to_learnosity" do let(:template) { "Multiple choice" } let(:correct_answer) { "A" } diff --git a/spec/atomic_assessments_import/csv/questions/question_spec.rb b/spec/atomic_assessments_import/csv/questions/question_spec.rb index 969f75f..57b5aa9 100644 --- a/spec/atomic_assessments_import/csv/questions/question_spec.rb +++ b/spec/atomic_assessments_import/csv/questions/question_spec.rb @@ -1,29 +1,29 @@ # frozen_string_literal: true -RSpec.describe AtomicAssessmentsImport::CSV::Questions::Question do +RSpec.describe AtomicAssessmentsImport::Questions::Question do describe "#load" do it "creates a mcq question" do row = CSV::Row.new([:'question type'], ["multiple choice"]) question = described_class.load(row) - expect(question).to be_a(AtomicAssessmentsImport::CSV::Questions::MultipleChoice) + expect(question).to be_a(AtomicAssessmentsImport::Questions::MultipleChoice) end it "creates a multiple choice question" do row = CSV::Row.new([:'question type'], ["mcq"]) question = described_class.load(row) - expect(question).to be_a(AtomicAssessmentsImport::CSV::Questions::MultipleChoice) + expect(question).to be_a(AtomicAssessmentsImport::Questions::MultipleChoice) end it "creates a multiple choice question if no question type is passed" do row = CSV::Row.new([:'question type'], [""]) question = 
described_class.load(row) - expect(question).to be_a(AtomicAssessmentsImport::CSV::Questions::MultipleChoice) + expect(question).to be_a(AtomicAssessmentsImport::Questions::MultipleChoice) end it "creates a multiple choice question by default" do row = CSV::Row.new([:'question id'], ["123"]) question = described_class.load(row) - expect(question).to be_a(AtomicAssessmentsImport::CSV::Questions::MultipleChoice) + expect(question).to be_a(AtomicAssessmentsImport::Questions::MultipleChoice) end end end diff --git a/spec/atomic_assessments_import/csv/utils_spec.rb b/spec/atomic_assessments_import/csv/utils_spec.rb index 2d3bec3..2f1feaa 100644 --- a/spec/atomic_assessments_import/csv/utils_spec.rb +++ b/spec/atomic_assessments_import/csv/utils_spec.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -RSpec.describe AtomicAssessmentsImport::CSV::Utils do +RSpec.describe AtomicAssessmentsImport::Utils do describe "#parse_boolean" do it "returns true for yes" do expect(described_class.parse_boolean("yes", default: false)).to be_truthy diff --git a/spec/atomic_assessments_import_spec.rb b/spec/atomic_assessments_import_spec.rb index 556a53e..0009d1b 100644 --- a/spec/atomic_assessments_import_spec.rb +++ b/spec/atomic_assessments_import_spec.rb @@ -29,7 +29,7 @@ describe "#convert" do it "converts a CSV file to an object" do input_path = "spec/fixtures/simple.csv" - data = described_class.convert(input_path) + data = described_class.convert(input_path, "csv") expect(data[:errors]).to be_empty expect(data[:items].length).to eq(3) From f882e4aefc6d350e6aabe5d7231697b0b73682fa Mon Sep 17 00:00:00 2001 From: Jacob Schwartz <jaschwartz27@gmail.com> Date: Thu, 5 Feb 2026 16:56:54 -0700 Subject: [PATCH 04/30] Remove .DS_Store file --- .DS_Store | Bin 6148 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 
53c90f0dc88c9b0cc805d91cf942741a91b46fb0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKOHRWu5PfbVwLlkLvJ4!c5;q8?5me$M(E|WU1FD*S)WSYz-~b$g3&I=QLP;97 z2*FHcKgWJ!dtPNv0>BJkrYArjK$k@@*k{#XGA?DuTE0j8jGbeK6nA*QmOnLBKo$7y z3dq?#;)#963Ja9aZ)Q*N0vVPZ-SS+|$XJuN;qw4b99gh`O-#r6DoyfL!rA018L|vM ztg!{Ok<fctW4+f{J)GeJr?|#BVqD^eIN%!7s-HQo`E|&Xy*zS_99fP1T4VRBY_5<q zrWEt4UrTPcY79B&Pim}9jn(14$BZLp6cfZ%w$<gWq)QjK)PBz0H}72{E%W!tkEzfI zSHvc5B-*k^(EJbC=*m30sr@qrn6t&Y2M)DX1yli5;7b9yA3_$v$YbHqULC9?Ny>O2 zD8{zD00@R2Baekcj?j#AB|2B*KQWARXMgB<k;lTJbBD1ja~{94@n0y$uFn3@(qSTp zTB`!8KvjXZd+ba8kH7!^uR7_IDxeDdDFsY0nv8}F$>-Lc;3U_EEcYy8!YdqF7gjzU g+lHj#Ll$cs3#CDfJQfbwL(>-lD}z?5z>g~M37dA5C;$Ke From 0e54db97f5fe8f10c5a4385d98c3f02c524e0220 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz <jaschwartz27@gmail.com> Date: Fri, 6 Feb 2026 14:26:51 -0700 Subject: [PATCH 05/30] Add tests for question loading and utility functions; include sample documents - Created a new spec file for testing the loading of questions in the AtomicAssessmentsImport module, ensuring that multiple choice questions are correctly instantiated from various input formats. - Added a spec file for utility functions, specifically testing the boolean parsing functionality with various inputs and defaults. - Introduced sample documents in different formats (DOCX, HTML, RTF) to be used as fixtures for testing the import functionality. 
--- .../exam_soft/converter.rb | 19 +- .../questions/multiple_choice.rb | 4 +- .../examsoft/converter_spec.rb | 205 ++++++++++++++++++ .../questions/multiple_choice_spec.rb | 0 .../{csv => }/questions/question_spec.rb | 0 .../{csv => }/utils_spec.rb | 0 spec/fixtures/simple.docx | Bin 0 -> 6042 bytes spec/fixtures/simple.html | 66 ++++++ spec/fixtures/simple.rtf | 56 +++++ 9 files changed, 344 insertions(+), 6 deletions(-) create mode 100644 spec/atomic_assessments_import/examsoft/converter_spec.rb rename spec/atomic_assessments_import/{csv => }/questions/multiple_choice_spec.rb (100%) rename spec/atomic_assessments_import/{csv => }/questions/question_spec.rb (100%) rename spec/atomic_assessments_import/{csv => }/utils_spec.rb (100%) create mode 100644 spec/fixtures/simple.docx create mode 100644 spec/fixtures/simple.html create mode 100644 spec/fixtures/simple.rtf diff --git a/lib/atomic_assessments_import/exam_soft/converter.rb b/lib/atomic_assessments_import/exam_soft/converter.rb index 1b3f058..d856c48 100644 --- a/lib/atomic_assessments_import/exam_soft/converter.rb +++ b/lib/atomic_assessments_import/exam_soft/converter.rb @@ -18,7 +18,15 @@ def initialize(file) def convert # Step 1: Parse the ExamSoft file to HTML using Pandoc to formalize the structure - html = PandocRuby.new([@file], from: @file.split('.').last).to_html + if @file.is_a?(String) + html = PandocRuby.new([@file], from: @file.split('.').last).to_html + else # If @file is not a string, we assume it's a Tempfile or similar object that PandocRuby can read from directly + # Just grab the text following the last . to determine the format for Pandoc. This is a bit of a hack but it allows us to handle Tempfile objects that don't have a path method. 
+ source_type = @file.path.split('.').last.match(/^[a-zA-Z]+/)[0] # Remove any non-alphanumeric characters to get a clean source type for Pandoc + html = PandocRuby.new(@file.read, from: source_type).to_html + end + + # html = PandocRuby.new([@file], from: @file.split('.').last).to_html # Step 2: Extract questions and convert them into AA format @@ -46,11 +54,14 @@ def convert expl = clean_chunk.match(explanation_regex) raw_options = chunk.scan(options_regex) + # Validate that we have options + raise "Missing options" if raw_options.empty? + # Identify ALL indices where the marker is '*' to denote correct answers # We use .map { |i| i + 1 } to convert 0-index to 1-index numbers correct_indices = raw_options.each_index.select { |i| raw_options[i][0] == "*" }.map { |i| i + 1 } - type = meta && meta[:type] ? meta[:type].strip.downcase : "mcq" + type = meta && meta[:type] ? meta[:type].strip.downcase : "standard" # This is for the "template" field in AA, but ExamSoft RTF doesn't seem to have a direct equivalent, so we can use the "Type" field if it exists or default to "standard". folder = meta ? meta[:folder].strip : nil title = meta ? meta[:title].strip : nil categories = meta ? meta[:category].split(",").map(&:strip) : [] @@ -69,7 +80,7 @@ def convert "import type" => nil, "description" => nil, "question text" => question, - "question type" => type, + "question type" => "mcq", # We are treating all questions as multiple choice for now since that's the only type we have in our fixture. We could potentially add logic to determine question type based on the presence of certain fields or patterns in the question text. 
"stimulus review" => nil, "instructor stimulus" => nil, "correct answer" => correct_answer_indices.map { |i| ("a".ord + i - 1).chr }.join("; "), @@ -82,7 +93,7 @@ def convert "correct feedback" => explanation, "incorrect feedback" => nil, "shuffle options" => nil, - "template" => nil, + "template" => type, } # Add option keys for the MultipleChoice class diff --git a/lib/atomic_assessments_import/questions/multiple_choice.rb b/lib/atomic_assessments_import/questions/multiple_choice.rb index ac1b7b4..5204c14 100644 --- a/lib/atomic_assessments_import/questions/multiple_choice.rb +++ b/lib/atomic_assessments_import/questions/multiple_choice.rb @@ -72,7 +72,7 @@ def distractor_rationale_response_level def multiple_responses case @row["template"]&.downcase when "multiple response", "block layout multiple response", "choice matrix", - "choice matrix inline", "choice matrix labels" + "choice matrix inline", "choice matrix labels", "ma" true else false @@ -81,7 +81,7 @@ def multiple_responses def ui_style case @row["template"]&.downcase - when "multiple response" + when "multiple response", "ma" { type: "horizontal" } when "block layout", "block layout multiple response" { choice_label: "upper-alpha", type: "block" } diff --git a/spec/atomic_assessments_import/examsoft/converter_spec.rb b/spec/atomic_assessments_import/examsoft/converter_spec.rb new file mode 100644 index 0000000..18842a7 --- /dev/null +++ b/spec/atomic_assessments_import/examsoft/converter_spec.rb @@ -0,0 +1,205 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Converter do + describe "#convert" do + before(:all) do + @data = described_class.new("spec/fixtures/simple.rtf").convert + end + + it "converts a simple RTF file" do + path = "spec/fixtures/simple.rtf" + data = described_class.new(path).convert + + expect(data[:activities]).to eq([]) + expect(data[:items].length).to eq(3) + expect(data[:questions].length).to eq(3) + 
expect(data[:features]).to eq([]) + + item1 = data[:items].find { |i| i[:title] == "Question 1" } + expect(item1).not_to be_nil + expect(item1[:reference]).not_to be_nil + + item2 = data[:items].find { |i| i[:title] == "Question 2" } + expect(item2).not_to be_nil + expect(item2[:reference]).not_to be_nil + + item3 = data[:items].find { |i| i[:title] == "Question 3" } + expect(item3).not_to be_nil + expect(item3[:reference]).not_to be_nil + + question1 = data[:questions].find { |q| q[:data][:stimulus] == "What is the capital of France?" } + expect(question1).not_to be_nil + expect(question1[:reference]).to eq(item1[:questions][0][:reference]) + + question2 = data[:questions].find { |q| q[:data][:stimulus] == "What is the capital of Germany?" } + expect(question2).not_to be_nil + expect(question2[:reference]).to eq(item2[:questions][0][:reference]) + + question3 = data[:questions].find { |q| q[:data][:stimulus] == "Which are US state capitals?" } + expect(question3).not_to be_nil + expect(question3[:reference]).to eq(item3[:questions][0][:reference]) + end + + it "converts a RTF from a Tempfile" do + rtf = Tempfile.new("temp.rtf") + original_content = File.read("spec/fixtures/simple.rtf") + rtf.write(original_content) + rtf.rewind + + data = described_class.new(rtf).convert + + + expect(data[:activities]).to eq([]) + expect(data[:items].length).to eq(3) + expect(data[:questions].length).to eq(3) + expect(data[:features]).to eq([]) + + item1 = data[:items].find { |i| i[:title] == "Question 1" } + expect(item1).not_to be_nil + expect(item1[:reference]).not_to be_nil + + item2 = data[:items].find { |i| i[:title] == "Question 2" } + expect(item2).not_to be_nil + expect(item2[:reference]).not_to be_nil + + item3 = data[:items].find { |i| i[:title] == "Question 3" } + expect(item3).not_to be_nil + expect(item3[:reference]).not_to be_nil + + end + + it "sets the title and source" do # Currently the converter doesn't set the description since ExamSoft RTF doesn't have a 
field that maps to it, but we can still test that the title is set correctly and that the source is tagged as coming from ExamSoft. + item1 = @data[:items].find { |i| i[:title] == "Question 1" } + expect(item1).not_to be_nil + # expect(item1[:description]).to eq("This is a question about France") + expect(item1[:title]).to eq("Question 1") + expect(item1[:source]).to match(/ExamSoft Import/) + end + + it "sets tags" do + item1 = @data[:items].find { |i| i[:title] == "Question 1" } + expect(item1).not_to be_nil + expect(item1[:tags]).to eq( + { + Subject: ["Capitals"], + Difficulty: ["Normal"], + } + ) + end + + it "sets duplicate tags" do + item2 = @data[:items].find { |i| i[:title] == "Question 2" } + expect(item2).not_to be_nil + expect(item2[:tags]).to eq( + { + Subject: %w[Capitals Geography], + } + ) + end + + # it "sets external id metadata" do + # csv = <<~CSV + # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer + # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A + # CSV + # data = described_class.new(StringIO.new(csv)).convert + # item1 = data[:items].find { |i| i[:title] == "Question 1" } + # expect(item1).not_to be_nil + # expect(item1[:metadata][:external_id]).to eq("Q001") + # expect(item1[:metadata][:external_id_domain]).to eq("csv") + # expect(item1[:metadata][:import_type]).to eq("csv") + # expect(item1[:source]).to match(/External.*Q001/) + # end + + # it "sets alignment tags" do + # csv = <<~CSV + # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL + # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,"https://example.com/alignment" + # CSV + # data = described_class.new(StringIO.new(csv)).convert + # item1 = data[:items].find { |i| i[:title] == "Question 1" } + # expect(item1).not_to be_nil + # expect(item1[:tags]).to eq( + # { + # Subject: ["Capitals"], + # lrn_aligned: 
["ff8a5caa-0f2a-5a53-a128-c8c3e99768a8"], + # } + # ) + # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment]) + # end + + # it "sets multiple alignment tags" do + # csv = <<~CSV + # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL,Alignment URL + # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,https://example.com/alignment,https://example.com/alignment2 + # CSV + # data = described_class.new(StringIO.new(csv)).convert + # item1 = data[:items].find { |i| i[:title] == "Question 1" } + # expect(item1).not_to be_nil + # expect(item1[:tags]).to eq( + # { + # Subject: ["Capitals"], + # lrn_aligned: %w[ff8a5caa-0f2a-5a53-a128-c8c3e99768a8 f7d26914-3e2b-5c9c-a550-ce9c853f0c09], + # } + # ) + # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment https://example.com/alignment2]) + # end + + # it "sets alignment tags when one is empty" do + # csv = <<~CSV + # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL,Alignment URL + # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,,https://example.com/alignment2 + # CSV + # data = described_class.new(StringIO.new(csv)).convert + # item1 = data[:items].find { |i| i[:title] == "Question 1" } + # expect(item1).not_to be_nil + # expect(item1[:tags]).to eq( + # { + # Subject: ["Capitals"], + # lrn_aligned: %w[f7d26914-3e2b-5c9c-a550-ce9c853f0c09], + # } + # ) + # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment2]) + # end + + # it "raises if an unknown header is present" do + # csv = <<~CSV + # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Color + # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A, + # CSV + # expect do + # described_class.new(StringIO.new(csv)).convert + # end.to raise_error(StandardError, 
"Unknown column: Color") + # end + + it "raises if no options are given" do + modified_rtf_file = Tempfile.new("modified.rtf") + # Copy the original RTF content and remove the options + original_content = File.read("spec/fixtures/simple.rtf") + # Remove lines that look like options (e.g., "a) Paris", "b) Versailles", etc.) while keeping the rest of the content intact. This regex looks for lines that start with a letter followed by a parenthesis and some text, which is the typical format for options in ExamSoft RTF exports. + modified_content = original_content.gsub(/[a-o]\)\s*[^\}]*/, "") + modified_rtf_file.write(modified_content) + modified_rtf_file.rewind + + expect do + described_class.new(modified_rtf_file).convert + end.to raise_error(StandardError, /Missing options/) + end + + it "raises if no correct answer is given" do + modified_rtf_file = Tempfile.new("temp.rtf") + # Copy the original RTF content and remove only the asterisks marking correct answers + original_content = File.read("spec/fixtures/simple.rtf") + # Remove the asterisks (*) that mark correct answers, keeping the options + modified_content = original_content.gsub(/\*([a-o]\))/, '\1') + modified_rtf_file.write(modified_content) + modified_rtf_file.rewind + + expect do + described_class.new(modified_rtf_file).convert + end.to raise_error(StandardError, /Missing correct answer/) + end + end +end diff --git a/spec/atomic_assessments_import/csv/questions/multiple_choice_spec.rb b/spec/atomic_assessments_import/questions/multiple_choice_spec.rb similarity index 100% rename from spec/atomic_assessments_import/csv/questions/multiple_choice_spec.rb rename to spec/atomic_assessments_import/questions/multiple_choice_spec.rb diff --git a/spec/atomic_assessments_import/csv/questions/question_spec.rb b/spec/atomic_assessments_import/questions/question_spec.rb similarity index 100% rename from spec/atomic_assessments_import/csv/questions/question_spec.rb rename to 
spec/atomic_assessments_import/questions/question_spec.rb diff --git a/spec/atomic_assessments_import/csv/utils_spec.rb b/spec/atomic_assessments_import/utils_spec.rb similarity index 100% rename from spec/atomic_assessments_import/csv/utils_spec.rb rename to spec/atomic_assessments_import/utils_spec.rb diff --git a/spec/fixtures/simple.docx b/spec/fixtures/simple.docx new file mode 100644 index 0000000000000000000000000000000000000000..adf4590a3b5d3985513451b7ddef2fb2f94563cd GIT binary patch literal 6042 zcmaKQ2UJtr)-}Boigc7Fy#*4Qh$tXPkrwG4AwUoay*KGq2vv|4ibzMQ^h@u(NePI6 zA_@oyBJ~U2cb{DU|G6@9Ml!~kC)sQ5wdY(@Lj@a$3WI=v0HYhAV1#icDA1oR;T9Us za7P!Q1>6}b=xJ|f4>5*!uu%CeSJ-&Mk!}of^XYcc>R|m`tHC8YBR)eU&e&!fbJ;xX z!@?crjKP-pQ9$vtL$gdf*QXO9IRql`kSXn_GbLV9@`(M1#5&>(M6NjDR_k&jr0cPv z;tO<@CPC4%@$N}4u<TpJsVsWybDpaN$>BRq>UH}D)^&vKGV5TD5Tu#mW)Xg%-^Q{4 zv*)_xzmv<Kn`<N^zb;2gRO2C`3OW6FV3yqElcQUnp?sY8v}q<<;tiP^pr_70(S6FW z^X$>-Yft0DydH>0$@{<oBgX*&q2;%r-dUC@tn2j?N0obto^m)wVB@V0f90`V?>R%f z;sxK~4AT(tc|~&d-gu|*+wh=bFl{Ks2JBt*H)(Hw{b9{CgdJrfKj*5pWWj0-<=&iV z3+cvf!?%OmB}s99lbG)H?oAc&cjqD#7_wUh6)X2m2LhH0PjNL=@ChVK?iE8YF)*UA zFfcU!XF<rX7R21q@v<J7nuBVcLgdZ+)S6w%ZwXaju*|-~Vg;~}hxC%#Z>e5OdYfWT zetKs?Cn;kW)>EA|cM!E-;iM=%9=?<gO}Ca(9pf(Xsn&m~K0K|I%*h%wE~FPc_>ED{ zTQIJxr5@n-MVNiftOK<5*ws1)cY1|@_2yjsguz`|_D%7jI{S*R6UA{9s+PDPUBXN$ zVio+}1HwKn?S28D&uSUgZULplm!F;H+c>&n-%koUYwuB@OZUH@wx1A!P0jZ)%tR*t zE<*fleibDcLJie{R+mb&;<|l5I<7OgV`MaH-Jky<_JijG`PeBVY>d%aH{($n`N3%{ zwUYEzZveF4u;qDCg9Gl1m-czt-xuP&(wrolf|hSPkuj2hXzj?lq7TwN8_4a*y5;ei zwDR$*BnuiieREsBz4md<>B}oWs<F-0NDq(2pVhqodRR(>Ej+ZJ`LY-nri|+9=wc}R zu^7bYVwgHZ?OcF@m)EicRTXR*lCYgKYPoHc9bZJ>$RV37ob3QVB<;)g*a$utR(7!@ zL)54dmDAhowg2_{g#E~Qj#@@^v`l9)OvXuQyiv&3bKwq<*=!Qns4E>mrJQEOTf7vK zYfh;Y7t#IjOOc4M)2kP(R_c&msmE;9g$O59-S_7Kpqy;s=^5KT!zQfS6U9?;F>jhG z_7F}hlb1#n)e?f#YF?J(W_eykPCsl#l1V&`ZO(j<5+oyEABj8|l_~ey)tAt#t54=G zFjVlkfQi6;2b+8C#!cQw59fByF@DRw%*e(V=FK_udhh%(BiGS)c)*=4fq$n3eRR0l zLmgZ%cj@0Lf(#OMpttG?wfT-%SyY6`0b7u65i|MZ7WM)=d&$N!qgG8&L^zYR*lqAb 
zF=OM#l+5uTJvNw1RM>K5563r@|B_ygp{*QH_yjOiT8XS+{H*_W+~l6BWom|xwgs1; zCI6-nPwZ!Q+znob0?7fQAMDRQE*^-ooQzp6$ObOZ5LTYr?lRjyN{Q{L)hq;v`n`Ua zRW5Lx!)+YO{_bgB4)$r&nWf^%gw4P@vfCqw_JFU;bqo;CeF|+0<=W@#u<IkBa`(Bh zxDxy@i(Y}}LZb2u-eq>&EsO1^&}&cqV|K{?$&QPwmmSpQGAqM6?auQ;<SpO%q#Cbb zD=CFVCL(Lngc2vzl%((V5$oJkDQB16nDv0#t(s*>gw*wO*ytLx_`%mv`-8oDo@6<s zx*(R!Jh9z-AZq7ZWPPYD?awiuJrH^bk{!nqH_XVySHko#)|gO;DJ%758;PQ&{ls^_ zPPT}mTM$CUZ^KE?in<H3PE-my@AKjHm>uaIq1>B3_9m$$gn!D=!s^a|L8pe8gsD$# z#a2lsG(4Lp)dITN$jGZX?>#Qwp;aPY<(S&zp<OJ82z5p42Z*F9PZs@UM?u!`myukz zbZ3lUh^RwR+h@D9vWWm)WrJ0LlPRGQZ0F9x9O34KB^;4#3Rih-n61&h*c)_e*dR@3 zhX!$aNiZ0b-bO9dgX`8N*3vBDF3RtF4u^wh4jT)qw*OR+gsTB+Q)eGE;N^fl*Z4xJ z<=kV6;aMhZK(32ncgE%m;8_3Vx%+wlA#aN(f}J6hiNCbbgoC0FD}0)`4~tX(FK2=_ zSq}r4=X^Qk6gx6~L{yb6T-YtPBQPejnT=Xyf^M$~<dXiqqNE1rlyZQBY}z@Z5yr81 z6U<9esxE=&?BZj8-0zItN@PFiHKik4Y^rqr_{?@VjZ&F5VxsNBgro>Z721fmDJ_s0 zr7=Fg;s*Lq^z^GC0^(k()7x28)q3VtB|gODL3U4}J|$-Rgf&>aAm7IwL;2Ygo?HHR zlSEkaW@AWbSQ&?c{AGKRAv(Ooor_O+Yt{^#C$fkL3q}t=%lINzi0P_s1dw4Jl8pro zo(xPg#+gE@*oO2uhjL$G@zY?!Clv^4Ln@gJ-5v<^R6};ZgznDuYC+3R4eCrF4d%+8 zH4SHpM7s5F%KaXH$LN3K%CfT;!G(+|G`1p=2z~lQ+8#e?BwqWvWYd-cS(TKiCd~Fj z(NHzaJX!77E*y0pZ|^oN?zcz;sK<2zz&p3W32T$uGCVXir+RPnli%mdgNWJgY#}ID z)sLj-y1%z7p&TEna*J*cti$M&d)kZ}XNzipQ>>~+v07tQL+6w~ZWl{4yTIeW?XvZN zL_|e*ichIUO-+%X)rP`f?k3d(O>fe7ScI0<9+(-O-Tq;$*{IJfbUG4sK(xa&=S2T9 zb;Xk+KCWd_L7o2sDtL}&QG9}7yZ}5jb6`ZGd+&d<D&;?oX$5z1)iQr<_fzLgCyqLF z2$A&c_(XAgC`Q-7in(njY9_nPEAfSjRBo=TQ{#h{{E?x98<?0cY78kCvI9m^AAU4d zsJ`J18A*|1w$4B%mqXki?Tt<^Y@~sS%=pGe9`}D&^p*B`-jnMtMy|4+5~a7t8B#|c zWEqTS^TnWsN9Q)2CBJMrV#9aZn+w5R#3;b2f5=Y4Js1Z%r=-`NReoQRXc#Q^jg3h( zL<Cc`m_z#Z#A`i0Awus05=uieOXiWlElNx5tgrFa*v5POq)W^}Bpa!8%1p^+M){i3 zUKx(#MM0Y00mZwSdbBp$@xz~d@lDzdR$1A-lfe1wOQo4i><vB@sY#&4%_t-fZ!VwA z(lmpgHI-qmu3ImTBtPU^{{AGNa93uw3tTR9X)_zOc)|826=nwOt7h7>Ds(_LuSH*^ zTleA)w_;wFVBL7tSPspl>bQR@0rfv6aJ7NjLxDf9!j~r`Dp?i2AcSVs6Y8MtlJ{>2 zCs@>k`H9`RE9L4?^l5d|idC#dTYEc&eQNJ3kexzi8@`c>cYNieVp<w6knCOx7V<^c 
z7*G$nWrhc}Hl&cttq#jT?$-cUqN=IqV>b=F+x+W;3ivcwcUR^GS&^YG(+R?ZEsS4O z^^u1@`Y<X9czQ+wo=J0Qf}LdAP#P8zb`9>&)j9-fcY3tP6*@o7hQ}e1-6(J$$}^th z`^*$earPtoT_q{`w~4_j)vK)g>3-xs!DWHp$TR&*`@b_6u#U1^lO!aAex`21m>_N7 zmm;0T_+);ZzEm^A=zOOI1LI?B(#qU8)_bMJN`Y4<&(_FCcX9^nzpY}DImO&LI~rI9 z9EBG89M|jmi@p1Sy3w|%L|mM@PvOrX|8@miOA$$yC;rU(ILNZ#R4QuJih}=;)W1OQ ziSMT8mSpEt`c2%pf;n#%k=fB#oZEWlc#AH~F&Iip9M^$$IU!QVR6HxVbkQT?`>rDV z6b8bKz9Bp6miY6nO#NHg@>M49l{dr<x<#F49JFj+*fPovWZqWxtwbssDkB_Pz;u{R zG4k3+9HddHne&@CXD+y6{3@H}@+Op$an^+;!2@kDU-%hB8zx&e!h@N1GK%TJN#X)6 zT?iD|r0P&|3r}*iN&lYc|6-st;MQ|!9||a?JGnE>me&-(Y-N$QwZC<7tL>~q?<0H= zkNL3L`s~`}20N_W4PrtQ?>NDqHrVAT^HZ9*KwVv74%R>E7pf`qw{(u`^VqDxzo$7# z1fa%$H^Ij4U}!z5*JU!eB$6~+t;(Pox$14@ZZ$J_Q2(U*W{+ims;%^(cG!{z^bD!B z(=@ylJU|&-k;4&JAZZTyJUv_mqy!Q3L95yebAj5O*NMs!VPi7sn)iTus^gf0bYzW% zdZCJVk0^_t9I%b<)L?&}R@ONuZzu&~DFkP6sAci_;qjItpLpS)2s|PmpqPiCfPzA< z4@V2fa`01_u8q9zM2ww2&4yU88LfpGVKDh_#2)dm`TE@_z~pnX6E=QbXYpk2U3>qX z9^Iz;vh_(m?N>OPxn~}v)~fmUMMv-F7L}K^uQ1svgH1D5H$F@88!X0uq@|O>U(yeZ zKN8lbD{Lw2`kI?{4m}{g%<-t!ej^9Ei5JoD|9y`BH^*0n-<H9G^MV+;-;S=?(T8UO z%2AU_5(uf*7}3lgCN9XPHGxq=giIiIV9Ip3zYlG*l9SeK{Zx|NT|G8I{3f43+-HB0 z;fois%%Y`^^BSqQUtFLi?;427%hamSXGiKWe=;kzaz~$G&@P~-%~uZx`PhqSg7SVm zr^Zd-EJ%p|X;OMi61XfT>I>5&SWa#P_?2{XaX6D#WUc<Y6YavRfqRdQ^Ieq%q&MFJ zn=-U4esua@h?@rI&}@kkjy{Za_nSSMEqb*eunpdn{UkIQ^;DQHLMv(YWzry?)mU42 zC!p{X0%g6XG1)yIzs%qa(V2;$w~h<!G7+1t1P%J)PaI9ineq}5Jt8+BYlBXSAm<ni zPDN@Wb-Z-)3cl|F3yyh5joyMYXj}x}7}J!O$1r@kJ6vq|)Km%px2FUwNt2kp#mMJB zx|5<j+3?qTL<;#wCYk&2m=fpmj)Ie+r~ul=qv!Jl7*z!2MRZ;t1zkJ#qi3Yn+`QWO z--EgmLI&O_Vm>FM!Q{lSFWF(~e7U8o;~{8}6xw**)kIfbi|$p}Z81h=v~&^Ip$dS8 z{wNW=0Wgj<eR>jWeH_48$#A$m#$^80+R-Aw)A67vVWvTWbk%ol*pnzMqcg}?hbI}p zdn?IGF7|?Lv1WEjUc=7pX+L-8K@v<Tr=p!Po0lR8(1)W{3QOT9bO^G{b*xCz0<nM{ zaB;d==e(or5VNc5x9IAl6WZ$<E7K&L9}09S1M8o^fJt8v4ITH2tkS%$n??+iY&igh zxfZtu8~(bc&=BWP!gW#Qj(2+vgiLXiv%?BW-j90~kzt#90O(s@3$iV3o$lc1)uLAc zC9lh_hY3lAl}R4<WpjhLX|bYObs+-m?H1Qp=<;HSYAoyT-RcMijGgw>M%irUY+8BZ 
zVdt{;lun;;enrikO4fhat#B>OfZssf#YXLC){2a4o`W?;+|y&0DbD8!1H^0%Wpq{a zy}lN~>xMM?vTh{UPc)MqR*g4xZq)>EJYn=c1wI1R>N(7_7HtpgSJyETs5KMto)IdJ zal_8<MY6HPJSEBGwQV|M)Ly{R$R8+3w11F;o63$18^T=MN}&qX7WC?3E%l-3MCCF! z2Q2Tba0+nnynL*<;jwDexS@^Wr4O$H;@lc_Gg^Q;muU;4P?V7@{6Th$-~8N#XYI-2 zUdX9AB=6^2tgR0hQy~n!ZrlLEb;@*fBFiiasIA=E;VAQiWbVjh8_!8+m5Y5rI}QfH zWE$3@iWJH9uNJZa;TfLQlgH0ZIWssyY+1O4qru*O^YCutHRG4J%k~k54}lTG<zf-J za(?m!G8C)M6NHKnl^G3gG%?rPyz+kAW!uf@#qsLdjizo~THazX33Xi2BFFw>LL9*s zj@tBd(2JWUp$mkWG|#n(Gi#M5#`pHDsB3Z-d1%*M!0-2}4O8?85fd85m}7lGOkTlf z&)yVn=JKjIu#aF=YfnOF;yDqO#0<J$^`+df`Qu|(9NWye+IQf~(~Fk7^ltT}SZvYM z+f|s^B5{eey%P1fdC-6y^L;bzI$Ii^T3a#Qnc|w_(lE)<Nqv}Y5}PkUj5u7iF;yK{ zQ^ok$a=hSL`M9x%P3dN6*6`q_#_Pxr+d2c??d=L8v@`UqZ$gM2^dWCol=Wl}YNT|& zm3YoNefvy%=WN>N?P|bJ2|Brm_v#b}17k?wPe<o9G(8#0q2(ufTsGD6a)i1VU#g#4 zttK=<QK5cBaq}rJf>=T(+}{byhVNnav?`JA${pInF0wwe4`_tqaPuR+_?Sr6Jt_q> zsvW)yuChXm?CYloQt0qL5|~IGws21@PoeGpu))F}B5ICspg;*&$k@s3#~b2@aTXk; z-xToczD~Q-jO8x+!We8$8q`jbr_jj1ja+MDNDRleuWv1o=|>dy8%M{wM<?CZ<!eW< ze2Gu4!+tFjDULr)X!_|nlAgpRgZ^dLnR!^aWK8}dqZj1Cj5gw0b$dve^GX7SsYF-5 zBB{Vmn%WQ_VSl=h@B4I(hX$_?XK*(c<fH|`3E(R51KD?)GTE+Ei2MKtn?VR6OKc{m zGT=29;ogfS<{1^5uX=J43zD{ol0fo}`OlJYH?FPK*u}52$F%n992*5(ax8abZQAy$ zT|_{Wwb*Rkr*ZFZulEzQbY(x-;7aG4Tz|u@w~uY)k9EmvI@CML253eD{*<PeSX3Cl zY|yKI+$FF5JN_S2R72(8@T(5erD^vo5a=KLFZ@3i-tXwEZp)?1^edRq!^{=W>385& zBjVDG`4vLwE%+yE=JzLEot!R>i(jFJ&eq>b>c2L|@8GMN|8ltc6(7+e?I-wOqt@^E ztCH?=GWZp?ME}A6J}3PCh^va_-}M_L`RD&%QG&n2uXgRf;XuHD;8#V?@9?WT>$0_f cg)RE)|EJM4RB-VwZz4j!0?=X>_vf<z4~Up6X8-^I literal 0 HcmV?d00001 diff --git a/spec/fixtures/simple.html b/spec/fixtures/simple.html new file mode 100644 index 0000000..d4b0356 --- /dev/null +++ b/spec/fixtures/simple.html @@ -0,0 +1,66 @@ +<!DOCTYPE html> +<html> +<head> + <meta http-equiv="content-type" content="text/html; charset=utf-8"/> + <title> + + + + + +

+
+ +

+

Folder: +Geography and Governance Title: Question 1 Category: +Subject/Capitals, Difficulty/Normal 1) What is the capital of France? +~ Paris is the capital and most populous city of France. Versailles +is known for its palace, and Bordeaux is famous for its wine region.

+

*a) +Paris

+

b) +Versailles

+

c) +Bordeaux

+


+
+ +

+

Type: +MA Folder: Geography and Governance Title: Question 2 Category: +Geography/US States, Test plan/General Knowledge 2) What is the +capital of Germany? ~ Berlin has been the capital of a reunified +Germany since 1990. Bonn served as the capital of West Germany, and +Brandenburg is the surrounding federal state.

+

*a) +Berlin

+

b) +Bonn

+

c) +Brandenburg

+


+
+ +

+

Folder: +Geography and Governance Title: Question 3 Category: Geography/World +Capitals, Test plan/General Knowledge 3) Which are US state capitals? +~ Little Rock is the capital of Arkansas and Denver is the capital of +Colorado. Detroit is the largest city in Michigan, but Lansing is the +actual capital.

+

*a) +Little Rock

+

*b) +Denver

+

c) +Detroit

+ + \ No newline at end of file diff --git a/spec/fixtures/simple.rtf b/spec/fixtures/simple.rtf new file mode 100644 index 0000000..e337800 --- /dev/null +++ b/spec/fixtures/simple.rtf @@ -0,0 +1,56 @@ +{\rtf1\ansi\deff3\adeflang1025 +{\fonttbl{\f0\froman\fprq2\fcharset0 Times New Roman;}{\f1\froman\fprq2\fcharset2 Symbol;}{\f2\fswiss\fprq2\fcharset0 Arial;}{\f3\froman\fprq2\fcharset0 Liberation Serif{\*\falt Times New Roman};}{\f4\fswiss\fprq2\fcharset0 Liberation Sans{\*\falt Arial};}{\f5\fnil\fprq2\fcharset0 Arial Unicode MS;}{\f6\fswiss\fprq0\fcharset128 Arial Unicode MS;}} +{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;} +{\stylesheet{\s0\snext0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033 Normal;} +{\s15\sbasedon0\snext16\sb240\sa120\keepn\rtlch\af5\afs28\ltrch\hich\af4\afs28\dbch\af5\loch\f4\fs28 Heading;} +{\s16\sbasedon0\snext16\sl276\slmult1\sb0\sa140 Body Text;} +{\s17\sbasedon16\snext17\rtlch\af6\ltrch List;} +{\s18\sbasedon0\snext18\sb120\sa120\noline\rtlch\af6\afs24\ai\ltrch\fs24\i caption;} +{\s19\sbasedon0\snext19\noline\rtlch\af6\ltrch Index;} +}{\*\listtable{\list\listtemplateid1 +{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0} +{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0} +{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0} +{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0} 
+{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0} +{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0} +{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0} +{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0} +{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0}\listid1} +}{\listoverridetable{\listoverride\listid1\listoverridecount0\ls1}}{\*\generator LibreOffice/25.8.4.2$MacOSX_AARCH64 LibreOffice_project/290daaa01b999472f0c7a3890eb6a550fd74c6df}{\info{\creatim\yr2026\mo2\dy5\hr18\min59}{\revtim\yr2026\mo2\dy5\hr19\min5}{\printim\yr0\mo0\dy0\hr0\min0}}{\*\userprops}\deftab709 +\hyphauto1\viewscale100\formshade\nobrkwrptbl\paperh15840\paperw12240\margl1134\margr1134\margt1134\margb1134\sectd\sbknone\sftnnar\saftnnrlc\sectunlocked1\pgwsxn12240\pghsxn15840\marglsxn1134\margrsxn1134\margtsxn1134\margbsxn1134\ftnbj\ftnstart1\ftnrstcont\ftnnar\fet\aftnrstcont\aftnstart1\aftnnrlc +{\*\ftnsep\chftnsep}\pgndec\pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ +Folder: Geography and Governance Title: Question 1 Category: Subject/Capitals, Difficulty/Normal 1) What is the capital of France? ~ Paris is the capital and most populous city of France. 
Versailles is known for its palace, and Bordeaux is famous for its wine region.} +\par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ +*a) Paris} +\par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ +b) Versailles} +\par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ +c) Bordeaux} +\par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar + +\par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar + +\par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ +Type: MA Folder: Geography and Governance Title: Question 2 Category: }{ +Subject/Capitals, Subject/Geography}{ + 2) What is the capital of Germany? ~ Berlin has been the capital of a reunified Germany since 1990. 
Bonn served as the capital of West Germany, and Brandenburg is the surrounding federal state.} +\par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ +*a) Berlin} +\par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ +b) Bonn} +\par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ +c) Brandenburg} +\par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar + +\par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar + +\par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ +Folder: Geography and Governance Title: Question 3 Category: Geography/World Capitals, Test plan/General Knowledge 3) Which are US state capitals? ~ Little Rock is the capital of Arkansas and Denver is the capital of Colorado. 
Detroit is the largest city in Michigan, but Lansing is the actual capital.} +\par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ +*a) Little Rock} +\par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ +*b) Denver} +\par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ +c) Detroit} +\par } \ No newline at end of file From 42f73a68f765a4b6a13d6b08da2396a05b0ae49c Mon Sep 17 00:00:00 2001 From: Jacob Schwartz Date: Fri, 6 Feb 2026 14:51:50 -0700 Subject: [PATCH 06/30] Add tests for DOCX and HTML; include sample documents and error handling --- .../examsoft/docx_converter_spec.rb | 199 +++++++++++++++++ .../examsoft/html_converter_spec.rb | 204 ++++++++++++++++++ ...onverter_spec.rb => rtf_converter_spec.rb} | 4 +- spec/fixtures/no_correct.docx | Bin 0 -> 5843 bytes spec/fixtures/no_options.docx | Bin 0 -> 5803 bytes spec/fixtures/simple.docx | Bin 6042 -> 5546 bytes spec/fixtures/simple.html | 95 ++++---- 7 files changed, 454 insertions(+), 48 deletions(-) create mode 100644 spec/atomic_assessments_import/examsoft/docx_converter_spec.rb create mode 100644 spec/atomic_assessments_import/examsoft/html_converter_spec.rb rename spec/atomic_assessments_import/examsoft/{converter_spec.rb => rtf_converter_spec.rb} (98%) create mode 100644 spec/fixtures/no_correct.docx create mode 100644 spec/fixtures/no_options.docx diff --git a/spec/atomic_assessments_import/examsoft/docx_converter_spec.rb b/spec/atomic_assessments_import/examsoft/docx_converter_spec.rb new file mode 100644 index 0000000..8356f42 --- /dev/null +++ b/spec/atomic_assessments_import/examsoft/docx_converter_spec.rb @@ -0,0 +1,199 @@ +# frozen_string_literal: true + 
+require "atomic_assessments_import" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Converter do + describe "#convert" do + before(:all) do + @data = described_class.new("spec/fixtures/simple.docx").convert + end + + it "converts a simple DOCX file" do + path = "spec/fixtures/simple.docx" + data = described_class.new(path).convert + + expect(data[:activities]).to eq([]) + expect(data[:items].length).to eq(3) + expect(data[:questions].length).to eq(3) + expect(data[:features]).to eq([]) + + item1 = data[:items].find { |i| i[:title] == "Question 1" } + expect(item1).not_to be_nil + expect(item1[:reference]).not_to be_nil + + item2 = data[:items].find { |i| i[:title] == "Question 2" } + expect(item2).not_to be_nil + expect(item2[:reference]).not_to be_nil + + item3 = data[:items].find { |i| i[:title] == "Question 3" } + expect(item3).not_to be_nil + expect(item3[:reference]).not_to be_nil + + question1 = data[:questions].find { |q| q[:data][:stimulus] == "What is the capital of France?" } + expect(question1).not_to be_nil + expect(question1[:reference]).to eq(item1[:questions][0][:reference]) + + question2 = data[:questions].find { |q| q[:data][:stimulus] == "What is the capital of Germany?" } + expect(question2).not_to be_nil + expect(question2[:reference]).to eq(item2[:questions][0][:reference]) + + question3 = data[:questions].find { |q| q[:data][:stimulus] == "Which are US state capitals?" 
} + expect(question3).not_to be_nil + expect(question3[:reference]).to eq(item3[:questions][0][:reference]) + end + + it "converts a DOCX from a Tempfile" do + docx = Tempfile.new("temp.docx") + original_content = File.read("spec/fixtures/simple.docx") + docx.write(original_content) + docx.rewind + data = described_class.new(docx).convert + + + expect(data[:activities]).to eq([]) + expect(data[:items].length).to eq(3) + expect(data[:questions].length).to eq(3) + expect(data[:features]).to eq([]) + + item1 = data[:items].find { |i| i[:title] == "Question 1" } + expect(item1).not_to be_nil + expect(item1[:reference]).not_to be_nil + + item2 = data[:items].find { |i| i[:title] == "Question 2" } + expect(item2).not_to be_nil + expect(item2[:reference]).not_to be_nil + + item3 = data[:items].find { |i| i[:title] == "Question 3" } + expect(item3).not_to be_nil + expect(item3[:reference]).not_to be_nil + + end + + it "sets the title and source" do # Currently the converter doesn't set the description since ExamSoft RTF doesn't have a field that maps to it, but we can still test that the title is set correctly and that the source is tagged as coming from ExamSoft. 
+ item1 = @data[:items].find { |i| i[:title] == "Question 1" } + expect(item1).not_to be_nil + # expect(item1[:description]).to eq("This is a question about France") + expect(item1[:title]).to eq("Question 1") + expect(item1[:source]).to match(/ExamSoft Import/) + end + + it "sets tags" do + item1 = @data[:items].find { |i| i[:title] == "Question 1" } + expect(item1).not_to be_nil + expect(item1[:tags]).to eq( + { + Subject: ["Capitals"], + Difficulty: ["Normal"], + } + ) + end + + it "sets duplicate tags" do + item2 = @data[:items].find { |i| i[:title] == "Question 2" } + expect(item2).not_to be_nil + expect(item2[:tags]).to eq( + { + Subject: %w[Capitals Geography], + } + ) + end + + # it "sets external id metadata" do + # csv = <<~CSV + # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer + # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A + # CSV + # data = described_class.new(StringIO.new(csv)).convert + # item1 = data[:items].find { |i| i[:title] == "Question 1" } + # expect(item1).not_to be_nil + # expect(item1[:metadata][:external_id]).to eq("Q001") + # expect(item1[:metadata][:external_id_domain]).to eq("csv") + # expect(item1[:metadata][:import_type]).to eq("csv") + # expect(item1[:source]).to match(/External.*Q001/) + # end + + # it "sets alignment tags" do + # csv = <<~CSV + # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL + # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,"https://example.com/alignment" + # CSV + # data = described_class.new(StringIO.new(csv)).convert + # item1 = data[:items].find { |i| i[:title] == "Question 1" } + # expect(item1).not_to be_nil + # expect(item1[:tags]).to eq( + # { + # Subject: ["Capitals"], + # lrn_aligned: ["ff8a5caa-0f2a-5a53-a128-c8c3e99768a8"], + # } + # ) + # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment]) + # end + + # 
it "sets multiple alignment tags" do + # csv = <<~CSV + # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL,Alignment URL + # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,https://example.com/alignment,https://example.com/alignment2 + # CSV + # data = described_class.new(StringIO.new(csv)).convert + # item1 = data[:items].find { |i| i[:title] == "Question 1" } + # expect(item1).not_to be_nil + # expect(item1[:tags]).to eq( + # { + # Subject: ["Capitals"], + # lrn_aligned: %w[ff8a5caa-0f2a-5a53-a128-c8c3e99768a8 f7d26914-3e2b-5c9c-a550-ce9c853f0c09], + # } + # ) + # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment https://example.com/alignment2]) + # end + + # it "sets alignment tags when one is empty" do + # csv = <<~CSV + # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL,Alignment URL + # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,,https://example.com/alignment2 + # CSV + # data = described_class.new(StringIO.new(csv)).convert + # item1 = data[:items].find { |i| i[:title] == "Question 1" } + # expect(item1).not_to be_nil + # expect(item1[:tags]).to eq( + # { + # Subject: ["Capitals"], + # lrn_aligned: %w[f7d26914-3e2b-5c9c-a550-ce9c853f0c09], + # } + # ) + # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment2]) + # end + + # it "raises if an unknown header is present" do + # csv = <<~CSV + # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Color + # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A, + # CSV + # expect do + # described_class.new(StringIO.new(csv)).convert + # end.to raise_error(StandardError, "Unknown column: Color") + # end + + it "raises if no options are given" do + no_options = Tempfile.new("temp.docx") + # Copy the original DOCX content 
and remove the options + original_content = File.read("spec/fixtures/no_options.docx") + no_options.write(original_content) + no_options.rewind + + expect do + described_class.new(no_options).convert + end.to raise_error(StandardError, /Missing options/) + end + + it "raises if no correct answer is given" do + no_correct = Tempfile.new("temp.docx") + original_content = File.read("spec/fixtures/no_correct.docx") + no_correct.write(original_content) + no_correct.rewind + + expect do + described_class.new(no_correct).convert + end.to raise_error(StandardError, /Missing correct answer/) + end + end +end diff --git a/spec/atomic_assessments_import/examsoft/html_converter_spec.rb b/spec/atomic_assessments_import/examsoft/html_converter_spec.rb new file mode 100644 index 0000000..e1aafde --- /dev/null +++ b/spec/atomic_assessments_import/examsoft/html_converter_spec.rb @@ -0,0 +1,204 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Converter do + describe "#convert" do + before(:all) do + @data = described_class.new("spec/fixtures/simple.html").convert + end + + it "converts a simple HTML file" do + path = "spec/fixtures/simple.html" + data = described_class.new(path).convert + + expect(data[:activities]).to eq([]) + expect(data[:items].length).to eq(3) + expect(data[:questions].length).to eq(3) + expect(data[:features]).to eq([]) + + item1 = data[:items].find { |i| i[:title] == "Question 1" } + expect(item1).not_to be_nil + expect(item1[:reference]).not_to be_nil + + item2 = data[:items].find { |i| i[:title] == "Question 2" } + expect(item2).not_to be_nil + expect(item2[:reference]).not_to be_nil + + item3 = data[:items].find { |i| i[:title] == "Question 3" } + expect(item3).not_to be_nil + expect(item3[:reference]).not_to be_nil + + question1 = data[:questions].find { |q| q[:data][:stimulus] == "What is the capital of France?" 
} + expect(question1).not_to be_nil + expect(question1[:reference]).to eq(item1[:questions][0][:reference]) + + question2 = data[:questions].find { |q| q[:data][:stimulus] == "What is the capital of Germany?" } + expect(question2).not_to be_nil + expect(question2[:reference]).to eq(item2[:questions][0][:reference]) + + question3 = data[:questions].find { |q| q[:data][:stimulus] == "Which are US state capitals?" } + expect(question3).not_to be_nil + expect(question3[:reference]).to eq(item3[:questions][0][:reference]) + end + + it "converts a HTML from a Tempfile" do + html = Tempfile.new("temp.html") + original_content = File.read("spec/fixtures/simple.html") + html.write(original_content) + html.rewind + data = described_class.new(html).convert + + + expect(data[:activities]).to eq([]) + expect(data[:items].length).to eq(3) + expect(data[:questions].length).to eq(3) + expect(data[:features]).to eq([]) + + item1 = data[:items].find { |i| i[:title] == "Question 1" } + expect(item1).not_to be_nil + expect(item1[:reference]).not_to be_nil + + item2 = data[:items].find { |i| i[:title] == "Question 2" } + expect(item2).not_to be_nil + expect(item2[:reference]).not_to be_nil + + item3 = data[:items].find { |i| i[:title] == "Question 3" } + expect(item3).not_to be_nil + expect(item3[:reference]).not_to be_nil + + end + + it "sets the title and source" do # Currently the converter doesn't set the description since ExamSoft RTF doesn't have a field that maps to it, but we can still test that the title is set correctly and that the source is tagged as coming from ExamSoft. 
+ item1 = @data[:items].find { |i| i[:title] == "Question 1" } + expect(item1).not_to be_nil + # expect(item1[:description]).to eq("This is a question about France") + expect(item1[:title]).to eq("Question 1") + expect(item1[:source]).to match(/ExamSoft Import/) + end + + it "sets tags" do + item1 = @data[:items].find { |i| i[:title] == "Question 1" } + expect(item1).not_to be_nil + expect(item1[:tags]).to eq( + { + Subject: ["Capitals"], + Difficulty: ["Normal"], + } + ) + end + + it "sets duplicate tags" do + item2 = @data[:items].find { |i| i[:title] == "Question 2" } + expect(item2).not_to be_nil + expect(item2[:tags]).to eq( + { + Subject: %w[Capitals Geography], + } + ) + end + + # it "sets external id metadata" do + # csv = <<~CSV + # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer + # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A + # CSV + # data = described_class.new(StringIO.new(csv)).convert + # item1 = data[:items].find { |i| i[:title] == "Question 1" } + # expect(item1).not_to be_nil + # expect(item1[:metadata][:external_id]).to eq("Q001") + # expect(item1[:metadata][:external_id_domain]).to eq("csv") + # expect(item1[:metadata][:import_type]).to eq("csv") + # expect(item1[:source]).to match(/External.*Q001/) + # end + + # it "sets alignment tags" do + # csv = <<~CSV + # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL + # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,"https://example.com/alignment" + # CSV + # data = described_class.new(StringIO.new(csv)).convert + # item1 = data[:items].find { |i| i[:title] == "Question 1" } + # expect(item1).not_to be_nil + # expect(item1[:tags]).to eq( + # { + # Subject: ["Capitals"], + # lrn_aligned: ["ff8a5caa-0f2a-5a53-a128-c8c3e99768a8"], + # } + # ) + # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment]) + # end + + # 
it "sets multiple alignment tags" do + # csv = <<~CSV + # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL,Alignment URL + # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,https://example.com/alignment,https://example.com/alignment2 + # CSV + # data = described_class.new(StringIO.new(csv)).convert + # item1 = data[:items].find { |i| i[:title] == "Question 1" } + # expect(item1).not_to be_nil + # expect(item1[:tags]).to eq( + # { + # Subject: ["Capitals"], + # lrn_aligned: %w[ff8a5caa-0f2a-5a53-a128-c8c3e99768a8 f7d26914-3e2b-5c9c-a550-ce9c853f0c09], + # } + # ) + # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment https://example.com/alignment2]) + # end + + # it "sets alignment tags when one is empty" do + # csv = <<~CSV + # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL,Alignment URL + # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,,https://example.com/alignment2 + # CSV + # data = described_class.new(StringIO.new(csv)).convert + # item1 = data[:items].find { |i| i[:title] == "Question 1" } + # expect(item1).not_to be_nil + # expect(item1[:tags]).to eq( + # { + # Subject: ["Capitals"], + # lrn_aligned: %w[f7d26914-3e2b-5c9c-a550-ce9c853f0c09], + # } + # ) + # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment2]) + # end + + # it "raises if an unknown header is present" do + # csv = <<~CSV + # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Color + # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A, + # CSV + # expect do + # described_class.new(StringIO.new(csv)).convert + # end.to raise_error(StandardError, "Unknown column: Color") + # end + + it "raises if no options are given" do + modified_file = Tempfile.new("modified.html") + # Copy the original content 
and remove the options + original_content = File.read("spec/fixtures/simple.html") + # Remove lines that look like options (e.g., "a) Paris", "b) Versailles", etc.) while keeping the rest of the content intact. This regex looks for lines that start with a letter followed by a parenthesis and some text, which is the typical format for options in ExamSoft RTF exports. + modified_content = original_content.gsub(/[a-oA-O]\)\s*[^\}]*/, "") + modified_file.write(modified_content) + modified_file.rewind + + expect do + described_class.new(modified_file).convert + end.to raise_error(StandardError, /Missing options/) + end + + it "raises if no correct answer is given" do + modified_file = Tempfile.new("temp.html") + # Copy the original RTF content and remove only the asterisks marking correct answers + original_content = File.read("spec/fixtures/simple.html") + # Remove the asterisks (*) that mark correct answers, keeping the options + modified_content = original_content.gsub(/\*([a-oA-O]\))/, '\1') + modified_file.write(modified_content) + modified_file.rewind + + expect do + described_class.new(modified_file).convert + end.to raise_error(StandardError, /Missing correct answer/) + end + end +end diff --git a/spec/atomic_assessments_import/examsoft/converter_spec.rb b/spec/atomic_assessments_import/examsoft/rtf_converter_spec.rb similarity index 98% rename from spec/atomic_assessments_import/examsoft/converter_spec.rb rename to spec/atomic_assessments_import/examsoft/rtf_converter_spec.rb index 18842a7..b4e5577 100644 --- a/spec/atomic_assessments_import/examsoft/converter_spec.rb +++ b/spec/atomic_assessments_import/examsoft/rtf_converter_spec.rb @@ -179,7 +179,7 @@ # Copy the original RTF content and remove the options original_content = File.read("spec/fixtures/simple.rtf") # Remove lines that look like options (e.g., "a) Paris", "b) Versailles", etc.) while keeping the rest of the content intact. 
This regex looks for lines that start with a letter followed by a parenthesis and some text, which is the typical format for options in ExamSoft RTF exports. - modified_content = original_content.gsub(/[a-o]\)\s*[^\}]*/, "") + modified_content = original_content.gsub(/[a-oA-O]\)\s*[^\}]*/, "") modified_rtf_file.write(modified_content) modified_rtf_file.rewind @@ -193,7 +193,7 @@ # Copy the original RTF content and remove only the asterisks marking correct answers original_content = File.read("spec/fixtures/simple.rtf") # Remove the asterisks (*) that mark correct answers, keeping the options - modified_content = original_content.gsub(/\*([a-o]\))/, '\1') + modified_content = original_content.gsub(/\*([a-oA-O]\))/, '\1') modified_rtf_file.write(modified_content) modified_rtf_file.rewind diff --git a/spec/fixtures/no_correct.docx b/spec/fixtures/no_correct.docx new file mode 100644 index 0000000000000000000000000000000000000000..108fd9aed1652adc1457a82da25584c3a8f77a31 GIT binary patch literal 5843 zcmaJ_2RNGz*QQGB8ML;d_SPb3OYNX2YVR0riM=T?OKI(*S}TIutEIM5tM*Gq!|Fq1aqK3hYrG+m%h zu7Xxj7l?qD!{cb}ZpUCD^11`w7ie=9GB^!+ev!+=995O!0`aj~@^|KZQv{vJu{$jG zX?LMgQVSEw@keERbvSlxJSkaR__2UEUE>aEg|xkvH>`d7Jd*r81l*t?+1(8Ri^x*d z)~I5j9dDCYuQFYWPNc^u53I~WiYuSsl^n>$Xf@6d;2uK_F%ttYgwFw!&AAat^X>f) zg4sYuE;scH>IDpOdWC?Ruu427*$_cPqFOO==E|PLa^QXrDz}mous0APt3T$h`P?vn z{dNnqds$biK+w8^2agNf8%>@YWQck#%3@OU+M7}4RuL^%RIArocp0jET$BPZU#&vxTqT>)>jj%NMufeIyaFgoZd9xdp0^+5Ffc=<#49X=>9Ozf|kS ze0KDFLR=FJzz_Ce<+R1d!kW5-g{AqgSt7rfB?~8~^CYEfsoz`2Bx#p7D}uL-K2A)w zSPNH<;ZBCq;rq;@dV3h90 zaz8X>vsQKvu8XEh?QrSk4SPcot>`~b8n!ySv%z*YqGME*CMaEH7V|yF&e?@cJ^^-` zR1_^#-5OG!yN?}F<+_l;GWfhB&0=9^ixaj8(X&-ZVn)JheZ9O*N`9CaKTr@EsvG2; z)f~7&GKSSeo0e-I37K`S3K^=Md!L`d=Z#1XEpX!;mv*2%twe?zCtOABvK=C+4ocft zXknW~JCEzH8`Hox!OjE00P0m(p(|zY_A_55%O9vau}b&Tdv!&_2Bja%ynlV|tH}~J zr9>e(0Ovdzu4@TH_b|zj|6?+!E+)gu-PH~1@G~F1WrQt4z|grF1pdU74j^{0GD<>9 z=itR`HMT3XLp{phmkqWR6V7}~V@T%oZXx*u#lspwUT!RY5>ifn=qX%6rMVMj&J>cd 
zh{yNAW6YQhH=+|eijU=yBm(#lKHiHWNtUz{U{io@U9l|g(&!*mj6Do|`B~X?Ih8Hc zWytlbVV5w;l`8KcoVi;2EuqtmK*vD?hay;I6>px$o7l6l=-2W_{B$IohV44l@e*Bd zV?>aA5MnlZj_O=>Xy9?Me3!I`MDEJ}mq{o)DkwVx=lh3KYvq$cyROB|HcuGs9`8rDe$p3Q-yw~moV&a&k2~;z9=?pz zbHQq_uPSo?kSN{2J7>qky1-!=bMMK2WQXi8c3j=OA46QvS*h2tfKCdLpEj6L97(C) z$$P6g6e+B$ur@9>$?POSJ3mq;GnFuCc6&;Niz6r7q)L#yaW@@3>$`unXw{hw7!Fg3 zjXP73y9U&;QRBPs;^{jg#THw}B7)L3SG{T1vDk*#-Rl@9QKG8xvrt@50FD-DG0+>D z6D-N|00ol3CNOt!{j)J`TUBsm`}?mP&jBhfbjeMZ?Yq8(7s+vC8`7mrQSo%^FG*%! zQLXtFK-r)=3*=fAj!wuxKPXsU<1T&NEK!lxPyAFBlqwK9T#9lu93!i7s<4R*Xk*Zu zrfVF&?N(p-U=FQH_$X@+pBTc&A^tdMGBNpDYPOA}bEYMqni78Vc$vkExEdh1JL~9U z>u`>B;nnRG4Iu=PB65j+e|aXG#<3M=JAs&|L)sz8B3P~GR=X@8J+T7X@$Bm%B1=5Q z+pbw;>`}ANP|Ce$>naPUMY6yclb_C#lK2dn zigrom>MtG6AVgZ>t5+oS)YVBxaAbU3Dx@QI{n0>36X98FT%lIXsUgnq?PonjEliR+ zEX|w2*I#>QS?n7&G;}&^*6~;&jW4EV298=#PpETb;;9){7e|Gimb@_hDVpfgPu&jKW0 z%+^;}o#U_jm65fN6yJMa(C4w_H0?(~maT_bY>u~hb%M+F?d7hQPpGz=KHDS7p0Le2 z-L9SKDL_7zJdX7d6ucihXBCx5=r792%xb#}5jey2K>+@3v*S0fFh$)!@Ta~=`B!1v zKpow5EG!@Y)JRpDvKp=D(Q`+34Bp_4@G?r5D1jUjImB^nBx1z68T*}!hGz7KA=ul9RwU%8yqizvE_=N`n2{&WiWqJx?lqgaO1gkIP!k z*ER@mI#Pkw{Ln_~9BIhfkdn?oFP`x##FI!-X|Fwkb86uSsx9=GQL2mcCu*txqSnn0 z;s6o+c@#eH95adP&?zC}F!TvEZ0qg(dxAk04Piba53bU?)iYO9sz;Q|S@So)q7fY$ z^Cg+BLgp)ePnBi-6e6G1*Z#<4_g1u0$fu&4QR9;R+^w@Bor!F4L<(}1k-8C<55MeZ zGxTW;cn5>?YO?OkO$xAPhPsZ#33u1iZ>Sq&?)w_p10|rTui$v5b%k+{K`A{cmon|C zpaGl>JtED{_rG$6PL5o|ZoIv_K;q#D_u&}uQXHHNHx~6Ui1p~hi3}9ha4o4$?218mrq@BZ}NfHiWN*LLCx_<*#o!Y7h1AssV< z(hr8vBiFJ&JYlx6O4;1q{2|$R+M>SGA|ns9c^6cV92 z<99auVR?CPY07JubVfjbYqm>S;{N9(txjy=23UW4D zaJ8FhjYDR(p7MfgF=u7oS0H-L<#&vx3RyYum`Bqw6B^!BB-lDAWH&%>Y#NaJP{hqQ`>@h|GQB9o79U6Xj2~v zow`XLfHZUNHwye%p6+HB3X-K~YcI{+w(<1N&5QpSc8wsed_iUdiqyKzC)gQTiMwPm z4z_;kiS+mU;?KUjzm^$SD!U${S79eYrxX~uu9jy><8O$3RUgyjohx#^s>Pu4F0F~l z*|tZTb?O*34_Qctah9!?gYLZ7Q)*Y+M3kOX`iW@qTlwW0)VNGmZ)oI4adK)|?trx&aQQaBVY4A-rK||5` z%2Cfr38gDCprVU>tCN>$@`$?CYH;ER_3gBQZ9pY`B0=FGvxjb( z5Q|G=Jyw~U0ztaz0g77-k8DD02h&@OLnm0UbP`M__5?ejz?WxJVZC$hVOS5g 
z0emU&IVGAPC3rd@TbX$Q^O`-`z7 zWX(Lbx`At-?X+0*`M4^y@}i(`hekJo~&WQxbXJ#lZM%|USQIC%xivy=e zyg0O-%}`V|=baa^WR)-}Rh4b~zVKbDmY!gS$mHiV`pM2Du}#frut7onn6CJE=&}BT^`wWU6n!m-jsLDc}t$^n$N7n2Ks3Z6}uOnw3IEZase&u^%Ih8D{VAMdPZ+4@$JfRu@|) zMUk0ee|X&oeJMmIYXy^KGqoYp{Avxos>cXP!1Ot zxa<>^u9PiM4TS{Qo!J{VOEW#%rlkk)yHKNK>Lqm%MO!)kUZnJAdOzk15R~5r<5^z> zVA+kewMo*YtWILbTAnHGq<^&zF`e&Y`$p9U-*V{@8U5bSEqI=!W;4W+HKvId;+`i7 z`w|fA*ZS|`%IUn7|1TmR~ zwaV`&xMl9{6|g zMPYpIEPe$W#)bU6jDPry-|-i9&$)d56#<0*!~ZU(e?P^=DgVy`Kob9T{TJlY@9>Ka t;Gb|Y(*MCP3WML_7fbV>6`+sd?=^}bN)rS`fe?c9u5<#TG^HaF zAQUllq(~Kb!T0W!>;GRT>ztLfl5d}VzL`BU-x%r=5Ypq3l9J*XmTFq#oiQ5RcSneW zAr#`_CFTHuf<*nn_u`FgZbXUGhwR=;XjVV}1|;1|do>99u**rRc?9~t&@iKx1>N$x z)y{Z#8m+YZ{KesjV`za6glas-GM}GzX>9GNU~I*VRlzf|=W6m$^z8PQT6ds6b-!G zH6#0>!mDMKw~!vBsZ61mV}l@oU)Lbsur$J=Q^{HN5K6GctfE-T$R7uk`mX&Yxl8`} zK-VRnFG{GEsn_>Ite1;HeXSjG7fWH4YxkA*xn?}N94oxe_o*I;6&Xn8j~l1@rrSQ!TSUphoy+~^$Zl72yQGfnjq?>1W0u4CE zwV0iQcH7D+GvC-BJn}xAxAOTGQ+rvACS2J|3$>l?zrl1OR9G_pfpPTvYc3@LH&$yZ z8iZY#J(z3TM<%p@X@uBK+&yDqKO2xLA)E;CJ+9L6h z+muV3pNW)a$5p&kYx%!z@~*IvGt83^+wvTV;gq4WqAKMn z$(pC3k+s!5#!*i~N(U#z=;K_W{zq5PopptSkC!(D{L>pBE6Cf#>0vXINYc+}6OzQi z>Nr^ilih@=T6`}^r+!>W5Le{d;|J`@2180m>c!NbY250>gaq(JC;@yTkRybwHmYmd zjx9QSo>;iS_medjQA`(poG`~-IV8OsV(jBIWtyCWD3=Dr=!K_|)Z>qB=_|AN$?_&6 zH$TJZ_Adv^?QbkyJ<H+gDF;j-*^@e6Bwk!Ks zELYwn>rrzcGUXBpQBi$FY^42iTlTciZgt}W{nswHUCI#GHy7@db{lk(YbNf6Kl=c* z#b$89phI4pmfaGR7i$8B2xjVC*Tj!j!rccgz$KB@H9`fxFB6YH#lN_2CBjU}XZg;g zHd(eCVU3Kq9s%02ez%`epnGK+)PJzfu@>aKkoW;*oS-WzPfn!k`y$WbIo+hSu_M4@ z>}2ygvX^ynCnL{w7QKb)%2j&Om!T1t{PJ@M{wYE8(4FfGxYzIhqvKg||L}uA9mW33 z5-w@_fI;rwr_TQ`V@5r=2%v5}WUybCE{Tni`hGd`)+=`Ep;ZDjH+RtjhQ*{hGUfrB zv$Vn$J!u=8`qZp}{dX?-+VlkKC3*u3I<3l>M~vm&nu`D{?U{#VEXx*S1GcLAj%m+> zOda?_9EF#}1>%>vi53Li5%S&S``nMQ6W_0L9QHe+)xyyi$jX1XZLoulQ{&reZXy9! 
zLn@|Ul!_c=^V`6}F5UAv9e$CF--X_K_X866e}GzGd|N_o_c}@G zeS?`Q278Dr{#Y&iJER9KxvwO%g&W={dGg>kHDc5)TUqN_T8IJo+l*QQ0BB&Lf7u?2PJAea7*TNG@W3N z`BypkMV-3sP}*jlDSG}ziTIT4^;?D5C4usLDA}saeu{@W@)@GA;qqyB%TH8w9#u|B zp&hJd6U;5cD&9@Sw`SIL$nWNUC#3)h^T^zb7=M~}IRow_=b3XySWk--HCADtkW@#n z+mpNhwtYCyv6$)Gf`K^lye8%=_YQUve!;z+;9ClXV5cHD!aho`SNYvFVHOIFb@$`V zJ!Gy-dVmW`^3z>Z;86OFN6XrCNBbkkf~<{3Itn@Og@npv3RNps+m{$diA-%gUSFTc zRgnDvLdQch`1{L;v&k_Ir217Uy^W2EF+ACCzZNqCyiS_QFU0uOTUY6o@)^j81Oz*x z86+q(h82afh5PGOu~OUk5ZFZSl>323rtsssNyPM=eoCV|n_%6rzRdLfqp}x;zKz=^ zhMVR!Wi&s|k0dpqgwk8>c5GNHBU`o5)3+UK>wK0Ck|($dKO3}F(To>T-EXMhCF#mE zCAOEpUdq*1T$>kSx_ODS4?r5QBNlj1Zi4YGNv^Z+?ZmtQnc5U6?>B6BOB?Ft)mizY zU>*52i!lF~^<98ZyIxQ>9$UsdBL4Bqd`UfQ(ZFh!C<3FfMwqf|archpTo~QA)(@^2 zsxYo850(1K-a^bnxr4+&F)`!B8Hc#1U^k}AdLA-p!q-(!8;hgp%Jja*^v(vZx#K?nCKJt6?Q$?!4dcYI}-OGb^cgw@nTL=(?a=iPvO2T84n{-C*pjtfB>OP36J7l_Zz!GZ*Dp(;HSDC_zF| zcdw$|ZfS*F+MP~*DR|vVpC=PjA6?cJ?k_l2gA9wkqUisf>@;T}c6HiVaHZ6g;7`;t z{6(#|3kVDn`+1i*%^Z_Y^&x0+iu>z_43TR^vv0@-ISeF(&-?P1tJh94rPq!CD>+}S zZmuIc4Q9)7+Qsb_f*%5vf;D0vHPxTwa0h5QXuP6hp48*N_MyjQK`{r@?2ZxRuK;+H zR}Mex=duiJ34I-j5HjT4m>C!4%z=50BuVr%v8?D@HTO!QJdHQ`VB;f~iFssS){9&R2@GThKfZkq_zBCcqOGVgmK>#9HOIM? 
z5keglRT92UofTT#wabp+?Bk%5C!+!_Gc@20Qq~D8Qhvf)v_Ht0svc#5DmCHZVVhHC zJ`bFm)t)F9DX@L~g}QG&yXW=x96pujBc-FgZVX@#h73A*eJfOYdVlg#%Y^p%!n7@# zP-cy>83Gev4i#MHvGYNsBjSf*Y@ZX2@D61t!u)RVlK-lF`)~#q(G$eyKn|%-eFeO0 z=Jv!BUhI$Xw6%Fy#cH#o6%XhIW{57veU#bqmJ+71lwb*tUccc;GTzMAwfa({(sov7 zLB_JEk|wUSV#{s`%abf5#4Ek?UZjsvf<9|0O_9s z>{Q(REZJTlZ*Nz3=bvQ>lpq1Y)oIx7acrl8tl`628nUHG`D!}vMPFIQI|4fvJFt}F z1GeZGoyp+isRk_e`&YEjXEKM%FS<3)u!8E4g4$#A!oDNlo#M*3({~p-9bSEWHk==5 zPNkvdbQ7T`y=Ov!a7$Ou@sSjf2YQh&z^>9?Du1+5KoaW{<0#HkQEwo&6^yuKUM6U< zul}Vnhx8(4U@G=Y-5{f(13}39Ala2?kwAm~r%M81OcL9A5Nv-gNh%v1c5IH{W&NA3 zn_lGm5pWA9WB599a!lWw?RhmDWEJI`+Q)7JrybThEUT*GHOjq@y)!T@H9B2WlOf62 zqnme}*)`iC5)k^}?pISk2@dX1{R{6F?e3B_)*&%vp+EbeWh-LI3%FC;hP(c~QT&_K zvmMatE!>IKq;A^1Md5$fJ}!5Ko{Z)*P}rQGnZyk-jAfAHma$zEbxKPeQ4b4p-sgPf z%Pb4%mo5;qOuVx7^@9f{v{AXGh+*Qgoa?rfj^-Sxp)+q<{c}xrv$XeOTpqRy*8Ff! zX7GRp))~_@l~>o5!CXV4*)Mu^sDLx8zPQcu&77r^<8y%@o=!`{^swG?_FU!2{G0Pl z`g3onn3>`-z_1)k|Ij?iH8rP@Qh!OdlZ$kMGwEeTc|HW=tJqDWI*y}EB-e$i_*9`O5YpZ134#P05w23i z384ssCh*-ZP(;X{s=vETJu}&LCEQGJhd>*@LiUqmVDpXzbmTOVvQ@7I~hZ9gLQVt%e^` zx5b~)Fl`zmuS)5zC_qiBTY8WFV76h+JLHf=rJd@;Y!e%e32hViGPCLtYzic}Exgd> zOr+HKxn2L*tl#TPLepM03X9+)?HFf;_pv2J4+ z3(Zdv&9eIsDU2IY?*PD~$)TwHSww4FV0wTQXL@()F~l z)YFVtJR%rka?3v;D?1P0(@V*uY4DeXzHa5e2D1CKcHEe6Ex}+x9zwYMu3h0ifOb8L zh&`UE?EO)g&XG#>N7*U)K%GVv%5vuQA8ILM*U->?1*<*FG(BEqaL8mH!%5gi(#bC1 zR^%SouKoU(FH(C6!bMTvBo)y6yEoqkkPV@+1}y;ERtql^z^#kdKwWU5tDz@+?M zZ;hF5P58WIXaRhB5yLoBmzT~hD&ykr!d^0(m0mXPl_cjKLRt=+R)=z+(A=VVeQF=( zQq9cBEG?Crh%-FVy*W=dJXB(KKkZhj{5<_iM|=b=xOz>18#es==|7dfR_eG`2q%4R zn*?}(ylhS@c8y5`t{JSZ>CwSzQ%sl&G1wDAI^hrANplF-7C3{XpkJIPr`?5^r7in7&=g(hVP+HL=%1>U$>^zph zm8my3;)(Q=2W2}FYfJB>#!=bgH@xVBJQHWW<`8*}%hrj?aMLD!U@qdZ6~@Q`mH2)j zB#zQIptqz1F%6{l1?mh%jAg%VfGtIfnRfP5+?O+>GH%-!Z@;#xWwhqwX{0r1nn+uP z+AFC(j>W^sMt1=zMb=^Ny}eA5g3*lHk57NJfSGI&!f zNg30hf}mGrR?tYHS0f^##|Fk_X~9KnA)ru~V^?dGBHP_>j4UJ~P=;xxCOK1N$y#2B zKY-=f>}0kONxMCm%$X31=klqeLykFpaU4JK&au`;)}~{$?Q9>{HeCl|4caR?`lGo= 
z?6mHoZX>@s;?i?5(W#U0&(Y)k8ta`MC7mYef7<^U^8F4!JN`JGH~q4F+ynoH|8M&A zJNoR9;WS2mnFKDT{zm^AnEbxLSrL9Zi}+<^g#VaK{0=;;Pfs=aFB8O(^e4ss`$}g8 z!Kt48WkBM;8i~Kv?eE~TE$^vj_+^|peerV}|4`{`WTd_a)BO z?|%lsB*kC*Khp^P4nLa!{t1@^{11M%U-zB^UizczI)&Oy?cK5-Yl7lON=H4)HG}qbaZqSx|J!6g4l;< z_QsvEY!L@HBqQc$9$xhY3z~M}ghZ|?woUBIS9nhmV>@0q%uchcbTou-T~b>+Ig=w9+iXYSVmkTs(<>=O%AFY{Q+B@z&MEz*-(qoGC zvg)Q-`OuV-1}?Z5vcNP}WjEaNh$nyYLYk;hW{ePhe+akn`>zrDec^G~RxFzFRik`p zm&4u1m#y7nf==SNL-9L}#TC;>v7~~|p$a@>`D&VQ-R&=r+t}NA5pzqrRomC{FsIP| zVTCrHa%GQ85S94V?UiCF)%H^FSEq+GtIa9r7F5{@I%UzhAZJPn zidiZOia);qmrE&)t$PVcC6?Pgt)TOxh~hK3?ct5)5ql{$%Xb+5(JChAtEc-d_1gWK z!Ro@t8l%mK=Yeoz4`?B)ZBT_x@9@N)4_NQrz0?u!*!4q*{hj{d>3b9B2-_n%9hyyn zAH&dAF)E{r6w>rH4Elsk3JsH47W=8(_DfP4=PdGy(>Q2eW8o4H{M~iC4woB$em_;h z+d@|{G7}B-kvWzs89Z=*-4mx0_3xI-d}W1j)_S?pZJNU!L2~^pvgL|yvZ8VN_4Hy= zFyB_b!#pgzche|E2bb5dlMu}2oRq3hQLr0flm0<~P;TAcT~pjB!#C_H1PZ;!m8oIB)G-)JoO|Yp z(1EnZeE9ap^Xw;Dy2BpVJ@7ByMAXHoYHJJ!$WB9-(cF6yb%_-l#bZH&z>mpF>n>fOl$S?;% z;Ok{m@(A1a?Zd0eSLfyKWd>Np?LQ%OsWMa}2~eF~9u|;*^MOnBRJ^ENKaqu{AMB=ndp)V%alBHETYWEy? 
zL_|c>2*PMd@@r{3_MQVL>3ay6&3j9$c9}NGR%P~NlZV{dV0mEI*3sk4_rVu%Pi^=A z-C$ggE|qSy6}a8cEizOcbYV$d4(sIX)vt71Gywj^-F`LSCk;00T+St3;%=~R_#f_MEXfbsG;Q7z_Mk!RE7 z0)FOd#)-u&)benVi=2%BF>B0y*#j1V36deZI7VMUyzz)5(0UCJbN?;2aIazUL$Ccg z9W}!$1$0Gy094e_A(F%dX_qaqe_JC5o{YbgX=b!ckdd;K_}-OuCoXpgWPeI~7Q>Q|+N4Ln;y0H5|A=Cj zP6*qY+Tled#}c<4k&yh#o!xd|Q6hHC{Hw_(b&}sClMqfAXcTeOV;^gFVDe9x!D{{w zQucNCM44ONl++Mod`{8@@nqsW*No*i%rPu1Jz8*tqf)eT8DJy5%3NzPnJ18Fhp;Qv zhso8Km{mdo+Fj=bZW8u8G~Z6WFz>&Qf?!*8S2wmQvpBkO{!Z8OP@RrGckCe#s~xtv z%{GjvY~w?zZW|8zN0tK1g?zzbC}@qf3oGv&WgvZXRC~z4<+}v8l=Iv73IyZWd-O%L zk@+EtnHq?aHMc4TV4PtPlI^%>XMj>pn9J*7MCHCMXp-)Z4CVaAD-vmMbvEz=y=%>~L0tw(LL2?8{=~Xs_&8|4vgXZ# z_vb#>@^=~IrD4|0I$G|gC5Es=3&s+!Ozpxzc`4Ov7>3WWN^IOOlD-ZY z={!Kv^pcpz_KtyNP*;?snOyX`=w193l^Tvm+>dnAhSd>$u)gb#1MPSy3Fx zWprF|a-ps+OCB_6P<)czzt}Am8mVyKd(|>nj!!7kh@&SER z9KZ1E{5AhdU}_hIu-5bFSr|*?yrbU;GVV=HZh%LuswPCR=SAs`S8!gjOf8Js2B|3RKko#lfiVXW2LT#IPrX26zlV-;&I zDcUvoK{K6NqW-oKYp?ytHHd}VU>zZ&42gV$>T4NfH|||>bV*DNU0m09wtF0OP)82N zZydvh`hrxe*_PMubQP* zOI;*>6il|4!U|TDAfw|L_zioQuMza!_eTl(D*sA zf`_!&PBmFW`A5A>14u9Rnx7gWE0qi;t&mLynnYSl7(>KYRPapOF>ZZFLCtH{*M#HC zsDnB4Kn(9@`lb{7^XHiMzPkdTKS*%*%$I(3akIOF>MYc29Dg1#)g^TmY(G!YMGqmF&TeCdy{$6c>Zt8ePHD-dPV?&+=kkxTMY;AffiqbOLm(@+k$wL5)g;8XD4JG( zUvW6I(qo8ii(Amm&+*(IJw^&g8a>B14B=$lzhs#+J}`RG%KhaRmzJ|oQ2f-P+?7%Zc{{gVfElvOc delta 3972 zcmZ9Pc{J4BAIE3LzKwmau}gL{M3RI&mTVJa8(Y?qC9*Glv+r4EP(p|pYca@{?EBIT zCVN78lw{3Q=%>#4{f_5%&$;KGd(Y=?uRq@J&!<8vSB}xdfP#_(L`zEx>V;-7iU3|n zTc0jLj)?y5lEaC`bVwBq4` zL%yefBr&FtR?#PB(mZmyGDPi0#_ml9lrkT^KV`h@n${T8T57EF9M228sASFbK-?3E zSGzKYQ-4$8BZDX{8efuSZ}@}8E+B4O00kbSee6J0K=+aNE5r#QT70kMy;jN0Obav1 zIV~o7LXe6f^61mPLsn~8p+!xe{*Pyot<%XWub7RXeT`n&z)73#<6B2BgKZC<^&x{Q zKRg&h-yM>XUwQ}cCkswO&NUGaU+iFmwJ6aD+s&@~`V-rsGd9!}bK#?T_CSpCtS*aj zf4Xn{6`!aI1a~a!I_7=yo_gqgt5MU(8J=udY4H_v)zXy|ww)QJcBb_!Ht&YNR^rkk zNZ`Pxz}8xy_a}-(kmhD-?XtH05U^BsL}g+?L#tY;QGo=5KuHh~=qz&`H0VrY!M1`0 zfUI|PhED{^vpJMLY!7t7JrITS8$8`D=`FteYuTkET<{x*Iw*Cp3Y 
z&2U1ZGOXU}x$)?fUY58})LVJW=;1wntx(ytp7tha#1{q8RfjJ4=52qs6soCZS|O2{ zbfWb&P0N0oGKVh!4=XzZbW^AWq0;HVvC{YRXFwk4oI{ z`cr7*qK-TJba-;_Yd_x2jG^F^`V?n(z2sVk^6~7-f@}<@3(BRwO64_G!1u!+jn-Gu z=yA8fl8>n$ga5jbI*FzLjgtdx$GL6{PeF_-b5}y4E`v7hkMPajR8O9JJ$w3nF5NNY zv9D@t)Y27SW`0IEw#-_U>A^BWtVLf82}==CZTEMQ14v|gX`-0u_z+@(Jh>Szl@~Q-2+B(5AnUF1wRc+yS1gL`y37}4;KPkKlS4bGUNZwGlTTz zVO%ChU<#Z+Jua;<5>0~JNJc=fFf&WNxW?^@Ep9!A6|u>ki`?@yNco;*D?l+vpM-zo z$;#aP2Cdbr-kl;FqvVb6`0V+(nh03vTd&CcoRlA<@arxsRA|dw1Sl1svijekz<8iF zQqS`kQNT_8yqlGIRS^gLNV{)LSey$hQT9Q#tY3Z?u}ki zLm}Q2Gc8Zl+E0Qe`5xwDhO~P4wx`{{Kz~?0KM6b;JdkV;&Jg8`g-KVn*om&KuU#cO)6;gCs`W`(`p$?ap0(Z_81+zG^vUv`Xm zN<)_x>J4?7{3MgNv&#YSkNC)MHW|pkDpY?rzV7vLNS*Qs01IN)&}qs}`TA2RTDi0& zED)ss@Xp1{_pjj;g`Sa97`cS_N22a)_GKYak__GRcO(g`Hf_WLdb-l_gU{E)GnN^6 z>dphqkOSt4sNtVOQ~YW6$U5N>EAf%ya)>k+*oUM;+Ys{tW)tw2OkX{6`%CQhOuv~+ z^^tX>9kSUG&<`dwA7kk)nqJjJ-2M(4eC02=wUcn6D5cESm0l$_@{YO}4Q?#m{G4j* zrVewRnvyBT<0F2g9^;s0^usfL;UwKFU{pC`o*vpn7@2~Y1?%pz#Q2IKwhT+ZX13GM5$asR>q>v#x)oh|}^? zD;4Sj_k7a#wuSrPib_gb6-S&(_Vz`ejYi_1Z{wQe>_vFHoMNjR_8rj2SN^dzZLt!N zKN?Hgr{99j`0_r_Sq^4RPirUX7)$?hkv%!pJsR3kYJmDw1VD+*|EGgkMOqPH3OnxI zCC}Km6_z9sq?=5@R7kiJ38Ws!7c}yC1CcdjP8#^){i4|6bujn|!G>+_DKM6E^OL?ZNN5;p$e%^DQtJ7nHB}27}s-n^kG@m=n_-) zki5HZ@lmFsK*e^xCAYhI`sm-`GW%-$*LDC|N9Z zeQ}B}!i~eG*dm~xQdJtcSF%fzQs~Klde28Ie{tjK!u&7cS2-95%s@SC$F)#z*RZ_I19E4Esv%4baVZ_M@OboP+nif6~fjTK~M0!*(Pb<4A~?IsNbZ4hWQ zOiA=mPMY^bVNq`zHKf=eJ-pv^r(UGbc`(OAeb_v1(ZuDr z$ZV^1bTfL0ExNW)EUi@45&3y)v`&r<&LHJd*I8C9XWo5|z8Z^}xSngOA!lj$7Cg+u z+)`#4t4naC98_=gwXtyxy3TXI*VM8 zla=?sIbJqVNRtg?AA8xIF>w_66zL?4UX4S8VBzbjhZlvzBeZG3Qof!Fwl5oA2Z$i`XgB`O|*Z!B0_&0@ibWbC*~mwk#YDKR%Sv zPa^55WT?GPQOfUwT|jQQ(V{aGu0No7kUbTDKa9Ig&)1CHG>Ox?W+^O7(;D`G>fsw^ z8fCm7`P4)FR+&O0(jGWVuEJp<_O4}NTWYtZvxGSHy9R8cwxN9;;g*y|w?pWOY})DK zCL(fVxV&^EfXi*iA*xf7zQAr_3X2mET=!&O*>cs?r0{*YHdZ0 zUd`4gHNRO)$Y%KjyRPk%Qt4OIRr+%z3E*xyd^B4Ms-vyJ^Mrho_3zqEo>p582 zPo?a6sBNNs1G|KmGITkVEVYc?x=9o#Vn}g zWt^*4>M!AW0(tRA4p5p^VX~#^yrZ0p@7ChX;DQKm 
zD#{+Anb0K94DyQcWuB(w%+AJ^7mro^Zp!rN%QZcEf>HlPKk}nrafRz;<5b2d_Z2ZR--(sUQcz2^_%em z2Joyk&9yjrwYX~4gMp_Ka0zZm(rXK(jA*CRxn-VbDf9&ACXLHo(a?#bzJ?_CjlvDr zU}}nDp}wlApW@#ZrjJycK5p0gm*x4KU${oGu$$kY_}26UVKNq&o3g}uGD`2xO~Fee&`!h$K0jQNrtt;hW4bCf0}L#0@+zY3{xM2q4^z^j|t9I@uIAw5D>VXWN? z#R6@>($=!LjM&BJw{_QpR?sc$<_nU%@pW>Pm&XIpb1r_>=CTV5Y>9%>QJ(X_UPPb* z*^4>tiI$Of)^3T@>e^@pM}|$v`SWr0*eXTfnlOCmALJ(%QY@M~Ar zQ;bhGi%SS!o2|O`%}Eo8&kL?6{di>?b3HO=$ZyaizTMezsAHFKvD3&zv799iCo0D9o)5SAhFz{J?vr-G7kWNoszT4q&p>xXzSelx5-sTNx zVuA=KbAGJ@KUGY_HuS4BQ9W}33LSSH%o{zpE;e{vvY4(QR8+;Oj+3k~9ysA}+7x9U z!@{y~gWRh&VhcuxH%wk8e*B6W>h0{*QRJTH6?zrJ;BAF`x2$ie zxld3-?NtVoefK_dUp<}@3VXK#{PB0vJazdIB?vSk^KX@`k7IoTX6l9Np#N71VP)Cj z0>4+zrs*tasv!_40}HSV{I;t?_H$quww;~t_hg3s97LWHYfTB0GBE%{I6(g`V0F-E z(~Hv#<4iNcYH+~*TzA$R+0I0OK%cM?972Dt<2VQLpvS)E5E1 - - + + -

+

+Folder: Geography and Governance Title: Question 1 Category: +Subject/Capitals, Difficulty/Normal 1) What is the capital of France? +~ Paris is the capital and most populous city of France. Versailles +is known for its palace, and Bordeaux is famous for its wine region.

+

+*a) Paris

+

+b) Versailles

+

+c) Bordeaux

+


-

Folder: -Geography and Governance Title: Question 1 Category: -Subject/Capitals, Difficulty/Normal 1) What is the capital of France? -~ Paris is the capital and most populous city of France. Versailles -is known for its palace, and Bordeaux is famous for its wine region.

-

*a) -Paris

-

b) -Versailles

-

c) -Bordeaux

-


+

+
+ +

+

+Type: MA Folder: Geography and Governance Title: Question 2 Category: +Subject/Capitals, Subject/Geography 2) What is the capital of +Germany? ~ Berlin has been the capital of a reunified Germany since +1990. Bonn served as the capital of West Germany, and Brandenburg is +the surrounding federal state.

+

+*a) Berlin

+

+b) Bonn

+

+c) Brandenburg

+


-

Type: -MA Folder: Geography and Governance Title: Question 2 Category: -Geography/US States, Test plan/General Knowledge 2) What is the -capital of Germany? ~ Berlin has been the capital of a reunified -Germany since 1990. Bonn served as the capital of West Germany, and -Brandenburg is the surrounding federal state.

-

*a) -Berlin

-

b) -Bonn

-

c) -Brandenburg

-


+


-

Folder: -Geography and Governance Title: Question 3 Category: Geography/World -Capitals, Test plan/General Knowledge 3) Which are US state capitals? -~ Little Rock is the capital of Arkansas and Denver is the capital of -Colorado. Detroit is the largest city in Michigan, but Lansing is the -actual capital.

-

*a) -Little Rock

-

*b) -Denver

-

c) -Detroit

+

+Folder: Geography and Governance Title: Question 3 Category: +Geography/World Capitals, Test plan/General Knowledge 3) Which are US +state capitals? ~ Little Rock is the capital of Arkansas and Denver +is the capital of Colorado. Detroit is the largest city in Michigan, +but Lansing is the actual capital.

+

+*a) Little Rock

+

+*b) Denver

+

+c) Detroit

\ No newline at end of file From a5b35be10ad29c3efb32e3df5c956c4da34e289b Mon Sep 17 00:00:00 2001 From: Jacob Schwartz Date: Fri, 6 Feb 2026 15:20:24 -0700 Subject: [PATCH 07/30] Add tests to verify question data structure in DOCX, HTML, and RTF converters --- .../examsoft/docx_converter_spec.rb | 21 +++++++++++++++++++ .../examsoft/html_converter_spec.rb | 21 +++++++++++++++++++ .../examsoft/rtf_converter_spec.rb | 21 +++++++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/spec/atomic_assessments_import/examsoft/docx_converter_spec.rb b/spec/atomic_assessments_import/examsoft/docx_converter_spec.rb index 8356f42..f9ae18d 100644 --- a/spec/atomic_assessments_import/examsoft/docx_converter_spec.rb +++ b/spec/atomic_assessments_import/examsoft/docx_converter_spec.rb @@ -42,6 +42,27 @@ expect(question3[:reference]).to eq(item3[:questions][0][:reference]) end + it "correctly structures question data including options and correct answers" do + question1 = @data[:questions].find { |q| q[:data][:stimulus] == "What is the capital of France?" 
} + expect(question1).not_to be_nil + + # Verify basic question structure + expect(question1[:data][:type]).to eq("mcq") + expect(question1[:data][:stimulus]).to eq("What is the capital of France?") + + # Verify options are structured correctly + expect(question1[:data][:options]).to be_a(Array) + expect(question1[:data][:options].length).to be > 0 + expect(question1[:data][:options][0]).to have_key(:label) + expect(question1[:data][:options][0]).to have_key(:value) + + # Verify correct answer is marked + expect(question1[:data][:validation]).to have_key(:valid_response) + expect(question1[:data][:validation][:valid_response]).to have_key(:value) + expect(question1[:data][:validation][:valid_response][:value]).to be_a(Array) + expect(question1[:data][:validation][:valid_response][:value].length).to be > 0 + end + it "converts a DOCX from a Tempfile" do docx = Tempfile.new("temp.docx") original_content = File.read("spec/fixtures/simple.docx") diff --git a/spec/atomic_assessments_import/examsoft/html_converter_spec.rb b/spec/atomic_assessments_import/examsoft/html_converter_spec.rb index e1aafde..ca341bf 100644 --- a/spec/atomic_assessments_import/examsoft/html_converter_spec.rb +++ b/spec/atomic_assessments_import/examsoft/html_converter_spec.rb @@ -42,6 +42,27 @@ expect(question3[:reference]).to eq(item3[:questions][0][:reference]) end + it "correctly structures question data including options and correct answers" do + question1 = @data[:questions].find { |q| q[:data][:stimulus] == "What is the capital of France?" 
} + expect(question1).not_to be_nil + + # Verify basic question structure + expect(question1[:data][:type]).to eq("mcq") + expect(question1[:data][:stimulus]).to eq("What is the capital of France?") + + # Verify options are structured correctly + expect(question1[:data][:options]).to be_a(Array) + expect(question1[:data][:options].length).to be > 0 + expect(question1[:data][:options][0]).to have_key(:label) + expect(question1[:data][:options][0]).to have_key(:value) + + # Verify correct answer is marked + expect(question1[:data][:validation]).to have_key(:valid_response) + expect(question1[:data][:validation][:valid_response]).to have_key(:value) + expect(question1[:data][:validation][:valid_response][:value]).to be_a(Array) + expect(question1[:data][:validation][:valid_response][:value].length).to be > 0 + end + it "converts a HTML from a Tempfile" do html = Tempfile.new("temp.html") original_content = File.read("spec/fixtures/simple.html") diff --git a/spec/atomic_assessments_import/examsoft/rtf_converter_spec.rb b/spec/atomic_assessments_import/examsoft/rtf_converter_spec.rb index b4e5577..2a221e9 100644 --- a/spec/atomic_assessments_import/examsoft/rtf_converter_spec.rb +++ b/spec/atomic_assessments_import/examsoft/rtf_converter_spec.rb @@ -42,6 +42,27 @@ expect(question3[:reference]).to eq(item3[:questions][0][:reference]) end + it "correctly structures question data including options and correct answers" do + question1 = @data[:questions].find { |q| q[:data][:stimulus] == "What is the capital of France?" 
} + expect(question1).not_to be_nil + + # Verify basic question structure + expect(question1[:data][:type]).to eq("mcq") + expect(question1[:data][:stimulus]).to eq("What is the capital of France?") + + # Verify options are structured correctly + expect(question1[:data][:options]).to be_a(Array) + expect(question1[:data][:options].length).to be > 0 + expect(question1[:data][:options][0]).to have_key(:label) + expect(question1[:data][:options][0]).to have_key(:value) + + # Verify correct answer is marked + expect(question1[:data][:validation]).to have_key(:valid_response) + expect(question1[:data][:validation][:valid_response]).to have_key(:value) + expect(question1[:data][:validation][:valid_response][:value]).to be_a(Array) + expect(question1[:data][:validation][:valid_response][:value].length).to be > 0 + end + it "converts a RTF from a Tempfile" do rtf = Tempfile.new("temp.rtf") original_content = File.read("spec/fixtures/simple.rtf") From 396d8913dbd11e7b88ad46f539a05949c3f662d4 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz Date: Tue, 10 Feb 2026 11:35:29 -0700 Subject: [PATCH 08/30] Refactor convert_to_aa_format to accept dynamic import_from parameter and update feedback fields in ExamSoft converter --- lib/atomic_assessments_import.rb | 4 ++-- lib/atomic_assessments_import/exam_soft/converter.rb | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/atomic_assessments_import.rb b/lib/atomic_assessments_import.rb index 2d22292..acd9049 100644 --- a/lib/atomic_assessments_import.rb +++ b/lib/atomic_assessments_import.rb @@ -42,8 +42,8 @@ def self.convert(path, import_from) register_converter("text/html", "examsoft", ExamSoft::Converter) register_converter("application/xhtml+xml", "examsoft", ExamSoft::Converter) - def self.convert_to_aa_format(input_path, output_path) - result = convert(input_path, "csv") + def self.convert_to_aa_format(input_path, output_path, import_from: nil) + result = convert(input_path, import_from) 
AtomicAssessmentsImport::Export.create(output_path, result) { errors: result[:errors], diff --git a/lib/atomic_assessments_import/exam_soft/converter.rb b/lib/atomic_assessments_import/exam_soft/converter.rb index d856c48..15a9a1e 100644 --- a/lib/atomic_assessments_import/exam_soft/converter.rb +++ b/lib/atomic_assessments_import/exam_soft/converter.rb @@ -89,8 +89,8 @@ def convert "distractor rationale" => nil, "sample answer" => nil, "acknowledgements" => nil, - "general feedback" => nil, - "correct feedback" => explanation, + "general feedback" => explanation, + "correct feedback" => nil, "incorrect feedback" => nil, "shuffle options" => nil, "template" => type, From 6d48ce2f8fcfb6d5716a00bdc8e36bee6bdd0737 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz Date: Thu, 12 Feb 2026 17:01:36 -0700 Subject: [PATCH 09/30] Add design doc for flexible ExamSoft importer Documents the heuristic chunker + field detector approach for handling unknown ExamSoft export formats with best-effort parsing. Co-Authored-By: Claude Opus 4.6 --- ...02-11-flexible-examsoft-importer-design.md | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 docs/plans/2026-02-11-flexible-examsoft-importer-design.md diff --git a/docs/plans/2026-02-11-flexible-examsoft-importer-design.md b/docs/plans/2026-02-11-flexible-examsoft-importer-design.md new file mode 100644 index 0000000..cab4af6 --- /dev/null +++ b/docs/plans/2026-02-11-flexible-examsoft-importer-design.md @@ -0,0 +1,127 @@ +# Flexible ExamSoft Importer Design + +## Problem + +The current ExamSoft converter uses rigid regex patterns tied to an assumed export format. Since we don't have real ExamSoft export files and can't confirm the actual format, the importer needs to be flexible enough to handle format variations gracefully. 
+ +## Goals + +- Handle unknown ExamSoft export formats without breaking +- Support all ExamSoft question types (MCQ, multiple-select, T/F, essay, short answer, fill-in-the-blank, matching, ordering) +- Best-effort import with warnings for unparseable content +- Easy to extend with new chunking strategies and question types + +## Pipeline + +``` +Input File (docx/html/rtf/etc.) + | + v +1. Normalize -- Pandoc converts to HTML, Nokogiri parses to DOM + | + v +2. Chunk -- Split DOM into one chunk per question + Tries multiple strategies, picks best + | + v +3. Extract -- Per chunk: detect question type, + extract fields, build row_mock + | + v +Existing Question pipeline (Questions::Question.load -> to_learnosity) +``` + +### Stage 1: Normalize + +Unchanged from current approach. Pandoc converts any input format to HTML. Nokogiri (already in bundle) parses the HTML into a DOM. All subsequent processing works on DOM nodes and text content, not raw HTML strings. + +### Stage 2: Chunk + +The chunker tries multiple splitting strategies in order and picks the first one that produces reasonable results. + +**Strategies (in priority order):** + +1. Metadata marker split -- split where `Folder:` or `Type:` appears at the start of a paragraph +2. Numbered question split -- split where a paragraph starts with `\d+)` or `\d+.` +3. Heading split -- split on `

`-`

` tags +4. Horizontal rule split -- split on `
` tags + +**Scoring:** Each strategy produces candidate chunks. The chunker picks the strategy where the most chunks look "question-like" (contain text followed by lettered/numbered items). Must produce > 1 chunk. + +**Exam header:** Content before the first question chunk is treated as a document-level header. Logged for informational purposes (question count, total points, creation date). Can be wired into output later if valuable. + +**Extensibility:** Each strategy is a self-contained class with a `split(doc)` method. Adding a new strategy means writing the class and adding it to the list. + +If no strategy produces good results, the whole document becomes one chunk and the extractor does its best. + +### Stage 3: Extract + +The extractor runs independent field detectors against each chunk: + +| Detector | What it looks for | Required? | +|------------------|-------------------------------------------------------------------------|------------------------------------| +| QuestionType | "Type:" labels, keywords, or inferred from structure | No (defaults based on structure) | +| QuestionStem | Main question text before options, after numbered prefix | Yes (warns if missing) | +| Options | Lettered/numbered items, bulleted lists | Required for MCQ types | +| CorrectAnswer | `*` prefix, bold, "Answer:" label | Required for MCQ types | +| Metadata | `Folder:`, `Title:`, `Category:` labels (any order) | No | +| Feedback | Text after `~`, or "Explanation:"/"Rationale:" labels | No | +| MatchingPairs | Two parallel lists or table structure | Required for matching type | +| OrderingSequence | Numbered/labeled sequence with correct order indicator | Required for ordering type | + +Each detector returns its result or nil. The extractor assembles findings into a `row_mock` hash compatible with the existing `Questions::Question.load` pipeline. 
+ +## Question Type Mapping + +| ExamSoft Type | Question Class | Learnosity type | Notes | +|-------------------|-----------------------------|-----------------|---------------------------------------------| +| Multiple Choice | MultipleChoice (existing) | mcq | Single response | +| Multiple Select | MultipleChoice (existing) | mcq | `multiple_responses: true` | +| True/False | MultipleChoice (existing) | mcq | Two options (True/False) | +| Essay | Essay (new) | longanswer | Optional word limit, sample answer | +| Short Answer | ShortAnswer (new) | shorttext | Expected answer(s) | +| Fill in the Blank | FillInTheBlank (new) | cloze | Text with blanks, accepted answers per blank| +| Matching | Matching (new) | association | Two lists of items to pair | +| Ordering | Ordering (new) | orderlist | Items with correct sequence | + +**Future types (out of scope):** Drag and drop, hotspot, numeric/formula, matrix/grid, NGN types (bowtie). When encountered, these are imported best-effort as draft items with a warning. + +## Error Handling + +**Approach:** Best-effort throughout. Never fail the whole import due to one bad question. + +**Warning/error levels:** + +- **Info** -- exam header metadata (logged, not surfaced) +- **Warning** -- missing optional fields, unsupported question type imported as draft +- **Error** -- chunk with no usable content, skipped entirely + +**Item status based on parse completeness:** + +- Fully parsed -> `status: "published"` +- Partially parsed (missing required fields or unsupported type) -> `status: "draft"` +- Completely unparseable -> skipped, error logged + +All warnings and errors collected in the output `:errors` array with chunk identifiers. 
+ +## Dependencies + +- **Nokogiri** -- already in bundle (v1.18.3), used for DOM parsing of Pandoc HTML output +- **Pandoc** -- already used, unchanged +- No new external dependencies + +## Testing Strategy + +**Fixture-based tests:** +- Existing fixtures (simple.docx, simple.html, simple.rtf) for backward compatibility +- New fixtures for each question type +- "Messy" fixtures: missing fields, mixed types, exam headers, unexpected formatting + +**Unit tests:** +- Chunker strategies tested independently +- Field detectors tested independently +- New question type classes tested same as MultipleChoice + +**Integration tests:** +- Full pipeline: file in -> items + questions + warnings out +- Partial-parse scenarios: document with N questions where some are unparseable From 74541f0bcb4ec59f59fecaae815cd1ad64034c54 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz Date: Thu, 12 Feb 2026 17:16:01 -0700 Subject: [PATCH 10/30] feat: add chunker base class and MetadataMarkerStrategy Introduce the ExamSoft::Chunker module with a Strategy base class and MetadataMarkerStrategy that splits HTML documents on Folder:/Type: markers. This is the first step in refactoring the ExamSoft converter from rigid regex parsing to a flexible chunker+extractor pipeline. 
Co-Authored-By: Claude Opus 4.6 --- lib/atomic_assessments_import/exam_soft.rb | 2 + .../chunker/metadata_marker_strategy.rb | 38 ++++++++++ .../exam_soft/chunker/strategy.rb | 22 ++++++ .../chunker/metadata_marker_strategy_spec.rb | 76 +++++++++++++++++++ 4 files changed, 138 insertions(+) create mode 100644 lib/atomic_assessments_import/exam_soft/chunker/metadata_marker_strategy.rb create mode 100644 lib/atomic_assessments_import/exam_soft/chunker/strategy.rb create mode 100644 spec/atomic_assessments_import/examsoft/chunker/metadata_marker_strategy_spec.rb diff --git a/lib/atomic_assessments_import/exam_soft.rb b/lib/atomic_assessments_import/exam_soft.rb index 218413d..bee388b 100644 --- a/lib/atomic_assessments_import/exam_soft.rb +++ b/lib/atomic_assessments_import/exam_soft.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true require_relative "exam_soft/converter" +require_relative "exam_soft/chunker/strategy" +require_relative "exam_soft/chunker/metadata_marker_strategy" module AtomicAssessmentsImport module ExamSoft diff --git a/lib/atomic_assessments_import/exam_soft/chunker/metadata_marker_strategy.rb b/lib/atomic_assessments_import/exam_soft/chunker/metadata_marker_strategy.rb new file mode 100644 index 0000000..11736de --- /dev/null +++ b/lib/atomic_assessments_import/exam_soft/chunker/metadata_marker_strategy.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +require_relative "strategy" + +module AtomicAssessmentsImport + module ExamSoft + module Chunker + class MetadataMarkerStrategy < Strategy + MARKER_PATTERN = /\A\s*(?:Type:|Folder:)\s*/i + + def split(doc) + @header_nodes = [] + chunks = [] + current_chunk = [] + found_first = false + + doc.children.each do |node| + text = node.text.strip + next if text.empty? && !node.name.match?(/^(img|table|hr)$/i) + + if text.match?(MARKER_PATTERN) + found_first = true + chunks << current_chunk unless current_chunk.empty? 
+ current_chunk = [node] + elsif found_first + current_chunk << node + else + @header_nodes << node + end + end + + chunks << current_chunk unless current_chunk.empty? + chunks + end + end + end + end +end diff --git a/lib/atomic_assessments_import/exam_soft/chunker/strategy.rb b/lib/atomic_assessments_import/exam_soft/chunker/strategy.rb new file mode 100644 index 0000000..7f9acdb --- /dev/null +++ b/lib/atomic_assessments_import/exam_soft/chunker/strategy.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +module AtomicAssessmentsImport + module ExamSoft + module Chunker + class Strategy + attr_reader :header_nodes + + def initialize + @header_nodes = [] + end + + # Subclasses implement this. Returns an array of chunks, + # where each chunk is an array of Nokogiri nodes belonging to one question. + # Returns empty array if this strategy doesn't apply to the document. + def split(doc) + raise NotImplementedError + end + end + end + end +end diff --git a/spec/atomic_assessments_import/examsoft/chunker/metadata_marker_strategy_spec.rb b/spec/atomic_assessments_import/examsoft/chunker/metadata_marker_strategy_spec.rb new file mode 100644 index 0000000..46b8653 --- /dev/null +++ b/spec/atomic_assessments_import/examsoft/chunker/metadata_marker_strategy_spec.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Chunker::MetadataMarkerStrategy do + describe "#split" do + it "splits HTML on Folder: markers" do + html = <<~HTML +

Folder: Geography Title: Q1 Category: Test 1) What is the capital? ~ Explanation

+

*a) Paris

+

b) London

+

Folder: Science Title: Q2 Category: Test 2) What is H2O? ~ Water

+

*a) Water

+

b) Fire

+ HTML + doc = Nokogiri::HTML.fragment(html) + strategy = described_class.new + chunks = strategy.split(doc) + + expect(chunks.length).to eq(2) + end + + it "splits HTML on Type: markers" do + html = <<~HTML +

Type: MA Title: Q1 Category: Test 1) Question? ~ Expl

+

*a) Answer

+

Type: MCQ Title: Q2 Category: Test 2) Question2? ~ Expl

+

*a) Answer2

+ HTML + doc = Nokogiri::HTML.fragment(html) + strategy = described_class.new + chunks = strategy.split(doc) + + expect(chunks.length).to eq(2) + end + + it "returns empty array when no markers found" do + html = "

Just some text with no markers

" + doc = Nokogiri::HTML.fragment(html) + strategy = described_class.new + chunks = strategy.split(doc) + + expect(chunks).to eq([]) + end + + it "separates exam header from questions" do + html = <<~HTML +

Exam: Midterm 2024

+

Total Questions: 30

+

Folder: Geography Title: Q1 Category: Test 1) Question? ~ Expl

+

*a) Answer

+ HTML + doc = Nokogiri::HTML.fragment(html) + strategy = described_class.new + chunks = strategy.split(doc) + + expect(chunks.length).to eq(1) + expect(strategy.header_nodes).not_to be_empty + end + + it "returns chunks as arrays of Nokogiri nodes" do + html = <<~HTML +

Folder: Geo Title: Q1 Category: Test 1) Question? ~ Expl

+

*a) Answer

+

b) Wrong

+ HTML + doc = Nokogiri::HTML.fragment(html) + strategy = described_class.new + chunks = strategy.split(doc) + + expect(chunks.length).to eq(1) + expect(chunks[0]).to all(be_a(Nokogiri::XML::Node)) + end + end +end From 1b7f111a6f897109b053395d9d1c1728d17dfbfa Mon Sep 17 00:00:00 2001 From: Jacob Schwartz Date: Thu, 12 Feb 2026 17:27:43 -0700 Subject: [PATCH 11/30] feat: add NumberedQuestionStrategy for chunking Add a chunking strategy that splits HTML documents on numbered question patterns (e.g., "1)" or "1.") while ignoring lettered answer options. Header content before the first question is captured separately. Co-Authored-By: Claude Opus 4.6 --- lib/atomic_assessments_import/exam_soft.rb | 1 + .../chunker/numbered_question_strategy.rb | 39 ++++++++++ .../numbered_question_strategy_spec.rb | 72 +++++++++++++++++++ 3 files changed, 112 insertions(+) create mode 100644 lib/atomic_assessments_import/exam_soft/chunker/numbered_question_strategy.rb create mode 100644 spec/atomic_assessments_import/examsoft/chunker/numbered_question_strategy_spec.rb diff --git a/lib/atomic_assessments_import/exam_soft.rb b/lib/atomic_assessments_import/exam_soft.rb index bee388b..b92a08f 100644 --- a/lib/atomic_assessments_import/exam_soft.rb +++ b/lib/atomic_assessments_import/exam_soft.rb @@ -3,6 +3,7 @@ require_relative "exam_soft/converter" require_relative "exam_soft/chunker/strategy" require_relative "exam_soft/chunker/metadata_marker_strategy" +require_relative "exam_soft/chunker/numbered_question_strategy" module AtomicAssessmentsImport module ExamSoft diff --git a/lib/atomic_assessments_import/exam_soft/chunker/numbered_question_strategy.rb b/lib/atomic_assessments_import/exam_soft/chunker/numbered_question_strategy.rb new file mode 100644 index 0000000..93f0d45 --- /dev/null +++ b/lib/atomic_assessments_import/exam_soft/chunker/numbered_question_strategy.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +require_relative "strategy" + +module AtomicAssessmentsImport + 
module ExamSoft + module Chunker + class NumberedQuestionStrategy < Strategy + # Matches "1)" or "1." or "12)" etc. at start of text, but NOT single letters like "a)" + NUMBERED_PATTERN = /\A\s*(\d+)\s*[.)]/ + + def split(doc) + @header_nodes = [] + chunks = [] + current_chunk = [] + found_first = false + + doc.children.each do |node| + text = node.text.strip + next if text.empty? && !node.name.match?(/^(img|table|hr)$/i) + + if text.match?(NUMBERED_PATTERN) + found_first = true + chunks << current_chunk unless current_chunk.empty? + current_chunk = [node] + elsif found_first + current_chunk << node + else + @header_nodes << node + end + end + + chunks << current_chunk unless current_chunk.empty? + found_first ? chunks : [] + end + end + end + end +end diff --git a/spec/atomic_assessments_import/examsoft/chunker/numbered_question_strategy_spec.rb b/spec/atomic_assessments_import/examsoft/chunker/numbered_question_strategy_spec.rb new file mode 100644 index 0000000..fd8e046 --- /dev/null +++ b/spec/atomic_assessments_import/examsoft/chunker/numbered_question_strategy_spec.rb @@ -0,0 +1,72 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Chunker::NumberedQuestionStrategy do + describe "#split" do + it "splits on paragraphs starting with number-paren pattern" do + html = <<~HTML +

1) What is the capital of France?

+

a) Paris

+

b) London

+

2) What is H2O?

+

a) Water

+

b) Fire

+ HTML + doc = Nokogiri::HTML.fragment(html) + chunks = described_class.new.split(doc) + + expect(chunks.length).to eq(2) + end + + it "splits on paragraphs starting with number-dot pattern" do + html = <<~HTML +

1. What is the capital of France?

+

a) Paris

+

2. What is H2O?

+

a) Water

+ HTML + doc = Nokogiri::HTML.fragment(html) + chunks = described_class.new.split(doc) + + expect(chunks.length).to eq(2) + end + + it "returns empty array when no numbered questions found" do + html = "

Just some regular text

More text

" + doc = Nokogiri::HTML.fragment(html) + chunks = described_class.new.split(doc) + + expect(chunks).to eq([]) + end + + it "separates header content before first question" do + html = <<~HTML +

Exam: Midterm

+

Total: 30 questions

+

1) First question?

+

a) Answer

+ HTML + doc = Nokogiri::HTML.fragment(html) + strategy = described_class.new + chunks = strategy.split(doc) + + expect(chunks.length).to eq(1) + expect(strategy.header_nodes.length).to eq(2) + end + + it "does not split on lettered options like a) b) c)" do + html = <<~HTML +

1) What is the capital of France?

+

a) Paris

+

b) London

+

c) Berlin

+ HTML + doc = Nokogiri::HTML.fragment(html) + chunks = described_class.new.split(doc) + + expect(chunks.length).to eq(1) + end + end +end From eae8147fcff96d3fef5abc302b99863401e0b39c Mon Sep 17 00:00:00 2001 From: Jacob Schwartz Date: Thu, 12 Feb 2026 17:33:26 -0700 Subject: [PATCH 12/30] feat: add HeadingSplitStrategy and HorizontalRuleSplitStrategy Co-Authored-By: Claude Opus 4.6 --- lib/atomic_assessments_import/exam_soft.rb | 2 + .../chunker/heading_split_strategy.rb | 38 ++++++++++++++ .../chunker/horizontal_rule_split_strategy.rb | 39 ++++++++++++++ .../chunker/heading_split_strategy_spec.rb | 52 +++++++++++++++++++ .../horizontal_rule_split_strategy_spec.rb | 48 +++++++++++++++++ 5 files changed, 179 insertions(+) create mode 100644 lib/atomic_assessments_import/exam_soft/chunker/heading_split_strategy.rb create mode 100644 lib/atomic_assessments_import/exam_soft/chunker/horizontal_rule_split_strategy.rb create mode 100644 spec/atomic_assessments_import/examsoft/chunker/heading_split_strategy_spec.rb create mode 100644 spec/atomic_assessments_import/examsoft/chunker/horizontal_rule_split_strategy_spec.rb diff --git a/lib/atomic_assessments_import/exam_soft.rb b/lib/atomic_assessments_import/exam_soft.rb index b92a08f..178c818 100644 --- a/lib/atomic_assessments_import/exam_soft.rb +++ b/lib/atomic_assessments_import/exam_soft.rb @@ -4,6 +4,8 @@ require_relative "exam_soft/chunker/strategy" require_relative "exam_soft/chunker/metadata_marker_strategy" require_relative "exam_soft/chunker/numbered_question_strategy" +require_relative "exam_soft/chunker/heading_split_strategy" +require_relative "exam_soft/chunker/horizontal_rule_split_strategy" module AtomicAssessmentsImport module ExamSoft diff --git a/lib/atomic_assessments_import/exam_soft/chunker/heading_split_strategy.rb b/lib/atomic_assessments_import/exam_soft/chunker/heading_split_strategy.rb new file mode 100644 index 0000000..3fbce5a --- /dev/null +++ 
b/lib/atomic_assessments_import/exam_soft/chunker/heading_split_strategy.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +require_relative "strategy" + +module AtomicAssessmentsImport + module ExamSoft + module Chunker + class HeadingSplitStrategy < Strategy + HEADING_PATTERN = /^h[1-6]$/i + + def split(doc) + @header_nodes = [] + chunks = [] + current_chunk = [] + found_first = false + + doc.children.each do |node| + text = node.text.strip + next if text.empty? && !node.name.match?(/^(img|table|hr)$/i) + + if node.name.match?(HEADING_PATTERN) + found_first = true + chunks << current_chunk unless current_chunk.empty? + current_chunk = [node] + elsif found_first + current_chunk << node + else + @header_nodes << node + end + end + + chunks << current_chunk unless current_chunk.empty? + chunks.length >= 2 ? chunks : [] + end + end + end + end +end diff --git a/lib/atomic_assessments_import/exam_soft/chunker/horizontal_rule_split_strategy.rb b/lib/atomic_assessments_import/exam_soft/chunker/horizontal_rule_split_strategy.rb new file mode 100644 index 0000000..e3e46c2 --- /dev/null +++ b/lib/atomic_assessments_import/exam_soft/chunker/horizontal_rule_split_strategy.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +require_relative "strategy" + +module AtomicAssessmentsImport + module ExamSoft + module Chunker + class HorizontalRuleSplitStrategy < Strategy + def split(doc) + @header_nodes = [] + segments = [] + current_segment = [] + + doc.children.each do |node| + text = node.text.strip + + if node.name.match?(/^hr$/i) + segments << current_segment unless current_segment.empty? + current_segment = [] + next + end + + next if text.empty? && !node.name.match?(/^(img|table)$/i) + + current_segment << node + end + + segments << current_segment unless current_segment.empty? + + if segments.length >= 3 + @header_nodes = segments.shift + end + + segments.length >= 2 ? 
segments : [] + end + end + end + end +end diff --git a/spec/atomic_assessments_import/examsoft/chunker/heading_split_strategy_spec.rb b/spec/atomic_assessments_import/examsoft/chunker/heading_split_strategy_spec.rb new file mode 100644 index 0000000..513ad58 --- /dev/null +++ b/spec/atomic_assessments_import/examsoft/chunker/heading_split_strategy_spec.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Chunker::HeadingSplitStrategy do + describe "#split" do + it "splits on heading tags" do + html = <<~HTML +

Question 1

+

What is the capital of France?

+

a) Paris

+

b) London

+

Question 2

+

What is H2O?

+

a) Water

+

b) Fire

+ HTML + doc = Nokogiri::HTML.fragment(html) + chunks = described_class.new.split(doc) + + expect(chunks.length).to eq(2) + end + + it "returns empty array when no headings found" do + html = "

Just some regular text

More text

" + doc = Nokogiri::HTML.fragment(html) + chunks = described_class.new.split(doc) + + expect(chunks).to eq([]) + end + + it "separates header content before first heading" do + html = <<~HTML +

Exam: Midterm 2024

+

Total Questions: 30

+

Question 1

+

What is the capital of France?

+

a) Paris

+

Question 2

+

What is H2O?

+

a) Water

+ HTML + doc = Nokogiri::HTML.fragment(html) + strategy = described_class.new + chunks = strategy.split(doc) + + expect(chunks.length).to eq(2) + expect(strategy.header_nodes).not_to be_empty + end + end +end diff --git a/spec/atomic_assessments_import/examsoft/chunker/horizontal_rule_split_strategy_spec.rb b/spec/atomic_assessments_import/examsoft/chunker/horizontal_rule_split_strategy_spec.rb new file mode 100644 index 0000000..22c17df --- /dev/null +++ b/spec/atomic_assessments_import/examsoft/chunker/horizontal_rule_split_strategy_spec.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Chunker::HorizontalRuleSplitStrategy do + describe "#split" do + it "splits on hr tags" do + html = <<~HTML +

Question 1 text

+

a) Answer

+
+

Question 2 text

+

a) Answer

+ HTML + doc = Nokogiri::HTML.fragment(html) + chunks = described_class.new.split(doc) + + expect(chunks.length).to eq(2) + end + + it "returns empty array when no hr tags found" do + html = "

Just some regular text

More text

" + doc = Nokogiri::HTML.fragment(html) + chunks = described_class.new.split(doc) + + expect(chunks).to eq([]) + end + + it "separates header content before first hr" do + html = <<~HTML +

Exam header info

+
+

Question 1 text

+

a) Answer

+
+

Question 2 text

+

a) Answer

+ HTML + doc = Nokogiri::HTML.fragment(html) + strategy = described_class.new + chunks = strategy.split(doc) + + expect(chunks.length).to eq(2) + expect(strategy.header_nodes).not_to be_empty + end + end +end From 944d1d006f2362d31e090a7967cf337bf292ffc8 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz Date: Fri, 13 Feb 2026 14:39:44 -0700 Subject: [PATCH 13/30] feat: add Chunker orchestrator with strategy cascade Add module-level .chunk(doc) method to ExamSoft::Chunker that tries each strategy in priority order (MetadataMarker > NumberedQuestion > HeadingSplit > HorizontalRuleSplit) and returns the first valid result. Falls back to treating the entire document as a single chunk with a warning when no strategy matches. Co-Authored-By: Claude Opus 4.6 --- lib/atomic_assessments_import.rb | 1 + .../exam_soft/chunker.rb | 46 +++++++++++ .../examsoft/chunker_spec.rb | 76 +++++++++++++++++++ 3 files changed, 123 insertions(+) create mode 100644 lib/atomic_assessments_import/exam_soft/chunker.rb create mode 100644 spec/atomic_assessments_import/examsoft/chunker_spec.rb diff --git a/lib/atomic_assessments_import.rb b/lib/atomic_assessments_import.rb index acd9049..cf9a83c 100644 --- a/lib/atomic_assessments_import.rb +++ b/lib/atomic_assessments_import.rb @@ -8,6 +8,7 @@ require_relative "atomic_assessments_import/writer" require_relative "atomic_assessments_import/export" require_relative "atomic_assessments_import/exam_soft" +require_relative "atomic_assessments_import/exam_soft/chunker" module AtomicAssessmentsImport class Error < StandardError; end diff --git a/lib/atomic_assessments_import/exam_soft/chunker.rb b/lib/atomic_assessments_import/exam_soft/chunker.rb new file mode 100644 index 0000000..2e057d5 --- /dev/null +++ b/lib/atomic_assessments_import/exam_soft/chunker.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +require_relative "chunker/strategy" +require_relative "chunker/metadata_marker_strategy" +require_relative "chunker/numbered_question_strategy" 
+require_relative "chunker/heading_split_strategy" +require_relative "chunker/horizontal_rule_split_strategy" + +module AtomicAssessmentsImport + module ExamSoft + module Chunker + STRATEGIES = [ + MetadataMarkerStrategy, + NumberedQuestionStrategy, + HeadingSplitStrategy, + HorizontalRuleSplitStrategy, + ].freeze + + def self.chunk(doc) + warnings = [] + + STRATEGIES.each do |strategy_class| + strategy = strategy_class.new + chunks = strategy.split(doc) + next if chunks.empty? + + return { + chunks: chunks, + header_nodes: strategy.header_nodes, + warnings: warnings, + } + end + + # No strategy matched — return entire document as one chunk + all_nodes = doc.children.reject { |n| n.text.strip.empty? && !n.name.match?(/^(img|table|hr)$/i) } + warnings << "No chunking strategy matched. Treating entire document as a single question." + + { + chunks: [all_nodes], + header_nodes: [], + warnings: warnings, + } + end + end + end +end diff --git a/spec/atomic_assessments_import/examsoft/chunker_spec.rb b/spec/atomic_assessments_import/examsoft/chunker_spec.rb new file mode 100644 index 0000000..ae65d26 --- /dev/null +++ b/spec/atomic_assessments_import/examsoft/chunker_spec.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Chunker do + describe ".chunk" do + it "uses MetadataMarkerStrategy when Folder: markers are present" do + html = <<~HTML +

Folder: Geo Title: Q1 Category: Test 1) Question? ~ Expl

+

*a) Answer

+

Folder: Sci Title: Q2 Category: Test 2) Question2? ~ Expl

+

*a) Answer2

+ HTML + doc = Nokogiri::HTML.fragment(html) + result = described_class.chunk(doc) + + expect(result[:chunks].length).to eq(2) + end + + it "falls back to NumberedQuestionStrategy when no metadata markers" do + html = <<~HTML +

1) What is the capital of France?

+

a) Paris

+

b) London

+

2) What is H2O?

+

a) Water

+

b) Fire

+ HTML + doc = Nokogiri::HTML.fragment(html) + result = described_class.chunk(doc) + + expect(result[:chunks].length).to eq(2) + end + + it "falls back to HeadingSplitStrategy when no numbers" do + html = <<~HTML +

Question 1

+

What is the capital?

+

a) Paris

+

Question 2

+

What is H2O?

+

a) Water

+ HTML + doc = Nokogiri::HTML.fragment(html) + result = described_class.chunk(doc) + + expect(result[:chunks].length).to eq(2) + end + + it "returns whole document as single chunk when no strategy matches" do + html = <<~HTML +

Some question text here

+

a) An option

+ HTML + doc = Nokogiri::HTML.fragment(html) + result = described_class.chunk(doc) + + expect(result[:chunks].length).to eq(1) + expect(result[:warnings]).to include(a_string_matching(/no chunking strategy/i)) + end + + it "extracts header nodes" do + html = <<~HTML +

Exam: Midterm 2024

+

Total Questions: 30

+

Folder: Geo Title: Q1 Category: Test 1) Question? ~ Expl

+

*a) Answer

+ HTML + doc = Nokogiri::HTML.fragment(html) + result = described_class.chunk(doc) + + expect(result[:header_nodes]).not_to be_empty + end + end +end From 33517f27af794dea10782e3cf86bce3ffad7d929 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz Date: Fri, 13 Feb 2026 14:43:31 -0700 Subject: [PATCH 14/30] feat: add core field detectors (stem, options, correct answer) Add three extractor detector classes for parsing ExamSoft question chunks: - QuestionStemDetector: extracts question text, strips metadata prefixes and explanations - OptionsDetector: finds lettered answer options with asterisk/bold correct markers - CorrectAnswerDetector: determines correct answers from option markers or Answer: labels Co-Authored-By: Claude Opus 4.6 --- lib/atomic_assessments_import/exam_soft.rb | 3 + .../extractor/correct_answer_detector.rb | 36 +++++++++++ .../exam_soft/extractor/options_detector.rb | 44 +++++++++++++ .../extractor/question_stem_detector.rb | 44 +++++++++++++ .../extractor/correct_answer_detector_spec.rb | 50 +++++++++++++++ .../extractor/options_detector_spec.rb | 62 +++++++++++++++++++ .../extractor/question_stem_detector_spec.rb | 46 ++++++++++++++ 7 files changed, 285 insertions(+) create mode 100644 lib/atomic_assessments_import/exam_soft/extractor/correct_answer_detector.rb create mode 100644 lib/atomic_assessments_import/exam_soft/extractor/options_detector.rb create mode 100644 lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb create mode 100644 spec/atomic_assessments_import/examsoft/extractor/correct_answer_detector_spec.rb create mode 100644 spec/atomic_assessments_import/examsoft/extractor/options_detector_spec.rb create mode 100644 spec/atomic_assessments_import/examsoft/extractor/question_stem_detector_spec.rb diff --git a/lib/atomic_assessments_import/exam_soft.rb b/lib/atomic_assessments_import/exam_soft.rb index 178c818..481caec 100644 --- a/lib/atomic_assessments_import/exam_soft.rb +++ 
b/lib/atomic_assessments_import/exam_soft.rb @@ -6,6 +6,9 @@ require_relative "exam_soft/chunker/numbered_question_strategy" require_relative "exam_soft/chunker/heading_split_strategy" require_relative "exam_soft/chunker/horizontal_rule_split_strategy" +require_relative "exam_soft/extractor/question_stem_detector" +require_relative "exam_soft/extractor/options_detector" +require_relative "exam_soft/extractor/correct_answer_detector" module AtomicAssessmentsImport module ExamSoft diff --git a/lib/atomic_assessments_import/exam_soft/extractor/correct_answer_detector.rb b/lib/atomic_assessments_import/exam_soft/extractor/correct_answer_detector.rb new file mode 100644 index 0000000..79c44ef --- /dev/null +++ b/lib/atomic_assessments_import/exam_soft/extractor/correct_answer_detector.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module AtomicAssessmentsImport + module ExamSoft + module Extractor + class CorrectAnswerDetector + ANSWER_LABEL_PATTERN = /\AAnswer:\s*(.+)/i + + def initialize(nodes, options) + @nodes = nodes + @options = options + end + + def detect + # First: check options for correct: true markers + from_options = @options.select { |opt| opt[:correct] }.map { |opt| opt[:letter] } + return from_options unless from_options.empty? + + # Second: scan nodes for "Answer:" label + @nodes.each do |node| + text = node.text.strip + match = text.match(ANSWER_LABEL_PATTERN) + next unless match + + answer_text = match[1].strip + # Parse comma/space-separated letters + letters = answer_text.split(/[\s,;]+/).map(&:strip).reject(&:empty?).map(&:downcase) + return letters unless letters.empty? 
+ end + + [] + end + end + end + end +end diff --git a/lib/atomic_assessments_import/exam_soft/extractor/options_detector.rb b/lib/atomic_assessments_import/exam_soft/extractor/options_detector.rb new file mode 100644 index 0000000..2acff4b --- /dev/null +++ b/lib/atomic_assessments_import/exam_soft/extractor/options_detector.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +module AtomicAssessmentsImport + module ExamSoft + module Extractor + class OptionsDetector + OPTION_PATTERN = /\A\s*(\*?)([a-oA-O])\s*[.)]\s*(.+)/m + + def initialize(nodes) + @nodes = nodes + end + + def detect + options = [] + + @nodes.each do |node| + text = node.text.strip + match = text.match(OPTION_PATTERN) + next unless match + + marker = match[1] + letter = match[2].downcase + option_text = match[3].strip + + correct = marker == "*" || bold_node?(node) + + options << { text: option_text, letter: letter, correct: correct } + end + + options + end + + private + + def bold_node?(node) + # Check if the node's first significant child is a <strong> or <b> element + node.css("strong, b").any? do |bold_el| + bold_el.text.strip == node.text.strip + end + end + end + end + end +end diff --git a/lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb b/lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb new file mode 100644 index 0000000..5ad4f77 --- /dev/null +++ b/lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +module AtomicAssessmentsImport + module ExamSoft + module Extractor + class QuestionStemDetector + OPTION_PATTERN = /\A\s*\*?[a-oA-O]\s*[.)]/ + + def initialize(nodes) + @nodes = nodes + end + + def detect + stem_node = @nodes.find do |node| + text = node.text.strip + next if text.empty? + next if text.match?(OPTION_PATTERN) + + true + end + + return nil unless stem_node + + text = stem_node.text.strip + + # Strip metadata prefixes and numbered prefix together + # e.g. 
"Folder: Geo Title: Q1 Category: Test 1) What is the capital?" + if text.match?(/\d+[.)]/) + text = text.sub(/\A.*?\d+[.)]\s*/, "") + else + # Strip standalone metadata labels if present (Folder:, Title:, Category:, Type:) + text = text.sub(/\A\s*(?:(?:Folder|Title|Category|Type):\s*\S+\s*)*/, "") + end + + # Split on tilde and take the first part (remove explanation) + text = text.split("~").first + + text = text&.strip + text.nil? || text.empty? ? nil : text + end + end + end + end +end diff --git a/spec/atomic_assessments_import/examsoft/extractor/correct_answer_detector_spec.rb b/spec/atomic_assessments_import/examsoft/extractor/correct_answer_detector_spec.rb new file mode 100644 index 0000000..df5f606 --- /dev/null +++ b/spec/atomic_assessments_import/examsoft/extractor/correct_answer_detector_spec.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Extractor::CorrectAnswerDetector do + def nodes_from(html) + Nokogiri::HTML.fragment(html).children.to_a + end + + describe "#detect" do + it "detects correct answers from asterisk-marked options" do + options = [ + { text: "Paris", letter: "a", correct: true }, + { text: "London", letter: "b", correct: false }, + ] + result = described_class.new(nodes_from(""), options).detect + expect(result).to eq(["a"]) + end + + it "detects multiple correct answers" do + options = [ + { text: "Little Rock", letter: "a", correct: true }, + { text: "Denver", letter: "b", correct: true }, + { text: "Detroit", letter: "c", correct: false }, + ] + result = described_class.new(nodes_from(""), options).detect + expect(result).to eq(["a", "b"]) + end + + it "detects correct answer from Answer: label in chunk" do + nodes = nodes_from("

Answer: A

") + options = [ + { text: "Paris", letter: "a", correct: false }, + { text: "London", letter: "b", correct: false }, + ] + result = described_class.new(nodes, options).detect + expect(result).to eq(["a"]) + end + + it "returns empty array when no correct answer found" do + options = [ + { text: "Paris", letter: "a", correct: false }, + { text: "London", letter: "b", correct: false }, + ] + result = described_class.new(nodes_from(""), options).detect + expect(result).to eq([]) + end + end +end diff --git a/spec/atomic_assessments_import/examsoft/extractor/options_detector_spec.rb b/spec/atomic_assessments_import/examsoft/extractor/options_detector_spec.rb new file mode 100644 index 0000000..326ee3b --- /dev/null +++ b/spec/atomic_assessments_import/examsoft/extractor/options_detector_spec.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Extractor::OptionsDetector do + def nodes_from(html) + Nokogiri::HTML.fragment(html).children.to_a + end + + describe "#detect" do + it "extracts lettered options with paren format" do + nodes = nodes_from(<<~HTML) +

Question text

+

a) Paris

+

b) London

+

c) Berlin

+ HTML + result = described_class.new(nodes).detect + expect(result.length).to eq(3) + expect(result[0][:text]).to eq("Paris") + expect(result[1][:text]).to eq("London") + expect(result[2][:text]).to eq("Berlin") + end + + it "detects correct answer markers with asterisk" do + nodes = nodes_from(<<~HTML) +

*a) Paris

+

b) London

+ HTML + result = described_class.new(nodes).detect + expect(result[0][:correct]).to be true + expect(result[1][:correct]).to be false + end + + it "detects correct answer markers with bold" do + nodes = nodes_from(<<~HTML) +

a) Paris

+

b) London

+ HTML + result = described_class.new(nodes).detect + expect(result[0][:correct]).to be true + expect(result[1][:correct]).to be false + end + + it "returns empty array when no options found" do + nodes = nodes_from("

Just a paragraph

") + result = described_class.new(nodes).detect + expect(result).to eq([]) + end + + it "handles uppercase letter options" do + nodes = nodes_from(<<~HTML) +

A) Paris

+

B) London

+ HTML + result = described_class.new(nodes).detect + expect(result.length).to eq(2) + expect(result[0][:text]).to eq("Paris") + end + end +end diff --git a/spec/atomic_assessments_import/examsoft/extractor/question_stem_detector_spec.rb b/spec/atomic_assessments_import/examsoft/extractor/question_stem_detector_spec.rb new file mode 100644 index 0000000..efc617e --- /dev/null +++ b/spec/atomic_assessments_import/examsoft/extractor/question_stem_detector_spec.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Extractor::QuestionStemDetector do + def nodes_from(html) + Nokogiri::HTML.fragment(html).children.to_a + end + + describe "#detect" do + it "extracts question text before options" do + nodes = nodes_from(<<~HTML) +

1) What is the capital of France?

+

a) Paris

+

b) London

+ HTML + result = described_class.new(nodes).detect + expect(result).to eq("What is the capital of France?") + end + + it "extracts question text with tilde-separated explanation removed" do + nodes = nodes_from(<<~HTML) +

Folder: Geo Title: Q1 Category: Test 1) What is the capital? ~ Paris is the capital of France.

+

*a) Paris

+ HTML + result = described_class.new(nodes).detect + expect(result).to eq("What is the capital?") + end + + it "extracts question text without numbered prefix" do + nodes = nodes_from(<<~HTML) +

What is the capital of France?

+

a) Paris

+ HTML + result = described_class.new(nodes).detect + expect(result).to eq("What is the capital of France?") + end + + it "returns nil when no question text found" do + nodes = nodes_from("

a) Paris

b) London

") + result = described_class.new(nodes).detect + expect(result).to be_nil + end + end +end From d46448ea8fab94a2e360b54314791dd15314fc17 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz Date: Fri, 13 Feb 2026 14:47:22 -0700 Subject: [PATCH 15/30] feat: add metadata, feedback, and question type detectors Co-Authored-By: Claude Opus 4.6 --- lib/atomic_assessments_import/exam_soft.rb | 3 + .../exam_soft/extractor/feedback_detector.rb | 35 +++++++++++ .../exam_soft/extractor/metadata_detector.rb | 37 ++++++++++++ .../extractor/question_type_detector.rb | 47 +++++++++++++++ .../extractor/feedback_detector_spec.rb | 44 ++++++++++++++ .../extractor/metadata_detector_spec.rb | 39 ++++++++++++ .../extractor/question_type_detector_spec.rb | 60 +++++++++++++++++++ 7 files changed, 265 insertions(+) create mode 100644 lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb create mode 100644 lib/atomic_assessments_import/exam_soft/extractor/metadata_detector.rb create mode 100644 lib/atomic_assessments_import/exam_soft/extractor/question_type_detector.rb create mode 100644 spec/atomic_assessments_import/examsoft/extractor/feedback_detector_spec.rb create mode 100644 spec/atomic_assessments_import/examsoft/extractor/metadata_detector_spec.rb create mode 100644 spec/atomic_assessments_import/examsoft/extractor/question_type_detector_spec.rb diff --git a/lib/atomic_assessments_import/exam_soft.rb b/lib/atomic_assessments_import/exam_soft.rb index 481caec..e88c867 100644 --- a/lib/atomic_assessments_import/exam_soft.rb +++ b/lib/atomic_assessments_import/exam_soft.rb @@ -9,6 +9,9 @@ require_relative "exam_soft/extractor/question_stem_detector" require_relative "exam_soft/extractor/options_detector" require_relative "exam_soft/extractor/correct_answer_detector" +require_relative "exam_soft/extractor/metadata_detector" +require_relative "exam_soft/extractor/feedback_detector" +require_relative "exam_soft/extractor/question_type_detector" module AtomicAssessmentsImport 
module ExamSoft diff --git a/lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb b/lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb new file mode 100644 index 0000000..c95581e --- /dev/null +++ b/lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +module AtomicAssessmentsImport + module ExamSoft + module Extractor + class FeedbackDetector + TILDE_PATTERN = /~\s*(.+)/m + LABEL_PATTERN = /\A\s*(?:Explanation|Rationale):\s*(.+)/im + + def initialize(nodes) + @nodes = nodes + end + + def detect + @nodes.each do |node| + text = node.text.strip + match = text.match(TILDE_PATTERN) + if match + feedback = match[1].strip + return feedback unless feedback.empty? + end + end + + @nodes.each do |node| + text = node.text.strip + match = text.match(LABEL_PATTERN) + return match[1].strip if match + end + + nil + end + end + end + end +end diff --git a/lib/atomic_assessments_import/exam_soft/extractor/metadata_detector.rb b/lib/atomic_assessments_import/exam_soft/extractor/metadata_detector.rb new file mode 100644 index 0000000..27892d5 --- /dev/null +++ b/lib/atomic_assessments_import/exam_soft/extractor/metadata_detector.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +module AtomicAssessmentsImport + module ExamSoft + module Extractor + class MetadataDetector + FOLDER_PATTERN = /Folder:\s*(.+?)(?=\s*(?:Title:|Category:|\d+[.)]))/i + TITLE_PATTERN = /Title:\s*(.+?)(?=\s*(?:Category:|\d+[.)]))/i + CATEGORY_PATTERN = /Category:\s*(.+?)(?=\s*\d+[.)]|\z)/i + TYPE_PATTERN = /Type:\s*(\S+)/i + + def initialize(nodes) + @nodes = nodes + end + + def detect + full_text = @nodes.map { |n| n.text.strip }.join(" ") + result = {} + + type_match = full_text.match(TYPE_PATTERN) + result[:type] = type_match[1].strip.downcase if type_match + + folder_match = full_text.match(FOLDER_PATTERN) + result[:folder] = folder_match[1].strip if folder_match + + title_match = 
full_text.match(TITLE_PATTERN) + result[:title] = title_match[1].strip if title_match + + category_match = full_text.match(CATEGORY_PATTERN) + result[:categories] = category_match[1].split(",").map(&:strip) if category_match + + result + end + end + end + end +end diff --git a/lib/atomic_assessments_import/exam_soft/extractor/question_type_detector.rb b/lib/atomic_assessments_import/exam_soft/extractor/question_type_detector.rb new file mode 100644 index 0000000..348bb5a --- /dev/null +++ b/lib/atomic_assessments_import/exam_soft/extractor/question_type_detector.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +module AtomicAssessmentsImport + module ExamSoft + module Extractor + class QuestionTypeDetector + TYPE_LABEL_PATTERN = /Type:\s*(.+?)(?=\s*(?:Folder:|Title:|Category:|\d+[.)]|\z))/i + + TYPE_MAP = { + /\Amcq?\z/i => "mcq", + /\Amultiple\s*choice\z/i => "mcq", + /\Ama\z/i => "ma", + /\Amultiple\s*(?:select|answer|response)\z/i => "ma", + /\Atrue[\s\/]*false\z/i => "true_false", + /\At\s*\/?\s*f\z/i => "true_false", + /\Aessay\z/i => "essay", + /\Along\s*answer\z/i => "essay", + /\Ashort\s*answer\z/i => "short_answer", + /\Afill[\s_-]*in[\s_-]*(?:the[\s_-]*)?blank\z/i => "fill_in_the_blank", + /\Acloze\z/i => "fill_in_the_blank", + /\Amatching\z/i => "matching", + /\Aorder(?:ing)?\z/i => "ordering", + }.freeze + + def initialize(nodes, has_options:) + @nodes = nodes + @has_options = has_options + end + + def detect + full_text = @nodes.map { |n| n.text.strip }.join(" ") + match = full_text.match(TYPE_LABEL_PATTERN) + + if match + type_text = match[1].strip + TYPE_MAP.each do |pattern, type| + return type if type_text.match?(pattern) + end + return type_text.downcase.gsub(/\s+/, "_") + end + + @has_options ? 
"mcq" : "short_answer" + end + end + end + end +end diff --git a/spec/atomic_assessments_import/examsoft/extractor/feedback_detector_spec.rb b/spec/atomic_assessments_import/examsoft/extractor/feedback_detector_spec.rb new file mode 100644 index 0000000..2d76e28 --- /dev/null +++ b/spec/atomic_assessments_import/examsoft/extractor/feedback_detector_spec.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Extractor::FeedbackDetector do + def nodes_from(html) + Nokogiri::HTML.fragment(html).children.to_a + end + + describe "#detect" do + it "extracts feedback after tilde" do + nodes = nodes_from(<<~HTML) +

1) Question? ~ Paris is the capital.

+ HTML + result = described_class.new(nodes).detect + expect(result).to eq("Paris is the capital.") + end + + it "extracts feedback from Explanation: label" do + nodes = nodes_from(<<~HTML) +

1) Question?

+

Explanation: Paris is the capital of France.

+ HTML + result = described_class.new(nodes).detect + expect(result).to eq("Paris is the capital of France.") + end + + it "extracts feedback from Rationale: label" do + nodes = nodes_from(<<~HTML) +

1) Question?

+

Rationale: Paris is the capital of France.

+ HTML + result = described_class.new(nodes).detect + expect(result).to eq("Paris is the capital of France.") + end + + it "returns nil when no feedback found" do + nodes = nodes_from("

1) What is the capital of France?

") + result = described_class.new(nodes).detect + expect(result).to be_nil + end + end +end diff --git a/spec/atomic_assessments_import/examsoft/extractor/metadata_detector_spec.rb b/spec/atomic_assessments_import/examsoft/extractor/metadata_detector_spec.rb new file mode 100644 index 0000000..6199e3b --- /dev/null +++ b/spec/atomic_assessments_import/examsoft/extractor/metadata_detector_spec.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Extractor::MetadataDetector do + def nodes_from(html) + Nokogiri::HTML.fragment(html).children.to_a + end + + describe "#detect" do + it "extracts folder, title, and category" do + nodes = nodes_from(<<~HTML) +

Folder: Geography Title: Question 1 Category: Subject/Capitals, Difficulty/Normal 1) Question?

+ HTML + result = described_class.new(nodes).detect + expect(result[:folder]).to eq("Geography") + expect(result[:title]).to eq("Question 1") + expect(result[:categories]).to eq(["Subject/Capitals", "Difficulty/Normal"]) + end + + it "extracts type when present" do + nodes = nodes_from(<<~HTML) +

Type: MA Folder: Geography Title: Question 1 Category: Subject/Capitals 1) Question?

+ HTML + result = described_class.new(nodes).detect + expect(result[:type]).to eq("ma") + expect(result[:folder]).to eq("Geography") + expect(result[:title]).to eq("Question 1") + expect(result[:categories]).to eq(["Subject/Capitals"]) + end + + it "returns empty hash when no metadata found" do + nodes = nodes_from("

1) What is the capital of France?

") + result = described_class.new(nodes).detect + expect(result).to eq({}) + end + end +end diff --git a/spec/atomic_assessments_import/examsoft/extractor/question_type_detector_spec.rb b/spec/atomic_assessments_import/examsoft/extractor/question_type_detector_spec.rb new file mode 100644 index 0000000..2a0d527 --- /dev/null +++ b/spec/atomic_assessments_import/examsoft/extractor/question_type_detector_spec.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Extractor::QuestionTypeDetector do + def nodes_from(html) + Nokogiri::HTML.fragment(html).children.to_a + end + + describe "#detect" do + it "detects type from Type: label" do + nodes = nodes_from("

Type: MA Folder: Geography 1) Question?

") + result = described_class.new(nodes, has_options: true).detect + expect(result).to eq("ma") + end + + it "detects essay from Type: label" do + nodes = nodes_from("

Type: Essay Folder: Geography 1) Question?

") + result = described_class.new(nodes, has_options: false).detect + expect(result).to eq("essay") + end + + it "defaults to mcq when options are present" do + nodes = nodes_from("

1) Question?

") + result = described_class.new(nodes, has_options: true).detect + expect(result).to eq("mcq") + end + + it "defaults to short_answer when no options" do + nodes = nodes_from("

1) Question?

") + result = described_class.new(nodes, has_options: false).detect + expect(result).to eq("short_answer") + end + + it "detects true/false from Type: label" do + nodes = nodes_from("

Type: True/False 1) Question?

") + result = described_class.new(nodes, has_options: true).detect + expect(result).to eq("true_false") + end + + it "detects matching from Type: label" do + nodes = nodes_from("

Type: Matching 1) Question?

") + result = described_class.new(nodes, has_options: true).detect + expect(result).to eq("matching") + end + + it "detects ordering from Type: label" do + nodes = nodes_from("

Type: Ordering 1) Question?

") + result = described_class.new(nodes, has_options: true).detect + expect(result).to eq("ordering") + end + + it "detects fill_in_the_blank from Type: label" do + nodes = nodes_from("

Type: Fill in the Blank 1) Question?

") + result = described_class.new(nodes, has_options: false).detect + expect(result).to eq("fill_in_the_blank") + end + end +end From 4a97f957892c9002af901249d4e5ed949617987e Mon Sep 17 00:00:00 2001 From: Jacob Schwartz Date: Fri, 13 Feb 2026 14:51:41 -0700 Subject: [PATCH 16/30] feat: add Extractor orchestrator with field detection pipeline Co-Authored-By: Claude Opus 4.6 --- lib/atomic_assessments_import/exam_soft.rb | 1 + .../exam_soft/extractor.rb | 92 +++++++++++++++++++ .../examsoft/extractor_spec.rb | 91 ++++++++++++++++++ 3 files changed, 184 insertions(+) create mode 100644 lib/atomic_assessments_import/exam_soft/extractor.rb create mode 100644 spec/atomic_assessments_import/examsoft/extractor_spec.rb diff --git a/lib/atomic_assessments_import/exam_soft.rb b/lib/atomic_assessments_import/exam_soft.rb index e88c867..d6690cb 100644 --- a/lib/atomic_assessments_import/exam_soft.rb +++ b/lib/atomic_assessments_import/exam_soft.rb @@ -12,6 +12,7 @@ require_relative "exam_soft/extractor/metadata_detector" require_relative "exam_soft/extractor/feedback_detector" require_relative "exam_soft/extractor/question_type_detector" +require_relative "exam_soft/extractor" module AtomicAssessmentsImport module ExamSoft diff --git a/lib/atomic_assessments_import/exam_soft/extractor.rb b/lib/atomic_assessments_import/exam_soft/extractor.rb new file mode 100644 index 0000000..2bf026c --- /dev/null +++ b/lib/atomic_assessments_import/exam_soft/extractor.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +require_relative "extractor/question_stem_detector" +require_relative "extractor/options_detector" +require_relative "extractor/correct_answer_detector" +require_relative "extractor/metadata_detector" +require_relative "extractor/feedback_detector" +require_relative "extractor/question_type_detector" + +module AtomicAssessmentsImport + module ExamSoft + module Extractor + SUPPORTED_TYPES = %w[mcq ma true_false essay short_answer fill_in_the_blank matching ordering].freeze + 
OPTION_TYPES = %w[mcq ma true_false].freeze + + def self.extract(nodes) + warnings = [] + + # Run detectors + options = OptionsDetector.new(nodes).detect + has_options = !options.empty? + + metadata = MetadataDetector.new(nodes).detect + question_type = QuestionTypeDetector.new(nodes, has_options: has_options).detect + stem = QuestionStemDetector.new(nodes).detect + feedback = FeedbackDetector.new(nodes).detect + correct_answers = has_options ? CorrectAnswerDetector.new(nodes, options).detect : [] + + # Determine status + status = "published" + + unless SUPPORTED_TYPES.include?(question_type) + warnings << "Unsupported question type '#{question_type}', imported as draft" + status = "draft" + end + + if stem.nil? + warnings << "No question text found, imported as draft" + status = "draft" + end + + if OPTION_TYPES.include?(question_type) + if options.empty? + warnings << "No options found for #{question_type} question, imported as draft" + status = "draft" + end + if correct_answers.empty? 
+ warnings << "No correct answer found, imported as draft" + status = "draft" + end + end + + # Build row_mock + row = { + "question id" => nil, + "folder" => metadata[:folder], + "title" => metadata[:title], + "category" => metadata[:categories] || [], + "import type" => nil, + "description" => nil, + "question text" => stem, + "question type" => question_type, + "stimulus review" => nil, + "instructor stimulus" => nil, + "correct answer" => correct_answers.join("; "), + "scoring type" => nil, + "points" => nil, + "distractor rationale" => nil, + "sample answer" => nil, + "acknowledgements" => nil, + "general feedback" => feedback, + "correct feedback" => nil, + "incorrect feedback" => nil, + "shuffle options" => nil, + "template" => question_type, + } + + # Add option keys + options.each_with_index do |opt, index| + letter = ("a".ord + index).chr + row["option #{letter}"] = opt[:text] + end + + { + row: row, + status: status, + warnings: warnings, + } + end + end + end +end diff --git a/spec/atomic_assessments_import/examsoft/extractor_spec.rb b/spec/atomic_assessments_import/examsoft/extractor_spec.rb new file mode 100644 index 0000000..702a76c --- /dev/null +++ b/spec/atomic_assessments_import/examsoft/extractor_spec.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Extractor do + def nodes_from(html) + Nokogiri::HTML.fragment(html).children.to_a + end + + describe ".extract" do + it "extracts a complete MCQ question" do + nodes = nodes_from(<<~HTML) +

Folder: Geography Title: Question 1 Category: Subject/Capitals 1) What is the capital of France? ~ Paris is the capital.

+

*a) Paris

+

b) London

+

c) Berlin

+ HTML + result = described_class.extract(nodes) + + expect(result[:row]["question text"]).to eq("What is the capital of France?") + expect(result[:row]["option a"]).to eq("Paris") + expect(result[:row]["option b"]).to eq("London") + expect(result[:row]["option c"]).to eq("Berlin") + expect(result[:row]["correct answer"]).to eq("a") + expect(result[:row]["title"]).to eq("Question 1") + expect(result[:row]["folder"]).to eq("Geography") + expect(result[:row]["general feedback"]).to eq("Paris is the capital.") + expect(result[:row]["question type"]).to eq("mcq") + expect(result[:status]).to eq("published") + expect(result[:warnings]).to be_empty + end + + it "returns draft status when no correct answer" do + nodes = nodes_from(<<~HTML) +

1) What is the capital of France?

+

a) Paris

+

b) London

+ HTML + result = described_class.extract(nodes) + + expect(result[:status]).to eq("draft") + expect(result[:warnings]).to include(a_string_matching(/correct answer/i)) + end + + it "returns draft status when no question text found" do + nodes = nodes_from(<<~HTML) +

a) Paris

+

b) London

+ HTML + result = described_class.extract(nodes) + + expect(result[:status]).to eq("draft") + expect(result[:warnings]).to include(a_string_matching(/question text/i)) + end + + it "handles multiple correct answers for MA type" do + nodes = nodes_from(<<~HTML) +

Type: MA Folder: Geo Title: Q1 Category: Test 1) Pick capitals? ~ Explanation

+

*a) Paris

+

*b) Berlin

+

c) Detroit

+ HTML + result = described_class.extract(nodes) + + expect(result[:row]["correct answer"]).to eq("a; b") + expect(result[:row]["question type"]).to eq("ma") + end + + it "extracts essay questions without options" do + nodes = nodes_from(<<~HTML) +

Type: Essay Folder: Writing Title: Q1 Category: Test 1) Discuss the causes of WWI.

+ HTML + result = described_class.extract(nodes) + + expect(result[:row]["question type"]).to eq("essay") + expect(result[:row]["question text"]).to eq("Discuss the causes of WWI.") + expect(result[:status]).to eq("published") + end + + it "warns for unsupported question types but still imports" do + nodes = nodes_from(<<~HTML) +

Type: Hotspot 1) Identify the region on the map.

+ HTML + result = described_class.extract(nodes) + + expect(result[:status]).to eq("draft") + expect(result[:warnings]).to include(a_string_matching(/unsupported.*hotspot/i)) + end + end +end From 950e8f2902700442608e742865986c0565076d2f Mon Sep 17 00:00:00 2001 From: Jacob Schwartz Date: Fri, 13 Feb 2026 14:55:10 -0700 Subject: [PATCH 17/30] feat: add Essay and ShortAnswer question types Add Essay (longanswer) and ShortAnswer (shorttext) question type classes that inherit from Question. Update Question.load to dispatch essay, longanswer, short_answer, shorttext, and true_false question types. Also tighten the /ma/ regex to /^ma$/ to avoid false matches. Co-Authored-By: Claude Opus 4.6 --- .../questions/essay.rb | 20 ++++++++ .../questions/question.rb | 11 ++++- .../questions/short_answer.rb | 24 ++++++++++ .../questions/essay_spec.rb | 46 +++++++++++++++++++ .../questions/short_answer_spec.rb | 38 +++++++++++++++ 5 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 lib/atomic_assessments_import/questions/essay.rb create mode 100644 lib/atomic_assessments_import/questions/short_answer.rb create mode 100644 spec/atomic_assessments_import/questions/essay_spec.rb create mode 100644 spec/atomic_assessments_import/questions/short_answer_spec.rb diff --git a/lib/atomic_assessments_import/questions/essay.rb b/lib/atomic_assessments_import/questions/essay.rb new file mode 100644 index 0000000..076a6cc --- /dev/null +++ b/lib/atomic_assessments_import/questions/essay.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +require_relative "question" + +module AtomicAssessmentsImport + module Questions + class Essay < Question + def question_type + "longanswer" + end + + def question_data + data = super + word_limit = @row["word_limit"]&.to_i + data[:max_length] = word_limit if word_limit && word_limit > 0 + data + end + end + end +end diff --git a/lib/atomic_assessments_import/questions/question.rb b/lib/atomic_assessments_import/questions/question.rb index 
d99c7ab..4d1a109 100644 --- a/lib/atomic_assessments_import/questions/question.rb +++ b/lib/atomic_assessments_import/questions/question.rb @@ -11,8 +11,14 @@ def initialize(row) def self.load(row) case row["question type"] - when nil, "", /multiple choice/i, /mcq/i, /ma/i # TODO - verify ma is fine here + when nil, "", /multiple choice/i, /mcq/i, /^ma$/i MultipleChoice.new(row) + when /true_false/i, /true\/false/i + MultipleChoice.new(row) + when /essay/i, /longanswer/i + Essay.new(row) + when /short_answer/i, /shorttext/i + ShortAnswer.new(row) else raise "Unknown question type #{row['question type']}" end @@ -82,3 +88,6 @@ def to_learnosity end end end + +require_relative "essay" +require_relative "short_answer" diff --git a/lib/atomic_assessments_import/questions/short_answer.rb b/lib/atomic_assessments_import/questions/short_answer.rb new file mode 100644 index 0000000..fe90372 --- /dev/null +++ b/lib/atomic_assessments_import/questions/short_answer.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +require_relative "question" + +module AtomicAssessmentsImport + module Questions + class ShortAnswer < Question + def question_type + "shorttext" + end + + def question_data + super.merge( + validation: { + valid_response: { + score: points, + value: @row["correct answer"] || "", + }, + } + ) + end + end + end +end diff --git a/spec/atomic_assessments_import/questions/essay_spec.rb b/spec/atomic_assessments_import/questions/essay_spec.rb new file mode 100644 index 0000000..abdf9c2 --- /dev/null +++ b/spec/atomic_assessments_import/questions/essay_spec.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" + +RSpec.describe AtomicAssessmentsImport::Questions::Essay do + let(:row) do + { + "question text" => "Discuss the causes of World War I.", + "question type" => "essay", + "general feedback" => "A good answer covers alliances, imperialism, and nationalism.", + "sample answer" => "World War I was caused by...", + "points" => 
"10", + } + end + + describe "#question_type" do + it "returns longanswer" do + question = described_class.new(row) + expect(question.question_type).to eq("longanswer") + end + end + + describe "#to_learnosity" do + it "returns correct structure" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:type]).to eq("longanswer") + expect(result[:widget_type]).to eq("response") + expect(result[:data][:stimulus]).to eq("Discuss the causes of World War I.") + end + + it "includes max_length when word limit specified" do + row["word_limit"] = "500" + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:max_length]).to eq(500) + end + + it "sets metadata" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:metadata][:sample_answer]).to eq("World War I was caused by...") + expect(result[:data][:metadata][:general_feedback]).to eq("A good answer covers alliances, imperialism, and nationalism.") + end + end +end diff --git a/spec/atomic_assessments_import/questions/short_answer_spec.rb b/spec/atomic_assessments_import/questions/short_answer_spec.rb new file mode 100644 index 0000000..fa60d05 --- /dev/null +++ b/spec/atomic_assessments_import/questions/short_answer_spec.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" + +RSpec.describe AtomicAssessmentsImport::Questions::ShortAnswer do + let(:row) do + { + "question text" => "What is the chemical symbol for water?", + "question type" => "short_answer", + "correct answer" => "H2O", + "points" => "1", + } + end + + describe "#question_type" do + it "returns shorttext" do + question = described_class.new(row) + expect(question.question_type).to eq("shorttext") + end + end + + describe "#to_learnosity" do + it "returns correct structure" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:type]).to eq("shorttext") + 
expect(result[:widget_type]).to eq("response") + expect(result[:data][:stimulus]).to eq("What is the chemical symbol for water?") + end + + it "includes validation with correct answer" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:validation][:valid_response][:value]).to eq("H2O") + expect(result[:data][:validation][:valid_response][:score]).to eq(1) + end + end +end From 0939911ca8430ec5c8ab269e003702b922eeb98a Mon Sep 17 00:00:00 2001 From: Jacob Schwartz Date: Fri, 13 Feb 2026 14:58:09 -0700 Subject: [PATCH 18/30] feat: add FillInTheBlank, Matching, and Ordering question types Co-Authored-By: Claude Opus 4.6 --- .../questions/fill_in_the_blank.rb | 25 ++++++++++ .../questions/matching.rb | 42 ++++++++++++++++ .../questions/ordering.rb | 37 ++++++++++++++ .../questions/question.rb | 9 ++++ .../questions/fill_in_the_blank_spec.rb | 39 +++++++++++++++ .../questions/matching_spec.rb | 50 +++++++++++++++++++ .../questions/ordering_spec.rb | 39 +++++++++++++++ 7 files changed, 241 insertions(+) create mode 100644 lib/atomic_assessments_import/questions/fill_in_the_blank.rb create mode 100644 lib/atomic_assessments_import/questions/matching.rb create mode 100644 lib/atomic_assessments_import/questions/ordering.rb create mode 100644 spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb create mode 100644 spec/atomic_assessments_import/questions/matching_spec.rb create mode 100644 spec/atomic_assessments_import/questions/ordering_spec.rb diff --git a/lib/atomic_assessments_import/questions/fill_in_the_blank.rb b/lib/atomic_assessments_import/questions/fill_in_the_blank.rb new file mode 100644 index 0000000..99dda32 --- /dev/null +++ b/lib/atomic_assessments_import/questions/fill_in_the_blank.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require_relative "question" + +module AtomicAssessmentsImport + module Questions + class FillInTheBlank < Question + def question_type + "clozetext" + end + + def 
question_data + answers = (@row["correct answer"] || "").split(";").map(&:strip) + super.merge( + validation: { + valid_response: { + score: points, + value: answers, + }, + } + ) + end + end + end +end diff --git a/lib/atomic_assessments_import/questions/matching.rb b/lib/atomic_assessments_import/questions/matching.rb new file mode 100644 index 0000000..f5518f6 --- /dev/null +++ b/lib/atomic_assessments_import/questions/matching.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +require_relative "question" + +module AtomicAssessmentsImport + module Questions + class Matching < Question + INDEXES = ("a".."o").to_a.freeze + + def question_type + "association" + end + + def question_data + stimulus_list = [] + possible_responses = [] + valid_values = [] + + INDEXES.each do |letter| + option = @row["option #{letter}"] + match_val = @row["match #{letter}"] + break unless option + + stimulus_list << option + possible_responses << match_val if match_val + valid_values << match_val if match_val + end + + super.merge( + stimulus_list: stimulus_list, + possible_responses: possible_responses, + validation: { + valid_response: { + score: points, + value: valid_values, + }, + } + ) + end + end + end +end diff --git a/lib/atomic_assessments_import/questions/ordering.rb b/lib/atomic_assessments_import/questions/ordering.rb new file mode 100644 index 0000000..160100d --- /dev/null +++ b/lib/atomic_assessments_import/questions/ordering.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +require_relative "question" + +module AtomicAssessmentsImport + module Questions + class Ordering < Question + INDEXES = ("a".."o").to_a.freeze + + def question_type + "orderlist" + end + + def question_data + items = [] + INDEXES.each do |letter| + option = @row["option #{letter}"] + break unless option + items << option + end + + order = (@row["correct answer"] || "").split(";").map(&:strip).map(&:downcase) + valid_values = order.filter_map { |letter| INDEXES.find_index(letter)&.to_s } + + 
super.merge( + list: items, + validation: { + valid_response: { + score: points, + value: valid_values, + }, + } + ) + end + end + end +end diff --git a/lib/atomic_assessments_import/questions/question.rb b/lib/atomic_assessments_import/questions/question.rb index 4d1a109..a71eb43 100644 --- a/lib/atomic_assessments_import/questions/question.rb +++ b/lib/atomic_assessments_import/questions/question.rb @@ -19,6 +19,12 @@ def self.load(row) Essay.new(row) when /short_answer/i, /shorttext/i ShortAnswer.new(row) + when /fill_in_the_blank/i, /cloze/i + FillInTheBlank.new(row) + when /matching/i, /association/i + Matching.new(row) + when /ordering/i, /orderlist/i + Ordering.new(row) else raise "Unknown question type #{row['question type']}" end @@ -91,3 +97,6 @@ def to_learnosity require_relative "essay" require_relative "short_answer" +require_relative "fill_in_the_blank" +require_relative "matching" +require_relative "ordering" diff --git a/spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb b/spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb new file mode 100644 index 0000000..a7bdc4c --- /dev/null +++ b/spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" + +RSpec.describe AtomicAssessmentsImport::Questions::FillInTheBlank do + let(:row) do + { + "question text" => "The capital of France is {{response}}.", + "question type" => "fill_in_the_blank", + "correct answer" => "Paris", + "points" => "1", + } + end + + describe "#question_type" do + it "returns clozetext" do + question = described_class.new(row) + expect(question.question_type).to eq("clozetext") + end + end + + describe "#to_learnosity" do + it "returns correct structure" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:type]).to eq("clozetext") + expect(result[:widget_type]).to eq("response") + expect(result[:data][:stimulus]).to 
eq("The capital of France is {{response}}.") + end + + it "includes validation with correct answers array" do + row["correct answer"] = "Paris; Lyon; Marseille" + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:validation][:valid_response][:value]).to eq(["Paris", "Lyon", "Marseille"]) + expect(result[:data][:validation][:valid_response][:score]).to eq(1) + end + end +end diff --git a/spec/atomic_assessments_import/questions/matching_spec.rb b/spec/atomic_assessments_import/questions/matching_spec.rb new file mode 100644 index 0000000..003f04c --- /dev/null +++ b/spec/atomic_assessments_import/questions/matching_spec.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" + +RSpec.describe AtomicAssessmentsImport::Questions::Matching do + let(:row) do + { + "question text" => "Match the countries to their capitals.", + "question type" => "matching", + "option a" => "France", + "option b" => "Germany", + "option c" => "Spain", + "match a" => "Paris", + "match b" => "Berlin", + "match c" => "Madrid", + "points" => "3", + } + end + + describe "#question_type" do + it "returns association" do + question = described_class.new(row) + expect(question.question_type).to eq("association") + end + end + + describe "#to_learnosity" do + it "returns correct structure" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:type]).to eq("association") + expect(result[:widget_type]).to eq("response") + expect(result[:data][:stimulus]).to eq("Match the countries to their capitals.") + end + + it "has correct stimulus_list and possible_responses lengths" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:stimulus_list]).to eq(["France", "Germany", "Spain"]) + expect(result[:data][:possible_responses]).to eq(["Paris", "Berlin", "Madrid"]) + end + + it "includes validation with correct match values" do + question = 
described_class.new(row) + result = question.to_learnosity + expect(result[:data][:validation][:valid_response][:value]).to eq(["Paris", "Berlin", "Madrid"]) + expect(result[:data][:validation][:valid_response][:score]).to eq(3) + end + end +end diff --git a/spec/atomic_assessments_import/questions/ordering_spec.rb b/spec/atomic_assessments_import/questions/ordering_spec.rb new file mode 100644 index 0000000..d4095b0 --- /dev/null +++ b/spec/atomic_assessments_import/questions/ordering_spec.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" + +RSpec.describe AtomicAssessmentsImport::Questions::Ordering do + let(:row) do + { + "question text" => "Arrange these events in chronological order.", + "question type" => "ordering", + "option a" => "World War I", + "option b" => "World War II", + "option c" => "Cold War", + "correct answer" => "a; b; c", + "points" => "3", + } + end + + describe "#question_type" do + it "returns orderlist" do + question = described_class.new(row) + expect(question.question_type).to eq("orderlist") + end + end + + describe "#to_learnosity" do + it "has correct list items" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:list]).to eq(["World War I", "World War II", "Cold War"]) + end + + it "includes validation with correct order indices" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:validation][:valid_response][:value]).to eq(["0", "1", "2"]) + expect(result[:data][:validation][:valid_response][:score]).to eq(3) + end + end +end From c103dda434c7be342bfbf821ee0211423dd85f70 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz Date: Sat, 14 Feb 2026 12:45:06 -0700 Subject: [PATCH 19/30] refactor: rewrite ExamSoft converter to use chunker + extractor pipeline Replace the monolithic ExamSoft converter with a pipeline that: 1. Normalizes input to HTML via Pandoc 2. 
Chunks the document into per-question segments 3. Extracts fields (stem, options, answers, metadata, feedback) per chunk 4. Builds Learnosity items/questions from extracted data 5. Collects warnings in :errors array instead of raising Key fixes: - Clean embedded newlines from stems and feedback text - Set template to nil (not question type) to avoid ui_style errors - Update specs to expect warnings instead of raised errors - Fix HTML spec option-removal regex to use [^<] instead of [^\}] Co-Authored-By: Claude Opus 4.6 --- .../chunker/numbered_question_strategy.rb | 2 +- .../exam_soft/converter.rb | 189 ++++++++---------- .../exam_soft/extractor.rb | 2 +- .../exam_soft/extractor/feedback_detector.rb | 6 +- .../extractor/question_stem_detector.rb | 6 +- .../examsoft/docx_converter_spec.rb | 15 +- .../examsoft/html_converter_spec.rb | 23 +-- .../examsoft/rtf_converter_spec.rb | 18 +- 8 files changed, 115 insertions(+), 146 deletions(-) diff --git a/lib/atomic_assessments_import/exam_soft/chunker/numbered_question_strategy.rb b/lib/atomic_assessments_import/exam_soft/chunker/numbered_question_strategy.rb index 93f0d45..16fdde0 100644 --- a/lib/atomic_assessments_import/exam_soft/chunker/numbered_question_strategy.rb +++ b/lib/atomic_assessments_import/exam_soft/chunker/numbered_question_strategy.rb @@ -6,7 +6,7 @@ module AtomicAssessmentsImport module ExamSoft module Chunker class NumberedQuestionStrategy < Strategy - # Matches "1)" or "1." or "12)" etc. at start of text, but NOT single letters like "a)" + # Matches "1)" or "1." or "12)" etc. 
at start of text, but NOT single letters like "a)" because those are used for options, not question numbering NUMBERED_PATTERN = /\A\s*(\d+)\s*[.)]/ def split(doc) diff --git a/lib/atomic_assessments_import/exam_soft/converter.rb b/lib/atomic_assessments_import/exam_soft/converter.rb index 15a9a1e..c9191c5 100644 --- a/lib/atomic_assessments_import/exam_soft/converter.rb +++ b/lib/atomic_assessments_import/exam_soft/converter.rb @@ -1,131 +1,100 @@ # frozen_string_literal: true require "pandoc-ruby" +require "nokogiri" require "active_support/core_ext/digest/uuid" require_relative "../questions/question" require_relative "../questions/multiple_choice" +require_relative "../questions/essay" +require_relative "../questions/short_answer" +require_relative "../questions/fill_in_the_blank" +require_relative "../questions/matching" +require_relative "../questions/ordering" require_relative "../utils" +require_relative "chunker" +require_relative "extractor" module AtomicAssessmentsImport module ExamSoft class Converter - def initialize(file) @file = file end - def convert - # Step 1: Parse the ExamSoft file to HTML using Pandoc to formalize the structure - if @file.is_a?(String) - html = PandocRuby.new([@file], from: @file.split('.').last).to_html - else # If @file is not a string, we assume it's a Tempfile or similar object that PandocRuby can read from directly - # Just grab the text following the last . to determine the format for Pandoc. This is a bit of a hack but it allows us to handle Tempfile objects that don't have a path method. 
- source_type = @file.path.split('.').last.match(/^[a-zA-Z]+/)[0] # Remove any non-alphanumeric characters to get a clean source type for Pandoc - html = PandocRuby.new(@file.read, from: source_type).to_html - end - - # html = PandocRuby.new([@file], from: @file.split('.').last).to_html + html = normalize_to_html + doc = Nokogiri::HTML.fragment(html) + + # Chunk the document + chunk_result = Chunker.chunk(doc) + all_warnings = chunk_result[:warnings].dup - # Step 2: Extract questions and convert them into AA format + # Log header info if present + unless chunk_result[:header_nodes].empty? + header_text = chunk_result[:header_nodes].map { |n| n.text.strip }.join(" ") + all_warnings << "Exam header detected: #{header_text}" unless header_text.empty? + end items = [] questions = [] + chunk_result[:chunks].each_with_index do |chunk_nodes, index| + # Extract fields from this chunk + extraction = Extractor.extract(chunk_nodes) + all_warnings.concat(extraction[:warnings].map { |w| "Question #{index + 1}: #{w}" }) - # Chunking Regex (The "Slicer") for ExamSoft format - splits at each question block - chunk_pattern = /

(?:Type:.*?)?Folder:.*?(?=

(?:Type:.*?)?Folder:|\z)/m - - # 2. Field Extraction Regexes - meta_regex = /(?:Type:\s*(?[^<]+?)\s*)?Folder:\s*(?[^<]+?)\s*Title:\s*(?[^<]+?)\s*Category:\s*(?<category>.+?)\s*(?=\d+\))/m - question_regex = /\d+\)\s*(?<question>.+?)\s*~/m - explanation_regex = /~\s*(?<explanation>.+?)(?=<\/p>)/m - options_regex = /<p>(?<marker>\*?)(?<letter>[a-o])\)\s*(?<text>.+?)<\/p>/ - - parsed_questions = [] - - chunks = html.scan(chunk_pattern) - chunks.each do |chunk| - clean_chunk = chunk.gsub(/\n/, " ").gsub(/\s+/, " ") - - meta = clean_chunk.match(meta_regex) - q_text = clean_chunk.match(question_regex) - expl = clean_chunk.match(explanation_regex) - raw_options = chunk.scan(options_regex) - - # Validate that we have options - raise "Missing options" if raw_options.empty? - - # Identify ALL indices where the marker is '*' to denote correct answers - # We use .map { |i| i + 1 } to convert 0-index to 1-index numbers - correct_indices = raw_options.each_index.select { |i| raw_options[i][0] == "*" }.map { |i| i + 1 } - - type = meta && meta[:type] ? meta[:type].strip.downcase : "standard" # This is for the "template" field in AA, but ExamSoft RTF doesn't seem to have a direct equivalent, so we can use the "Type" field if it exists or default to "standard". - folder = meta ? meta[:folder].strip : nil - title = meta ? meta[:title].strip : nil - categories = meta ? meta[:category].split(",").map(&:strip) : [] - question = q_text ? q_text[:question].strip : nil - explanation = expl ? expl[:explanation].strip : nil - answer_options = raw_options.map { |opt| opt[2].strip } - correct_answer_indices = correct_indices - - # Note: a lot of these are nil because ExamSoft RTF doesn't have all the same fields as CSV. - # They're listed here to show what is being mapped where possible. 
- row_mock = { - "question id" => nil, - "folder" => folder, - "title" => title, - "category" => categories, - "import type" => nil, - "description" => nil, - "question text" => question, - "question type" => "mcq", # We are treating all questions as multiple choice for now since that's the only type we have in our fixture. We could potentially add logic to determine question type based on the presence of certain fields or patterns in the question text. - "stimulus review" => nil, - "instructor stimulus" => nil, - "correct answer" => correct_answer_indices.map { |i| ("a".ord + i - 1).chr }.join("; "), - "scoring type" => nil, - "points" => nil, - "distractor rationale" => nil, - "sample answer" => nil, - "acknowledgements" => nil, - "general feedback" => explanation, - "correct feedback" => nil, - "incorrect feedback" => nil, - "shuffle options" => nil, - "template" => type, - } - - # Add option keys for the MultipleChoice class - answer_options.each_with_index do |option_text, index| - option_letter = ("a".ord + index).chr - row_mock["option #{option_letter}"] = option_text - end - - item, question_widgets = convert_row(row_mock) - - items << item - questions += question_widgets - rescue StandardError => e - raise e, "Error processing title \"#{title}\": #{e.message}" + row = extraction[:row] + status = extraction[:status] + + # Skip completely unparseable chunks + if row["question text"].nil? && row["option a"].nil? 
+ all_warnings << "Question #{index + 1}: Skipped — no usable content found" + next + end + + begin + item, question_widgets = convert_row(row, status) + items << item + questions += question_widgets + rescue StandardError => e + title = row["title"] || "Question #{index + 1}" + all_warnings << "#{title}: #{e.message}, imported as draft" + begin + item, question_widgets = convert_row_minimal(row) + items << item + questions += question_widgets + rescue StandardError + all_warnings << "#{title}: Could not import even minimally, skipped" + end + end end { activities: [], - items:, - questions:, + items: items, + questions: questions, features: [], - errors: [], + errors: all_warnings, } end private + def normalize_to_html + if @file.is_a?(String) + PandocRuby.new([@file], from: @file.split(".").last).to_html + else + source_type = @file.path.split(".").last.match(/^[a-zA-Z]+/)[0] + PandocRuby.new(@file.read, from: source_type).to_html + end + end + def categories_to_tags(categories) tags = {} - categories.each do |cat| + (categories || []).each do |cat| if cat.include?("/") - key, value = cat.split("/", 2).map(&:strip) # TODO: deal with multiple slashes? - It could be Tag name/Value/Value2/... Right now it just splits at the first slash and treats the rest as the value. + key, value = cat.split("/", 2).map(&:strip) tags[key.to_sym] ||= [] tags[key.to_sym] << value else @@ -135,29 +104,21 @@ def categories_to_tags(categories) tags end - def convert_row(row) - # The csv files had a column for question id, but ExamSoft rtf files does not seem to have that. + def convert_row(row, status = "published") source = "<p>ExamSoft Import on #{Time.now.strftime('%Y-%m-%d')}</p>\n" if row["question id"].present? 
source += "<p>External id: #{row['question id']}</p>\n" end - question = Questions::Question.load(row) item = { reference: SecureRandom.uuid, title: row["title"] || "", - status: "published", - tags: categories_to_tags(row["category"] || []), + status: status, + tags: categories_to_tags(row["category"]), metadata: { import_date: Time.now.iso8601, import_type: row["import_type"] || "examsoft", - - # **{ # TODO: decide about this section - what is the external id domain? Do we need alignment URLs from ExamSoft RTF? - # external_id: row["question id"], - # external_id_domain: row["question id"].present? ? "examsoft" : nil, - # alignment: nil # alignment_urls(row) - # }.compact, }, source: source, description: row["description"] || "", @@ -174,13 +135,31 @@ def convert_row(row) reference: question.reference, widget_type: "response", }, - ] + ], }, } [item, [question.to_learnosity]] end - + def convert_row_minimal(row) + reference = SecureRandom.uuid + item = { + reference: reference, + title: row["title"] || "", + status: "draft", + tags: {}, + metadata: { + import_date: Time.now.iso8601, + import_type: "examsoft", + }, + source: "<p>ExamSoft Import on #{Time.now.strftime('%Y-%m-%d')}</p>\n", + description: row["question text"] || "", + questions: [], + features: [], + definition: { widgets: [] }, + } + [item, []] + end end end end diff --git a/lib/atomic_assessments_import/exam_soft/extractor.rb b/lib/atomic_assessments_import/exam_soft/extractor.rb index 2bf026c..8351853 100644 --- a/lib/atomic_assessments_import/exam_soft/extractor.rb +++ b/lib/atomic_assessments_import/exam_soft/extractor.rb @@ -72,7 +72,7 @@ def self.extract(nodes) "correct feedback" => nil, "incorrect feedback" => nil, "shuffle options" => nil, - "template" => question_type, + "template" => nil, } # Add option keys diff --git a/lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb b/lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb index c95581e..eba3b2b 
100644 --- a/lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb +++ b/lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb @@ -16,7 +16,7 @@ def detect text = node.text.strip match = text.match(TILDE_PATTERN) if match - feedback = match[1].strip + feedback = match[1].gsub(/\s+/, " ").strip return feedback unless feedback.empty? end end @@ -24,7 +24,9 @@ def detect @nodes.each do |node| text = node.text.strip match = text.match(LABEL_PATTERN) - return match[1].strip if match + if match + return match[1].gsub(/\s+/, " ").strip + end end nil diff --git a/lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb b/lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb index 5ad4f77..0efb725 100644 --- a/lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb +++ b/lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb @@ -25,8 +25,8 @@ def detect # Strip metadata prefixes and numbered prefix together # e.g. "Folder: Geo Title: Q1 Category: Test 1) What is the capital?" - if text.match?(/\d+[.)]/) - text = text.sub(/\A.*?\d+[.)]\s*/, "") + if text.match?(/\d+[.)]/m) + text = text.sub(/\A.*?\d+[.)]\s*/m, "") else # Strip standalone metadata labels if present (Folder:, Title:, Category:, Type:) text = text.sub(/\A\s*(?:(?:Folder|Title|Category|Type):\s*\S+\s*)*/, "") @@ -35,7 +35,7 @@ def detect # Split on tilde and take the first part (remove explanation) text = text.split("~").first - text = text&.strip + text = text&.gsub(/\s+/, " ")&.strip text.nil? || text.empty? ? 
nil : text end end diff --git a/spec/atomic_assessments_import/examsoft/docx_converter_spec.rb b/spec/atomic_assessments_import/examsoft/docx_converter_spec.rb index f9ae18d..e4472d9 100644 --- a/spec/atomic_assessments_import/examsoft/docx_converter_spec.rb +++ b/spec/atomic_assessments_import/examsoft/docx_converter_spec.rb @@ -194,27 +194,24 @@ # end.to raise_error(StandardError, "Unknown column: Color") # end - it "raises if no options are given" do + it "warns if no options are given" do no_options = Tempfile.new("temp.docx") - # Copy the original DOCX content and remove the options original_content = File.read("spec/fixtures/no_options.docx") no_options.write(original_content) no_options.rewind - expect do - described_class.new(no_options).convert - end.to raise_error(StandardError, /Missing options/) + data = described_class.new(no_options).convert + expect(data[:errors]).to include(a_string_matching(/no options|missing options/i)) end - it "raises if no correct answer is given" do + it "warns if no correct answer is given" do no_correct = Tempfile.new("temp.docx") original_content = File.read("spec/fixtures/no_correct.docx") no_correct.write(original_content) no_correct.rewind - expect do - described_class.new(no_correct).convert - end.to raise_error(StandardError, /Missing correct answer/) + data = described_class.new(no_correct).convert + expect(data[:errors]).to include(a_string_matching(/correct answer/i)) end end end diff --git a/spec/atomic_assessments_import/examsoft/html_converter_spec.rb b/spec/atomic_assessments_import/examsoft/html_converter_spec.rb index ca341bf..b9a5fef 100644 --- a/spec/atomic_assessments_import/examsoft/html_converter_spec.rb +++ b/spec/atomic_assessments_import/examsoft/html_converter_spec.rb @@ -194,32 +194,29 @@ # end.to raise_error(StandardError, "Unknown column: Color") # end - it "raises if no options are given" do + it "warns if no options are given" do modified_file = Tempfile.new("modified.html") - # Copy the 
original content and remove the options original_content = File.read("spec/fixtures/simple.html") - # Remove lines that look like options (e.g., "a) Paris", "b) Versailles", etc.) while keeping the rest of the content intact. This regex looks for lines that start with a letter followed by a parenthesis and some text, which is the typical format for options in ExamSoft RTF exports. - modified_content = original_content.gsub(/[a-oA-O]\)\s*[^\}]*/, "") + # Remove option lines (e.g., "*a) Paris" or "b) Versailles") from the HTML. + # In HTML, options appear as text within <p> tags, so we remove lines + # matching the option pattern up to the next tag boundary. + modified_content = original_content.gsub(/\*?[a-oA-O]\)\s*[^<]*/, "") modified_file.write(modified_content) modified_file.rewind - expect do - described_class.new(modified_file).convert - end.to raise_error(StandardError, /Missing options/) + data = described_class.new(modified_file).convert + expect(data[:errors]).to include(a_string_matching(/no options|missing options/i)) end - it "raises if no correct answer is given" do + it "warns if no correct answer is given" do modified_file = Tempfile.new("temp.html") - # Copy the original RTF content and remove only the asterisks marking correct answers original_content = File.read("spec/fixtures/simple.html") - # Remove the asterisks (*) that mark correct answers, keeping the options modified_content = original_content.gsub(/\*([a-oA-O]\))/, '\1') modified_file.write(modified_content) modified_file.rewind - expect do - described_class.new(modified_file).convert - end.to raise_error(StandardError, /Missing correct answer/) + data = described_class.new(modified_file).convert + expect(data[:errors]).to include(a_string_matching(/correct answer/i)) end end end diff --git a/spec/atomic_assessments_import/examsoft/rtf_converter_spec.rb b/spec/atomic_assessments_import/examsoft/rtf_converter_spec.rb index 2a221e9..e932b75 100644 --- 
a/spec/atomic_assessments_import/examsoft/rtf_converter_spec.rb +++ b/spec/atomic_assessments_import/examsoft/rtf_converter_spec.rb @@ -195,32 +195,26 @@ # end.to raise_error(StandardError, "Unknown column: Color") # end - it "raises if no options are given" do + it "warns if no options are given" do modified_rtf_file = Tempfile.new("modified.rtf") - # Copy the original RTF content and remove the options original_content = File.read("spec/fixtures/simple.rtf") - # Remove lines that look like options (e.g., "a) Paris", "b) Versailles", etc.) while keeping the rest of the content intact. This regex looks for lines that start with a letter followed by a parenthesis and some text, which is the typical format for options in ExamSoft RTF exports. modified_content = original_content.gsub(/[a-oA-O]\)\s*[^\}]*/, "") modified_rtf_file.write(modified_content) modified_rtf_file.rewind - expect do - described_class.new(modified_rtf_file).convert - end.to raise_error(StandardError, /Missing options/) + data = described_class.new(modified_rtf_file).convert + expect(data[:errors]).to include(a_string_matching(/no options|missing options/i)) end - it "raises if no correct answer is given" do + it "warns if no correct answer is given" do modified_rtf_file = Tempfile.new("temp.rtf") - # Copy the original RTF content and remove only the asterisks marking correct answers original_content = File.read("spec/fixtures/simple.rtf") - # Remove the asterisks (*) that mark correct answers, keeping the options modified_content = original_content.gsub(/\*([a-oA-O]\))/, '\1') modified_rtf_file.write(modified_content) modified_rtf_file.rewind - expect do - described_class.new(modified_rtf_file).convert - end.to raise_error(StandardError, /Missing correct answer/) + data = described_class.new(modified_rtf_file).convert + expect(data[:errors]).to include(a_string_matching(/correct answer/i)) end end end From 5c3779d9245d749cce400cbab546b9753a3c180a Mon Sep 17 00:00:00 2001 From: Jacob Schwartz 
<jaschwartz27@gmail.com> Date: Sat, 14 Feb 2026 12:48:23 -0700 Subject: [PATCH 20/30] test: add integration tests for mixed types, messy docs, backward compat Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --- .../examsoft/integration_spec.rb | 70 +++++++++++++++++++ spec/fixtures/messy_document.html | 9 +++ spec/fixtures/mixed_types.html | 15 ++++ 3 files changed, 94 insertions(+) create mode 100644 spec/atomic_assessments_import/examsoft/integration_spec.rb create mode 100644 spec/fixtures/messy_document.html create mode 100644 spec/fixtures/mixed_types.html diff --git a/spec/atomic_assessments_import/examsoft/integration_spec.rb b/spec/atomic_assessments_import/examsoft/integration_spec.rb new file mode 100644 index 0000000..647e459 --- /dev/null +++ b/spec/atomic_assessments_import/examsoft/integration_spec.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" + +RSpec.describe "ExamSoft Integration" do + describe "mixed question types" do + it "handles a document with MCQ, essay, and MA questions" do + data = AtomicAssessmentsImport::ExamSoft::Converter.new("spec/fixtures/mixed_types.html").convert + + expect(data[:items].length).to eq(4) + + # MCQ question + q1 = data[:questions].find { |q| q[:data][:stimulus]&.include?("powerhouse") } + expect(q1).not_to be_nil + expect(q1[:type]).to eq("mcq") + + # Essay question + q2 = data[:questions].find { |q| q[:data][:stimulus]&.include?("Hamlet") } + expect(q2).not_to be_nil + expect(q2[:type]).to eq("longanswer") + + # MA question + q3 = data[:questions].find { |q| q[:data][:stimulus]&.include?("European capitals") } + expect(q3).not_to be_nil + expect(q3[:type]).to eq("mcq") # MA maps to mcq with multiple_responses + + # Another MCQ + q4 = data[:questions].find { |q| q[:data][:stimulus]&.include?("chemical symbol") } + expect(q4).not_to be_nil + expect(q4[:type]).to eq("mcq") + end + + it "reports exam header in warnings" do + data = 
AtomicAssessmentsImport::ExamSoft::Converter.new("spec/fixtures/mixed_types.html").convert + expect(data[:errors]).to include(a_string_matching(/header/i)) + end + end + + describe "messy documents with partial parse" do + it "imports what it can and warns about problems" do + data = AtomicAssessmentsImport::ExamSoft::Converter.new("spec/fixtures/messy_document.html").convert + + # Should get at least 2 good items (Q1 and Q3 have options) + published = data[:items].select { |i| i[:status] == "published" } + expect(published.length).to be >= 2 + + # Should have warnings about Q2 (no options for what looks like MCQ) + expect(data[:errors].length).to be > 0 + end + end + + describe "backward compatibility" do + it "produces the same structure from simple.html as before" do + data = AtomicAssessmentsImport::ExamSoft::Converter.new("spec/fixtures/simple.html").convert + + expect(data[:items].length).to eq(3) + expect(data[:questions].length).to eq(3) + expect(data[:activities]).to eq([]) + expect(data[:features]).to eq([]) + + item1 = data[:items].find { |i| i[:title] == "Question 1" } + expect(item1).not_to be_nil + expect(item1[:status]).to eq("published") + + q1 = data[:questions].find { |q| q[:data][:stimulus] == "What is the capital of France?" } + expect(q1).not_to be_nil + expect(q1[:data][:options].length).to eq(3) + end + end +end diff --git a/spec/fixtures/messy_document.html b/spec/fixtures/messy_document.html new file mode 100644 index 0000000..54a48b8 --- /dev/null +++ b/spec/fixtures/messy_document.html @@ -0,0 +1,9 @@ +<p>Some random header text</p> +<p></p> +<p>Folder: Test Title: Q1 Category: General 1) A normal question? ~ Normal explanation</p> +<p>*a) Correct</p> +<p>b) Wrong</p> +<p>Folder: Test Title: Q2 Category: General 2) A question with no options at all</p> +<p>Folder: Test Title: Q3 Category: General 3) Another normal question? 
~ Another explanation</p> +<p>*a) Right</p> +<p>b) Wrong</p> diff --git a/spec/fixtures/mixed_types.html b/spec/fixtures/mixed_types.html new file mode 100644 index 0000000..846d2ed --- /dev/null +++ b/spec/fixtures/mixed_types.html @@ -0,0 +1,15 @@ +<p>Exam: Midterm 2024</p> +<p>Total Questions: 4</p> +<p>Folder: Science Title: Q1 Category: Biology/Cells 1) What is the powerhouse of the cell? ~ The mitochondria produces ATP.</p> +<p>*a) Mitochondria</p> +<p>b) Nucleus</p> +<p>c) Ribosome</p> +<p>Type: Essay Folder: Writing Title: Q2 Category: English/Composition 2) Discuss the themes of Hamlet.</p> +<p>Type: MA Folder: Geography Title: Q3 Category: Capitals 3) Select all European capitals.</p> +<p>*a) Paris</p> +<p>*b) Berlin</p> +<p>c) New York</p> +<p>Folder: Science Title: Q4 Category: Chemistry 4) What is the chemical symbol for gold?</p> +<p>*a) Au</p> +<p>b) Ag</p> +<p>c) Fe</p> From 8bb55482416955f53d71b0c21ec2e827305c5807 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz <jaschwartz27@gmail.com> Date: Sat, 14 Feb 2026 12:52:58 -0700 Subject: [PATCH 21/30] chore: cleanup after ExamSoft converter refactor Remove commented-out code, consolidate redundant require_relative statements in exam_soft.rb, and apply safe rubocop auto-corrections (modifier if/unless, %r regexp literals, safe navigation, etc). 
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --- lib/atomic_assessments_import.rb | 1 - lib/atomic_assessments_import/exam_soft.rb | 14 ++------------ .../chunker/horizontal_rule_split_strategy.rb | 4 +--- .../exam_soft/converter.rb | 4 +--- .../exam_soft/extractor/feedback_detector.rb | 4 +--- .../exam_soft/extractor/question_stem_detector.rb | 12 ++++++------ .../exam_soft/extractor/question_type_detector.rb | 4 ++-- lib/atomic_assessments_import/questions/essay.rb | 2 +- .../questions/multiple_choice.rb | 2 +- .../questions/ordering.rb | 1 + .../questions/question.rb | 3 +-- 11 files changed, 17 insertions(+), 34 deletions(-) diff --git a/lib/atomic_assessments_import.rb b/lib/atomic_assessments_import.rb index cf9a83c..acd9049 100644 --- a/lib/atomic_assessments_import.rb +++ b/lib/atomic_assessments_import.rb @@ -8,7 +8,6 @@ require_relative "atomic_assessments_import/writer" require_relative "atomic_assessments_import/export" require_relative "atomic_assessments_import/exam_soft" -require_relative "atomic_assessments_import/exam_soft/chunker" module AtomicAssessmentsImport class Error < StandardError; end diff --git a/lib/atomic_assessments_import/exam_soft.rb b/lib/atomic_assessments_import/exam_soft.rb index d6690cb..e46d810 100644 --- a/lib/atomic_assessments_import/exam_soft.rb +++ b/lib/atomic_assessments_import/exam_soft.rb @@ -1,18 +1,8 @@ # frozen_string_literal: true -require_relative "exam_soft/converter" -require_relative "exam_soft/chunker/strategy" -require_relative "exam_soft/chunker/metadata_marker_strategy" -require_relative "exam_soft/chunker/numbered_question_strategy" -require_relative "exam_soft/chunker/heading_split_strategy" -require_relative "exam_soft/chunker/horizontal_rule_split_strategy" -require_relative "exam_soft/extractor/question_stem_detector" -require_relative "exam_soft/extractor/options_detector" -require_relative "exam_soft/extractor/correct_answer_detector" -require_relative 
"exam_soft/extractor/metadata_detector" -require_relative "exam_soft/extractor/feedback_detector" -require_relative "exam_soft/extractor/question_type_detector" +require_relative "exam_soft/chunker" require_relative "exam_soft/extractor" +require_relative "exam_soft/converter" module AtomicAssessmentsImport module ExamSoft diff --git a/lib/atomic_assessments_import/exam_soft/chunker/horizontal_rule_split_strategy.rb b/lib/atomic_assessments_import/exam_soft/chunker/horizontal_rule_split_strategy.rb index e3e46c2..a028a20 100644 --- a/lib/atomic_assessments_import/exam_soft/chunker/horizontal_rule_split_strategy.rb +++ b/lib/atomic_assessments_import/exam_soft/chunker/horizontal_rule_split_strategy.rb @@ -27,9 +27,7 @@ def split(doc) segments << current_segment unless current_segment.empty? - if segments.length >= 3 - @header_nodes = segments.shift - end + @header_nodes = segments.shift if segments.length >= 3 segments.length >= 2 ? segments : [] end diff --git a/lib/atomic_assessments_import/exam_soft/converter.rb b/lib/atomic_assessments_import/exam_soft/converter.rb index c9191c5..e07eb23 100644 --- a/lib/atomic_assessments_import/exam_soft/converter.rb +++ b/lib/atomic_assessments_import/exam_soft/converter.rb @@ -106,9 +106,7 @@ def categories_to_tags(categories) def convert_row(row, status = "published") source = "<p>ExamSoft Import on #{Time.now.strftime('%Y-%m-%d')}</p>\n" - if row["question id"].present? - source += "<p>External id: #{row['question id']}</p>\n" - end + source += "<p>External id: #{row['question id']}</p>\n" if row["question id"].present? 
question = Questions::Question.load(row) item = { diff --git a/lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb b/lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb index eba3b2b..283f390 100644 --- a/lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb +++ b/lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb @@ -24,9 +24,7 @@ def detect @nodes.each do |node| text = node.text.strip match = text.match(LABEL_PATTERN) - if match - return match[1].gsub(/\s+/, " ").strip - end + return match[1].gsub(/\s+/, " ").strip if match end nil diff --git a/lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb b/lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb index 0efb725..a5d1529 100644 --- a/lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb +++ b/lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb @@ -25,12 +25,12 @@ def detect # Strip metadata prefixes and numbered prefix together # e.g. "Folder: Geo Title: Q1 Category: Test 1) What is the capital?" 
- if text.match?(/\d+[.)]/m) - text = text.sub(/\A.*?\d+[.)]\s*/m, "") - else - # Strip standalone metadata labels if present (Folder:, Title:, Category:, Type:) - text = text.sub(/\A\s*(?:(?:Folder|Title|Category|Type):\s*\S+\s*)*/, "") - end + text = if text.match?(/\d+[.)]/m) + text.sub(/\A.*?\d+[.)]\s*/m, "") + else + # Strip standalone metadata labels if present (Folder:, Title:, Category:, Type:) + text.sub(/\A\s*(?:(?:Folder|Title|Category|Type):\s*\S+\s*)*/, "") + end # Split on tilde and take the first part (remove explanation) text = text.split("~").first diff --git a/lib/atomic_assessments_import/exam_soft/extractor/question_type_detector.rb b/lib/atomic_assessments_import/exam_soft/extractor/question_type_detector.rb index 348bb5a..32dcbfc 100644 --- a/lib/atomic_assessments_import/exam_soft/extractor/question_type_detector.rb +++ b/lib/atomic_assessments_import/exam_soft/extractor/question_type_detector.rb @@ -11,8 +11,8 @@ class QuestionTypeDetector /\Amultiple\s*choice\z/i => "mcq", /\Ama\z/i => "ma", /\Amultiple\s*(?:select|answer|response)\z/i => "ma", - /\Atrue[\s\/]*false\z/i => "true_false", - /\At\s*\/?\s*f\z/i => "true_false", + %r{\Atrue[\s/]*false\z}i => "true_false", + %r{\At\s*/?\s*f\z}i => "true_false", /\Aessay\z/i => "essay", /\Along\s*answer\z/i => "essay", /\Ashort\s*answer\z/i => "short_answer", diff --git a/lib/atomic_assessments_import/questions/essay.rb b/lib/atomic_assessments_import/questions/essay.rb index 076a6cc..1c68a00 100644 --- a/lib/atomic_assessments_import/questions/essay.rb +++ b/lib/atomic_assessments_import/questions/essay.rb @@ -12,7 +12,7 @@ def question_type def question_data data = super word_limit = @row["word_limit"]&.to_i - data[:max_length] = word_limit if word_limit && word_limit > 0 + data[:max_length] = word_limit if word_limit&.positive? 
data end end diff --git a/lib/atomic_assessments_import/questions/multiple_choice.rb b/lib/atomic_assessments_import/questions/multiple_choice.rb index 5204c14..b011cff 100644 --- a/lib/atomic_assessments_import/questions/multiple_choice.rb +++ b/lib/atomic_assessments_import/questions/multiple_choice.rb @@ -94,7 +94,7 @@ def ui_style when nil, "", "multiple choice", "standard" { type: "horizontal" } else - raise "Unknown template: #{@row["template"]}" + raise "Unknown template: #{@row['template']}" end end end diff --git a/lib/atomic_assessments_import/questions/ordering.rb b/lib/atomic_assessments_import/questions/ordering.rb index 160100d..07affd7 100644 --- a/lib/atomic_assessments_import/questions/ordering.rb +++ b/lib/atomic_assessments_import/questions/ordering.rb @@ -16,6 +16,7 @@ def question_data INDEXES.each do |letter| option = @row["option #{letter}"] break unless option + items << option end diff --git a/lib/atomic_assessments_import/questions/question.rb b/lib/atomic_assessments_import/questions/question.rb index a71eb43..05e84a8 100644 --- a/lib/atomic_assessments_import/questions/question.rb +++ b/lib/atomic_assessments_import/questions/question.rb @@ -5,7 +5,6 @@ module Questions class Question def initialize(row) @row = row - # @question_reference = Digest::UUID.uuid_v5(Digest::UUID::URL_NAMESPACE, "#{@item_url}/question") @reference = SecureRandom.uuid end @@ -13,7 +12,7 @@ def self.load(row) case row["question type"] when nil, "", /multiple choice/i, /mcq/i, /^ma$/i MultipleChoice.new(row) - when /true_false/i, /true\/false/i + when /true_false/i, %r{true/false}i MultipleChoice.new(row) when /essay/i, /longanswer/i Essay.new(row) From ea874af6731fa25a7b718327b0890516ec055fb5 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz <jaschwartz27@gmail.com> Date: Wed, 18 Feb 2026 17:47:12 -0700 Subject: [PATCH 22/30] =?UTF-8?q?=E2=80=94=20Added=20normalize=5Fhtml=5Fst?= =?UTF-8?q?ructure(doc)=20that=20splits=20<p>=20tags=20containing=20<br>?= 
=?UTF-8?q?=20children=20into=20separate=20<p>=20elements;=20added=20singl?= =?UTF-8?q?e-chunk=20warning=20=E2=80=94=20Fixed=20to=20collect=20multi-li?= =?UTF-8?q?ne=20feedback=20after=20tilde,=20stopping=20at=20option=20lines?= =?UTF-8?q?=20=E2=80=94=20Added=20F,=20FIB,=20E,=20and=20SA=20type=20codes?= =?UTF-8?q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?UTF-8?q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?UTF-8?q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?UTF-8?q?=20=20=20=20=20=E2=80=94=20Added=20FITB=20correct=20answer=20de?= =?UTF-8?q?tection=20from=20option=20texts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...6-02-11-flexible-examsoft-importer-plan.md | 2635 +++++++++++++++++ lib/atomic_assessments_import.rb | 1 + .../chunker/numbered_question_strategy.rb | 6 +- .../exam_soft/converter.rb | 60 +- .../exam_soft/extractor.rb | 5 + .../exam_soft/extractor/feedback_detector.rb | 19 +- .../extractor/question_type_detector.rb | 4 + .../examsoft/docx_converter_spec.rb | 79 +- .../extractor/feedback_detector_spec.rb | 21 + .../extractor/question_type_detector_spec.rb | 24 + .../examsoft/extractor_spec.rb | 12 + .../examsoft/html_converter_spec.rb | 79 +- .../examsoft/integration_spec.rb | 40 +- .../examsoft/rtf_converter_spec.rb | 79 +- spec/fixtures/single_paragraph_rtf.html | 19 + 15 files changed, 2841 insertions(+), 242 deletions(-) create mode 100644 docs/plans/2026-02-11-flexible-examsoft-importer-plan.md create mode 100644 spec/fixtures/single_paragraph_rtf.html diff --git a/docs/plans/2026-02-11-flexible-examsoft-importer-plan.md b/docs/plans/2026-02-11-flexible-examsoft-importer-plan.md new file mode 100644 index 0000000..d7a1fec --- /dev/null +++ b/docs/plans/2026-02-11-flexible-examsoft-importer-plan.md @@ -0,0 +1,2635 @@ +# Flexible ExamSoft Importer Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use 
superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Refactor the ExamSoft converter from rigid regex parsing into a flexible chunker + field detector pipeline that handles unknown format variations with best-effort extraction. + +**Architecture:** Pandoc normalizes input to HTML, Nokogiri parses to DOM, a strategy-based chunker splits into per-question chunks, independent field detectors extract data from each chunk, and the existing Question pipeline produces Learnosity output. Warnings accumulate rather than halting. + +**Tech Stack:** Ruby, RSpec, Nokogiri (already in bundle), PandocRuby (already in bundle), Learnosity format output + +--- + +### Task 1: Chunking Strategy Base Class + MetadataMarkerStrategy + +This is the foundation. The MetadataMarkerStrategy replicates the current chunking behavior (split on `Folder:` / `Type:` markers) so we can verify backward compatibility. + +**Files:** +- Create: `lib/atomic_assessments_import/exam_soft/chunker/strategy.rb` +- Create: `lib/atomic_assessments_import/exam_soft/chunker/metadata_marker_strategy.rb` +- Test: `spec/atomic_assessments_import/examsoft/chunker/metadata_marker_strategy_spec.rb` + +**Step 1: Write the failing test** + +Create `spec/atomic_assessments_import/examsoft/chunker/metadata_marker_strategy_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Chunker::MetadataMarkerStrategy do + describe "#split" do + it "splits HTML on Folder: markers" do + html = <<~HTML + <p>Folder: Geography Title: Q1 Category: Test 1) What is the capital? ~ Explanation</p> + <p>*a) Paris</p> + <p>b) London</p> + <p>Folder: Science Title: Q2 Category: Test 2) What is H2O? 
~ Water</p> + <p>*a) Water</p> + <p>b) Fire</p> + HTML + doc = Nokogiri::HTML.fragment(html) + strategy = described_class.new + chunks = strategy.split(doc) + + expect(chunks.length).to eq(2) + end + + it "splits HTML on Type: markers" do + html = <<~HTML + <p>Type: MA Folder: Geography Title: Q1 Category: Test 1) Question? ~ Expl</p> + <p>*a) Answer</p> + <p>Type: MCQ Folder: Science Title: Q2 Category: Test 2) Question2? ~ Expl</p> + <p>*a) Answer2</p> + HTML + doc = Nokogiri::HTML.fragment(html) + strategy = described_class.new + chunks = strategy.split(doc) + + expect(chunks.length).to eq(2) + end + + it "returns empty array when no markers found" do + html = "<p>Just some text with no markers</p>" + doc = Nokogiri::HTML.fragment(html) + strategy = described_class.new + chunks = strategy.split(doc) + + expect(chunks).to eq([]) + end + + it "separates exam header from questions" do + html = <<~HTML + <p>Exam: Midterm 2024</p> + <p>Total Questions: 30</p> + <p>Folder: Geography Title: Q1 Category: Test 1) Question? ~ Expl</p> + <p>*a) Answer</p> + HTML + doc = Nokogiri::HTML.fragment(html) + strategy = described_class.new + chunks = strategy.split(doc) + + expect(chunks.length).to eq(1) + expect(strategy.header_nodes).not_to be_empty + end + + it "returns chunks as arrays of Nokogiri nodes" do + html = <<~HTML + <p>Folder: Geo Title: Q1 Category: Test 1) Question? 
~ Expl</p> + <p>*a) Answer</p> + <p>b) Wrong</p> + HTML + doc = Nokogiri::HTML.fragment(html) + strategy = described_class.new + chunks = strategy.split(doc) + + expect(chunks.length).to eq(1) + expect(chunks[0]).to all(be_a(Nokogiri::XML::Node)) + end + end +end +``` + +**Step 2: Run test to verify it fails** + +Run: `bundle exec rspec spec/atomic_assessments_import/examsoft/chunker/metadata_marker_strategy_spec.rb -v` +Expected: FAIL — uninitialized constant + +**Step 3: Write the base Strategy class** + +Create `lib/atomic_assessments_import/exam_soft/chunker/strategy.rb`: + +```ruby +# frozen_string_literal: true + +module AtomicAssessmentsImport + module ExamSoft + module Chunker + class Strategy + attr_reader :header_nodes + + def initialize + @header_nodes = [] + end + + # Subclasses implement this. Returns an array of chunks, + # where each chunk is an array of Nokogiri nodes belonging to one question. + # Returns empty array if this strategy doesn't apply to the document. + def split(doc) + raise NotImplementedError + end + end + end + end +end +``` + +**Step 4: Write MetadataMarkerStrategy** + +Create `lib/atomic_assessments_import/exam_soft/chunker/metadata_marker_strategy.rb`: + +```ruby +# frozen_string_literal: true + +require_relative "strategy" + +module AtomicAssessmentsImport + module ExamSoft + module Chunker + class MetadataMarkerStrategy < Strategy + MARKER_PATTERN = /\A\s*(?:Type:\s*.+?\s+)?Folder:\s*/i + + def split(doc) + @header_nodes = [] + chunks = [] + current_chunk = [] + found_first = false + + doc.children.each do |node| + text = node.text.strip + next if text.empty? && !node.name.match?(/^(img|table|hr)$/i) + + if text.match?(MARKER_PATTERN) + found_first = true + chunks << current_chunk unless current_chunk.empty? + current_chunk = [node] + elsif found_first + current_chunk << node + else + @header_nodes << node + end + end + + chunks << current_chunk unless current_chunk.empty? 
+ chunks + end + end + end + end +end +``` + +**Step 5: Run test to verify it passes** + +Run: `bundle exec rspec spec/atomic_assessments_import/examsoft/chunker/metadata_marker_strategy_spec.rb -v` +Expected: PASS + +**Step 6: Commit** + +```bash +git add lib/atomic_assessments_import/exam_soft/chunker/ spec/atomic_assessments_import/examsoft/chunker/ +git commit -m "feat: add chunker base class and MetadataMarkerStrategy" +``` + +--- + +### Task 2: NumberedQuestionStrategy + +**Files:** +- Create: `lib/atomic_assessments_import/exam_soft/chunker/numbered_question_strategy.rb` +- Test: `spec/atomic_assessments_import/examsoft/chunker/numbered_question_strategy_spec.rb` + +**Step 1: Write the failing test** + +Create `spec/atomic_assessments_import/examsoft/chunker/numbered_question_strategy_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Chunker::NumberedQuestionStrategy do + describe "#split" do + it "splits on paragraphs starting with number-paren pattern" do + html = <<~HTML + <p>1) What is the capital of France?</p> + <p>a) Paris</p> + <p>b) London</p> + <p>2) What is H2O?</p> + <p>a) Water</p> + <p>b) Fire</p> + HTML + doc = Nokogiri::HTML.fragment(html) + chunks = described_class.new.split(doc) + + expect(chunks.length).to eq(2) + end + + it "splits on paragraphs starting with number-dot pattern" do + html = <<~HTML + <p>1. What is the capital of France?</p> + <p>a) Paris</p> + <p>2. 
What is H2O?</p> + <p>a) Water</p> + HTML + doc = Nokogiri::HTML.fragment(html) + chunks = described_class.new.split(doc) + + expect(chunks.length).to eq(2) + end + + it "returns empty array when no numbered questions found" do + html = "<p>Just some regular text</p><p>More text</p>" + doc = Nokogiri::HTML.fragment(html) + chunks = described_class.new.split(doc) + + expect(chunks).to eq([]) + end + + it "separates header content before first question" do + html = <<~HTML + <p>Exam: Midterm</p> + <p>Total: 30 questions</p> + <p>1) First question?</p> + <p>a) Answer</p> + HTML + doc = Nokogiri::HTML.fragment(html) + strategy = described_class.new + chunks = strategy.split(doc) + + expect(chunks.length).to eq(1) + expect(strategy.header_nodes.length).to eq(2) + end + + it "does not split on lettered options like a) b) c)" do + html = <<~HTML + <p>1) What is the capital of France?</p> + <p>a) Paris</p> + <p>b) London</p> + <p>c) Berlin</p> + HTML + doc = Nokogiri::HTML.fragment(html) + chunks = described_class.new.split(doc) + + expect(chunks.length).to eq(1) + end + end +end +``` + +**Step 2: Run test to verify it fails** + +Run: `bundle exec rspec spec/atomic_assessments_import/examsoft/chunker/numbered_question_strategy_spec.rb -v` +Expected: FAIL — uninitialized constant + +**Step 3: Write implementation** + +Create `lib/atomic_assessments_import/exam_soft/chunker/numbered_question_strategy.rb`: + +```ruby +# frozen_string_literal: true + +require_relative "strategy" + +module AtomicAssessmentsImport + module ExamSoft + module Chunker + class NumberedQuestionStrategy < Strategy + # Matches "1)" or "1." or "12)" etc. at start of text, but NOT single letters like "a)" + NUMBERED_PATTERN = /\A\s*(\d+)\s*[.)]/ + + def split(doc) + @header_nodes = [] + chunks = [] + current_chunk = [] + found_first = false + + doc.children.each do |node| + text = node.text.strip + next if text.empty? 
&& !node.name.match?(/^(img|table|hr)$/i) + + if text.match?(NUMBERED_PATTERN) + found_first = true + chunks << current_chunk unless current_chunk.empty? + current_chunk = [node] + elsif found_first + current_chunk << node + else + @header_nodes << node + end + end + + chunks << current_chunk unless current_chunk.empty? + # Only valid if we found more than one chunk (single could be a false positive) + chunks.length > 1 ? chunks : [] + end + end + end + end +end +``` + +**Step 4: Run test to verify it passes** + +Run: `bundle exec rspec spec/atomic_assessments_import/examsoft/chunker/numbered_question_strategy_spec.rb -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add lib/atomic_assessments_import/exam_soft/chunker/numbered_question_strategy.rb spec/atomic_assessments_import/examsoft/chunker/numbered_question_strategy_spec.rb +git commit -m "feat: add NumberedQuestionStrategy for chunking" +``` + +--- + +### Task 3: HeadingSplitStrategy + HorizontalRuleSplitStrategy + +These two are simple and follow the same pattern, so they're combined. 
+ +**Files:** +- Create: `lib/atomic_assessments_import/exam_soft/chunker/heading_split_strategy.rb` +- Create: `lib/atomic_assessments_import/exam_soft/chunker/horizontal_rule_split_strategy.rb` +- Test: `spec/atomic_assessments_import/examsoft/chunker/heading_split_strategy_spec.rb` +- Test: `spec/atomic_assessments_import/examsoft/chunker/horizontal_rule_split_strategy_spec.rb` + +**Step 1: Write failing tests** + +Create `spec/atomic_assessments_import/examsoft/chunker/heading_split_strategy_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Chunker::HeadingSplitStrategy do + describe "#split" do + it "splits on heading tags" do + html = <<~HTML + <h2>Question 1</h2> + <p>What is the capital of France?</p> + <p>a) Paris</p> + <h2>Question 2</h2> + <p>What is H2O?</p> + <p>a) Water</p> + HTML + doc = Nokogiri::HTML.fragment(html) + chunks = described_class.new.split(doc) + + expect(chunks.length).to eq(2) + end + + it "returns empty array when no headings found" do + html = "<p>No headings here</p>" + doc = Nokogiri::HTML.fragment(html) + chunks = described_class.new.split(doc) + + expect(chunks).to eq([]) + end + + it "separates header content before first heading" do + html = <<~HTML + <p>Exam header info</p> + <h2>Question 1</h2> + <p>What is the capital?</p> + HTML + doc = Nokogiri::HTML.fragment(html) + strategy = described_class.new + chunks = strategy.split(doc) + + expect(chunks.length).to eq(1) + expect(strategy.header_nodes).not_to be_empty + end + end +end +``` + +Create `spec/atomic_assessments_import/examsoft/chunker/horizontal_rule_split_strategy_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Chunker::HorizontalRuleSplitStrategy do + describe "#split" do + it "splits on hr tags" do + html = <<~HTML + <p>Question 1: What is 
the capital of France?</p> + <p>a) Paris</p> + <hr/> + <p>Question 2: What is H2O?</p> + <p>a) Water</p> + HTML + doc = Nokogiri::HTML.fragment(html) + chunks = described_class.new.split(doc) + + expect(chunks.length).to eq(2) + end + + it "returns empty array when no hr tags found" do + html = "<p>No rules here</p>" + doc = Nokogiri::HTML.fragment(html) + chunks = described_class.new.split(doc) + + expect(chunks).to eq([]) + end + + it "separates header content before first hr" do + html = <<~HTML + <p>Exam header info</p> + <hr/> + <p>Question 1</p> + HTML + doc = Nokogiri::HTML.fragment(html) + strategy = described_class.new + chunks = strategy.split(doc) + + expect(chunks.length).to eq(1) + expect(strategy.header_nodes).not_to be_empty + end + end +end +``` + +**Step 2: Run tests to verify they fail** + +Run: `bundle exec rspec spec/atomic_assessments_import/examsoft/chunker/heading_split_strategy_spec.rb spec/atomic_assessments_import/examsoft/chunker/horizontal_rule_split_strategy_spec.rb -v` +Expected: FAIL — uninitialized constants + +**Step 3: Write implementations** + +Create `lib/atomic_assessments_import/exam_soft/chunker/heading_split_strategy.rb`: + +```ruby +# frozen_string_literal: true + +require_relative "strategy" + +module AtomicAssessmentsImport + module ExamSoft + module Chunker + class HeadingSplitStrategy < Strategy + HEADING_PATTERN = /^h[1-6]$/i + + def split(doc) + @header_nodes = [] + chunks = [] + current_chunk = [] + found_first = false + + doc.children.each do |node| + if node.name.match?(HEADING_PATTERN) + found_first = true + chunks << current_chunk unless current_chunk.empty? + current_chunk = [node] + elsif found_first + text = node.text.strip + next if text.empty? && !node.name.match?(/^(img|table|hr)$/i) + + current_chunk << node + else + @header_nodes << node unless node.text.strip.empty? + end + end + + chunks << current_chunk unless current_chunk.empty? + chunks.length > 1 ? 
chunks : [] + end + end + end + end +end +``` + +Create `lib/atomic_assessments_import/exam_soft/chunker/horizontal_rule_split_strategy.rb`: + +```ruby +# frozen_string_literal: true + +require_relative "strategy" + +module AtomicAssessmentsImport + module ExamSoft + module Chunker + class HorizontalRuleSplitStrategy < Strategy + def split(doc) + @header_nodes = [] + chunks = [] + current_chunk = [] + found_first = false + + doc.children.each do |node| + if node.name == "hr" + if current_chunk.empty? && !found_first + # Content before first hr with no question content is header + next + end + found_first = true + chunks << current_chunk unless current_chunk.empty? + current_chunk = [] + elsif found_first || !chunks.empty? + text = node.text.strip + next if text.empty? && !node.name.match?(/^(img|table)$/i) + + current_chunk << node + else + text = node.text.strip + if text.empty? + next + else + # Before any hr — could be header or first question + current_chunk << node + end + end + end + + chunks << current_chunk unless current_chunk.empty? + + if chunks.length > 1 + chunks + else + @header_nodes = [] + [] + end + end + end + end + end +end +``` + +Note: The HorizontalRuleSplitStrategy is a bit different — the `<hr>` is a separator *between* chunks, not part of a chunk. Content before the first `<hr>` is the first chunk (or header if there's no question content before it). 
+ +**Step 4: Run tests to verify they pass** + +Run: `bundle exec rspec spec/atomic_assessments_import/examsoft/chunker/heading_split_strategy_spec.rb spec/atomic_assessments_import/examsoft/chunker/horizontal_rule_split_strategy_spec.rb -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add lib/atomic_assessments_import/exam_soft/chunker/heading_split_strategy.rb lib/atomic_assessments_import/exam_soft/chunker/horizontal_rule_split_strategy.rb spec/atomic_assessments_import/examsoft/chunker/heading_split_strategy_spec.rb spec/atomic_assessments_import/examsoft/chunker/horizontal_rule_split_strategy_spec.rb +git commit -m "feat: add HeadingSplitStrategy and HorizontalRuleSplitStrategy" +``` + +--- + +### Task 4: Chunker Orchestrator + +The orchestrator tries each strategy in priority order and uses the first one that produces chunks; if none match, it treats the whole document as a single chunk and records a warning. + +**Files:** +- Create: `lib/atomic_assessments_import/exam_soft/chunker.rb` +- Test: `spec/atomic_assessments_import/examsoft/chunker_spec.rb` + +**Step 1: Write the failing test** + +Create `spec/atomic_assessments_import/examsoft/chunker_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Chunker do + describe "#chunk" do + it "uses MetadataMarkerStrategy when Folder: markers are present" do + html = <<~HTML + <p>Folder: Geo Title: Q1 Category: Test 1) Question? ~ Expl</p> + <p>*a) Answer</p> + <p>Folder: Sci Title: Q2 Category: Test 2) Question2? 
~ Expl</p> + <p>*a) Answer2</p> + HTML + doc = Nokogiri::HTML.fragment(html) + chunker = described_class.new(doc) + result = chunker.chunk + + expect(result[:chunks].length).to eq(2) + end + + it "falls back to NumberedQuestionStrategy when no metadata markers" do + html = <<~HTML + <p>1) What is the capital of France?</p> + <p>a) Paris</p> + <p>b) London</p> + <p>2) What is H2O?</p> + <p>a) Water</p> + <p>b) Fire</p> + HTML + doc = Nokogiri::HTML.fragment(html) + chunker = described_class.new(doc) + result = chunker.chunk + + expect(result[:chunks].length).to eq(2) + end + + it "falls back to HeadingSplitStrategy when no numbers" do + html = <<~HTML + <h2>Question 1</h2> + <p>What is the capital?</p> + <p>a) Paris</p> + <h2>Question 2</h2> + <p>What is H2O?</p> + <p>a) Water</p> + HTML + doc = Nokogiri::HTML.fragment(html) + chunker = described_class.new(doc) + result = chunker.chunk + + expect(result[:chunks].length).to eq(2) + end + + it "returns whole document as single chunk when no strategy matches" do + html = <<~HTML + <p>Some question text here</p> + <p>a) An option</p> + HTML + doc = Nokogiri::HTML.fragment(html) + chunker = described_class.new(doc) + result = chunker.chunk + + expect(result[:chunks].length).to eq(1) + expect(result[:warnings]).to include(/No chunking strategy/i) + end + + it "extracts header nodes" do + html = <<~HTML + <p>Exam: Midterm 2024</p> + <p>Total Questions: 30</p> + <p>Folder: Geo Title: Q1 Category: Test 1) Question? 
~ Expl</p> + <p>*a) Answer</p> + HTML + doc = Nokogiri::HTML.fragment(html) + chunker = described_class.new(doc) + result = chunker.chunk + + expect(result[:header_nodes]).not_to be_empty + end + end +end +``` + +**Step 2: Run test to verify it fails** + +Run: `bundle exec rspec spec/atomic_assessments_import/examsoft/chunker_spec.rb -v` +Expected: FAIL + +**Step 3: Write implementation** + +Create `lib/atomic_assessments_import/exam_soft/chunker.rb`: + +```ruby +# frozen_string_literal: true + +require_relative "chunker/strategy" +require_relative "chunker/metadata_marker_strategy" +require_relative "chunker/numbered_question_strategy" +require_relative "chunker/heading_split_strategy" +require_relative "chunker/horizontal_rule_split_strategy" + +module AtomicAssessmentsImport + module ExamSoft + class Chunker + STRATEGIES = [ + Chunker::MetadataMarkerStrategy, + Chunker::NumberedQuestionStrategy, + Chunker::HeadingSplitStrategy, + Chunker::HorizontalRuleSplitStrategy, + ].freeze + + def initialize(doc) + @doc = doc + end + + def chunk + warnings = [] + + STRATEGIES.each do |strategy_class| + strategy = strategy_class.new + chunks = strategy.split(@doc) + next if chunks.empty? + + return { + chunks: chunks, + header_nodes: strategy.header_nodes, + warnings: warnings, + } + end + + # No strategy matched — return entire document as one chunk + all_nodes = @doc.children.reject { |n| n.text.strip.empty? && !n.name.match?(/^(img|table|hr)$/i) } + warnings << "No chunking strategy matched. Treating entire document as a single question." 
+ + { + chunks: [all_nodes], + header_nodes: [], + warnings: warnings, + } + end + end + end +end +``` + +**Step 4: Run test to verify it passes** + +Run: `bundle exec rspec spec/atomic_assessments_import/examsoft/chunker_spec.rb -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add lib/atomic_assessments_import/exam_soft/chunker.rb spec/atomic_assessments_import/examsoft/chunker_spec.rb +git commit -m "feat: add Chunker orchestrator with strategy cascade" +``` + +--- + +### Task 5: Field Detectors — QuestionStem, Options, CorrectAnswer + +These three are the core detectors needed for MCQ questions. + +**Files:** +- Create: `lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb` +- Create: `lib/atomic_assessments_import/exam_soft/extractor/options_detector.rb` +- Create: `lib/atomic_assessments_import/exam_soft/extractor/correct_answer_detector.rb` +- Test: `spec/atomic_assessments_import/examsoft/extractor/question_stem_detector_spec.rb` +- Test: `spec/atomic_assessments_import/examsoft/extractor/options_detector_spec.rb` +- Test: `spec/atomic_assessments_import/examsoft/extractor/correct_answer_detector_spec.rb` + +**Step 1: Write failing tests** + +Create `spec/atomic_assessments_import/examsoft/extractor/question_stem_detector_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Extractor::QuestionStemDetector do + def nodes_from(html) + Nokogiri::HTML.fragment(html).children.to_a + end + + describe "#detect" do + it "extracts question text before options" do + nodes = nodes_from(<<~HTML) + <p>1) What is the capital of France?</p> + <p>a) Paris</p> + <p>b) London</p> + HTML + result = described_class.new(nodes).detect + + expect(result).to eq("What is the capital of France?") + end + + it "extracts question text with tilde-separated explanation removed" do + nodes = nodes_from(<<~HTML) + <p>Folder: Geo Title: Q1 Category: 
Test 1) What is the capital? ~ Paris is the capital.</p> + <p>*a) Paris</p> + HTML + result = described_class.new(nodes).detect + + expect(result).to eq("What is the capital?") + end + + it "extracts question text without numbered prefix" do + nodes = nodes_from(<<~HTML) + <p>What is the capital of France?</p> + <p>a) Paris</p> + HTML + result = described_class.new(nodes).detect + + expect(result).to eq("What is the capital of France?") + end + + it "returns nil when no question text found" do + nodes = nodes_from("<p>a) Paris</p><p>b) London</p>") + result = described_class.new(nodes).detect + + expect(result).to be_nil + end + end +end +``` + +Create `spec/atomic_assessments_import/examsoft/extractor/options_detector_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Extractor::OptionsDetector do + def nodes_from(html) + Nokogiri::HTML.fragment(html).children.to_a + end + + describe "#detect" do + it "extracts lettered options with paren format" do + nodes = nodes_from(<<~HTML) + <p>Question text</p> + <p>a) Paris</p> + <p>b) London</p> + <p>c) Berlin</p> + HTML + result = described_class.new(nodes).detect + + expect(result.length).to eq(3) + expect(result[0][:text]).to eq("Paris") + expect(result[1][:text]).to eq("London") + expect(result[2][:text]).to eq("Berlin") + end + + it "detects correct answer markers with asterisk" do + nodes = nodes_from(<<~HTML) + <p>*a) Paris</p> + <p>b) London</p> + HTML + result = described_class.new(nodes).detect + + expect(result[0][:correct]).to be true + expect(result[1][:correct]).to be false + end + + it "detects correct answer markers with bold" do + nodes = nodes_from(<<~HTML) + <p><strong>a) Paris</strong></p> + <p>b) London</p> + HTML + result = described_class.new(nodes).detect + + expect(result[0][:correct]).to be true + expect(result[1][:correct]).to be false + end + + it "returns empty array when no options 
found" do + nodes = nodes_from("<p>Just a paragraph</p>") + result = described_class.new(nodes).detect + + expect(result).to eq([]) + end + + it "handles uppercase letter options" do + nodes = nodes_from(<<~HTML) + <p>A) Paris</p> + <p>B) London</p> + HTML + result = described_class.new(nodes).detect + + expect(result.length).to eq(2) + expect(result[0][:text]).to eq("Paris") + end + end +end +``` + +Create `spec/atomic_assessments_import/examsoft/extractor/correct_answer_detector_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Extractor::CorrectAnswerDetector do + def nodes_from(html) + Nokogiri::HTML.fragment(html).children.to_a + end + + describe "#detect" do + it "detects correct answers from asterisk-marked options" do + options = [ + { text: "Paris", letter: "a", correct: true }, + { text: "London", letter: "b", correct: false }, + ] + result = described_class.new(nodes_from(""), options).detect + + expect(result).to eq(["a"]) + end + + it "detects multiple correct answers" do + options = [ + { text: "Little Rock", letter: "a", correct: true }, + { text: "Denver", letter: "b", correct: true }, + { text: "Detroit", letter: "c", correct: false }, + ] + result = described_class.new(nodes_from(""), options).detect + + expect(result).to eq(["a", "b"]) + end + + it "detects correct answer from Answer: label in chunk" do + nodes = nodes_from("<p>Answer: A</p>") + options = [ + { text: "Paris", letter: "a", correct: false }, + { text: "London", letter: "b", correct: false }, + ] + result = described_class.new(nodes, options).detect + + expect(result).to eq(["a"]) + end + + it "returns empty array when no correct answer found" do + options = [ + { text: "Paris", letter: "a", correct: false }, + { text: "London", letter: "b", correct: false }, + ] + result = described_class.new(nodes_from(""), options).detect + + expect(result).to eq([]) + end + end +end 
+``` + +**Step 2: Run tests to verify they fail** + +Run: `bundle exec rspec spec/atomic_assessments_import/examsoft/extractor/ -v` +Expected: FAIL — uninitialized constants + +**Step 3: Write implementations** + +Create `lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb`: + +```ruby +# frozen_string_literal: true + +module AtomicAssessmentsImport + module ExamSoft + module Extractor + class QuestionStemDetector + OPTION_PATTERN = /\A\s*\*?[a-oA-O][.)]/ + NUMBERED_PREFIX = /\A\s*\d+\s*[.)]\s*/ + METADATA_PREFIX = /\A\s*(?:(?:Type:\s*.+?\s+)?Folder:.+?(?:Title:.+?)?(?:Category:.+?)?)?\s*\d*\s*[.)]?\s*/m + TILDE_SPLIT = /\s*~\s*/ + + def initialize(nodes) + @nodes = nodes + end + + def detect + @nodes.each do |node| + text = node.text.strip + next if text.empty? + next if text.match?(OPTION_PATTERN) + + # This node contains the question stem (possibly with metadata prefix) + # Try to extract just the question part + stem = extract_stem(text) + return stem unless stem.nil? || stem.empty? + end + + nil + end + + private + + def extract_stem(text) + # Remove metadata prefix if present (Folder:, Title:, Category:, etc.) 
+ cleaned = text.sub(METADATA_PREFIX, "") + # Remove numbered prefix + cleaned = cleaned.sub(NUMBERED_PREFIX, "") + # Split on tilde (explanation separator) and take the question part + cleaned = cleaned.split(TILDE_SPLIT).first + cleaned&.strip.presence + end + end + end + end +end +``` + +Create `lib/atomic_assessments_import/exam_soft/extractor/options_detector.rb`: + +```ruby +# frozen_string_literal: true + +module AtomicAssessmentsImport + module ExamSoft + module Extractor + class OptionsDetector + OPTION_PATTERN = /\A\s*(\*?)([a-oA-O])\s*[.)]\s*(.+)/m + + def initialize(nodes) + @nodes = nodes + end + + def detect + options = [] + + @nodes.each do |node| + text = node.text.strip + match = text.match(OPTION_PATTERN) + next unless match + + marker = match[1] + letter = match[2].downcase + option_text = match[3].strip + + # Check for bold formatting as correct marker + bold = node.at_css("strong, b") + is_correct = marker == "*" || (bold && bold.text.strip == text.strip) + + options << { + text: option_text, + letter: letter, + correct: is_correct || false, + } + end + + options + end + end + end + end +end +``` + +Create `lib/atomic_assessments_import/exam_soft/extractor/correct_answer_detector.rb`: + +```ruby +# frozen_string_literal: true + +module AtomicAssessmentsImport + module ExamSoft + module Extractor + class CorrectAnswerDetector + ANSWER_LABEL_PATTERN = /\bAnswer:\s*([A-Oa-o,;\s]+)/i + + def initialize(nodes, options) + @nodes = nodes + @options = options + end + + def detect + # First: check options for correct markers (asterisk, bold) + from_markers = @options.select { |o| o[:correct] }.map { |o| o[:letter] } + return from_markers unless from_markers.empty? + + # Second: look for "Answer:" label in the chunk + @nodes.each do |node| + text = node.text.strip + match = text.match(ANSWER_LABEL_PATTERN) + next unless match + + letters = match[1].scan(/[a-oA-O]/).map(&:downcase) + return letters unless letters.empty? 
+ end + + [] + end + end + end + end +end +``` + +**Step 4: Run tests to verify they pass** + +Run: `bundle exec rspec spec/atomic_assessments_import/examsoft/extractor/ -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add lib/atomic_assessments_import/exam_soft/extractor/ spec/atomic_assessments_import/examsoft/extractor/ +git commit -m "feat: add core field detectors (stem, options, correct answer)" +``` + +--- + +### Task 6: Field Detectors — Metadata, Feedback, QuestionType + +**Files:** +- Create: `lib/atomic_assessments_import/exam_soft/extractor/metadata_detector.rb` +- Create: `lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb` +- Create: `lib/atomic_assessments_import/exam_soft/extractor/question_type_detector.rb` +- Test: `spec/atomic_assessments_import/examsoft/extractor/metadata_detector_spec.rb` +- Test: `spec/atomic_assessments_import/examsoft/extractor/feedback_detector_spec.rb` +- Test: `spec/atomic_assessments_import/examsoft/extractor/question_type_detector_spec.rb` + +**Step 1: Write failing tests** + +Create `spec/atomic_assessments_import/examsoft/extractor/metadata_detector_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Extractor::MetadataDetector do + def nodes_from(html) + Nokogiri::HTML.fragment(html).children.to_a + end + + describe "#detect" do + it "extracts folder, title, and category" do + nodes = nodes_from("<p>Folder: Geography Title: Question 1 Category: Subject/Capitals, Difficulty/Normal 1) Question?</p>") + result = described_class.new(nodes).detect + + expect(result[:folder]).to eq("Geography") + expect(result[:title]).to eq("Question 1") + expect(result[:categories]).to include("Subject/Capitals") + end + + it "extracts type when present" do + nodes = nodes_from("<p>Type: MA Folder: Geography Title: Q1 Category: Test 1) Question?</p>") + result = described_class.new(nodes).detect + + 
expect(result[:type]).to eq("ma") + end + + it "returns empty hash when no metadata found" do + nodes = nodes_from("<p>Just a question with no metadata</p>") + result = described_class.new(nodes).detect + + expect(result).to eq({}) + end + end +end +``` + +Create `spec/atomic_assessments_import/examsoft/extractor/feedback_detector_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Extractor::FeedbackDetector do + def nodes_from(html) + Nokogiri::HTML.fragment(html).children.to_a + end + + describe "#detect" do + it "extracts feedback after tilde" do + nodes = nodes_from("<p>1) What is the capital? ~ Paris is the capital of France.</p>") + result = described_class.new(nodes).detect + + expect(result).to eq("Paris is the capital of France.") + end + + it "extracts feedback from Explanation: label" do + nodes = nodes_from(<<~HTML) + <p>What is the capital?</p> + <p>Explanation: Paris is the capital of France.</p> + HTML + result = described_class.new(nodes).detect + + expect(result).to eq("Paris is the capital of France.") + end + + it "extracts feedback from Rationale: label" do + nodes = nodes_from(<<~HTML) + <p>What is the capital?</p> + <p>Rationale: Paris is the capital of France.</p> + HTML + result = described_class.new(nodes).detect + + expect(result).to eq("Paris is the capital of France.") + end + + it "returns nil when no feedback found" do + nodes = nodes_from("<p>Just a question</p>") + result = described_class.new(nodes).detect + + expect(result).to be_nil + end + end +end +``` + +Create `spec/atomic_assessments_import/examsoft/extractor/question_type_detector_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Extractor::QuestionTypeDetector do + def nodes_from(html) + Nokogiri::HTML.fragment(html).children.to_a + end + + describe 
"#detect" do + it "detects type from Type: label" do + nodes = nodes_from("<p>Type: MA Folder: Geo 1) Question?</p>") + result = described_class.new(nodes, has_options: true).detect + + expect(result).to eq("ma") + end + + it "detects essay from Type: label" do + nodes = nodes_from("<p>Type: Essay Folder: Geo 1) Question?</p>") + result = described_class.new(nodes, has_options: false).detect + + expect(result).to eq("essay") + end + + it "defaults to mcq when options are present" do + nodes = nodes_from("<p>A question with no type label</p>") + result = described_class.new(nodes, has_options: true).detect + + expect(result).to eq("mcq") + end + + it "defaults to short_answer when no options" do + nodes = nodes_from("<p>A question with no type label and no options</p>") + result = described_class.new(nodes, has_options: false).detect + + expect(result).to eq("short_answer") + end + + it "detects true/false from Type: label" do + nodes = nodes_from("<p>Type: True/False 1) Question?</p>") + result = described_class.new(nodes, has_options: true).detect + + expect(result).to eq("true_false") + end + + it "detects matching from Type: label" do + nodes = nodes_from("<p>Type: Matching 1) Question?</p>") + result = described_class.new(nodes, has_options: false).detect + + expect(result).to eq("matching") + end + + it "detects ordering from Type: label" do + nodes = nodes_from("<p>Type: Ordering 1) Question?</p>") + result = described_class.new(nodes, has_options: false).detect + + expect(result).to eq("ordering") + end + + it "detects fill_in_the_blank from Type: label" do + nodes = nodes_from("<p>Type: Fill in the Blank 1) Question?</p>") + result = described_class.new(nodes, has_options: false).detect + + expect(result).to eq("fill_in_the_blank") + end + end +end +``` + +**Step 2: Run tests to verify they fail** + +Run: `bundle exec rspec spec/atomic_assessments_import/examsoft/extractor/ -v` +Expected: FAIL — uninitialized constants for new detectors + +**Step 3: Write 
implementations** + +Create `lib/atomic_assessments_import/exam_soft/extractor/metadata_detector.rb`: + +```ruby +# frozen_string_literal: true + +module AtomicAssessmentsImport + module ExamSoft + module Extractor + class MetadataDetector + FOLDER_PATTERN = /Folder:\s*(.+?)(?=\s*(?:Title:|Category:|\d+[.)]))/ + TITLE_PATTERN = /Title:\s*(.+?)(?=\s*(?:Category:|\d+[.)]))/ + CATEGORY_PATTERN = /Category:\s*(.+?)(?=\s*\d+[.)]|\z)/ + TYPE_PATTERN = /Type:\s*(\S+)/ + + def initialize(nodes) + @nodes = nodes + end + + def detect + # Combine all text from nodes to search for metadata + full_text = @nodes.map { |n| n.text.strip }.join(" ") + result = {} + + type_match = full_text.match(TYPE_PATTERN) + result[:type] = type_match[1].strip.downcase if type_match + + folder_match = full_text.match(FOLDER_PATTERN) + result[:folder] = folder_match[1].strip if folder_match + + title_match = full_text.match(TITLE_PATTERN) + result[:title] = title_match[1].strip if title_match + + category_match = full_text.match(CATEGORY_PATTERN) + if category_match + result[:categories] = category_match[1].split(",").map(&:strip) + end + + result + end + end + end + end +end +``` + +Create `lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb`: + +```ruby +# frozen_string_literal: true + +module AtomicAssessmentsImport + module ExamSoft + module Extractor + class FeedbackDetector + TILDE_PATTERN = /~\s*(.+)/m + LABEL_PATTERN = /\A\s*(?:Explanation|Rationale):\s*(.+)/im + + def initialize(nodes) + @nodes = nodes + end + + def detect + # First: look for tilde-separated feedback in any node + @nodes.each do |node| + text = node.text.strip + match = text.match(TILDE_PATTERN) + if match + feedback = match[1].strip + return feedback unless feedback.empty? 
+ end + end + + # Second: look for labeled feedback (Explanation:, Rationale:) + @nodes.each do |node| + text = node.text.strip + match = text.match(LABEL_PATTERN) + return match[1].strip if match + end + + nil + end + end + end + end +end +``` + +Create `lib/atomic_assessments_import/exam_soft/extractor/question_type_detector.rb`: + +```ruby +# frozen_string_literal: true + +module AtomicAssessmentsImport + module ExamSoft + module Extractor + class QuestionTypeDetector + TYPE_LABEL_PATTERN = /Type:\s*(.+?)(?=\s*(?:Folder:|Title:|Category:|\d+[.)]|\z))/i + + TYPE_MAP = { + /\Amcq?\z/i => "mcq", + /\Amultiple\s*choice\z/i => "mcq", + /\Ama\z/i => "ma", + /\Amultiple\s*(?:select|answer|response)\z/i => "ma", + /\Atrue[\s\/]*false\z/i => "true_false", + /\At\s*\/?\s*f\z/i => "true_false", + /\Aessay\z/i => "essay", + /\Along\s*answer\z/i => "essay", + /\Ashort\s*answer\z/i => "short_answer", + /\Afill[\s_-]*in[\s_-]*(?:the[\s_-]*)?blank\z/i => "fill_in_the_blank", + /\Acloze\z/i => "fill_in_the_blank", + /\Amatching\z/i => "matching", + /\Aorder(?:ing)?\z/i => "ordering", + }.freeze + + def initialize(nodes, has_options:) + @nodes = nodes + @has_options = has_options + end + + def detect + # Try to find an explicit Type: label + full_text = @nodes.map { |n| n.text.strip }.join(" ") + match = full_text.match(TYPE_LABEL_PATTERN) + + if match + type_text = match[1].strip + TYPE_MAP.each do |pattern, type| + return type if type_text.match?(pattern) + end + # Unknown explicit type — return it lowercased as-is + return type_text.downcase.gsub(/\s+/, "_") + end + + # No explicit type — infer from structure + @has_options ? 
"mcq" : "short_answer" + end + end + end + end +end +``` + +**Step 4: Run tests to verify they pass** + +Run: `bundle exec rspec spec/atomic_assessments_import/examsoft/extractor/ -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add lib/atomic_assessments_import/exam_soft/extractor/ spec/atomic_assessments_import/examsoft/extractor/ +git commit -m "feat: add metadata, feedback, and question type detectors" +``` + +--- + +### Task 7: Extractor Orchestrator + +Assembles all detectors and builds the `row_mock` hash. + +**Files:** +- Create: `lib/atomic_assessments_import/exam_soft/extractor.rb` +- Test: `spec/atomic_assessments_import/examsoft/extractor_spec.rb` + +**Step 1: Write the failing test** + +Create `spec/atomic_assessments_import/examsoft/extractor_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" +require "nokogiri" + +RSpec.describe AtomicAssessmentsImport::ExamSoft::Extractor do + def nodes_from(html) + Nokogiri::HTML.fragment(html).children.to_a + end + + describe "#extract" do + it "extracts a complete MCQ question" do + nodes = nodes_from(<<~HTML) + <p>Folder: Geography Title: Question 1 Category: Subject/Capitals 1) What is the capital of France? 
~ Paris is the capital.</p> + <p>*a) Paris</p> + <p>b) London</p> + <p>c) Berlin</p> + HTML + result = described_class.new(nodes).extract + + expect(result[:row]["question text"]).to eq("What is the capital of France?") + expect(result[:row]["option a"]).to eq("Paris") + expect(result[:row]["option b"]).to eq("London") + expect(result[:row]["option c"]).to eq("Berlin") + expect(result[:row]["correct answer"]).to eq("a") + expect(result[:row]["title"]).to eq("Question 1") + expect(result[:row]["folder"]).to eq("Geography") + expect(result[:row]["general feedback"]).to eq("Paris is the capital.") + expect(result[:row]["question type"]).to eq("mcq") + expect(result[:status]).to eq("published") + expect(result[:warnings]).to be_empty + end + + it "returns draft status when no correct answer" do + nodes = nodes_from(<<~HTML) + <p>1) What is the capital of France?</p> + <p>a) Paris</p> + <p>b) London</p> + HTML + result = described_class.new(nodes).extract + + expect(result[:status]).to eq("draft") + expect(result[:warnings]).to include(/correct answer/i) + end + + it "returns draft status when no question text found" do + nodes = nodes_from(<<~HTML) + <p>a) Paris</p> + <p>b) London</p> + HTML + result = described_class.new(nodes).extract + + expect(result[:status]).to eq("draft") + expect(result[:warnings]).to include(/question text/i) + end + + it "handles multiple correct answers for MA type" do + nodes = nodes_from(<<~HTML) + <p>Type: MA Folder: Geo Title: Q1 Category: Test 1) Pick capitals? 
~ Explanation</p> + <p>*a) Paris</p> + <p>*b) Berlin</p> + <p>c) Detroit</p> + HTML + result = described_class.new(nodes).extract + + expect(result[:row]["correct answer"]).to eq("a; b") + expect(result[:row]["question type"]).to eq("ma") + end + + it "extracts essay questions without options" do + nodes = nodes_from(<<~HTML) + <p>Type: Essay Folder: Writing Title: Q1 Category: Test 1) Discuss the causes of WWI.</p> + HTML + result = described_class.new(nodes).extract + + expect(result[:row]["question type"]).to eq("essay") + expect(result[:row]["question text"]).to eq("Discuss the causes of WWI.") + expect(result[:status]).to eq("published") + end + + it "warns for unsupported question types but still imports" do + nodes = nodes_from(<<~HTML) + <p>Type: Hotspot 1) Identify the region on the map.</p> + HTML + result = described_class.new(nodes).extract + + expect(result[:status]).to eq("draft") + expect(result[:warnings]).to include(/unsupported.*hotspot/i) + end + end +end +``` + +**Step 2: Run test to verify it fails** + +Run: `bundle exec rspec spec/atomic_assessments_import/examsoft/extractor_spec.rb -v` +Expected: FAIL + +**Step 3: Write implementation** + +Create `lib/atomic_assessments_import/exam_soft/extractor.rb`: + +```ruby +# frozen_string_literal: true + +require_relative "extractor/question_stem_detector" +require_relative "extractor/options_detector" +require_relative "extractor/correct_answer_detector" +require_relative "extractor/metadata_detector" +require_relative "extractor/feedback_detector" +require_relative "extractor/question_type_detector" + +module AtomicAssessmentsImport + module ExamSoft + class Extractor + SUPPORTED_TYPES = %w[mcq ma true_false essay short_answer fill_in_the_blank matching ordering].freeze + # Types that require options and a correct answer + OPTION_TYPES = %w[mcq ma true_false].freeze + + def initialize(nodes) + @nodes = nodes + end + + def extract + warnings = [] + + # Run detectors + options = 
Extractor::OptionsDetector.new(@nodes).detect + has_options = !options.empty? + + metadata = Extractor::MetadataDetector.new(@nodes).detect + question_type = Extractor::QuestionTypeDetector.new(@nodes, has_options: has_options).detect + stem = Extractor::QuestionStemDetector.new(@nodes).detect + feedback = Extractor::FeedbackDetector.new(@nodes).detect + correct_answers = has_options ? Extractor::CorrectAnswerDetector.new(@nodes, options).detect : [] + + # Determine status + status = "published" + + unless SUPPORTED_TYPES.include?(question_type) + warnings << "Unsupported question type '#{question_type}', imported as draft" + status = "draft" + end + + if stem.nil? + warnings << "No question text found, imported as draft" + status = "draft" + end + + if OPTION_TYPES.include?(question_type) + if options.empty? + warnings << "No options found for #{question_type} question, imported as draft" + status = "draft" + end + if correct_answers.empty? + warnings << "No correct answer found, imported as draft" + status = "draft" + end + end + + # Build row_mock + row = { + "question id" => nil, + "folder" => metadata[:folder], + "title" => metadata[:title], + "category" => metadata[:categories] || [], + "import type" => nil, + "description" => nil, + "question text" => stem, + "question type" => question_type, + "stimulus review" => nil, + "instructor stimulus" => nil, + "correct answer" => correct_answers.join("; "), + "scoring type" => nil, + "points" => nil, + "distractor rationale" => nil, + "sample answer" => nil, + "acknowledgements" => nil, + "general feedback" => feedback, + "correct feedback" => nil, + "incorrect feedback" => nil, + "shuffle options" => nil, + "template" => question_type, + } + + # Add option keys + options.each_with_index do |opt, index| + letter = ("a".ord + index).chr + row["option #{letter}"] = opt[:text] + end + + { + row: row, + status: status, + warnings: warnings, + } + end + end + end +end +``` + +**Step 4: Run test to verify it passes** + 
+Run: `bundle exec rspec spec/atomic_assessments_import/examsoft/extractor_spec.rb -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add lib/atomic_assessments_import/exam_soft/extractor.rb spec/atomic_assessments_import/examsoft/extractor_spec.rb +git commit -m "feat: add Extractor orchestrator with field detection pipeline" +``` + +--- + +### Task 8: New Question Type Classes — Essay and ShortAnswer + +**Files:** +- Create: `lib/atomic_assessments_import/questions/essay.rb` +- Create: `lib/atomic_assessments_import/questions/short_answer.rb` +- Test: `spec/atomic_assessments_import/questions/essay_spec.rb` +- Test: `spec/atomic_assessments_import/questions/short_answer_spec.rb` +- Modify: `lib/atomic_assessments_import/questions/question.rb:12-18` (add cases to `self.load`) + +**Step 1: Write failing tests** + +Create `spec/atomic_assessments_import/questions/essay_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" + +RSpec.describe AtomicAssessmentsImport::Questions::Essay do + let(:row) do + { + "question text" => "Discuss the causes of World War I.", + "question type" => "essay", + "general feedback" => "A good answer covers alliances, imperialism, and nationalism.", + "sample answer" => "World War I was caused by...", + "points" => "10", + } + end + + describe "#question_type" do + it "returns longanswer" do + question = described_class.new(row) + expect(question.question_type).to eq("longanswer") + end + end + + describe "#to_learnosity" do + it "returns correct structure" do + question = described_class.new(row) + result = question.to_learnosity + + expect(result[:type]).to eq("longanswer") + expect(result[:widget_type]).to eq("response") + expect(result[:data][:stimulus]).to eq("Discuss the causes of World War I.") + end + + it "includes max_length when word limit specified" do + row["word_limit"] = "500" + question = described_class.new(row) + result = question.to_learnosity + + 
expect(result[:data][:max_length]).to eq(500) + end + + it "sets metadata" do + question = described_class.new(row) + result = question.to_learnosity + + expect(result[:data][:metadata][:sample_answer]).to eq("World War I was caused by...") + expect(result[:data][:metadata][:general_feedback]).to eq("A good answer covers alliances, imperialism, and nationalism.") + end + end +end +``` + +Create `spec/atomic_assessments_import/questions/short_answer_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" + +RSpec.describe AtomicAssessmentsImport::Questions::ShortAnswer do + let(:row) do + { + "question text" => "What is the chemical symbol for water?", + "question type" => "short_answer", + "correct answer" => "H2O", + "points" => "1", + } + end + + describe "#question_type" do + it "returns shorttext" do + question = described_class.new(row) + expect(question.question_type).to eq("shorttext") + end + end + + describe "#to_learnosity" do + it "returns correct structure" do + question = described_class.new(row) + result = question.to_learnosity + + expect(result[:type]).to eq("shorttext") + expect(result[:widget_type]).to eq("response") + expect(result[:data][:stimulus]).to eq("What is the chemical symbol for water?") + end + + it "includes validation with correct answer" do + question = described_class.new(row) + result = question.to_learnosity + + expect(result[:data][:validation][:valid_response][:value]).to eq("H2O") + expect(result[:data][:validation][:valid_response][:score]).to eq(1) + end + end +end +``` + +**Step 2: Run tests to verify they fail** + +Run: `bundle exec rspec spec/atomic_assessments_import/questions/essay_spec.rb spec/atomic_assessments_import/questions/short_answer_spec.rb -v` +Expected: FAIL — uninitialized constants + +**Step 3: Write implementations** + +Create `lib/atomic_assessments_import/questions/essay.rb`: + +```ruby +# frozen_string_literal: true + +require_relative "question" + +module 
AtomicAssessmentsImport + module Questions + class Essay < Question + def question_type + "longanswer" + end + + def question_data + data = super + word_limit = @row["word_limit"]&.to_i + data[:max_length] = word_limit if word_limit && word_limit > 0 + data + end + end + end +end +``` + +Create `lib/atomic_assessments_import/questions/short_answer.rb`: + +```ruby +# frozen_string_literal: true + +require_relative "question" + +module AtomicAssessmentsImport + module Questions + class ShortAnswer < Question + def question_type + "shorttext" + end + + def question_data + super.merge( + validation: { + valid_response: { + score: points, + value: @row["correct answer"] || "", + }, + } + ) + end + end + end +end +``` + +**Step 4: Update Question.load** in `lib/atomic_assessments_import/questions/question.rb` + +Change the `self.load` method to include new types: + +```ruby +def self.load(row) + case row["question type"] + when nil, "", /multiple choice/i, /mcq/i, /^ma$/i + MultipleChoice.new(row) + when /true_false/i, /true\/false/i + MultipleChoice.new(row) + when /essay/i, /longanswer/i + Essay.new(row) + when /short_answer/i, /shorttext/i + ShortAnswer.new(row) + else + raise "Unknown question type #{row['question type']}" + end +end +``` + +Do not add requires for the new subclasses at the top of `question.rb`: `question.rb` is loaded first and each subclass file already does `require_relative "question"`. Instead, add the requires in the extractor/converter that uses `Question.load`. The existing pattern is that `converter.rb` files require all question classes. + +The existing exam_soft converter already requires question and multiple_choice. Add the essay and short_answer requires alongside those. 
+ +**Step 5: Run tests to verify they pass** + +Run: `bundle exec rspec spec/atomic_assessments_import/questions/essay_spec.rb spec/atomic_assessments_import/questions/short_answer_spec.rb -v` +Expected: PASS + +**Step 6: Run all tests to check nothing broke** + +Run: `bundle exec rspec` +Expected: All pass + +**Step 7: Commit** + +```bash +git add lib/atomic_assessments_import/questions/essay.rb lib/atomic_assessments_import/questions/short_answer.rb lib/atomic_assessments_import/questions/question.rb spec/atomic_assessments_import/questions/essay_spec.rb spec/atomic_assessments_import/questions/short_answer_spec.rb +git commit -m "feat: add Essay and ShortAnswer question types" +``` + +--- + +### Task 9: New Question Type Classes — FillInTheBlank, Matching, Ordering + +**Files:** +- Create: `lib/atomic_assessments_import/questions/fill_in_the_blank.rb` +- Create: `lib/atomic_assessments_import/questions/matching.rb` +- Create: `lib/atomic_assessments_import/questions/ordering.rb` +- Test: `spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb` +- Test: `spec/atomic_assessments_import/questions/matching_spec.rb` +- Test: `spec/atomic_assessments_import/questions/ordering_spec.rb` +- Modify: `lib/atomic_assessments_import/questions/question.rb:12-18` (add remaining cases to `self.load`) + +**Step 1: Write failing tests** + +Create `spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" + +RSpec.describe AtomicAssessmentsImport::Questions::FillInTheBlank do + let(:row) do + { + "question text" => "The capital of France is {{response}}.", + "question type" => "fill_in_the_blank", + "correct answer" => "Paris", + "points" => "1", + } + end + + describe "#question_type" do + it "returns clozetext" do + question = described_class.new(row) + expect(question.question_type).to eq("clozetext") + end + end + + describe "#to_learnosity" do + it "returns correct 
structure" do + question = described_class.new(row) + result = question.to_learnosity + + expect(result[:type]).to eq("clozetext") + expect(result[:data][:stimulus]).to eq("The capital of France is {{response}}.") + end + + it "includes validation with correct answer" do + question = described_class.new(row) + result = question.to_learnosity + + expect(result[:data][:validation][:valid_response][:score]).to eq(1) + expect(result[:data][:validation][:valid_response][:value]).to eq(["Paris"]) + end + end +end +``` + +Create `spec/atomic_assessments_import/questions/matching_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" + +RSpec.describe AtomicAssessmentsImport::Questions::Matching do + let(:row) do + { + "question text" => "Match the countries to their capitals.", + "question type" => "matching", + "option a" => "France", + "option b" => "Germany", + "option c" => "Spain", + "match a" => "Paris", + "match b" => "Berlin", + "match c" => "Madrid", + "points" => "3", + } + end + + describe "#question_type" do + it "returns association" do + question = described_class.new(row) + expect(question.question_type).to eq("association") + end + end + + describe "#to_learnosity" do + it "returns correct structure" do + question = described_class.new(row) + result = question.to_learnosity + + expect(result[:type]).to eq("association") + expect(result[:data][:stimulus]).to eq("Match the countries to their capitals.") + end + + it "includes stimulus and possible responses" do + question = described_class.new(row) + result = question.to_learnosity + + expect(result[:data][:stimulus_list].length).to eq(3) + expect(result[:data][:possible_responses].length).to eq(3) + end + + it "includes validation" do + question = described_class.new(row) + result = question.to_learnosity + + expect(result[:data][:validation][:valid_response][:score]).to eq(3) + expect(result[:data][:validation][:valid_response][:value].length).to eq(3) + end + end +end 
+``` + +Create `spec/atomic_assessments_import/questions/ordering_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" + +RSpec.describe AtomicAssessmentsImport::Questions::Ordering do + let(:row) do + { + "question text" => "Arrange these events in chronological order.", + "question type" => "ordering", + "option a" => "World War I", + "option b" => "World War II", + "option c" => "Cold War", + "correct answer" => "a; b; c", + "points" => "3", + } + end + + describe "#question_type" do + it "returns orderlist" do + question = described_class.new(row) + expect(question.question_type).to eq("orderlist") + end + end + + describe "#to_learnosity" do + it "returns correct structure" do + question = described_class.new(row) + result = question.to_learnosity + + expect(result[:type]).to eq("orderlist") + expect(result[:data][:stimulus]).to eq("Arrange these events in chronological order.") + end + + it "includes list of items" do + question = described_class.new(row) + result = question.to_learnosity + + expect(result[:data][:list].length).to eq(3) + end + + it "includes validation with correct order" do + question = described_class.new(row) + result = question.to_learnosity + + expect(result[:data][:validation][:valid_response][:score]).to eq(3) + expect(result[:data][:validation][:valid_response][:value]).to eq(["0", "1", "2"]) + end + end +end +``` + +**Step 2: Run tests to verify they fail** + +Run: `bundle exec rspec spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb spec/atomic_assessments_import/questions/matching_spec.rb spec/atomic_assessments_import/questions/ordering_spec.rb -v` +Expected: FAIL — uninitialized constants + +**Step 3: Write implementations** + +Create `lib/atomic_assessments_import/questions/fill_in_the_blank.rb`: + +```ruby +# frozen_string_literal: true + +require_relative "question" + +module AtomicAssessmentsImport + module Questions + class FillInTheBlank < Question + def question_type + 
"clozetext" + end + + def question_data + answers = (@row["correct answer"] || "").split(";").map(&:strip) + + super.merge( + validation: { + valid_response: { + score: points, + value: answers, + }, + } + ) + end + end + end +end +``` + +Create `lib/atomic_assessments_import/questions/matching.rb`: + +```ruby +# frozen_string_literal: true + +require_relative "question" + +module AtomicAssessmentsImport + module Questions + class Matching < Question + INDEXES = ("a".."o").to_a.freeze + + def question_type + "association" + end + + def question_data + stimulus_list = [] + possible_responses = [] + valid_values = [] + + INDEXES.each do |letter| + option = @row["option #{letter}"] + match = @row["match #{letter}"] + break unless option + + stimulus_list << option + possible_responses << match if match + valid_values << match if match + end + + super.merge( + stimulus_list: stimulus_list, + possible_responses: possible_responses, + validation: { + valid_response: { + score: points, + value: valid_values, + }, + } + ) + end + end + end +end +``` + +Create `lib/atomic_assessments_import/questions/ordering.rb`: + +```ruby +# frozen_string_literal: true + +require_relative "question" + +module AtomicAssessmentsImport + module Questions + class Ordering < Question + INDEXES = ("a".."o").to_a.freeze + + def question_type + "orderlist" + end + + def question_data + items = [] + INDEXES.each do |letter| + option = @row["option #{letter}"] + break unless option + + items << option + end + + # Parse correct order from "a; b; c" format + order = (@row["correct answer"] || "").split(";").map(&:strip).map(&:downcase) + valid_values = order.filter_map { |letter| INDEXES.find_index(letter)&.to_s } + + super.merge( + list: items, + validation: { + valid_response: { + score: points, + value: valid_values, + }, + } + ) + end + end + end +end +``` + +**Step 4: Update Question.load** in `lib/atomic_assessments_import/questions/question.rb` + +Final version of `self.load`: + +```ruby +def 
self.load(row) + case row["question type"] + when nil, "", /multiple choice/i, /mcq/i, /^ma$/i + MultipleChoice.new(row) + when /true_false/i, /true\/false/i + MultipleChoice.new(row) + when /essay/i, /longanswer/i + Essay.new(row) + when /short_answer/i, /shorttext/i + ShortAnswer.new(row) + when /fill_in_the_blank/i, /cloze/i + FillInTheBlank.new(row) + when /matching/i, /association/i + Matching.new(row) + when /ordering/i, /orderlist/i + Ordering.new(row) + else + raise "Unknown question type #{row['question type']}" + end +end +``` + +**Step 5: Run tests to verify they pass** + +Run: `bundle exec rspec spec/atomic_assessments_import/questions/ -v` +Expected: PASS + +**Step 6: Commit** + +```bash +git add lib/atomic_assessments_import/questions/ spec/atomic_assessments_import/questions/ +git commit -m "feat: add FillInTheBlank, Matching, and Ordering question types" +``` + +--- + +### Task 10: Refactor ExamSoft::Converter to Use New Pipeline + +Replace the monolithic regex-based converter with the chunker + extractor pipeline. + +**Files:** +- Modify: `lib/atomic_assessments_import/exam_soft/converter.rb` (major rewrite) +- Modify: `lib/atomic_assessments_import/exam_soft.rb` (add requires) + +**Step 1: Read and understand the existing converter** + +The existing converter is at `lib/atomic_assessments_import/exam_soft/converter.rb`. It handles: +1. File input (String path or Tempfile) +2. Pandoc conversion to HTML +3. Regex chunking + extraction +4. Building row_mock +5. Calling convert_row to build items/questions + +We keep steps 1-2 and 5, and replace steps 3-4 with Chunker + Extractor. 
+ +**Step 2: Rewrite the converter** + +Replace `lib/atomic_assessments_import/exam_soft/converter.rb` with: + +```ruby +# frozen_string_literal: true + +require "pandoc-ruby" +require "nokogiri" +require "active_support/core_ext/digest/uuid" + +require_relative "../questions/question" +require_relative "../questions/multiple_choice" +require_relative "../questions/essay" +require_relative "../questions/short_answer" +require_relative "../questions/fill_in_the_blank" +require_relative "../questions/matching" +require_relative "../questions/ordering" +require_relative "../utils" +require_relative "chunker" +require_relative "extractor" + +module AtomicAssessmentsImport + module ExamSoft + class Converter + def initialize(file) + @file = file + end + + def convert + html = normalize_to_html + doc = Nokogiri::HTML.fragment(html) + + # Chunk the document + chunk_result = Chunker.new(doc).chunk + all_warnings = chunk_result[:warnings].dup + + # Log header info if present + unless chunk_result[:header_nodes].empty? + header_text = chunk_result[:header_nodes].map { |n| n.text.strip }.join(" ") + all_warnings << "Exam header detected: #{header_text}" unless header_text.empty? + end + + items = [] + questions = [] + + chunk_result[:chunks].each_with_index do |chunk_nodes, index| + # Extract fields from this chunk + extraction = Extractor.new(chunk_nodes).extract + all_warnings.concat(extraction[:warnings].map { |w| "Question #{index + 1}: #{w}" }) + + row = extraction[:row] + status = extraction[:status] + + # Skip completely unparseable chunks + if row["question text"].nil? && row["option a"].nil? 
+ all_warnings << "Question #{index + 1}: Skipped — no usable content found" + next + end + + begin + item, question_widgets = convert_row(row, status) + items << item + questions += question_widgets + rescue StandardError => e + title = row["title"] || "Question #{index + 1}" + all_warnings << "#{title}: #{e.message}, imported as draft" + # Attempt bare-minimum import + begin + item, question_widgets = convert_row_minimal(row) + items << item + questions += question_widgets + rescue StandardError + all_warnings << "#{title}: Could not import even minimally, skipped" + end + end + end + + { + activities: [], + items: items, + questions: questions, + features: [], + errors: all_warnings, + } + end + + private + + def normalize_to_html + if @file.is_a?(String) + PandocRuby.new([@file], from: @file.split(".").last).to_html + else + source_type = @file.path.split(".").last.match(/^[a-zA-Z]+/)[0] + PandocRuby.new(@file.read, from: source_type).to_html + end + end + + def categories_to_tags(categories) + tags = {} + (categories || []).each do |cat| + if cat.include?("/") + key, value = cat.split("/", 2).map(&:strip) + tags[key.to_sym] ||= [] + tags[key.to_sym] << value + else + tags[cat.to_sym] ||= [] + end + end + tags + end + + def convert_row(row, status = "published") + source = "<p>ExamSoft Import on #{Time.now.strftime('%Y-%m-%d')}</p>\n" + if row["question id"].present? 
+ source += "<p>External id: #{row['question id']}</p>\n" + end + + question = Questions::Question.load(row) + item = { + reference: SecureRandom.uuid, + title: row["title"] || "", + status: status, + tags: categories_to_tags(row["category"]), + metadata: { + import_date: Time.now.iso8601, + import_type: row["import_type"] || "examsoft", + }, + source: source, + description: row["description"] || "", + questions: [ + { + reference: question.reference, + type: question.question_type, + }, + ], + features: [], + definition: { + widgets: [ + { + reference: question.reference, + widget_type: "response", + }, + ], + }, + } + [item, [question.to_learnosity]] + end + + def convert_row_minimal(row) + # Fallback: create a bare item with just the question text + reference = SecureRandom.uuid + item = { + reference: reference, + title: row["title"] || "", + status: "draft", + tags: {}, + metadata: { + import_date: Time.now.iso8601, + import_type: "examsoft", + }, + source: "<p>ExamSoft Import on #{Time.now.strftime('%Y-%m-%d')}</p>\n", + description: row["question text"] || "", + questions: [], + features: [], + definition: { widgets: [] }, + } + [item, []] + end + end + end +end +``` + +**Step 3: Run existing tests to check backward compatibility** + +Run: `bundle exec rspec spec/atomic_assessments_import/examsoft/ -v` +Expected: Existing tests should mostly pass. Some may need minor adjustments due to error handling changes (e.g., "raises if no options" now produces a warning instead of an exception). + +**Step 4: Update existing ExamSoft specs for new behavior** + +The tests that expect `raise_error` for missing options/correct answers need to change — the new converter uses best-effort and produces warnings instead. 
Update `spec/atomic_assessments_import/examsoft/docx_converter_spec.rb`: + +Change the "raises if no options" test to: +```ruby +it "warns and imports as draft if no options are given" do + no_options = Tempfile.new("temp.docx") + original_content = File.read("spec/fixtures/no_options.docx") + no_options.write(original_content) + no_options.rewind + + data = described_class.new(no_options).convert + expect(data[:errors]).to include(a_string_matching(/no options|missing options/i)) +end +``` + +Change the "raises if no correct answer" test to: +```ruby +it "warns and imports as draft if no correct answer is given" do + no_correct = Tempfile.new("temp.docx") + original_content = File.read("spec/fixtures/no_correct.docx") + no_correct.write(original_content) + no_correct.rewind + + data = described_class.new(no_correct).convert + expect(data[:errors]).to include(a_string_matching(/correct answer/i)) +end +``` + +Apply similar changes to `html_converter_spec.rb` and `rtf_converter_spec.rb`. + +**Step 5: Run all tests** + +Run: `bundle exec rspec` +Expected: All pass + +**Step 6: Commit** + +```bash +git add lib/atomic_assessments_import/exam_soft/ spec/atomic_assessments_import/examsoft/ +git commit -m "refactor: rewrite ExamSoft converter to use chunker + extractor pipeline" +``` + +--- + +### Task 11: Integration Tests — Mixed Types, Messy Documents, Partial Parse + +**Files:** +- Create: `spec/fixtures/mixed_types.html` +- Create: `spec/fixtures/messy_document.html` +- Create: `spec/atomic_assessments_import/examsoft/integration_spec.rb` + +**Step 1: Create test fixtures** + +Create `spec/fixtures/mixed_types.html`: + +```html +<p>Exam: Midterm 2024</p> +<p>Total Questions: 4</p> +<p>Folder: Science Title: Q1 Category: Biology/Cells 1) What is the powerhouse of the cell? 
~ The mitochondria produces ATP.</p> +<p>*a) Mitochondria</p> +<p>b) Nucleus</p> +<p>c) Ribosome</p> +<p>Type: Essay Folder: Writing Title: Q2 Category: English/Composition 2) Discuss the themes of Hamlet.</p> +<p>Type: MA Folder: Geography Title: Q3 Category: Capitals 3) Select all European capitals.</p> +<p>*a) Paris</p> +<p>*b) Berlin</p> +<p>c) New York</p> +<p>Folder: Science Title: Q4 Category: Chemistry 4) What is the chemical symbol for gold?</p> +<p>*a) Au</p> +<p>b) Ag</p> +<p>c) Fe</p> +``` + +Create `spec/fixtures/messy_document.html`: + +```html +<p>Some random header text</p> +<p></p> +<p>Folder: Test Title: Q1 Category: General 1) A normal question? ~ Normal explanation</p> +<p>*a) Correct</p> +<p>b) Wrong</p> +<p>Folder: Test Title: Q2 Category: General 2) A question with no options at all</p> +<p>Folder: Test Title: Q3 Category: General 3) Another normal question? ~ Another explanation</p> +<p>*a) Right</p> +<p>b) Wrong</p> +``` + +**Step 2: Write integration tests** + +Create `spec/atomic_assessments_import/examsoft/integration_spec.rb`: + +```ruby +# frozen_string_literal: true + +require "atomic_assessments_import" + +RSpec.describe "ExamSoft Integration" do + describe "mixed question types" do + it "handles a document with MCQ, essay, and MA questions" do + data = AtomicAssessmentsImport::ExamSoft::Converter.new("spec/fixtures/mixed_types.html").convert + + expect(data[:items].length).to eq(4) + + # MCQ question + q1 = data[:questions].find { |q| q[:data][:stimulus]&.include?("powerhouse") } + expect(q1).not_to be_nil + expect(q1[:type]).to eq("mcq") + + # Essay question + q2 = data[:questions].find { |q| q[:data][:stimulus]&.include?("Hamlet") } + expect(q2).not_to be_nil + expect(q2[:type]).to eq("longanswer") + + # MA question + q3 = data[:questions].find { |q| q[:data][:stimulus]&.include?("European capitals") } + expect(q3).not_to be_nil + end + + it "reports exam header in warnings" do + data = 
AtomicAssessmentsImport::ExamSoft::Converter.new("spec/fixtures/mixed_types.html").convert + + expect(data[:errors]).to include(a_string_matching(/header/i)) + end + end + + describe "messy documents with partial parse" do + it "imports what it can and warns about problems" do + data = AtomicAssessmentsImport::ExamSoft::Converter.new("spec/fixtures/messy_document.html").convert + + # Should get at least 2 good items (Q1 and Q3) + published = data[:items].select { |i| i[:status] == "published" } + expect(published.length).to be >= 2 + + # Should have warnings about Q2 (no options for what looks like MCQ) + expect(data[:errors].length).to be > 0 + end + end + + describe "backward compatibility" do + it "produces the same structure from simple.html as before" do + data = AtomicAssessmentsImport::ExamSoft::Converter.new("spec/fixtures/simple.html").convert + + expect(data[:items].length).to eq(3) + expect(data[:questions].length).to eq(3) + expect(data[:activities]).to eq([]) + expect(data[:features]).to eq([]) + + item1 = data[:items].find { |i| i[:title] == "Question 1" } + expect(item1).not_to be_nil + expect(item1[:status]).to eq("published") + + q1 = data[:questions].find { |q| q[:data][:stimulus] == "What is the capital of France?" 
} + expect(q1).not_to be_nil + expect(q1[:data][:options].length).to eq(3) + end + end +end +``` + +**Step 3: Run integration tests** + +Run: `bundle exec rspec spec/atomic_assessments_import/examsoft/integration_spec.rb -v` +Expected: PASS + +**Step 4: Run full test suite** + +Run: `bundle exec rspec` +Expected: All pass + +**Step 5: Commit** + +```bash +git add spec/fixtures/mixed_types.html spec/fixtures/messy_document.html spec/atomic_assessments_import/examsoft/integration_spec.rb +git commit -m "test: add integration tests for mixed types, messy docs, backward compat" +``` + +--- + +### Task 12: Final Cleanup and Full Test Run + +**Files:** +- Review: all modified files +- Clean up: any dead code from old converter, unused comments + +**Step 1: Run full test suite** + +Run: `bundle exec rspec --format documentation` +Expected: All pass + +**Step 2: Check for dead code** + +Look for any leftover references to the old regex patterns in the converter that are no longer needed. The old `chunk_pattern`, `meta_regex`, `question_regex`, `explanation_regex`, `options_regex` constants should all be gone since they were local variables in the old `convert` method. + +**Step 3: Run rubocop if configured** + +Run: `bundle exec rubocop lib/atomic_assessments_import/exam_soft/ lib/atomic_assessments_import/questions/` +Fix any style issues. 
+ +**Step 4: Final commit** + +```bash +git add -A +git commit -m "chore: cleanup after ExamSoft converter refactor" +``` diff --git a/lib/atomic_assessments_import.rb b/lib/atomic_assessments_import.rb index acd9049..2f4ad6f 100644 --- a/lib/atomic_assessments_import.rb +++ b/lib/atomic_assessments_import.rb @@ -42,6 +42,7 @@ def self.convert(path, import_from) register_converter("text/html", "examsoft", ExamSoft::Converter) register_converter("application/xhtml+xml", "examsoft", ExamSoft::Converter) + def self.convert_to_aa_format(input_path, output_path, import_from: nil) result = convert(input_path, import_from) AtomicAssessmentsImport::Export.create(output_path, result) diff --git a/lib/atomic_assessments_import/exam_soft/chunker/numbered_question_strategy.rb b/lib/atomic_assessments_import/exam_soft/chunker/numbered_question_strategy.rb index 16fdde0..e628087 100644 --- a/lib/atomic_assessments_import/exam_soft/chunker/numbered_question_strategy.rb +++ b/lib/atomic_assessments_import/exam_soft/chunker/numbered_question_strategy.rb @@ -6,8 +6,10 @@ module AtomicAssessmentsImport module ExamSoft module Chunker class NumberedQuestionStrategy < Strategy - # Matches "1)" or "1." or "12)" etc. at start of text, but NOT single letters like "a)" because those are used for options, not question numbering - NUMBERED_PATTERN = /\A\s*(\d+)\s*[.)]/ + # Matches "1)" or "1." or "1" or "12)" etc. at start of text, but NOT single letters like "a)" because those are used for options, not question numbering + # We also allow for an optional "Question" prefix, e.g. 
"Question 1)" or "Question #: 1" + # NUMBERED_PATTERN = /\A\s*(\d+)\s*[.)]/ + NUMBERED_PATTERN = /\A\s*(?:Question\s*[:#]?\s*)?(\d+)\s*[.)]/ def split(doc) @header_nodes = [] diff --git a/lib/atomic_assessments_import/exam_soft/converter.rb b/lib/atomic_assessments_import/exam_soft/converter.rb index e07eb23..c85809f 100644 --- a/lib/atomic_assessments_import/exam_soft/converter.rb +++ b/lib/atomic_assessments_import/exam_soft/converter.rb @@ -25,15 +25,20 @@ def initialize(file) def convert html = normalize_to_html doc = Nokogiri::HTML.fragment(html) + normalize_html_structure(doc) # Chunk the document chunk_result = Chunker.chunk(doc) - all_warnings = chunk_result[:warnings].dup + all_warnings = chunk_result[:warnings].map { |w| build_warning(w) } + + if chunk_result[:chunks].length == 1 + all_warnings << build_warning("Only 1 chunk detected — document may not be in a recognized format") + end # Log header info if present unless chunk_result[:header_nodes].empty? header_text = chunk_result[:header_nodes].map { |n| n.text.strip }.join(" ") - all_warnings << "Exam header detected: #{header_text}" unless header_text.empty? + all_warnings << build_warning("Exam header detected: #{header_text}") unless header_text.empty? end items = [] @@ -42,14 +47,16 @@ def convert chunk_result[:chunks].each_with_index do |chunk_nodes, index| # Extract fields from this chunk extraction = Extractor.extract(chunk_nodes) - all_warnings.concat(extraction[:warnings].map { |w| "Question #{index + 1}: #{w}" }) + extraction[:warnings].each do |w| + all_warnings << build_warning("Question #{index + 1}: #{w}", index: index, question_type: extraction[:row]["question type"]) + end row = extraction[:row] status = extraction[:status] # Skip completely unparseable chunks if row["question text"].nil? && row["option a"].nil? 
- all_warnings << "Question #{index + 1}: Skipped — no usable content found" + all_warnings << build_warning("Question #{index + 1}: Skipped — no usable content found", index: index) next end @@ -59,13 +66,13 @@ def convert questions += question_widgets rescue StandardError => e title = row["title"] || "Question #{index + 1}" - all_warnings << "#{title}: #{e.message}, imported as draft" + all_warnings << build_warning("#{title}: #{e.message}, imported as draft", index: index, question_type: row["question type"]) begin item, question_widgets = convert_row_minimal(row) items << item questions += question_widgets rescue StandardError - all_warnings << "#{title}: Could not import even minimally, skipped" + all_warnings << build_warning("#{title}: Could not import even minimally, skipped", index: index, question_type: row["question type"]) end end end @@ -81,6 +88,47 @@ def convert private + def build_warning(message, index: nil, question_type: nil) + { + error_type: "warning", + question_type: question_type, + message: message, + qti_item_id: nil, + index: index, + } + end + + def normalize_html_structure(doc) + doc.css("p").each do |p_node| + br_children = p_node.css("br") + next if br_children.empty? + + # Split the <p> at each <br> into separate <p> elements + segments = [] + current_segment = [] + + p_node.children.each do |child| + if child.name == "br" + segments << current_segment unless current_segment.empty? + current_segment = [] + else + current_segment << child + end + end + segments << current_segment unless current_segment.empty? 
+ + next if segments.length <= 1 + + # Replace original <p> with multiple <p> elements + segments.reverse_each do |segment| + new_p = Nokogiri::XML::Node.new("p", doc) + segment.each { |child| new_p.add_child(child.clone) } + p_node.add_next_sibling(new_p) + end + p_node.remove + end + end + def normalize_to_html if @file.is_a?(String) PandocRuby.new([@file], from: @file.split(".").last).to_html diff --git a/lib/atomic_assessments_import/exam_soft/extractor.rb b/lib/atomic_assessments_import/exam_soft/extractor.rb index 8351853..4a75ce7 100644 --- a/lib/atomic_assessments_import/exam_soft/extractor.rb +++ b/lib/atomic_assessments_import/exam_soft/extractor.rb @@ -81,6 +81,11 @@ def self.extract(nodes) row["option #{letter}"] = opt[:text] end + # For FITB questions, options ARE the answers (no asterisk marking) + if question_type == "fill_in_the_blank" && row["correct answer"].blank? && !options.empty? + row["correct answer"] = options.map { |opt| opt[:text] }.join("; ") + end + { row: row, status: status, diff --git a/lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb b/lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb index 283f390..3b39fe1 100644 --- a/lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb +++ b/lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb @@ -6,21 +6,36 @@ module Extractor class FeedbackDetector TILDE_PATTERN = /~\s*(.+)/m LABEL_PATTERN = /\A\s*(?:Explanation|Rationale):\s*(.+)/im + OPTION_PATTERN = /\A\s*\*?[a-oA-O]\s*[.)]/ def initialize(nodes) @nodes = nodes end def detect + feedback_parts = [] + collecting = false + @nodes.each do |node| text = node.text.strip + + if collecting + # Stop collecting if we hit an option line + break if text.match?(OPTION_PATTERN) + feedback_parts << text unless text.empty? + next + end + match = text.match(TILDE_PATTERN) if match - feedback = match[1].gsub(/\s+/, " ").strip - return feedback unless feedback.empty? 
+ first_part = match[1].strip + feedback_parts << first_part unless first_part.empty? + collecting = true end end + return feedback_parts.join(" ").gsub(/\s+/, " ").strip unless feedback_parts.empty? + @nodes.each do |node| text = node.text.strip match = text.match(LABEL_PATTERN) diff --git a/lib/atomic_assessments_import/exam_soft/extractor/question_type_detector.rb b/lib/atomic_assessments_import/exam_soft/extractor/question_type_detector.rb index 32dcbfc..eda1a3b 100644 --- a/lib/atomic_assessments_import/exam_soft/extractor/question_type_detector.rb +++ b/lib/atomic_assessments_import/exam_soft/extractor/question_type_detector.rb @@ -14,9 +14,13 @@ class QuestionTypeDetector %r{\Atrue[\s/]*false\z}i => "true_false", %r{\At\s*/?\s*f\z}i => "true_false", /\Aessay\z/i => "essay", + /\Ae\z/i => "essay", /\Along\s*answer\z/i => "essay", /\Ashort\s*answer\z/i => "short_answer", + /\Asa\z/i => "short_answer", /\Afill[\s_-]*in[\s_-]*(?:the[\s_-]*)?blank\z/i => "fill_in_the_blank", + /\Afib\z/i => "fill_in_the_blank", + /\Af\z/i => "fill_in_the_blank", /\Acloze\z/i => "fill_in_the_blank", /\Amatching\z/i => "matching", /\Aorder(?:ing)?\z/i => "ordering", diff --git a/spec/atomic_assessments_import/examsoft/docx_converter_spec.rb b/spec/atomic_assessments_import/examsoft/docx_converter_spec.rb index e4472d9..9025138 100644 --- a/spec/atomic_assessments_import/examsoft/docx_converter_spec.rb +++ b/spec/atomic_assessments_import/examsoft/docx_converter_spec.rb @@ -119,81 +119,6 @@ ) end - # it "sets external id metadata" do - # csv = <<~CSV - # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer - # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A - # CSV - # data = described_class.new(StringIO.new(csv)).convert - # item1 = data[:items].find { |i| i[:title] == "Question 1" } - # expect(item1).not_to be_nil - # expect(item1[:metadata][:external_id]).to eq("Q001") - # 
expect(item1[:metadata][:external_id_domain]).to eq("csv") - # expect(item1[:metadata][:import_type]).to eq("csv") - # expect(item1[:source]).to match(/External.*Q001/) - # end - - # it "sets alignment tags" do - # csv = <<~CSV - # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL - # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,"https://example.com/alignment" - # CSV - # data = described_class.new(StringIO.new(csv)).convert - # item1 = data[:items].find { |i| i[:title] == "Question 1" } - # expect(item1).not_to be_nil - # expect(item1[:tags]).to eq( - # { - # Subject: ["Capitals"], - # lrn_aligned: ["ff8a5caa-0f2a-5a53-a128-c8c3e99768a8"], - # } - # ) - # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment]) - # end - - # it "sets multiple alignment tags" do - # csv = <<~CSV - # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL,Alignment URL - # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,https://example.com/alignment,https://example.com/alignment2 - # CSV - # data = described_class.new(StringIO.new(csv)).convert - # item1 = data[:items].find { |i| i[:title] == "Question 1" } - # expect(item1).not_to be_nil - # expect(item1[:tags]).to eq( - # { - # Subject: ["Capitals"], - # lrn_aligned: %w[ff8a5caa-0f2a-5a53-a128-c8c3e99768a8 f7d26914-3e2b-5c9c-a550-ce9c853f0c09], - # } - # ) - # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment https://example.com/alignment2]) - # end - - # it "sets alignment tags when one is empty" do - # csv = <<~CSV - # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL,Alignment URL - # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,,https://example.com/alignment2 - # CSV - # data = 
described_class.new(StringIO.new(csv)).convert - # item1 = data[:items].find { |i| i[:title] == "Question 1" } - # expect(item1).not_to be_nil - # expect(item1[:tags]).to eq( - # { - # Subject: ["Capitals"], - # lrn_aligned: %w[f7d26914-3e2b-5c9c-a550-ce9c853f0c09], - # } - # ) - # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment2]) - # end - - # it "raises if an unknown header is present" do - # csv = <<~CSV - # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Color - # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A, - # CSV - # expect do - # described_class.new(StringIO.new(csv)).convert - # end.to raise_error(StandardError, "Unknown column: Color") - # end - it "warns if no options are given" do no_options = Tempfile.new("temp.docx") original_content = File.read("spec/fixtures/no_options.docx") @@ -201,7 +126,7 @@ no_options.rewind data = described_class.new(no_options).convert - expect(data[:errors]).to include(a_string_matching(/no options|missing options/i)) + expect(data[:errors]).to include(a_hash_including(message: a_string_matching(/no options|missing options/i))) end it "warns if no correct answer is given" do @@ -211,7 +136,7 @@ no_correct.rewind data = described_class.new(no_correct).convert - expect(data[:errors]).to include(a_string_matching(/correct answer/i)) + expect(data[:errors]).to include(a_hash_including(message: a_string_matching(/correct answer/i))) end end end diff --git a/spec/atomic_assessments_import/examsoft/extractor/feedback_detector_spec.rb b/spec/atomic_assessments_import/examsoft/extractor/feedback_detector_spec.rb index 2d76e28..e381b8b 100644 --- a/spec/atomic_assessments_import/examsoft/extractor/feedback_detector_spec.rb +++ b/spec/atomic_assessments_import/examsoft/extractor/feedback_detector_spec.rb @@ -35,6 +35,27 @@ def nodes_from(html) expect(result).to eq("Paris is the capital of France.") end + it "collects multi-line 
feedback after tilde" do + nodes = nodes_from(<<~HTML) + <p>1) Question? ~ Kava has been associated with</p> + <p>hepatotoxicity in several case reports.</p> + <p>*a) Kava</p> + HTML + result = described_class.new(nodes).detect + expect(result).to eq("Kava has been associated with hepatotoxicity in several case reports.") + end + + it "stops collecting feedback at option lines" do + nodes = nodes_from(<<~HTML) + <p>1) Question? ~ First line of feedback.</p> + <p>Second line of feedback.</p> + <p>a) Option A</p> + <p>b) Option B</p> + HTML + result = described_class.new(nodes).detect + expect(result).to eq("First line of feedback. Second line of feedback.") + end + it "returns nil when no feedback found" do nodes = nodes_from("<p>1) What is the capital of France?</p>") result = described_class.new(nodes).detect diff --git a/spec/atomic_assessments_import/examsoft/extractor/question_type_detector_spec.rb b/spec/atomic_assessments_import/examsoft/extractor/question_type_detector_spec.rb index 2a0d527..e2ef4f5 100644 --- a/spec/atomic_assessments_import/examsoft/extractor/question_type_detector_spec.rb +++ b/spec/atomic_assessments_import/examsoft/extractor/question_type_detector_spec.rb @@ -56,5 +56,29 @@ def nodes_from(html) result = described_class.new(nodes, has_options: false).detect expect(result).to eq("fill_in_the_blank") end + + it "detects fill_in_the_blank from Type: F" do + nodes = nodes_from("<p>Type: F Folder: Science 1) Question?</p>") + result = described_class.new(nodes, has_options: false).detect + expect(result).to eq("fill_in_the_blank") + end + + it "detects fill_in_the_blank from Type: FIB" do + nodes = nodes_from("<p>Type: FIB Folder: Science 1) Question?</p>") + result = described_class.new(nodes, has_options: false).detect + expect(result).to eq("fill_in_the_blank") + end + + it "detects essay from Type: E" do + nodes = nodes_from("<p>Type: E Folder: Writing 1) Question?</p>") + result = described_class.new(nodes, has_options: false).detect + 
expect(result).to eq("essay") + end + + it "detects short_answer from Type: SA" do + nodes = nodes_from("<p>Type: SA Folder: Science 1) Question?</p>") + result = described_class.new(nodes, has_options: false).detect + expect(result).to eq("short_answer") + end end end diff --git a/spec/atomic_assessments_import/examsoft/extractor_spec.rb b/spec/atomic_assessments_import/examsoft/extractor_spec.rb index 702a76c..0d1768c 100644 --- a/spec/atomic_assessments_import/examsoft/extractor_spec.rb +++ b/spec/atomic_assessments_import/examsoft/extractor_spec.rb @@ -87,5 +87,17 @@ def nodes_from(html) expect(result[:status]).to eq("draft") expect(result[:warnings]).to include(a_string_matching(/unsupported.*hotspot/i)) end + + it "sets correct answer from options for FITB questions" do + nodes = nodes_from(<<~HTML) + <p>Type: F Folder: Science Title: Q1 1) Name the active compound.</p> + <p>a) Salicin</p> + HTML + result = described_class.extract(nodes) + + expect(result[:row]["question type"]).to eq("fill_in_the_blank") + expect(result[:row]["correct answer"]).to eq("Salicin") + expect(result[:status]).to eq("published") + end end end diff --git a/spec/atomic_assessments_import/examsoft/html_converter_spec.rb b/spec/atomic_assessments_import/examsoft/html_converter_spec.rb index b9a5fef..468f157 100644 --- a/spec/atomic_assessments_import/examsoft/html_converter_spec.rb +++ b/spec/atomic_assessments_import/examsoft/html_converter_spec.rb @@ -119,81 +119,6 @@ ) end - # it "sets external id metadata" do - # csv = <<~CSV - # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer - # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A - # CSV - # data = described_class.new(StringIO.new(csv)).convert - # item1 = data[:items].find { |i| i[:title] == "Question 1" } - # expect(item1).not_to be_nil - # expect(item1[:metadata][:external_id]).to eq("Q001") - # expect(item1[:metadata][:external_id_domain]).to eq("csv") - 
# expect(item1[:metadata][:import_type]).to eq("csv") - # expect(item1[:source]).to match(/External.*Q001/) - # end - - # it "sets alignment tags" do - # csv = <<~CSV - # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL - # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,"https://example.com/alignment" - # CSV - # data = described_class.new(StringIO.new(csv)).convert - # item1 = data[:items].find { |i| i[:title] == "Question 1" } - # expect(item1).not_to be_nil - # expect(item1[:tags]).to eq( - # { - # Subject: ["Capitals"], - # lrn_aligned: ["ff8a5caa-0f2a-5a53-a128-c8c3e99768a8"], - # } - # ) - # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment]) - # end - - # it "sets multiple alignment tags" do - # csv = <<~CSV - # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL,Alignment URL - # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,https://example.com/alignment,https://example.com/alignment2 - # CSV - # data = described_class.new(StringIO.new(csv)).convert - # item1 = data[:items].find { |i| i[:title] == "Question 1" } - # expect(item1).not_to be_nil - # expect(item1[:tags]).to eq( - # { - # Subject: ["Capitals"], - # lrn_aligned: %w[ff8a5caa-0f2a-5a53-a128-c8c3e99768a8 f7d26914-3e2b-5c9c-a550-ce9c853f0c09], - # } - # ) - # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment https://example.com/alignment2]) - # end - - # it "sets alignment tags when one is empty" do - # csv = <<~CSV - # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL,Alignment URL - # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,,https://example.com/alignment2 - # CSV - # data = described_class.new(StringIO.new(csv)).convert - # item1 = data[:items].find { |i| i[:title] == "Question 
1" } - # expect(item1).not_to be_nil - # expect(item1[:tags]).to eq( - # { - # Subject: ["Capitals"], - # lrn_aligned: %w[f7d26914-3e2b-5c9c-a550-ce9c853f0c09], - # } - # ) - # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment2]) - # end - - # it "raises if an unknown header is present" do - # csv = <<~CSV - # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Color - # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A, - # CSV - # expect do - # described_class.new(StringIO.new(csv)).convert - # end.to raise_error(StandardError, "Unknown column: Color") - # end - it "warns if no options are given" do modified_file = Tempfile.new("modified.html") original_content = File.read("spec/fixtures/simple.html") @@ -205,7 +130,7 @@ modified_file.rewind data = described_class.new(modified_file).convert - expect(data[:errors]).to include(a_string_matching(/no options|missing options/i)) + expect(data[:errors]).to include(a_hash_including(message: a_string_matching(/no options|missing options/i))) end it "warns if no correct answer is given" do @@ -216,7 +141,7 @@ modified_file.rewind data = described_class.new(modified_file).convert - expect(data[:errors]).to include(a_string_matching(/correct answer/i)) + expect(data[:errors]).to include(a_hash_including(message: a_string_matching(/correct answer/i))) end end end diff --git a/spec/atomic_assessments_import/examsoft/integration_spec.rb b/spec/atomic_assessments_import/examsoft/integration_spec.rb index 647e459..13adeb3 100644 --- a/spec/atomic_assessments_import/examsoft/integration_spec.rb +++ b/spec/atomic_assessments_import/examsoft/integration_spec.rb @@ -32,7 +32,7 @@ it "reports exam header in warnings" do data = AtomicAssessmentsImport::ExamSoft::Converter.new("spec/fixtures/mixed_types.html").convert - expect(data[:errors]).to include(a_string_matching(/header/i)) + expect(data[:errors]).to include(a_hash_including(message: 
a_string_matching(/header/i))) end end @@ -49,6 +49,44 @@ end end + describe "single-paragraph RTF format" do + it "handles documents where all content is in one <p> with <br> separators" do + data = AtomicAssessmentsImport::ExamSoft::Converter.new("spec/fixtures/single_paragraph_rtf.html").convert + + expect(data[:items].length).to eq(4) + + # Q1: MCQ + q1 = data[:questions].find { |q| q[:data][:stimulus]&.include?("Which state starts with the letter U") } + expect(q1).not_to be_nil + expect(q1[:type]).to eq("mcq") + + # Q2: FITB (Type: F) + q2 = data[:questions].find { |q| q[:data][:stimulus]&.include?("largest state in the US") } + expect(q2).not_to be_nil + expect(q2[:type]).to eq("clozetext") + + # Q3: Essay (Type: E) + q3 = data[:questions].find { |q| q[:data][:stimulus]&.include?("Discuss the pros and cons") } + expect(q3).not_to be_nil + expect(q3[:type]).to eq("longanswer") + + # Q4: MCQ with multiple correct (MA) + q4 = data[:questions].find { |q| q[:data][:stimulus]&.include?("southern states") } + expect(q4).not_to be_nil + expect(q4[:type]).to eq("mcq") + end + + it "extracts feedback correctly from single-paragraph format" do + data = AtomicAssessmentsImport::ExamSoft::Converter.new("spec/fixtures/single_paragraph_rtf.html").convert + + q1 = data[:questions].find { |q| q[:data][:stimulus]&.include?("Which state starts with the letter U") } + expect(q1[:data][:metadata][:general_feedback]).to include("Utah starts with the letter U") + + q2 = data[:questions].find { |q| q[:data][:stimulus]&.include?("largest state in the US") } + expect(q2[:data][:metadata][:general_feedback]).to include("Alaska is the largest state") + end + end + describe "backward compatibility" do it "produces the same structure from simple.html as before" do data = AtomicAssessmentsImport::ExamSoft::Converter.new("spec/fixtures/simple.html").convert diff --git a/spec/atomic_assessments_import/examsoft/rtf_converter_spec.rb 
b/spec/atomic_assessments_import/examsoft/rtf_converter_spec.rb index e932b75..378702c 100644 --- a/spec/atomic_assessments_import/examsoft/rtf_converter_spec.rb +++ b/spec/atomic_assessments_import/examsoft/rtf_converter_spec.rb @@ -120,81 +120,6 @@ ) end - # it "sets external id metadata" do - # csv = <<~CSV - # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer - # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A - # CSV - # data = described_class.new(StringIO.new(csv)).convert - # item1 = data[:items].find { |i| i[:title] == "Question 1" } - # expect(item1).not_to be_nil - # expect(item1[:metadata][:external_id]).to eq("Q001") - # expect(item1[:metadata][:external_id_domain]).to eq("csv") - # expect(item1[:metadata][:import_type]).to eq("csv") - # expect(item1[:source]).to match(/External.*Q001/) - # end - - # it "sets alignment tags" do - # csv = <<~CSV - # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL - # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,"https://example.com/alignment" - # CSV - # data = described_class.new(StringIO.new(csv)).convert - # item1 = data[:items].find { |i| i[:title] == "Question 1" } - # expect(item1).not_to be_nil - # expect(item1[:tags]).to eq( - # { - # Subject: ["Capitals"], - # lrn_aligned: ["ff8a5caa-0f2a-5a53-a128-c8c3e99768a8"], - # } - # ) - # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment]) - # end - - # it "sets multiple alignment tags" do - # csv = <<~CSV - # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL,Alignment URL - # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,https://example.com/alignment,https://example.com/alignment2 - # CSV - # data = described_class.new(StringIO.new(csv)).convert - # item1 = data[:items].find { |i| i[:title] 
== "Question 1" } - # expect(item1).not_to be_nil - # expect(item1[:tags]).to eq( - # { - # Subject: ["Capitals"], - # lrn_aligned: %w[ff8a5caa-0f2a-5a53-a128-c8c3e99768a8 f7d26914-3e2b-5c9c-a550-ce9c853f0c09], - # } - # ) - # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment https://example.com/alignment2]) - # end - - # it "sets alignment tags when one is empty" do - # csv = <<~CSV - # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Alignment URL,Alignment URL - # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A,,https://example.com/alignment2 - # CSV - # data = described_class.new(StringIO.new(csv)).convert - # item1 = data[:items].find { |i| i[:title] == "Question 1" } - # expect(item1).not_to be_nil - # expect(item1[:tags]).to eq( - # { - # Subject: ["Capitals"], - # lrn_aligned: %w[f7d26914-3e2b-5c9c-a550-ce9c853f0c09], - # } - # ) - # expect(item1[:metadata][:alignment]).to eq(%w[https://example.com/alignment2]) - # end - - # it "raises if an unknown header is present" do - # csv = <<~CSV - # Question ID,Title,Tag:Subject,Question Text,Option A, Option B,Option C,Correct Answer,Color - # Q001,Question 1,Capitals,What is the capital of France?,Paris, Versailles,Bordeaux,A, - # CSV - # expect do - # described_class.new(StringIO.new(csv)).convert - # end.to raise_error(StandardError, "Unknown column: Color") - # end - it "warns if no options are given" do modified_rtf_file = Tempfile.new("modified.rtf") original_content = File.read("spec/fixtures/simple.rtf") @@ -203,7 +128,7 @@ modified_rtf_file.rewind data = described_class.new(modified_rtf_file).convert - expect(data[:errors]).to include(a_string_matching(/no options|missing options/i)) + expect(data[:errors]).to include(a_hash_including(message: a_string_matching(/no options|missing options/i))) end it "warns if no correct answer is given" do @@ -214,7 +139,7 @@ modified_rtf_file.rewind data = 
described_class.new(modified_rtf_file).convert - expect(data[:errors]).to include(a_string_matching(/correct answer/i)) + expect(data[:errors]).to include(a_hash_including(message: a_string_matching(/correct answer/i))) end end end diff --git a/spec/fixtures/single_paragraph_rtf.html b/spec/fixtures/single_paragraph_rtf.html new file mode 100644 index 0000000..ddddd7d --- /dev/null +++ b/spec/fixtures/single_paragraph_rtf.html @@ -0,0 +1,19 @@ +<p>Folder: CountryTrivia Title: Q1 Category: Subject/CountryTrivia 1) Which state starts with the letter U?<br /> +~ Utah starts with the letter U. This was an easy one.<br /> +*a) Utah<br /> +b) Kansas<br /> +c) Washington<br /> +d) Vermont<br /> +<br /> +Type: F Folder: CountryTrivia Title: Q2 Category: Subject/CountryTrivia 2) What's the largest state in the US?<br /> +~ Alaska is the largest state in the US. It has a square footage of 663,267 to 665,384 square miles<br /> +a) Alaska<br /> +<br /> +Type: E Folder: CountryTrivia Title: Q3 Category: Subject/CountryTrivia 3) Discuss the pros and cons of living in different states.<br /> +<br /> +Folder: CountryTrivia Title: Q4 Category: Subject/CountryTrivia 4) Which of the following are considered southern states?<br /> +~ Mississippi and South Carolina are both considered southern states.<br /> +*a) Mississippi<br /> +*b) South Carolina<br /> +c) North Carolina<br /> +d) Virginia</p> \ No newline at end of file From 8e6847fc8126a3b7738101f24c3de1c55d827dd6 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz <jaschwartz27@gmail.com> Date: Wed, 18 Feb 2026 21:18:38 -0700 Subject: [PATCH 23/30] feat: enhance metadata extraction and question handling in ExamSoft converter - Improved category extraction to handle line-wrapped categories - Updated title extraction to avoid truncation at parenthetical numbers - Enhanced FillInTheBlank question type to build stimulus with response placeholders - Added tests for new functionality in metadata and question stem detectors --- 
.../exam_soft/converter.rb | 4 ++- .../exam_soft/extractor/metadata_detector.rb | 6 ++-- .../extractor/question_stem_detector.rb | 2 +- .../questions/fill_in_the_blank.rb | 15 +++++++++ .../extractor/metadata_detector_spec.rb | 23 +++++++++++++ .../extractor/question_stem_detector_spec.rb | 11 +++++++ .../questions/fill_in_the_blank_spec.rb | 32 +++++++++++++++++++ 7 files changed, 88 insertions(+), 5 deletions(-) diff --git a/lib/atomic_assessments_import/exam_soft/converter.rb b/lib/atomic_assessments_import/exam_soft/converter.rb index c85809f..884fa9a 100644 --- a/lib/atomic_assessments_import/exam_soft/converter.rb +++ b/lib/atomic_assessments_import/exam_soft/converter.rb @@ -142,7 +142,9 @@ def categories_to_tags(categories) tags = {} (categories || []).each do |cat| if cat.include?("/") - key, value = cat.split("/", 2).map(&:strip) + key, _, value = cat.rpartition("/") + key = key.strip + value = value.strip tags[key.to_sym] ||= [] tags[key.to_sym] << value else diff --git a/lib/atomic_assessments_import/exam_soft/extractor/metadata_detector.rb b/lib/atomic_assessments_import/exam_soft/extractor/metadata_detector.rb index 27892d5..2d405a9 100644 --- a/lib/atomic_assessments_import/exam_soft/extractor/metadata_detector.rb +++ b/lib/atomic_assessments_import/exam_soft/extractor/metadata_detector.rb @@ -4,8 +4,8 @@ module AtomicAssessmentsImport module ExamSoft module Extractor class MetadataDetector - FOLDER_PATTERN = /Folder:\s*(.+?)(?=\s*(?:Title:|Category:|\d+[.)]))/i - TITLE_PATTERN = /Title:\s*(.+?)(?=\s*(?:Category:|\d+[.)]))/i + FOLDER_PATTERN = /Folder:\s*(.+?)(?=\s*(?:Title:|Category:|(?<=\s)\d+[.)]))/i + TITLE_PATTERN = /Title:\s*(.+?)(?=\s*(?:Category:|(?<=\s)\d+[.)]))/i CATEGORY_PATTERN = /Category:\s*(.+?)(?=\s*\d+[.)]|\z)/i TYPE_PATTERN = /Type:\s*(\S+)/i @@ -14,7 +14,7 @@ def initialize(nodes) end def detect - full_text = @nodes.map { |n| n.text.strip }.join(" ") + full_text = @nodes.map { |n| n.text.strip }.join(" ").gsub(/\s+/, " ") result = 
{} type_match = full_text.match(TYPE_PATTERN) diff --git a/lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb b/lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb index a5d1529..19f85e6 100644 --- a/lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb +++ b/lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb @@ -26,7 +26,7 @@ def detect # Strip metadata prefixes and numbered prefix together # e.g. "Folder: Geo Title: Q1 Category: Test 1) What is the capital?" text = if text.match?(/\d+[.)]/m) - text.sub(/\A.*?\d+[.)]\s*/m, "") + text.sub(/\A.*?(?<!\S)\d+[.)]\s*/m, "") else # Strip standalone metadata labels if present (Folder:, Title:, Category:, Type:) text.sub(/\A\s*(?:(?:Folder|Title|Category|Type):\s*\S+\s*)*/, "") diff --git a/lib/atomic_assessments_import/questions/fill_in_the_blank.rb b/lib/atomic_assessments_import/questions/fill_in_the_blank.rb index 99dda32..d15c0ad 100644 --- a/lib/atomic_assessments_import/questions/fill_in_the_blank.rb +++ b/lib/atomic_assessments_import/questions/fill_in_the_blank.rb @@ -12,6 +12,8 @@ def question_type def question_data answers = (@row["correct answer"] || "").split(";").map(&:strip) super.merge( + stimulus: "Fill in the blank(s):", + template: build_stimulus(answers), # Note: ExamSoft doesn't use a template like Learnosity validation: { valid_response: { score: points, @@ -20,6 +22,19 @@ def question_data } ) end + + private + + def build_stimulus(answers) + text = @row["question text"] || "" + return text if text.include?("{{response}}") + + if text.match?(/__\d+__/) + text.gsub(/__\d+__/, "{{response}}") + else + "#{text} #{Array.new(answers.size, "{{response}}").join(" ")}" + end + end end end end diff --git a/spec/atomic_assessments_import/examsoft/extractor/metadata_detector_spec.rb b/spec/atomic_assessments_import/examsoft/extractor/metadata_detector_spec.rb index 6199e3b..e22484f 100644 --- 
a/spec/atomic_assessments_import/examsoft/extractor/metadata_detector_spec.rb +++ b/spec/atomic_assessments_import/examsoft/extractor/metadata_detector_spec.rb @@ -30,6 +30,29 @@ def nodes_from(html) expect(result[:categories]).to eq(["Subject/Capitals"]) end + it "does not truncate title at parenthetical numbers like (Q4)" do + nodes = nodes_from(<<~HTML) + <p>Type: MA Folder: Geography Title: Question 4 (Q4) Category: Subject/Capitals 4) What are the smallest capital cities?</p> + HTML + result = described_class.new(nodes).detect + expect(result[:title]).to eq("Question 4 (Q4)") + expect(result[:categories]).to eq(["Subject/Capitals"]) + end + + it "extracts categories spanning multiple lines (line-wrapped by Pandoc)" do + nodes = nodes_from(<<~HTML) + <p>Folder: Geography Title: Atlanta Category: + Capital Categories by State/GA,Capital Categories by Country/United States + of America,Difficulty/Easy 1) Question?</p> + HTML + result = described_class.new(nodes).detect + expect(result[:categories]).to eq([ + "Capital Categories by State/GA", + "Capital Categories by Country/United States of America", + "Difficulty/Easy", + ]) + end + it "returns empty hash when no metadata found" do nodes = nodes_from("<p>1) What is the capital of France?</p>") result = described_class.new(nodes).detect diff --git a/spec/atomic_assessments_import/examsoft/extractor/question_stem_detector_spec.rb b/spec/atomic_assessments_import/examsoft/extractor/question_stem_detector_spec.rb index efc617e..e72c233 100644 --- a/spec/atomic_assessments_import/examsoft/extractor/question_stem_detector_spec.rb +++ b/spec/atomic_assessments_import/examsoft/extractor/question_stem_detector_spec.rb @@ -37,6 +37,17 @@ def nodes_from(html) expect(result).to eq("What is the capital of France?") end + it "strips metadata prefix up to question number, not a number embedded in parentheses like (Q-62)" do + nodes = nodes_from(<<~HTML) + <p>Type: MA Folder: Geography Title: Last Question (Q-62) Category: 
Difficulty/Very Hard 62) What is the population of Denver, CO as of 2021?</p> + <p>*a) 711,000–713,000</p> + <p>b) 713,000–715,000</p> + <p>c) 715,000–717,000</p> + HTML + result = described_class.new(nodes).detect + expect(result).to eq("What is the population of Denver, CO as of 2021?") + end + it "returns nil when no question text found" do nodes = nodes_from("<p>a) Paris</p><p>b) London</p>") result = described_class.new(nodes).detect diff --git a/spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb b/spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb index a7bdc4c..5dcd7aa 100644 --- a/spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb +++ b/spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb @@ -35,5 +35,37 @@ expect(result[:data][:validation][:valid_response][:value]).to eq(["Paris", "Lyon", "Marseille"]) expect(result[:data][:validation][:valid_response][:score]).to eq(1) end + + it "replaces __n__ blank markers in the stimulus with {{response}}" do + row["question text"] = "The color __1__ consists of primary, secondary, and __2__ colors." + row["correct answer"] = "wheel; tertiary" + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:stimulus]).to eq("The color {{response}} consists of primary, secondary, and {{response}} colors.") + end + + it "appends {{response}} to stimulus when question text has no placeholder" do + row["question text"] = "Name the active compound." + row["correct answer"] = "Salicin" + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:stimulus]).to eq("Name the active compound. {{response}}") + end + + it "appends one {{response}} per answer when question text has no placeholder and multiple answers" do + row["question text"] = "Fill in both capitals." 
+ row["correct answer"] = "Paris; Berlin" + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:stimulus]).to eq("Fill in both capitals. {{response}} {{response}}") + end + + it "leaves stimulus unchanged when it already contains {{response}}" do + row["question text"] = "The capital of France is {{response}}." + row["correct answer"] = "Paris" + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:stimulus]).to eq("The capital of France is {{response}}.") + end end end From 242c104f6dfd5512e74b782df34dfbcef7646148 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz <jaschwartz27@gmail.com> Date: Fri, 20 Feb 2026 15:08:36 -0700 Subject: [PATCH 24/30] Address review comments: - Clarified normalize_to_html method to show how it handles both file paths and file-like objects. - Updated categories_to_tags method for better key-value extraction. - Adjusted question handling for Multiple Answer types in the conversion process. - Modified metadata extraction to support new category parsing logic. - Updated Fill in the Blank and Multiple Choice question templates for consistency. - Fixed integration tests to reflect changes in question data structure. 
--- .../exam_soft/converter.rb | 36 ++++++++++++------ .../exam_soft/extractor.rb | 2 +- .../exam_soft/extractor/metadata_detector.rb | 2 +- .../questions/fill_in_the_blank.rb | 4 +- .../questions/multiple_choice.rb | 4 +- .../extractor/metadata_detector_spec.rb | 2 +- .../examsoft/integration_spec.rb | 4 +- .../questions/fill_in_the_blank_spec.rb | 14 +++---- spec/fixtures/simple.docx | Bin 5546 -> 5848 bytes spec/fixtures/simple.html | 6 +-- spec/fixtures/simple.rtf | 6 +-- 11 files changed, 47 insertions(+), 33 deletions(-) diff --git a/lib/atomic_assessments_import/exam_soft/converter.rb b/lib/atomic_assessments_import/exam_soft/converter.rb index 884fa9a..510a03d 100644 --- a/lib/atomic_assessments_import/exam_soft/converter.rb +++ b/lib/atomic_assessments_import/exam_soft/converter.rb @@ -130,26 +130,33 @@ def normalize_html_structure(doc) end def normalize_to_html + # Note: Pandoc Ruby takes either a file path or a string of content, but not a File object directly, so we have to handle both cases here if @file.is_a?(String) + # File path as string PandocRuby.new([@file], from: @file.split(".").last).to_html - else + elsif @file.respond_to?(:path) && @file.respond_to?(:read) + # File-like object (File, Tempfile, etc.) source_type = @file.path.split(".").last.match(/^[a-zA-Z]+/)[0] PandocRuby.new(@file.read, from: source_type).to_html + else + raise ArgumentError, "Expected a file path (String) or file-like object, got #{@file.class}" end end def categories_to_tags(categories) tags = {} (categories || []).each do |cat| - if cat.include?("/") - key, _, value = cat.rpartition("/") - key = key.strip - value = value.strip - tags[key.to_sym] ||= [] - tags[key.to_sym] << value - else - tags[cat.to_sym] ||= [] - end + parts = cat.to_s.split("/") + key = parts.shift&.strip + value = parts.join("/").strip + next if key.blank? || value.blank? + + key = key.delete(":")[0, 255] + value = value[0, 255] + next if key.blank? || value.blank? 
+ + tags[key.to_sym] ||= [] + tags[key.to_sym] |= [value] end tags end @@ -159,6 +166,13 @@ def convert_row(row, status = "published") source += "<p>External id: #{row['question id']}</p>\n" if row["question id"].present? question = Questions::Question.load(row) + # ExamSoft has a dedicated Multiple Answer question type, but Learnosity does not, so we need to update the question type and UI style for those questions + question_learnosity = question.to_learnosity + if row["question type"] == "ma" + question_learnosity[:data][:ui_style] = { choice_label: "upper-alpha", type: "block" } + question_learnosity[:data][:multiple_responses] = true + end + item = { reference: SecureRandom.uuid, title: row["title"] || "", @@ -186,7 +200,7 @@ def convert_row(row, status = "published") ], }, } - [item, [question.to_learnosity]] + [item, [question_learnosity]] end def convert_row_minimal(row) diff --git a/lib/atomic_assessments_import/exam_soft/extractor.rb b/lib/atomic_assessments_import/exam_soft/extractor.rb index 4a75ce7..3aef511 100644 --- a/lib/atomic_assessments_import/exam_soft/extractor.rb +++ b/lib/atomic_assessments_import/exam_soft/extractor.rb @@ -72,7 +72,7 @@ def self.extract(nodes) "correct feedback" => nil, "incorrect feedback" => nil, "shuffle options" => nil, - "template" => nil, + "template" => "block layout", } # Add option keys diff --git a/lib/atomic_assessments_import/exam_soft/extractor/metadata_detector.rb b/lib/atomic_assessments_import/exam_soft/extractor/metadata_detector.rb index 2d405a9..41c4cae 100644 --- a/lib/atomic_assessments_import/exam_soft/extractor/metadata_detector.rb +++ b/lib/atomic_assessments_import/exam_soft/extractor/metadata_detector.rb @@ -27,7 +27,7 @@ def detect result[:title] = title_match[1].strip if title_match category_match = full_text.match(CATEGORY_PATTERN) - result[:categories] = category_match[1].split(",").map(&:strip) if category_match + result[:categories] = category_match[1].split(/(?<!\s),(?!\s)/).map(&:strip) if 
category_match result end diff --git a/lib/atomic_assessments_import/questions/fill_in_the_blank.rb b/lib/atomic_assessments_import/questions/fill_in_the_blank.rb index d15c0ad..d302d06 100644 --- a/lib/atomic_assessments_import/questions/fill_in_the_blank.rb +++ b/lib/atomic_assessments_import/questions/fill_in_the_blank.rb @@ -12,8 +12,8 @@ def question_type def question_data answers = (@row["correct answer"] || "").split(";").map(&:strip) super.merge( - stimulus: "Fill in the blank(s):", - template: build_stimulus(answers), # Note: ExamSoft doesn't use a template like Learnosity + stimulus: "", # Note: ExamSoft doesn't use a template like Learnosity, so we put the full question text in the template and leave the stimulus blank + template: build_stimulus(answers), validation: { valid_response: { score: points, diff --git a/lib/atomic_assessments_import/questions/multiple_choice.rb b/lib/atomic_assessments_import/questions/multiple_choice.rb index b011cff..371b27a 100644 --- a/lib/atomic_assessments_import/questions/multiple_choice.rb +++ b/lib/atomic_assessments_import/questions/multiple_choice.rb @@ -72,7 +72,7 @@ def distractor_rationale_response_level def multiple_responses case @row["template"]&.downcase when "multiple response", "block layout multiple response", "choice matrix", - "choice matrix inline", "choice matrix labels", "ma" + "choice matrix inline", "choice matrix labels" true else false @@ -81,7 +81,7 @@ def multiple_responses def ui_style case @row["template"]&.downcase - when "multiple response", "ma" + when "multiple response" { type: "horizontal" } when "block layout", "block layout multiple response" { choice_label: "upper-alpha", type: "block" } diff --git a/spec/atomic_assessments_import/examsoft/extractor/metadata_detector_spec.rb b/spec/atomic_assessments_import/examsoft/extractor/metadata_detector_spec.rb index e22484f..a0346bc 100644 --- a/spec/atomic_assessments_import/examsoft/extractor/metadata_detector_spec.rb +++ 
b/spec/atomic_assessments_import/examsoft/extractor/metadata_detector_spec.rb @@ -11,7 +11,7 @@ def nodes_from(html) describe "#detect" do it "extracts folder, title, and category" do nodes = nodes_from(<<~HTML) - <p>Folder: Geography Title: Question 1 Category: Subject/Capitals, Difficulty/Normal 1) Question?</p> + <p>Folder: Geography Title: Question 1 Category: Subject/Capitals,Difficulty/Normal 1) Question?</p> HTML result = described_class.new(nodes).detect expect(result[:folder]).to eq("Geography") diff --git a/spec/atomic_assessments_import/examsoft/integration_spec.rb b/spec/atomic_assessments_import/examsoft/integration_spec.rb index 13adeb3..98b6a62 100644 --- a/spec/atomic_assessments_import/examsoft/integration_spec.rb +++ b/spec/atomic_assessments_import/examsoft/integration_spec.rb @@ -61,7 +61,7 @@ expect(q1[:type]).to eq("mcq") # Q2: FITB (Type: F) - q2 = data[:questions].find { |q| q[:data][:stimulus]&.include?("largest state in the US") } + q2 = data[:questions].find { |q| q[:data][:template]&.include?("largest state in the US") } expect(q2).not_to be_nil expect(q2[:type]).to eq("clozetext") @@ -82,7 +82,7 @@ q1 = data[:questions].find { |q| q[:data][:stimulus]&.include?("Which state starts with the letter U") } expect(q1[:data][:metadata][:general_feedback]).to include("Utah starts with the letter U") - q2 = data[:questions].find { |q| q[:data][:stimulus]&.include?("largest state in the US") } + q2 = data[:questions].find { |q| q[:data][:template]&.include?("largest state in the US") } expect(q2[:data][:metadata][:general_feedback]).to include("Alaska is the largest state") end end diff --git a/spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb b/spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb index 5dcd7aa..df948ac 100644 --- a/spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb +++ b/spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb @@ -25,7 +25,7 @@ result = 
question.to_learnosity expect(result[:type]).to eq("clozetext") expect(result[:widget_type]).to eq("response") - expect(result[:data][:stimulus]).to eq("The capital of France is {{response}}.") + expect(result[:data][:template]).to eq("The capital of France is {{response}}.") end it "includes validation with correct answers array" do @@ -41,15 +41,15 @@ row["correct answer"] = "wheel; tertiary" question = described_class.new(row) result = question.to_learnosity - expect(result[:data][:stimulus]).to eq("The color {{response}} consists of primary, secondary, and {{response}} colors.") + expect(result[:data][:template]).to eq("The color {{response}} consists of primary, secondary, and {{response}} colors.") end - it "appends {{response}} to stimulus when question text has no placeholder" do + it "appends {{response}} to template when question text has no placeholder" do row["question text"] = "Name the active compound." row["correct answer"] = "Salicin" question = described_class.new(row) result = question.to_learnosity - expect(result[:data][:stimulus]).to eq("Name the active compound. {{response}}") + expect(result[:data][:template]).to eq("Name the active compound. {{response}}") end it "appends one {{response}} per answer when question text has no placeholder and multiple answers" do @@ -57,15 +57,15 @@ row["correct answer"] = "Paris; Berlin" question = described_class.new(row) result = question.to_learnosity - expect(result[:data][:stimulus]).to eq("Fill in both capitals. {{response}} {{response}}") + expect(result[:data][:template]).to eq("Fill in both capitals. {{response}} {{response}}") end - it "leaves stimulus unchanged when it already contains {{response}}" do + it "leaves template unchanged when it already contains {{response}}" do row["question text"] = "The capital of France is {{response}}." 
row["correct answer"] = "Paris" question = described_class.new(row) result = question.to_learnosity - expect(result[:data][:stimulus]).to eq("The capital of France is {{response}}.") + expect(result[:data][:template]).to eq("The capital of France is {{response}}.") end end end diff --git a/spec/fixtures/simple.docx b/spec/fixtures/simple.docx index c3e44e21f4615cb77071dae3fe694b312c2a0b4d..347f0c5e7cc54bdacebc760794dba05d2d013adf 100644 GIT binary patch delta 4714 zcmZ8lcQjmGyw%I-J<(-|=pqCYJv#B}(M9h@^qMiEB!pm;XwglgN122OMlYiiq7EWN z8@&c^eDAIICHbv&);fRev+lin|H|37RA5SoLRSl)fF6g0gapT+(lCkQHsrY|ec18i z#O8-&KK<_DJg!IIX<;_v%@f|;rMj62mYJ51txpV&(KE8g*>BG$0-wIq1XE4FFuQ$+ zoMca755!cXKCFc}Snau&)|;JLA4$N5%u@-b^HB+^STvmk)G<7fapI;};a4<wW9XdP z5i&#(=y0($y!Y!Yf8N*{y9*I9#z`Q_SOf_6XE=P4IG9*_p_3-fAvB?ro})N;H$@j( zM--#@V#tiFQS32?FDj~r_UIkD)R$B35cxuHO8n8VHt79^d<VD;?k8C)V*i<+@=p9e z@w$~z(;itTg>!HGeNa#!D(z0Zy7<1vcbE&~;Ln2u+CBDVYm9ljTqVSBy2c5;#7aJ? zWE#-^_E(bEC;OVIhA24aMY<pFO@l2yEyvA1wlE-Pi#+JHKtq1vddhjXI!$kzIf%zs ztmTG_=o-J<RNN7!*ygcsq{pP=@h<7?5iq{B(|C&Z?11|6^z~0kT`gh~6N}(}CtMty z89W@E|5U)o$^fgt1wux2+OCxlNb7w3RjiBJK$E2FBoDf#ywGDA((Q;qxClU7J}Pv` z&cH(k8)R1z=>t@jEL$Uus%54!+Rm)`v}X5RCF2(2x{;Qw(o~vCWekH$z{bEq@YJ;H z^RJR?1?aq4!OAa^$B|NccErM@H2}?G4`Mlm&%nk~W+)ZJEHvPqj0WETDZ32+OYbL@ z(Yu|y&oEbR{Pj`;tM@xc2;>I=o7!aT(m;qqzHFV!7#AGcjE4{{@gO+a0sd;>6RI4f z;U0jA=l8B15q7L^`Fu8M63*QdTt9Sg7iiWE#4|G0<QIsoK{=@QyK8HKm>(&8H?<o5 z=|up`2B<>LT;OX14osm^I?ht7(bfqSHZciQHyh|TADZ9XkrGW$7ILfR4E@xK8NCaX zogI^7wr;sejkL>jx~P0hJ1V#-_y(k01V;&wqZ@@U@vjL#i57$FU<p_LgK#Y07C65J z9d>UIwu9(;gS~u2?EHLu!5+ea9_~4V9{^ZIq88`iB;RHXiIYa_;-nrJ9w*KtaDBi% zkK)2YfDzjXF&uI_W3m?y%c;N7xHgCg@ZktiQ1S|aFN&o!kiF<9?2&mZM1r6FCoO=4 zFM4s~1UcQL;q<P>Q-f%VbZI+bpbA+3?Nfbmm4v~zjLjun7|3u!?^_hz>8()d)7^E6 z3oT0f1<c7jm@CAi@R`#L)Ag4ve#Hq-$G-P2cKw%1*O?@Z&juMqYU1CeCPIhg+Gd0V zAdHmlYrni0&QcExPeg4KWofIIm@Ulf0vdIVk?i+QS66{WbF3H5!SEC|V#2aUXl#Vd zMq3`*@3^ih=FvxIYv-zB<-IlA%08VQfNIi7_?rQBtJO?kl=qm=o>|{riW~JoWB7}W 
zF599Po8g|LrVx*ch`M@#cmBCamy_{tmCc2iDR|Ag3=uG?zG4eFR2k~9Z_#y{@=og> z+F|$%!?_*ew3al0G)UAE1_21Q{NEPYWz)@B7}!&qPF?LO!%?h1k1~r~mS*;my#=zb zhcZ>-V7cGIaIcwI@jg;s!?G~^pT<Q0+uLfj2v~6ctu7ZrAOt)uN`296O>_F-k;1z& z-LcraMk?!5;?o>nQcOz|A7p1zMy=&%H16;e6<XGdP`B*oVCF-PPFL)D3yH_0HIkAq zHI#1A8#-zW8h8hUOgsQ4ec%*B>p#)F@7%M}3Ew~LnW9jmYY4Sb-AJMTTB^s&V)lge zhcZ9CaGI87gdarf%d1I!Crz!`u1|YBuZcChnbTVdT>1`TDwKE%&6qQ0==l3hen=PI z&}=w(M%%18Pk-m<-T0I|%;VD4b-rqMq*QIzF!@VOP^NIycs1J7Y?7+MtJX33StqOU zEOX1ayziIt$BP(EfLp;K3AuwHkEA<vIyL=PW}&0>(|lWqptc$ba_WOkVsZn$R)4{1 zNBek@efiCuH62knnd-t1?xWSYLPpPa{GAkX{vH_*sLgY2lw6mhAPcz)#`ALT1YRJS z5#)>%n{-14k7fMxYC~iB;&kGYpS4L}^Fe;MfIyXGsltb?_H~A@ghrNLAC+ed9!L#1 z%*1<V-WjfjjOPIs>_{Hfrl6XdWM1&(b^Iu2ru4aL1~9$|Xtb!+uH@B`6bcHppJBL5 zkvT3SkS93YsIV%2fD4YAEtvN_Q^^ucXqYQTFF#6Y@?__47=I**j=rdRTk79*V5qxi zQeQ>$i)<qKGg2U<^~sSXYfVI}+6>y-9?{^pp#z(R0878=wAIo~mr_N4ZagOL%`zgg z0V!7khsqH}VMcpwTtk#3K}RCN?$WbN9mEAr{?<uFL6V3R@7p`8eJyRsT!|&nMW`ld z+Y}P`0&`61*RCCshr^z^QXHR<TOy{tS~jwkBV4@jV4EQ2*woF|Y#Bng(>ma?Km`HL zd&xg&oI{l^yp%pm3KkJDNLsXuO9h18<K^IT+IJAX#MUq|iM;h$ZZ<ZHn@Rt!X>k%D zq+VB1r(Kk~X&=xZuwF+TSx{oc(?NQu(2tMUF8N?z1+3Co!#tf+R!YQIT5-gn22q3F z7mMX<qSd>Gy{F}gcpB%0`35`-M?=T2^}Pr{5r>hfGQj@Bb<c0<_3)ls6?ExgJXtqp z^!dJ0fY2G>BwFKhG-nA|>iv9-9ZM5L^+(Zmxfy-ubMGhWlwymLV$GHDUA$EbZ2C>H z1a;J7gtKqbR$SG2&yrh<heMVVI~hZE*V%M$uMy!bM~y)%?Df=Bk6Uksu*12=?hR+Y z%X&JmDC$)m2pyl;jQP2}Nja|*EabUWuaV+X)?52Y5(TLXzDrEr;Qpcg$@(S$bl;N> zv>u8vf5elu&=^_O8y?6%RS$>6-jfMDB)z7u_*k=z1>5Y6@c*XoJt-gLHMabtF&7MJ z7)!buPK8JY!UA^VbtvK(`IJ@!LHBKbZV%&@jdNOxDCIYrvDkT)8zj4&k+{-M#Wzeg z7R|)phMXe-mN+MQrK+6FOr9*01ML)fI02ovLT!oU%jE=ZY<zCt^j_=}<ZNrGUx}c8 zn8Sjy)Dv4d8dcJ&9)yhF)DUk1nez1nif7W2S}cOBu5?0^O0sJ_6mxkD5VRxO;ePM4 zveSuxgrh4bozDaPG`xAIq}#ID4A*22GahdO<(<Ehs3#y#yVo|WX_&u+zu4)Ke^Z|J z9Y8X(YI`+qLBZX<YaYC;SvBN$AcK?AsDI`}T<a$Va6{l4^+TYnZ9hZjez2U`yta;6 ztSsc1KRIWY_wiM`y6pNV_?M&Hf(51s3;c;APZ?nucdRS?2}C)6vaMvDR7aglyxYrd zG9))~5`Qax_x)0;2Ze!M0=EnU$UhuhnWY!C`QDySx4S}u7^>GRG+>u#k!=xsV}!NN 
z=BvbhsJP$_afmHqleAgBQ1;HgKeq&gQ3Lc+t8(6j;bVtJtzNT7jf)r8(GmAl;jT)i z(hj%34mfZV#OSb{HYH#cz+c92FvOtVBA7qT7#zPNo}FDq*+&Ae{OHNo&)uf#a0A*i zyBcn`71J=@<%#@m1h_0JeVA-svT?<GD3_{n>}`@MChrPaY{>AeqP;nLaV~nZZ!@v; zYW0^t8L(n9?LcmmX=t7II)}br8HICUOK^nb?;NtGV65dNm-*j$Vof0i$@X=Q0x7Zp zyQ)ieTmyni-oZMeZ;>X)F3W9#7xd)aC3|D*2!Gg2Y1zLFSO^RF9>%?Yw$z;F74wy} zqC`)OSuH$v!{VJcbC`u?PSdN_pc3!f^=&S7icFT-R|mm4KQbjH_^BfEEK1>ec1G)g zQ4&79^=RV<mM3(T8-`3#5cHIM!F;<;VP6QW>nTQjl!zgoVfrg-tW0Qm(BW%$EbsEJ z$On~PnO(nBQBQ@>=Vre?n^F32tSVc&WwDe=gVdtlIWT>6RPwM%-n~alDP>NzpWV~o zu<<3)nl{7lIer1nuLqpH{gR9Ub+f=k4`E<mac)E!fQx&XxG0U`9wfwh<#ql*g(oW9 zSYDtd^V>{UWDScMV@6^dc-k<aD7luy?l5m>1JgcZ9@!fQ#((3E>-zT6jNilb_ItLM zq>%^V)P+9lecG#8JK|fpQ63yxElDIB2MN6{2XZMos!wm6cEnWCk}ZVQBKhWJL#WT4 z-E+?N1%!O+_)vQ3X^;$8d~OEO$)Y>4d<oUGR<?i^$u*^ZCA@T$-TSTEQ4X*$rR`-c z$4IA9wjt%7#w2kFo0fV4y2O?}65pC@XOPwx_wjwSb!%?*leB0K69VRXfA&}R<HzIi zb^2{rr6T0j58fownlG3aPXe4xp*xi1uvg1Z<Mz@3uge>#j3I<&_~TDh-SCjM8ZYCO zOhX@MSGr%J1%IIu8u?k1mHq1;k3OVbRGX20Kp%hgtW|e%qD$A!x&;b0hW1VxJ~^p= znDwgyfJ)la%i_cbk7(xS*;0xr9}Xw$72emR$J<AesZYPO*jt|nRMt}E^K!;GXUoR= zJnSI;XvL_m>J2e8wd;qby9sVt@n`4cDU_0&j(+Q1Mt0_x4L~Qj3{Qp|tF!8^iWjZ@ zgr(*TA}b6+3oB4IW#nbUQL17p(|OZJXYmM+mq9PRn8lTL0B=-IK5a6Coo$#1mU4FJ z>ExWBoku1&efX#?z5YwVH#+u>oQ>am9=0#uvbX5K^-~bnWP1ep=ClD(@34d(4Ua*0 zq6GUqjjwLh{F@Qd?497*=Vk3yHk6uC;spY~phg9`)4Zlq(;2}A(-+Yu0L(>1>Qv{w zmA!lM!;kJ|Xb-Nbe$f#)yIA76*ruq=D6WFuqTq%*A-iI0rbC2SFiG1zIr253=S%h9 zG8oJ-C^5*^m&$Xb-1vEkm7if;Cfet_t$Br4yqU>Xj10gvRgUx@zJbVM--jWT67^-} zEG08nv$3p=AcCog5u|KlUJemc+kWEyr{Ycl$Mk5Oaj4@gqt81#84J^;ISBpW72b6> z*rOZCUc*k4H0r;)0oFdiVrlpw=L~wrDW4n^NO-nd?II~x@L&1Bn3rNUXGSk&CpA*? 
z0i?y%CN-F#pC<kY&^yrd^Zn5^8LOgwDLf%<F2B_7sx0{T`&L3u-+nkF)gmY4UVnUX zlyFxBk@xAJc)~}pQK`-(M5S$79F-OB=eI-PH=@jnb`gp|D@Q8bJ<IqJ$TBp+d_mt1 znbbWJ7DwS9gsP}0Mypf%t80!yr}8>JN3BPS81)R3M@t)18MK{>wkvL_>2Lcz)mIxe zN}{bro>bIb#^NkUMfR#w2w|c;QN4Ht5&<VDhI|v7j_I2l@4>`BKcUnITXh6Up$|#J zM)lN^<raohis==YAE=~2=#>Zw=~si}a?}ci5nzXB&X+D0NEvpw9VQlHA#Vn>>=$Vx zc*S;6SRf_KrSa8LDV+9T6vmbK499u0vs0Qm<L5MPlI^A1ZqA;4q}9?8@PMwfc-tE# z_VrhDzsR)&NNe~Hd+bmzC%l#b7mpt2|9@Rrm=Ik1X&67<eU58O_q*A{3iex+z#w!S zHrJTnZPT^w`!B?>I%7@X-yZPqXa4Rs$o@Nx-2n(-&2%^awHwoPEO-KhFf#&n!N1S{ ze#fKw<GcnDOp%_A^ZG4+zsb=40lGyBdq&UsFX;DC#qb9x3;=7U2mTA%FeegY#M0M> zT{M4y7Rh0_3>^Q0eoym1LH8+P3jYWCU5G!Kv!sPV7}zMuaL904v7-|!5jOq@B{*>U delta 4439 zcmZ{n2T)Vn+J-}w4gms$fP`K{7Z3sp96AJ~cR`AP^e7!d7XhV&-lGHr2`W`-Qlv|l zB3+OoRf!aZ$dBj#bFZGc_nX->Yi7NB&0f#`>I|$>l-5X(gp?UTK|uk~tkO)Q6+m@L zGKU{qh=niBKD_P==DpE{<`cLs+ctS5U3q7^F0SKE<J=6xT1R8(-c{w}XCKal2A2Un ziMm5Z96;vCTOiiT9L!J%5OKqS)2tvXGa9TIrGY;acT#l8jAAu$mqbL;wxrbvPsr3E z5#?+)4J0~VpFjiXxdH7hxGdRJ1}c1v*jKHORL?iVM9a^KZL0%V2)haqJ6z1b5#46f zO)V+t_G4a6nu<z0T93*4j>+gOl%3Bw`c$G~Ek&{2^l0mb=pmu^#Or&DhZb#|hj}+( z?ZfM|3~&iu&@%0Kwe3jDqszrp)ER<&xv_kd{Xv|D6JNu1`$FSUtv)o(uTav#UG|YF zS1ny*1AfMH24{YGRa!Z78n;o>IaHZRQ?Z`GU+?`nr47{1g;-qGtlrlyLY{N<kI1xL zu8?;Vg(xJf@2{1@6x+-0G@qZ4t~aHTFFOg^Qrb1qpP%)fL5%b+QOFA}1~?J{0CU6u zz#sf=0RVIZ5fn9u=#pe;!GmtRt$ztgudA?qQc2+lh=Lk<yWR1*Ec6mK2R#X$H;*m& z>h5}5x%Q}L@Hr+$WvmJD>>k|EjRV7I9Z+f2J2H9b3Dz2pNgurvw|xS!3+o@5iJ2tF z?T>17sJ^~8AL3&kt1z|#5T~r6(#7opRE*{r?4}FbMNu#na?_&H3>MNiaFGZ8u9{sZ zpI-g=c77$(nqwU)G1*8NQDCN!a~bZh`O^Wb{LM@rT8@D;Hb|B2lb(FxJ!_b=X}PAE zDr=aflU2GA2;D2TUy{r3-8D$lKo>QBNepCmOitGYl;FdyvOe(ODlFT(Yf4{9Ku6qR zthqwYL#RI4T?T^xELrgeCGEz{7aJ$~(UMi9CL(gTE;DKa%q!T}0JCIJv0WaNz3tp7 zI9rY1MH2zx1Gl7h^MtsE)SNo%xOtSFAIYZr*_~bbg}l4m`vh@;{ICCl9Pn?WzF6J# zcR`DiQliog^hi``;`a$4^}Pj8(S*L?BOWy$p2JJAxjTEK!<S^-ap#MwR82;SCA}@d z_%9seUc*025Cus|s$G@ts(zAVO_Cl#bIKB24igeh7AmRlb#rY+Di>oPJ2L8|By{WV zRLDyCJ<aEKH8k#p-{Pq)PxD)7IL<*ymlR7)PN6*dY?_JdPqokF<w96$_+kZI?aOSR 
z*D8pE5W&u44o`wB{Jwk0X3_-Nxe-D!3N*uCOk}l2RI7u(S}9mIG-e86Z8QVV-DQ2k z23vYP$Lv><h8}l)fP5?dg=2G+i1)+AWtIOYj#^bRMk11noPahb=S3OLNiq=HZUVoD z>Zm4Q5+j=s3A$DJ<|jlJ8{?J=4I4<|wdoPR^l>>aVb0lXzGHEfrO^GIXZhJ`nj>yH zZkaD8(Ber(euY$53}#llyf)G`K3&H`>pNp%sWU@|+0TMg=GAyKB3d6W!W~~D%UFBg zBE=#Rb*gK~+y*iQF4U|$d|K{`Ig;;SjT&5hubI`4NbP0WyU)STjJRJ-mdXTn-UDun zF>$C1zd5Uev{8Lv7CPa6$m~$;TDH0k_PxbZ>YFr5lSH)$L1Z>arWN7#@7sk|SFA5d z#pK>KjX%o9btzIkx505};4d?<@;Dyfik`}z7GZw^0^=)Mo}=8fsG@9(N%=ghCf`4B zg9|w#D7S5hq(e+QPou|Dq>=^+MWZ6;T{3;Kg=P$`;!${V@Th=*ssV(?Y{Rdmjc{iX zoUH4{W4v@nT(RrMYsu=|p8T^RS7x~)uv6<8A$KB>8vVrj=;IE}c66EeD{CI_eold* z=K<8K%2E!F9;l8_E-Bns^qJ&Db~8J~$jIbECw-R*8*<6lTgKOHOG8w?v{}JcN#?3! z-ADIC&?Cihdq2U;)*f?p#BCeritLkI+fRTrSKrqu#U4C%e?7IFIeR_lr|BGr*~tp~ zSw4c5US;v~A=6_pPAmwIrtt+P7>?d#vfKj3-hYQVY(%Ym=(Qu)P|~lK@u_U6TDbE) zF)iCBx#f)YS{A!EyRpDy`FqEf;~eMMQe&YrWl@O<$>J+3N`#OObLN{9r(yYvz3EM? z?**9Ky=<p?H+|(M+dZA|!#g|vKP;O_B-aY++3@VBk#?A1dAP~|)7L2c%g{WHLypmo z?DsF5Xv|=V5->B7Z(Vs|@r6UIcIRZjd{_)poBZg)f8!|skwwuGl)R|c^bQYVDTes{ z@Wj++SCI9<ib&kJ$ycLYl4QRrT0S&ClR@}tk6oPcvC&^4dh5mCH}Z8{lO%3;6Olj+ zGYgWbg;VR6*k{cSkOaAqtZ3d*mMX!jPe3d2b^2P<sUn^vTZC<yuAEc@*0>6Cx7}%p z#|3xPq55w6g-QQ?F9@ndbA4y8DvzZb?eCyn<kfl8Gt3V1@VWgyr}2&fv2{W)vA6!9 ze?%Em1)ncC#EYZG@)jf4BGEm{rl|Iyfvb@?=d?@vF(th5pdMWTb(`WK*<2L_cFVQ{ z9NO|ecDRbkn&}?$0T+~Q#uBbmsr6dTnscFNPBp#xIHX^=3uo%hmWOt11AbcYl)OvN zZ_cK_AQkm+u*yCx;!P&|N|7yc->L9?t5E)!i7nxN(COMwiM8+Tc%NTjD>*J3Ub)DX zZ%F<LHjzpg<ufOFkrhut(R<HUUQv!i5mI8*0YWd-8y3L14U^h6+!ebA`{jLzmFJi) zNr%m_2inSE8i~oRP3IWCU`;2D3MTdieIeDMLDNk*W~t44NkZr)-Ce4h_c^1y)*){q z8l$nIM%-UN&Wdwm;*gW+QiH9K9Rn-OQK*$Wvw{yQuK-JK-%^$CshImNml)&D<P#y- z_D;IP&*VTbZg!G~oL`~13HFzZ@K{OMqnpIq(;Ix(bl@5@=or1)Rc?G<I74U*F7g{g zVMJB`F&CD51j?jHN$;`^Y6gtcz7~ViP6IwW{>WRbn}YZ&w*mkgZ5bawj1jNs%)fv= zwSR>A#4xg7GT8WSm5AOiS($J$utawfS04JK(IP@_`TGLrn|U4T%4|F^f?Z?gBZ;Xt zhW?5CquW2Cov~-|#1U7Z*b!|6*78o+R?x0u*ZVwPvJ~ux5UA|*$WwtmOGol~Bv&#( zM~9z7qM;-f{)1WgBN<!Lusr^*tQgQ*3KE8j-8Xc;G~0H$f45kp#(qV2Q{KA!ir<vC 
zsaw7WL~Wo@NjK~TM%zX=&AU~Gji@<U!|a5gHZi+!c8ipNB$E@<+p5;EXH-nNUH&X) z;Ej8j55d^XT@5!ZOq6>%LKG-KgLJ8yi-c`4ck0}`pW*A4cZ`n6AE-iI&2i~lkbuK2 zCiW+Jo;0JlT%B@v@w?|&-w--2H+%-kxsGaY6PSKE(9w&p(U&hGbBy924ph5GWBjk8 zvPZ*$wxZ)8Dntj+T}Fw?_b1kotrN}ZIe*@3JKgI${cG*8JDmR`9gfL*=yAUr2a!m% z3?|X)n_u|718L%o+<~0;RJX@i?C$#goY*!<cel5A3TKG+e^8`RZ}wo7-~3&nX|&Dm z-95us)r^H3(N2L6nkbZ#boUJyd+kQGA*RlQ^|+jJ8=JRYeJz71kYVrY&09(7!7JOk zj<zWQ$Mt7{nL7lzk^ZJJIT>^0-AC*<`sIAOV`n?`dq#v5mUnpGA*F9;c^w%?=r}L3 zhyKNT5_sJ><2j73Zk{*Q4#r!FwTX_W;@0n>*T(6(9*rh)soPlE<zL2yn1_RE8|x{( z<m+I@+;K~CSh`Y_!0l^2TdJ$_OZOWaDFr3kXdRkJo(3J<!PL1_l=|@rFbt5WB@erc zm=t<-P0FIO8eUhOne%w2D+$~lI~`R7cXu2-I5u?e!TE5_GJxo(>AF>FyckgtSeJOY zD6*|iX%p|LI%1YQJ`JgE;8drc{_-)qU5-^wJ3rpit=RzOU{+q7dU}!${+N5WSCSR8 zXFX2U0C)5!mFLGpIZha(8mmi?uWGnE$j&JBT>H=3dA~fZ!#mVfV5>QOXW7)3DSkpf zIH~D+-tx&@6e@X0)~O_^Fa4;OcEH9%x#ovT&{`FhQ7dGZiZqGLOpYr2t5@J`8v(uj zMMlYE&esUV@_EeOgnl5FYd34x0sfO3tbS+OMx7~wK;C9WQ+5*+^RpiyPq2})gqoE$ zsM0Pz&Qw$F=O|6s!U#SI#%1=AxrQ^R_AoW~KeJc`dO@)5m{wk;(2R}voMXpA#ne3K zr{l9M$`TefArC8j6QVgP_?h*F4ugUoy5%F?(kM{MDzD%)+edHI&gKx5<)NJ~s1tqa zGQl*&FX_76_)PeKmAN%boBZNZq6~-_Qwn?HS_&?U-LpgVPBfy0sVKek(vVLvLx%iO zmarBvL(cFG;G|%Sczx*;%QCG`D>xP<_w;uxQFh36=#?e`k;e{OY~!<CO3`#HqgQ1V z)0j~9_d|oyX13oszD_l*!(FShy!-5wk5+5{>2RifY`ApXaYR3rCqDPqVXoM>uUB&s zBb&UT=!|ct-FiP3uG-4z8S<|e(V!=4mz^|j>@Bd=A8Qv6dBJNu16S+8^ZvL?ji#Z` zpAFAoLvC7ZXL;BI!{TjsW<hOLS>~s0j|$4{v7((OI!MQgdCA9xcC+_p^JIot&7DV& zxZBs05eHGEt^U5kaQZ7L5bGA_0GG`KPPZNdnNyV>f`vZ(4E^y;;#brgylCb8d{Htg z82%CUXb(m))b~UoQQdiIXzX~vvXt8W1EQWb9onzzM_%sdMcaG>MscJ<Vre^}_KFQA zRdh{=qve<y*R)~0RbC{}T*L%6o;~CgfUeGB?Oxx6^2J_pO!EdVz?@V)Rq?4dm*NGm zhk3lP@NN#){TAYYEBSUZj<nGoG*2{}#P%?sn%U8)`y^hCZCKU*wDNwtq=x}X8A=wS z9cj>=)XWBwm#PosFHKV3{G5XU(;y4LWp4?N@%I&zOXnfM##&QmX-u^pu{i&a>42LG zZ~L`rVf(p=ccPU2d7)t|c}DuyFOR0kw&v7T#AVWDYGkz3mT#yQ-+iA}it=_FG^T_; zEELd1(Y++5Jjh?<n$lzWVyUh$r|6OPERtb!c3aWsp5X54!}%}MKUlpC`8TnG>QH4h zw>S7&q^$HrJqb_uS;~siT0ceW+hVOv!93B4wO2xxcoN$P;$PIwLHoMa7sXnF>=$(+ 
zVrIa<r+M4o=1epvvn1C=H*fp<AawDgezOv^Co>oHm*)5B;MYj>9}^=%?=XY^76mw0 zdcD8c$-YSR9RE>VtW-$RT4a~S{`ob(_iOZjykJX)_5pGJed_PM2jd?iK?-yg2>Q3^ z_xb|*LsUSC-U0FcE!wsrmu9&D(|Pd#{}2(V(Go2Gy6Nve`=`j57VZ82qTdlS{c%zV U5RGHuq@w~*0Z<nhK!5N4AA8xwQ~&?~ diff --git a/spec/fixtures/simple.html b/spec/fixtures/simple.html index bc5f0b0..58248b7 100644 --- a/spec/fixtures/simple.html +++ b/spec/fixtures/simple.html @@ -16,7 +16,7 @@ </head> <body lang="en-US" text="#000000" link="#000080" vlink="#800000" dir="ltr"><p class="western" align="left" style="margin-bottom: 0in; line-height: 100%"> Folder: Geography and Governance Title: Question 1 Category: -Subject/Capitals, Difficulty/Normal 1) What is the capital of France? +Subject/Capitals,Difficulty/Normal 1) What is the capital of France? ~ Paris is the capital and most populous city of France. Versailles is known for its palace, and Bordeaux is famous for its wine region.</p> <p class="western" align="left" style="margin-bottom: 0in; line-height: 100%"> @@ -35,7 +35,7 @@ </p> <p class="western" align="left" style="margin-bottom: 0in; line-height: 100%"> Type: MA Folder: Geography and Governance Title: Question 2 Category: -Subject/Capitals, Subject/Geography 2) What is the capital of +Subject/Capitals,Subject/Geography 2) What is the capital of Germany? ~ Berlin has been the capital of a reunified Germany since 1990. Bonn served as the capital of West Germany, and Brandenburg is the surrounding federal state.</p> @@ -55,7 +55,7 @@ </p> <p class="western" align="left" style="margin-bottom: 0in; line-height: 100%"> Folder: Geography and Governance Title: Question 3 Category: -Geography/World Capitals, Test plan/General Knowledge 3) Which are US +Geography/World Capitals,Test plan/General Knowledge 3) Which are US state capitals? ~ Little Rock is the capital of Arkansas and Denver is the capital of Colorado. 
Detroit is the largest city in Michigan, but Lansing is the actual capital.</p> diff --git a/spec/fixtures/simple.rtf b/spec/fixtures/simple.rtf index e337800..cb1c2f4 100644 --- a/spec/fixtures/simple.rtf +++ b/spec/fixtures/simple.rtf @@ -20,7 +20,7 @@ }{\listoverridetable{\listoverride\listid1\listoverridecount0\ls1}}{\*\generator LibreOffice/25.8.4.2$MacOSX_AARCH64 LibreOffice_project/290daaa01b999472f0c7a3890eb6a550fd74c6df}{\info{\creatim\yr2026\mo2\dy5\hr18\min59}{\revtim\yr2026\mo2\dy5\hr19\min5}{\printim\yr0\mo0\dy0\hr0\min0}}{\*\userprops}\deftab709 \hyphauto1\viewscale100\formshade\nobrkwrptbl\paperh15840\paperw12240\margl1134\margr1134\margt1134\margb1134\sectd\sbknone\sftnnar\saftnnrlc\sectunlocked1\pgwsxn12240\pghsxn15840\marglsxn1134\margrsxn1134\margtsxn1134\margbsxn1134\ftnbj\ftnstart1\ftnrstcont\ftnnar\fet\aftnrstcont\aftnstart1\aftnnrlc {\*\ftnsep\chftnsep}\pgndec\pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ -Folder: Geography and Governance Title: Question 1 Category: Subject/Capitals, Difficulty/Normal 1) What is the capital of France? ~ Paris is the capital and most populous city of France. Versailles is known for its palace, and Bordeaux is famous for its wine region.} +Folder: Geography and Governance Title: Question 1 Category: Subject/Capitals,Difficulty/Normal 1) What is the capital of France? ~ Paris is the capital and most populous city of France. 
Versailles is known for its palace, and Bordeaux is famous for its wine region.} \par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ *a) Paris} \par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ @@ -33,7 +33,7 @@ c) Bordeaux} \par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ Type: MA Folder: Geography and Governance Title: Question 2 Category: }{ -Subject/Capitals, Subject/Geography}{ +Subject/Capitals,Subject/Geography}{ 2) What is the capital of Germany? ~ Berlin has been the capital of a reunified Germany since 1990. Bonn served as the capital of West Germany, and Brandenburg is the surrounding federal state.} \par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ *a) Berlin} @@ -46,7 +46,7 @@ c) Brandenburg} \par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar \par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ -Folder: Geography and Governance Title: Question 3 Category: Geography/World Capitals, Test plan/General Knowledge 3) Which are US state capitals? ~ Little Rock is the capital of Arkansas and Denver is the capital of Colorado. Detroit is the largest city in Michigan, but Lansing is the actual capital.} +Folder: Geography and Governance Title: Question 3 Category: Geography/World Capitals,Test plan/General Knowledge 3) Which are US state capitals? 
~ Little Rock is the capital of Arkansas and Denver is the capital of Colorado. Detroit is the largest city in Michigan, but Lansing is the actual capital.} \par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ *a) Little Rock} \par \pard\plain \s0\widctlpar\hyphpar0\ltrpar\kerning1\cf0\rtlch\af5\afs24\alang1081\ltrch\hich\af3\afs24\alang1033\dbch\af7\langfe2052\loch\f3\fs24\lang1033\ql\ltrpar{ From 18bfb28f0eee2fa6b1c708390fdc4dfbd18a073a Mon Sep 17 00:00:00 2001 From: Jacob Schwartz <jaschwartz27@gmail.com> Date: Sat, 21 Feb 2026 18:05:46 -0700 Subject: [PATCH 25/30] feat: Ensured all current question types work when imported. - Added ClozeDropdown class for handling dropdown options in fill-in-the-blank questions. - Updated question_type for Essay from "longanswer" to "longtext". - Improved validation structure for FillInTheBlank and Ordering questions. - Added tests for new ClozeDropdown functionality and updated existing tests for consistency. 
--- .../exam_soft/extractor.rb | 6 +- .../questions/cloze_dropdown.rb | 62 +++++++++++++ .../questions/essay.rb | 2 +- .../questions/fill_in_the_blank.rb | 11 ++- .../questions/ordering.rb | 33 +++++-- .../questions/question.rb | 7 +- .../examsoft/extractor_spec.rb | 1 + .../examsoft/integration_spec.rb | 4 +- .../questions/cloze_dropdown_spec.rb | 92 +++++++++++++++++++ .../questions/essay_spec.rb | 6 +- .../questions/fill_in_the_blank_spec.rb | 9 +- .../questions/ordering_spec.rb | 38 +++++++- 12 files changed, 249 insertions(+), 22 deletions(-) create mode 100644 lib/atomic_assessments_import/questions/cloze_dropdown.rb create mode 100644 spec/atomic_assessments_import/questions/cloze_dropdown_spec.rb diff --git a/lib/atomic_assessments_import/exam_soft/extractor.rb b/lib/atomic_assessments_import/exam_soft/extractor.rb index 3aef511..6151b34 100644 --- a/lib/atomic_assessments_import/exam_soft/extractor.rb +++ b/lib/atomic_assessments_import/exam_soft/extractor.rb @@ -81,8 +81,10 @@ def self.extract(nodes) row["option #{letter}"] = opt[:text] end - # For FITB questions, options ARE the answers (no asterisk marking) - if question_type == "fill_in_the_blank" && row["correct answer"].blank? && !options.empty? + # For FITB questions, options ARE the answers (no asterisk marking). + # Skip if options are dropdown choices ("Choice of: ...") — those are handled by ClozeDropdown. + if question_type == "fill_in_the_blank" && row["correct answer"].blank? && !options.empty? 
&& + !options.first[:text].to_s.match?(/\AChoice of:/i) row["correct answer"] = options.map { |opt| opt[:text] }.join("; ") end diff --git a/lib/atomic_assessments_import/questions/cloze_dropdown.rb b/lib/atomic_assessments_import/questions/cloze_dropdown.rb new file mode 100644 index 0000000..a1428be --- /dev/null +++ b/lib/atomic_assessments_import/questions/cloze_dropdown.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +require_relative "question" + +module AtomicAssessmentsImport + module Questions + class ClozeDropdown < Question + CHOICE_OF_PATTERN = /\AChoice of:\s*(.+)/i + + def question_type + "clozedropdown" + end + + def question_data + parsed = parse_dropdown_options + super.merge( + stimulus: "", + template: build_template(parsed.size), + possible_responses: parsed.map { |p| p[:choices] }, + validation: { + scoring_type: scoring_type, + valid_response: { + score: points, + value: parsed.map { |p| p[:correct] }, + }, + } + ) + end + + private + + def parse_dropdown_options + ("a".."o").each_with_object([]) do |letter, acc| + option = @row["option #{letter}"] + break acc unless option + + m = option.match(CHOICE_OF_PATTERN) + next unless m + + parts = m[1].split("|").map(&:strip) + correct_index = parts.pop.to_i - 1 + acc << { choices: parts, correct: parts[correct_index] } + end + end + + def build_template(blank_count) + text = @row["question text"] || "" + return text if text.include?("{{response}}") + + if text.match?(/__\d+__/) + text.gsub(/__\d+__/, "{{response}}") + elsif text.match?(/_____/) + text.gsub(/_____/, "{{response}}") + elsif text.match?(/\[[A-Za-z0-9]\]/) + text.gsub(/\[[A-Za-z0-9]\]/, "{{response}}") + else + "#{text} #{Array.new(blank_count, "{{response}}").join(" ")}" + end + end + end + end +end diff --git a/lib/atomic_assessments_import/questions/essay.rb b/lib/atomic_assessments_import/questions/essay.rb index 1c68a00..cb0529f 100644 --- a/lib/atomic_assessments_import/questions/essay.rb +++ 
b/lib/atomic_assessments_import/questions/essay.rb @@ -6,7 +6,7 @@ module AtomicAssessmentsImport module Questions class Essay < Question def question_type - "longanswer" + "longtext" end def question_data diff --git a/lib/atomic_assessments_import/questions/fill_in_the_blank.rb b/lib/atomic_assessments_import/questions/fill_in_the_blank.rb index d302d06..3b34d4b 100644 --- a/lib/atomic_assessments_import/questions/fill_in_the_blank.rb +++ b/lib/atomic_assessments_import/questions/fill_in_the_blank.rb @@ -15,9 +15,10 @@ def question_data stimulus: "", # Note: ExamSoft doesn't use a template like Learnosity, so we put the full question text in the template and leave the stimulus blank template: build_stimulus(answers), validation: { + scoring_type: scoring_type, valid_response: { score: points, - value: answers, + value: answers }, } ) @@ -29,8 +30,16 @@ def build_stimulus(answers) text = @row["question text"] || "" return text if text.include?("{{response}}") + # You can indicate the blank(s) in various ways: + # Five underscores + # Number enclosed by two underscores on each side + # Number, uppercase letter, or lowercase letter enclosed by square brackets if text.match?(/__\d+__/) text.gsub(/__\d+__/, "{{response}}") + elsif text.match?(/_____/) + text.gsub(/_____/, "{{response}}") + elsif text.match?(/\[[A-Za-z0-9]\]/) + text.gsub(/\[[A-Za-z0-9]\]/, "{{response}}") else "#{text} #{Array.new(answers.size, "{{response}}").join(" ")}" end diff --git a/lib/atomic_assessments_import/questions/ordering.rb b/lib/atomic_assessments_import/questions/ordering.rb index 07affd7..7cca3e7 100644 --- a/lib/atomic_assessments_import/questions/ordering.rb +++ b/lib/atomic_assessments_import/questions/ordering.rb @@ -11,21 +11,23 @@ def question_type "orderlist" end + ORDER_MARKER = /\s*---\s*(\d+)\s*\z/ + def question_data - items = [] - INDEXES.each do |letter| - option = @row["option #{letter}"] - break unless option + raw_items = INDEXES.filter_map { |letter| @row["option 
#{letter}"] } - items << option + if raw_items.any? { |item| item.match?(ORDER_MARKER) } + list, valid_values = parse_order_markers(raw_items) + else + list = raw_items + order = (@row["correct answer"] || "").split(";").map(&:strip).map(&:downcase) + valid_values = order.filter_map { |letter| INDEXES.find_index(letter) } end - order = (@row["correct answer"] || "").split(";").map(&:strip).map(&:downcase) - valid_values = order.filter_map { |letter| INDEXES.find_index(letter)&.to_s } - super.merge( - list: items, + list: list, validation: { + scoring_type: scoring_type, valid_response: { score: points, value: valid_values, @@ -33,6 +35,19 @@ def question_data } ) end + + private + + def parse_order_markers(raw_items) + items_with_rank = raw_items.map.with_index do |item, idx| + m = item.match(ORDER_MARKER) + { text: item.sub(ORDER_MARKER, "").strip, rank: m ? m[1].to_i : idx + 1, original_index: idx } + end + list = items_with_rank.map { |i| i[:text] } + sorted = items_with_rank.sort_by { |i| i[:rank] } + valid_values = sorted.map { |i| i[:original_index] } + [list, valid_values] + end end end end diff --git a/lib/atomic_assessments_import/questions/question.rb b/lib/atomic_assessments_import/questions/question.rb index 05e84a8..b0ba388 100644 --- a/lib/atomic_assessments_import/questions/question.rb +++ b/lib/atomic_assessments_import/questions/question.rb @@ -19,7 +19,11 @@ def self.load(row) when /short_answer/i, /shorttext/i ShortAnswer.new(row) when /fill_in_the_blank/i, /cloze/i - FillInTheBlank.new(row) + if row["option a"]&.match?(/\AChoice of:/i) + ClozeDropdown.new(row) + else + FillInTheBlank.new(row) + end when /matching/i, /association/i Matching.new(row) when /ordering/i, /orderlist/i @@ -97,5 +101,6 @@ def to_learnosity require_relative "essay" require_relative "short_answer" require_relative "fill_in_the_blank" +require_relative "cloze_dropdown" require_relative "matching" require_relative "ordering" diff --git 
a/spec/atomic_assessments_import/examsoft/extractor_spec.rb b/spec/atomic_assessments_import/examsoft/extractor_spec.rb index 0d1768c..7a6a3ac 100644 --- a/spec/atomic_assessments_import/examsoft/extractor_spec.rb +++ b/spec/atomic_assessments_import/examsoft/extractor_spec.rb @@ -99,5 +99,6 @@ def nodes_from(html) expect(result[:row]["correct answer"]).to eq("Salicin") expect(result[:status]).to eq("published") end + end end diff --git a/spec/atomic_assessments_import/examsoft/integration_spec.rb b/spec/atomic_assessments_import/examsoft/integration_spec.rb index 98b6a62..7f49339 100644 --- a/spec/atomic_assessments_import/examsoft/integration_spec.rb +++ b/spec/atomic_assessments_import/examsoft/integration_spec.rb @@ -17,7 +17,7 @@ # Essay question q2 = data[:questions].find { |q| q[:data][:stimulus]&.include?("Hamlet") } expect(q2).not_to be_nil - expect(q2[:type]).to eq("longanswer") + expect(q2[:type]).to eq("longtext") # MA question q3 = data[:questions].find { |q| q[:data][:stimulus]&.include?("European capitals") } @@ -68,7 +68,7 @@ # Q3: Essay (Type: E) q3 = data[:questions].find { |q| q[:data][:stimulus]&.include?("Discuss the pros and cons") } expect(q3).not_to be_nil - expect(q3[:type]).to eq("longanswer") + expect(q3[:type]).to eq("longtext") # Q4: MCQ with multiple correct (MA) q4 = data[:questions].find { |q| q[:data][:stimulus]&.include?("southern states") } diff --git a/spec/atomic_assessments_import/questions/cloze_dropdown_spec.rb b/spec/atomic_assessments_import/questions/cloze_dropdown_spec.rb new file mode 100644 index 0000000..b7c50ef --- /dev/null +++ b/spec/atomic_assessments_import/questions/cloze_dropdown_spec.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +require "atomic_assessments_import" + +RSpec.describe AtomicAssessmentsImport::Questions::ClozeDropdown do + let(:row) do + { + "question text" => "The _____ Ocean is the world's largest body of water.", + "question type" => "fill_in_the_blank", + "option a" => "Choice of: 
Pacific | Indian | Southern | Atlantic | Arctic | 1", + "points" => "1", + } + end + + describe "#question_type" do + it "returns clozedropdown" do + question = described_class.new(row) + expect(question.question_type).to eq("clozedropdown") + end + end + + describe "#to_learnosity" do + it "puts scoring_type at top level of validation, not inside valid_response" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:validation][:scoring_type]).to eq("partialMatchV2") + expect(result[:data][:validation][:valid_response].keys).not_to include(:scoring_type) + end + + it "puts question text with {{response}} in template and leaves stimulus empty" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:template]).to include("{{response}}") + expect(result[:data][:stimulus]).to eq("") + end + + it "replaces _____ blank marker with {{response}} in template" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:template]).to eq("The {{response}} Ocean is the world's largest body of water.") + end + + it "builds possible_responses as array of arrays from Choice of: options" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:possible_responses]).to eq([ + ["Pacific", "Indian", "Southern", "Atlantic", "Arctic"], + ]) + end + + it "sets valid_response value to the correct answer string" do + question = described_class.new(row) + result = question.to_learnosity + # "| 1" means first item (1-indexed) = "Pacific" + expect(result[:data][:validation][:valid_response][:value]).to eq(["Pacific"]) + end + + context "with multiple blanks" do + let(:row) do + { + "question text" => "The __1__ and __2__ are both planets.", + "question type" => "fill_in_the_blank", + "option a" => "Choice of: Mercury | Venus | Earth | 1", + "option b" => "Choice of: Mars | Jupiter | Saturn | 2", + "points" => "2", + } + end + + 
it "builds possible_responses for each blank" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:possible_responses]).to eq([ + ["Mercury", "Venus", "Earth"], + ["Mars", "Jupiter", "Saturn"], + ]) + end + + it "sets valid_response value for each blank in order" do + question = described_class.new(row) + result = question.to_learnosity + # option a: index 1 = "Mercury"; option b: index 2 = "Jupiter" + expect(result[:data][:validation][:valid_response][:value]).to eq(["Mercury", "Jupiter"]) + end + + it "replaces __n__ markers with {{response}} in template" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:template]).to eq("The {{response}} and {{response}} are both planets.") + end + end + end +end diff --git a/spec/atomic_assessments_import/questions/essay_spec.rb b/spec/atomic_assessments_import/questions/essay_spec.rb index abdf9c2..14d0229 100644 --- a/spec/atomic_assessments_import/questions/essay_spec.rb +++ b/spec/atomic_assessments_import/questions/essay_spec.rb @@ -14,9 +14,9 @@ end describe "#question_type" do - it "returns longanswer" do + it "returns longtext" do question = described_class.new(row) - expect(question.question_type).to eq("longanswer") + expect(question.question_type).to eq("longtext") end end @@ -24,7 +24,7 @@ it "returns correct structure" do question = described_class.new(row) result = question.to_learnosity - expect(result[:type]).to eq("longanswer") + expect(result[:type]).to eq("longtext") expect(result[:widget_type]).to eq("response") expect(result[:data][:stimulus]).to eq("Discuss the causes of World War I.") end diff --git a/spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb b/spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb index df948ac..be9b4d0 100644 --- a/spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb +++ b/spec/atomic_assessments_import/questions/fill_in_the_blank_spec.rb @@ 
-28,7 +28,14 @@ expect(result[:data][:template]).to eq("The capital of France is {{response}}.") end - it "includes validation with correct answers array" do + it "puts scoring_type at top level of validation, not inside valid_response" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:validation][:scoring_type]).to eq("partialMatchV2") + expect(result[:data][:validation][:valid_response].keys).not_to include(:scoring_type) + end + + it "includes each answer as a flat array of strings in valid_response value" do row["correct answer"] = "Paris; Lyon; Marseille" question = described_class.new(row) result = question.to_learnosity diff --git a/spec/atomic_assessments_import/questions/ordering_spec.rb b/spec/atomic_assessments_import/questions/ordering_spec.rb index d4095b0..7779589 100644 --- a/spec/atomic_assessments_import/questions/ordering_spec.rb +++ b/spec/atomic_assessments_import/questions/ordering_spec.rb @@ -29,11 +29,45 @@ expect(result[:data][:list]).to eq(["World War I", "World War II", "Cold War"]) end - it "includes validation with correct order indices" do + it "includes validation with correct order indices as integers" do question = described_class.new(row) result = question.to_learnosity - expect(result[:data][:validation][:valid_response][:value]).to eq(["0", "1", "2"]) + expect(result[:data][:validation][:valid_response][:value]).to eq([0, 1, 2]) expect(result[:data][:validation][:valid_response][:score]).to eq(3) end + + it "puts scoring_type at top level of validation" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:validation][:scoring_type]).to eq("partialMatchV2") + expect(result[:data][:validation][:valid_response].keys).not_to include(:scoring_type) + end + + context "with ExamSoft --- n order markers" do + let(:row) do + { + "question text" => "Drag and rearrange the following colors in alphabetical order.", + "question type" => "ordering", + 
"option a" => "Yellow --- 4", + "option b" => "Orange --- 2", + "option c" => "Green --- 1", + "option d" => "Red --- 3", + "points" => "1", + } + end + + it "strips --- n markers from list items" do + question = described_class.new(row) + result = question.to_learnosity + expect(result[:data][:list]).to eq(["Yellow", "Orange", "Green", "Red"]) + end + + it "derives correct order from --- n numbers as integers" do + question = described_class.new(row) + result = question.to_learnosity + # Green=1, Orange=2, Red=3, Yellow=4 → indices in list: [2, 1, 3, 0] + expect(result[:data][:validation][:valid_response][:value]).to eq([2, 1, 3, 0]) + end + end end end From 323249e883bbc7378d5ecc62cddd3d219d193a54 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz <jaschwartz27@gmail.com> Date: Tue, 24 Feb 2026 10:37:59 -0700 Subject: [PATCH 26/30] feat: prevent inclusion of items with empty definition.widgets in conversion output --- .../exam_soft/converter.rb | 8 ++++++-- .../examsoft/html_converter_spec.rb | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/lib/atomic_assessments_import/exam_soft/converter.rb b/lib/atomic_assessments_import/exam_soft/converter.rb index 510a03d..c714ba9 100644 --- a/lib/atomic_assessments_import/exam_soft/converter.rb +++ b/lib/atomic_assessments_import/exam_soft/converter.rb @@ -69,8 +69,12 @@ def convert all_warnings << build_warning("#{title}: #{e.message}, imported as draft", index: index, question_type: row["question type"]) begin item, question_widgets = convert_row_minimal(row) - items << item - questions += question_widgets + if item[:definition][:widgets].empty? 
+ all_warnings << build_warning("#{title}: Could not import even minimally, skipped", index: index, question_type: row["question type"]) + else + items << item + questions += question_widgets + end rescue StandardError all_warnings << build_warning("#{title}: Could not import even minimally, skipped", index: index, question_type: row["question type"]) end diff --git a/spec/atomic_assessments_import/examsoft/html_converter_spec.rb b/spec/atomic_assessments_import/examsoft/html_converter_spec.rb index 468f157..bf6e34a 100644 --- a/spec/atomic_assessments_import/examsoft/html_converter_spec.rb +++ b/spec/atomic_assessments_import/examsoft/html_converter_spec.rb @@ -143,5 +143,20 @@ data = described_class.new(modified_file).convert expect(data[:errors]).to include(a_hash_including(message: a_string_matching(/correct answer/i))) end + + it "does not include items with empty definition.widgets in the output" do + modified_file = Tempfile.new("temp.html") + original_content = File.read("spec/fixtures/simple.html") + # Remove asterisk from correct answers so MCQ raises "Missing correct answer" + # and falls back to convert_row_minimal which produces definition: { widgets: [] } + modified_content = original_content.gsub(/\*([a-oA-O]\))/, '\1') + modified_file.write(modified_content) + modified_file.rewind + + data = described_class.new(modified_file).convert + # Items with empty definition.widgets cause Learnosity to reject the entire batch + items_with_empty_widgets = data[:items].select { |i| i[:definition][:widgets].empty? 
} + expect(items_with_empty_widgets).to be_empty + end end end From 22ae432dbcb2aac5a802df914b131300415deb92 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz <jaschwartz27@gmail.com> Date: Tue, 24 Feb 2026 12:18:42 -0700 Subject: [PATCH 27/30] feat: update status handling for unsupported question types and missing data in extractor --- .../exam_soft/converter.rb | 4 ++-- .../exam_soft/extractor.rb | 14 ++++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/lib/atomic_assessments_import/exam_soft/converter.rb b/lib/atomic_assessments_import/exam_soft/converter.rb index c714ba9..07b7018 100644 --- a/lib/atomic_assessments_import/exam_soft/converter.rb +++ b/lib/atomic_assessments_import/exam_soft/converter.rb @@ -61,12 +61,12 @@ def convert end begin - item, question_widgets = convert_row(row, status) + item, question_widgets = convert_row(row, "published") items << item questions += question_widgets rescue StandardError => e title = row["title"] || "Question #{index + 1}" - all_warnings << build_warning("#{title}: #{e.message}, imported as draft", index: index, question_type: row["question type"]) + all_warnings << build_warning("#{title}: #{e.message}", index: index, question_type: row["question type"]) # TODO: see if we can support drafts: ", imported as draft" begin item, question_widgets = convert_row_minimal(row) if item[:definition][:widgets].empty? diff --git a/lib/atomic_assessments_import/exam_soft/extractor.rb b/lib/atomic_assessments_import/exam_soft/extractor.rb index 6151b34..c25ed15 100644 --- a/lib/atomic_assessments_import/exam_soft/extractor.rb +++ b/lib/atomic_assessments_import/exam_soft/extractor.rb @@ -30,22 +30,22 @@ def self.extract(nodes) status = "published" unless SUPPORTED_TYPES.include?(question_type) - warnings << "Unsupported question type '#{question_type}', imported as draft" + warnings << "Unsupported question type '#{question_type}'"#, imported as draft" status = "draft" end if stem.nil? 
- warnings << "No question text found, imported as draft" + warnings << "No question text found"#, imported as draft" status = "draft" end if OPTION_TYPES.include?(question_type) if options.empty? - warnings << "No options found for #{question_type} question, imported as draft" + warnings << "No options found for #{question_type} question"#, imported as draft" status = "draft" end if correct_answers.empty? - warnings << "No correct answer found, imported as draft" + warnings << "No correct answer found"#, imported as draft" status = "draft" end end @@ -81,10 +81,8 @@ def self.extract(nodes) row["option #{letter}"] = opt[:text] end - # For FITB questions, options ARE the answers (no asterisk marking). - # Skip if options are dropdown choices ("Choice of: ...") — those are handled by ClozeDropdown. - if question_type == "fill_in_the_blank" && row["correct answer"].blank? && !options.empty? && - !options.first[:text].to_s.match?(/\AChoice of:/i) + # For FITB questions, options ARE the answers (no asterisk marking) + if question_type == "fill_in_the_blank" && row["correct answer"].blank? && !options.empty? row["correct answer"] = options.map { |opt| opt[:text] }.join("; ") end From f2c79f85cc5db0e871ea802b208fbb6e299526b6 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz <jaschwartz27@gmail.com> Date: Tue, 24 Feb 2026 12:51:11 -0700 Subject: [PATCH 28/30] feat: expand supported file types and update usage instructions in README --- README.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9409723..8f5666d 100755 --- a/README.md +++ b/README.md @@ -1,6 +1,14 @@ # Atomic Assessments Import -Import converters for atomic assessments. Currently only CSV multiple choice format is supported by this GEM. +Import converters for atomic assessments. 
Currently this GEM supports the following export and file types: +* CSV + - Multiple Choice +* ExamSoft (in RTF, HTML, or DOCX file format) + - Multiple Choice + - True/False + - Fill in the Blank / Cloze + - Ordering + - Essay For QTI conversion, see: @@ -21,6 +29,14 @@ If bundler is not being used to manage dependencies, install the gem by executin $ gem install atomic_assessments_import +## Usage +``` +Usage: bin/convert <file> <export_path> [converter] + <file> Path to CSV or RTF file to convert + <export_path> Path for output ZIP file + [converter] Which converter to use- 'examsoft' for files coming from ExamSoft, 'csv' for standard CSV files. Defaults to csv if not specified. +``` + ## Standalone conversion scripts Convert a CSV to a learnosity archive: @@ -31,6 +47,10 @@ Convert a CSV to json on standard out: $ bin/convert_to_json input.csv +Convert an ExamSoft RTF to a learnosity archive: + + $ bin/convert input.rtf output.zip examsoft + ## CSV input format All columns are optional execpt "Option A", "Option B", and "Correct Answer". 
From 58af77fb8c9a1fc4d36af9745027c9e6f96e2675 Mon Sep 17 00:00:00 2001 From: Jacob Schwartz <jaschwartz27@gmail.com> Date: Wed, 25 Feb 2026 15:46:09 -0700 Subject: [PATCH 29/30] feat: update extractor logic to handle non-published status and add pandoc-ruby dependency --- Gemfile | 1 - Gemfile.lock | 4 +-- atomic_assessments_import.gemspec | 3 +- .../exam_soft/converter.rb | 34 ++----------------- .../exam_soft/extractor.rb | 15 ++++---- lib/atomic_assessments_import/version.rb | 2 +- .../examsoft/extractor_spec.rb | 12 +++---- 7 files changed, 21 insertions(+), 50 deletions(-) diff --git a/Gemfile b/Gemfile index 9f4b35b..cc51bb7 100644 --- a/Gemfile +++ b/Gemfile @@ -10,7 +10,6 @@ group :development do gem "rubocop" gem "rubocop-performance" gem "rubocop-rspec" - gem 'pandoc-ruby', '~> 2.1' end group :test do diff --git a/Gemfile.lock b/Gemfile.lock index 453b60b..64a9fd4 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,10 +1,11 @@ PATH remote: . specs: - atomic_assessments_import (0.3.0) + atomic_assessments_import (0.4.0) activesupport csv mimemagic + pandoc-ruby (~> 2.1) rubyzip (~> 3.0) GEM @@ -108,7 +109,6 @@ PLATFORMS DEPENDENCIES atomic_assessments_import! 
byebug - pandoc-ruby (~> 2.1) rspec rubocop rubocop-performance diff --git a/atomic_assessments_import.gemspec b/atomic_assessments_import.gemspec index 748755e..3684460 100644 --- a/atomic_assessments_import.gemspec +++ b/atomic_assessments_import.gemspec @@ -5,7 +5,7 @@ require_relative "lib/atomic_assessments_import/version" Gem::Specification.new do |spec| spec.name = "atomic_assessments_import" spec.version = AtomicAssessmentsImport::VERSION - spec.authors = ["Sean Collings", "Matt Petro"] + spec.authors = ["Sean Collings", "Matt Petro", "Jacob Schwartz"] spec.email = ["support@atomicjolt.com"] spec.summary = "Importer to Convert different formats to AA's import format" @@ -37,4 +37,5 @@ Gem::Specification.new do |spec| spec.add_dependency "csv" spec.add_dependency "mimemagic" spec.add_dependency "rubyzip", "~> 3.0" + spec.add_dependency "pandoc-ruby", "~> 2.1" end diff --git a/lib/atomic_assessments_import/exam_soft/converter.rb b/lib/atomic_assessments_import/exam_soft/converter.rb index 07b7018..f2765e2 100644 --- a/lib/atomic_assessments_import/exam_soft/converter.rb +++ b/lib/atomic_assessments_import/exam_soft/converter.rb @@ -60,24 +60,15 @@ def convert next end + next unless status == "published" + begin item, question_widgets = convert_row(row, "published") items << item questions += question_widgets rescue StandardError => e title = row["title"] || "Question #{index + 1}" - all_warnings << build_warning("#{title}: #{e.message}", index: index, question_type: row["question type"]) # TODO: see if we can support drafts: ", imported as draft" - begin - item, question_widgets = convert_row_minimal(row) - if item[:definition][:widgets].empty? 
- all_warnings << build_warning("#{title}: Could not import even minimally, skipped", index: index, question_type: row["question type"]) - else - items << item - questions += question_widgets - end - rescue StandardError - all_warnings << build_warning("#{title}: Could not import even minimally, skipped", index: index, question_type: row["question type"]) - end + all_warnings << build_warning("#{title}: #{e.message}", index: index, question_type: row["question type"]) end end @@ -207,25 +198,6 @@ def convert_row(row, status = "published") [item, [question_learnosity]] end - def convert_row_minimal(row) - reference = SecureRandom.uuid - item = { - reference: reference, - title: row["title"] || "", - status: "draft", - tags: {}, - metadata: { - import_date: Time.now.iso8601, - import_type: "examsoft", - }, - source: "<p>ExamSoft Import on #{Time.now.strftime('%Y-%m-%d')}</p>\n", - description: row["question text"] || "", - questions: [], - features: [], - definition: { widgets: [] }, - } - [item, []] - end end end end diff --git a/lib/atomic_assessments_import/exam_soft/extractor.rb b/lib/atomic_assessments_import/exam_soft/extractor.rb index c25ed15..34093c9 100644 --- a/lib/atomic_assessments_import/exam_soft/extractor.rb +++ b/lib/atomic_assessments_import/exam_soft/extractor.rb @@ -30,23 +30,22 @@ def self.extract(nodes) status = "published" unless SUPPORTED_TYPES.include?(question_type) - warnings << "Unsupported question type '#{question_type}'"#, imported as draft" - status = "draft" + warnings << "Unsupported question type '#{question_type}'" + status = nil end if stem.nil? - warnings << "No question text found"#, imported as draft" - status = "draft" + warnings << "No question text found" + status = nil end if OPTION_TYPES.include?(question_type) if options.empty? - warnings << "No options found for #{question_type} question"#, imported as draft" - status = "draft" + warnings << "No options found for #{question_type} question" end if correct_answers.empty? 
- warnings << "No correct answer found"#, imported as draft" - status = "draft" + warnings << "No correct answer found" + status = nil end end diff --git a/lib/atomic_assessments_import/version.rb b/lib/atomic_assessments_import/version.rb index 7398df9..8a67e45 100644 --- a/lib/atomic_assessments_import/version.rb +++ b/lib/atomic_assessments_import/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module AtomicAssessmentsImport - VERSION = "0.3.0" + VERSION = "0.4.0" end diff --git a/spec/atomic_assessments_import/examsoft/extractor_spec.rb b/spec/atomic_assessments_import/examsoft/extractor_spec.rb index 7a6a3ac..0e17c85 100644 --- a/spec/atomic_assessments_import/examsoft/extractor_spec.rb +++ b/spec/atomic_assessments_import/examsoft/extractor_spec.rb @@ -31,7 +31,7 @@ def nodes_from(html) expect(result[:warnings]).to be_empty end - it "returns draft status when no correct answer" do + it "returns non-published status when no correct answer" do nodes = nodes_from(<<~HTML) <p>1) What is the capital of France?</p> <p>a) Paris</p> @@ -39,18 +39,18 @@ def nodes_from(html) HTML result = described_class.extract(nodes) - expect(result[:status]).to eq("draft") + expect(result[:status]).not_to eq("published") expect(result[:warnings]).to include(a_string_matching(/correct answer/i)) end - it "returns draft status when no question text found" do + it "returns non-published status when no question text found" do nodes = nodes_from(<<~HTML) <p>a) Paris</p> <p>b) London</p> HTML result = described_class.extract(nodes) - expect(result[:status]).to eq("draft") + expect(result[:status]).not_to eq("published") expect(result[:warnings]).to include(a_string_matching(/question text/i)) end @@ -78,13 +78,13 @@ def nodes_from(html) expect(result[:status]).to eq("published") end - it "warns for unsupported question types but still imports" do + it "warns and returns non-published status for unsupported question types" do nodes = nodes_from(<<~HTML) <p>Type: Hotspot 1) 
Identify the region on the map.</p> HTML result = described_class.extract(nodes) - expect(result[:status]).to eq("draft") + expect(result[:status]).not_to eq("published") expect(result[:warnings]).to include(a_string_matching(/unsupported.*hotspot/i)) end From 1422b271a6e370cf08ce88e2de02a704e8d85396 Mon Sep 17 00:00:00 2001 From: Matt Petro <matt.petro@atomicjolt.com> Date: Thu, 26 Feb 2026 15:09:03 +0000 Subject: [PATCH 30/30] Fix CI --- .github/workflows/github-actions-ci-rspec.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/github-actions-ci-rspec.yml b/.github/workflows/github-actions-ci-rspec.yml index 138cefb..2a2faeb 100644 --- a/.github/workflows/github-actions-ci-rspec.yml +++ b/.github/workflows/github-actions-ci-rspec.yml @@ -22,6 +22,9 @@ jobs: ruby-version: ${{ matrix.ruby-version }} bundler-cache: true + - name: Install pandoc + run: sudo apt-get install -y pandoc + - name: Install dependencies env: RAILS_ENV: test