# frozen_string_literal: true

require 'csv'
require 'fileutils'
require 'securerandom'
require 'ffaker'

# Data anonymization rake tasks for creating PII-safe database dumps.
#
# Workflow:
#   1. DevOps restores prod backup into a lower environment
#   2. DevOps runs: bundle exec rake data:anonymize
#   3. DevOps runs: bundle exec rake data:anonymize:verify
#   4. If PASS, DevOps runs mongodump and shares the anonymized dump
#
# Usage:
#   bundle exec rake data:anonymize
#   bundle exec rake data:anonymize:verify
#   bundle exec rake data:anonymize BATCH_SIZE=1000
#   bundle exec rake data:anonymize SKIP_CONFIRMATION=true   # no interactive prompt
#
# data:anonymize:verify exits non-zero when any check fails, so it can gate
# the dump step in a script.
#
# Uses bulk_write for performance (~500k people in ~15-20 min).
# Uses SymmetricEncryption for SSN (writes encrypted_ssn, not plain ssn).
namespace :data do
  desc "Anonymize all PII data in the current database. NOT safe for production."
  task :anonymize => :environment do
    # Strict base-10 parsing: BATCH_SIZE=abc raises here instead of silently
    # becoming 0 the way String#to_i would.
    batch_size = Integer(ENV.fetch('BATCH_SIZE', '1000'), 10)
    DataAnonymizer::Runner.new(batch_size: batch_size).run
  end

  namespace :anonymize do
    desc "Generate a verification report confirming PII has been anonymized"
    task :verify => :environment do
      passed = DataAnonymizer::Verifier.new.run
      abort("Anonymization verification FAILED. Do not share this dump.") unless passed
    end
  end
end

module DataAnonymizer
  # Applicant-level immigration document fields. The runner replaces each one
  # that is present; the verifier re-checks the same list.
  IMMIGRATION_DOC_FIELDS = %w[
    alien_number i94_number visa_number passport_number sevis_id
    naturalization_number receipt_number citizenship_number card_number
  ].freeze

  # ------------------------------------------------------------------
  # Fake data generators
  # ------------------------------------------------------------------
  module FakeData
    module_function

    def first_name
      FFaker::Name.first_name
    end

    def last_name
      FFaker::Name.last_name
    end

    # Returns a 9-digit string in valid SSN format:
    # area 001-665 or 667-899, group 01-99, serial 0001-9999.
    def ssn
      # Draw from 898 slots and step over 666 so both documented area ranges
      # are reachable.
      area = rand(1..898)
      area += 1 if area >= 666
      format('%03d%02d%04d', area, rand(1..99), rand(1..9999))
    end

    # Encrypts +plain_ssn+ by delegating to Ssn.encrypt_ssn.
    def encrypt_ssn(plain_ssn)
      Ssn.encrypt_ssn(plain_ssn)
    end

    # Returns a freshly generated fake SSN, already encrypted.
    def encrypted_ssn
      encrypt_ssn(ssn)
    end

    # Returns a random day offset within +/- 1095 days.
    def dob_shift_days
      rand(-1095..1095) # ±3 years
    end

    # Shifts +original_dob+ by +shift_days+ (random when omitted) and clamps
    # the result to the window 1920-01-01 .. yesterday.
    # Returns nil when +original_dob+ is nil.
    def shift_dob(original_dob, shift_days: nil)
      return nil if original_dob.nil?

      shift_days ||= dob_shift_days
      new_dob = original_dob + shift_days
      new_dob = Date.new(1920, 1, 1) if new_dob.year < 1920
      new_dob = Date.today - 1 if new_dob >= Date.today
      new_dob
    end

    def address_1
      "#{rand(100..9999)} #{FFaker::Address.street_name}"
    end

    def city
      FFaker::Address.city
    end

    def zip
      FFaker::AddressUS.zip_code
    end

    def county
      "#{FFaker::Address.city} County"
    end

    # Returns "user<index>@example.com", or a random 8-hex-char suffix when no
    # index is given.
    def email(index = nil)
      prefix = index ? "user#{index}" : "user#{SecureRandom.hex(4)}"
      "#{prefix}@example.com"
    end

    # Returns a 7-digit local number as a string.
    def phone_number
      format('%07d', rand(1_000_000..9_999_999))
    end

    # Returns a 3-digit area code between 200 and 999 as a string.
    def area_code
      format('%03d', rand(200..999))
    end

    def full_phone
      "#{area_code}#{phone_number}"
    end

    # Returns 8 random digits zero-padded to 9 characters, so the value always
    # starts with "0".
    def fein
      format('%09d', rand(10_000_000..99_999_999))
    end

    def company_name
      FFaker::Company.name
    end

    # Same shape as #fein: 9 characters, always starting with "0".
    def routing_number
      format('%09d', rand(10_000_000..99_999_999))
    end

    # Returns 12 random lowercase hex characters.
    def account_number
      SecureRandom.hex(6)
    end

    # Returns 10 random uppercase hex characters. The verifier recognises
    # generated document numbers by this shape.
    def doc_number
      SecureRandom.hex(5).upcase
    end
  end

  # ------------------------------------------------------------------
  # Main runner — orchestrates all phases
  # ------------------------------------------------------------------
  class Runner
    attr_reader :batch_size, :client, :db

    # batch_size: number of documents fetched and written per bulk_write.
    # Raises ArgumentError unless it is a positive Integer, so a bad value
    # fails before any data is touched.
    def initialize(batch_size: 1000)
      unless batch_size.is_a?(Integer) && batch_size.positive?
        raise ArgumentError, "batch_size must be a positive integer (got #{batch_size.inspect})"
      end

      @batch_size = batch_size
      @client = Mongoid.default_client
      @db = @client.database
      @reference_date = TimeKeeper.date_of_record
    end

    # Runs the safety checks, then every anonymization phase in order, and logs
    # a per-collection summary.
    def run
      abort_if_production!
      confirm_anonymization!

      log "=== Starting Data Anonymization ==="
      log "Database: #{db.name}"
      log "Time: #{Time.current}"
      start_time = Time.current

      stats = {}
      stats[:people] = anonymize_people
      stats[:users] = anonymize_users
      stats[:census_members] = anonymize_census_members
      stats[:fa_applications] = anonymize_fa_applications
      stats[:organizations] = anonymize_organizations
      stats[:bs_organizations] = anonymize_bs_organizations

      elapsed = (Time.current - start_time).round(1)
      log "\n=== Anonymization Complete (#{elapsed}s) ==="
      stats.each { |k, v| log " #{k}: #{v} records processed" }
    end

    private

    # --- Safety ---

    # Aborts the process when Rails is in production or the database name
    # contains "production" (case-insensitive).
    def abort_if_production!
      return unless Rails.env.production? || db.name.match?(/production/i)

      abort(
        "*** SAFETY ABORT ***\n" \
        "Refusing to run anonymization.\n" \
        "Rails.env=#{Rails.env}, database=#{db.name}\n" \
        "This task must NOT run against a production database."
      )
    end

    # Requires the operator to type YES_ANONYMIZE unless SKIP_CONFIRMATION=true.
    def confirm_anonymization!
      return if ENV['SKIP_CONFIRMATION'] == 'true'

      puts "\nWARNING: This will IRREVERSIBLY rewrite all PII in database '#{db.name}'."
      puts "Type YES_ANONYMIZE to proceed:"
      input = $stdin.gets&.strip
      abort("Aborted.") unless input == 'YES_ANONYMIZE'
    end

    # --- Batch plumbing shared by every phase ---

    # Logs the phase banner and returns the collection's document count.
    def announce_phase(collection, title)
      total = collection.count_documents({})
      log "\n--- #{title} (#{total}) ---"
      total
    end

    # Streams +collection+ in slices of batch_size. Yields each document with
    # its zero-based position in the scan and expects a bulk_write operation
    # hash back, or nil to leave that document untouched.
    # Returns the number of documents visited.
    def rewrite_in_batches(collection, total, noun)
      processed = 0

      collection.find.batch_size(batch_size).each_slice(batch_size) do |batch|
        updates = batch.each_with_index.map { |doc, idx| yield(doc, processed + idx) }.compact
        collection.bulk_write(updates, ordered: false) unless updates.empty?

        processed += batch.size
        log " #{processed}/#{total} #{noun}" if (processed % (batch_size * 5)).zero? || processed >= total
      end

      processed
    end

    # Builds one update_one operation that $sets +set_fields+ on +doc+ and
    # optionally $unsets the fields named in +unset+.
    def set_update(doc, set_fields, unset: nil)
      update = { '$set' => set_fields }
      update['$unset'] = unset if unset
      { update_one: { filter: { '_id' => doc['_id'] }, update: update } }
    end

    # --- Phase 1: People ---

    def anonymize_people
      collection = db[:people]
      total = announce_phase(collection, 'Phase 1: Anonymizing People')
      family_shifts = build_family_shift_map

      rewrite_in_batches(collection, total, 'people') do |doc, _seq|
        shift_days = family_shifts[doc['_id']] || FakeData.dob_shift_days
        set_fields = build_person_update(doc, shift_days: shift_days)
        # Any plain-text ssn field is removed outright.
        set_update(doc, set_fields, unset: { 'ssn' => '' })
      end
    end

    # Returns the $set payload for one person document. Fields absent from the
    # original are not added, except the name fields which are always written.
    def build_person_update(doc, shift_days:)
      new_first = FakeData.first_name
      new_last = FakeData.last_name
      fields = {
        'first_name' => new_first,
        'last_name' => new_last,
        'full_name' => "#{new_first} #{new_last}",
        'middle_name' => FakeData.first_name.first(1),
        'alternate_name' => nil
      }

      # SSN — write encrypted_ssn, not plain ssn
      fields['encrypted_ssn'] = FakeData.encrypted_ssn if doc['encrypted_ssn'].present?

      # DOB — shift ±3 years (consistent shift within family)
      fields['dob'] = FakeData.shift_dob(doc['dob'].to_date, shift_days: shift_days) if doc['dob'].present?

      if doc['date_of_death'].present?
        fields['date_of_death'] = FakeData.shift_dob(doc['date_of_death'].to_date, shift_days: shift_days)
      end

      # Embedded addresses, phones and emails
      fields['addresses'] = doc['addresses'].map { |addr| anonymize_address_hash(addr) } if doc['addresses'].present?
      fields['phones'] = doc['phones'].map { |phone| anonymize_phone_hash(phone) } if doc['phones'].present?
      fields['emails'] = doc['emails'].map { |em| anonymize_email_hash(em) } if doc['emails'].present?

      fields
    end

    # Returns { person_id => shift_days } with one shared shift per family, so
    # age gaps inside a family are preserved. A person listed in several
    # families keeps the shift of the last family scanned.
    def build_family_shift_map
      people_collection = db[:people]
      shift_map = {}

      families = db[:families].find.projection('family_members.person_id' => 1).batch_size(batch_size)
      families.each_slice(batch_size) do |family_batch|
        member_groups = family_batch.map do |family|
          (family['family_members'] || []).map { |fm| fm['person_id'] }.compact
        end
        member_groups.reject!(&:empty?)
        next if member_groups.empty?

        # One $in lookup per batch of families instead of one query per family.
        dob_by_person = {}
        people_collection.find('_id' => { '$in' => member_groups.flatten(1).uniq })
                         .projection('_id' => 1, 'dob' => 1)
                         .each { |person| dob_by_person[person['_id']] = person['dob']&.to_date }

        member_groups.each do |person_ids|
          dobs = person_ids.map { |person_id| dob_by_person[person_id] }.compact
          shift_days = pick_group_shift_days(dobs)
          person_ids.each { |person_id| shift_map[person_id] = shift_days }
        end
      end

      log "Built family shift map for #{shift_map.size} people"
      shift_map
    end

    # Picks one shift that keeps every DOB in +dobs+ inside its age band.
    # Falls back to an unconstrained random shift when no DOB yields a range,
    # and to 0 (DOBs left unchanged) when the ranges do not intersect.
    def pick_group_shift_days(dobs)
      ranges = dobs.map { |dob| allowed_shift_range(dob, @reference_date) }.compact
      return FakeData.dob_shift_days if ranges.empty?

      min_shift = ranges.map(&:first).max
      max_shift = ranges.map(&:last).min
      return 0 if min_shift > max_shift

      rand(min_shift..max_shift)
    end

    # Returns [min_shift, max_shift] in days such that dob + shift stays
    # within ±1095 days, between 1920-01-01 and the day before +reference_date+,
    # and in the same age band (<18, 18-25, 26+) as +dob+.
    # Returns nil for non-Date input or when no shift satisfies all bounds.
    def allowed_shift_range(dob, reference_date)
      return nil unless dob.is_a?(Date)

      band = age_band(dob, reference_date)
      min_shift = -1095
      max_shift = 1095

      # Global bounds
      min_shift = [min_shift, (Date.new(1920, 1, 1) - dob).to_i].max
      max_shift = [max_shift, (reference_date - 1 - dob).to_i].min

      # Band bounds
      cutoff_18 = reference_date - 18.years
      cutoff_26 = reference_date - 26.years

      case band
      when :under_18
        min_shift = [min_shift, (cutoff_18 + 1 - dob).to_i].max
      when :between_18_25
        min_shift = [min_shift, (cutoff_26 + 1 - dob).to_i].max
        max_shift = [max_shift, (cutoff_18 - dob).to_i].min
      when :over_26
        max_shift = [max_shift, (cutoff_26 - dob).to_i].min
      end

      return nil if min_shift > max_shift

      [min_shift, max_shift]
    end

    # Returns :under_18, :between_18_25, or :over_26 (which covers age 26 and up).
    def age_band(dob, reference_date)
      age = age_on(dob, reference_date)
      return :under_18 if age < 18
      return :between_18_25 if age < 26

      :over_26
    end

    # Returns the age in whole years on +reference_date+.
    def age_on(dob, reference_date)
      age = reference_date.year - dob.year
      birthday_pending = reference_date.month < dob.month ||
                         (reference_date.month == dob.month && reference_date.day < dob.day)
      birthday_pending ? age - 1 : age
    end

    # --- Phase 2: Users ---

    def anonymize_users
      collection = db[:users]
      total = announce_phase(collection, 'Phase 2: Anonymizing Users')

      rewrite_in_batches(collection, total, 'users') do |doc, seq|
        # seq is the document's position in the scan, so email and oim_id are
        # distinct for every user.
        set_fields = {
          'email' => FakeData.email(seq),
          'oim_id' => "user_#{seq}",
          'authentication_token' => nil,
          'identity_verified_date' => nil,
          'idp_uuid' => nil
        }
        set_update(doc, set_fields)
      end
    end

    # --- Phase 3: Census Members (employees + dependents) ---

    def anonymize_census_members
      collection = db[:census_members]
      total = announce_phase(collection, 'Phase 3: Anonymizing Census Members')

      rewrite_in_batches(collection, total, 'census members') do |doc, _seq|
        dependents = doc['census_dependents'].presence || []

        dobs = []
        dobs << doc['dob']&.to_date if doc['dob'].present?
        dobs.concat(dependents.map { |dep| dep['dob']&.to_date })
        shift_days = pick_group_shift_days(dobs.compact)

        set_fields = build_census_member_update(doc, shift_days: shift_days)

        # Embedded census_dependents (same shift preserves age gaps)
        unless dependents.empty?
          set_fields['census_dependents'] = dependents.map do |dep|
            anonymize_census_dependent_hash(dep, shift_days: shift_days)
          end
        end

        set_update(doc, set_fields)
      end
    end

    # Returns the $set payload for one census member (dependents excluded).
    def build_census_member_update(doc, shift_days:)
      fields = {
        'first_name' => FakeData.first_name,
        'last_name' => FakeData.last_name,
        'middle_name' => nil
      }

      fields['encrypted_ssn'] = FakeData.encrypted_ssn if doc['encrypted_ssn'].present?
      fields['dob'] = FakeData.shift_dob(doc['dob'].to_date, shift_days: shift_days) if doc['dob'].present?
      fields['address'] = anonymize_address_hash(doc['address']) if doc['address'].present?
      fields['email'] = anonymize_email_hash(doc['email']) if doc['email'].present?

      fields
    end

    # Returns an anonymized copy of one embedded census dependent.
    def anonymize_census_dependent_hash(dep, shift_days:)
      dep = dep.dup
      dep['first_name'] = FakeData.first_name
      dep['last_name'] = FakeData.last_name
      dep['middle_name'] = nil
      dep['encrypted_ssn'] = FakeData.encrypted_ssn if dep['encrypted_ssn'].present?
      dep['dob'] = FakeData.shift_dob(dep['dob'].to_date, shift_days: shift_days) if dep['dob'].present?
      dep['address'] = anonymize_address_hash(dep['address']) if dep['address'].present?
      dep['email'] = anonymize_email_hash(dep['email']) if dep['email'].present?
      dep
    end

    # --- Phase 4: Financial Assistance Applications ---

    def anonymize_fa_applications
      # No existence check needed: a missing collection yields zero documents.
      collection = db[:financial_assistance_applications]
      total = announce_phase(collection, 'Phase 4: Anonymizing FA Applications')

      rewrite_in_batches(collection, total, 'FA applications') do |doc, _seq|
        applicants = doc['applicants']
        next nil unless applicants.present? # nothing to rewrite on this application

        dobs = applicants.map { |applicant| applicant['dob']&.to_date }.compact
        shift_days = pick_group_shift_days(dobs)
        anonymized = applicants.map { |applicant| anonymize_fa_applicant_hash(applicant, shift_days: shift_days) }

        set_update(doc, { 'applicants' => anonymized })
      end
    end

    # Returns an anonymized copy of one embedded FA applicant.
    def anonymize_fa_applicant_hash(applicant, shift_days:)
      applicant = applicant.dup
      applicant['first_name'] = FakeData.first_name
      applicant['last_name'] = FakeData.last_name
      applicant['middle_name'] = nil

      applicant['encrypted_ssn'] = FakeData.encrypted_ssn if applicant['encrypted_ssn'].present?
      if applicant['dob'].present?
        applicant['dob'] = FakeData.shift_dob(applicant['dob'].to_date, shift_days: shift_days)
      end

      # Immigration document numbers
      IMMIGRATION_DOC_FIELDS.each do |field|
        applicant[field] = FakeData.doc_number if applicant[field].present?
      end

      # Embedded addresses, phones and emails
      if applicant['addresses'].present?
        applicant['addresses'] = applicant['addresses'].map { |a| anonymize_address_hash(a) }
      end
      applicant['phones'] = applicant['phones'].map { |p| anonymize_phone_hash(p) } if applicant['phones'].present?
      applicant['emails'] = applicant['emails'].map { |e| anonymize_email_hash(e) } if applicant['emails'].present?

      applicant
    end

    # --- Phase 5: Legacy Organizations ---

    def anonymize_organizations
      collection = db[:organizations]
      total = announce_phase(collection, 'Phase 5: Anonymizing Organizations')

      rewrite_in_batches(collection, total, 'organizations') do |doc, _seq|
        set_fields = organization_identity_fields

        # Broker ACH
        if doc['broker_agency_profile'].present?
          set_fields['broker_agency_profile'] = anonymize_ach_fields(doc['broker_agency_profile'])
        end

        # Office locations
        if doc['office_locations'].present?
          set_fields['office_locations'] = doc['office_locations'].map { |ol| anonymize_office_location_hash(ol) }
        end

        set_update(doc, set_fields)
      end
    end

    # --- Phase 6: BenefitSponsors Organizations ---

    def anonymize_bs_organizations
      # No existence check needed: a missing collection yields zero documents.
      collection = db[:benefit_sponsors_organizations_organizations]
      total = announce_phase(collection, 'Phase 6: Anonymizing BS Organizations')

      rewrite_in_batches(collection, total, 'BS organizations') do |doc, _seq|
        set_fields = organization_identity_fields

        # Embedded profiles (polymorphic — may include broker with ACH)
        if doc['profiles'].present?
          set_fields['profiles'] = doc['profiles'].map do |profile|
            profile = anonymize_ach_fields(profile)
            if profile['office_locations'].present?
              profile['office_locations'] = profile['office_locations'].map { |ol| anonymize_office_location_hash(ol) }
            end
            profile
          end
        end

        set_update(doc, set_fields)
      end
    end

    # --- Shared embedded doc helpers ---

    # Returns fresh fake legal_name / dba / fein values for an organization.
    def organization_identity_fields
      {
        'legal_name' => FakeData.company_name,
        'dba' => FakeData.company_name,
        'fein' => FakeData.fein
      }
    end

    # Returns a copy of +record+ with any present ACH routing/account numbers replaced.
    def anonymize_ach_fields(record)
      record = record.dup
      record['ach_routing_number'] = FakeData.routing_number if record['ach_routing_number'].present?
      record['ach_account_number'] = FakeData.account_number if record['ach_account_number'].present?
      record
    end

    # Returns a copy of one office location with its address and phone anonymized.
    def anonymize_office_location_hash(office_location)
      office_location = office_location.dup
      if office_location['address'].present?
        office_location['address'] = anonymize_address_hash(office_location['address'])
      end
      office_location['phone'] = anonymize_phone_hash(office_location['phone']) if office_location['phone'].present?
      office_location
    end

    # Returns an anonymized copy of an embedded address (nil passes through).
    # address_3 and county are only written when the key already exists.
    def anonymize_address_hash(addr)
      return addr if addr.nil?

      addr = addr.dup
      addr['address_1'] = FakeData.address_1
      addr['address_2'] = nil
      addr['address_3'] = nil if addr.key?('address_3')
      addr['city'] = FakeData.city
      addr['zip'] = FakeData.zip
      addr['county'] = FakeData.county if addr.key?('county')
      addr
    end

    # Returns an anonymized copy of an embedded phone (nil passes through).
    def anonymize_phone_hash(phone)
      return phone if phone.nil?

      phone = phone.dup
      phone['area_code'] = FakeData.area_code
      phone['number'] = FakeData.phone_number
      phone['full_phone_number'] = "#{phone['area_code']}#{phone['number']}"
      phone['extension'] = nil
      phone
    end

    # Returns an anonymized copy of an embedded email (nil passes through).
    def anonymize_email_hash(em)
      return em if em.nil?

      em = em.dup
      em['address'] = FakeData.email
      em
    end

    # Writes to stdout (except in the test env) and to the Rails log.
    def log(msg)
      puts msg unless Rails.env.test?
      Rails.logger.info("[DataAnonymizer] #{msg}")
    end
  end

  # ------------------------------------------------------------------
  # Verification report
  # ------------------------------------------------------------------
  class Verifier
    GENERATED_EMAIL_PATTERN = /@example\.com\z/
    # Shape of FakeData.doc_number: exactly 10 uppercase hex characters.
    GENERATED_DOC_NUMBER_PATTERN = /\A[0-9A-F]{10}\z/
    SAMPLE_SIZE = 5000

    def initialize
      @client = Mongoid.default_client
      @db = @client.database
    end

    # Runs every check, prints a summary, and writes a CSV report under
    # Rails.root/tmp.
    # Returns true when all checks passed, false otherwise.
    def run
      log "=== Anonymization Verification Report ==="
      log "Database: #{@db.name}"
      log "Time: #{Time.current}"

      report_dir = File.join(Rails.root, 'tmp')
      FileUtils.mkdir_p(report_dir)
      report_path = File.join(report_dir, "anonymization_report_#{Date.today.strftime('%Y%m%d')}.csv")

      results = [
        check_people,
        check_users,
        check_census_members,
        check_fa_applications,
        check_organizations,
        check_bs_organizations
      ]
      all_passed = results.all? { |r| r[:passed] }

      # Console summary
      log "\n--- Summary ---"
      results.each do |r|
        status = r[:passed] ? "PASS" : "FAIL"
        log " #{r[:collection].ljust(45)} #{r[:total].to_s.rjust(8)} records | #{status} | #{r[:issues]}"
      end

      # CSV report
      CSV.open(report_path, "w") do |csv|
        csv << %w[collection total_records passed issues sample_values]
        results.each do |r|
          csv << [r[:collection], r[:total], r[:passed], r[:issues], r[:samples]]
        end
      end

      if all_passed
        log "\nSTATUS: PASS - All checks passed. Safe to dump and share."
      else
        log "\nSTATUS: FAIL - Some checks failed. Review issues above."
      end

      log "Report written to: #{report_path}"
      all_passed
    end

    private

    # Samples up to SAMPLE_SIZE people for non-generated emails and counts any
    # remaining plain-text ssn fields across the whole collection.
    def check_people
      collection = @db[:people]
      total = collection.count_documents({})
      issues = []

      # Check emails for real domains
      real_email_count = 0
      collection.find.limit(SAMPLE_SIZE).each do |doc|
        (doc['emails'] || []).each do |em|
          addr = em['address']
          real_email_count += 1 if addr.present? && !addr.match?(GENERATED_EMAIL_PATTERN)
        end
      end
      issues << "#{real_email_count} real emails in sample of #{SAMPLE_SIZE}" if real_email_count > 0

      # Check for plain text SSN field (should not exist after anonymization)
      plain_ssn_count = collection.count_documents('ssn' => { '$exists' => true })
      issues << "#{plain_ssn_count} records with plain-text 'ssn' field" if plain_ssn_count > 0

      # Sample values
      sample = collection.find.limit(3).to_a
      sample_names = sample.map { |d| "#{d['first_name']} #{d['last_name']}" }.join(", ")

      build_result("People (people)", total, issues, sample_names)
    end

    # Samples up to SAMPLE_SIZE users for emails outside the generated domain.
    def check_users
      collection = @db[:users]
      total = collection.count_documents({})
      issues = []

      real_email_count = 0
      collection.find.limit(SAMPLE_SIZE).each do |doc|
        addr = doc['email']
        real_email_count += 1 if addr.present? && !addr.match?(GENERATED_EMAIL_PATTERN)
      end
      issues << "#{real_email_count} users with real email domains" if real_email_count > 0

      sample = collection.find.limit(3).to_a
      sample_emails = sample.map { |d| d['email'] }.join(", ")

      build_result("Users (users)", total, issues, sample_emails)
    end

    # Counts census members that still carry a plain-text ssn field.
    def check_census_members
      collection = @db[:census_members]
      total = collection.count_documents({})
      issues = []

      # Check for plain-text SSN
      plain_ssn_count = collection.count_documents('ssn' => { '$exists' => true })
      issues << "#{plain_ssn_count} records with plain-text 'ssn' field" if plain_ssn_count > 0

      sample = collection.find.limit(3).to_a
      sample_names = sample.map { |d| "#{d['first_name']} #{d['last_name']}" }.join(", ")

      build_result("Census Members (census_members)", total, issues, sample_names)
    end

    # Samples up to SAMPLE_SIZE applications and flags every present
    # immigration document value that is not a generated token.
    def check_fa_applications
      collection = @db[:financial_assistance_applications]
      total = collection.count_documents({})
      issues = []

      real_doc_count = 0
      collection.find.limit(SAMPLE_SIZE).each do |doc|
        (doc['applicants'] || []).each do |applicant|
          IMMIGRATION_DOC_FIELDS.each do |field|
            val = applicant[field]
            # Anything present that is not a 10-char uppercase hex token was
            # not written by the anonymizer, whatever its length.
            real_doc_count += 1 if val.present? && !val.to_s.match?(GENERATED_DOC_NUMBER_PATTERN)
          end
        end
      end
      issues << "#{real_doc_count} real doc numbers found in sample" if real_doc_count > 0

      sample = collection.find.limit(1).to_a
      sample_applicant = sample.dig(0, 'applicants', 0)
      sample_val = sample_applicant ? "#{sample_applicant['first_name']} #{sample_applicant['last_name']}" : ""

      build_result("FA Applications (financial_assistance_applications)", total, issues, sample_val)
    end

    # Reports the record count and sample legal names; performs no checks.
    def check_organizations
      collection = @db[:organizations]
      total = collection.count_documents({})
      issues = []

      sample = collection.find.limit(3).to_a
      sample_names = sample.map { |d| d['legal_name'] }.join(", ")

      build_result("Organizations (organizations)", total, issues, sample_names)
    end

    # Reports the record count and sample legal names; performs no checks.
    def check_bs_organizations
      collection = @db[:benefit_sponsors_organizations_organizations]
      total = collection.count_documents({})
      issues = []

      sample = collection.find.limit(3).to_a
      sample_names = sample.map { |d| d['legal_name'] }.join(", ")

      build_result("BS Organizations (benefit_sponsors_organizations)", total, issues, sample_names)
    end

    # Packs one check's outcome into the hash consumed by #run.
    # The check passes exactly when +issues+ is empty.
    def build_result(collection_name, total, issues, samples)
      {
        collection: collection_name,
        total: total,
        passed: issues.empty?,
        issues: issues.empty? ? "None" : issues.join("; "),
        samples: samples
      }
    end

    # Writes to stdout (except in the test env) and to the Rails log.
    def log(msg)
      puts msg unless Rails.env.test?
      Rails.logger.info("[DataAnonymizer::Verifier] #{msg}")
    end
  end
end