diff --git a/.gitignore b/.gitignore index b268508..593e331 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ test/dummy/log/*.log test/dummy/tmp/ test/dummy/.sass-cache *.gem +*.xlsx diff --git a/Gemfile.lock b/Gemfile.lock index a9d8372..b40b321 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,14 +1,26 @@ PATH remote: . specs: - spreadsheet_exporter (0.1.1) + spreadsheet_exporter (0.1.2) + activesupport (>= 6) write_xlsx GEM remote: https://rubygems.org/ specs: - rubyzip (2.3.0) - write_xlsx (0.83.0) + activesupport (7.0.3.1) + concurrent-ruby (~> 1.0, >= 1.0.2) + i18n (>= 1.6, < 2) + minitest (>= 5.1) + tzinfo (~> 2.0) + concurrent-ruby (1.1.10) + i18n (1.12.0) + concurrent-ruby (~> 1.0) + minitest (5.16.2) + rubyzip (2.3.2) + tzinfo (2.0.5) + concurrent-ruby (~> 1.0) + write_xlsx (1.09.3) rubyzip (>= 1.0.0) zip-zip zip-zip (0.3) @@ -21,4 +33,4 @@ DEPENDENCIES spreadsheet_exporter! BUNDLED WITH - 1.10.4 + 2.3.19 diff --git a/README.md b/README.md index 0e9a8f0..926c575 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,9 @@ gem 'spreadsheet_exporter' ## Usage -Objects that are exported must respond to ```as_csv``` or ```as_json``` and return a hash -representing column names and values. +Objects that are exported must respond to `as_spreadsheet`, `as_csv` or `as_json` and return a hash +representing column names and values. In Rails you can `include SpreadsheetExporter::GeneratesSpreadsheet` into your model. + ### CSV or XLSX Output can be .csv or .xlsx. Choose by using SpreadsheetExporter::CSV or SpreadsheetExporter::XLSX modules. 
@@ -28,3 +29,41 @@ that is actually comma-delimited, pass ```:col_sep => ','``` as an option when e ```ruby SpreadsheetExporter::CSV.from_spreadsheet([["First Name", "Last Name"], ["Bob", "Hoskins"], ["Roger", "Rabbit"]]) ``` + +### XLSX with Pick Lists + +```ruby +# data sources are written to a `data` worksheet and may be referenced by +# multiple rows +data_sources = { + "food_types" => %w[Polenta Paella Papaya], + "countries" => %w[Canada Türkiye], + "cities" => {"Canada"=>["Sxwōxwiyám", "Toronto"], "Türkiye"=>["Eskişehir", "İzmir", "İstanbul"]} +} + +validations = { + "favourite_food" => SpreadsheetExporter::ColumnValidation.new( + data_source: "food_types" + ), + "yuckiest_food" => SpreadsheetExporter::ColumnValidation.new( + data_source: "food_types" + ), + "country" => SpreadsheetExporter::ColumnValidation.new( + data_source: "countries" + ), + "city" => SpreadsheetExporter::ColumnValidation.new( + dependent_on: "country", + data_source: "cities" + ), +} + +SpreadsheetExporter::XLSX.from_objects(array_of_objects, + data_sources: data_sources, + validations: validations, + freeze_panes: [1, 0] # number of rows and columns to freeze (only applies to XLSX) +) +``` + +### Testing + +There isn't currently a test suite. You can generate a sample test XLSX file by running `test/test.rb`. 
diff --git a/lib/spreadsheet_exporter.rb b/lib/spreadsheet_exporter.rb index b07bded..7c210a4 100644 --- a/lib/spreadsheet_exporter.rb +++ b/lib/spreadsheet_exporter.rb @@ -1,6 +1,14 @@ -require 'spreadsheet_exporter/csv' -require 'spreadsheet_exporter/xlsx' +require_relative './spreadsheet_exporter/generates_spreadsheet' +require_relative './spreadsheet_exporter/column_validation' +require_relative './spreadsheet_exporter/csv' +require_relative './spreadsheet_exporter/xlsx' +require 'active_support' +require 'active_support/core_ext/object/json' +require 'active_support/core_ext/hash/reverse_merge' + module SpreadsheetExporter + VALIDATION_ERROR_TYPES = %w[stop warning information].freeze + begin Mime::Type.register "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", :xlsx rescue NameError diff --git a/lib/spreadsheet_exporter/column_validation.rb b/lib/spreadsheet_exporter/column_validation.rb new file mode 100644 index 0000000..fdab080 --- /dev/null +++ b/lib/spreadsheet_exporter/column_validation.rb @@ -0,0 +1,12 @@ +module SpreadsheetExporter + # Settings for one column's pick-list validation. + # `ignore_blank` falls back to true and `error_type` to the first entry of + # VALIDATION_ERROR_TYPES when they are not supplied. + ColumnValidation = Struct.new(:ignore_blank, :data_source, :dependent_on, :error_type, keyword_init: true) do + def initialize(*) + super + self.ignore_blank = true if ignore_blank.nil?
+ self.error_type ||= VALIDATION_ERROR_TYPES[0] + end + end +end diff --git a/lib/spreadsheet_exporter/csv.rb b/lib/spreadsheet_exporter/csv.rb index ea6704c..2152ec5 100644 --- a/lib/spreadsheet_exporter/csv.rb +++ b/lib/spreadsheet_exporter/csv.rb @@ -5,19 +5,19 @@ module SpreadsheetExporter module CSV BOM = "\377\376".force_encoding("utf-16le") # Byte Order Mark so Excel displays characters correctly - def self.from_objects(objects, options = {}) - spreadsheet = Spreadsheet.from_objects(objects, options).compact + def self.from_objects(objects, humanize_headers_class: nil, **options) + spreadsheet = Spreadsheet.from_objects(objects, humanize_headers_class: humanize_headers_class, **options).compact from_spreadsheet(spreadsheet) end - def self.from_spreadsheet(spreadsheet, options = {}) - output = ::CSV.generate(**options.reverse_merge(:encoding => 'UTF-8', :col_sep => "\t")) do |csv| + def self.from_spreadsheet(spreadsheet, encoding: 'UTF-8', col_sep: "\t", **options) + output = ::CSV.generate(encoding: encoding, col_sep: col_sep, **options) do |csv| spreadsheet.each do |row| csv << row end end - return BOM + output.encode!('utf-16le') + BOM + output.encode!('utf-16le') end end end diff --git a/lib/spreadsheet_exporter/generates_spreadsheet.rb b/lib/spreadsheet_exporter/generates_spreadsheet.rb new file mode 100644 index 0000000..27f8ad2 --- /dev/null +++ b/lib/spreadsheet_exporter/generates_spreadsheet.rb @@ -0,0 +1,14 @@ +require "active_support" +require "active_support/concern" + +module SpreadsheetExporter + # Mix into a model to give it an `as_spreadsheet` export hook. + # The including class must provide `serializable_hash`. + module GeneratesSpreadsheet + extend ActiveSupport::Concern + + def as_spreadsheet(options = {}) + serializable_hash(options) + end + end +end diff --git a/lib/spreadsheet_exporter/spreadsheet.rb b/lib/spreadsheet_exporter/spreadsheet.rb index 0baf9a7..e958386 100644 --- a/lib/spreadsheet_exporter/spreadsheet.rb +++ b/lib/spreadsheet_exporter/spreadsheet.rb @@ -1,24 +1,43 @@ # TODO: Find out why we can't detect arrays properly and must resort to crappy class.name comparison
module SpreadsheetExporter + HeaderCell = Struct.new(:attribute_name, :human_attribute_name) do + def to_s + human_attribute_name.presence || attribute_name + end + end + module Spreadsheet - def self.from_objects(objects, options = {}) + def self.from_objects(objects, humanize_headers_class: nil, **options) headers = [] rows = [] # Get all the data and accumulate headers from each row (since rows may not have all the same attributes) Array(objects).each do |object| - data = object.respond_to?(:as_csv) ? get_values(object.as_csv(options)) : get_values(object.as_json(options)) - headers = headers | data.keys + data = if object.respond_to?(:as_spreadsheet) + get_values(object.as_spreadsheet(options)) + elsif object.respond_to?(:as_csv) + get_values(object.as_csv(options)) + else + get_values(object.as_json(options)) + end + + headers |= data.keys.map { |v| HeaderCell.new(v) } rows << data end # Create the csv, ensuring to place each row's attributes under the appropriate header (since rows may not have all the same attributes) [].tap do |spreadsheet| - spreadsheet << (options[:humanize_headers_class] ? han(options[:humanize_headers_class], headers) : headers) + if humanize_headers_class + headers = han(headers, humanize_headers_class: humanize_headers_class, **options) + end + + spreadsheet << headers + rows.each do |row| sorted_row = [] row.each do |header, value| - sorted_row[headers.index(header)] = value + col_index = headers.find_index { |h| h.attribute_name == header } + sorted_row[col_index] = value end spreadsheet << sorted_row @@ -28,14 +47,14 @@ def self.from_objects(objects, options = {}) # Return an array of human_attribute_name's # Used by the CSV Import/Export process to match CSV headers to model attribute names - def self.han(klass, *attributes) - options = attributes.extract_options! + def self.han(headers, humanize_headers_class:, downcase: false, **) + headers.flatten! - attributes.flatten! - attributes.collect!
{|attribute| klass.human_attribute_name(attribute) } - attributes.collect!(&:downcase) if options[:downcase] - - return attributes.many? ? attributes : attributes.first + headers.collect! do |header| + header.human_attribute_name = humanize_headers_class.human_attribute_name(header.attribute_name) + header.human_attribute_name = header.human_attribute_name.downcase if downcase + header + end end def self.get_values(node, current_header = nil) diff --git a/lib/spreadsheet_exporter/xlsx.rb b/lib/spreadsheet_exporter/xlsx.rb index ce2378b..cc9a886 100644 --- a/lib/spreadsheet_exporter/xlsx.rb +++ b/lib/spreadsheet_exporter/xlsx.rb @@ -1,43 +1,189 @@ require 'write_xlsx' require_relative 'spreadsheet' +require "active_support" +require "active_support/core_ext/hash/keys" +require "stringio" module SpreadsheetExporter module XLSX - def self.from_objects(objects, options = {}) - spreadsheet = Spreadsheet.from_objects(objects, options).compact - from_spreadsheet(spreadsheet) + extend Writexlsx::Utility # gets us `xl_rowcol_to_cell` and `xl_col_to_name` + + ROW_MAX = 65_536 - 1 + DATA_WORKSHEET_NAME = "data".freeze + + def self.from_objects(objects, humanize_headers_class: nil, **options) + spreadsheet = Spreadsheet.from_objects(objects, humanize_headers_class: humanize_headers_class, **options).compact + from_spreadsheet(spreadsheet, **options) end - def self.from_spreadsheet(spreadsheet, temp_file_path = 'tmp/items.xlsx') - # Create a new Excel workbook - workbook = WriteXLSX.new(temp_file_path) + def self.from_spreadsheet(spreadsheet, validations: {}, data_sources: {}, freeze_panes: false, **options) + io = StringIO.new + workbook = WriteXLSX.new(io) - # Add a worksheet worksheet = workbook.add_worksheet - # Add and define a format - headerFormat = workbook.add_format # Add a format - headerFormat.set_bold + header_format = workbook.add_format + header_format.set_bold + + column_indexes = {} # Write header row - Array(spreadsheet.first).each_with_index do |column_name, col| - worksheet.write(0, col, column_name,
headerFormat) + Array(spreadsheet.first).each_with_index do |header, col| + worksheet.write(0, col, header.to_s, header_format) + column_indexes[(header.respond_to?(:attribute_name) ? header.attribute_name : header).to_s] = col end - Array(spreadsheet[1..-1]).each_with_index do |values, row| - Array(values).each_with_index do |value, col| - worksheet.write(row + 1, col, value) - end + Array(spreadsheet[1..]).each_with_index do |values, row| + worksheet.write_row(row + 1, 0, Array(values)) end - # Output the file contents and delete it + added_data_sources = add_data_sources(workbook, header_format, data_sources) + + add_worksheet_validation(workbook, worksheet, column_indexes, added_data_sources, header_format, validations) + + add_frozen_panes(worksheet, freeze_panes) + workbook.close - file = File.open(temp_file_path) - output = file.read - file.close - File.delete(temp_file_path) + io.string + end + + # NOTE(review): dependent_named_range only substitutes spaces with underscores, so a + # parent value containing any other character replaced here (e.g. "Türkiye") presumably + # never matches the defined name generated for it -- confirm and align the two. + def self.sanitize_defined_name(raw) + raw.gsub(/[^A-Za-z0-9_]/, "_") + end + + # freeze_panes => [1, 2] # freeze the top row and left two cols + def self.add_frozen_panes(worksheet, freeze_panes) + return unless freeze_panes + rows, cols = freeze_panes + worksheet.freeze_panes(Integer(rows), Integer(cols)) + end + + # Write each data_source to the `data` worksheet and wrap it with a named range so + # we can easily reference it later. + # + # `data_sources` is a hash in the format: + # { 'data_source_id' => ['data', 'source', 'options'] } + # + # This will create a named range called `data_source_id`. + # + # For data sources dependent on the value in another column, the format is + # { 'data_source_id' => { + # 'other_col_val_1' => ['options', 'when', 'val is val_1'], + # 'other_col_val_2' => ['options', 'when', 'val is val_2'] + # } + # } + # + # This will create two named ranges: `data_source_id_val_1` and `data_source_id_val_2`. + def self.add_data_sources(workbook, header_format, data_sources) + return {} if data_sources.empty?
+ + unless (data_sheet = workbook.worksheet_by_name(DATA_WORKSHEET_NAME)) + data_sheet = workbook.add_worksheet(DATA_WORKSHEET_NAME) + data_sheet.freeze_panes(1, 0) + end + + data_source_refs = {} + + column_index = 0 + data_sources.stringify_keys.each do |data_key, data_values| + if data_values.is_a?(Hash) + # this is a dependent data source + data_values.each do |data_value, sub_values| + sub_key = sanitize_defined_name("#{data_key}_#{data_value}") + data_source_refs[sub_key] = add_data_source(workbook, data_sheet, sub_key, sub_values, column_index, header_format) + column_index += 1 + end + else + # this is an independent data source + data_source_refs[data_key] = add_data_source(workbook, data_sheet, data_key, data_values, column_index, header_format) + column_index += 1 + end + end + + data_source_refs + end + + # Write a data column to the `data` worksheet and define it as a named range + # + # Returns the named range's name + def self.add_data_source(workbook, data_sheet, data_key, data_values, column_index, header_format) + unless data_values.is_a?(Array) + raise ArgumentError, "data_values should be an array (got #{data_values.inspect})" + end + + data_start = xl_rowcol_to_cell(1, column_index, true, true) + data_end = xl_rowcol_to_cell(data_values.length, column_index, true, true) + + defined_name_source = "=#{DATA_WORKSHEET_NAME}!#{data_start}:#{data_end}" + + data_sheet.write(0, column_index, data_key, header_format) + data_sheet.write_col(1, column_index, data_values.map(&:strip)) + workbook.define_name(data_key, defined_name_source) + + data_key + end + + # Attach a drop-down list validation to rows 1..ROW_MAX of every column named in + # `validations` (column name => ColumnValidation). Columns missing from the sheet are + # skipped with a warning; errors from building a validation propagate to the caller. + def self.add_worksheet_validation(workbook, worksheet, column_indexes, added_data_sources, header_format, validations) + return if validations.empty? + + validations.each do |column_name, column_validation| + column_index = column_indexes[column_name.to_s] + + if column_index.nil?
+ warn "attempted to apply validation to missing column '#{column_name}'" + next + end + + defined_name = if column_validation.dependent_on + parent_col_index = column_indexes[column_validation.dependent_on.to_s] + if parent_col_index.nil? + warn "attempted to apply validation dependent on missing column '#{column_validation.dependent_on}'" + next + end + parent_col = xl_col_to_name(parent_col_index, true) + dependent_named_range(column_validation.data_source, parent_col) + else + added_data_sources[column_validation.data_source.to_s] + end + + unless defined_name + raise ArgumentError, "missing data for data_source=#{column_validation.data_source}" + end + + validation_options = generate_validation(column_validation, defined_name) + worksheet.data_validation(1, column_index, ROW_MAX, column_index, validation_options) + end + end + + # We build up the reference to the named range by leaning on Excel's INDIRECT function + # to dynamically build the name. The resulting formula becomes the validation drop down's + # source. It resolves thusly...
+ # + # =INDIRECT("sub_data_source" & "_" & SUBSTITUTE(INDIRECT("$AA" & ROW()), " ", "_")) + # =INDIRECT("sub_data_source" & "_" & SUBSTITUTE("Parent Value", " ", "_")) + # =INDIRECT("sub_data_source" & "_" & "Parent_Value") + # =INDIRECT("sub_data_source_Parent_Value") + # =sub_data_source_Parent_Value + def self.dependent_named_range(data_source, parent_col) + "INDIRECT(\"#{data_source}\" & \"_\" & "\ + "SUBSTITUTE(INDIRECT(\"#{parent_col}\" & ROW()), \" \", \"_\"))" + end - return output + def self.generate_validation(column_validation, defined_name) + { + "validate" => "list", + "input_title" => "Select a value", + "source" => "=#{defined_name}", + "error_type" => column_validation.error_type, + "ignore_blank" => column_validation.ignore_blank, + "dropdown" => true + } end end end diff --git a/macros.vb b/macros.vb new file mode 100644 index 0000000..aafc784 --- /dev/null +++ b/macros.vb @@ -0,0 +1,110 @@ +' This Visual Basic for Applications code can be added to the generated +' XLSX files to gain some data entry UX improvements. +' +' Specifically: +' - the ability to select multiple values for a single column with a data source +' - clearing dependent columns' values when the column they are dependent_on changes +' +' Limitations: +' - only one parent col is supported and the dependent_on child columns must be +' all beside each other so they can be selected as a range +' +' You will need to change `ParentTypeCol`, `DependentTypeStartCol` and `DependentTypeEndCol` +' below before adding the code to your Excel file.
+ +Option Explicit + +' Returns True when `key` can be looked up in `col`; a failed lookup jumps to `err` and returns False (https://stackoverflow.com/a/48375276/559596) +Public Function ExistsInCollection(col As Collection, key As Variant) As Boolean + On Error GoTo err + ExistsInCollection = True + IsObject(col.item(key)) + Exit Function +err: + ExistsInCollection = False +End Function + +' Copies the items of `myCol` into a zero-based array (https://stackoverflow.com/a/47500463/559596). NOTE(review): for an empty collection this evaluates ReDim result(-1) -- confirm that case is handled +Public Function CollectionToArray(myCol As Collection) As Variant + Dim result As Variant + Dim cnt As Long + + ReDim result(myCol.Count - 1) + + For cnt = 0 To myCol.Count - 1 + result(cnt) = myCol(cnt + 1) + Next cnt + + CollectionToArray = result +End Function + +Private Sub Worksheet_Change(ByVal Target As Range) + Dim existingValue As String + Dim toggledValue As String + Dim tokenArr() As String + Dim tokenCollection As Collection + Dim token as Variant + + ' The column containing the parent type ("site type") - we watch this for changes and then update + ' the dependent columns accordingly + Dim ParentTypeCol As String + ParentTypeCol = "AS" + + ' All the columns that are dependent on the parent type ("site type") + Dim DependentTypeStartCol As String + Dim DependentTypeEndCol As String + DependentTypeStartCol = "AT" + DependentTypeEndCol = "AZ" + + Dim DependentTypeRangeSelector As String + DependentTypeRangeSelector = DependentTypeStartCol & Target.Row & ":" & DependentTypeEndCol & Target.Row + + If Intersect(Target, Range(ParentTypeCol & ":" & ParentTypeCol & "," & DependentTypeRangeSelector)) Is Nothing Then Exit Sub + + ' If an error occurs, jump to Quit, which re-enables events and screen updating before leaving + On Error GoTo Quit + + Application.EnableEvents = False + Application.ScreenUpdating = False + + ' If we change anything in the parent-type col then clear all the dependent types + If Not Intersect(Target, Range(ParentTypeCol & ":" & ParentTypeCol)) Is Nothing Then + Debug.Print "in col for clearing...
" & DependentTypeRangeSelector + Range(DependentTypeRangeSelector).ClearContents + GoTo Quit + End If + + ' Handle pick-list changes: the picked value is toggled in the cell's comma-separated list + If Not Intersect(Target, Range(DependentTypeRangeSelector)) Is Nothing Then + ' If user deletes the dropdown cell's data do nothing + If Target.Value = "" Then GoTo Quit + + ' If we already have a comma we assume this is the result of copy-and-pasting + ' and we bail early + toggledValue = Target.Value + If InStr(toggledValue, ",") > 0 Then GoTo Quit + + Application.Undo + + existingValue = Target.Value + + tokenArr() = Split(existingValue, ",") + Set tokenCollection = New Collection + For Each token in tokenArr + tokenCollection.Add Trim(token), Trim(token) + Next + + + If ExistsInCollection(tokenCollection, toggledValue) Then + tokenCollection.Remove toggledValue + Else + tokenCollection.Add Trim(toggledValue), Trim(toggledValue) + End If + + Target.Value = Join(CollectionToArray(tokenCollection), ",") + End If + +Quit: + Application.EnableEvents = True + Application.ScreenUpdating = True +End Sub diff --git a/spreadsheet_exporter.gemspec b/spreadsheet_exporter.gemspec index a216056..0a1402a 100644 --- a/spreadsheet_exporter.gemspec +++ b/spreadsheet_exporter.gemspec @@ -17,5 +17,6 @@ Gem::Specification.new do |s| s.files = Dir["{app,config,db,lib}/**/*", "MIT-LICENSE", "Rakefile", "README.md"] s.test_files = Dir["test/**/*"] + s.add_dependency "activesupport", ">= 6" s.add_dependency "write_xlsx" end diff --git a/test/fixtures.rb b/test/fixtures.rb new file mode 100644 index 0000000..c04afcf --- /dev/null +++ b/test/fixtures.rb @@ -0,0 +1,25 @@ +def country_and_city + country = sample_country + city = CONDITIONAL_CITIES[country].sample + {country: country, city: city} +end + +def sample_country + CONDITIONAL_CITIES.keys.sample +end + +MEALS = %w[Omnivore Veg Vegan] + +COUNTRIES = %w[Canada Turkey] + +CONDITIONAL_CITIES = { + COUNTRIES[0] => [ + "Sxwōxwiyám", + "Toronto" + ].map { |s| s.encode("UTF-8") }, + COUNTRIES[1] => [ +
"Eskişehir", + "İzmir", + "İstanbul" + ].map { |s| s.encode("UTF-8") } +} diff --git a/test/test.rb b/test/test.rb new file mode 100755 index 0000000..7f90048 --- /dev/null +++ b/test/test.rb @@ -0,0 +1,57 @@ +#!/usr/bin/env ruby +require_relative "../lib/spreadsheet_exporter" +require_relative "./fixtures" + +data = [ + {"name" => "Jim", "role" => "admin"}.merge(country_and_city), + {"name" => "Sally", "role" => "user", "favourite_meal" => MEALS.sample, "most_recent_meal" => MEALS.sample}.merge(country_and_city), + {"name" => "Horatio", "role" => "user", "favourite_meal" => MEALS.sample, "most_recent_meal" => MEALS.sample}, + {"name" => "Jan", "role" => "user"} +] + +options = { + :data_sources => { + "all_meals" => MEALS, + "roles" => %w[admin user spammer boss], + "countries" => COUNTRIES, + "cities" => CONDITIONAL_CITIES + }, + + :validations => { + "role" => SpreadsheetExporter::ColumnValidation.new( + ignore_blank: false, + data_source: "roles" + ), + "country" => SpreadsheetExporter::ColumnValidation.new( + ignore_blank: true, + error_type: "information", + data_source: "countries" + ), + "city" => SpreadsheetExporter::ColumnValidation.new( + ignore_blank: true, + error_type: "information", + dependent_on: "country", + data_source: "cities" + ), + "favourite_meal" => SpreadsheetExporter::ColumnValidation.new( + ignore_blank: true, + error_type: "warning", + data_source: "all_meals" + ), + "most_recent_meal" => SpreadsheetExporter::ColumnValidation.new( + ignore_blank: true, + error_type: "warning", + data_source: "all_meals" + ) + } +} +class Humanizer + def self.human_attribute_name(att) + att.upcase + end +end + +options[:humanize_headers_class] = Humanizer +options[:freeze_panes] = [1, 1] + +File.binwrite("output.xlsx", SpreadsheetExporter::XLSX.from_objects(data, **options))