From a367bd8c08018c834956c4100c7e2f1919c94023 Mon Sep 17 00:00:00 2001 From: Erwan Thomas Date: Wed, 13 Sep 2023 23:06:08 +0200 Subject: [PATCH 1/4] Remove backends auto-detection --- lib/sheetah/backends.rb | 7 +-- lib/sheetah/backends/csv.rb | 18 ------- lib/sheetah/backends/xlsx.rb | 14 ------ lib/sheetah/backends_registry.rb | 27 ----------- spec/sheetah/backends/csv_spec.rb | 30 ------------ spec/sheetah/backends/xlsx_spec.rb | 20 -------- spec/sheetah/backends_registry_spec.rb | 65 -------------------------- spec/sheetah/backends_spec.rb | 22 --------- 8 files changed, 1 insertion(+), 202 deletions(-) delete mode 100644 lib/sheetah/backends_registry.rb delete mode 100644 spec/sheetah/backends_registry_spec.rb diff --git a/lib/sheetah/backends.rb b/lib/sheetah/backends.rb index 946ad7c..1bb5a1b 100644 --- a/lib/sheetah/backends.rb +++ b/lib/sheetah/backends.rb @@ -1,20 +1,15 @@ # frozen_string_literal: true -require_relative "backends_registry" require_relative "utils/monadic_result" module Sheetah module Backends - @registry = BackendsRegistry.new - SimpleError = Struct.new(:msg_code) private_constant :SimpleError class << self - attr_reader :registry - def open(*args, **opts, &block) - backend = opts.delete(:backend) || registry.get(*args, **opts) + backend = opts.delete(:backend) if backend.nil? return Utils::MonadicResult::Failure.new(SimpleError.new("no_applicable_backend")) diff --git a/lib/sheetah/backends/csv.rb b/lib/sheetah/backends/csv.rb index 56c0cc9..389cfee 100644 --- a/lib/sheetah/backends/csv.rb +++ b/lib/sheetah/backends/csv.rb @@ -3,7 +3,6 @@ require "csv" require_relative "../sheet" -require_relative "../backends" module Sheetah module Backends @@ -28,23 +27,6 @@ class EncodingError < Error private_constant :CSV_OPTS - def self.register(registry = Backends.registry) - registry.set(self) do |args, opts| - next false unless args.empty? - - case opts - in { io: _, **nil } | \ - { io: _, encoding: String | Encoding, **nil } | \ - { path: /\.csv$/i, **nil } | \ - { path: /\.csv$/i, encoding: String | Encoding, **nil } - then - true - else - false - end - end - end - def initialize(io: nil, path: nil, encoding: nil) io = setup_io(io, path, encoding) diff --git a/lib/sheetah/backends/xlsx.rb b/lib/sheetah/backends/xlsx.rb index 9a417a2..013b1c9 100644 --- a/lib/sheetah/backends/xlsx.rb +++ b/lib/sheetah/backends/xlsx.rb @@ -6,26 +6,12 @@ require "roo" require_relative "../sheet" -require_relative "../backends" module Sheetah module Backends class Xlsx include Sheet - def self.register(registry = Backends.registry) - registry.set(self) do |args, opts| - next false unless args.empty? - - case opts - in { path: /\.xlsx$/i, **nil } - true - else - false - end - end - end - def initialize(path:) raise Error if path.nil? diff --git a/lib/sheetah/backends_registry.rb b/lib/sheetah/backends_registry.rb deleted file mode 100644 index 9dfab52..0000000 --- a/lib/sheetah/backends_registry.rb +++ /dev/null @@ -1,27 +0,0 @@ -# frozen_string_literal: true - -module Sheetah - class BackendsRegistry - def initialize - @registry = {} - end - - def set(backend, &matcher) - @registry[backend] = matcher - self - end - - def get(*args, **opts) - @registry.each do |backend, matcher| - return backend if matcher.call(args, opts) - end - - nil - end - - def freeze - @registry.freeze - super - end - end -end diff --git a/spec/sheetah/backends/csv_spec.rb b/spec/sheetah/backends/csv_spec.rb index 1357f58..cd45c3e 100644 --- a/spec/sheetah/backends/csv_spec.rb +++ b/spec/sheetah/backends/csv_spec.rb @@ -41,36 +41,6 @@ def new_sheet(...) described_class.new(io: stub_sheet(...)) end - describe "::register" do - let(:registry) { Sheetah::BackendsRegistry.new } - - before do - described_class.register(registry) - end - - it "matches any so-called IO and an optional encoding" do - io = double - - expect(registry.get(io: io)).to eq(described_class) - expect(registry.get(io: io, encoding: "UTF-8")).to eq(described_class) - expect(registry.get(io: io, encoding: Encoding::UTF_8)).to eq(described_class) - end - - it "matches a CSV path and an optional encoding" do - expect(registry.get(path: "foo.csv")).to eq(described_class) - expect(registry.get(path: "foo.csv", encoding: "UTF-8")).to eq(described_class) - expect(registry.get(path: "foo.csv", encoding: Encoding::UTF_8)).to eq(described_class) - end - - it "doesn't match any other path" do - expect(registry.get(path: "foo.tsv")).to be_nil - end - - it "doesn't match extra args" do - expect(registry.get(2, path: "foo.csv")).to be_nil - end - end - describe "#initialize" do let(:utf8_path) { fixture_path("csv/utf8.csv") } let(:latin9_path) { fixture_path("csv/latin9.csv") } diff --git a/spec/sheetah/backends/xlsx_spec.rb b/spec/sheetah/backends/xlsx_spec.rb index 01540cb..53de067 100644 --- a/spec/sheetah/backends/xlsx_spec.rb +++ b/spec/sheetah/backends/xlsx_spec.rb @@ -14,26 +14,6 @@ def new_sheet(path) described_class.new(path: path && fixture_path(path)) end - describe "::register" do - let(:registry) { Sheetah::BackendsRegistry.new } - - before do - described_class.register(registry) - end - - it "matches a XLSX path" do - expect(registry.get(path: "foo.xlsx")).to eq(described_class) - end - - it "doesn't match any other path" do - expect(registry.get(path: "foo.xls")).to be_nil - end - - it "doesn't match extra args" do - expect(registry.get(2, path: "foo.xlsx")).to be_nil - end - end - describe "#each_header" do let(:expected_headers) do [ diff --git a/spec/sheetah/backends_registry_spec.rb b/spec/sheetah/backends_registry_spec.rb deleted file mode 100644 index 9216925..0000000 --- a/spec/sheetah/backends_registry_spec.rb +++ /dev/null @@ -1,65 +0,0 @@ -# frozen_string_literal: true - -require "sheetah/backends_registry" - -RSpec.describe Sheetah::BackendsRegistry do - let(:registry) { described_class.new } - - let(:backend0) { double(:backend0) } - let(:backend1) { double(:backend1) } - - before do - registry.set(backend0) do |args, opts| - ok = (case args; in [[1, 2, Symbol]] then true; else false; end) - ok && (case opts; in { foo: Hash } then true; else false; end) - end - - registry.set(backend1) do |args, opts| - ok = (case args; in [] then true; else false; end) - ok && (case opts; in { path: /\.csv$/ } then true; else false; end) - end - end - - describe "#get / #set" do - it "can set a new backend with a matcher" do - expect(registry.get([1, 2, :ozij], foo: { 1 => 2 })).to be(backend0) - expect(registry.get(path: "file.csv")).to be(backend1) - expect(registry.get(double, path: "file.csv")).to be_nil - end - - it "can overwrite a previous backend matcher" do - registry.set(backend0) do |args, opts| - ok = (case args; in ["foo"] then true; else false; end) - ok && (case opts; in {} then true; else false; end) - end - - expect(registry.get("foo")).to be(backend0) - end - end - - describe "#set" do - it "returns the registry itself" do - result = registry.set(backend0) {} - - expect(result).to be(registry) - end - end - - describe "#freeze" do - before { registry.freeze } - - it "freezes the registry" do - expect(registry).to be_frozen - end - - it "prevents further modifications" do - expect do - registry.set(backend0) {} - end.to raise_error(FrozenError) - end - - it "doesn't prevent further readings" do - expect(registry.get(path: "foo.csv")).to be(backend1) - end - end -end diff --git a/spec/sheetah/backends_spec.rb b/spec/sheetah/backends_spec.rb index 2922b2a..ef46c47 100644 --- a/spec/sheetah/backends_spec.rb +++ b/spec/sheetah/backends_spec.rb @@ -3,12 +3,6 @@ require "sheetah/backends" RSpec.describe Sheetah::Backends do - describe "::registry" do - it "is a registry of backends" do - expect(described_class.registry).to be_a(Sheetah::BackendsRegistry) - end - end - describe "::open" do let(:backend) do double @@ -20,24 +14,8 @@ it "may open with an explicit backend" do allow(backend).to receive(:open).with(foo, bar: bar).and_return(res) - expect(described_class.registry).not_to receive(:get) expect(described_class.open(foo, backend: backend, bar: bar)).to be(res) end - - it "may open with an implicit backend" do - allow(backend).to receive(:open).with(foo, bar: bar).and_return(res) - allow(described_class.registry).to receive(:get).with(foo, bar: bar).and_return(backend) - - expect(described_class.open(foo, bar: bar)).to be(res) - end - - it "may miss a backend to open" do - allow(described_class.registry).to receive(:get).with(foo, bar: bar).and_return(nil) - - result = described_class.open(foo, bar: bar) - expect(result).to be_failure - expect(result.failure).to have_attributes(msg_code: "no_applicable_backend") - end end end From 45d7e410a63d366da795c73fef12ba91269e1206 Mon Sep 17 00:00:00 2001 From: Erwan Thomas Date: Wed, 13 Sep 2023 23:06:46 +0200 Subject: [PATCH 2/4] Simplify CSV backend --- lib/sheetah/backends/csv.rb | 95 +++++++++++++------------- spec/sheetah/backends/csv_spec.rb | 110 ++++++++++++------------------ 2 files changed, 91 insertions(+), 114 deletions(-) diff --git a/lib/sheetah/backends/csv.rb b/lib/sheetah/backends/csv.rb index 389cfee..94acb57 100644 --- a/lib/sheetah/backends/csv.rb +++ b/lib/sheetah/backends/csv.rb @@ -6,31 +6,42 @@ module Sheetah module Backends - # Expect: - # - UTF-8 without BOM, or the correct encoding given explicitly - # - line endings as \n or \r\n - # - comma-separated - # - quoted with " class Csv include Sheet - class ArgumentError < Error + class InvalidEncodingError < Error end - class EncodingError < Error + class InvalidCSVError < Error end - CSV_OPTS = { + DEFAULTS = { + row_sep: :auto, col_sep: ",", quote_char: '"', }.freeze - private_constant :CSV_OPTS + private_constant :DEFAULTS - def initialize(io: nil, path: nil, encoding: nil) - io = setup_io(io, path, encoding) + def self.defaults + DEFAULTS + end + + def initialize( + io, + row_sep: self.class.defaults[:row_sep], + col_sep: self.class.defaults[:col_sep], + quote_char: self.class.defaults[:quote_char] + ) + ensure_utf8(io) + + @csv = CSV.new( + io, + row_sep: row_sep, + col_sep: col_sep, + quote_char: quote_char + ) - @csv = CSV.new(io, **CSV_OPTS) @headers = detect_headers(@csv) @cols_count = @headers.size end @@ -50,62 +61,48 @@ def each_header def each_row return to_enum(:each_row) unless block_given? - @csv.each.with_index(1) do |raw, row| - value = Array.new(@cols_count) do |col_idx| - col = Sheet.int2col(col_idx + 1) + handle_malformed_csv do + @csv.each.with_index(1) do |raw, row| + value = Array.new(@cols_count) do |col_idx| + col = Sheet.int2col(col_idx + 1) - Cell.new(row: row, col: col, value: raw[col_idx]) - end + Cell.new(row: row, col: col, value: raw[col_idx]) + end - yield Row.new(row: row, value: value) + yield Row.new(row: row, value: value) + end end self end def close - @csv.close - - nil + # Do nothing: this backend isn't responsible for opening the IO, and therefore it is not + # responsible for closing it either. end private - def setup_io(io, path, encoding) - if io.nil? && !path.nil? - setup_io_from_path(path, encoding) - elsif !io.nil? && path.nil? - setup_io_from_io(io, encoding) - else - raise ArgumentError, "Expected either IO or path" - end - end + def ensure_utf8(io) + target = Encoding::UTF_8 - def setup_io_from_io(io, encoding) - io.set_encoding(encoding, Encoding::UTF_8) if encoding - io - end + internal = io.internal_encoding + return if internal == target - def setup_io_from_path(path, encoding) - opts = { mode: "r" } + external = io.external_encoding + return if external == target && internal.nil? - if encoding - opts[:external_encoding] = encoding - opts[:internal_encoding] = Encoding::UTF_8 - end + raise InvalidEncodingError + end - File.new(path, **opts) + def handle_malformed_csv + yield + rescue CSV::MalformedCSVError + raise InvalidCSVError end def detect_headers(csv) - headers = - begin - csv.shift - rescue CSV::MalformedCSVError - raise EncodingError - end - - headers || [] + handle_malformed_csv { csv.shift } || [] end end end diff --git a/spec/sheetah/backends/csv_spec.rb b/spec/sheetah/backends/csv_spec.rb index cd45c3e..9c0c275 100644 --- a/spec/sheetah/backends/csv_spec.rb +++ b/spec/sheetah/backends/csv_spec.rb @@ -4,7 +4,6 @@ require "support/shared/sheet_factories" require "csv" require "stringio" -require "tempfile" RSpec.describe Sheetah::Backends::Csv do include_context "sheet_factories" @@ -22,23 +21,21 @@ end let(:sheet) do - described_class.new(io: raw_sheet) + described_class.new(raw_sheet) end def stub_sheet(table) - return if table.nil? - csv = CSV.generate do |csv_io| table.each do |row| csv_io << row end end - StringIO.new(csv, "r") + StringIO.new(csv, "r:UTF-8") end def new_sheet(...) - described_class.new(io: stub_sheet(...)) + described_class.new(stub_sheet(...)) end describe "#initialize" do @@ -59,72 +56,59 @@ def new_sheet(...) ] end - context "when no io nor path is given" do - it "fails" do - expect do - described_class.new - end.to raise_error(described_class::ArgumentError) - end - end + let(:sheet) { described_class.new(io) } + let(:sheet_headers) { sheet.each_header.map(&:value) } - context "when both an io and a path are given" do - it "fails" do - expect do - described_class.new(io: double, path: double) - end.to raise_error(described_class::ArgumentError) + context "when the IO is opened with a correct UTF-8 external encoding" do + let(:io) do + File.new(utf8_path, external_encoding: Encoding::UTF_8) end - end - context "when only an io is given" do - let(:io) { File.new(io_path) } - - context "when the default encoding is valid" do - alias_method :io_path, :utf8_path - - it "can read CSV data" do - sheet = described_class.new(io: io) - expect(sheet.each_header.map(&:value)).to eq(headers) - end + it "does not fail" do + expect { sheet }.not_to raise_error end - context "when the default encoding is invalid" do - alias_method :io_path, :latin9_path + it "produces UTF-8 strings" do + expect(sheet_headers).to eq(headers) + end + end - it "fails" do - expect do - described_class.new(io: io) - end.to raise_error(described_class::EncodingError) - end + context "when the IO is opened with an incorrect external encoding" do + let(:io) do + File.new(latin9_path, external_encoding: Encoding::UTF_8) + end - it "can read CSV data once given a valid encoding" do - sheet = described_class.new(io: io, encoding: Encoding::ISO_8859_15) - expect(sheet.each_header.map(&:value)).to eq(headers) - end + it "fails" do + expect { sheet }.to raise_error(described_class::InvalidCSVError) end end - context "when only a path is given" do - context "when the default encoding is valid" do - alias_method :path, :utf8_path + context "when the IO is opened with a correct, non-UTF-8 external encoding" do + context "when UTF-8 is not set as the internal encoding" do + let(:io) do + File.new(latin9_path, external_encoding: Encoding::ISO_8859_15) + end - it "can read CSV data" do - sheet = described_class.new(path: path) - expect(sheet.each_header.map(&:value)).to eq(headers) + it "fails" do + expect { sheet }.to raise_error(described_class::InvalidEncodingError) end end - context "when the default encoding is invalid" do - alias_method :path, :latin9_path + context "when UTF-8 is set as the internal encoding" do + let(:io) do + File.new( + latin9_path, + external_encoding: Encoding::ISO_8859_15, + internal_encoding: Encoding::UTF_8 + ) + end - it "fails" do - expect do - described_class.new(path: path) - end.to raise_error(described_class::EncodingError) + it "does not fail" do + expect { sheet }.not_to raise_error end - it "can read CSV data once given a valid encoding" do - sheet = described_class.new(path: path, encoding: Encoding::ISO_8859_15) - expect(sheet.each_header.map(&:value)).to eq(headers) + it "produces UTF-8 strings" do + expect(sheet_headers).to eq(headers) end end end @@ -196,8 +180,8 @@ def new_sheet(...) expect(sheet.close).to be_nil end - it "closes the underlying sheet" do - expect { sheet.close }.to change(raw_sheet, :closed?).from(false).to(true) + it "doesn't close the underlying sheet" do + expect { sheet.close }.not_to change(raw_sheet, :closed?).from(false) end end @@ -212,12 +196,6 @@ def new_sheet(...) end end - context "when the input table is nil" do - it "raises an error" do - expect { new_sheet(nil) }.to raise_error(Sheetah::Sheet::Error) - end - end - context "when the input table is empty" do let(:sheet) { new_sheet [] } @@ -232,8 +210,10 @@ def new_sheet(...) end describe "CSV options" do - it "requires a specific col_sep and quote_char" do - expect(CSV).to receive(:new).with(raw_sheet, col_sep: ",", quote_char: '"').and_call_original + it "requires a specific col_sep and quote_char, and an automatic row_sep" do + expect(CSV).to receive(:new) + .with(raw_sheet, row_sep: :auto, col_sep: ",", quote_char: '"') + .and_call_original sheet end From 936f832c25555299503002822c42e743d5c6f615 Mon Sep 17 00:00:00 2001 From: Erwan Thomas Date: Wed, 13 Sep 2023 23:06:59 +0200 Subject: [PATCH 3/4] Simplify XLSX backend --- lib/sheetah/backends/xlsx.rb | 2 +- spec/sheetah/backends/xlsx_spec.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/sheetah/backends/xlsx.rb b/lib/sheetah/backends/xlsx.rb index 013b1c9..7eeb098 100644 --- a/lib/sheetah/backends/xlsx.rb +++ b/lib/sheetah/backends/xlsx.rb @@ -12,7 +12,7 @@ module Backends class Xlsx include Sheet - def initialize(path:) + def initialize(path) raise Error if path.nil? @roo = Roo::Excelx.new(path) diff --git a/spec/sheetah/backends/xlsx_spec.rb b/spec/sheetah/backends/xlsx_spec.rb index 53de067..10d20ca 100644 --- a/spec/sheetah/backends/xlsx_spec.rb +++ b/spec/sheetah/backends/xlsx_spec.rb @@ -11,7 +11,7 @@ end def new_sheet(path) - described_class.new(path: path && fixture_path(path)) + described_class.new(path && fixture_path(path)) end describe "#each_header" do From b18c8341865f59b68f3dcc6c9afca5ad4e22bc8e Mon Sep 17 00:00:00 2001 From: Erwan Thomas Date: Thu, 12 Oct 2023 13:12:45 +0200 Subject: [PATCH 4/4] Allow any encoding on CSV backends' IO inputs The CSV backend should not have an opinion on the encoding(s) of the IO object it is given. The IO is supposed to be configured with consistent encodings, and that's it. --- lib/sheetah/backends/csv.rb | 17 ------------ spec/sheetah/backends/csv_spec.rb | 44 +++++++++---------------------- 2 files changed, 13 insertions(+), 48 deletions(-) diff --git a/lib/sheetah/backends/csv.rb b/lib/sheetah/backends/csv.rb index 94acb57..253a6e9 100644 --- a/lib/sheetah/backends/csv.rb +++ b/lib/sheetah/backends/csv.rb @@ -9,9 +9,6 @@ module Backends class Csv include Sheet - class InvalidEncodingError < Error - end - class InvalidCSVError < Error end @@ -33,8 +30,6 @@ def initialize( col_sep: self.class.defaults[:col_sep], quote_char: self.class.defaults[:quote_char] ) - ensure_utf8(io) - @csv = CSV.new( io, row_sep: row_sep, @@ -83,18 +78,6 @@ def close private - def ensure_utf8(io) - target = Encoding::UTF_8 - - internal = io.internal_encoding - return if internal == target - - external = io.external_encoding - return if external == target && internal.nil? - - raise InvalidEncodingError - end - def handle_malformed_csv yield rescue CSV::MalformedCSVError diff --git a/spec/sheetah/backends/csv_spec.rb b/spec/sheetah/backends/csv_spec.rb index 9c0c275..5cbdeb2 100644 --- a/spec/sheetah/backends/csv_spec.rb +++ b/spec/sheetah/backends/csv_spec.rb @@ -59,18 +59,14 @@ def new_sheet(...) let(:sheet) { described_class.new(io) } let(:sheet_headers) { sheet.each_header.map(&:value) } - context "when the IO is opened with a correct UTF-8 external encoding" do + context "when the IO is opened with a correct external encoding" do let(:io) do - File.new(utf8_path, external_encoding: Encoding::UTF_8) + File.new(latin9_path, external_encoding: Encoding::ISO_8859_15) end it "does not fail" do expect { sheet }.not_to raise_error end - - it "produces UTF-8 strings" do - expect(sheet_headers).to eq(headers) - end end context "when the IO is opened with an incorrect external encoding" do @@ -83,33 +79,19 @@ def new_sheet(...) end end - context "when the IO is opened with a correct, non-UTF-8 external encoding" do - context "when UTF-8 is not set as the internal encoding" do - let(:io) do - File.new(latin9_path, external_encoding: Encoding::ISO_8859_15) - end - - it "fails" do - expect { sheet }.to raise_error(described_class::InvalidEncodingError) - end + context "when the IO is setup with different encodings" do + let(:io) do + File.new( + utf8_path, + external_encoding: Encoding::UTF_8, + internal_encoding: Encoding::ISO_8859_15 + ) end - context "when UTF-8 is set as the internal encoding" do - let(:io) do - File.new( - latin9_path, - external_encoding: Encoding::ISO_8859_15, - internal_encoding: Encoding::UTF_8 - ) - end - - it "does not fail" do - expect { sheet }.not_to raise_error - end - - it "produces UTF-8 strings" do - expect(sheet_headers).to eq(headers) - end + it "does not interfere" do + latin9_headers = headers.map { |str| str.encode(Encoding::ISO_8859_15) } + + expect(sheet_headers).to eq(latin9_headers) end end end