diff --git a/lib/sheetah/backends.rb b/lib/sheetah/backends.rb index 946ad7c..1bb5a1b 100644 --- a/lib/sheetah/backends.rb +++ b/lib/sheetah/backends.rb @@ -1,20 +1,15 @@ # frozen_string_literal: true -require_relative "backends_registry" require_relative "utils/monadic_result" module Sheetah module Backends - @registry = BackendsRegistry.new - SimpleError = Struct.new(:msg_code) private_constant :SimpleError class << self - attr_reader :registry - def open(*args, **opts, &block) - backend = opts.delete(:backend) || registry.get(*args, **opts) + backend = opts.delete(:backend) if backend.nil? return Utils::MonadicResult::Failure.new(SimpleError.new("no_applicable_backend")) diff --git a/lib/sheetah/backends/csv.rb b/lib/sheetah/backends/csv.rb index 56c0cc9..253a6e9 100644 --- a/lib/sheetah/backends/csv.rb +++ b/lib/sheetah/backends/csv.rb @@ -3,52 +3,40 @@ require "csv" require_relative "../sheet" -require_relative "../backends" module Sheetah module Backends - # Expect: - # - UTF-8 without BOM, or the correct encoding given explicitly - # - line endings as \n or \r\n - # - comma-separated - # - quoted with " class Csv include Sheet - class ArgumentError < Error + class InvalidCSVError < Error end - class EncodingError < Error - end - - CSV_OPTS = { + DEFAULTS = { + row_sep: :auto, col_sep: ",", quote_char: '"', }.freeze - private_constant :CSV_OPTS - - def self.register(registry = Backends.registry) - registry.set(self) do |args, opts| - next false unless args.empty? - - case opts - in { io: _, **nil } | \ - { io: _, encoding: String | Encoding, **nil } | \ - { path: /\.csv$/i, **nil } | \ - { path: /\.csv$/i, encoding: String | Encoding, **nil } - then - true - else - false - end - end + private_constant :DEFAULTS + + def self.defaults + DEFAULTS end - def initialize(io: nil, path: nil, encoding: nil) - io = setup_io(io, path, encoding) + def initialize( + io, + row_sep: self.class.defaults[:row_sep], + col_sep: self.class.defaults[:col_sep], + quote_char: self.class.defaults[:quote_char] + ) + @csv = CSV.new( + io, + row_sep: row_sep, + col_sep: col_sep, + quote_char: quote_char + ) - @csv = CSV.new(io, **CSV_OPTS) @headers = detect_headers(@csv) @cols_count = @headers.size end @@ -68,62 +56,36 @@ def each_header def each_row return to_enum(:each_row) unless block_given? - @csv.each.with_index(1) do |raw, row| - value = Array.new(@cols_count) do |col_idx| - col = Sheet.int2col(col_idx + 1) + handle_malformed_csv do + @csv.each.with_index(1) do |raw, row| + value = Array.new(@cols_count) do |col_idx| + col = Sheet.int2col(col_idx + 1) - Cell.new(row: row, col: col, value: raw[col_idx]) - end + Cell.new(row: row, col: col, value: raw[col_idx]) + end - yield Row.new(row: row, value: value) + yield Row.new(row: row, value: value) + end end self end def close - @csv.close - - nil + # Do nothing: this backend isn't responsible for opening the IO, and therefore it is not + # responsible for closing it either. end private - def setup_io(io, path, encoding) - if io.nil? && !path.nil? - setup_io_from_path(path, encoding) - elsif !io.nil? && path.nil? - setup_io_from_io(io, encoding) - else - raise ArgumentError, "Expected either IO or path" - end - end - - def setup_io_from_io(io, encoding) - io.set_encoding(encoding, Encoding::UTF_8) if encoding - io - end - - def setup_io_from_path(path, encoding) - opts = { mode: "r" } - - if encoding - opts[:external_encoding] = encoding - opts[:internal_encoding] = Encoding::UTF_8 - end - - File.new(path, **opts) + def handle_malformed_csv + yield + rescue CSV::MalformedCSVError + raise InvalidCSVError end def detect_headers(csv) - headers = - begin - csv.shift - rescue CSV::MalformedCSVError - raise EncodingError - end - - headers || [] + handle_malformed_csv { csv.shift } || [] end end end diff --git a/lib/sheetah/backends/xlsx.rb b/lib/sheetah/backends/xlsx.rb index 9a417a2..7eeb098 100644 --- a/lib/sheetah/backends/xlsx.rb +++ b/lib/sheetah/backends/xlsx.rb @@ -6,27 +6,13 @@ require "roo" require_relative "../sheet" -require_relative "../backends" module Sheetah module Backends class Xlsx include Sheet - def self.register(registry = Backends.registry) - registry.set(self) do |args, opts| - next false unless args.empty? - - case opts - in { path: /\.xlsx$/i, **nil } - true - else - false - end - end - end - - def initialize(path:) + def initialize(path) raise Error if path.nil? @roo = Roo::Excelx.new(path) diff --git a/lib/sheetah/backends_registry.rb b/lib/sheetah/backends_registry.rb deleted file mode 100644 index 9dfab52..0000000 --- a/lib/sheetah/backends_registry.rb +++ /dev/null @@ -1,27 +0,0 @@ -# frozen_string_literal: true - -module Sheetah - class BackendsRegistry - def initialize - @registry = {} - end - - def set(backend, &matcher) - @registry[backend] = matcher - self - end - - def get(*args, **opts) - @registry.each do |backend, matcher| - return backend if matcher.call(args, opts) - end - - nil - end - - def freeze - @registry.freeze - super - end - end -end diff --git a/spec/sheetah/backends/csv_spec.rb b/spec/sheetah/backends/csv_spec.rb index 1357f58..5cbdeb2 100644 --- a/spec/sheetah/backends/csv_spec.rb +++ b/spec/sheetah/backends/csv_spec.rb @@ -4,7 +4,6 @@ require "support/shared/sheet_factories" require "csv" require "stringio" -require "tempfile" RSpec.describe Sheetah::Backends::Csv do include_context "sheet_factories" @@ -22,53 +21,21 @@ end let(:sheet) do - described_class.new(io: raw_sheet) + described_class.new(raw_sheet) end def stub_sheet(table) - return if table.nil? - csv = CSV.generate do |csv_io| table.each do |row| csv_io << row end end - StringIO.new(csv, "r") + StringIO.new(csv, "r:UTF-8") end def new_sheet(...) - described_class.new(io: stub_sheet(...)) - end - - describe "::register" do - let(:registry) { Sheetah::BackendsRegistry.new } - - before do - described_class.register(registry) - end - - it "matches any so-called IO and an optional encoding" do - io = double - - expect(registry.get(io: io)).to eq(described_class) - expect(registry.get(io: io, encoding: "UTF-8")).to eq(described_class) - expect(registry.get(io: io, encoding: Encoding::UTF_8)).to eq(described_class) - end - - it "matches a CSV path and an optional encoding" do - expect(registry.get(path: "foo.csv")).to eq(described_class) - expect(registry.get(path: "foo.csv", encoding: "UTF-8")).to eq(described_class) - expect(registry.get(path: "foo.csv", encoding: Encoding::UTF_8)).to eq(described_class) - end - - it "doesn't match any other path" do - expect(registry.get(path: "foo.tsv")).to be_nil - end - - it "doesn't match extra args" do - expect(registry.get(2, path: "foo.csv")).to be_nil - end + described_class.new(stub_sheet(...)) end describe "#initialize" do @@ -89,73 +56,42 @@ def new_sheet(...) ] end - context "when no io nor path is given" do - it "fails" do - expect do - described_class.new - end.to raise_error(described_class::ArgumentError) + let(:sheet) { described_class.new(io) } + let(:sheet_headers) { sheet.each_header.map(&:value) } + + context "when the IO is opened with a correct external encoding" do + let(:io) do + File.new(latin9_path, external_encoding: Encoding::ISO_8859_15) end - end - context "when both an io and a path are given" do - it "fails" do - expect do - described_class.new(io: double, path: double) - end.to raise_error(described_class::ArgumentError) + it "does not fail" do + expect { sheet }.not_to raise_error end end - context "when only an io is given" do - let(:io) { File.new(io_path) } - - context "when the default encoding is valid" do - alias_method :io_path, :utf8_path - - it "can read CSV data" do - sheet = described_class.new(io: io) - expect(sheet.each_header.map(&:value)).to eq(headers) - end + context "when the IO is opened with an incorrect external encoding" do + let(:io) do + File.new(latin9_path, external_encoding: Encoding::UTF_8) end - context "when the default encoding is invalid" do - alias_method :io_path, :latin9_path - - it "fails" do - expect do - described_class.new(io: io) - end.to raise_error(described_class::EncodingError) - end - - it "can read CSV data once given a valid encoding" do - sheet = described_class.new(io: io, encoding: Encoding::ISO_8859_15) - expect(sheet.each_header.map(&:value)).to eq(headers) - end + it "fails" do + expect { sheet }.to raise_error(described_class::InvalidCSVError) end end - context "when only a path is given" do - context "when the default encoding is valid" do - alias_method :path, :utf8_path - - it "can read CSV data" do - sheet = described_class.new(path: path) - expect(sheet.each_header.map(&:value)).to eq(headers) - end + context "when the IO is setup with different encodings" do + let(:io) do + File.new( + utf8_path, + external_encoding: Encoding::UTF_8, + internal_encoding: Encoding::ISO_8859_15 + ) end - context "when the default encoding is invalid" do - alias_method :path, :latin9_path - - it "fails" do - expect do - described_class.new(path: path) - end.to raise_error(described_class::EncodingError) - end + it "does not interfere" do + latin9_headers = headers.map { |str| str.encode(Encoding::ISO_8859_15) } - it "can read CSV data once given a valid encoding" do - sheet = described_class.new(path: path, encoding: Encoding::ISO_8859_15) - expect(sheet.each_header.map(&:value)).to eq(headers) - end + expect(sheet_headers).to eq(latin9_headers) end end end @@ -226,8 +162,8 @@ def new_sheet(...) expect(sheet.close).to be_nil end - it "closes the underlying sheet" do - expect { sheet.close }.to change(raw_sheet, :closed?).from(false).to(true) + it "doesn't close the underlying sheet" do + expect { sheet.close }.not_to change(raw_sheet, :closed?).from(false) end end @@ -242,12 +178,6 @@ def new_sheet(...) end end - context "when the input table is nil" do - it "raises an error" do - expect { new_sheet(nil) }.to raise_error(Sheetah::Sheet::Error) - end - end - context "when the input table is empty" do let(:sheet) { new_sheet [] } @@ -262,8 +192,10 @@ def new_sheet(...) end describe "CSV options" do - it "requires a specific col_sep and quote_char" do - expect(CSV).to receive(:new).with(raw_sheet, col_sep: ",", quote_char: '"').and_call_original + it "requires a specific col_sep and quote_char, and an automatic row_sep" do + expect(CSV).to receive(:new) + .with(raw_sheet, row_sep: :auto, col_sep: ",", quote_char: '"') + .and_call_original sheet end diff --git a/spec/sheetah/backends/xlsx_spec.rb b/spec/sheetah/backends/xlsx_spec.rb index 01540cb..10d20ca 100644 --- a/spec/sheetah/backends/xlsx_spec.rb +++ b/spec/sheetah/backends/xlsx_spec.rb @@ -11,27 +11,7 @@ end def new_sheet(path) - described_class.new(path: path && fixture_path(path)) - end - - describe "::register" do - let(:registry) { Sheetah::BackendsRegistry.new } - - before do - described_class.register(registry) - end - - it "matches a XLSX path" do - expect(registry.get(path: "foo.xlsx")).to eq(described_class) - end - - it "doesn't match any other path" do - expect(registry.get(path: "foo.xls")).to be_nil - end - - it "doesn't match extra args" do - expect(registry.get(2, path: "foo.xlsx")).to be_nil - end + described_class.new(path && fixture_path(path)) end describe "#each_header" do diff --git a/spec/sheetah/backends_registry_spec.rb b/spec/sheetah/backends_registry_spec.rb deleted file mode 100644 index 9216925..0000000 --- a/spec/sheetah/backends_registry_spec.rb +++ /dev/null @@ -1,65 +0,0 @@ -# frozen_string_literal: true - -require "sheetah/backends_registry" - -RSpec.describe Sheetah::BackendsRegistry do - let(:registry) { described_class.new } - - let(:backend0) { double(:backend0) } - let(:backend1) { double(:backend1) } - - before do - registry.set(backend0) do |args, opts| - ok = (case args; in [[1, 2, Symbol]] then true; else false; end) - ok && (case opts; in { foo: Hash } then true; else false; end) - end - - registry.set(backend1) do |args, opts| - ok = (case args; in [] then true; else false; end) - ok && (case opts; in { path: /\.csv$/ } then true; else false; end) - end - end - - describe "#get / #set" do - it "can set a new backend with a matcher" do - expect(registry.get([1, 2, :ozij], foo: { 1 => 2 })).to be(backend0) - expect(registry.get(path: "file.csv")).to be(backend1) - expect(registry.get(double, path: "file.csv")).to be_nil - end - - it "can overwrite a previous backend matcher" do - registry.set(backend0) do |args, opts| - ok = (case args; in ["foo"] then true; else false; end) - ok && (case opts; in {} then true; else false; end) - end - - expect(registry.get("foo")).to be(backend0) - end - end - - describe "#set" do - it "returns the registry itself" do - result = registry.set(backend0) {} - - expect(result).to be(registry) - end - end - - describe "#freeze" do - before { registry.freeze } - - it "freezes the registry" do - expect(registry).to be_frozen - end - - it "prevents further modifications" do - expect do - registry.set(backend0) {} - end.to raise_error(FrozenError) - end - - it "doesn't prevent further readings" do - expect(registry.get(path: "foo.csv")).to be(backend1) - end - end -end diff --git a/spec/sheetah/backends_spec.rb b/spec/sheetah/backends_spec.rb index 2922b2a..ef46c47 100644 --- a/spec/sheetah/backends_spec.rb +++ b/spec/sheetah/backends_spec.rb @@ -3,12 +3,6 @@ require "sheetah/backends" RSpec.describe Sheetah::Backends do - describe "::registry" do - it "is a registry of backends" do - expect(described_class.registry).to be_a(Sheetah::BackendsRegistry) - end - end - describe "::open" do let(:backend) do double @@ -20,24 +14,8 @@ it "may open with an explicit backend" do allow(backend).to receive(:open).with(foo, bar: bar).and_return(res) - expect(described_class.registry).not_to receive(:get) expect(described_class.open(foo, backend: backend, bar: bar)).to be(res) end - - it "may open with an implicit backend" do - allow(backend).to receive(:open).with(foo, bar: bar).and_return(res) - allow(described_class.registry).to receive(:get).with(foo, bar: bar).and_return(backend) - - expect(described_class.open(foo, bar: bar)).to be(res) - end - - it "may miss a backend to open" do - allow(described_class.registry).to receive(:get).with(foo, bar: bar).and_return(nil) - - result = described_class.open(foo, bar: bar) - expect(result).to be_failure - expect(result.failure).to have_attributes(msg_code: "no_applicable_backend") - end end end