From 8bee90d0439204be2a0a6a5b506d155475473526 Mon Sep 17 00:00:00 2001 From: Joshua Moody Date: Tue, 2 Apr 2013 23:05:43 +0200 Subject: [PATCH 01/10] added gitignore file --- .gitignore | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8855b5e --- /dev/null +++ b/.gitignore @@ -0,0 +1,29 @@ +*.gem +*.rbc +.bundle +.config +.yardoc +Gemfile.lock +InstalledFiles +_yardoc +vendor +coverage +doc/ +lib/bundler/man +pkg +rdoc +spec/reports +test/tmp +test/version_tmp +tmp + +# JetBrains +.idea + +# mac +.DS_Store +__MACOSX + +# emacs turds +(.*/)?\#[^/]*\#$ + From baecf823cc9d8de4c1376acf69392d68dc9c892c Mon Sep 17 00:00:00 2001 From: Joshua Moody Date: Tue, 2 Apr 2013 23:13:07 +0200 Subject: [PATCH 02/10] added additional specs for parsing ASCII files. commented out some code i did not understand in the rake file --- Gemfile.lock | 28 ++++++++++++++-------------- Rakefile | 9 +++++---- spec/apfel_spec.rb | 29 +++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 18 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 5fcc440..b5c344d 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -6,23 +6,23 @@ PATH GEM remote: https://rubygems.org/ specs: - coderay (1.0.8) - diff-lcs (1.1.3) + coderay (1.0.9) + diff-lcs (1.2.2) method_source (0.8.1) - pry (0.9.10) + pry (0.9.12) coderay (~> 1.0.5) method_source (~> 0.8) - slop (~> 3.3.1) - rake (0.9.2.2) - rspec (2.12.0) - rspec-core (~> 2.12.0) - rspec-expectations (~> 2.12.0) - rspec-mocks (~> 2.12.0) - rspec-core (2.12.0) - rspec-expectations (2.12.0) - diff-lcs (~> 1.1.3) - rspec-mocks (2.12.0) - slop (3.3.3) + slop (~> 3.4) + rake (10.0.4) + rspec (2.13.0) + rspec-core (~> 2.13.0) + rspec-expectations (~> 2.13.0) + rspec-mocks (~> 2.13.0) + rspec-core (2.13.1) + rspec-expectations (2.13.0) + diff-lcs (>= 1.1.3, < 2.0) + rspec-mocks (2.13.0) + slop (3.4.4) PLATFORMS ruby diff --git a/Rakefile b/Rakefile index 353afb5..afba023 100644 --- a/Rakefile +++ b/Rakefile @@ -6,10 +6,11 @@ rescue LoadError end Bundler::GemHelper.install_tasks -require 'rspec/core/rake_task' -RSpec::Core::RakeTask.new do |t| - t.rspec_opts = ["--color", '--order rand'] -end +# this was causing a world of hurt +#require 'rspec/core/rake_task' +#RSpec::Core::RakeTask.new do |t| +# t.rspec_opts = ["--color", '--order rand'] +#end desc "Run all tests and documentation checks" task :qa => [:spec] diff --git a/spec/apfel_spec.rb b/spec/apfel_spec.rb index 77a753d..98b5ae2 100644 --- a/spec/apfel_spec.rb +++ b/spec/apfel_spec.rb @@ -10,9 +10,38 @@ Apfel.parse(valid_file) end + it 'returns a ParsedDotStrings object' do parsed_file.should be_a(Apfel::ParsedDotStrings) end + + it 'should have the correct keys' do + parsed_file.keys.should include 'key_number_one' + parsed_file.keys.should include 'key_number_two' + parsed_file.keys.should include 'key_number_three' + end + + it 'should have the correct values' do + parsed_file.values.should include 'value number one' + parsed_file.values.should include 'value number two' + parsed_file.values.should include 'value number three' + end + + describe 'should have the correct comments' do + it 'should have the correct comment for first' do + parsed_file.comments(with_keys: false).should include 'This is the first comment' + end + + it 'should have the correct comment for second' do + parsed_file.comments['key_number_two'].should == 'This is a multiline comment' + end + + + it 'should have the correct comment for third' do + parsed_file.comments(with_keys: false).should include 'This is comment number 3' + end + end + end context 'when given an invalid strings file' do From 709ad5cd3b416d570f7339842df8b80cc882544a Mon Sep 17 00:00:00 2001 From: Joshua Moody Date: Tue, 2 Apr 2013 23:24:40 +0200 Subject: [PATCH 03/10] refactored create_temp_file to take (required) encoding argument --- spec/apfel_spec.rb | 1 - spec/reader_spec.rb | 2 +- spec/spec_helper.rb | 11 ++++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/spec/apfel_spec.rb b/spec/apfel_spec.rb index 98b5ae2..6c81b31 100644 --- a/spec/apfel_spec.rb +++ b/spec/apfel_spec.rb @@ -10,7 +10,6 @@ Apfel.parse(valid_file) end - it 'returns a ParsedDotStrings object' do parsed_file.should be_a(Apfel::ParsedDotStrings) end diff --git a/spec/reader_spec.rb b/spec/reader_spec.rb index 123bff8..0d02c24 100644 --- a/spec/reader_spec.rb +++ b/spec/reader_spec.rb @@ -5,7 +5,7 @@ module Apfel describe Reader do describe '#read' do let(:temp_file) do - create_temp_file(<<-EOS + create_temp_file('ascii', <<-EOS This is a file with some lines. Roses are red, violets are blue. This text is really boring, diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index ed46613..40b1c43 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,13 +1,14 @@ require 'tempfile' require 'json' -def create_temp_file(string) - temp_file = Tempfile.new('temp') + +def create_temp_file(encoding, string) + temp_file = Tempfile.new('temp', encoding => encoding) temp_file << string temp_file.flush end -def valid_file - create_temp_file(<<-EOS +def valid_file(encoding='ascii') + create_temp_file(encoding, <<-EOS /* This is the first comment */ "key_number_one" = "value number one"; @@ -18,6 +19,6 @@ def valid_file "key_number_two" = "value number two"; /* This is comment number 3 */ "key_number_three" = " value number three "; - EOS + EOS ) end From 60329bd7ecd32999c072d025fce2f2b145575e39 Mon Sep 17 00:00:00 2001 From: Joshua Moody Date: Tue, 2 Apr 2013 23:45:03 +0200 Subject: [PATCH 04/10] cannot reproduce bug using Tempfile + encoding - will try with actual strings files --- spec/apfel_parse_utf8_spec.rb | 45 +++++++++++++++++++++++ spec/apfel_spec.rb | 9 ++--- spec/ascii.strings | 63 ++++++++++++++++++++++++++++++++ spec/spec_helper.rb | 2 +- spec/utf8.strings | 69 +++++++++++++++++++++++++++++++++++ 5 files changed, 182 insertions(+), 6 deletions(-) create mode 100644 spec/apfel_parse_utf8_spec.rb create mode 100644 spec/ascii.strings create mode 100644 spec/utf8.strings diff --git a/spec/apfel_parse_utf8_spec.rb b/spec/apfel_parse_utf8_spec.rb new file mode 100644 index 0000000..5c13cbc --- /dev/null +++ b/spec/apfel_parse_utf8_spec.rb @@ -0,0 +1,45 @@ +require 'spec_helper' +require 'apfel' +require 'apfel/parsed_dot_strings' + +describe Apfel do + describe '::parse_file' do + context 'when given a UTF8 DotStrings file'do + + let(:parsed_file) do + Apfel.parse(valid_file 'utf-8') + end + + it 'returns a ParsedDotStrings object' do + parsed_file.should be_a(Apfel::ParsedDotStrings) + end + + it 'should have the correct keys' do + parsed_file.keys.should include 'key_number_one' + parsed_file.keys.should include 'key_number_two' + parsed_file.keys.should include 'key_number_three' + end + + it 'should have the correct values' do + parsed_file.values.should include 'value number one' + parsed_file.values.should include 'value number two' + parsed_file.values.should include 'value number three' + end + + describe 'should have the correct comments' do + it 'should have the correct comment for first' do + parsed_file.comments(with_keys: false).should include 'This is the first comment' + end + + it 'should have the correct comment for second' do + parsed_file.comments['key_number_two'].should == 'This is a multiline comment' + end + + + it 'should have the correct comment for third' do + parsed_file.comments(with_keys: false).should include 'This is comment number 3' + end + end + end + end +end diff --git a/spec/apfel_spec.rb b/spec/apfel_spec.rb index 6c81b31..a96203b 100644 --- a/spec/apfel_spec.rb +++ b/spec/apfel_spec.rb @@ -4,7 +4,7 @@ describe Apfel do describe '::parse_file' do - context 'when given a valid DotStrings file'do + context 'when given an ASCII DotStrings file'do let(:parsed_file) do Apfel.parse(valid_file) @@ -40,17 +40,16 @@ parsed_file.comments(with_keys: false).should include 'This is comment number 3' end end - end context 'when given an invalid strings file' do context 'missing a semicolon' do let(:invalid_file_semicolon) do - create_temp_file( <<-EOS + create_temp_file('ascii', <<-EOS /* This is the first comment */ "key_number_one" = "value number one" - EOS + EOS ) end @@ -63,7 +62,7 @@ context 'not closed comment' do let(:invalid_file_comment) do - create_temp_file(<<-EOS + create_temp_file('ascii', <<-EOS /* This is the first comment "key_number_one" = "value number one"; diff --git a/spec/ascii.strings b/spec/ascii.strings new file mode 100644 index 0000000..6955d57 --- /dev/null +++ b/spec/ascii.strings @@ -0,0 +1,63 @@ +/* No comment provided by engineer. */ +"avoided social event" = "avoided social event"; + +/* No comment provided by engineer. */ +"binged" = "binged"; + +/* No comment provided by engineer. */ +"called a friend" = "called a friend"; + +/* No comment provided by engineer. */ +"called a mentor" = "called a mentor"; + +/* No comment provided by engineer. */ +"called my sponsor" = "called my sponsor"; + +/* No comment provided by engineer. */ +"chewed and spit" = "chewed and spit"; + +/* No comment provided by engineer. */ +"exercised" = "exercised"; + +/* No comment provided by engineer. */ +"expressed gratitude" = "expressed gratitude"; + +/* No comment provided by engineer. */ +"lied" = "lied"; + +/* No comment provided by engineer. */ +"meditated" = "meditated"; + +/* No comment provided by engineer. */ +"practiced mindfulness" = "practiced mindfulness"; + +/* No comment provided by engineer. */ +"procrastinated" = "procrastinated"; + +/* No comment provided by engineer. */ +"purged" = "purged"; + +/* No comment provided by engineer. */ +"restricted" = "restricted"; + +/* No comment provided by engineer. */ +"self-harmed" = "self-harmed"; + +/* No comment provided by engineer. */ +"skipped meals" = "skipped meals"; + +/* No comment provided by engineer. */ +"stole" = "stole"; + +/* No comment provided by engineer. */ +"took diuretics" = "took diuretics"; + +/* No comment provided by engineer. */ +"took laxatives" = "took laxatives"; + +/* No comment provided by engineer. */ +"used a coping skill" = "used a coping skill"; + +/* No comment provided by engineer. */ +"wrote in journal" = "wrote in journal"; + diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 40b1c43..c7ff331 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -2,7 +2,7 @@ require 'json' def create_temp_file(encoding, string) - temp_file = Tempfile.new('temp', encoding => encoding) + temp_file = Tempfile.new([encoding, 'temp']) temp_file << string temp_file.flush end diff --git a/spec/utf8.strings b/spec/utf8.strings new file mode 100644 index 0000000..d6b4f89 --- /dev/null +++ b/spec/utf8.strings @@ -0,0 +1,69 @@ +/* No comment provided by engineer. */ +"anger" = "anger"; + +/* No comment provided by engineer. */ +"anxiety" = "anxiety"; + +/* No comment provided by engineer. */ +"boredom" = "boredom"; + +/* No comment provided by engineer. */ +"confidence" = "confidence"; + +/* No comment provided by engineer. */ +"depression" = "depression"; + +/* No comment provided by engineer. */ +"disappointment" = "disappointment"; + +/* No comment provided by engineer. */ +"disgust" = "disgust"; + +/* No comment provided by engineer. */ +"fear" = "fear"; + +/* No comment provided by engineer. */ +"guilt" = "guilt"; + +/* No comment provided by engineer. */ +"happiness" = "happiness"; + +/* No comment provided by engineer. */ +"hope" = "hope"; + +/* No comment provided by engineer. */ +"hurt" = "hurt"; + +/* No comment provided by engineer. */ +"insecurity" = "insecurity"; + +/* No comment provided by engineer. */ +"irritability" = "irritability"; + +/* No comment provided by engineer. */ +"isolation" = "isolation"; + +/* No comment provided by engineer. */ +"joy" = "joy"; + +/* No comment provided by engineer. */ +"loneliness" = "loneliness"; + +/* No comment provided by engineer. */ +"pride" = "pride"; + +/* No comment provided by engineer. */ +"sadness" = "sadness"; + +/* No comment provided by engineer. */ +"shame" = "shame"; + +/* No comment provided by engineer. */ +"stress" = "stress"; + +/* No comment provided by engineer. */ +"tired" = "tired"; + +/* No comment provided by engineer. */ +"worry" = "worry"; + From 7a6349ef10b2eeaf3913173a834ba00fd0bc1a9c Mon Sep 17 00:00:00 2001 From: Joshua Moody Date: Wed, 3 Apr 2013 00:37:46 +0200 Subject: [PATCH 05/10] added some tests around encoding --- spec/apfel_parse_ascii_file_spec.rb | 50 +++++++++++++++++++++++ spec/apfel_parse_utf8_file_spec.rb | 51 ++++++++++++++++++++++++ spec/apfel_parse_utf8_spec.rb | 45 --------------------- spec/ascii.strings | 54 ------------------------- spec/spec_helper.rb | 2 + spec/utf8.strings | 61 ----------------------------- 6 files changed, 103 insertions(+), 160 deletions(-) create mode 100644 spec/apfel_parse_ascii_file_spec.rb create mode 100644 spec/apfel_parse_utf8_file_spec.rb delete mode 100644 spec/apfel_parse_utf8_spec.rb diff --git a/spec/apfel_parse_ascii_file_spec.rb b/spec/apfel_parse_ascii_file_spec.rb new file mode 100644 index 0000000..bf5bbb5 --- /dev/null +++ b/spec/apfel_parse_ascii_file_spec.rb @@ -0,0 +1,50 @@ +require 'spec_helper' +require 'apfel' +require 'apfel/parsed_dot_strings' + +describe Apfel do + describe '::parse_file' do + context 'when given a ASCII DotStrings file'do + + it 'the file should be ascii' do + res = `file -I ./spec/ascii.strings` + encoding = res.split(/=/).last.gsub!("\n",'') + encoding.should == 'us-ascii' + end + + let(:parsed_file) do + Apfel.parse('./spec/ascii.strings') + end + + it 'returns a ParsedDotStrings object' do + parsed_file.should be_a(Apfel::ParsedDotStrings) + end + + #it 'should have the correct keys' do + # parsed_file.keys.should include 'key_number_one' + # parsed_file.keys.should include 'key_number_two' + # parsed_file.keys.should include 'key_number_three' + #end + # + #it 'should have the correct values' do + # parsed_file.values.should include 'value number one' + # parsed_file.values.should include 'value number two' + # parsed_file.values.should include 'value number three' + #end + # + describe 'should have the correct comments' do + it 'should have the correct comment for avoided social event' do + parsed_file.comments['avoided social event'].should == 'No comment provided by engineer.' + end + + it 'should have the correct comment for binged' do + parsed_file.comments['binged'].should == 'No comment provided by engineer.' + end + + it 'should have the correct comment for called a friend' do + parsed_file.comments['called a friend'].should == 'No comment provided by engineer.' + end + end + end + end +end diff --git a/spec/apfel_parse_utf8_file_spec.rb b/spec/apfel_parse_utf8_file_spec.rb new file mode 100644 index 0000000..d672442 --- /dev/null +++ b/spec/apfel_parse_utf8_file_spec.rb @@ -0,0 +1,51 @@ +require 'spec_helper' +require 'apfel' +require 'apfel/parsed_dot_strings' + +describe Apfel do + describe '::parse_file' do + context 'when given a UTF8 DotStrings file'do + + it 'the file should be utf-8' do + res = `file -I ./spec/utf8.strings` + encoding = res.split(/=/).last.gsub!("\n",'') + encoding.should == 'utf-8' + end + + let(:parsed_file) do + Apfel.parse('./spec/utf8.strings') + end + + it 'returns a ParsedDotStrings object' do + parsed_file.should be_a(Apfel::ParsedDotStrings) + end + + #it 'should have the correct keys' do + # parsed_file.keys.should include 'key_number_one' + # parsed_file.keys.should include 'key_number_two' + # parsed_file.keys.should include 'key_number_three' + #end + # + #it 'should have the correct values' do + # parsed_file.values.should include 'value number one' + # parsed_file.values.should include 'value number two' + # parsed_file.values.should include 'value number three' + #end + # + describe 'should have the correct comments' do + it 'should have the correct comment for anger' do + parsed_file.comments['anger'].should == 'No comment provided by engineer.' + end + + it 'should have the correct comment for anxiety' do + parsed_file.comments['anxiety'].should == 'No comment provided by engineer.' + end + + it 'should have the correct comment for boredom' do + parsed_file.comments['boredom'].should == 'No comment provided by engineer.' + end + end + + end + end +end diff --git a/spec/apfel_parse_utf8_spec.rb b/spec/apfel_parse_utf8_spec.rb deleted file mode 100644 index 5c13cbc..0000000 --- a/spec/apfel_parse_utf8_spec.rb +++ /dev/null @@ -1,45 +0,0 @@ -require 'spec_helper' -require 'apfel' -require 'apfel/parsed_dot_strings' - -describe Apfel do - describe '::parse_file' do - context 'when given a UTF8 DotStrings file'do - - let(:parsed_file) do - Apfel.parse(valid_file 'utf-8') - end - - it 'returns a ParsedDotStrings object' do - parsed_file.should be_a(Apfel::ParsedDotStrings) - end - - it 'should have the correct keys' do - parsed_file.keys.should include 'key_number_one' - parsed_file.keys.should include 'key_number_two' - parsed_file.keys.should include 'key_number_three' - end - - it 'should have the correct values' do - parsed_file.values.should include 'value number one' - parsed_file.values.should include 'value number two' - parsed_file.values.should include 'value number three' - end - - describe 'should have the correct comments' do - it 'should have the correct comment for first' do - parsed_file.comments(with_keys: false).should include 'This is the first comment' - end - - it 'should have the correct comment for second' do - parsed_file.comments['key_number_two'].should == 'This is a multiline comment' - end - - - it 'should have the correct comment for third' do - parsed_file.comments(with_keys: false).should include 'This is comment number 3' - end - end - end - end -end diff --git a/spec/ascii.strings b/spec/ascii.strings index 6955d57..4508787 100644 --- a/spec/ascii.strings +++ b/spec/ascii.strings @@ -7,57 +7,3 @@ /* No comment provided by engineer. */ "called a friend" = "called a friend"; -/* No comment provided by engineer. */ -"called a mentor" = "called a mentor"; - -/* No comment provided by engineer. */ -"called my sponsor" = "called my sponsor"; - -/* No comment provided by engineer. */ -"chewed and spit" = "chewed and spit"; - -/* No comment provided by engineer. */ -"exercised" = "exercised"; - -/* No comment provided by engineer. */ -"expressed gratitude" = "expressed gratitude"; - -/* No comment provided by engineer. */ -"lied" = "lied"; - -/* No comment provided by engineer. */ -"meditated" = "meditated"; - -/* No comment provided by engineer. */ -"practiced mindfulness" = "practiced mindfulness"; - -/* No comment provided by engineer. */ -"procrastinated" = "procrastinated"; - -/* No comment provided by engineer. */ -"purged" = "purged"; - -/* No comment provided by engineer. */ -"restricted" = "restricted"; - -/* No comment provided by engineer. */ -"self-harmed" = "self-harmed"; - -/* No comment provided by engineer. */ -"skipped meals" = "skipped meals"; - -/* No comment provided by engineer. */ -"stole" = "stole"; - -/* No comment provided by engineer. */ -"took diuretics" = "took diuretics"; - -/* No comment provided by engineer. */ -"took laxatives" = "took laxatives"; - -/* No comment provided by engineer. */ -"used a coping skill" = "used a coping skill"; - -/* No comment provided by engineer. */ -"wrote in journal" = "wrote in journal"; - diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index c7ff331..6523a6c 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,6 +1,8 @@ require 'tempfile' require 'json' +# added encoding to see if could could reproduce the missing +# first comment in utf8 strings files def create_temp_file(encoding, string) temp_file = Tempfile.new([encoding, 'temp']) temp_file << string diff --git a/spec/utf8.strings b/spec/utf8.strings index d6b4f89..40632e1 100644 --- a/spec/utf8.strings +++ b/spec/utf8.strings @@ -6,64 +6,3 @@ /* No comment provided by engineer. */ "boredom" = "boredom"; - -/* No comment provided by engineer. */ -"confidence" = "confidence"; - -/* No comment provided by engineer. */ -"depression" = "depression"; - -/* No comment provided by engineer. */ -"disappointment" = "disappointment"; - -/* No comment provided by engineer. */ -"disgust" = "disgust"; - -/* No comment provided by engineer. */ -"fear" = "fear"; - -/* No comment provided by engineer. */ -"guilt" = "guilt"; - -/* No comment provided by engineer. */ -"happiness" = "happiness"; - -/* No comment provided by engineer. */ -"hope" = "hope"; - -/* No comment provided by engineer. */ -"hurt" = "hurt"; - -/* No comment provided by engineer. */ -"insecurity" = "insecurity"; - -/* No comment provided by engineer. */ -"irritability" = "irritability"; - -/* No comment provided by engineer. */ -"isolation" = "isolation"; - -/* No comment provided by engineer. */ -"joy" = "joy"; - -/* No comment provided by engineer. */ -"loneliness" = "loneliness"; - -/* No comment provided by engineer. */ -"pride" = "pride"; - -/* No comment provided by engineer. */ -"sadness" = "sadness"; - -/* No comment provided by engineer. */ -"shame" = "shame"; - -/* No comment provided by engineer. */ -"stress" = "stress"; - -/* No comment provided by engineer. */ -"tired" = "tired"; - -/* No comment provided by engineer. */ -"worry" = "worry"; - From 467bf0c129aac94229b9601971ded0738959aa9a Mon Sep 17 00:00:00 2001 From: Joshua Moody Date: Wed, 3 Apr 2013 02:13:24 +0200 Subject: [PATCH 06/10] solved the problem with BOM at the beginning of strings files --- lib/apfel.rb | 4 +++- lib/apfel/reader.rb | 9 ++++++--- spec/apfel_parse_utf8_file_spec.rb | 8 +++++--- spec/spec_helper.rb | 5 +++-- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/lib/apfel.rb b/lib/apfel.rb index 46c2330..55295d1 100644 --- a/lib/apfel.rb +++ b/lib/apfel.rb @@ -7,10 +7,12 @@ module Apfel # strings object def self.parse(file) file = read(file) + # confirmed that read does remove the first comment in utf DotStringsParser.new(file).parse_file end def self.read(file) - Reader.read(file) + # confirmed that read does remove the first comment in utf + Reader.read(file) end end diff --git a/lib/apfel/reader.rb b/lib/apfel/reader.rb index 0e26838..bc792eb 100644 --- a/lib/apfel/reader.rb +++ b/lib/apfel/reader.rb @@ -4,9 +4,12 @@ class Reader # Reads in a file and returns an array consisting of each line of input # cleaned of new line characters def self.read(file) - File.open(file, "r") do |f| - content_array=[] - content = f.read + File.open(file, 'r') do |f| + content_array=[] + # http://stackoverflow.com/questions/5011504/is-there-a-way-to-remove-the-bom-from-a-utf-8-encoded-file + # problem is the BOM that can be found at char 0 in strings files + content = f.read.force_encoding('UTF-8') + content.sub!("\xEF\xBB\xBF".force_encoding("UTF-8"), '') content.each_line do |line| line.gsub!("\n","") content_array.push(line) diff --git a/spec/apfel_parse_utf8_file_spec.rb b/spec/apfel_parse_utf8_file_spec.rb index d672442..931b646 100644 --- a/spec/apfel_parse_utf8_file_spec.rb +++ b/spec/apfel_parse_utf8_file_spec.rb @@ -7,9 +7,11 @@ context 'when given a UTF8 DotStrings file'do it 'the file should be utf-8' do - res = `file -I ./spec/utf8.strings` - encoding = res.split(/=/).last.gsub!("\n",'') - encoding.should == 'utf-8' + File.open('./spec/utf8.strings', 'r') do |f| + f.external_encoding.name.should == 'UTF-8' + content = f.read + content.encoding.name.should == 'UTF-8' + end end let(:parsed_file) do diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 6523a6c..5aee9b1 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,8 +1,9 @@ require 'tempfile' require 'json' -# added encoding to see if could could reproduce the missing -# first comment in utf8 strings files +# added encoding to see if could could reproduce the missing first comment in +# utf8 strings files - did not reproduce the problem +# SOLVED: it was the BOM at char 0 in the strings file def create_temp_file(encoding, string) temp_file = Tempfile.new([encoding, 'temp']) temp_file << string From 21e8504f3881f96324bc42bd9cb92b4e1f0f2bf9 Mon Sep 17 00:00:00 2001 From: Gabe da Silveira Date: Fri, 5 Apr 2013 09:30:03 +0100 Subject: [PATCH 07/10] That's what allow rake spec to work. --- Rakefile | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Rakefile b/Rakefile index afba023..353afb5 100644 --- a/Rakefile +++ b/Rakefile @@ -6,11 +6,10 @@ rescue LoadError end Bundler::GemHelper.install_tasks -# this was causing a world of hurt -#require 'rspec/core/rake_task' -#RSpec::Core::RakeTask.new do |t| -# t.rspec_opts = ["--color", '--order rand'] -#end +require 'rspec/core/rake_task' +RSpec::Core::RakeTask.new do |t| + t.rspec_opts = ["--color", '--order rand'] +end desc "Run all tests and documentation checks" task :qa => [:spec] From 5afc5de9695c560d56970bd3f3d8c9d20546ec5b Mon Sep 17 00:00:00 2001 From: Gabe da Silveira Date: Fri, 5 Apr 2013 09:38:16 +0100 Subject: [PATCH 08/10] Gemfile.lock removed (previously only ignored) As per best practice in lib development. --- Gemfile.lock | 34 ---------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 Gemfile.lock diff --git a/Gemfile.lock b/Gemfile.lock deleted file mode 100644 index b5c344d..0000000 --- a/Gemfile.lock +++ /dev/null @@ -1,34 +0,0 @@ -PATH - remote: . - specs: - apfel (0.0.4) - -GEM - remote: https://rubygems.org/ - specs: - coderay (1.0.9) - diff-lcs (1.2.2) - method_source (0.8.1) - pry (0.9.12) - coderay (~> 1.0.5) - method_source (~> 0.8) - slop (~> 3.4) - rake (10.0.4) - rspec (2.13.0) - rspec-core (~> 2.13.0) - rspec-expectations (~> 2.13.0) - rspec-mocks (~> 2.13.0) - rspec-core (2.13.1) - rspec-expectations (2.13.0) - diff-lcs (>= 1.1.3, < 2.0) - rspec-mocks (2.13.0) - slop (3.4.4) - -PLATFORMS - ruby - -DEPENDENCIES - apfel! - pry - rake - rspec From 86f37505866d45b0be1383feb8d8dc5512fba85c Mon Sep 17 00:00:00 2001 From: Gabe da Silveira Date: Fri, 5 Apr 2013 10:06:02 +0100 Subject: [PATCH 09/10] Move spec sample files into subdirectory --- spec/apfel_parse_ascii_file_spec.rb | 4 ++-- spec/apfel_parse_utf8_file_spec.rb | 4 ++-- spec/{ => files}/ascii.strings | 0 spec/{ => files}/utf8.strings | 0 4 files changed, 4 insertions(+), 4 deletions(-) rename spec/{ => files}/ascii.strings (100%) rename spec/{ => files}/utf8.strings (100%) diff --git a/spec/apfel_parse_ascii_file_spec.rb b/spec/apfel_parse_ascii_file_spec.rb index bf5bbb5..c0f4411 100644 --- a/spec/apfel_parse_ascii_file_spec.rb +++ b/spec/apfel_parse_ascii_file_spec.rb @@ -7,13 +7,13 @@ context 'when given a ASCII DotStrings file'do it 'the file should be ascii' do - res = `file -I ./spec/ascii.strings` + res = `file -I ./spec/files/ascii.strings` encoding = res.split(/=/).last.gsub!("\n",'') encoding.should == 'us-ascii' end let(:parsed_file) do - Apfel.parse('./spec/ascii.strings') + Apfel.parse('./spec/files/ascii.strings') end it 'returns a ParsedDotStrings object' do diff --git a/spec/apfel_parse_utf8_file_spec.rb b/spec/apfel_parse_utf8_file_spec.rb index 931b646..064476b 100644 --- a/spec/apfel_parse_utf8_file_spec.rb +++ b/spec/apfel_parse_utf8_file_spec.rb @@ -7,7 +7,7 @@ context 'when given a UTF8 DotStrings file'do it 'the file should be utf-8' do - File.open('./spec/utf8.strings', 'r') do |f| + File.open('./spec/files/utf8.strings', 'r') do |f| f.external_encoding.name.should == 'UTF-8' content = f.read content.encoding.name.should == 'UTF-8' @@ -15,7 +15,7 @@ end let(:parsed_file) do - Apfel.parse('./spec/utf8.strings') + Apfel.parse('./spec/files/utf8.strings') end it 'returns a ParsedDotStrings object' do diff --git a/spec/ascii.strings b/spec/files/ascii.strings similarity index 100% rename from spec/ascii.strings rename to spec/files/ascii.strings diff --git a/spec/utf8.strings b/spec/files/utf8.strings similarity index 100% rename from spec/utf8.strings rename to spec/files/utf8.strings From 653366a382ca9505017a33e804af91455bd2fe48 Mon Sep 17 00:00:00 2001 From: Gabe da Silveira Date: Fri, 5 Apr 2013 10:53:57 +0100 Subject: [PATCH 10/10] Handle basic escape sequences in values This is more or less a hack based on information gleaned from http://developer.apple.com/library/mac/#documentation/cocoa/conceptual/LoadingResources/Strings/Strings.html It only escapes NLs, CRs, tabs, double quotes and backslashes. Unicode escapes are not handled because I don't need them right now. In the case of unknown characters it does not remove the backslash which I did to be minimally invasive, however I did not confirm the actual behavior on Apple's side. --- lib/apfel/line.rb | 37 ++++++++++++++++++++++++++++++++++++- spec/apfel_escapes_spec.rb | 32 ++++++++++++++++++++++++++++++++ spec/files/escapes.strings | 5 +++++ 3 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 spec/apfel_escapes_spec.rb create mode 100644 spec/files/escapes.strings diff --git a/lib/apfel/line.rb b/lib/apfel/line.rb index 500cac5..f2537a6 100644 --- a/lib/apfel/line.rb +++ b/lib/apfel/line.rb @@ -61,12 +61,47 @@ def key def value if key_value_pair? - cleaned_content.partition(/"\s*=\s*"/)[2].gsub!(/(^"|"$)/, "") + unescape_value cleaned_content.partition(/"\s*=\s*"/)[2].gsub!(/(^"|"$)/, "") end end def is_comment? whole_comment? || open_comment? || close_comment? || in_comment end + + private + + # http://developer.apple.com/library/mac/#documentation/cocoa/conceptual/LoadingResources/Strings/Strings.html + def unescape_value(string) + state = :normal + out = '' + string.each_char do |c| + case state + when :normal + if c == '\\' + state = :escape + else + out += c + end + when :escape + state = :normal + case c + when '\\' + out += '\\' + when '"' + out += '"' + when 'r' + out += "\r" + when 'n' + out += "\n" + when 't' + out += "\t" + else + out += '\\' + c # Do nothing, however in the future handling unicode escapes could be good + end + end + end + out + end end end diff --git a/spec/apfel_escapes_spec.rb b/spec/apfel_escapes_spec.rb new file mode 100644 index 0000000..d1ac7cf --- /dev/null +++ b/spec/apfel_escapes_spec.rb @@ -0,0 +1,32 @@ +require 'spec_helper' +require 'apfel' + +describe Apfel do + describe '::parse_file' do + context 'when given DotStrings file with escapes'do + let(:parsed_file_hash) do + Apfel.parse('./spec/files/escapes.strings').to_hash(:with_comments => false) + end + + it 'should parse nl' do + parsed_file_hash['multiline'].should eq "line 1\nline 2" + end + + it 'should parse cr' do + parsed_file_hash['mac'].should eq "before cr\rafter cr" + end + + it 'should parse tabs' do + parsed_file_hash['tabs'].should eq "two spaces \t equals tab" + end + + it 'should parse double quotes' do + parsed_file_hash['dq'].should eq "\"someone said this\"" + end + + it 'should parse backslashes' do + parsed_file_hash['backslash'].should eq "\\not a forward slash" + end + end + end +end diff --git a/spec/files/escapes.strings b/spec/files/escapes.strings new file mode 100644 index 0000000..f6c7d71 --- /dev/null +++ b/spec/files/escapes.strings @@ -0,0 +1,5 @@ +"multiline" = "line 1\nline 2"; +"mac" = "before cr\rafter cr"; +"tabs" = "two spaces \t equals tab"; +"dq" = "\"someone said this\""; +"backslash" = "\\not a forward slash";