Review notes (text before the first "diff --git" header is not part of any hunk).

Purpose of the patch
  * OCRSDK::Image gains as_languages_and_formats(languages, formats, profile, options={});
    as_text and as_xml now delegate to it, and api_process_image merges `options`
    into the processImage query string.
  * Format verifiers accept either a single export format or an array of formats.
  * OCRSDK::Promise keeps up to three result URLs (resultUrl, resultUrl2, resultUrl3)
    and exposes result, result2 and result3.
  * OCRSDK::Mock.stub_result takes an optional URL; mocks and specs cover the extra results.
  * The retryable dependency is pinned to "~> 1.3".

Changes made during review (relative to the submitted patch)
  * image.rb: timeout/open_timeout lowered from 120000 to 120. rest-client takes
    these values in seconds; 120 assumes the submitted number was meant as
    milliseconds -- confirm the intended limit.
  * image.rb: the upload file is opened with the block form of File.open so the
    handle is closed after every attempt instead of leaking one handle per retry.
  * promise.rb: a result_url reader is kept so existing callers of the removed
    attr_reader keep working; it returns the first entry of result_urls.
  * format.rb: the block parameter in supported_output_format? no longer shadows
    the method parameter `format`.
  * image.rb: spacing fixed in the api_process_image call (`profile, options`).

Caveats
  * This copy was rebuilt from a whitespace-collapsed patch. Indentation, column
    alignment and the position of blank context lines are inferred from the hunk
    counts; regenerate the patch against the tree before applying it.
  * The "index" lines are the submitted ones and no longer match the reviewed
    content of image.rb, promise.rb and verifiers/format.rb.
  * The completed.xml hunk carries one trailing context line only; the remaining
    trailing context was not recoverable, so its counts are 4/6 instead of 6/8.

diff --git a/lib/ocrsdk/image.rb b/lib/ocrsdk/image.rb
index ad45c7f..649ebd0 100644
--- a/lib/ocrsdk/image.rb
+++ b/lib/ocrsdk/image.rb
@@ -8,20 +8,21 @@ def initialize(image_path)
     @image_path = image_path
   end
 
-  def as_text(languages)
-    xml_string = api_process_image @image_path, languages, :txt, :text_extraction
-
+  def as_languages_and_formats(languages, formats, profile, options={})
+    xml_string = api_process_image @image_path, languages, formats, profile, options
     OCRSDK::Promise.from_response xml_string
   end
 
+  def as_text(languages, profile=:text_extraction)
+    as_languages_and_formats(languages, :txt, profile)
+  end
+
   def as_text_sync(languages, wait_interval=OCRSDK.config.default_poll_time)
     as_text(languages).wait(wait_interval).result.force_encoding('utf-8')
   end
 
   def as_xml(languages, profile=:text_extraction)
-    xml_string = api_process_image @image_path, languages, :xml, profile
-
-    OCRSDK::Promise.from_response xml_string
+    as_languages_and_formats(languages, :xml, profile)
   end
 
   def as_xml_sync(languages, wait_interval=OCRSDK.config.default_poll_time, profile=:text_extraction)
@@ -48,22 +49,26 @@ def as_pdf_sync(languages, out_path=nil, wait_interval=OCRSDK.config.default_pol
 
   # TODO handle 4xx and 5xx responses and errors, file not found error
   # http://ocrsdk.com/documentation/apireference/processImage/
-  def api_process_image(image_path, languages, format=:txt, profile=:document_conversion)
+  def api_process_image(image_path, languages, format=:txt, profile=:document_conversion, options={})
     raise OCRSDK::UnsupportedInputFormat unless supported_input_format? File.extname(image_path)[1..-1]
     raise OCRSDK::UnsupportedOutputFormat unless supported_output_format? format
     raise OCRSDK::UnsupportedProfile unless supported_profile? (profile)
 
-    params = URI.encode_www_form(
+    params = URI.encode_www_form({
       language: languages_to_s(languages).join(','),
       exportFormat: format_to_s(format),
-      profile: profile_to_s(profile))
+      profile: profile_to_s(profile)}.merge(options))
     uri = URI.join @url, '/processImage', "?#{params}"
 
     retryable tries: OCRSDK.config.number_or_retries, on: OCRSDK::NetworkError, sleep: OCRSDK.config.retry_wait_time do
       begin
-        RestClient.post uri.to_s, upload: { file: File.new(image_path, 'rb') }
-      rescue RestClient::ExceptionWithResponse
-        raise OCRSDK::NetworkError
+        # rest-client timeouts are in seconds; the block form closes the upload handle after each attempt.
+        File.open(image_path, 'rb') do |file|
+          RestClient::Request.execute(method: :post, url: uri.to_s, payload: {upload: { file: file }},
+                                      timeout: 120, open_timeout: 120, headers: {})
+        end
+      rescue RestClient::ExceptionWithResponse => e
+        raise OCRSDK::NetworkError.new(":#{e.message}:#{e.response}:#{e.http_body}:#{e.http_code}")
       end
     end
   end
diff --git a/lib/ocrsdk/mock.rb b/lib/ocrsdk/mock.rb
index 1c51c96..e256d1d 100644
--- a/lib/ocrsdk/mock.rb
+++ b/lib/ocrsdk/mock.rb
@@ -9,6 +9,8 @@ def success
       stub_process_image response(:process_image, :success)
       stub_get_task_status response(:get_task_status, :completed)
       stub_result response(:result, :simple)
+      stub_result response(:result2, :simple2),'http://cloud.ocrsdk.com/result_url2'
+      stub_result response(:result3, :simple3),'http://cloud.ocrsdk.com/result_url3'
     end
 
     def in_progress
@@ -30,8 +32,8 @@ def stub_get_task_status(response)
       WebMock::API.stub_request(:get, /.*:.*@cloud.ocrsdk.com\/getTaskStatus\?taskId=.*/).to_return(body: response)
     end
 
-    def stub_result(response)
-      WebMock::API.stub_request(:get, 'http://cloud.ocrsdk.com/result_url').to_return(body: response)
+    def stub_result(response, url = "http://cloud.ocrsdk.com/result_url")
+      WebMock::API.stub_request(:get, url).to_return(body: response)
     end
 
     def response(method, status)
@@ -44,4 +46,4 @@ def response(method, status)
       File.new(path).read
     end
   end
-end
\ No newline at end of file
+end
diff --git a/lib/ocrsdk/promise.rb b/lib/ocrsdk/promise.rb
index 44ffdb3..e78fc90 100644
--- a/lib/ocrsdk/promise.rb
+++ b/lib/ocrsdk/promise.rb
@@ -1,7 +1,7 @@
 class OCRSDK::Promise < OCRSDK::AbstractEntity
   include OCRSDK::Verifiers::Status
 
-  attr_reader :task_id, :status, :result_url, :estimate_processing_time
+  attr_reader :task_id, :status, :result_urls, :estimate_processing_time
 
   def self.from_response(xml_string)
     OCRSDK::Promise.new(nil).parse_response xml_string
@@ -26,7 +26,7 @@ def parse_response(xml_string)
     end
 
     @status = status_to_sym task['status']
-    @result_url = task['resultUrl']
+    @result_urls = [task['resultUrl'], task['resultUrl2'], task['resultUrl3']]
     @registration_time = DateTime.parse task['registrationTime']
     @estimate_processing_time = task['estimatedProcessingTime'].to_i
 
@@ -55,9 +55,26 @@ def processing?
   end
 
   def result(retry_sleep=OCRSDK.config.retry_wait_time)
+    result_with_number(0, retry_sleep)
+  end
+
+  def result2(retry_sleep=OCRSDK.config.retry_wait_time)
+    result_with_number(1, retry_sleep)
+  end
+
+  def result3(retry_sleep=OCRSDK.config.retry_wait_time)
+    result_with_number(2, retry_sleep)
+  end
+
+  # Backward-compatible reader for the first result URL (replaces the removed attr_reader).
+  def result_url
+    Array(@result_urls).first
+  end
+
+  def result_with_number(number, retry_sleep)
     raise OCRSDK::ProcessingFailed if failed?
     retryable tries: OCRSDK.config.number_or_retries, on: OCRSDK::NetworkError, sleep: retry_sleep do
-      api_get_result
+      api_get_result(number)
     end
   end
 
@@ -82,8 +99,8 @@ def api_update_status
     raise OCRSDK::NetworkError
   end
 
-  def api_get_result
-    RestClient.get @result_url.to_s
+  def api_get_result(result_number=0)
+    RestClient.get @result_urls[result_number].to_s
   rescue RestClient::ExceptionWithResponse
     raise OCRSDK::NetworkError
   end
diff --git a/lib/ocrsdk/verifiers/format.rb b/lib/ocrsdk/verifiers/format.rb
index c23f51c..de70661 100644
--- a/lib/ocrsdk/verifiers/format.rb
+++ b/lib/ocrsdk/verifiers/format.rb
@@ -8,7 +8,9 @@ module OCRSDK::Verifiers::Format
     :pdf_text_and_images, :xml, :alto].freeze
 
   def format_to_s(format)
-    format.to_s.camelize(:lower)
+    Array(format).map do |f|
+      f.to_s.camelize(:lower)
+    end.join(",")
   end
 
   def supported_input_format?(format)
@@ -18,9 +20,11 @@ def supported_input_format?(format)
   end
 
   def supported_output_format?(format)
-    format = format.underscore.to_sym if format.kind_of? String
+    formats = Array(format).map do |f|
+      f.kind_of?(String) ? f.underscore.to_sym : f
+    end
 
-    OUTPUT_FORMATS.include? format
+    formats.all? { |f| OUTPUT_FORMATS.include? f }
   end
 end
 
diff --git a/mocks/get_task_status/completed.xml b/mocks/get_task_status/completed.xml
index aead51c..6f08811 100644
--- a/mocks/get_task_status/completed.xml
+++ b/mocks/get_task_status/completed.xml
@@ -9,4 +9,6 @@
     credits="10"
     estimatedProcessingTime="3600"
     resultUrl="http://cloud.ocrsdk.com/result_url"
+    resultUrl2="http://cloud.ocrsdk.com/result_url2"
+    resultUrl3="http://cloud.ocrsdk.com/result_url3"
     description="My first OCR task"/>
diff --git a/mocks/result2/simple2.xml b/mocks/result2/simple2.xml
new file mode 100644
index 0000000..c46526f
--- /dev/null
+++ b/mocks/result2/simple2.xml
@@ -0,0 +1 @@
+meow2
\ No newline at end of file
diff --git a/mocks/result3/simple3.xml b/mocks/result3/simple3.xml
new file mode 100644
index 0000000..be93dee
--- /dev/null
+++ b/mocks/result3/simple3.xml
@@ -0,0 +1 @@
+meow3
\ No newline at end of file
diff --git a/ocrsdk.gemspec b/ocrsdk.gemspec
index 142475e..351b5dc 100644
--- a/ocrsdk.gemspec
+++ b/ocrsdk.gemspec
@@ -16,7 +16,7 @@ Gem::Specification.new do |s|
   s.add_runtime_dependency "nokogiri"
   s.add_runtime_dependency "pdf-reader"
   s.add_runtime_dependency "activesupport"
-  s.add_runtime_dependency "retryable"
+  s.add_runtime_dependency "retryable", "~> 1.3"
 
   s.add_development_dependency "rake", ">= 0.8"
   s.add_development_dependency "rspec", "~> 2"
diff --git a/spec/ocrsdk/image_spec.rb b/spec/ocrsdk/image_spec.rb
index 3d3fa20..f1bbce3 100644
--- a/spec/ocrsdk/image_spec.rb
+++ b/spec/ocrsdk/image_spec.rb
@@ -13,6 +13,12 @@
 
   subject { OCRSDK::Image.new TestFiles.russian_jpg_path }
 
+  describe ".as_languages_and_formats" do
+    it "should call api and return Promise" do
+      subject.as_languages_and_formats([:russian],[:txt, :xml], :text_extraction).should be_kind_of(OCRSDK::Promise)
+    end
+  end
+
   describe ".as_text" do
     it "should call api and return Promise" do
       subject.as_text([:russian]).should be_kind_of(OCRSDK::Promise)
diff --git a/spec/ocrsdk/promise_spec.rb b/spec/ocrsdk/promise_spec.rb
index ed0eada..3d07494 100644
--- a/spec/ocrsdk/promise_spec.rb
+++ b/spec/ocrsdk/promise_spec.rb
@@ -15,7 +15,7 @@
 
     its(:task_id) { should == '22345200-abe8-4f60-90c8-0d43c5f6c0f6' }
     its(:status) { should == :submitted }
-    its(:result_url) { should == 'http://cloud.ocrsdk.com/result_url' }
+    its(:result_urls) { should == ['http://cloud.ocrsdk.com/result_url',nil,nil] }
     its(:estimate_processing_time) { should == 3600 }
     its(:estimate_completion) { should == DateTime.parse("2001-01-01T13:18:22Z") + 3600.seconds }
   end
@@ -46,7 +46,7 @@
 
     its(:task_id) { should == '22345200-abe8-4f60-90c8-0d43c5f6c0f6' }
     its(:status) { should == :submitted }
-    its(:result_url) { should == 'http://cloud.ocrsdk.com/result_url' }
+    its(:result_urls) { should == ['http://cloud.ocrsdk.com/result_url',nil,nil] }
     its(:estimate_processing_time) { should == 3600 }
     its(:estimate_completion) { should == DateTime.parse("2001-01-01T13:18:22Z") + 3600.seconds }
   end
@@ -98,6 +98,8 @@
     subject { OCRSDK::Promise.from_response OCRSDK::Mock.response(:get_task_status, :completed) }
 
     its(:result) { should == 'meow' }
+    its(:result2) { should == 'meow2' }
+    its(:result3) { should == 'meow3' }
 
     it "should raise NetworkError in case getting file fails, but retry 3 times before failing" do
       RestClient.stub(:get) {|url| raise RestClient::ExceptionWithResponse }