Skip to content
This repository was archived by the owner on Mar 31, 2023. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 14 additions & 12 deletions lib/ocrsdk/image.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,21 @@ def initialize(image_path)
@image_path = image_path
end

def as_text(languages)
xml_string = api_process_image @image_path, languages, :txt, :text_extraction

def as_languages_and_formats(languages, formats, profile, options={})
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What kind of options are you going to pass here?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mostly pass "readBarcodes" => false

xml_string = api_process_image @image_path, languages, formats, profile,options
OCRSDK::Promise.from_response xml_string
end

# Submits the image for plain-text (:txt) recognition.
#
# languages - recognition languages, forwarded unchanged
# profile   - processing profile, :text_extraction by default
# options   - extra request parameters (for example "readBarcodes" => false),
#             forwarded unchanged to as_languages_and_formats
#
# Returns whatever as_languages_and_formats returns.
def as_text(languages, profile=:text_extraction, options={})
  as_languages_and_formats(languages, :txt, profile, options)
end

# Blocking variant of as_text: submits the image, waits on the returned
# promise and hands back the recognised text tagged as UTF-8.
#
# languages     - recognition languages, forwarded to as_text
# wait_interval - value passed to the promise's wait call
# profile       - processing profile forwarded to as_text; mirrors the
#                 trailing profile argument of as_xml_sync
#
# Returns the result string after force_encoding('utf-8') (the string is
# re-tagged in place, not transcoded).
def as_text_sync(languages, wait_interval=OCRSDK.config.default_poll_time, profile=:text_extraction)
  as_text(languages, profile).wait(wait_interval).result.force_encoding('utf-8')
end

# Submits the image for XML (:xml) recognition.
#
# The request is issued exactly once, through as_languages_and_formats;
# this method does not call the processing API directly.
#
# languages - recognition languages, forwarded unchanged
# profile   - processing profile, :text_extraction by default
# options   - extra request parameters forwarded unchanged
#
# Returns whatever as_languages_and_formats returns.
def as_xml(languages, profile=:text_extraction, options={})
  as_languages_and_formats(languages, :xml, profile, options)
end

def as_xml_sync(languages, wait_interval=OCRSDK.config.default_poll_time, profile=:text_extraction)
Expand All @@ -48,22 +49,23 @@ def as_pdf_sync(languages, out_path=nil, wait_interval=OCRSDK.config.default_pol

# TODO handle 4xx and 5xx responses and errors, file not found error
# http://ocrsdk.com/documentation/apireference/processImage/
#
# Uploads the image to the /processImage endpoint (resolved against @url)
# and returns the value produced by the HTTP call.
#
# image_path - path of the file to upload; its extension is validated
# languages  - recognition languages, serialised via languages_to_s
# format     - export format(s), serialised via format_to_s
# profile    - processing profile, serialised via profile_to_s
# options    - extra query parameters merged over the standard ones
#
# Raises OCRSDK::UnsupportedInputFormat, OCRSDK::UnsupportedOutputFormat or
# OCRSDK::UnsupportedProfile when validation fails, and OCRSDK::NetworkError
# (retried by retryable) when the HTTP client raises
# RestClient::ExceptionWithResponse.
def api_process_image(image_path, languages, format=:txt, profile=:document_conversion, options={})
  raise OCRSDK::UnsupportedInputFormat unless supported_input_format? File.extname(image_path)[1..-1]
  raise OCRSDK::UnsupportedOutputFormat unless supported_output_format? format
  raise OCRSDK::UnsupportedProfile unless supported_profile?(profile)

  query = {
    language: languages_to_s(languages).join(','),
    exportFormat: format_to_s(format),
    profile: profile_to_s(profile)
  }.merge(options)
  uri = URI.join @url, '/processImage', "?#{URI.encode_www_form(query)}"

  retryable tries: OCRSDK.config.number_or_retries, on: OCRSDK::NetworkError, sleep: OCRSDK.config.retry_wait_time do
    begin
      # Open the upload inside the retried block so every attempt reads the
      # file from the start, and use the block form so the handle is closed
      # whether the request succeeds or raises.
      File.open(image_path, 'rb') do |file|
        # NOTE(review): rest-client appears to interpret timeouts in seconds,
        # which would make 120000 roughly 33 hours - confirm the intended unit.
        RestClient::Request.execute(method: :post, url: uri.to_s, payload: { upload: { file: file } },
                                    timeout: 120000, open_timeout: 120000, headers: {})
      end
    rescue RestClient::ExceptionWithResponse => e
      raise OCRSDK::NetworkError, ":#{e.message}:#{e.response}:#{e.http_body}:#{e.http_code}"
    end
  end
end
Expand Down
8 changes: 5 additions & 3 deletions lib/ocrsdk/mock.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ def success
stub_process_image response(:process_image, :success)
stub_get_task_status response(:get_task_status, :completed)
stub_result response(:result, :simple)
stub_result response(:result2, :simple2),'http://cloud.ocrsdk.com/result_url2'
stub_result response(:result3, :simple3),'http://cloud.ocrsdk.com/result_url3'
end

def in_progress
Expand All @@ -30,8 +32,8 @@ def stub_get_task_status(response)
WebMock::API.stub_request(:get, /.*:.*@cloud.ocrsdk.com\/getTaskStatus\?taskId=.*/).to_return(body: response)
end

# Registers a WebMock GET stub that answers with the given body.
#
# response - body returned by the stubbed request
# url      - URL to stub; defaults to the first result URL so existing
#            single-argument callers keep working
def stub_result(response, url = "http://cloud.ocrsdk.com/result_url")
  WebMock::API.stub_request(:get, url).to_return(body: response)
end

def response(method, status)
Expand All @@ -44,4 +46,4 @@ def response(method, status)
File.new(path).read
end
end
end
end
22 changes: 17 additions & 5 deletions lib/ocrsdk/promise.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
class OCRSDK::Promise < OCRSDK::AbstractEntity
include OCRSDK::Verifiers::Status

attr_reader :task_id, :status, :result_url, :estimate_processing_time
attr_reader :task_id, :status, :result_urls, :estimate_processing_time

def self.from_response(xml_string)
OCRSDK::Promise.new(nil).parse_response xml_string
Expand All @@ -26,7 +26,7 @@ def parse_response(xml_string)
end

@status = status_to_sym task['status']
@result_url = task['resultUrl']
@result_urls = [task['resultUrl'], task['resultUrl2'], task['resultUrl3']]
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So now it's always going to return 3 results? This seems like a breaking change to me.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, not at all. The result method still returns the first result so as not to break the API. There are result2 and result3 methods to match the terms in the API. I'm not a huge fan of them, but they match the ocrsdk API and don't break anything.

@registration_time = DateTime.parse task['registrationTime']
@estimate_processing_time = task['estimatedProcessingTime'].to_i

Expand Down Expand Up @@ -55,9 +55,21 @@ def processing?
end

# Returns the first result by delegating to result_with_number with
# index 0, passing retry_sleep through unchanged.
def result(retry_sleep=OCRSDK.config.retry_wait_time)
  first_slot = 0
  result_with_number first_slot, retry_sleep
end

# Returns the second result by delegating to result_with_number with
# index 1, passing retry_sleep through unchanged.
def result2(retry_sleep=OCRSDK.config.retry_wait_time)
  second_slot = 1
  result_with_number second_slot, retry_sleep
end

# Returns the third result by delegating to result_with_number with
# index 2, passing retry_sleep through unchanged.
def result3(retry_sleep=OCRSDK.config.retry_wait_time)
  third_slot = 2
  result_with_number third_slot, retry_sleep
end

# Fetches the result stored at the given zero-based index.
#
# number      - index handed to api_get_result
# retry_sleep - pause between attempts, passed to retryable as :sleep
#
# Only the requested result is fetched; each attempt performs a single
# api_get_result call.
#
# Raises OCRSDK::ProcessingFailed when failed? is true. OCRSDK::NetworkError
# is retried up to OCRSDK.config.number_or_retries times.
def result_with_number(number, retry_sleep)
  raise OCRSDK::ProcessingFailed if failed?

  retryable tries: OCRSDK.config.number_or_retries, on: OCRSDK::NetworkError, sleep: retry_sleep do
    api_get_result(number)
  end
end

Expand All @@ -82,8 +94,8 @@ def api_update_status
raise OCRSDK::NetworkError
end

# Performs a GET on one of the stored result URLs and returns the response.
#
# result_number - zero-based index into @result_urls; defaults to 0 so
#                 argument-less callers still receive the first result
#
# Raises OCRSDK::NetworkError when the HTTP client raises
# RestClient::ExceptionWithResponse.
def api_get_result(result_number=0)
  RestClient.get @result_urls[result_number].to_s
rescue RestClient::ExceptionWithResponse
  raise OCRSDK::NetworkError
end
Expand Down
10 changes: 7 additions & 3 deletions lib/ocrsdk/verifiers/format.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ module OCRSDK::Verifiers::Format
:pdf_text_and_images, :xml, :alto].freeze

# Serialises one export format, or a list of them, for the request query.
#
# format - a single format or an array of formats (symbols or strings)
#
# Returns the lower-camel-cased names joined with commas, e.g.
# [:txt, :pdf_searchable] becomes "txt,pdfSearchable".
def format_to_s(format)
  Array(format).map { |f| f.to_s.camelize(:lower) }.join(",")
end

def supported_input_format?(format)
Expand All @@ -18,9 +20,11 @@ def supported_input_format?(format)
end

# Tells whether every requested export format is listed in OUTPUT_FORMATS.
#
# format - a single format or an array of formats; strings are underscored
#          and converted to symbols before the lookup
#
# Returns true only when at least one format is given and all of them are
# supported. An empty list or nil is reported as unsupported, so a request
# is never built with a blank export format.
def supported_output_format?(format)
  formats = Array(format).map do |f|
    f.kind_of?(String) ? f.underscore.to_sym : f
  end

  !formats.empty? && formats.all? { |f| OUTPUT_FORMATS.include? f }
end

end
2 changes: 2 additions & 0 deletions mocks/get_task_status/completed.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
credits="10"
estimatedProcessingTime="3600"
resultUrl="http://cloud.ocrsdk.com/result_url"
resultUrl2="http://cloud.ocrsdk.com/result_url2"
resultUrl3="http://cloud.ocrsdk.com/result_url3"
description="My first OCR task"/>
<task/>
</response>
1 change: 1 addition & 0 deletions mocks/result2/simple2.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
meow2
1 change: 1 addition & 0 deletions mocks/result3/simple3.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
meow3
2 changes: 1 addition & 1 deletion ocrsdk.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Gem::Specification.new do |s|
s.add_runtime_dependency "nokogiri"
s.add_runtime_dependency "pdf-reader"
s.add_runtime_dependency "activesupport"
s.add_runtime_dependency "retryable"
s.add_runtime_dependency "retryable", "~> 1.3"

s.add_development_dependency "rake", ">= 0.8"
s.add_development_dependency "rspec", "~> 2"
Expand Down
6 changes: 6 additions & 0 deletions spec/ocrsdk/image_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@

subject { OCRSDK::Image.new TestFiles.russian_jpg_path }

# Requesting two export formats (:txt and :xml) in a single call should
# still hand back one OCRSDK::Promise.
describe ".as_languages_and_formats" do
it "should call api and return Promise" do
subject.as_languages_and_formats([:russian],[:txt, :xml], :text_extraction).should be_kind_of(OCRSDK::Promise)
end
end

describe ".as_text" do
it "should call api and return Promise" do
subject.as_text([:russian]).should be_kind_of(OCRSDK::Promise)
Expand Down
6 changes: 4 additions & 2 deletions spec/ocrsdk/promise_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

its(:task_id) { should == '22345200-abe8-4f60-90c8-0d43c5f6c0f6' }
its(:status) { should == :submitted }
its(:result_url) { should == 'http://cloud.ocrsdk.com/result_url' }
its(:result_urls) { should == ['http://cloud.ocrsdk.com/result_url',nil,nil] }
its(:estimate_processing_time) { should == 3600 }
its(:estimate_completion) { should == DateTime.parse("2001-01-01T13:18:22Z") + 3600.seconds }
end
Expand Down Expand Up @@ -46,7 +46,7 @@

its(:task_id) { should == '22345200-abe8-4f60-90c8-0d43c5f6c0f6' }
its(:status) { should == :submitted }
its(:result_url) { should == 'http://cloud.ocrsdk.com/result_url' }
its(:result_urls) { should == ['http://cloud.ocrsdk.com/result_url',nil,nil] }
its(:estimate_processing_time) { should == 3600 }
its(:estimate_completion) { should == DateTime.parse("2001-01-01T13:18:22Z") + 3600.seconds }
end
Expand Down Expand Up @@ -98,6 +98,8 @@
subject { OCRSDK::Promise.from_response OCRSDK::Mock.response(:get_task_status, :completed) }

its(:result) { should == 'meow' }
its(:result2) { should == 'meow2' }
its(:result3) { should == 'meow3' }

it "should raise NetworkError in case getting file fails, but retry 3 times before failing" do
RestClient.stub(:get) {|url| raise RestClient::ExceptionWithResponse }
Expand Down