-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscraper.rb
More file actions
executable file
·71 lines (58 loc) · 1.97 KB
/
scraper.rb
File metadata and controls
executable file
·71 lines (58 loc) · 1.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env ruby
# frozen_string_literal: true
Bundler.require
$LOAD_PATH << "./lib"
require "technology_one_scraper"
# Drives TechnologyOneScraper across a list of authorities, saving every
# yielded record through ScraperWiki and retrying failed authorities once.
class Scraper
  AUTHORITIES = TechnologyOneScraper::AUTHORITIES

  # Scrapes each authority in turn. A failure for one authority is reported
  # on stderr and recorded, and the remaining authorities are still processed.
  #
  # @param authorities [Array] authority labels to scrape
  # @return [Hash] authority label => the StandardError raised for it
  #   (empty when every authority succeeded)
  def self.scrape(authorities)
    failures = {}
    authorities.each do |label|
      puts "\nCollecting feed data for #{label}..."
      error = scrape_authority(label)
      failures[label] = error if error
    end
    failures
  end

  # Scrapes one authority: tags each yielded record with the authority label,
  # logs it, then saves it with ScraperWiki.save_sqlite (authority_label and
  # council_reference are passed as its first argument).
  #
  # @param label [Object] the authority label
  # @return [StandardError, nil] the error that aborted the scrape, or nil
  def self.scrape_authority(label)
    TechnologyOneScraper.scrape(label) do |record|
      record["authority_label"] = label.to_s
      TechnologyOneScraper.log(record)
      ScraperWiki.save_sqlite(%w[authority_label council_reference], record)
    end
    nil
  rescue StandardError => e
    warn "#{label}: ERROR: #{e}"
    warn e.backtrace
    e
  end
  private_class_method :scrape_authority

  # @return [Array] the labels of every authority in AUTHORITIES
  def self.selected_authorities
    # Disabled alternative, kept for reference:
    # ScraperUtils::AuthorityUtils.selected_authorities(AUTHORITIES.keys)
    AUTHORITIES.keys
  end

  # Scrapes the given authorities, then makes one more attempt at any that
  # failed.
  #
  # @param authorities [Array] authority labels to scrape
  # @return [nil]
  # @raise [RuntimeError] if any authority still fails after the retry
  def self.run(authorities)
    puts "Scraping authorities: #{authorities.join(', ')}"
    failures = scrape(authorities)
    return if failures.empty?

    puts "\n***************************************************"
    puts "Now retrying authorities which earlier had failures"
    puts "***************************************************"
    failures = scrape(failures.keys)
    return if failures.empty?

    raise "There were errors with the following authorities: #{failures.keys}. " \
          "See earlier output for details"
  end
end
# Script entry point: scrape only when this file is executed directly,
# not when it is loaded via require.
#
# Disabled template for defaulting MORPH_EXPECT_BAD to the authorities we
# can't or won't fix in code. List each one with the reason, e.g.
#   some:     url-for-issue Summary Reason
#   councils: url-for-issue Summary Reason
#
#   if ENV['MORPH_EXPECT_BAD'].nil?
#     default_expect_bad = {
#     }
#     puts 'Default EXPECT_BAD:', default_expect_bad.to_yaml if default_expect_bad.any?
#
#     ENV["MORPH_EXPECT_BAD"] = default_expect_bad.keys.join(',')
#   end
if $PROGRAM_NAME == __FILE__
  selected = Scraper.selected_authorities
  Scraper.run(selected)
end