This repository was archived by the owner on Oct 10, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfetch_data_from_old_feed.rb
More file actions
96 lines (71 loc) · 2.25 KB
/
fetch_data_from_old_feed.rb
File metadata and controls
96 lines (71 loc) · 2.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
require 'nokogiri'
require 'open-uri'
require 'aws-sdk-s3'
require 'uri'
require 'pp'
require 'json'
require 'date'
# Load the run configuration from config.json; JSON object keys are
# symbolized, so every lookup on `config` below uses symbols.
raw_config = File.read("config.json")
config = JSON.parse(raw_config, symbolize_names: true)
puts "Running with config:"
pp config
# Output field name => XPath evaluated against each feed <item>.
mapping = config[:rssXMLXPathMappings]
# Fetch the RSS feed and parse it as XML.
# URI.open is called explicitly: open-uri's patch of Kernel#open was
# deprecated in Ruby 2.7 and removed in 3.0, where a bare open(url) tries to
# open a local file named after the URL instead of making a request.
# The block form closes the underlying IO as soon as parsing is done.
doc = URI.open(config[:sourceRSSFeed]) { |feed| Nokogiri::XML(feed) }
items = doc.xpath('//item')
# Build one hash per <item>: each configured key maps to the text of the
# nodes matched by its XPath (an empty string when nothing matches).
sermons = items.map do |item|
  mapping.map { |key, xpath| [key, item.xpath(xpath).text] }.to_h
end
# Coerce the configured fields from feed text to Integer, updating each
# sermon hash in place. String#to_i yields 0 for non-numeric text.
keys_to_int = config[:keysToInteger]
sermons.each do |sermon|
  keys_to_int.each do |key|
    field = key.to_sym
    sermon[field] = sermon[field].to_i
  end
end
# Re-format the configured date fields as ISO 8601 timestamp strings,
# updating each sermon hash in place.
keys_to_date_time_iso8601 = config[:keysToTimeStamp]
sermons.each do |sermon|
  keys_to_date_time_iso8601.each do |key|
    field = key.to_sym
    parsed = DateTime.parse(sermon[field])
    sermon[field] = parsed.iso8601
  end
end
# Split joint speaker credits ("A & B") into a :speakers array. This is a
# crude fix, but the original author notes there is only one such entry, so a
# plain split on " & " is enough. The original :speaker string is kept.
sermons.each do |sermon|
  credited = sermon[:speaker]
  sermon[:speakers] = credited.split(" & ")
end
# Replace event names that have a configured correction; names without an
# entry in the fixes table are left as they are.
event_name_fixes = config[:eventNameFixes]
sermons.each do |sermon|
  corrected = event_name_fixes[sermon[:event].to_sym]
  sermon[:event] = corrected if corrected
end
# Fill in a blank event from the media URL: an "_AM_" marker means the
# morning service, "_PM_" the evening one. If neither marker is present the
# event stays blank.
sermons.each do |sermon|
  next unless sermon[:event] == ""

  media_url = sermon[:media_url]
  if media_url.include?('_AM_')
    sermon[:event] = "Morning Service"
  elsif media_url.include?('_PM_')
    sermon[:event] = "Evening Service"
  end
end
# Distinct speaker names across all sermons.
speakers = sermons.map { |sermon| sermon[:speakers] }.flatten(1).uniq
# Distinct series, where a series is its name/subtitle/image combination.
series_records = sermons.map do |sermon|
  {
    name: sermon[:series_name],
    subtitle: sermon[:series_subtitle],
    image: sermon[:image_url]
  }
end
series = series_records.uniq
# Distinct event names.
event_names = sermons.map { |sermon| sermon[:event] }
events = event_names.uniq
# Report how many records of each kind were collected, then write each
# collection to fetched_<label>.json as pretty-printed JSON. All counts are
# printed before any file is written.
datasets = {
  "sermons" => sermons,
  "series" => series,
  "events" => events,
  "speakers" => speakers
}
datasets.each { |label, records| puts "#{label}: #{records.count}" }
datasets.each do |label, records|
  File.write("fetched_#{label}.json", JSON.pretty_generate(records))
end