# mech.rb
require 'mechanize'
require 'logger'
require 'nokogiri'
require 'set'
# Wraps a single Mechanize agent and a fixed list of URLs, and offers helpers
# that fetch every URL and return the pages, print their bodies, list their
# links, or collect the text of elements matching a tag/class selector.
#
# Every public method issues its own GET per URL; nothing is cached between
# calls.
class Gen_mech
  # @param urls [Enumerable] URLs that every other method iterates over
  # @param agentid [String] handed unchanged to Mechanize.new
  #   (presumably Mechanize's connection name - confirm against its docs)
  # @param cookies [Boolean] despite its name, a truthy value only enables
  #   logging of the agent's activity to the file 'mech.log'
  def initialize(urls, agentid, cookies = false)
    @urls = urls
    @mech = Mechanize.new agentid
    @mech.user_agent_alias = 'Mac Safari'
    @mech.log = Logger.new 'mech.log' if cookies
  end

  # Fetches every URL and returns the results in URL order.
  #
  # @return [Array] one entry per URL, each being whatever @mech.get returned
  def return_obj
    @urls.map { |url| @mech.get(url) }
  end

  # Fetches every URL and prints the response body to standard output.
  #
  # @return [Object] the URL collection itself (the receiver of #each)
  def generate_html_body
    @urls.each { |url| puts @mech.get(url).body }
  end

  # Fetches every URL and collects the links found on each page.
  #
  # Links are keyed by their string form (link.to_s), so when two links on one
  # page share that string the later one overwrites the earlier one.
  #
  # @return [Hash{Object => Hash{String => String}}]
  #   { url => { link text => href } }; a link without an href maps to ''
  def parse_for_links
    @urls.each_with_object({}) do |url, links_by_url|
      links_by_url[url] =
        @mech.get(url).links.each_with_object({}) do |link, hrefs|
          hrefs[link.to_s] = link.href.to_s
        end
    end
  end

  # Fetches every URL and returns the text of each element matched by a CSS
  # selector built from +tag+ and +classname+.
  #
  # Selector choice (nil is treated like an empty string):
  # * tag and classname both given -> "tag.classname" (classonly is ignored)
  # * tag empty, classname given, classonly truthy -> ".classname"
  # * tag empty otherwise -> "html" (a classname without classonly is ignored)
  # * only tag given -> "tag"
  #
  # @param tag [String, nil] element name, or '' for none
  # @param classname [String, nil] CSS class name, or '' for none
  # @param classonly [Boolean] allow matching on the class alone when no tag
  #   is supplied
  # @return [Hash{Object => Array<String>}] { url => [element text, ...] }
  def hard_parse_for_class(tag, classname, classonly)
    selector = css_selector_for(tag.to_s, classname.to_s, classonly)
    @urls.each_with_object({}) do |url, texts_by_url|
      texts_by_url[url] = @mech.get(url).search(selector).map(&:text)
    end
  end

  private

  # Builds the CSS selector used by #hard_parse_for_class from already
  # stringified inputs. A class-based selector is only produced when a class
  # name is actually present, so a bare "." or "tag." is never emitted.
  def css_selector_for(tag, classname, classonly)
    if !classname.empty? && !tag.empty?
      "#{tag}.#{classname}"
    elsif classonly && !classname.empty?
      ".#{classname}"
    elsif tag.empty?
      'html'
    else
      tag
    end
  end
end