# example_bs4_LMO.py
from bs4 import BeautifulSoup
import requests
# Smallest possible example: parse an HTML snippet held in a string and
# print the parsed document back out.
helloworld = "<p>Hello World</p>"
soup_string = BeautifulSoup(markup=helloworld, features="html.parser")
print(soup_string)
# Download a live page and parse the raw response body (bytes) with the
# built-in "html.parser" backend.
url = "https://www.barnesandnoble.com/b/barnes-noble-classics/_/N-rqv"
# Without an explicit timeout, requests can wait forever on a server that
# never answers.
response = requests.get(url, timeout=10)
soup_page = BeautifulSoup(response.content, "html.parser")
# print(soup_page)

# A soup can also be built from an open file object:
# with open("foo.html") as foo_file:
#     soup = BeautifulSoup(foo_file, "html.parser")
# print(soup)
# Navigating by tag name: ``soup.a`` yields the first <a> element, while
# ``find_all("a")`` yields every <a> element in the document.
html_atag = """<html><body><p>Test html a tag example</p>
<a href="http://www.test.com">Home</a>
<a href="http://www.test.com/books">Books</a>
</body>
</html>"""
soup = BeautifulSoup(html_atag, "html.parser")
atag = soup.a
print(atag)
# ``find_all`` is the current spelling; ``findAll`` is a deprecated alias.
# A separate loop name keeps ``atag`` bound to the first link.
for link in soup.find_all("a"):
    print(link.string)
# NOTE(review): the original indentation was lost; the type is printed once,
# after the loop -- confirm this matches the intended output.
print(type(atag))
# Two different tags share the class "identical"; giving ``find`` the tag
# name in addition to the class restricts the match to the <div>.
html_identical = """<p class="identical">
Example of p tag with class identical
</p>
<div class="identical">
Example of div tag with class identical
</div>"""
soup = BeautifulSoup(markup=html_identical, features="html.parser")
identical_div = soup.find(name="div", attrs={"class": "identical"})
print(identical_div)
# Load the sample document from disk. The soup is built completely inside
# the ``with`` block, so the file can be closed right afterwards.
# NOTE(review): UTF-8 is assumed for the sample file; without an explicit
# encoding the platform default would be used -- confirm.
with open("ecologicalpyramid.html", encoding="utf-8") as ecological_pyramid:
    soup = BeautifulSoup(ecological_pyramid, "html.parser")

# producer_entries = soup.find("ul")
# print(producer_entries.li.div.string)

# For every <ul>, print the string of the first <div> found under its
# first <li>.
for producer_entry in soup.find_all("ul"):
    print(producer_entry.li.div.string)

# Look up a text node by its exact content. ``string=`` is the current name
# of the keyword formerly called ``text=``.
search = soup.find(string="fox")
print(search)

# Select elements by CSS class through an explicit attribute dictionary...
css_class = soup.find_all(attrs={"class": "primaryconsumerlist"})
print(css_class)
# ...which is equivalent to the keyword form below (note that it is
# ``class_``, because ``class`` is a reserved word in Python).
css_class = soup.find_all(class_="primaryconsumerlist")
print(css_class)
print(type(css_class))
def is_secondary_consumer(tag):
    """Tell whether *tag* has an ``id`` attribute equal to "secondaryconsumers".

    The argument only needs to provide ``has_attr(name)`` and ``get(name)``.
    """
    has_id = tag.has_attr("id")
    if not has_id:
        # No ``id`` attribute at all: hand back the falsy answer unchanged.
        return has_id
    return tag.get("id") == "secondaryconsumers"
# ``find`` also accepts a predicate function and returns the first tag for
# which that function is true.
secondary_consumers = soup.find(is_secondary_consumer)
print(type(secondary_consumers))
# ``find`` returns None when nothing matches, so guard before searching
# inside the result.
if secondary_consumers is not None:
    for name_entry in secondary_consumers.find_all(attrs={"class": "name"}):
        print(name_entry.string)

# ``string=True`` matches every text node in the document (``string=`` is
# the current name of the keyword formerly called ``text=``).
all_texts = soup.find_all(string=True)
print(all_texts)
# A list matches the text nodes equal to any one of its items.
all_texts_in_list = soup.find_all(string=["plants", "algae"])
print(all_texts_in_list)

# div_li_tags = soup.find_all(["div", "li"])
# print(div_li_tags)
# div_li_tags = soup.find_all(["div", "li"], recursive=True)
# print(div_li_tags)
# css_class = soup.findAll(["producerlist", "primaryconsumerlist", "secondaryconsumerlist", "tertiaryconsumerlist"])
# print(css_class)
# Fetch the Wikipedia list of Paris Metro stations and, for each row of the
# first table on the page, print the link texts found in the first and the
# fourth <td> cell.
url = r"https://en.wikipedia.org/wiki/List_of_stations_of_the_Paris_M%C3%A9tro"
# Without an explicit timeout, requests can wait forever on a server that
# never answers.
response = requests.get(url, timeout=10)
soup_page = BeautifulSoup(response.content, "html.parser")
soup_table = soup_page.find("table")
# ``find`` returns None when the page contains no <table>; iterate over
# nothing in that case instead of failing on the attribute access.
soup_lines = soup_table.find_all("tr") if soup_table is not None else []
for soup_line in soup_lines:
    # NOTE(review): the original indentation was lost; ``indice`` is taken
    # to be the position of the <td> cell within its row -- confirm.
    for indice, soup_case in enumerate(soup_line.find_all("td")):
        if indice not in (0, 3):
            continue
        for soup_station in soup_case.find_all("a"):
            # ``.string`` is None when the link has no single text child.
            if soup_station.string is not None:
                print(soup_station.string)