-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.py
More file actions
60 lines (47 loc) · 1.79 KB
/
index.py
File metadata and controls
60 lines (47 loc) · 1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import os
from dotenv import load_dotenv
from database import Database
from engine.iterator import get_complete_anime_info
from engine.scrappers.gogoanime_scrapper import GogoAnimeScrapper
from database import Database
from engine.wrappers.free_proxy_wrapper import FreeProxyListWrapper
from concurrent.futures import ThreadPoolExecutor
# Load variables from a .env file (if any) before the settings below are read.
load_dotenv()


def _require_int_env(name):
    """Return the environment variable *name* parsed as an int.

    Raises:
        RuntimeError: if the variable is not set at all.
        ValueError: if the variable is set but is not a valid integer.
    """
    raw_value = os.getenv(name)
    if raw_value is None:
        # os.getenv returns None for a missing variable; int(None) would fail
        # with a TypeError that never mentions which setting is absent.
        raise RuntimeError(
            "Required environment variable {} is not set".format(name)
        )
    try:
        return int(raw_value)
    except ValueError as err:
        raise ValueError(
            "Environment variable {} must be an integer, got {!r}".format(
                name, raw_value
            )
        ) from err


# NOTE(review): CONNECTION_STRING is None when MONGO_CONNECTION_STRING is unset
# and is passed to Database unchanged - confirm how Database handles that.
CONNECTION_STRING = os.getenv("MONGO_CONNECTION_STRING")
START_PAGE = _require_int_env("START_PAGE")
END_PAGE = _require_int_env("END_PAGE")
MAX_WORKERS = _require_int_env("MAX_WORKERS")

database = Database(CONNECTION_STRING)
scrapper = GogoAnimeScrapper()
proxy = FreeProxyListWrapper()

# This call sits at module level, so it runs on import as well as when the
# file is executed as a script. The result is later sized with len() and
# iterated, and each entry is read through its 'name' and 'url' keys.
# The `proxy=True` keyword is a flag for get_a_to_z_list; it is not the
# module-level `proxy` object created above.
anime_list = scrapper.get_a_to_z_list(
    start_page=START_PAGE,
    end_page=END_PAGE,
    max_workers=MAX_WORKERS,
    log=True,
    proxy=True,
)
def expand_on_anime_from_anime_list_and_upload_to_database(anime, log=False):
    """Fetch the complete info for one anime-list entry through a random proxy.

    Despite its name, this function does not write to the database: it only
    fetches the data and returns it.

    Args:
        anime: Mapping that provides a 'name' and a 'url' entry.
        log: When true, print a progress line before fetching.

    Returns:
        Whatever get_complete_anime_info returns for the entry's URL.
    """
    proxy_ip = proxy.get_random_proxy()
    # Only the "http" scheme is mapped to the proxy.
    # NOTE(review): if get_complete_anime_info forwards this to a
    # requests-style `proxies` argument, https URLs would not go through the
    # proxy - confirm against the URLs produced by the scrapper.
    proxies = {"http": proxy_ip}

    anime_name = anime['name']
    anime_link = anime['url']

    if log:
        # str.format instead of "+" so a non-string proxy value (e.g. None)
        # cannot make the log line itself raise TypeError.
        print(
            "[*] Getting anime info for {} using proxy {}".format(
                anime_name, proxy_ip
            )
        )

    return get_complete_anime_info(anime_link, proxies)
if __name__ == "__main__":
    # Imported here because only the keep-alive loop at the end needs it.
    import time

    print("[*] Starting the thread pool")
    total = len(anime_list)

    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # executor.map yields results in the order of anime_list; an exception
        # raised inside a worker is re-raised here when its result is reached,
        # which ends the loop.
        results = executor.map(
            expand_on_anime_from_anime_list_and_upload_to_database,
            anime_list,
        )
        # Count from 1 so the first entry is reported as 1/total and the last
        # as total/total (100%), instead of 0/total .. (total - 1)/total.
        for progress, anime_data in enumerate(results, start=1):
            percentage = round((progress / total) * 100, 2)
            print("[{}/{} - {}%] {}".format(
                progress, total, percentage, anime_data['anime_name']
            ))
            database.insert_anime(anime_data)

    print("[+] Complete")

    # The script intentionally never exits once the work is done. Sleep between
    # wake-ups rather than spinning in a tight loop, which would keep a CPU
    # core fully busy for no reason.
    while True:
        time.sleep(60)