-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser_practice.py
More file actions
97 lines (75 loc) · 2.67 KB
/
parser_practice.py
File metadata and controls
97 lines (75 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import random
import time
import psycopg2
import requests
# Connection settings for the PostgreSQL server on localhost; parse_vacancies()
# unpacks this mapping as keyword arguments into psycopg2.connect().
# NOTE(review): the credentials are hard-coded in source — consider loading
# them from the environment or a secrets store instead.
db_config = dict(
    database="vacancies",
    user="postgres",
    password="83276320",
    host="localhost",
    port="6320",
)
def get_vacancies(keyword, page, *, per_page=20, timeout=10):
    """Fetch one page of vacancy search results from the hh.ru API.

    Args:
        keyword: Search text, sent as the ``text`` query parameter.
        page: Index of the result page to request (parse_vacancies starts
            counting at 0).
        per_page: Number of vacancies requested per page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: On network failure, timeout, or when the
            server answers with an HTTP error status.
    """
    url = "https://api.hh.ru/vacancies"
    params = {
        "text": keyword,
        # A None value is omitted from the query string by requests, so no
        # area filter is sent.
        "area": None,
        "page": page,
        "per_page": per_page,
    }
    headers = {
        # NOTE(review): this looks like a placeholder value — confirm what
        # User-Agent the API expects and replace it.
        "User-Agent": "Your User Agent",
    }
    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    # Surface HTTP errors here instead of handing an error payload to the
    # caller as if it were a page of results.
    response.raise_for_status()
    return response.json()
def create_table(conn):
    """Create the ``vacancies`` table if it does not already exist.

    Args:
        conn: An open database connection whose ``cursor()`` result can be
            used as a context manager (as psycopg2 cursors can).

    The statement is committed on ``conn`` before returning. If executing it
    raises, the cursor is still closed and nothing is committed here.
    """
    create_table_query = (
        "CREATE TABLE IF NOT EXISTS vacancies ("
        "id SERIAL PRIMARY KEY,"
        "title VARCHAR(255),"
        "salary VARCHAR(50),"
        "schedule VARCHAR(50),"
        "experience VARCHAR(50),"
        "city VARCHAR(50),"
        "employer VARCHAR(255),"
        "url VARCHAR(255))"
    )
    # The context manager closes the cursor even when execute() fails.
    with conn.cursor() as cur:
        cur.execute(create_table_query)
    conn.commit()
def drop_table(conn):
    """Drop the ``vacancies`` table if it exists.

    Args:
        conn: An open database connection whose ``cursor()`` result can be
            used as a context manager (as psycopg2 cursors can).

    The statement is committed on ``conn`` before returning. If executing it
    raises, the cursor is still closed and nothing is committed here.
    """
    drop_table_query = "DROP TABLE IF EXISTS vacancies"
    # The context manager closes the cursor even when execute() fails.
    with conn.cursor() as cur:
        cur.execute(drop_table_query)
    conn.commit()
# Column order here must match the tuple built by _vacancy_row().
_INSERT_VACANCY_SQL = (
    "INSERT INTO vacancies "
    "(title, salary, city, schedule, employer, experience, url) "
    "VALUES (%s, %s, %s, %s, %s, %s, %s)"
)


def _format_salary(salary):
    """Render a vacancy's salary object as the text stored in the table."""
    if not salary:
        return "не указана"
    low = salary.get('from')
    high = salary.get('to')
    # Only mention the bounds that are actually present, so the stored text
    # never contains the literal "None".
    if low is not None and high is not None:
        return f"от {low} до {high}"
    if low is not None:
        return f"от {low}"
    if high is not None:
        return f"до {high}"
    return "не указана"


def _nested_name(item, key):
    """Return ``item[key]['name']``, or None if that object is missing or null."""
    return (item.get(key) or {}).get('name')


def _vacancy_row(item):
    """Build the INSERT parameter tuple for one vacancy item."""
    return (
        item.get('name'),
        _format_salary(item.get('salary')),
        _nested_name(item, 'area'),
        _nested_name(item, 'schedule'),
        _nested_name(item, 'employer'),
        _nested_name(item, 'experience'),
        item.get('alternate_url'),
    )


def parse_vacancies(keyword):
    """Download every result page for ``keyword`` and store it in PostgreSQL.

    The ``vacancies`` table is dropped and recreated on each call, then one
    row is inserted per vacancy returned by get_vacancies(). Paging stops
    when a page has no items or the last page reported by the API has been
    processed.

    Args:
        keyword: Search text forwarded to get_vacancies().

    Fields that are missing or null in an item are stored as NULL instead of
    aborting the run. Any exception raised while fetching or inserting
    propagates to the caller after the inserts are rolled back and the
    connection is closed.
    """
    conn = psycopg2.connect(**db_config)
    try:
        # psycopg2's connection context manager commits on normal exit and
        # rolls back on an exception, but it does not close the connection;
        # the finally clause below does that.
        with conn:
            drop_table(conn)
            create_table(conn)

            page = 0
            while True:
                data = get_vacancies(keyword, page)
                items = data.get('items')
                if not items:
                    break

                with conn.cursor() as cur:
                    cur.executemany(
                        _INSERT_VACANCY_SQL,
                        [_vacancy_row(item) for item in items],
                    )

                if page >= data.get('pages', 0) - 1:
                    break

                page += 1
                # Random 1-3 second pause between requests, presumably to
                # avoid hammering the API.
                time.sleep(random.uniform(1, 3))
    finally:
        conn.close()