-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtask.py
More file actions
59 lines (50 loc) · 1.74 KB
/
task.py
File metadata and controls
59 lines (50 loc) · 1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import sqlite3
import pandas as pd
import numpy
import matplotlib.pyplot as plt
# Tasks 1 and 2: rebuild the `works` table in works.sqlite and load works.csv into it.
con = sqlite3.connect('works.sqlite')
cursor = con.cursor()

# Drop any table left over from a previous run; the CSV rows are appended
# below, so an existing table would otherwise keep its old rows.
cursor.execute('drop table if exists works')

# Column definitions; joined with bare commas into the CREATE TABLE statement.
works_columns = (
    'ID INTEGER PRIMARY KEY AUTOINCREMENT',
    'salary INTEGER',
    'educationType TEXT',
    'jobTitle TEXT',
    'qualification TEXT',
    'gender TEXT',
    'dateModify TEXT',
    'skills TEXT',
    'otherInfo TEXT',
)
cursor.execute('create table works (' + ','.join(works_columns) + ')')
con.commit()

# Append the CSV rows to the table created above; the DataFrame index is not
# written as a column.
df = pd.read_csv("works.csv")
df.to_sql(name="works", con=con, index=False, if_exists='append')
con.commit()

# Index on the salary column.
cursor.execute('create index salary_index on works (salary)')
con.commit()
# Tasks 3-7: basic queries against the `works` table.

# Total number of rows in the table.
cursor.execute('SELECT COUNT(*) FROM works')
print(cursor.fetchone()[0])

# Salaries per gender. The values are bound as parameters rather than written
# as double-quoted SQL text (SQLite treats double quotes as identifiers first),
# and NULL salaries are skipped so the lists contain only numbers.
salary_by_gender_sql = (
    'SELECT salary FROM works WHERE gender = ? AND salary IS NOT NULL'
)
cursor.execute(salary_by_gender_sql, ('Женский',))
w_salary = [row[0] for row in cursor.fetchall()]
cursor.execute(salary_by_gender_sql, ('Мужской',))
m_salary = [row[0] for row in cursor.fetchall()]

# Number of rows per gender value, as {gender: count}.
cursor.execute('SELECT gender, COUNT(*) FROM works GROUP BY gender')
gender_counts = dict(cursor.fetchall())

# Every non-NULL skills entry.
cursor.execute('SELECT skills FROM works WHERE skills IS NOT NULL')
skills = [row[0] for row in cursor.fetchall()]

# Salaries of rows whose skills text contains "Python".
cursor.execute('SELECT salary FROM works WHERE skills LIKE ?', ('%Python%',))
python_salaries = [row[0] for row in cursor.fetchall()]
# Tasks 8 and 9: histograms and decile curves for the two gender series.
percentiles = numpy.linspace(.1, 1, 10)  # 0.1, 0.2, ..., 1.0

# Draw the histograms from the values in m_salary / w_salary themselves,
# before they are reduced to ten quantile points; a 100-bin histogram of only
# ten quantile values would not show the distribution.
plt.hist(m_salary, bins=100)
plt.show()
plt.hist(w_salary, bins=100)
plt.show()

# Keep the quantiles in their own names so the source series stay intact.
m_quantiles = numpy.quantile(m_salary, percentiles)
w_quantiles = numpy.quantile(w_salary, percentiles)

plt.plot(percentiles, m_quantiles)
plt.xlabel("Перцентили")
plt.ylabel("Зарплата у мужчин")
plt.show()

plt.plot(percentiles, w_quantiles)
plt.xlabel("Перцентили")
plt.ylabel("Зарплата у женщин")
plt.show()