-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
54 lines (41 loc) · 1.7 KB
/
app.py
File metadata and controls
54 lines (41 loc) · 1.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import streamlit as st
import pandas as pd
from predict_page import show_predict_page
from explore_page import show_explore_page
def load_data():
    """Download the survey CSV and return a cleaned salary DataFrame.

    Steps, in order:

    1. Read the CSV from a fixed Google Drive download URL.
    2. Rename ``ConvertedCompYearly`` to ``Salary`` when present.
    3. Keep only rows whose ``Salary`` is not null.
    4. Relabel every country with 400 or fewer rows as ``'Others'``.
    5. Keep ``Country``, ``EdLevel``, ``YearsCodePro``, ``Employment`` and
       ``Salary``, then drop rows with any missing value in those columns.
    6. Map ``YearsCodePro`` through ``correct_exp`` and ``EdLevel`` through
       ``correct_Education``.
    7. Drop the ``Employment`` column.

    Returns:
        A DataFrame with the columns ``Country``, ``EdLevel``,
        ``YearsCodePro`` and ``Salary``; or an empty DataFrame when the
        CSV has no ``Salary`` column (after the rename attempt).

    Raises:
        KeyError: if any of the other expected columns is missing.

    NOTE(review): Streamlit re-executes the script on each interaction, so
    every call repeats the download; consider wrapping this function in
    Streamlit's data cache.
    """
    url = 'https://drive.google.com/uc?export=download&id=1ebNIs3jPNJpz1jOF2VBusHU2BsUneVU6'
    df = pd.read_csv(url)

    # Check for 'ConvertedCompYearly' and rename it to 'Salary'
    if 'ConvertedCompYearly' in df.columns:
        df = df.rename(columns={'ConvertedCompYearly': 'Salary'})
    else:
        print("ConvertedCompYearly column not found!")

    # Without a salary column there is nothing to clean; hand back an empty
    # frame instead of failing further down.
    if 'Salary' not in df.columns:
        print("Salary column not found!")
        return pd.DataFrame()

    # Explicit copy: the column assignments below then act on an independent
    # frame rather than on a filtered selection of the raw data.
    df = df[df['Salary'].notnull()].copy()

    # Group smaller countries into 'Others' (vectorized; countries at or
    # below the cutoff, and missing countries, all become 'Others').
    country_counts = df['Country'].value_counts()
    cutoff = 400
    countries_to_keep = country_counts[country_counts > cutoff].index
    df['Country'] = df['Country'].where(
        df['Country'].isin(countries_to_keep), 'Others'
    )

    # Select relevant columns and drop rows with missing values.
    # NOTE(review): 'Employment' only takes part in this null filter and is
    # removed again below; if a filter on employment type was intended,
    # it is missing here -- confirm.
    df = df[['Country', 'EdLevel', 'YearsCodePro', 'Employment', 'Salary']].dropna()

    # Correct experience and education fields.
    # NOTE(review): correct_exp and correct_Education are neither defined nor
    # imported in the visible part of this file; unless they are provided
    # elsewhere these two lines raise NameError -- confirm where they live.
    df['YearsCodePro'] = df['YearsCodePro'].apply(correct_exp)
    df['EdLevel'] = df['EdLevel'].apply(correct_Education)

    # Drop unnecessary columns
    return df.drop(columns=['Employment'])
# Sidebar switch between the two pages of the app.
page = st.sidebar.selectbox("Explore or Predict", ("Explore", "Predict"))

if page == 'Predict':
    show_predict_page()
else:
    # Only the Explore page receives the dataset, so it is loaded here
    # rather than unconditionally: the Predict page is called without it and
    # should not pay for (or fail on) the CSV download.
    df = load_data()
    show_explore_page(df)
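# SECURITY: this comment previously contained what appears to be a GitHub personal access token; it has been redacted. Revoke that token and keep credentials out of source control (use environment variables or a secrets store).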