# app.py -- Streamlit entry point for the StackOverFlow-Software-Developer-Survey-Analysis project.
import streamlit as st
import pandas as pd
from predict_page import show_predict_page
from explore_page import show_explore_page

# Default location of the survey CSV export (Google Drive direct-download link).
_SURVEY_CSV_URL = 'https://drive.google.com/uc?export=download&id=1ebNIs3jPNJpz1jOF2VBusHU2BsUneVU6'


def _correct_exp(value):
    """Convert a ``YearsCodePro`` answer to a float number of years.

    NOTE(review): the original code called ``correct_exp``, which is not
    defined or imported in this module.  This helper assumes the two
    non-numeric answers used by the Stack Overflow survey ("Less than 1 year",
    "More than 50 years") -- confirm against the intended preprocessing.
    """
    if value == 'More than 50 years':
        return 50.0
    if value == 'Less than 1 year':
        return 0.5
    return float(value)


def _correct_education(value):
    """Collapse a verbose ``EdLevel`` answer into one of four short labels.

    NOTE(review): the original code called ``correct_Education``, which is not
    defined or imported in this module.  The buckets below are an assumption
    based on the survey's answer wording -- confirm they match the categories
    the rest of the app expects.
    """
    if 'Bachelor' in value:
        return 'Bachelor’s degree'
    if 'Master' in value:
        return 'Master’s degree'
    if 'Professional degree' in value or 'Other doctoral' in value:
        return 'Post grad'
    return 'Less than a Bachelors'


def load_data(source=_SURVEY_CSV_URL, country_cutoff=400):
    """Load the survey CSV and return a cleaned frame for the explore page.

    Steps: rename ``ConvertedCompYearly`` to ``Salary``, keep rows with a
    salary, relabel countries with ``country_cutoff`` or fewer responses as
    ``'Others'``, drop rows missing any of Country / EdLevel / YearsCodePro /
    Employment / Salary, then normalise experience and education values.

    Args:
        source: Anything ``pandas.read_csv`` accepts (URL, path, or file-like
            object).  Defaults to the hosted survey export.
        country_cutoff: A country keeps its own label only if it has strictly
            more responses than this.

    Returns:
        A DataFrame with columns ``Country``, ``EdLevel``, ``YearsCodePro``
        and ``Salary``; or an empty DataFrame when no salary column exists.

    Raises:
        KeyError: If a salary column exists but one of the other required
            columns is missing from the CSV.
    """
    df = pd.read_csv(source)

    # Check for 'ConvertedCompYearly' and rename it to 'Salary'
    if 'ConvertedCompYearly' in df.columns:
        df = df.rename(columns={'ConvertedCompYearly': 'Salary'})
    else:
        print("ConvertedCompYearly column not found!")

    if 'Salary' not in df.columns:
        print("Salary column not found!")
        return pd.DataFrame()  # Nothing to analyse without a salary column

    # Explicit copy: the column assignment below must not target a slice of
    # the raw frame (that triggers pandas' chained-assignment warning).
    df = df[df['Salary'].notna()].copy()

    # Group smaller countries into 'Others' (missing countries also land there,
    # as they did with the original per-row membership test).
    country_counts = df['Country'].value_counts()
    countries_to_keep = country_counts[country_counts > country_cutoff].index
    df['Country'] = df['Country'].where(df['Country'].isin(countries_to_keep), 'Others')

    # 'Employment' takes part in the missing-value filter and is then dropped.
    df = (
        df[['Country', 'EdLevel', 'YearsCodePro', 'Employment', 'Salary']]
        .dropna()
        .drop(columns='Employment')
    )

    # Correct experience and education fields
    df['YearsCodePro'] = df['YearsCodePro'].apply(_correct_exp)
    df['EdLevel'] = df['EdLevel'].apply(_correct_education)
    return df

# Sidebar navigation: Streamlit reruns this script on every interaction, so
# only do the work the selected page actually needs.
page = st.sidebar.selectbox("Explore or Predict", ("Explore", "Predict"))
if page == 'Predict':
    # The predict page takes no data argument, so the CSV is not loaded here.
    show_predict_page()
else:
    df = load_data()
    if df.empty:
        # load_data() returns an empty frame when the salary column is missing.
        st.error("Survey data could not be loaded: no salary column was found.")
    else:
        show_explore_page(df)

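# SECURITY: a string that looks like a GitHub personal access token was committed on this line and has been redacted. Revoke that token and keep credentials out of source control.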