Joblytics/app.py at master · Danish2351/Joblytics · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import streamlit as st
from sqlalchemy import create_engine
import pandas as pd
import plotly.express as px


# ---------- Page Configuration ----------
# Wide layout gives the two-column chart grid further down the full page width.
st.set_page_config(
    layout="wide",
)


# ---------- Database Connection ----------
def mySQL_to_df(role):
    """Load every ``job_data`` row whose ``job_title`` equals *role*.

    Connection settings are read from the ``db_credentials`` section of
    Streamlit's secrets (``host``, ``port``, ``database``, ``user``,
    ``password``).

    Args:
        role: Job title matched exactly against ``job_data.job_title``.

    Returns:
        A pandas DataFrame holding all columns of the matching rows.
    """
    # Local imports keep this function self-contained; sqlalchemy is already
    # a dependency of this file (see the create_engine import).
    from sqlalchemy import text
    from sqlalchemy.engine import URL

    credentials = st.secrets["db_credentials"]

    # URL.create escapes every component, so a password containing characters
    # such as "@", ":" or "/" cannot corrupt the connection string.
    url = URL.create(
        "mysql+pymysql",
        username=credentials["user"],
        password=credentials["password"],
        host=credentials["host"],
        port=int(credentials["port"]),
        database=credentials["database"],
    )
    engine = create_engine(url)

    # Bind the role as a parameter instead of interpolating it into the SQL
    # text: this prevents SQL injection and keeps titles containing quotes
    # from breaking the statement.
    query = text("SELECT * FROM job_data WHERE job_title = :role")
    try:
        return pd.read_sql(query, engine, params={"role": role})
    finally:
        # A fresh engine is built on every call, so release its connection
        # pool once the result has been read.
        engine.dispose()


# ---------- Selectable Job Roles ----------
# Options offered in the job-role dropdown, in display order.
role_to_table = ["Data Scientist", "Data Analyst", "AI Engineer"]

# ---------- Centered Title and Subtitle ----------
# Both headings are raw HTML so they can be centered with inline styles.
for heading_html in (
    "<h1 style='text-align: center;'>Your Career, Backed by Data</h1>",
    "<p style='text-align: center; font-size: 18px;'>Discover top skills, tools, and education trends for your dream job role.</p>",
):
    st.markdown(heading_html, unsafe_allow_html=True)

# ---------- Centered Role Selection ----------
# Three columns; only the wider middle one is used, which centers the dropdown.
col1, col2, col3 = st.columns([2, 4, 2])
selected_role = col2.selectbox("Select Job Role:", role_to_table)

# ---------- Load Data ----------
# Fetch the job posts for the chosen role; `role` is kept as an alias.
df = mySQL_to_df(selected_role)
role = selected_role

# ---------- Right-aligned Job Count ----------
# The narrow last column pushes the counter to the right edge of the page.
right_col = st.columns([6, 1])[1]
job_count_html = f"<p style='text-align:right; font-weight:500;'>According to <b>{len(df)}</b> job posts.</p>"
right_col.markdown(job_count_html, unsafe_allow_html=True)

# ---------- Normalization Map ----------
# Lower-case degree spellings, grouped under the canonical label they map to.
_degree_aliases = {
    'Bachelors': ('bachelors', 'bachelor', 'bs', 'ba', 'b.sc', 'bsc'),
    'Masters': ('masters', 'master', 'ms', 'msc', 'm.sc', 'ma'),
    'Phd': ('phd', 'doctorate'),
}

# Flattened alias -> canonical label lookup.
normalization_map = {
    alias: label
    for label, aliases in _degree_aliases.items()
    for alias in aliases
}

# ---------- Frequency Counting ----------
def get_value_counts(column, normalization_map=None, *, data=None):
    """Count how often each comma-separated value occurs in a column.

    Every non-null cell is converted to text, lower-cased, split on commas
    and stripped of surrounding whitespace; empty tokens are dropped.

    Args:
        column: Name of the column to analyse.
        normalization_map: Optional dict mapping a lower-cased token to the
            label it should be counted under. Tokens missing from the map
            are counted unchanged.
        data: DataFrame to read from. Defaults to the module-level ``df``.

    Returns:
        The ``value_counts()`` of all collected tokens as a pandas Series
        (token -> count, most frequent first); empty when there are no tokens.
    """
    frame = df if data is None else data

    # astype(str) keeps non-string cells (e.g. numbers) from turning into NaN
    # under ``.str.lower()``, which would then fail when iterated below, and
    # lets ``.str`` work even when the column is not of object dtype.
    cells = frame[column].dropna().astype(str).str.lower().str.split(",")

    stripped = (part.strip() for parts in cells for part in parts)
    tokens = [token for token in stripped if token]
    if normalization_map:
        tokens = [normalization_map.get(token, token) for token in tokens]

    # An explicit dtype keeps the empty case well-defined instead of relying
    # on pandas' default dtype for an empty Series.
    return pd.Series(tokens, dtype="object").value_counts()

# ---------- Top-N Setup ----------
# Number of most frequent values kept for each chart, keyed by column name.
top_n_per_column = dict(
    Languages=6,
    Technologies=8,
    Skills=6,
    Education=3,
)

# ---------- Visualization Grid ----------
def _render_bar_chart(column):
    """Render a heading and a top-N bar chart for `column`.

    The column name doubles as the heading text. Call this inside the
    Streamlit container (e.g. ``with some_column:``) the chart belongs to.
    When there are no values to plot, a short notice is shown instead of a
    blank figure.
    """
    st.subheader(column)
    counts = (
        get_value_counts(column)
        .head(top_n_per_column[column])
        .sort_values(ascending=False)
    )
    if counts.empty:
        # Nothing to plot for the selected role; an empty figure would only
        # show bare axes.
        st.info("No data available.")
        return

    fig = px.bar(
        x=counts.index,
        y=counts.values,
        labels={'x': '', 'y': ''},  # blank axis titles
        height=400
    )
    # Tilt the category labels by 45 degrees.
    fig.update_layout(xaxis=dict(tickangle=45))
    st.plotly_chart(fig, use_container_width=True, config={'displayModeBar': False})


# ---------- Top Row ----------
col1, col2 = st.columns(2)

# --- Education Pie ---
with col1:
    st.subheader("Education")
    # Education values are normalized so that spelling variants of the same
    # degree are counted under a single label.
    edu_counts = get_value_counts("Education", normalization_map=normalization_map).head(top_n_per_column["Education"])
    if edu_counts.empty:
        st.info("No data available.")
    else:
        fig_edu = px.pie(
            names=edu_counts.index,
            values=edu_counts.values,
            hole=0.4  # donut-style pie
        )
        st.plotly_chart(fig_edu, use_container_width=True, config={'displayModeBar': False})

# --- Languages Bar ---
with col2:
    _render_bar_chart("Languages")

# ---------- Bottom Row ----------
col3, col4 = st.columns(2)

# --- Technologies Bar ---
with col3:
    _render_bar_chart("Technologies")

# --- Skills Bar ---
with col4:
    _render_bar_chart("Skills")