# Heart-Hero/app.py (main branch of Arijit2175/Heart-Hero on GitHub)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
import streamlit as st
import numpy as np
import pandas as pd
import joblib
import shap
import matplotlib.pyplot as plt
import seaborn as sns

# Page-level configuration for the app.
st.set_page_config(page_title="Heart Hero", layout="wide")

# Persisted artifacts, loaded in a fixed order: the classifier, the scaler
# applied to the continuous features, the column order the model expects,
# and a pickled copy of the raw data.
# NOTE(review): raw_df is not referenced anywhere else in the visible part of
# this file -- confirm whether this load is still needed.
model, scaler, columns, raw_df = (
    joblib.load(artifact_path)
    for artifact_path in (
        "heart_model.pkl",
        "scaler.pkl",
        "columns.pkl",
        "raw_data.pkl",
    )
)

st.title("❤️ Heart-Hero: Heart Disease Predictor")
st.warning(
    "⚠️ Disclaimer: This application is for educational purposes only and may not be fully accurate enough. Please consult a qualified healthcare professional for medical advice."
)

# CSV copy of the dataset, used for the preview and plots at the bottom of
# the page.
df = pd.read_csv("dataset/dataset.csv")

# Two columns created with the spec [1, 2]: inputs on the left, results on
# the right.
left_col, right_col = st.columns([1, 2])

with left_col:
    st.subheader("🧾 Patient Input")

    # One widget per model feature. Each number input passes
    # (min_value, max_value, value); each select box passes its allowed codes.
    age = st.number_input("Age", min_value=1, max_value=120, value=50)
    sex = st.selectbox("Sex (0 = Female, 1 = Male)", options=[0, 1])
    cp = st.selectbox("Chest Pain Type (0–3)", options=[0, 1, 2, 3])
    trestbps = st.number_input(
        "Resting Blood Pressure", min_value=80, max_value=200, value=120
    )
    chol = st.number_input("Cholesterol", min_value=100, max_value=400, value=200)
    fbs = st.selectbox("Fasting Blood Sugar > 120 mg/dl", options=[0, 1])
    restecg = st.selectbox("Resting ECG (0–2)", options=[0, 1, 2])
    thalach = st.number_input(
        "Max Heart Rate", min_value=60, max_value=220, value=150
    )
    exang = st.selectbox("Exercise Induced Angina", options=[0, 1])
    oldpeak = st.number_input(
        "ST Depression", min_value=0.0, max_value=6.0, value=1.0
    )
    slope = st.selectbox("Slope (0–2)", options=[0, 1, 2])
    ca = st.selectbox("Major Vessels (0–4)", options=[0, 1, 2, 3, 4])
    thal = st.selectbox("Thal (0–3)", options=[0, 1, 2, 3])

    # The prediction section further down only runs when this is clicked.
    predict_btn = st.button("🔍 Predict")

# Raw widget values keyed by feature name; reused later for the explanation
# text and the exported report.
input_dict = {
    'age': age, 'sex': sex, 'cp': cp, 'trestbps': trestbps,
    'chol': chol, 'fbs': fbs, 'restecg': restecg,
    'thalach': thalach, 'exang': exang, 'oldpeak': oldpeak,
    'slope': slope, 'ca': ca, 'thal': thal
}


def _encode_patient_row(values, expected_columns):
    """Return a one-row DataFrame laid out exactly like ``expected_columns``.

    ``pd.get_dummies`` only encodes object/string/category columns by default,
    so calling it on a single all-numeric row encodes nothing, and a following
    ``reindex(..., fill_value=0)`` leaves every one-hot column at 0. This
    helper fills the row directly from the expected column names instead:

    * a column that is itself an input name gets the raw input value;
    * a column named ``<input>_<level>`` (e.g. ``cp_2``) gets 1 when the
      input equals that level and 0 otherwise;
    * any other column gets 0, as the previous ``fill_value=0`` did.

    Args:
        values: Mapping of raw feature name to the value entered by the user.
        expected_columns: Iterable of column names the model expects.

    Returns:
        A single-row ``pandas.DataFrame`` with ``expected_columns`` as columns.
    """
    row = {}
    for col in expected_columns:
        if col in values:
            row[col] = values[col]
            continue

        # Split on the LAST underscore so base names containing underscores
        # are still recognised.
        base, sep, level = str(col).rpartition("_")
        active = False
        if sep and base in values:
            try:
                # Numeric comparison so "2" and "2.0" both match 2.
                active = float(level) == float(values[base])
            except (TypeError, ValueError):
                active = str(values[base]) == level
        row[col] = int(active)

    return pd.DataFrame([row], columns=list(expected_columns))


input_df = _encode_patient_row(input_dict, columns)

# Only the continuous features go through the fitted scaler.
scale_cols = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
input_df[scale_cols] = scaler.transform(input_df[scale_cols])

with right_col:
    st.subheader("📊 Prediction Result")

    if predict_btn:
        prediction = model.predict(input_df)[0]
        # Column 1 of predict_proba is used as the heart-disease probability.
        probability = model.predict_proba(input_df)[0][1]

        if probability >= 0.5:
            st.error(f"Heart Disease Detected (Probability: {probability:.2f})")
        else:
            st.success(f"No Heart Disease Detected (Probability: {probability:.2f})")

        st.subheader("🧠 Patient Feature Contributions")

        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(input_df)

        # Reduce the explainer output to one value per feature for the single
        # input row, taking output index 1 when there are several outputs.
        if isinstance(shap_values, list):
            # Older SHAP layout: one (samples, features) array per output.
            shap_val = shap_values[1][0]
        else:
            shap_array = np.asarray(shap_values)
            if shap_array.ndim == 3:
                # Newer SHAP layout: (samples, features, outputs). Taking
                # row 0 alone and flattening it would interleave the outputs
                # and misalign values with feature names.
                shap_val = shap_array[0, :, 1]
            else:
                # Single-output layout: (samples, features).
                shap_val = shap_array[0]

        feature_names = list(input_df.columns)
        shap_val_flat = np.asarray(shap_val).ravel()
        input_val_list = input_df.iloc[0].tolist()
        # Defensive truncation kept from the original so a length mismatch
        # cannot raise while building the frame.
        min_len = min(len(feature_names), len(shap_val_flat), len(input_val_list))
        shap_df = pd.DataFrame({
            'feature': feature_names[:min_len],
            'shap_value': shap_val_flat[:min_len],
            'input_value': input_val_list[:min_len]
        })

        def active_feature_name(feat, values=None):
            """Map a model column name back to the raw input feature it encodes.

            Args:
                feat: Column name, either a raw feature (``"age"``) or a
                    one-hot column named ``<feature>_<level>`` (``"cp_2"``).
                values: Mapping of raw feature name to the entered value.
                    Defaults to the script-level ``input_dict``.

            Returns:
                ``feat`` itself when it is not a one-hot column; the base
                feature name when the one-hot level equals the entered value;
                ``None`` when the one-hot column is for a different level.
            """
            if values is None:
                values = input_dict

            # Split on the LAST underscore only: a plain ``split("_")`` raises
            # on names with more than one underscore.
            base, sep, level = feat.rpartition("_")
            if not sep:
                return feat

            try:
                level_value = float(level)
            except ValueError:
                # Suffix is not a numeric level, so this is an ordinary
                # feature whose name happens to contain an underscore.
                return feat

            if values.get(base) == level_value:
                return base
            return None

        # Tag each model column with the raw feature it belongs to; one-hot
        # columns for levels the patient does not have map to None and are
        # dropped.
        shap_df['original'] = shap_df['feature'].apply(active_feature_name)
        shap_df_clean = shap_df.dropna(subset=['original'])

        # Sum contributions per raw feature and rank by absolute size.
        agg_shap = (
            shap_df_clean.groupby('original')['shap_value']
            .sum()
            .sort_values(key=abs, ascending=False)
        )
        top5 = agg_shap.head(5)

        st.write("**Top 5 Factors Contributing to Heart Disease Risk:**")
        for feat, val in top5.items():
            direction = "increased" if val > 0 else "reduced"
            st.write(f"• **{feat}** contributed **{val:+.2f}** to {direction} heart disease probability")

        st.subheader("📈 Feature Impact")
        fig, ax = plt.subplots(figsize=(10, 5))
        # Red bars are positive contributions, green bars are non-positive.
        top5.plot(kind="barh", ax=ax, color=['red' if x > 0 else 'green' for x in top5.values])
        ax.set_xlabel("SHAP Contribution (Impact on Prediction)")
        ax.set_title("Top 5 Features Influencing Heart Disease Prediction")
        st.pyplot(fig)
        # Figures created through pyplot stay registered until closed, so
        # close this one once rendered to stop them accumulating.
        plt.close(fig)

        st.subheader("Clinical Interpretation")

        # Fixed explanatory sentence per raw feature; features missing from
        # this table get the generic fallback message.
        clinical_notes = {
            "chol": "High cholesterol is associated with increased heart disease risk.",
            "thalach": "Lower maximum heart rate during exercise may indicate cardiac issues.",
            "trestbps": "High resting blood pressure increases strain on the heart.",
            "age": "Older age is associated with increased cardiovascular risk.",
            "sex": "Males historically show higher heart disease prevalence in the dataset.",
            "cp": "Certain chest pain types are linked with higher cardiac risk.",
            "oldpeak": "ST depression induced by exercise relative to rest (higher = more cardiac stress).",
            "exang": "Exercise-induced angina is a strong indicator of heart disease.",
            "fbs": "Elevated fasting blood sugar indicates metabolic risk.",
            "restecg": "Abnormal resting ECG findings suggest cardiac abnormalities.",
            "slope": "The slope of ST segment during exercise is clinically significant.",
            "ca": "Number of major vessels with calcification indicates atherosclerosis severity.",
            "thal": "Thalassemia type affects oxygen transport and heart function.",
        }
        fallback_note = "This feature significantly influenced the prediction."

        # One line per top-5 feature: the value the user entered plus its note.
        for feat in agg_shap.head(5).index:
            user_val = input_dict.get(feat, "N/A")
            msg = clinical_notes.get(feat, fallback_note)
            st.write(f"**{feat} = {user_val}** → {msg}")

        st.subheader("📥 Export Report")

        disease_predicted = prediction == 1
        # Confidence is the probability of the class actually predicted.
        # Printing the disease probability next to a "No Heart Disease"
        # verdict would understate how sure the model is.
        confidence = probability if disease_predicted else 1 - probability

        heavy_rule = "=" * 50
        light_rule = "-" * 50

        # Collect the report line by line and join once at the end.
        report_lines = [
            "Heart Disease Prediction Report",
            "",
            heavy_rule,
            f"Prediction: {'Heart Disease Detected' if disease_predicted else 'No Heart Disease'}",
            f"Confidence: {confidence:.2%}",
            heavy_rule,
            "",
            "Patient Input Values:",
            light_rule,
        ]
        report_lines.extend(f"{key}: {value}" for key, value in input_dict.items())
        report_lines.extend([
            "",
            f"Predicted Probability of Heart Disease: {probability:.2%}",
            "",
            "Top 5 Contributing Factors:",
            light_rule,
        ])
        for feat, val in agg_shap.head(5).items():
            direction = "increased" if val > 0 else "reduced"
            report_lines.append(
                f"• {feat}: contributed {val:+.2f} to {direction} heart disease probability"
            )
        report_lines.extend([
            "",
            heavy_rule,
            "DISCLAIMER:",
            "This result is generated by a machine learning model and is for",
            "educational purposes only. It is NOT a medical diagnosis.",
            "Please consult with a qualified healthcare professional for",
            "accurate medical evaluation and diagnosis.",
            heavy_rule,
        ])
        report_text = "\n".join(report_lines) + "\n"

        st.download_button(
            label="Download Prediction Report",
            data=report_text,
            file_name="heart_disease_report.txt",
            mime="text/plain"
        )

# Dataset overview, rendered below the prediction area whether or not a
# prediction was requested.
st.markdown("---")
st.subheader("Dataset Preview")
st.write(df.head())

st.subheader("Target Distribution")
# Draw on an explicit Axes rather than pyplot's implicit "current" axes.
fig1, ax1 = plt.subplots()
sns.countplot(x='target', data=df, ax=ax1)
st.pyplot(fig1)
# Figures created through pyplot stay registered until closed, so close each
# one once rendered to stop them accumulating.
plt.close(fig1)

st.subheader("Correlation Heatmap")
fig2, ax2 = plt.subplots(figsize=(8, 6))
sns.heatmap(df.corr(), cmap="coolwarm", annot=True, ax=ax2)
st.pyplot(fig2)
plt.close(fig2)