"""Heart disease prediction on the Kaggle ``heart.csv`` dataset.

Running this script:

1. loads ``heart.csv`` from the current working directory,
2. prints basic exploration output (head, info, describe, null counts),
3. shows three plots (correlation heatmap, target class counts, age by
   target),
4. standardizes the features, tunes a ``RandomForestClassifier`` with a
   5-fold grid search on an 80/20 train/test split, and
5. prints the best parameters, the confusion matrix and the
   classification report for the held-out 20%.

Provenance: recovered from the whitespace-collapsed patch
"NDV_Code_By_NandiniM_HeartDiseasePrediction" (commit 92754c26, author
Nandini Marepalli, dated Tue, 15 Jul 2025), which created
``NDV_Code_BY_NandiniM_HeartDiseasePrediction/HeartDiseasePrediction.py``.
"""

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler

# Load Kaggle heart dataset
df = pd.read_csv("heart.csv")
print(df.head())

# Initial exploration. Nothing is cleaned here; null counts are only reported.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()
print(df.describe())
print(df.isnull().sum())

# Visualize relationships

# Correlation heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(df.corr(), annot=True, cmap="coolwarm")
plt.title("Feature Correlation Heatmap")
plt.show()

# Distribution of target classes
sns.countplot(data=df, x='target')
plt.title("Target Class Distribution")
plt.show()

# Age vs. Heart Disease
sns.boxplot(x='target', y='age', data=df)
plt.title("Age vs. Heart Disease")
plt.show()

# Preprocessing: every column except "target" is used as a feature.
X = df.drop("target", axis=1)
y = df["target"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# The scaler is fit on the training split only and then applied to the test
# split, so test-set statistics do not influence the transformation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model training + tuning: exhaustive search over 2 * 3 * 2 = 12 parameter
# combinations, each scored with 5-fold cross-validation.
param_grid = {
    'n_estimators': [100, 150],
    'max_depth': [4, 6, 8],
    'min_samples_split': [2, 4],
}

rf = RandomForestClassifier(random_state=42)
grid = GridSearchCV(rf, param_grid, cv=5)
grid.fit(X_train, y_train)

print("Best Parameters:", grid.best_params_)

# Evaluate the tuned model on the held-out test split.
y_pred = grid.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))