ndvtechsyssolutions · Jaya400 · Jul 12, 2025 · Jul 12, 2025 · Jul 12, 2025 · Jul 12, 2025
diff --git a/NDV_Code_By_Jaya_Capstone_Project/House_Price_Prediction Dashboard.pdf b/NDV_Code_By_Jaya_Capstone_Project/House_Price_Prediction Dashboard.pdf
diff --git a/NDV_Code_By_Jaya_Capstone_Project/README (1).md b/NDV_Code_By_Jaya_Capstone_Project/README (1).md
@@ -0,0 +1,101 @@
+
+# 🏠 Real Estate Price Prediction Dashboard
+
+This is a simple and fun machine learning web app built using **Streamlit** that predicts house prices based on neighborhood and house features. It's powered by a **Random Forest model** trained on the **Boston Housing Dataset**.
+
+---
+
+##  Objective
+
+To help anyone — even a 10-year-old! — understand how things like house size, air quality, and nearby facilities can affect the price of a house. Enter some easy values, click a button, and get a predicted price in **Indian Rupees (₹)**!
+
+---
+
+##  How to Run
+
+###  Step 1: Install Python Libraries
+
+Install all the required packages using this one command:
+
+```bash
+pip install -r requirements.txt
+```
+
+###  Step 2: Train the Model (run once)
+```bash
+python model_train.py
+```
+
+This saves:
+- `boston_rf_model.pkl` (trained model)
+- `boston_scaler.pkl` (preprocessing scaler)
+
+###  Step 3: Launch the Web App
+
+```bash
+streamlit run app.py
+```
+
+It will open in your browser automatically at:  
+`http://localhost:8501`
+
+---
+
+##  Features Explained (Input Questions)
+
+| Feature | Description |
+|--------|-------------|
+| Neighborhood Safety | 0 = very safe, 100 = very unsafe |
+| Open Land | 0 = no land, 100 = lots of land |
+| Industries Nearby | 0 = no factories, 30 = many factories |
+| Next to River | Better views may increase price |
+| Air Pollution | 0 = clean air, 1 = very polluted |
+| Number of Rooms | 1 = small home, 10 = big home |
+| Age of Home | 0 = brand new, 100 = very old |
+| Distance to Schools & Shops | 0 = very close, 15 = very far |
+| Road Access | 1 = hard to reach, 24 = easy access |
+| Tax Rate | 100 = low tax area, 800 = high tax |
+| Student-Teacher Ratio | 10 = better schools, 30 = crowded schools |
+| Neighborhood Diversity | 0 = less diversity, 400 = more diversity |
+| Low Income % | 0 = wealthy area, 40 = poor area |
+
+---
+
+##  Output
+
+You’ll get a predicted house price like:
+```
+💰 Your predicted house price is: ₹ 45,23,120.00
+```
+
+---
+
+##  Project Files
+
+```
+📁 real_estate_dashboard/
+├── app.py                # Streamlit web app
+├── model_train.py        # Model training script
+├── real_estate.csv       # Housing dataset read by app.py and model_train.py
+├── boston_rf_model.pkl   # Trained ML model
+├── boston_scaler.pkl     # Scaler used for preprocessing
+├── requirements.txt      # Python packages
+└── README.md             # You're reading it!
+```
+
+---
+
+##  Future Ideas
+
+- Add map-based predictions
+- Add images or icons for better engagement
+- Use sliders for even smoother input
+
+---
+
+##  Author
+
+Koruprolu Jayalakshmi 
+Capstone Project – AI/ML Internship  
+2025
+
+---
diff --git a/NDV_Code_By_Jaya_Capstone_Project/Screenshot_Deployment.png b/NDV_Code_By_Jaya_Capstone_Project/Screenshot_Deployment.png
diff --git a/NDV_Code_By_Jaya_Capstone_Project/app.py b/NDV_Code_By_Jaya_Capstone_Project/app.py
@@ -0,0 +1,50 @@
+import os
+import streamlit as st
+import pandas as pd
+import numpy as np
+import joblib
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.preprocessing import StandardScaler
+
+# Auto-train if model not found
+if not os.path.exists("boston_rf_model.pkl") or not os.path.exists("boston_scaler.pkl"):
+    df = pd.read_csv(os.path.join(os.path.dirname(__file__), "real_estate.csv"))  # ✅ Properly indented
+    features = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
+    X = df[features]
+    y = df["MEDV"]
+    scaler = StandardScaler()
+    X_scaled = scaler.fit_transform(X)
+    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
+    model = RandomForestRegressor(n_estimators=100, random_state=42)
+    model.fit(X_train, y_train)
+    joblib.dump(model, "boston_rf_model.pkl")
+    joblib.dump(scaler, "boston_scaler.pkl")
+else:
+    model = joblib.load("boston_rf_model.pkl")
+    scaler = joblib.load("boston_scaler.pkl")
+
+# Streamlit UI
+st.title("🏠 House Price Predictor")
+
+CRIM = st.number_input("Neighborhood Safety (0 = very safe, 100 = unsafe)", min_value=0.0, max_value=100.0, value=0.1)
+ZN = st.number_input("Open Land Around Home (0 = none, 100 = lots of land)", min_value=0.0, max_value=100.0, value=12.5)
+INDUS = st.number_input("Nearby Industries (0 = none, 30 = many)", min_value=0.0, max_value=30.0, value=7.0)
+CHAS = st.selectbox("Is the house next to a river? (Better view = more price)", ["No", "Yes"])
+CHAS = 1 if CHAS == "Yes" else 0
+NOX = st.number_input("Air Pollution (0 = clean air, 1 = polluted)", min_value=0.0, max_value=1.0, value=0.5)
+RM = st.number_input("Number of Rooms (1 = small home, 10 = big home)", min_value=1.0, max_value=10.0, value=6.0)
+AGE = st.number_input("House Age (0 = new, 100 = very old)", min_value=0.0, max_value=100.0, value=60.0)
+DIS = st.number_input("Distance to Schools/Stores (0 = near, 15 = far)", min_value=0.0, max_value=15.0, value=4.0)
+RAD = st.number_input("Access to Main Roads (1 = difficult, 24 = easy)", min_value=1, max_value=24, value=5)
+TAX = st.number_input("Tax Amount Paid (100 = low tax, 800 = high tax)", min_value=100, max_value=800, value=300)
+PTRATIO = st.number_input("Student-Teacher Ratio Nearby", min_value=10.0, max_value=30.0, value=18.0)
+B = st.number_input("Neighborhood Diversity (0 = less, 400 = more)", min_value=0.0, max_value=400.0, value=390.0)
+LSTAT = st.number_input("Low Income Population % (0 = wealthy, 40 = poor)", min_value=0.0, max_value=40.0, value=12.0)
+
+if st.button("🎯 Predict My House Price"):
+    input_data = np.array([[CRIM, ZN, INDUS, CHAS, NOX, RM, AGE, DIS, RAD, TAX, PTRATIO, B, LSTAT]])
+    input_scaled = scaler.transform(input_data)
+    prediction = model.predict(input_scaled)
+    price_inr = prediction[0] * 1000 * 83  # Convert to INR (approx)
+    st.success(f"💰 Your predicted house price is: ₹ {price_inr:,.2f}")
diff --git a/NDV_Code_By_Jaya_Capstone_Project/boston_rf_model.pkl b/NDV_Code_By_Jaya_Capstone_Project/boston_rf_model.pkl
@@ -0,0 +1,27 @@
+import os
+import joblib
+from sklearn.ensemble import RandomForestRegressor
+
+if not os.path.exists("boston_rf_model.pkl"):
+    # Load data, train model
+    import pandas as pd
+    from sklearn.model_selection import train_test_split
+    from sklearn.preprocessing import StandardScaler
+
+    df = pd.read_csv("real_estate.csv")
+    features = ['CRIM','ZN','INDUS','CHAS','NOX','RM','AGE','DIS','RAD','TAX','PTRATIO','B','LSTAT']
+    X = df[features]
+    y = df["MEDV"]
+
+    scaler = StandardScaler()
+    X_scaled = scaler.fit_transform(X)
+    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
+
+    model = RandomForestRegressor()
+    model.fit(X_train, y_train)
+
+    joblib.dump(model, "boston_rf_model.pkl")
+    joblib.dump(scaler, "boston_scaler.pkl")
+else:
+    model = joblib.load("boston_rf_model.pkl")
+    scaler = joblib.load("boston_scaler.pkl")
diff --git a/NDV_Code_By_Jaya_Capstone_Project/boston_scaler.pkl b/NDV_Code_By_Jaya_Capstone_Project/boston_scaler.pkl
diff --git a/NDV_Code_By_Jaya_Capstone_Project/model_train.py b/NDV_Code_By_Jaya_Capstone_Project/model_train.py
@@ -0,0 +1,54 @@
+# Real Estate Price Prediction with Boston Housing Dataset
+
+## 1. Import Libraries
+
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+from sklearn.impute import SimpleImputer
+from sklearn.preprocessing import StandardScaler
+import joblib
+
+
+## 2. Load and Preprocess Dataset
+
+# Load the dataset
+df = pd.read_csv("real_estate.csv")
+
+# Handle missing values
+imputer = SimpleImputer(strategy='mean')
+df[['RM']] = imputer.fit_transform(df[['RM']])
+
+# Feature Scaling
+scaler = StandardScaler()
+scaled_features = scaler.fit_transform(df.drop("MEDV", axis=1))
+X = pd.DataFrame(scaled_features, columns=df.columns[:-1])
+y = df["MEDV"]
+
+
+## 3. Split Data and Train Model
+
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+model = RandomForestRegressor(n_estimators=100, random_state=42)
+model.fit(X_train, y_train)
+y_pred = model.predict(X_test)
+
+## 4. Evaluate Model
+
+from math import sqrt
+
+rmse = sqrt(mean_squared_error(y_test, y_pred))
+print("RMSE:", rmse)
+print("MAE:", mean_absolute_error(y_test, y_pred))
+print("R² Score:", r2_score(y_test, y_pred))
+
+
+## 5. Save Model and Scaler
+
+joblib.dump(model, "boston_rf_model.pkl")
+joblib.dump(scaler, "boston_scaler.pkl")